248 lines
9.7 KiB
LLVM
248 lines
9.7 KiB
LLVM
|
;; Test context disambiguation for a callgraph containing multiple memprof
|
||
|
;; contexts and no inlining, where we need to perform additional cloning
|
||
|
;; during function assignment/cloning to handle the combination of contexts
|
||
|
;; to 2 different allocations.
|
||
|
;;
|
||
|
;; void E(char **buf1, char **buf2) {
|
||
|
;; *buf1 = new char[10];
|
||
|
;; *buf2 = new char[10];
|
||
|
;; }
|
||
|
;;
|
||
|
;; void B(char **buf1, char **buf2) {
|
||
|
;; E(buf1, buf2);
|
||
|
;; }
|
||
|
;;
|
||
|
;; void C(char **buf1, char **buf2) {
|
||
|
;; E(buf1, buf2);
|
||
|
;; }
|
||
|
;;
|
||
|
;; void D(char **buf1, char **buf2) {
|
||
|
;; E(buf1, buf2);
|
||
|
;; }
|
||
|
;; int main(int argc, char **argv) {
|
||
|
;; char *cold1, *cold2, *default1, *default2, *default3, *default4;
|
||
|
;; B(&default1, &default2);
|
||
|
;; C(&default3, &cold1);
|
||
|
;; D(&cold2, &default4);
|
||
|
;; memset(cold1, 0, 10);
|
||
|
;; memset(cold2, 0, 10);
|
||
|
;; memset(default1, 0, 10);
|
||
|
;; memset(default2, 0, 10);
|
||
|
;; memset(default3, 0, 10);
|
||
|
;; memset(default4, 0, 10);
|
||
|
;; delete[] default1;
|
||
|
;; delete[] default2;
|
||
|
;; delete[] default3;
|
||
|
;; delete[] default4;
|
||
|
;; sleep(10);
|
||
|
;; delete[] cold1;
|
||
|
;; delete[] cold2;
|
||
|
;; return 0;
|
||
|
;; }
|
||
|
;;
|
||
|
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||
|
;; memory freed after sleep(10) results in cold lifetimes.
|
||
|
;;
|
||
|
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||
|
|
||
|
;; -stats requires asserts
|
||
|
; REQUIRES: asserts
|
||
|
|
||
|
; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
|
||
|
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||
|
; RUN: -stats -pass-remarks=memprof-context-disambiguation \
|
||
|
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
|
||
|
; RUN: --check-prefix=STATS --check-prefix=REMARKS
|
||
|
|
||
|
|
||
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||
|
target triple = "x86_64-unknown-linux-gnu"
|
||
|
|
||
|
define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 {
|
||
|
entry:
|
||
|
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !7
|
||
|
%call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !8, !callsite !15
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare ptr @_Znam(i64) #1
|
||
|
|
||
|
define internal void @_Z1BPPcS0_(ptr %0, ptr %1) {
|
||
|
entry:
|
||
|
call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !16
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Function Attrs: noinline
|
||
|
define internal void @_Z1CPPcS0_(ptr %0, ptr %1) #2 {
|
||
|
entry:
|
||
|
call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !17
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
define internal void @_Z1DPPcS0_(ptr %0, ptr %1) #3 {
|
||
|
entry:
|
||
|
call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !18
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||
|
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
|
||
|
|
||
|
declare i32 @sleep() #5
|
||
|
|
||
|
; uselistorder directives
|
||
|
uselistorder ptr @_Znam, { 1, 0 }
|
||
|
|
||
|
attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
|
||
|
attributes #1 = { "no-trapping-math"="true" }
|
||
|
attributes #2 = { noinline }
|
||
|
attributes #3 = { "frame-pointer"="all" }
|
||
|
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||
|
attributes #5 = { "disable-tail-calls"="true" }
|
||
|
attributes #6 = { builtin }
|
||
|
|
||
|
!0 = !{!1, !3, !5}
|
||
|
!1 = !{!2, !"cold"}
|
||
|
!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
|
||
|
!3 = !{!4, !"notcold"}
|
||
|
!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
|
||
|
!5 = !{!6, !"notcold"}
|
||
|
!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
|
||
|
!7 = !{i64 -3461278137325233666}
|
||
|
!8 = !{!9, !11, !13}
|
||
|
!9 = !{!10, !"notcold"}
|
||
|
!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
|
||
|
!11 = !{!12, !"cold"}
|
||
|
!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
|
||
|
!13 = !{!14, !"notcold"}
|
||
|
!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
|
||
|
!15 = !{i64 -1415475215210681400}
|
||
|
!16 = !{i64 -2441057035866683071}
|
||
|
!17 = !{i64 -3483158674395044949}
|
||
|
!18 = !{i64 -7799663586031895603}
|
||
|
|
||
|
|
||
|
;; Originally we create a single clone of each call to new from E, since each
|
||
|
;; allocates cold memory for a single caller.
|
||
|
|
||
|
; DUMP: CCG after cloning:
|
||
|
; DUMP: Callsite Context Graph:
|
||
|
; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
|
||
|
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||
|
; DUMP: AllocTypes: NotCold
|
||
|
; DUMP: ContextIds: 2 3
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: CallerEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
|
||
|
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
|
||
|
; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
|
||
|
|
||
|
; DUMP: Node [[D:0x[a-z0-9]+]]
|
||
|
; DUMP: call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0)
|
||
|
; DUMP: AllocTypes: NotColdCold
|
||
|
; DUMP: ContextIds: 1 6
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
|
||
|
; DUMP: Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
|
||
|
; DUMP: CallerEdges:
|
||
|
|
||
|
; DUMP: Node [[C]]
|
||
|
; DUMP: call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0)
|
||
|
; DUMP: AllocTypes: NotColdCold
|
||
|
; DUMP: ContextIds: 2 5
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
|
||
|
; DUMP: Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
|
||
|
; DUMP: CallerEdges:
|
||
|
|
||
|
; DUMP: Node [[B]]
|
||
|
; DUMP: call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0)
|
||
|
; DUMP: AllocTypes: NotCold
|
||
|
; DUMP: ContextIds: 3 4
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
|
||
|
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
|
||
|
; DUMP: CallerEdges:
|
||
|
|
||
|
; DUMP: Node [[ENEW2ORIG]]
|
||
|
; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||
|
; DUMP: AllocTypes: NotCold
|
||
|
; DUMP: ContextIds: 4 6
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: CallerEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
|
||
|
; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
|
||
|
; DUMP: Clones: [[ENEW2CLONE]]
|
||
|
|
||
|
; DUMP: Node [[ENEW1CLONE]]
|
||
|
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||
|
; DUMP: AllocTypes: Cold
|
||
|
; DUMP: ContextIds: 1
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: CallerEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
|
||
|
; DUMP: Clone of [[ENEW1ORIG]]
|
||
|
|
||
|
; DUMP: Node [[ENEW2CLONE]]
|
||
|
; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||
|
; DUMP: AllocTypes: Cold
|
||
|
; DUMP: ContextIds: 5
|
||
|
; DUMP: CalleeEdges:
|
||
|
; DUMP: CallerEdges:
|
||
|
; DUMP: Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
|
||
|
; DUMP: Clone of [[ENEW2ORIG]]
|
||
|
|
||
|
|
||
|
;; We greedily create a clone of E that is initially used by the clones of the
|
||
|
;; first call to new. However, we end up with an incompatible set of callers
|
||
|
;; given the second call to new which has clones with a different combination of
|
||
|
;; callers. Eventually, we create 2 more clones, and the first clone becomes dead.
|
||
|
; REMARKS: created clone _Z1EPPcS0_.memprof.1
|
||
|
; REMARKS: created clone _Z1EPPcS0_.memprof.2
|
||
|
; REMARKS: created clone _Z1EPPcS0_.memprof.3
|
||
|
; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2
|
||
|
; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold
|
||
|
; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3
|
||
|
; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold
|
||
|
; REMARKS: call in clone _Z1BPPcS0_ assigned to call function clone _Z1EPPcS0_
|
||
|
; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
|
||
|
; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold
|
||
|
; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold
|
||
|
; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
|
||
|
|
||
|
|
||
|
;; Original version of E is used for the non-cold allocations, both from B.
|
||
|
; IR: define internal {{.*}} @_Z1EPPcS0_(
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
|
||
|
; IR: define internal {{.*}} @_Z1BPPcS0_(
|
||
|
; IR: call {{.*}} @_Z1EPPcS0_(
|
||
|
;; C calls a clone of E with the first new allocating cold memory and the
|
||
|
;; second allocating non-cold memory.
|
||
|
; IR: define internal {{.*}} @_Z1CPPcS0_(
|
||
|
; IR: call {{.*}} @_Z1EPPcS0_.memprof.3(
|
||
|
;; D calls a clone of E with the first new allocating non-cold memory and the
|
||
|
;; second allocating cold memory.
|
||
|
; IR: define internal {{.*}} @_Z1DPPcS0_(
|
||
|
; IR: call {{.*}} @_Z1EPPcS0_.memprof.2(
|
||
|
;; Transient clone that will get removed as it ends up with no callers.
|
||
|
;; Its calls to new never get updated with a memprof attribute as a result.
|
||
|
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.1(
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT:[0-9]+]]
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT]]
|
||
|
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2(
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
|
||
|
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3(
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
|
||
|
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
|
||
|
; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
|
||
|
; IR: attributes #[[DEFAULT]] = { builtin }
|
||
|
; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
|
||
|
|
||
|
|
||
|
; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
|
||
|
; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
|
||
|
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
|