/builds/2mk6rsew/0/parcoach/parcoach/src/aSSA/MemorySSA.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | #include "parcoach/MemorySSA.h" |
2 | | |
3 | | #include "MSSAMuChi.h" |
4 | | #include "PTACallGraph.h" |
5 | | #include "Utils.h" |
6 | | #include "parcoach/Collectives.h" |
7 | | #include "parcoach/MemoryRegion.h" |
8 | | #include "parcoach/ModRefAnalysis.h" |
9 | | #include "parcoach/Options.h" |
10 | | |
11 | | #include "llvm/IR/InstIterator.h" |
12 | | #include "llvm/Support/FileSystem.h" |
13 | | |
14 | | #define DEBUG_TYPE "mssa" |
15 | | |
16 | | using namespace llvm; |
17 | | |
18 | | namespace parcoach { |
19 | | namespace { |
20 | | cl::opt<bool> OptDumpSsa("dump-ssa", cl::desc("Dump the all-inclusive SSA"), |
21 | | cl::cat(ParcoachCategory)); |
22 | | |
23 | | cl::opt<std::string> OptDumpSsaFunc("dump-ssa-func", |
24 | | cl::desc("Dump the all-inclusive SSA " |
25 | | "for a particular function."), |
26 | | cl::cat(ParcoachCategory)); |
27 | | |
28 | | } // namespace |
29 | | |
30 | | MemorySSA::MemorySSA(Module &M, Andersen const &PTA, PTACallGraph const &CG, |
31 | | MemReg const &Regions, ModRefAnalysisResult *MRA, |
32 | | ExtInfo const &ExtInfo, ModuleAnalysisManager &AM) |
33 | | : PTA(PTA), CG(CG), Regions(Regions), MRA(MRA), extInfo(ExtInfo), |
34 | 1.76k | curDF(NULL), curDT(NULL) { |
35 | 1.76k | buildSSA(M, AM); |
36 | 1.76k | } |
37 | | |
38 | 1.76k | MemorySSA::~MemorySSA() {} |
39 | | |
40 | 1.76k | void MemorySSA::buildSSA(llvm::Module &M, llvm::ModuleAnalysisManager &AM) { |
41 | 1.76k | TimeTraceScope TTS("MemorySSA"); |
42 | | // Get an inner FunctionAnalysisManager from the module one. |
43 | 1.76k | auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); |
44 | 19.8k | for (Function &F : M) { |
45 | 19.8k | if (!CG.isReachableFromEntry(F)) { |
46 | | // errs() << F.getName() << " is not reachable from entry\n"; |
47 | | |
48 | 1.91k | continue; |
49 | 1.91k | } |
50 | | |
51 | 17.9k | if (isIntrinsicDbgFunction(&F)) { |
52 | 0 | continue; |
53 | 0 | } |
54 | | |
55 | 17.9k | if (F.isDeclaration()) { |
56 | 16.1k | continue; |
57 | 16.1k | } |
58 | | |
59 | | // errs() << " + Fun: " << counter << " - " << F.getName() << "\n"; |
60 | 1.82k | DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); |
61 | 1.82k | DominanceFrontier &DF = FAM.getResult<DominanceFrontierAnalysis>(F); |
62 | 1.82k | PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F); |
63 | | |
64 | 1.82k | buildSSA(&F, DT, DF, PDT); |
65 | 1.82k | if (OptDumpSsa) { |
66 | 1 | dumpMSSA(&F); |
67 | 1 | } |
68 | 1.82k | if (F.getName().equals(OptDumpSsaFunc)) { |
69 | 0 | dumpMSSA(&F); |
70 | 0 | } |
71 | 1.82k | } |
72 | 1.76k | } |
73 | | |
74 | | void MemorySSA::buildSSA(Function const *F, DominatorTree &DT, |
75 | 1.82k | DominanceFrontier &DF, PostDominatorTree &PDT) { |
76 | 1.82k | curDF = &DF; |
77 | 1.82k | curDT = &DT; |
78 | 1.82k | curPDT = &PDT; |
79 | | |
80 | 1.82k | usedRegs.clear(); |
81 | 1.82k | regDefToBBMap.clear(); |
82 | | |
83 | 1.82k | { |
84 | 1.82k | TimeTraceScope TTS("parcoach::MemorySSA::ComputeMuChi"); |
85 | 1.82k | computeMuChi(F); |
86 | 1.82k | } |
87 | | |
88 | 1.82k | { |
89 | 1.82k | TimeTraceScope TTS("parcoach::MemorySSA::ComputePhi"); |
90 | 1.82k | computePhi(F); |
91 | 1.82k | } |
92 | | |
93 | 1.82k | { |
94 | 1.82k | TimeTraceScope TTS("parcoach::MemorySSA::Rename"); |
95 | 1.82k | rename(F); |
96 | 1.82k | } |
97 | | |
98 | 1.82k | { |
99 | 1.82k | TimeTraceScope TTS("parcoach::MemorySSA::ComputePhiPredicates"); |
100 | 1.82k | computePhiPredicates(F); |
101 | 1.82k | } |
102 | 1.82k | } |
103 | | |
104 | 1.82k | void MemorySSA::computeMuChi(Function const *F) { |
105 | 190k | for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) { |
106 | 188k | Instruction const *Inst = &*I; |
107 | | |
108 | | /* Call Site */ |
109 | 188k | if (isCallSite(Inst)) { |
110 | 53.8k | if (isIntrinsicDbgInst(Inst)) { |
111 | 26.2k | continue; |
112 | 26.2k | } |
113 | | |
114 | 27.5k | CallBase *Cs = const_cast<CallBase *>(cast<CallBase>(Inst)); |
115 | 27.5k | Function *Callee = Cs->getCalledFunction(); |
116 | | |
117 | | // indirect call |
118 | 27.5k | if (Callee == NULL) { |
119 | 6 | for (Function const *MayCallee : CG.getIndirectCallMap().lookup(Inst)) { |
120 | 6 | if (isIntrinsicDbgFunction(MayCallee)) { |
121 | 0 | continue; |
122 | 0 | } |
123 | | |
124 | 6 | computeMuChiForCalledFunction(Cs, const_cast<Function *>(MayCallee)); |
125 | 6 | if (MayCallee->isDeclaration()) { |
126 | 2 | createArtificalChiForCalledFunction(Cs, MayCallee); |
127 | 2 | } |
128 | 6 | } |
129 | 3 | } |
130 | | |
131 | | // direct call |
132 | 27.5k | else { |
133 | 27.5k | if (isIntrinsicDbgFunction(Callee)) { |
134 | 0 | continue; |
135 | 0 | } |
136 | | |
137 | 27.5k | computeMuChiForCalledFunction(Cs, Callee); |
138 | 27.5k | if (Callee->isDeclaration()) { |
139 | 27.5k | createArtificalChiForCalledFunction(Cs, Callee); |
140 | 27.5k | } |
141 | 27.5k | } |
142 | | |
143 | 27.5k | continue; |
144 | 27.5k | } |
145 | | |
146 | | /* Load Instruction |
147 | | * We insert a Mu function for each region pointed-to by |
148 | | * the load instruction. |
149 | | */ |
150 | 134k | if (isa<LoadInst>(Inst)) { |
151 | 43.9k | LoadInst const *LI = cast<LoadInst>(Inst); |
152 | 43.9k | std::vector<Value const *> PtsSet; |
153 | 43.9k | bool Found = PTA.getPointsToSet(LI->getPointerOperand(), PtsSet); |
154 | 43.9k | assert(Found && "Load not found in MSSA"); |
155 | 43.9k | if (!Found) { |
156 | 0 | continue; |
157 | 0 | } |
158 | 43.9k | MemRegVector Regs; |
159 | 43.9k | Regions.getValuesRegion(PtsSet, Regs); |
160 | | |
161 | 43.9k | for (auto *R : Regs) { |
162 | 43.9k | if (MRA->inGlobalKillSet(R)) { |
163 | 0 | continue; |
164 | 0 | } |
165 | | |
166 | 43.9k | loadToMuMap[LI].emplace_back(std::make_unique<MSSALoadMu>(R, LI)); |
167 | 43.9k | usedRegs.insert(R); |
168 | 43.9k | } |
169 | | |
170 | 43.9k | continue; |
171 | 43.9k | } |
172 | | |
173 | | /* Store Instruction |
174 | | * We insert a Chi function for each region pointed-to by |
175 | | * the store instruction. |
176 | | */ |
177 | 90.4k | if (isa<StoreInst>(Inst)) { |
178 | 26.4k | StoreInst const *SI = cast<StoreInst>(Inst); |
179 | 26.4k | std::vector<Value const *> PtsSet; |
180 | 26.4k | bool Found = PTA.getPointsToSet(SI->getPointerOperand(), PtsSet); |
181 | 26.4k | assert(Found && "Store not found in MSSA"); |
182 | 26.4k | if (!Found) { |
183 | 0 | continue; |
184 | 0 | } |
185 | 26.4k | MemRegVector Regs; |
186 | 26.4k | Regions.getValuesRegion(PtsSet, Regs); |
187 | | |
188 | 26.4k | for (auto *R : Regs) { |
189 | 26.4k | if (MRA->inGlobalKillSet(R)) { |
190 | 0 | continue; |
191 | 0 | } |
192 | | |
193 | 26.4k | storeToChiMap[SI].emplace_back(std::make_unique<MSSAStoreChi>(R, SI)); |
194 | 26.4k | usedRegs.insert(R); |
195 | 26.4k | regDefToBBMap[R].insert(Inst->getParent()); |
196 | 26.4k | } |
197 | | |
198 | 26.4k | continue; |
199 | 26.4k | } |
200 | 90.4k | } |
201 | | |
202 | | /* Create an EntryChi and a ReturnMu for each memory region used by the |
203 | | * function. |
204 | | */ |
205 | 40.3k | for (auto *R : usedRegs) { |
206 | 40.3k | auto &Chi = |
207 | 40.3k | funToEntryChiMap[F].emplace_back(std::make_unique<MSSAEntryChi>(R, F)); |
208 | | |
209 | 40.3k | funRegToEntryChiMap[F][Chi->region] = Chi.get(); |
210 | 40.3k | regDefToBBMap[R].insert(&F->getEntryBlock()); |
211 | 40.3k | } |
212 | | |
213 | 1.82k | if (!functionDoesNotRet(F)) { |
214 | 40.3k | for (auto *R : usedRegs) { |
215 | 40.3k | auto &Mu = |
216 | 40.3k | funToReturnMuMap[F].emplace_back(std::make_unique<MSSARetMu>(R, F)); |
217 | 40.3k | funRegToReturnMuMap[F][Mu->region] = Mu.get(); |
218 | 40.3k | } |
219 | 1.82k | } |
220 | 1.82k | } |
221 | | |
222 | | void MemorySSA::computeMuChiForCalledFunction(CallBase *Inst, |
223 | 27.5k | Function *Callee) { |
224 | 27.5k | #if defined(PARCOACH_ENABLE_CUDA) || defined(PARCOACH_ENABLE_OPENMP) |
225 | 27.5k | Collective const *Coll = Collective::find(*Callee); |
226 | 27.5k | #endif |
227 | | #ifdef PARCOACH_ENABLE_CUDA |
228 | | // If the called function is a CUDA synchronization, create an artificial CHI |
229 | | // for each shared region. |
230 | | if (Coll && isa<CudaCollective>(Coll) && Coll->Name == "llvm.nvvm.barrier0") { |
231 | | for (auto *r : Regions.getCudaSharedRegions()) { |
232 | | callSiteToSyncChiMap[inst].emplace( |
233 | | std::make_unique<MSSASyncChi>(r, inst)); |
234 | | regDefToBBMap[r].insert(inst->getParent()); |
235 | | usedRegs.insert(r); |
236 | | } |
237 | | // NOTE (PV): I removed a return here because we want each arg of the |
238 | | // barrier to go through the muchi parameters thingy! |
239 | | // Otherwise a used region may not appear in the mu map and would lead |
240 | | // to breaking an assert! |
241 | | } |
242 | | #endif |
243 | | |
244 | 27.5k | #ifdef PARCOACH_ENABLE_OPENMP |
245 | | // If the called function is an OMP barrier, create an artificial CHI |
246 | | // for each shared region. |
247 | 27.5k | if (Coll && isa<OMPCollective>(Coll) && Coll->Name == "__kmpc_barrier") { |
248 | 21 | for (auto *R : |
249 | 21 | getRange(Regions.getOmpSharedRegions(), Inst->getFunction())) { |
250 | 1 | callSiteToSyncChiMap[Inst].emplace_back( |
251 | 1 | std::make_unique<MSSASyncChi>(R, Inst)); |
252 | 1 | regDefToBBMap[R].insert(Inst->getParent()); |
253 | 1 | usedRegs.insert(R); |
254 | 1 | } |
255 | | // NOTE (PV): I removed a return here because we want each arg of the |
256 | | // barrier to go through the muchi parameters thingy! |
257 | | // Otherwise a used region may not appear in the mu map and would lead |
258 | | // to breaking an assert! |
259 | 21 | } |
260 | 27.5k | #endif |
261 | | |
262 | | // If the callee is a declaration (external function), we create a Mu |
263 | | // for each pointer argument and a Chi for each modified argument. |
264 | 27.5k | if (Callee->isDeclaration()) { |
265 | 27.5k | assert(isa<CallInst>(Inst)); // InvokeInst are not handled yet |
266 | 27.5k | CallInst *CI = cast<CallInst>(Inst); |
267 | 27.5k | extFuncToCSMap[Callee].insert(Inst); |
268 | | |
269 | 27.5k | auto const *Info = extInfo.getExtModInfo(Callee); |
270 | | |
271 | | // Mu and Chi for parameters |
272 | 98.5k | for (unsigned I = 0; I < CI->arg_size(); ++I) { |
273 | 70.9k | Value *Arg = CI->getArgOperand(I); |
274 | 70.9k | if (!Arg->getType()->isPointerTy()) { |
275 | 15.9k | continue; |
276 | 15.9k | } |
277 | | |
278 | | // Case where argument is a inttoptr cast (e.g. MPI_IN_PLACE) |
279 | 55.0k | ConstantExpr *Ce = dyn_cast<ConstantExpr>(Arg); |
280 | 55.0k | if (Ce) { |
281 | 0 | Instruction *Inst = Ce->getAsInstruction(); |
282 | 0 | assert(Inst); |
283 | 0 | bool IsAIntToPtr = isa<IntToPtrInst>(Inst); |
284 | 0 | Inst->deleteValue(); |
285 | 0 | if (IsAIntToPtr) { |
286 | 0 | continue; |
287 | 0 | } |
288 | 0 | } |
289 | | |
290 | 55.0k | std::vector<Value const *> PtsSet; |
291 | 55.0k | std::vector<Value const *> ArgPtsSet; |
292 | 55.0k | bool Found = PTA.getPointsToSet(Arg, ArgPtsSet); |
293 | 55.0k | assert(Found && "arg not found in MSSA"); |
294 | 55.0k | if (!Found) { |
295 | 0 | continue; |
296 | 0 | } |
297 | 55.0k | PtsSet.insert(PtsSet.end(), ArgPtsSet.begin(), ArgPtsSet.end()); |
298 | | |
299 | 55.0k | MemRegVector Regs; |
300 | 55.0k | Regions.getValuesRegion(PtsSet, Regs); |
301 | | |
302 | | // Mus |
303 | 55.0k | for (auto *R : Regs) { |
304 | 54.7k | if (MRA->inGlobalKillSet(R)) { |
305 | 0 | continue; |
306 | 0 | } |
307 | | |
308 | 54.7k | callSiteToMuMap[Inst].emplace_back( |
309 | 54.7k | std::make_unique<MSSAExtCallMu>(R, Callee, I)); |
310 | 54.7k | usedRegs.insert(R); |
311 | 54.7k | } |
312 | | |
313 | 55.0k | if (!Info) { |
314 | 0 | continue; |
315 | 0 | } |
316 | | |
317 | | // Chis |
318 | 55.0k | if (I >= Info->NbArgs) { |
319 | 3 | assert(Callee->isVarArg()); |
320 | 3 | if (Info->ArgIsMod[Info->NbArgs - 1]) { |
321 | 2 | for (auto *R : Regs) { |
322 | 2 | if (MRA->inGlobalKillSet(R)) { |
323 | 0 | continue; |
324 | 0 | } |
325 | | |
326 | 2 | callSiteToChiMap[Inst].emplace_back( |
327 | 2 | std::make_unique<MSSAExtCallChi>(R, Callee, I, Inst)); |
328 | 2 | regDefToBBMap[R].insert(Inst->getParent()); |
329 | 2 | } |
330 | 2 | } |
331 | 55.0k | } else { |
332 | 55.0k | if (Info->ArgIsMod[I]) { |
333 | 17.2k | for (auto *R : Regs) { |
334 | 16.9k | if (MRA->inGlobalKillSet(R)) { |
335 | 0 | continue; |
336 | 0 | } |
337 | | |
338 | 16.9k | callSiteToChiMap[Inst].emplace_back( |
339 | 16.9k | std::make_unique<MSSAExtCallChi>(R, Callee, I, Inst)); |
340 | 16.9k | regDefToBBMap[R].insert(Inst->getParent()); |
341 | 16.9k | } |
342 | 17.2k | } |
343 | 55.0k | } |
344 | 55.0k | } |
345 | | |
346 | | // Chi for return value if it is a pointer |
347 | 27.5k | if (Info && Info->RetIsMod && CI->getType()->isPointerTy()) { |
348 | 2.60k | std::vector<Value const *> PtsSet; |
349 | 2.60k | bool Found = PTA.getPointsToSet(CI, PtsSet); |
350 | 2.60k | assert(Found && "CI not found in MSSA"); |
351 | 2.60k | if (!Found) { |
352 | 0 | return; |
353 | 0 | } |
354 | | |
355 | 2.60k | MemRegVector Regs; |
356 | 2.60k | Regions.getValuesRegion(PtsSet, Regs); |
357 | | |
358 | 2.60k | for (auto *R : Regs) { |
359 | 2.54k | if (MRA->inGlobalKillSet(R)) { |
360 | 0 | continue; |
361 | 0 | } |
362 | | |
363 | 2.54k | extCallSiteToCallerRetChi[Inst].emplace_back( |
364 | 2.54k | std::make_unique<MSSAExtRetCallChi>(R, Callee)); |
365 | 2.54k | regDefToBBMap[R].insert(Inst->getParent()); |
366 | 2.54k | usedRegs.insert(R); |
367 | 2.54k | } |
368 | 2.60k | } |
369 | 27.5k | } |
370 | | |
371 | | // If the callee is not a declaration we create a Mu(Chi) for each region |
372 | | // in Ref(Mod) callee. |
373 | 64 | else { |
374 | 64 | Function const *Caller = Inst->getParent()->getParent(); |
375 | 64 | MemRegSet KillSet = MRA->getFuncKill(Caller); |
376 | | |
377 | | // Create Mu for each region \in ref(callee) |
378 | 371 | for (auto *R : MRA->getFuncRef(Callee)) { |
379 | 371 | if (KillSet.find(R) == KillSet.end()) { |
380 | 215 | callSiteToMuMap[Inst].emplace_back( |
381 | 215 | std::make_unique<MSSACallMu>(R, Callee)); |
382 | 215 | usedRegs.insert(R); |
383 | 215 | } |
384 | 371 | } |
385 | | |
386 | | // Create Chi for each region \in mod(callee) |
387 | 210 | for (auto *R : MRA->getFuncMod(Callee)) { |
388 | 210 | if (KillSet.find(R) == KillSet.end()) { |
389 | 31 | callSiteToChiMap[Inst].emplace_back( |
390 | 31 | std::make_unique<MSSACallChi>(R, Callee, Inst)); |
391 | 31 | regDefToBBMap[R].insert(Inst->getParent()); |
392 | 31 | usedRegs.insert(R); |
393 | 31 | } |
394 | 210 | } |
395 | 64 | } |
396 | 27.5k | } |
397 | | |
398 | 1.82k | void MemorySSA::computePhi(Function const *F) { |
399 | | // For each memory region used, compute basic blocks where phi must be |
400 | | // inserted. |
401 | 40.3k | for (auto *R : usedRegs) { |
402 | 40.3k | std::vector<BasicBlock const *> Worklist; |
403 | 40.3k | std::set<BasicBlock const *> DomFronPlus; |
404 | 40.3k | std::set<BasicBlock const *> Work; |
405 | | |
406 | 70.7k | for (BasicBlock const *X : regDefToBBMap[R]) { |
407 | 70.7k | Worklist.push_back(X); |
408 | 70.7k | Work.insert(X); |
409 | 70.7k | } |
410 | | |
411 | 118k | while (!Worklist.empty()) { |
412 | 78.2k | BasicBlock const *X = Worklist.back(); |
413 | 78.2k | Worklist.pop_back(); |
414 | | |
415 | 78.2k | auto It = curDF->find(const_cast<BasicBlock *>(X)); |
416 | 78.2k | if (It == curDF->end()) { |
417 | 0 | errs() << "Error: basic block not in the dom frontier !\n"; |
418 | 0 | exit(EXIT_FAILURE); |
419 | 0 | continue; |
420 | 0 | } |
421 | | |
422 | 78.2k | auto DomSet = It->second; |
423 | 78.2k | for (auto *Y : DomSet) { |
424 | 14.6k | if (DomFronPlus.find(Y) != DomFronPlus.end()) { |
425 | 6.55k | continue; |
426 | 6.55k | } |
427 | | |
428 | 8.11k | bbToPhiMap[Y].emplace_back(std::make_unique<MSSAPhi>(R)); |
429 | 8.11k | DomFronPlus.insert(Y); |
430 | | |
431 | 8.11k | if (Work.find(Y) != Work.end()) { |
432 | 646 | continue; |
433 | 646 | } |
434 | | |
435 | 7.47k | Work.insert(Y); |
436 | 7.47k | Worklist.push_back(Y); |
437 | 7.47k | } |
438 | 78.2k | } |
439 | 40.3k | } |
440 | 1.82k | } |
441 | | |
442 | 1.82k | void MemorySSA::rename(Function const *F) { |
443 | 1.82k | std::map<MemRegEntry *, unsigned> C; |
444 | 1.82k | std::map<MemRegEntry *, std::vector<MSSAVar *>> S; |
445 | | |
446 | | // Initialization: |
447 | | |
448 | | // C(*) <- 0 |
449 | 40.3k | for (auto *R : usedRegs) { |
450 | 40.3k | C[R] = 0; |
451 | 40.3k | } |
452 | | |
453 | | // Compute LHS version for each region. |
454 | 40.3k | for (auto &Chi : funToEntryChiMap[F]) { |
455 | 40.3k | Chi->var = std::make_unique<MSSAVar>(Chi.get(), 0, &F->getEntryBlock()); |
456 | | |
457 | 40.3k | S[Chi->region].push_back(Chi->var.get()); |
458 | 40.3k | C[Chi->region]++; |
459 | 40.3k | } |
460 | | |
461 | 1.82k | renameBB(F, &F->getEntryBlock(), C, S); |
462 | 1.82k | } |
463 | | |
464 | | void MemorySSA::renameBB(Function const *F, llvm::BasicBlock const *X, |
465 | | std::map<MemRegEntry *, unsigned> &C, |
466 | 15.0k | std::map<MemRegEntry *, std::vector<MSSAVar *>> &S) { |
467 | | // Compute LHS for PHI |
468 | 15.0k | for (auto &Phi : bbToPhiMap[X]) { |
469 | 8.11k | auto *V = Phi->region; |
470 | 8.11k | unsigned I = C[V]; |
471 | 8.11k | Phi->var = std::make_unique<MSSAVar>(Phi.get(), I, X); |
472 | 8.11k | S[V].push_back(Phi->var.get()); |
473 | 8.11k | C[V]++; |
474 | 8.11k | } |
475 | | |
476 | | // For each ordinary assignment A do |
477 | | // For each variable V in RHS(A) |
478 | | // Replace use of V by use of Vi where i = Top(S(V)) |
479 | | // Let V be LHS(A) |
480 | | // i <- C(V) |
481 | | // replace V by Vi |
482 | | // push i onto S(V) |
483 | | // C(V) <- i + 1 |
484 | 203k | for (auto I = X->begin(), E = X->end(); I != E; ++I) { |
485 | 188k | Instruction const *Inst = &*I; |
486 | | |
487 | 188k | if (isCallSite(Inst)) { |
488 | 53.8k | CallBase *Cs(const_cast<CallBase *>(cast<CallBase>(Inst))); |
489 | | |
490 | 54.9k | for (auto &Mu : callSiteToMuMap[Cs]) { |
491 | 54.9k | Mu->var = S[Mu->region].back(); |
492 | 54.9k | } |
493 | | |
494 | 53.8k | for (auto &Chi : callSiteToSyncChiMap[Cs]) { |
495 | 1 | auto *V = Chi->region; |
496 | 1 | unsigned I = C[V]; |
497 | 1 | Chi->var = std::make_unique<MSSAVar>(Chi.get(), I, Inst->getParent()); |
498 | 1 | Chi->opVar = S[V].back(); |
499 | 1 | S[V].push_back(Chi->var.get()); |
500 | 1 | C[V]++; |
501 | 1 | } |
502 | | |
503 | 53.8k | for (auto &Chi : callSiteToChiMap[Cs]) { |
504 | 17.0k | auto *V = Chi->region; |
505 | 17.0k | unsigned I = C[V]; |
506 | 17.0k | Chi->var = std::make_unique<MSSAVar>(Chi.get(), I, Inst->getParent()); |
507 | 17.0k | Chi->opVar = S[V].back(); |
508 | 17.0k | S[V].push_back(Chi->var.get()); |
509 | 17.0k | C[V]++; |
510 | 17.0k | } |
511 | | |
512 | 53.8k | for (auto &Chi : extCallSiteToCallerRetChi[Cs]) { |
513 | 2.54k | auto *V = Chi->region; |
514 | 2.54k | unsigned I = C[V]; |
515 | 2.54k | Chi->var = std::make_unique<MSSAVar>(Chi.get(), I, Inst->getParent()); |
516 | 2.54k | Chi->opVar = S[V].back(); |
517 | 2.54k | S[V].push_back(Chi->var.get()); |
518 | 2.54k | C[V]++; |
519 | 2.54k | } |
520 | 53.8k | } |
521 | | |
522 | 188k | if (isa<StoreInst>(Inst)) { |
523 | 26.4k | StoreInst const *SI = cast<StoreInst>(Inst); |
524 | 26.4k | for (auto &Chi : storeToChiMap[SI]) { |
525 | 26.4k | auto *V = Chi->region; |
526 | 26.4k | unsigned I = C[V]; |
527 | 26.4k | Chi->var = std::make_unique<MSSAVar>(Chi.get(), I, Inst->getParent()); |
528 | 26.4k | Chi->opVar = S[V].back(); |
529 | 26.4k | S[V].push_back(Chi->var.get()); |
530 | 26.4k | C[V]++; |
531 | 26.4k | } |
532 | 26.4k | } |
533 | | |
534 | 188k | if (isa<LoadInst>(Inst)) { |
535 | 43.9k | LoadInst const *LI = cast<LoadInst>(Inst); |
536 | 43.9k | for (auto &Mu : loadToMuMap[LI]) { |
537 | 43.9k | Mu->var = S[Mu->region].back(); |
538 | 43.9k | } |
539 | 43.9k | } |
540 | | |
541 | 188k | if (isa<ReturnInst>(Inst)) { |
542 | 40.3k | for (auto &Mu : funToReturnMuMap[F]) { |
543 | 40.3k | Mu->var = S[Mu->region].back(); |
544 | 40.3k | } |
545 | 1.82k | } |
546 | 188k | } |
547 | | |
548 | | // For each successor Y of X |
549 | | // For each Phi function F in Y |
550 | | // Replace operands V by Vi where i = Top(S(V)) |
551 | 33.8k | for (auto I = succ_begin(X), E = succ_end(X); I != E; ++I) { |
552 | 18.7k | BasicBlock const *Y = *I; |
553 | 18.7k | for (auto &Phi : bbToPhiMap[Y]) { |
554 | 16.3k | unsigned Index = whichPred(X, Y); |
555 | 16.3k | Phi->opsVar[Index] = S[Phi->region].back(); |
556 | 16.3k | } |
557 | 18.7k | } |
558 | | |
559 | | // For each successor of X in the dominator tree |
560 | 15.0k | DomTreeNode *DTnode = curDT->getNode(const_cast<BasicBlock *>(X)); |
561 | 15.0k | assert(DTnode); |
562 | 28.3k | for (auto I = DTnode->begin(), E = DTnode->end(); I != E; ++I) { |
563 | 13.2k | BasicBlock const *Y = (*I)->getBlock(); |
564 | 13.2k | renameBB(F, Y, C, S); |
565 | 13.2k | } |
566 | | |
567 | | // For each assignment of A in X |
568 | | // pop(S(A)) |
569 | 15.0k | for (auto &Phi : bbToPhiMap[X]) { |
570 | 8.11k | auto *V = Phi->region; |
571 | 8.11k | S[V].pop_back(); |
572 | 8.11k | } |
573 | 203k | for (auto I = X->begin(), E = X->end(); I != E; ++I) { |
574 | 188k | Instruction const *Inst = &*I; |
575 | | |
576 | 188k | if (isa<CallInst>(Inst)) { |
577 | 53.8k | CallBase *Cs(const_cast<CallBase *>(cast<CallBase>(Inst))); |
578 | | |
579 | 53.8k | for (auto &Chi : callSiteToSyncChiMap[Cs]) { |
580 | 1 | auto *V = Chi->region; |
581 | 1 | S[V].pop_back(); |
582 | 1 | } |
583 | | |
584 | 53.8k | for (auto &Chi : callSiteToChiMap[Cs]) { |
585 | 17.0k | auto *V = Chi->region; |
586 | 17.0k | S[V].pop_back(); |
587 | 17.0k | } |
588 | | |
589 | 53.8k | for (auto &Chi : extCallSiteToCallerRetChi[Cs]) { |
590 | 2.54k | auto *V = Chi->region; |
591 | 2.54k | S[V].pop_back(); |
592 | 2.54k | } |
593 | 53.8k | } |
594 | | |
595 | 188k | if (auto const *SI = dyn_cast<StoreInst>(Inst)) { |
596 | 26.4k | for (auto &Chi : storeToChiMap[SI]) { |
597 | 26.4k | auto *V = Chi->region; |
598 | 26.4k | S[V].pop_back(); |
599 | 26.4k | } |
600 | 26.4k | } |
601 | 188k | } |
602 | 15.0k | } |
603 | | |
604 | 1.82k | void MemorySSA::computePhiPredicates(llvm::Function const *F) { |
605 | 1.82k | ValueSet Preds; |
606 | | |
607 | 15.0k | for (BasicBlock const &BB : *F) { |
608 | 15.0k | for (auto &Phi : bbToPhiMap[&BB]) { |
609 | 8.11k | computeMSSAPhiPredicates(Phi.get()); |
610 | 8.11k | } |
611 | | |
612 | 188k | for (Instruction const &Inst : BB) { |
613 | 188k | PHINode const *Phi = dyn_cast<PHINode>(&Inst); |
614 | 188k | if (!Phi) { |
615 | 188k | continue; |
616 | 188k | } |
617 | 49 | computeLLVMPhiPredicates(Phi); |
618 | 49 | } |
619 | 15.0k | } |
620 | 1.82k | } |
621 | | |
622 | 49 | void MemorySSA::computeLLVMPhiPredicates(llvm::PHINode const *Phi) { |
623 | | // For each argument of the PHINode |
624 | 147 | for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { |
625 | | // Get IPDF |
626 | 98 | std::vector<BasicBlock *> IPDF = |
627 | 98 | iterated_postdominance_frontier(*curPDT, Phi->getIncomingBlock(I)); |
628 | | |
629 | 296 | for (unsigned N = 0; N < IPDF.size(); ++N) { |
630 | | // Push conditions of each BB in the IPDF |
631 | 198 | Instruction const *Ti = IPDF[N]->getTerminator(); |
632 | 198 | assert(Ti); |
633 | | |
634 | 198 | if (isa<BranchInst>(Ti)) { |
635 | 198 | BranchInst const *BI = cast<BranchInst>(Ti); |
636 | 198 | assert(BI); |
637 | | |
638 | 198 | if (BI->isUnconditional()) { |
639 | 0 | continue; |
640 | 0 | } |
641 | | |
642 | 198 | Value const *Cond = BI->getCondition(); |
643 | | |
644 | 198 | llvmPhiToPredMap[Phi].insert(Cond); |
645 | 198 | } else if (isa<SwitchInst>(Ti)) { |
646 | 0 | SwitchInst const *SI = cast<SwitchInst>(Ti); |
647 | 0 | assert(SI); |
648 | 0 | Value const *Cond = SI->getCondition(); |
649 | 0 | llvmPhiToPredMap[Phi].insert(Cond); |
650 | 0 | } |
651 | 198 | } |
652 | 98 | } |
653 | 49 | } |
654 | | |
655 | 8.11k | void MemorySSA::computeMSSAPhiPredicates(MSSAPhi *Phi) { |
656 | | // For each argument of the PHINode |
657 | 16.3k | for (auto I : Phi->opsVar) { |
658 | 16.3k | MSSAVar *Op = I.second; |
659 | | // Get IPDF |
660 | 16.3k | std::vector<BasicBlock *> IPDF = iterated_postdominance_frontier( |
661 | 16.3k | *curPDT, const_cast<BasicBlock *>(Op->bb)); |
662 | | |
663 | 34.3k | for (unsigned N = 0; N < IPDF.size(); ++N) { |
664 | | // Push conditions of each BB in the IPDF |
665 | 18.0k | Instruction const *Ti = IPDF[N]->getTerminator(); |
666 | 18.0k | assert(Ti); |
667 | | |
668 | 18.0k | if (isa<BranchInst>(Ti)) { |
669 | 18.0k | BranchInst const *BI = cast<BranchInst>(Ti); |
670 | 18.0k | assert(BI); |
671 | | |
672 | 18.0k | if (BI->isUnconditional()) { |
673 | 0 | continue; |
674 | 0 | } |
675 | | |
676 | 18.0k | Value const *Cond = BI->getCondition(); |
677 | | |
678 | 18.0k | Phi->preds.insert(Cond); |
679 | 18.0k | } else if (isa<SwitchInst>(Ti)) { |
680 | 0 | SwitchInst const *SI = cast<SwitchInst>(Ti); |
681 | 0 | assert(SI); |
682 | 0 | Value const *Cond = SI->getCondition(); |
683 | 0 | Phi->preds.insert(Cond); |
684 | 0 | } |
685 | 18.0k | } |
686 | 16.3k | } |
687 | 8.11k | } |
688 | | |
689 | 16.3k | unsigned MemorySSA::whichPred(BasicBlock const *Pred, BasicBlock const *Succ) { |
690 | 16.3k | unsigned Index = 0; |
691 | 24.7k | for (auto I = pred_begin(Succ), E = pred_end(Succ); I != E; ++I, ++Index) { |
692 | 24.7k | if (Pred == *I) { |
693 | 16.3k | return Index; |
694 | 16.3k | } |
695 | 24.7k | } |
696 | | |
697 | 0 | return Index; |
698 | 16.3k | } |
699 | | |
700 | 1 | void MemorySSA::dumpMSSA(llvm::Function const *F) { |
701 | 1 | std::string Filename = F->getName().str(); |
702 | 1 | Filename.append("-assa.ll"); |
703 | 1 | errs() << "Writing '" << Filename << "' ...\n"; |
704 | 1 | std::error_code EC; |
705 | 1 | raw_fd_ostream Stream(Filename, EC, sys::fs::OF_Text); |
706 | | |
707 | | // Function header |
708 | 1 | Stream << "define " << *F->getReturnType() << " @" << F->getName().str() |
709 | 1 | << "("; |
710 | 2 | for (Argument const &Arg : F->args()) { |
711 | 2 | Stream << Arg << ", "; |
712 | 2 | } |
713 | 1 | Stream << ") {\n"; |
714 | | |
715 | | // Dump entry chi |
716 | 2 | for (auto &Chi : funToEntryChiMap[F]) { |
717 | 2 | Stream << Chi->region->getName() << Chi->var->version << "\n"; |
718 | 2 | } |
719 | | |
720 | | // For each basic block |
721 | 1 | for (auto const &BB : *F) { |
722 | | |
723 | | // BB name |
724 | 1 | Stream << BB.getName().str() << ":\n"; |
725 | | |
726 | | // Phi functions |
727 | 1 | for (auto &Phi : bbToPhiMap[&BB]) { |
728 | 0 | Stream << Phi->region->getName() << Phi->var->version << " = phi( "; |
729 | 0 | for (auto I : Phi->opsVar) { |
730 | 0 | Stream << Phi->region->getName() << I.second->version << ", "; |
731 | 0 | } |
732 | 0 | for (Value const *V : Phi->preds) { |
733 | 0 | Stream << getValueLabel(V) << ", "; |
734 | 0 | } |
735 | 0 | Stream << ")\n"; |
736 | 0 | } |
737 | | |
738 | | // For each instruction |
739 | 8 | for (auto const &Inst : BB) { |
740 | | |
741 | | // LLVM Phi node: print predicates |
742 | 8 | if (PHINode const *PHI = dyn_cast<PHINode>(&Inst)) { |
743 | 0 | Stream << getValueLabel(PHI) << " = phi("; |
744 | 0 | for (Value const *Incoming : PHI->incoming_values()) { |
745 | 0 | Stream << getValueLabel(Incoming) << ", "; |
746 | 0 | } |
747 | 0 | for (Value const *Pred : llvmPhiToPredMap[PHI]) { |
748 | 0 | Stream << getValueLabel(Pred) << ", "; |
749 | 0 | } |
750 | 0 | Stream << ")\n"; |
751 | 0 | continue; |
752 | 0 | } |
753 | | |
754 | | // Load inst |
755 | 8 | if (LoadInst const *LI = dyn_cast<LoadInst>(&Inst)) { |
756 | 2 | Stream << getValueLabel(LI) << " = mu("; |
757 | | |
758 | 2 | for (auto &Mu : loadToMuMap[LI]) { |
759 | 2 | Stream << Mu->region->getName() << Mu->var->version << ", "; |
760 | 2 | } |
761 | | |
762 | 2 | Stream << getValueLabel(LI->getPointerOperand()) << ")\n"; |
763 | 2 | continue; |
764 | 2 | } |
765 | | |
766 | | // Store inst |
767 | 6 | if (auto const *SI = dyn_cast<StoreInst>(&Inst)) { |
768 | 2 | for (auto &Chi : storeToChiMap[SI]) { |
769 | 2 | Stream << Chi->region->getName() << Chi->var->version << " = X(" |
770 | 2 | << Chi->region->getName() << Chi->opVar->version << ", " |
771 | 2 | << getValueLabel(SI->getValueOperand()) << ", " |
772 | 2 | << getValueLabel(SI->getPointerOperand()) << ")\n"; |
773 | 2 | } |
774 | 2 | continue; |
775 | 2 | } |
776 | | |
777 | | // Call inst |
778 | 4 | if (CallInst const *CI = dyn_cast<CallInst>(&Inst)) { |
779 | 0 | if (isIntrinsicDbgInst(CI)) { |
780 | 0 | continue; |
781 | 0 | } |
782 | | |
783 | 0 | CallInst *Cs(const_cast<CallInst *>(CI)); |
784 | 0 | Stream << *CI << "\n"; |
785 | 0 | for (auto &Mu : callSiteToMuMap[Cs]) { |
786 | 0 | Stream << " mu(" << Mu->region->getName() << Mu->var->version |
787 | 0 | << ")\n"; |
788 | 0 | } |
789 | |
|
790 | 0 | for (auto &Chi : callSiteToChiMap[Cs]) { |
791 | 0 | Stream << Chi->region->getName() << Chi->var->version << " = " |
792 | 0 | << " X(" << Chi->region->getName() << Chi->opVar->version |
793 | 0 | << ")\n"; |
794 | 0 | } |
795 | |
|
796 | 0 | for (auto &Chi : callSiteToSyncChiMap[Cs]) { |
797 | 0 | Stream << Chi->region->getName() << Chi->var->version << " = " |
798 | 0 | << " X(" << Chi->region->getName() << Chi->opVar->version |
799 | 0 | << ")\n"; |
800 | 0 | } |
801 | |
|
802 | 0 | for (auto &Chi : extCallSiteToCallerRetChi[Cs]) { |
803 | 0 | Stream << Chi->region->getName() << Chi->var->version << " = " |
804 | 0 | << " X(" << Chi->region->getName() << Chi->opVar->version |
805 | 0 | << ")\n"; |
806 | 0 | } |
807 | 0 | continue; |
808 | 0 | } |
809 | | |
810 | 4 | Stream << Inst << "\n"; |
811 | 4 | } |
812 | 1 | } |
813 | | |
814 | | // Dump return mu |
815 | 2 | for (auto &Mu : funToReturnMuMap[F]) { |
816 | 2 | Stream << " mu(" << Mu->region->getName() << Mu->var->version << ")\n"; |
817 | 2 | } |
818 | | |
819 | 1 | Stream << "}\n"; |
820 | 1 | } |
821 | | |
822 | | void MemorySSA::createArtificalChiForCalledFunction( |
823 | 27.5k | llvm::CallBase *CB, llvm::Function const *Callee) { |
824 | | // Not sure if we need mod/ref info here, we can just create entry/exit chi |
825 | | // for each pointer arguments and then only connect the exit/return chis of |
826 | | // modified arguments in the dep graph. |
827 | | |
828 | | // If it is a var arg function, create artificial entry and exit chi for the |
829 | | // var arg. |
830 | 27.5k | if (Callee->isVarArg()) { |
831 | 5.13k | auto [ItEntry, _] = extCallSiteToVarArgEntryChi[Callee].emplace( |
832 | 5.13k | CB, std::make_unique<MSSAExtVarArgChi>(Callee)); |
833 | 5.13k | auto &EntryChi = ItEntry->second; |
834 | 5.13k | EntryChi->var = std::make_unique<MSSAVar>(EntryChi.get(), 0, nullptr); |
835 | | |
836 | 5.13k | auto [ItOut, __] = extCallSiteToVarArgExitChi[Callee].emplace( |
837 | 5.13k | CB, std::make_unique<MSSAExtVarArgChi>(Callee)); |
838 | 5.13k | auto &OutChi = ItOut->second; |
839 | 5.13k | OutChi->var = std::make_unique<MSSAVar>(EntryChi.get(), 1, nullptr); |
840 | 5.13k | OutChi->opVar = EntryChi->var.get(); |
841 | 5.13k | } |
842 | | |
843 | | // Create artifical entry and exit chi for each pointer argument. |
844 | 27.5k | unsigned ArgId = 0; |
845 | 67.5k | for (Argument const &Arg : Callee->args()) { |
846 | 67.5k | if (!Arg.getType()->isPointerTy()) { |
847 | 12.4k | ArgId++; |
848 | 12.4k | continue; |
849 | 12.4k | } |
850 | | |
851 | 55.0k | auto Chi = std::make_unique<MSSAExtArgChi>(Callee, ArgId); |
852 | 55.0k | auto [ItEntry, EntryInserted] = |
853 | 55.0k | extCallSiteToArgEntryChi[Callee][CB].emplace(ArgId, Chi.get()); |
854 | 55.0k | if (EntryInserted) { |
855 | 55.0k | AllocatedArgChi.emplace_back(std::move(Chi)); |
856 | 55.0k | } |
857 | 55.0k | auto &EntryChi = ItEntry->second; |
858 | 55.0k | EntryChi->var = std::make_unique<MSSAVar>(EntryChi, 0, nullptr); |
859 | | |
860 | 55.0k | auto ChiOut = std::make_unique<MSSAExtArgChi>(Callee, ArgId); |
861 | 55.0k | auto [ItOut, OutInserted] = |
862 | 55.0k | extCallSiteToArgExitChi[Callee][CB].emplace(ArgId, ChiOut.get()); |
863 | 55.0k | if (OutInserted) { |
864 | 55.0k | AllocatedArgChi.emplace_back(std::move(ChiOut)); |
865 | 55.0k | } |
866 | 55.0k | auto &ExitChi = ItOut->second; |
867 | 55.0k | ExitChi->var = std::make_unique<MSSAVar>(ExitChi, 1, nullptr); |
868 | 55.0k | ExitChi->opVar = EntryChi->var.get(); |
869 | | |
870 | 55.0k | ArgId++; |
871 | 55.0k | } |
872 | | |
873 | | // Create artifical chi for return value if it is a pointer. |
874 | 27.5k | if (Callee->getReturnType()->isPointerTy()) { |
875 | 2.60k | auto [ItRet, _] = extCallSiteToCalleeRetChi[Callee].emplace( |
876 | 2.60k | CB, std::make_unique<MSSAExtRetChi>(Callee)); |
877 | 2.60k | auto &RetChi = ItRet->second; |
878 | 2.60k | RetChi->var = std::make_unique<MSSAVar>(RetChi.get(), 0, nullptr); |
879 | 2.60k | } |
880 | 27.5k | } |
881 | | |
882 | | AnalysisKey MemorySSAAnalysis::Key; |
883 | | MemorySSAAnalysis::Result MemorySSAAnalysis::run(Module &M, |
884 | 1.76k | ModuleAnalysisManager &AM) { |
885 | 1.76k | TimeTraceScope TTS("parcoach::MemorySSAAnalysisPass"); |
886 | 1.76k | auto const &AA = AM.getResult<AndersenAA>(M); |
887 | 1.76k | auto const &ExtInfo = AM.getResult<ExtInfoAnalysis>(M); |
888 | 1.76k | auto const &MRA = AM.getResult<ModRefAnalysis>(M); |
889 | 1.76k | auto const &PTACG = AM.getResult<PTACallGraphAnalysis>(M); |
890 | 1.76k | auto const &Regions = AM.getResult<MemRegAnalysis>(M); |
891 | 1.76k | return std::make_unique<MemorySSA>(M, AA, *PTACG, *Regions, MRA.get(), |
892 | 1.76k | *ExtInfo, AM); |
893 | 1.76k | } |
894 | | } // namespace parcoach |