1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the auto-upgrade helper functions.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/AutoUpgrade.h"
15 #include "llvm/Constants.h"
16 #include "llvm/Function.h"
17 #include "llvm/LLVMContext.h"
18 #include "llvm/Module.h"
19 #include "llvm/IntrinsicInst.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/Support/CallSite.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/IRBuilder.h"
24 #include <cstring>
25 using namespace llvm;
26
27
28 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
29   assert(F && "Illegal to upgrade a non-existent Function.");
30
31   // Get the Function's name.
32   const std::string& Name = F->getName();
33
34   // Convenience
35   const FunctionType *FTy = F->getFunctionType();
36
37   // Quickly eliminate it, if it's not a candidate.
38   if (Name.length() <= 8 || Name[0] != 'l' || Name[1] != 'l' || 
39       Name[2] != 'v' || Name[3] != 'm' || Name[4] != '.')
40     return false;
41
42   Module *M = F->getParent();
43   switch (Name[5]) {
44   default: break;
45   case 'a':
46     // This upgrades the llvm.atomic.lcs, llvm.atomic.las, and llvm.atomic.lss
47     // intrinsics, as well as atomics with default address spaces, to their new
48     // function names (e.g. llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32).
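    // For example, llvm.atomic.lcs.i32 becomes llvm.atomic.cmp.swap.i32.p0i32.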
49     if (Name.compare(5,7,"atomic.",7) == 0) {
50       if (Name.compare(12,3,"lcs",3) == 0) {
51         std::string::size_type delim = Name.find('.',12);
52         F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) +
53                    ".p0" + Name.substr(delim+1));
54         NewFn = F;
55         return true;
56       }
57       else if (Name.compare(12,3,"las",3) == 0) {
58         std::string::size_type delim = Name.find('.',12);
59         F->setName("llvm.atomic.load.add"+Name.substr(delim)
60                    + ".p0" + Name.substr(delim+1));
61         NewFn = F;
62         return true;
63       }
64       else if (Name.compare(12,3,"lss",3) == 0) {
65         std::string::size_type delim = Name.find('.',12);
66         F->setName("llvm.atomic.load.sub"+Name.substr(delim)
67                    + ".p0" + Name.substr(delim+1));
68         NewFn = F;
69         return true;
70       }
71       else if (Name.rfind(".p") == std::string::npos) {
72         // We don't have an address space qualifier, so this has to be upgraded
73         // to the new name.  Copy the type name at the end of the intrinsic
74         // and append it to the new name.
75         std::string::size_type delim = Name.find_last_of('.');
76         assert(delim != std::string::npos && "can not find type");
77         F->setName(Name + ".p0" + Name.substr(delim+1));
78         NewFn = F;
79         return true;
80       }
81     } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
82       if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
83             Name.compare(14, 5, "vaddl", 5) == 0 ||
84             Name.compare(14, 5, "vsubl", 5) == 0 ||
85             Name.compare(14, 5, "vaddw", 5) == 0 ||
86             Name.compare(14, 5, "vsubw", 5) == 0 ||
87             Name.compare(14, 5, "vmlal", 5) == 0 ||
88             Name.compare(14, 5, "vmlsl", 5) == 0 ||
89             Name.compare(14, 5, "vabdl", 5) == 0 ||
90             Name.compare(14, 5, "vabal", 5) == 0) &&
91            (Name.compare(19, 2, "s.", 2) == 0 ||
92             Name.compare(19, 2, "u.", 2) == 0)) ||
93
94           (Name.compare(14, 4, "vaba", 4) == 0 &&
95            (Name.compare(18, 2, "s.", 2) == 0 ||
96             Name.compare(18, 2, "u.", 2) == 0)) ||
97
98           (Name.compare(14, 6, "vmovn.", 6) == 0)) {
99
100         // Calls to these are transformed into IR without intrinsics.
101         NewFn = 0;
102         return true;
103       }
104       // Old versions of NEON ld/st intrinsics are missing alignment arguments.
105       bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
106       bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
107       if (isVLd || isVSt) {
108         unsigned NumVecs = Name.at(17) - '0';
109         if (NumVecs == 0 || NumVecs > 4)
110           return false;
111         bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
112         if (!isLaneOp && Name.at(18) != '.')
113           return false;
114         unsigned ExpectedArgs = 2; // for the address and alignment
115         if (isVSt || isLaneOp)
116           ExpectedArgs += NumVecs;
117         if (isLaneOp)
118           ExpectedArgs += 1; // for the lane number
119         unsigned NumP = FTy->getNumParams();
120         if (NumP != ExpectedArgs - 1)
121           return false;
122
123         // Change the name of the old (bad) intrinsic, because 
124         // its type is incorrect, but we cannot overload that name.
125         F->setName("");
126
127         // One argument is missing: add the alignment argument.
128         std::vector<const Type*> NewParams;
129         for (unsigned p = 0; p < NumP; ++p)
130           NewParams.push_back(FTy->getParamType(p));
131         NewParams.push_back(Type::getInt32Ty(F->getContext()));
132         FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
133                                                  NewParams, false);
134         NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
135         return true;
136       }
137     }
138     break;
139   case 'b':
140     //  This upgrades the name of the llvm.bswap intrinsic function to only use 
141     //  a single type name for overloading. We only care about the old format
142     //  'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being 
143     //  a '.' after 'bswap.'
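    //  For example, llvm.bswap.i32.i32 becomes llvm.bswap.i32.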
144     if (Name.compare(5,6,"bswap.",6) == 0) {
145       std::string::size_type delim = Name.find('.',11);
146       
147       if (delim != std::string::npos) {
148         //  Construct the new name as 'llvm.bswap' + '.i*'
149         F->setName(Name.substr(0,10)+Name.substr(delim));
150         NewFn = F;
151         return true;
152       }
153     }
154     break;
155
156   case 'c':
157     //  We only want to fix the 'llvm.ct*' intrinsics which do not have the 
158     //  correct return type, so we check for the name, and then check if the 
159     //  return type does not match the parameter type.
160     if ( (Name.compare(5,5,"ctpop",5) == 0 ||
161           Name.compare(5,4,"ctlz",4) == 0 ||
162           Name.compare(5,4,"cttz",4) == 0) &&
163         FTy->getReturnType() != FTy->getParamType(0)) {
164       //  We first need to change the name of the old (bad) intrinsic, because 
165       //  its type is incorrect, but we cannot overload that name. We 
166       //  arbitrarily unique it here allowing us to construct a correctly named 
167       //  and typed function below.
168       F->setName("");
169
170       //  Now construct the new intrinsic with the correct name and type. We 
171       //  leave the old function around in order to query its type, whatever it 
172       //  may be, and correctly convert up to the new type.
173       NewFn = cast<Function>(M->getOrInsertFunction(Name, 
174                                                     FTy->getParamType(0),
175                                                     FTy->getParamType(0),
176                                                     (Type *)0));
177       return true;
178     }
179     break;
180
181   case 'e':
182     //  The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector.
183     if (Name.compare("llvm.eh.selector.i32") == 0) {
184       F->setName("llvm.eh.selector");
185       NewFn = F;
186       return true;
187     }
188     //  The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for.
189     if (Name.compare("llvm.eh.typeid.for.i32") == 0) {
190       F->setName("llvm.eh.typeid.for");
191       NewFn = F;
192       return true;
193     }
194     //  Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector.
195     if (Name.compare("llvm.eh.selector.i64") == 0) {
196       NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector);
197       return true;
198     }
199     //  Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for.
200     if (Name.compare("llvm.eh.typeid.for.i64") == 0) {
201       NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for);
202       return true;
203     }
204     break;
205
206   case 'm': {
207     // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset intrinsics
208     // to the new format that allows overloading the pointer for different
209     // address spaces (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16).
210     const char* NewFnName = NULL;
211     if (Name.compare(5,8,"memcpy.i",8) == 0) {
212       if (Name[13] == '8')
213         NewFnName = "llvm.memcpy.p0i8.p0i8.i8";
214       else if (Name.compare(13,2,"16") == 0)
215         NewFnName = "llvm.memcpy.p0i8.p0i8.i16";
216       else if (Name.compare(13,2,"32") == 0)
217         NewFnName = "llvm.memcpy.p0i8.p0i8.i32";
218       else if (Name.compare(13,2,"64") == 0)
219         NewFnName = "llvm.memcpy.p0i8.p0i8.i64";
220     } else if (Name.compare(5,9,"memmove.i",9) == 0) {
221       if (Name[14] == '8')
222         NewFnName = "llvm.memmove.p0i8.p0i8.i8";
223       else if (Name.compare(14,2,"16") == 0)
224         NewFnName = "llvm.memmove.p0i8.p0i8.i16";
225       else if (Name.compare(14,2,"32") == 0)
226         NewFnName = "llvm.memmove.p0i8.p0i8.i32";
227       else if (Name.compare(14,2,"64") == 0)
228         NewFnName = "llvm.memmove.p0i8.p0i8.i64";
229     }
230     else if (Name.compare(5,8,"memset.i",8) == 0) {
231       if (Name[13] == '8')
232         NewFnName = "llvm.memset.p0i8.i8";
233       else if (Name.compare(13,2,"16") == 0)
234         NewFnName = "llvm.memset.p0i8.i16";
235       else if (Name.compare(13,2,"32") == 0)
236         NewFnName = "llvm.memset.p0i8.i32";
237       else if (Name.compare(13,2,"64") == 0)
238         NewFnName = "llvm.memset.p0i8.i64";
239     }
240     if (NewFnName) {
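      // The new overloads also take a trailing i1 'isvolatile' operand, which is
      // why an extra i1 is appended to the old parameter list below.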
241       NewFn = cast<Function>(M->getOrInsertFunction(NewFnName, 
242                                             FTy->getReturnType(),
243                                             FTy->getParamType(0),
244                                             FTy->getParamType(1),
245                                             FTy->getParamType(2),
246                                             FTy->getParamType(3),
247                                             Type::getInt1Ty(F->getContext()),
248                                             (Type *)0));
249       return true;
250     }
251     break;
252   }
253   case 'p':
254     //  This upgrades the llvm.part.select overloaded intrinsic names to use
255     //  only one type specifier in the name. We only care about the old format
256     //  'llvm.part.select.i*.i*', which is handled as above for bswap.
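    //  For example, llvm.part.select.i32.i32 becomes llvm.part.select.i32.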
257     if (Name.compare(5,12,"part.select.",12) == 0) {
258       std::string::size_type delim = Name.find('.',17);
259       
260       if (delim != std::string::npos) {
261         //  Construct a new name as 'llvm.part.select' + '.i*'
262         F->setName(Name.substr(0,16)+Name.substr(delim));
263         NewFn = F;
264         return true;
265       }
266       break;
267     }
268
269     //  This upgrades the llvm.part.set intrinsics similarly as above, however 
270     //  we care about 'llvm.part.set.i*.i*.i*', but only the first two types 
271     //  must match. There is an additional type specifier after these two 
272     //  matching types that we must retain when upgrading.  Thus, we require 
273     //  finding 2 periods, not just one, after the intrinsic name.
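    //  For example, llvm.part.set.i32.i32.i16 becomes llvm.part.set.i32.i16.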
274     if (Name.compare(5,9,"part.set.",9) == 0) {
275       std::string::size_type delim = Name.find('.',14);
276
277       if (delim != std::string::npos &&
278           Name.find('.',delim+1) != std::string::npos) {
279         //  Construct a new name as 'llvm.part.set' + '.i*.i*'
280         F->setName(Name.substr(0,13)+Name.substr(delim));
281         NewFn = F;
282         return true;
283       }
284       break;
285     }
286
287     //  This upgrades the llvm.prefetch intrinsic to accept one more parameter,
288     //  which is an instruction / data cache identifier. The old version only
289     //  implicitly supported the data cache.
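    //  The old form was llvm.prefetch(ptr, rw, locality); the new form takes a
    //  fourth i32 operand selecting the instruction or data cache.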
290     if (Name.compare(5,8,"prefetch",8) == 0) {
291       // Don't do anything if it has the correct number of arguments already
292       if (FTy->getNumParams() == 4)
293         break;
294
295       assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
296       //  We first need to change the name of the old (bad) intrinsic, because
297       //  its type is incorrect, but we cannot overload that name. We
298       //  arbitrarily unique it here allowing us to construct a correctly named
299       //  and typed function below.
300       F->setName("");
301       NewFn = cast<Function>(M->getOrInsertFunction(Name,
302                                                     FTy->getReturnType(),
303                                                     FTy->getParamType(0),
304                                                     FTy->getParamType(1),
305                                                     FTy->getParamType(2),
306                                                     FTy->getParamType(2),
307                                                     (Type*)0));
308       return true;
309     }
310
311     break;
312   case 'x':
313     // This fixes the poorly named crc32 intrinsics
314     if (Name.compare(5, 13, "x86.sse42.crc", 13) == 0) {
315       const char* NewFnName = NULL;
316       if (Name.compare(18, 2, "32", 2) == 0) {
317         if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
318           NewFnName = "llvm.x86.sse42.crc32.32.8";
319         } else if (Name.compare(20, 3, ".16") == 0 && Name.length() == 23) {
320           NewFnName = "llvm.x86.sse42.crc32.32.16";
321         } else if (Name.compare(20, 3, ".32") == 0 && Name.length() == 23) {
322           NewFnName = "llvm.x86.sse42.crc32.32.32";
323         }
324       }
325       else if (Name.compare(18, 2, "64", 2) == 0) {
326         if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
327           NewFnName = "llvm.x86.sse42.crc32.64.8";
328         } else if (Name.compare(20, 3, ".64") == 0 && Name.length() == 23) {
329           NewFnName = "llvm.x86.sse42.crc32.64.64";
330         }
331       }
332       if (NewFnName) {
333         F->setName(NewFnName);
334         NewFn = F;
335         return true;
336       }
337     }
338
339     // This fixes all MMX intrinsics to take an x86_mmx operand
340     // instead of a v1i64, v2i32, v4i16, or v8i8.
341     if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
342       const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
343
344       if (Name.compare(13, 4, "padd", 4) == 0   ||
345           Name.compare(13, 4, "psub", 4) == 0   ||
346           Name.compare(13, 4, "pmul", 4) == 0   ||
347           Name.compare(13, 5, "pmadd", 5) == 0  ||
348           Name.compare(13, 4, "pand", 4) == 0   ||
349           Name.compare(13, 3, "por", 3) == 0    ||
350           Name.compare(13, 4, "pxor", 4) == 0   ||
351           Name.compare(13, 4, "pavg", 4) == 0   ||
352           Name.compare(13, 4, "pmax", 4) == 0   ||
353           Name.compare(13, 4, "pmin", 4) == 0   ||
354           Name.compare(13, 4, "psad", 4) == 0   ||
355           Name.compare(13, 4, "psll", 4) == 0   ||
356           Name.compare(13, 4, "psrl", 4) == 0   ||
357           Name.compare(13, 4, "psra", 4) == 0   ||
358           Name.compare(13, 4, "pack", 4) == 0   ||
359           Name.compare(13, 6, "punpck", 6) == 0 ||
360           Name.compare(13, 4, "pcmp", 4) == 0) {
361         assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
362         const Type *SecondParamTy = X86_MMXTy;
363
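        // The immediate-count shifts (pslli, psrli, psrai) keep their original
        // count operand; only the vector operands become x86_mmx.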
364         if (Name.compare(13, 5, "pslli", 5) == 0 ||
365             Name.compare(13, 5, "psrli", 5) == 0 ||
366             Name.compare(13, 5, "psrai", 5) == 0)
367           SecondParamTy = FTy->getParamType(1);
368
369         // Don't do anything if it has the correct types.
370         if (FTy->getReturnType() == X86_MMXTy &&
371             FTy->getParamType(0) == X86_MMXTy &&
372             FTy->getParamType(1) == SecondParamTy)
373           break;
374
375         // We first need to change the name of the old (bad) intrinsic, because
376         // its type is incorrect, but we cannot overload that name. We
377         // arbitrarily unique it here allowing us to construct a correctly named
378         // and typed function below.
379         F->setName("");
380
381         // Now construct the new intrinsic with the correct name and type. We
382         // leave the old function around in order to query its type, whatever it
383         // may be, and correctly convert up to the new type.
384         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
385                                                       X86_MMXTy, X86_MMXTy,
386                                                       SecondParamTy, (Type*)0));
387         return true;
388       }
389
390       if (Name.compare(13, 8, "maskmovq", 8) == 0) {
391         // Don't do anything if it has the correct types.
392         if (FTy->getParamType(0) == X86_MMXTy &&
393             FTy->getParamType(1) == X86_MMXTy)
394           break;
395
396         F->setName("");
397         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
398                                                       FTy->getReturnType(),
399                                                       X86_MMXTy,
400                                                       X86_MMXTy,
401                                                       FTy->getParamType(2),
402                                                       (Type*)0));
403         return true;
404       }
405
406       if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
407         if (FTy->getParamType(0) == X86_MMXTy)
408           break;
409
410         F->setName("");
411         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
412                                                       FTy->getReturnType(),
413                                                       X86_MMXTy,
414                                                       (Type*)0));
415         return true;
416       }
417
418       if (Name.compare(13, 5, "movnt", 5) == 0) {
419         if (FTy->getParamType(1) == X86_MMXTy)
420           break;
421
422         F->setName("");
423         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
424                                                       FTy->getReturnType(),
425                                                       FTy->getParamType(0),
426                                                       X86_MMXTy,
427                                                       (Type*)0));
428         return true;
429       }
430
431       if (Name.compare(13, 7, "palignr", 7) == 0) {
432         if (FTy->getReturnType() == X86_MMXTy &&
433             FTy->getParamType(0) == X86_MMXTy &&
434             FTy->getParamType(1) == X86_MMXTy)
435           break;
436
437         F->setName("");
438         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
439                                                       X86_MMXTy,
440                                                       X86_MMXTy,
441                                                       X86_MMXTy,
442                                                       FTy->getParamType(2),
443                                                       (Type*)0));
444         return true;
445       }
446
447       if (Name.compare(13, 5, "pextr", 5) == 0) {
448         if (FTy->getParamType(0) == X86_MMXTy)
449           break;
450
451         F->setName("");
452         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
453                                                       FTy->getReturnType(),
454                                                       X86_MMXTy,
455                                                       FTy->getParamType(1),
456                                                       (Type*)0));
457         return true;
458       }
459
460       if (Name.compare(13, 5, "pinsr", 5) == 0) {
461         if (FTy->getReturnType() == X86_MMXTy &&
462             FTy->getParamType(0) == X86_MMXTy)
463           break;
464
465         F->setName("");
466         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
467                                                       X86_MMXTy,
468                                                       X86_MMXTy,
469                                                       FTy->getParamType(1),
470                                                       FTy->getParamType(2),
471                                                       (Type*)0));
472         return true;
473       }
474
475       if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
476         if (FTy->getReturnType() == X86_MMXTy)
477           break;
478
479         F->setName("");
480         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
481                                                       X86_MMXTy,
482                                                       FTy->getParamType(0),
483                                                       (Type*)0));
484         return true;
485       }
486
487       if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
488         if (FTy->getParamType(0) == X86_MMXTy)
489           break;
490
491         F->setName("");
492         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
493                                                       FTy->getReturnType(),
494                                                       X86_MMXTy,
495                                                       (Type*)0));
496         return true;
497       }
498
499       if (Name.compare(13, 8, "vec.init", 8) == 0) {
500         if (FTy->getReturnType() == X86_MMXTy)
501           break;
502
503         F->setName("");
504
505         if (Name.compare(21, 2, ".b", 2) == 0)
506           NewFn = cast<Function>(M->getOrInsertFunction(Name, 
507                                                         X86_MMXTy,
508                                                         FTy->getParamType(0),
509                                                         FTy->getParamType(1),
510                                                         FTy->getParamType(2),
511                                                         FTy->getParamType(3),
512                                                         FTy->getParamType(4),
513                                                         FTy->getParamType(5),
514                                                         FTy->getParamType(6),
515                                                         FTy->getParamType(7),
516                                                         (Type*)0));
517         else if (Name.compare(21, 2, ".w", 2) == 0)
518           NewFn = cast<Function>(M->getOrInsertFunction(Name, 
519                                                         X86_MMXTy,
520                                                         FTy->getParamType(0),
521                                                         FTy->getParamType(1),
522                                                         FTy->getParamType(2),
523                                                         FTy->getParamType(3),
524                                                         (Type*)0));
525         else if (Name.compare(21, 2, ".d", 2) == 0)
526           NewFn = cast<Function>(M->getOrInsertFunction(Name, 
527                                                         X86_MMXTy,
528                                                         FTy->getParamType(0),
529                                                         FTy->getParamType(1),
530                                                         (Type*)0));
531         return true;
532       }
533
534
535       if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
536         if (FTy->getReturnType() == X86_MMXTy &&
537             FTy->getParamType(0) == X86_MMXTy)
538           break;
539
540         F->setName("");
541         NewFn = cast<Function>(M->getOrInsertFunction(Name, 
542                                                       X86_MMXTy,
543                                                       X86_MMXTy,
544                                                       FTy->getParamType(1),
545                                                       (Type*)0));
546         return true;
547       }
548
549       if (Name.compare(13, 9, "emms", 4) == 0 ||
550           Name.compare(13, 9, "femms", 5) == 0) {
551         NewFn = 0;
552         break;
553       }
554
555       // We really shouldn't get here ever.
556       assert(0 && "Invalid MMX intrinsic!");
557       break;
558     } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
559                Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
560                Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
561                Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
562                Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
563                Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
564                Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ||
565                Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 ||
566                Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) {
567       // Calls to these intrinsics are transformed into shufflevector instructions.
568       NewFn = 0;
569       return true;
570     } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
571       // Calls to these intrinsics are transformed into vector multiplies.
572       NewFn = 0;
573       return true;
574     } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 ||
575                Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) {
576       // Calls to these intrinsics are transformed into vector shuffles, shifts,
577       // or 0.
578       NewFn = 0;
579       return true;           
580     } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
581                Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
582                Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
583       // Calls to these intrinsics are transformed into unaligned loads.
584       NewFn = 0;
585       return true;
586     } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
587                Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
588                Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
589                Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
590       // Calls to these intrinsics are transformed into nontemporal stores.
591       NewFn = 0;
592       return true;
593     } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
594       // This is an SSE/MMX instruction.
595       const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
596       NewFn =
597         cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
598                                               X86_MMXTy,
599                                               X86_MMXTy,
600                                               Type::getInt8Ty(F->getContext()),
601                                               (Type*)0));
602       return true;
603     }
604
605     break;
606   }
607
608   //  This may not belong here. This function is effectively being overloaded 
609   //  to both detect an intrinsic which needs upgrading, and to provide the 
610   //  upgraded form of the intrinsic. We should perhaps have two separate 
611   //  functions for this.
612   return false;
613 }
614
615 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
616   NewFn = 0;
617   bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
618
619   // Upgrade intrinsic attributes.  This does not change the function.
620   if (NewFn)
621     F = NewFn;
622   if (unsigned id = F->getIntrinsicID())
623     F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
624   return Upgraded;
625 }
626
627 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
628   StringRef Name(GV->getName());
629
630   // We are only upgrading one symbol here.
631   if (Name == ".llvm.eh.catch.all.value") {
632     GV->setName("llvm.eh.catch.all.value");
633     return true;
634   }
635
636   return false;
637 }
638
639 /// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
640 /// have vector elements twice as big as one or both source operands, do the
641 /// sign- or zero-extension that used to be handled by intrinsics.  The
642 /// extended values are returned via V0 and V1.
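/// In the old names (e.g. "llvm.arm.neon.vaddls.*"), index 18 is 'l' for a
/// "long" operation ('w' for "wide") and index 19 is 's' or 'u' for the sign.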
643 static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
644                            Value *&V0, Value *&V1) {
645   Function *F = CI->getCalledFunction();
646   const std::string& Name = F->getName();
647   bool isLong = (Name.at(18) == 'l');
648   bool isSigned = (Name.at(19) == 's');
649
650   if (isSigned) {
651     if (isLong)
652       V0 = new SExtInst(Arg0, CI->getType(), "", CI);
653     else
654       V0 = Arg0;
655     V1 = new SExtInst(Arg1, CI->getType(), "", CI);
656   } else {
657     if (isLong)
658       V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
659     else
660       V0 = Arg0;
661     V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
662   }
663 }
664
665 /// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
666 /// or vabal intrinsics, construct a call to a vabd intrinsic.  Examine the
667 /// name of the old intrinsic to determine whether to use a signed or unsigned
668 /// vabd intrinsic.  Get the type from the old call instruction, adjusted for
669 /// half-size vector elements if the old intrinsic was vabdl or vabal.
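/// In the old names, the sign character follows "vabd"/"vaba" at index 18, or
/// the trailing 'l' of "vabdl"/"vabal" at index 19.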
670 static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
671   Function *F = CI->getCalledFunction();
672   const std::string& Name = F->getName();
673   bool isLong = (Name.at(18) == 'l');
674   bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
675
676   Intrinsic::ID intID;
677   if (isSigned)
678     intID = Intrinsic::arm_neon_vabds;
679   else
680     intID = Intrinsic::arm_neon_vabdu;
681
682   const Type *Ty = CI->getType();
683   if (isLong)
684     Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
685
686   Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
687   Value *Operands[2];
688   Operands[0] = Arg0;
689   Operands[1] = Arg1;
690   return CallInst::Create(VABD, Operands, Operands+2, 
691                           "upgraded."+CI->getName(), CI);
692 }
693
694 /// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
695 static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
696                                  Value **Operands, unsigned NumOps,
697                                  bool AssignName = true) {
698   // Construct a new CallInst.
699   CallInst *NewCI =
700     CallInst::Create(NewFn, Operands, Operands + NumOps,
701                      AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
702
703   NewCI->setTailCall(OldCI->isTailCall());
704   NewCI->setCallingConv(OldCI->getCallingConv());
705
706   // Handle any uses of the old CallInst. If the type has changed, add a cast.
707   if (!OldCI->use_empty()) {
708     if (OldCI->getType() != NewCI->getType()) {
709       Function *OldFn = OldCI->getCalledFunction();
710       CastInst *RetCast =
711         CastInst::Create(CastInst::getCastOpcode(NewCI, true,
712                                                  OldFn->getReturnType(), true),
713                          NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
714
715       // Replace all uses of the old call with the new cast which has the
716       // correct type.
717       OldCI->replaceAllUsesWith(RetCast);
718     } else {
719       OldCI->replaceAllUsesWith(NewCI);
720     }
721   }
722
723   // Clean up the old call now that it has been completely upgraded.
724   OldCI->eraseFromParent();
725 }
726
727 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
728 // upgraded intrinsic. All argument and return casting must be provided in
729 // order to integrate seamlessly with the existing context.
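// If NewFn is null, the call is expanded into plain IR (casts, shuffles,
// arithmetic, loads, or stores); otherwise it is rewritten as a call to NewFn
// with the operand and return-value casts needed to match the new signature.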
730 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
731   Function *F = CI->getCalledFunction();
732   LLVMContext &C = CI->getContext();
733   ImmutableCallSite CS(CI);
734
735   assert(F && "CallInst has no function associated with it.");
736
737   if (!NewFn) {
738     // Get the Function's name.
739     const std::string& Name = F->getName();
740
741     // Upgrade ARM NEON intrinsics.
742     if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
743       Instruction *NewI;
744       Value *V0, *V1;
745       if (Name.compare(14, 7, "vmovls.", 7) == 0) {
746         NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
747                             "upgraded." + CI->getName(), CI);
748       } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
749         NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
750                             "upgraded." + CI->getName(), CI);
751       } else if (Name.compare(14, 4, "vadd", 4) == 0) {
752         ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
753         NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
754       } else if (Name.compare(14, 4, "vsub", 4) == 0) {
755         ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
756         NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
757       } else if (Name.compare(14, 4, "vmul", 4) == 0) {
758         ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
759         NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
760       } else if (Name.compare(14, 4, "vmla", 4) == 0) {
761         ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
762         Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
763         NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
764                                          "upgraded."+CI->getName(), CI);
765       } else if (Name.compare(14, 4, "vmls", 4) == 0) {
766         ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
767         Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
768         NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
769                                          "upgraded."+CI->getName(), CI);
770       } else if (Name.compare(14, 4, "vabd", 4) == 0) {
771         NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
772         NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
773       } else if (Name.compare(14, 4, "vaba", 4) == 0) {
774         NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
775         if (Name.at(18) == 'l')
776           NewI = new ZExtInst(NewI, CI->getType(), "", CI);
777         NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
778                                          "upgraded."+CI->getName(), CI);
779       } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
780         NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
781                              "upgraded." + CI->getName(), CI);
782       } else {
783         llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
784       }
785       // Replace any uses of the old CallInst.
786       if (!CI->use_empty())
787         CI->replaceAllUsesWith(NewI);
788       CI->eraseFromParent();
789       return;
790     }
791
792     bool isLoadH = false, isLoadL = false, isMovL = false;
793     bool isMovSD = false, isShufPD = false;
794     bool isUnpckhPD = false, isUnpcklPD = false;
795     bool isPunpckhQPD = false, isPunpcklQPD = false;
796     if (F->getName() == "llvm.x86.sse2.loadh.pd")
797       isLoadH = true;
798     else if (F->getName() == "llvm.x86.sse2.loadl.pd")
799       isLoadL = true;
800     else if (F->getName() == "llvm.x86.sse2.movl.dq")
801       isMovL = true;
802     else if (F->getName() == "llvm.x86.sse2.movs.d")
803       isMovSD = true;
804     else if (F->getName() == "llvm.x86.sse2.shuf.pd")
805       isShufPD = true;
806     else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
807       isUnpckhPD = true;
808     else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
809       isUnpcklPD = true;
810     else if (F->getName() ==  "llvm.x86.sse2.punpckh.qdq")
811       isPunpckhQPD = true;
812     else if (F->getName() ==  "llvm.x86.sse2.punpckl.qdq")
813       isPunpcklQPD = true;
814
815     if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
816         isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
817       std::vector<Constant*> Idxs;
818       Value *Op0 = CI->getArgOperand(0);
819       ShuffleVectorInst *SI = NULL;
820       if (isLoadH || isLoadL) {
821         Value *Op1 = UndefValue::get(Op0->getType());
822         Value *Addr = new BitCastInst(CI->getArgOperand(1), 
823                                   Type::getDoublePtrTy(C),
824                                       "upgraded.", CI);
825         Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
826         Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
827         Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
828
829         if (isLoadH) {
830           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
831           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
832         } else {
833           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
834           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
835         }
836         Value *Mask = ConstantVector::get(Idxs);
837         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
838       } else if (isMovL) {
839         Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
840         Idxs.push_back(Zero);
841         Idxs.push_back(Zero);
842         Idxs.push_back(Zero);
843         Idxs.push_back(Zero);
844         Value *ZeroV = ConstantVector::get(Idxs);
845
846         Idxs.clear(); 
847         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
848         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
849         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
850         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
851         Value *Mask = ConstantVector::get(Idxs);
852         SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
853       } else if (isMovSD ||
854                  isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
855         Value *Op1 = CI->getArgOperand(1);
856         if (isMovSD) {
857           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
858           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
859         } else if (isUnpckhPD || isPunpckhQPD) {
860           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
861           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
862         } else {
863           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
864           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
865         }
866         Value *Mask = ConstantVector::get(Idxs);
867         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
868       } else if (isShufPD) {
869         Value *Op1 = CI->getArgOperand(1);
870         unsigned MaskVal =
871                         cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
872         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
873         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
874                                                ((MaskVal >> 1) & 1)+2));
875         Value *Mask = ConstantVector::get(Idxs);
876         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
877       }
878
879       assert(SI && "Unexpected!");
880
881       // Handle any uses of the old CallInst.
882       if (!CI->use_empty())
883         //  Replace all uses of the old call with the new shuffle, which has
884         //  the correct type.
885         CI->replaceAllUsesWith(SI);
886       
887       //  Clean up the old call now that it has been completely upgraded.
888       CI->eraseFromParent();
889     } else if (F->getName() == "llvm.x86.sse41.pmulld") {
890       // Upgrade this set of intrinsics into vector multiplies.
891       Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
892                                                    CI->getArgOperand(1),
893                                                    CI->getName(),
894                                                    CI);
895       // Fix up all the uses with our new multiply.
896       if (!CI->use_empty())
897         CI->replaceAllUsesWith(Mul);
898         
899       // Remove upgraded multiply.
900       CI->eraseFromParent();
901     } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
902       Value *Op1 = CI->getArgOperand(0);
903       Value *Op2 = CI->getArgOperand(1);
904       Value *Op3 = CI->getArgOperand(2);
905       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
906       Value *Rep;
907       IRBuilder<> Builder(C);
908       Builder.SetInsertPoint(CI->getParent(), CI);
909
910       // If palignr is shifting the pair of input vectors less than 9 bytes,
911       // emit a shuffle instruction.
912       if (shiftVal <= 8) {
913         const Type *IntTy = Type::getInt32Ty(C);
914         const Type *EltTy = Type::getInt8Ty(C);
915         const Type *VecTy = VectorType::get(EltTy, 8);
916         
917         Op2 = Builder.CreateBitCast(Op2, VecTy);
918         Op1 = Builder.CreateBitCast(Op1, VecTy);
919
920         llvm::SmallVector<llvm::Constant*, 8> Indices;
921         for (unsigned i = 0; i != 8; ++i)
922           Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
923
924         Value *SV = ConstantVector::get(Indices);
925         Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
926         Rep = Builder.CreateBitCast(Rep, F->getReturnType());
927       }
928
929       // If palignr is shifting the pair of input vectors more than 8 but less
930       // than 16 bytes, emit a logical right shift of the destination.
931       else if (shiftVal < 16) {
932         // MMX has these as 1 x i64 vectors for some odd optimization reasons.
933         const Type *EltTy = Type::getInt64Ty(C);
934         const Type *VecTy = VectorType::get(EltTy, 1);
935
936         Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
937         Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
938
939         // Shift right using the MMX psrl.q intrinsic.
940         Function *I =
941           Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
942         Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
943       }
944
945       // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
946       else {
947         Rep = Constant::getNullValue(F->getReturnType());
948       }
949       
950       // Replace any uses with our new instruction.
951       if (!CI->use_empty())
952         CI->replaceAllUsesWith(Rep);
953         
954       // Remove upgraded instruction.
955       CI->eraseFromParent();
956       
957     } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
958       Value *Op1 = CI->getArgOperand(0);
959       Value *Op2 = CI->getArgOperand(1);
960       Value *Op3 = CI->getArgOperand(2);
961       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
962       Value *Rep;
963       IRBuilder<> Builder(C);
964       Builder.SetInsertPoint(CI->getParent(), CI);
965
966       // If palignr is shifting the pair of input vectors less than 17 bytes,
967       // emit a shuffle instruction.
968       if (shiftVal <= 16) {
969         const Type *IntTy = Type::getInt32Ty(C);
970         const Type *EltTy = Type::getInt8Ty(C);
971         const Type *VecTy = VectorType::get(EltTy, 16);
972         
973         Op2 = Builder.CreateBitCast(Op2, VecTy);
974         Op1 = Builder.CreateBitCast(Op1, VecTy);
975
976         llvm::SmallVector<llvm::Constant*, 16> Indices;
977         for (unsigned i = 0; i != 16; ++i)
978           Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
979
980         Value *SV = ConstantVector::get(Indices);
981         Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
982         Rep = Builder.CreateBitCast(Rep, F->getReturnType());
983       }
984
985       // If palignr is shifting the pair of input vectors more than 16 but less
986       // than 32 bytes, emit a logical right shift of the destination.
987       else if (shiftVal < 32) {
988         const Type *EltTy = Type::getInt64Ty(C);
989         const Type *VecTy = VectorType::get(EltTy, 2);
990         const Type *IntTy = Type::getInt32Ty(C);
991
992         Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
993         Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
994
995         // Shift right using the SSE2 psrl.dq intrinsic.
996         Function *I =
997           Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
998         Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
999       }
1000
1001       // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
1002       else {
1003         Rep = Constant::getNullValue(F->getReturnType());
1004       }
1005       
1006       // Replace any uses with our new instruction.
1007       if (!CI->use_empty())
1008         CI->replaceAllUsesWith(Rep);
1009         
1010       // Remove upgraded instruction.
1011       CI->eraseFromParent();
1012     
1013     } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
1014                F->getName() == "llvm.x86.sse2.loadu.dq" ||
1015                F->getName() == "llvm.x86.sse2.loadu.pd") {
1016       // Convert to a native, unaligned load.
1017       const Type *VecTy = CI->getType();
1018       const Type *IntTy = IntegerType::get(C, 128);
1019       IRBuilder<> Builder(C);
1020       Builder.SetInsertPoint(CI->getParent(), CI);
1021
1022       Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
1023                                         PointerType::getUnqual(IntTy),
1024                                         "cast");
1025       LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
1026       LI->setAlignment(1);      // Unaligned load.
1027       BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
1028
1029       // Fix up all the uses with our new load.
1030       if (!CI->use_empty())
1031         CI->replaceAllUsesWith(BC);
1032
1033       // Remove intrinsic.
1034       CI->eraseFromParent();
1035     } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
1036                F->getName() == "llvm.x86.sse2.movnt.dq" ||
1037                F->getName() == "llvm.x86.sse2.movnt.pd" ||
1038                F->getName() == "llvm.x86.sse2.movnt.i") {
1039       IRBuilder<> Builder(C);
1040       Builder.SetInsertPoint(CI->getParent(), CI);
1041
1042       Module *M = F->getParent();
1043       SmallVector<Value *, 1> Elts;
1044       Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
1045       MDNode *Node = MDNode::get(C, Elts);
1046
1047       Value *Arg0 = CI->getArgOperand(0);
1048       Value *Arg1 = CI->getArgOperand(1);
1049
1050       // Cast the pointer operand to a pointer to the stored value's type.
1051       Value *BC = Builder.CreateBitCast(Arg0,
1052                                         PointerType::getUnqual(Arg1->getType()),
1053                                         "cast");
1054       StoreInst *SI = Builder.CreateStore(Arg1, BC);
1055       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
1056       SI->setAlignment(16);
1057
1058       // Remove intrinsic.
1059       CI->eraseFromParent();
1060     } else {
1061       llvm_unreachable("Unknown function for CallInst upgrade.");
1062     }
1063     return;
1064   }
1065
1066   switch (NewFn->getIntrinsicID()) {
1067   default: llvm_unreachable("Unknown function for CallInst upgrade.");
1068   case Intrinsic::arm_neon_vld1:
1069   case Intrinsic::arm_neon_vld2:
1070   case Intrinsic::arm_neon_vld3:
1071   case Intrinsic::arm_neon_vld4:
1072   case Intrinsic::arm_neon_vst1:
1073   case Intrinsic::arm_neon_vst2:
1074   case Intrinsic::arm_neon_vst3:
1075   case Intrinsic::arm_neon_vst4:
1076   case Intrinsic::arm_neon_vld2lane:
1077   case Intrinsic::arm_neon_vld3lane:
1078   case Intrinsic::arm_neon_vld4lane:
1079   case Intrinsic::arm_neon_vst2lane:
1080   case Intrinsic::arm_neon_vst3lane:
1081   case Intrinsic::arm_neon_vst4lane: {
1082     // Add a default alignment argument of 1.
1083     SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
1084     Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
1085     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
1086                                        CI->getName(), CI);
1087     NewCI->setTailCall(CI->isTailCall());
1088     NewCI->setCallingConv(CI->getCallingConv());
1089
1090     //  Handle any uses of the old CallInst.
1091     if (!CI->use_empty())
1092       //  Replace all uses of the old call with the new call, which has the
1093       //  correct type.
1094       CI->replaceAllUsesWith(NewCI);
1095     
1096     //  Clean up the old call now that it has been completely upgraded.
1097     CI->eraseFromParent();
1098     break;
1099   }        
1100
1101   case Intrinsic::x86_mmx_padd_b:
1102   case Intrinsic::x86_mmx_padd_w:
1103   case Intrinsic::x86_mmx_padd_d:
1104   case Intrinsic::x86_mmx_padd_q:
1105   case Intrinsic::x86_mmx_padds_b:
1106   case Intrinsic::x86_mmx_padds_w:
1107   case Intrinsic::x86_mmx_paddus_b:
1108   case Intrinsic::x86_mmx_paddus_w:
1109   case Intrinsic::x86_mmx_psub_b:
1110   case Intrinsic::x86_mmx_psub_w:
1111   case Intrinsic::x86_mmx_psub_d:
1112   case Intrinsic::x86_mmx_psub_q:
1113   case Intrinsic::x86_mmx_psubs_b:
1114   case Intrinsic::x86_mmx_psubs_w:
1115   case Intrinsic::x86_mmx_psubus_b:
1116   case Intrinsic::x86_mmx_psubus_w:
1117   case Intrinsic::x86_mmx_pmulh_w:
1118   case Intrinsic::x86_mmx_pmull_w:
1119   case Intrinsic::x86_mmx_pmulhu_w:
1120   case Intrinsic::x86_mmx_pmulu_dq:
1121   case Intrinsic::x86_mmx_pmadd_wd:
1122   case Intrinsic::x86_mmx_pand:
1123   case Intrinsic::x86_mmx_pandn:
1124   case Intrinsic::x86_mmx_por:
1125   case Intrinsic::x86_mmx_pxor:
1126   case Intrinsic::x86_mmx_pavg_b:
1127   case Intrinsic::x86_mmx_pavg_w:
1128   case Intrinsic::x86_mmx_pmaxu_b:
1129   case Intrinsic::x86_mmx_pmaxs_w:
1130   case Intrinsic::x86_mmx_pminu_b:
1131   case Intrinsic::x86_mmx_pmins_w:
1132   case Intrinsic::x86_mmx_psad_bw:
1133   case Intrinsic::x86_mmx_psll_w:
1134   case Intrinsic::x86_mmx_psll_d:
1135   case Intrinsic::x86_mmx_psll_q:
1136   case Intrinsic::x86_mmx_pslli_w:
1137   case Intrinsic::x86_mmx_pslli_d:
1138   case Intrinsic::x86_mmx_pslli_q:
1139   case Intrinsic::x86_mmx_psrl_w:
1140   case Intrinsic::x86_mmx_psrl_d:
1141   case Intrinsic::x86_mmx_psrl_q:
1142   case Intrinsic::x86_mmx_psrli_w:
1143   case Intrinsic::x86_mmx_psrli_d:
1144   case Intrinsic::x86_mmx_psrli_q:
1145   case Intrinsic::x86_mmx_psra_w:
1146   case Intrinsic::x86_mmx_psra_d:
1147   case Intrinsic::x86_mmx_psrai_w:
1148   case Intrinsic::x86_mmx_psrai_d:
1149   case Intrinsic::x86_mmx_packsswb:
1150   case Intrinsic::x86_mmx_packssdw:
1151   case Intrinsic::x86_mmx_packuswb:
1152   case Intrinsic::x86_mmx_punpckhbw:
1153   case Intrinsic::x86_mmx_punpckhwd:
1154   case Intrinsic::x86_mmx_punpckhdq:
1155   case Intrinsic::x86_mmx_punpcklbw:
1156   case Intrinsic::x86_mmx_punpcklwd:
1157   case Intrinsic::x86_mmx_punpckldq:
1158   case Intrinsic::x86_mmx_pcmpeq_b:
1159   case Intrinsic::x86_mmx_pcmpeq_w:
1160   case Intrinsic::x86_mmx_pcmpeq_d:
1161   case Intrinsic::x86_mmx_pcmpgt_b:
1162   case Intrinsic::x86_mmx_pcmpgt_w:
1163   case Intrinsic::x86_mmx_pcmpgt_d: {
1164     Value *Operands[2];
1165     
1166     // Cast the operand to the X86 MMX type.
1167     Operands[0] = new BitCastInst(CI->getArgOperand(0), 
1168                                   NewFn->getFunctionType()->getParamType(0),
1169                                   "upgraded.", CI);
1170
1171     switch (NewFn->getIntrinsicID()) {
1172     default:
1173       // Cast to the X86 MMX type.
1174       Operands[1] = new BitCastInst(CI->getArgOperand(1), 
1175                                     NewFn->getFunctionType()->getParamType(1),
1176                                     "upgraded.", CI);
1177       break;
1178     case Intrinsic::x86_mmx_pslli_w:
1179     case Intrinsic::x86_mmx_pslli_d:
1180     case Intrinsic::x86_mmx_pslli_q:
1181     case Intrinsic::x86_mmx_psrli_w:
1182     case Intrinsic::x86_mmx_psrli_d:
1183     case Intrinsic::x86_mmx_psrli_q:
1184     case Intrinsic::x86_mmx_psrai_w:
1185     case Intrinsic::x86_mmx_psrai_d:
1186       // These take an i32 as their second parameter.
1187       Operands[1] = CI->getArgOperand(1);
1188       break;
1189     }
1190
1191     ConstructNewCallInst(NewFn, CI, Operands, 2);
1192     break;
1193   }
1194   case Intrinsic::x86_mmx_maskmovq: {
1195     Value *Operands[3];
1196
1197     // Cast the operands to the X86 MMX type.
1198     Operands[0] = new BitCastInst(CI->getArgOperand(0), 
1199                                   NewFn->getFunctionType()->getParamType(0),
1200                                   "upgraded.", CI);
1201     Operands[1] = new BitCastInst(CI->getArgOperand(1), 
1202                                   NewFn->getFunctionType()->getParamType(1),
1203                                   "upgraded.", CI);
1204     Operands[2] = CI->getArgOperand(2);
1205
1206     ConstructNewCallInst(NewFn, CI, Operands, 3, false);
1207     break;
1208   }
1209   case Intrinsic::x86_mmx_pmovmskb: {
1210     Value *Operands[1];
1211
1212     // Cast the operand to the X86 MMX type.
1213     Operands[0] = new BitCastInst(CI->getArgOperand(0), 
1214                                   NewFn->getFunctionType()->getParamType(0),
1215                                   "upgraded.", CI);
1216
1217     ConstructNewCallInst(NewFn, CI, Operands, 1);
1218     break;
1219   }
1220   case Intrinsic::x86_mmx_movnt_dq: {
1221     Value *Operands[2];
1222
1223     Operands[0] = CI->getArgOperand(0);
1224
1225     // Cast the operand to the X86 MMX type.
1226     Operands[1] = new BitCastInst(CI->getArgOperand(1),
1227                                   NewFn->getFunctionType()->getParamType(1),
1228                                   "upgraded.", CI);
1229
1230     ConstructNewCallInst(NewFn, CI, Operands, 2, false);
1231     break;
1232   }
1233   case Intrinsic::x86_mmx_palignr_b: {
1234     Value *Operands[3];
1235
1236     // Cast the operands to the X86 MMX type.
1237     Operands[0] = new BitCastInst(CI->getArgOperand(0),
1238                                   NewFn->getFunctionType()->getParamType(0),
1239                                   "upgraded.", CI);
1240     Operands[1] = new BitCastInst(CI->getArgOperand(1),
1241                                   NewFn->getFunctionType()->getParamType(1),
1242                                   "upgraded.", CI);
1243     Operands[2] = CI->getArgOperand(2);
1244
1245     ConstructNewCallInst(NewFn, CI, Operands, 3);
1246     break;
1247   }
1248   case Intrinsic::x86_mmx_pextr_w: {
1249     Value *Operands[2];
1250
1251     // Cast the operands to the X86 MMX type.
1252     Operands[0] = new BitCastInst(CI->getArgOperand(0),
1253                                   NewFn->getFunctionType()->getParamType(0),
1254                                   "upgraded.", CI);
1255     Operands[1] = CI->getArgOperand(1);
1256
1257     ConstructNewCallInst(NewFn, CI, Operands, 2);
1258     break;
1259   }
1260   case Intrinsic::x86_mmx_pinsr_w: {
1261     Value *Operands[3];
1262
1263     // Cast the vector operand to the X86 MMX type; the remaining operands are passed through.
1264     Operands[0] = new BitCastInst(CI->getArgOperand(0),
1265                                   NewFn->getFunctionType()->getParamType(0),
1266                                   "upgraded.", CI);
1267     Operands[1] = CI->getArgOperand(1);
1268     Operands[2] = CI->getArgOperand(2);
1269
1270     ConstructNewCallInst(NewFn, CI, Operands, 3);
1271     break;
1272   }
1273   case Intrinsic::x86_sse_pshuf_w: {
1274     IRBuilder<> Builder(C);
1275     Builder.SetInsertPoint(CI->getParent(), CI);
1276
1277     // Cast the operand to the X86 MMX type.
1278     Value *Operands[2];
1279     Operands[0] =
1280       Builder.CreateBitCast(CI->getArgOperand(0), 
1281                             NewFn->getFunctionType()->getParamType(0),
1282                             "upgraded.");
1283     Operands[1] =
1284       Builder.CreateTrunc(CI->getArgOperand(1),
1285                           Type::getInt8Ty(C),
1286                           "upgraded.");
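         // The old intrinsic supplied a wider shuffle-immediate operand; the
         // replacement expects an i8, hence the trunc above.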
1287
1288     ConstructNewCallInst(NewFn, CI, Operands, 2);
1289     break;
1290   }
1291
1292   case Intrinsic::ctlz:
1293   case Intrinsic::ctpop:
1294   case Intrinsic::cttz: {
1295     //  Build a small vector of the original arguments.
1296     SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
1297
1298     //  Construct a new CallInst
1299     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
1300                                        "upgraded."+CI->getName(), CI);
1301     NewCI->setTailCall(CI->isTailCall());
1302     NewCI->setCallingConv(CI->getCallingConv());
1303
1304     //  Handle any uses of the old CallInst.
1305     if (!CI->use_empty()) {
1306       //  Check for sign extend parameter attributes on the return values.
1307       bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt);
1308       bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt);
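           // For instance, older bitcode may declare these to return i32
           // regardless of operand width; an old call such as (illustrative)
           //   %c = call i32 @llvm.ctpop.i64(i64 %x)
           // is rebuilt against the new declaration and its result cast back
           // to the old return type before the uses are rewired below.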
1309       
1310       //  Construct an appropriate cast from the new return type to the old.
1311       CastInst *RetCast = CastInst::Create(
1312                             CastInst::getCastOpcode(NewCI, SrcSExt,
1313                                                     F->getReturnType(),
1314                                                     DestSExt),
1315                             NewCI, F->getReturnType(),
1316                             NewCI->getName(), CI);
1317       NewCI->moveBefore(RetCast);
1318
1319       //  Replace all uses of the old call with the new cast which has the 
1320       //  correct type.
1321       CI->replaceAllUsesWith(RetCast);
1322     }
1323
1324     //  Clean up the old call now that it has been completely upgraded.
1325     CI->eraseFromParent();
1326   }
1327   break;
1328   case Intrinsic::eh_selector:
1329   case Intrinsic::eh_typeid_for: {
1330     // Only the return type changed.
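         // (Presumably the wider variants of these intrinsics returned i64;
         // the replacements return i32, so uses of the old result are routed
         // through the cast built below.)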
1331     SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
1332     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
1333                                        "upgraded." + CI->getName(), CI);
1334     NewCI->setTailCall(CI->isTailCall());
1335     NewCI->setCallingConv(CI->getCallingConv());
1336
1337     //  Handle any uses of the old CallInst.
1338     if (!CI->use_empty()) {
1339       //  Construct an appropriate cast from the new return type to the old.
1340       CastInst *RetCast =
1341         CastInst::Create(CastInst::getCastOpcode(NewCI, true,
1342                                                  F->getReturnType(), true),
1343                          NewCI, F->getReturnType(), NewCI->getName(), CI);
1344       CI->replaceAllUsesWith(RetCast);
1345     }
1346     CI->eraseFromParent();
1347   }
1348   break;
1349   case Intrinsic::memcpy:
1350   case Intrinsic::memmove:
1351   case Intrinsic::memset: {
1352     // Add isVolatile
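         // The new declarations take a trailing i1 isVolatile flag; e.g. a call
         // written against the old four-operand form, roughly
         //   call void @llvm.memcpy...(i8* %dst, i8* %src, i32 %len, i32 %align)
         // is rebuilt with "i1 false" (not volatile) appended.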
1353     const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
1354     Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
1355                            CI->getArgOperand(2), CI->getArgOperand(3),
1356                            llvm::ConstantInt::get(I1Ty, 0) };
1357     CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
1358                                        CI->getName(), CI);
1359     NewCI->setTailCall(CI->isTailCall());
1360     NewCI->setCallingConv(CI->getCallingConv());
1361     //  Handle any uses of the old CallInst.
1362     if (!CI->use_empty())
1363       //  Replace all uses of the old call with the new call.
1365       CI->replaceAllUsesWith(NewCI);
1366     
1367     //  Clean up the old call now that it has been completely upgraded.
1368     CI->eraseFromParent();
1369     break;
1370   }
1371   case Intrinsic::prefetch: {
1372     IRBuilder<> Builder(C);
1373     Builder.SetInsertPoint(CI->getParent(), CI);
1374     const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
1375
1376     // Add the extra "data cache" argument
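         // e.g. "call void @llvm.prefetch(i8* %p, i32 0, i32 3)" gains a fourth
         // operand, becoming "... i32 1)", where 1 selects the data cache
         // (0 would request the instruction cache).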
1377     Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
1378                            CI->getArgOperand(2),
1379                            llvm::ConstantInt::get(I32Ty, 1) };
1380     CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+4,
1381                                        CI->getName(), CI);
1382     NewCI->setTailCall(CI->isTailCall());
1383     NewCI->setCallingConv(CI->getCallingConv());
1384     //  Handle any uses of the old CallInst.
1385     if (!CI->use_empty())
1386       //  Replace all uses of the old call with the new call.
1388       CI->replaceAllUsesWith(NewCI);
1389
1390     //  Clean up the old call now that it has been completely upgraded.
1391     CI->eraseFromParent();
1392     break;
1393   }
1394   }
1395 }
1396
1397 // This tests the given Function to determine if it needs upgrading. If a
1398 // replacement declaration is created, every call to the old function is
1399 // upgraded to target it and the old declaration is then erased.
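     // A typical caller (illustrative sketch only) would walk the module once:
     //
     //   for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
     //     UpgradeCallsToIntrinsic(&*I);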
1400 void llvm::UpgradeCallsToIntrinsic(Function* F) {
1401   assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
1402
1403   // Upgrade the function and check if it is a totally new function.
1404   Function* NewFn;
1405   if (UpgradeIntrinsicFunction(F, NewFn)) {
1406     if (NewFn != F) {
1407       // Upgrade every call to the old function to use the new one.
1408       for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
1409            UI != UE; ) {
1410         if (CallInst* CI = dyn_cast<CallInst>(*UI++))
1411           UpgradeIntrinsicCall(CI, NewFn);
1412       }
1413       // Remove the old function from the module now that it is no longer used.
1414       F->eraseFromParent();
1415     }
1416   }
1417 }
1418
1419 /// This function strips all calls to (and declarations of) the obsolete llvm.dbg.func.start,
1420 /// llvm.dbg.stoppoint, llvm.dbg.region.start and llvm.dbg.region.end intrinsics.
1421 /// llvm.dbg.declare is kept, unless its operands are not MDNodes; then it is stripped as well.
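     /// For example, a leftover pre-metadata call of roughly the form
     ///   call void @llvm.dbg.declare({}* %x, {}* ...)
     /// has no MDNode operands, so every llvm.dbg.declare call (and the
     /// declaration itself) is removed.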
1422 void llvm::CheckDebugInfoIntrinsics(Module *M) {
1423
1425   if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
1426     while (!FuncStart->use_empty()) {
1427       CallInst *CI = cast<CallInst>(FuncStart->use_back());
1428       CI->eraseFromParent();
1429     }
1430     FuncStart->eraseFromParent();
1431   }
1432   
1433   if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
1434     while (!StopPoint->use_empty()) {
1435       CallInst *CI = cast<CallInst>(StopPoint->use_back());
1436       CI->eraseFromParent();
1437     }
1438     StopPoint->eraseFromParent();
1439   }
1440
1441   if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
1442     while (!RegionStart->use_empty()) {
1443       CallInst *CI = cast<CallInst>(RegionStart->use_back());
1444       CI->eraseFromParent();
1445     }
1446     RegionStart->eraseFromParent();
1447   }
1448
1449   if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
1450     while (!RegionEnd->use_empty()) {
1451       CallInst *CI = cast<CallInst>(RegionEnd->use_back());
1452       CI->eraseFromParent();
1453     }
1454     RegionEnd->eraseFromParent();
1455   }
1456   
1457   if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
1458     if (!Declare->use_empty()) {
1459       DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
1460       if (!isa<MDNode>(DDI->getArgOperand(0)) ||
1461           !isa<MDNode>(DDI->getArgOperand(1))) {
1462         while (!Declare->use_empty()) {
1463           CallInst *CI = cast<CallInst>(Declare->use_back());
1464           CI->eraseFromParent();
1465         }
1466         Declare->eraseFromParent();
1467       }
1468     }
1469   }
1470 }