From 61ab3a1cc5b266ff666f276faebb75e1dd2c13c2 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Fri, 14 Nov 2014 21:52:18 +0000 Subject: [PATCH] =?utf8?q?Add=20the=20code=20and=20test=20cases=20for=2064?= =?utf8?q?-bit=20ARM=20to=20llvm-objdump=E2=80=99s=20Mach-O=20symbolizer.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit FYI, removed the unused MCInstrAnalysis as it does not exist for 64-bit ARM and was causing a “couldn't initialize disassembler for target” error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222045 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/Inputs/ObjC.exe.macho-aarch64 | Bin 0 -> 49736 bytes .../AArch64/Inputs/ObjC.obj.macho-aarch64 | Bin 0 -> 2008 bytes .../AArch64/Inputs/hello.exe.macho-aarch64 | Bin 0 -> 49416 bytes .../AArch64/Inputs/hello.obj.macho-aarch64 | Bin 0 -> 604 bytes test/tools/llvm-objdump/AArch64/lit.local.cfg | 2 + .../AArch64/macho-symbolized-disassembly.test | 23 ++ tools/llvm-objdump/MachODump.cpp | 208 ++++++++++++++++-- 7 files changed, 210 insertions(+), 23 deletions(-) create mode 100755 test/tools/llvm-objdump/AArch64/Inputs/ObjC.exe.macho-aarch64 create mode 100644 test/tools/llvm-objdump/AArch64/Inputs/ObjC.obj.macho-aarch64 create mode 100755 test/tools/llvm-objdump/AArch64/Inputs/hello.exe.macho-aarch64 create mode 100644 test/tools/llvm-objdump/AArch64/Inputs/hello.obj.macho-aarch64 create mode 100644 test/tools/llvm-objdump/AArch64/lit.local.cfg create mode 100644 test/tools/llvm-objdump/AArch64/macho-symbolized-disassembly.test diff --git a/test/tools/llvm-objdump/AArch64/Inputs/ObjC.exe.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/ObjC.exe.macho-aarch64 new file mode 100755 index 0000000000000000000000000000000000000000..d28cbcbda3e70d7d62da556ac32126c1c20c975b GIT binary patch literal 49736 zcmeI5J!~9B6vt~KhsVGF-7SP6$WD--C7(Mf#HYffo)9mRUnZAwDtl?{-LdIO&bC@m+?qNP zvCeCB0V!w4=4M-mgZf%)PUyIu((E02SV~{r^mRZ@Qm_7DxS3D%6pndc4Qq>(!;F-k 
zU(I`s;iTT^N1@(1JtK;vf=;bR35VZ%6C# zGsMPmQX?D*f1A(AXm&Om8>#7s)Y3VM1GB;=MGbrNIa#R09ur#E@p|d7T<}xD^^vho ze4SF3a?)EZ=PM<-?7K(w(zS4Xksil!J^D+DHuA{mKFr5&L~?y*Ez~o00mpGYoo+Nz z2G5CI&Mhdu6-hmd>iK`xsVg{+?@y;2jn?y2$yM|8_eknprh3+k_RDd6ee|D=h5}8k zS19E?j}?=8a|u0;<9a00qSt&cO- zsE2Gisr{T|ZEJOUZ&81TTAs%;{4Ox>h4Xi)U8A^*e2P47JeNIsCgWpK{|?n3H~tJw zFvW}xQhBj{B{%Z5o=69djU`zm+}9Ab4PEg)=^D+w(tg8B3=9v@zU_Qa7V!8WUT5%nz7$Vb z&q&2p$F8Y$psC-wu3u@ObzS^V8_332CB&;Vi@86nhvYBn_z}fyZ;+R00u}){9Z(sb_Sbv(@tG&iL zoz1+xlrz@5MQ!?<`#n2LhP9t1-%EAf{q2IWbw-GdJE@+(_D*KEI)%00OTL%tynBlB z4@sMj|J|oOY>S#$r*o}0=m_QMUCJNay7zTGC4cTQQ$J00&@ss;dV>C-?%!w^X&!rY zX?Kg;?Y3~v6dilKe|Y!;bK(A<)Te6+N394tI9Z9=MP(MMuB#m1j&YzleI-es_N&F|PdmrUxr O_TM+E^*$VVSpEU$8OFK* literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/AArch64/Inputs/ObjC.obj.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/ObjC.obj.macho-aarch64 new file mode 100644 index 0000000000000000000000000000000000000000..836607672d45e154d142b04a7dd0ba72df8e6311 GIT binary patch literal 2008 zcmb7FO-vI(6do)>6|DFhO-xuKB$%iHIjAH}sAY+g<;QL{98AX2ZlTa_30(pS7Y-b_ zn6!xp4<0-M(ZnM+G+gjxycmx?;=#ltn4o@d_YG_pJou8??|tuY-tL=DKmYpiyIF`j z0ij0-tBW2Q7UU|{_6b)#_R;BP3NcDK5XBpz(+ZOjBBf(3IG*nDq&}8B}XG;3u`QufD^G36Cvh1C|uZ>UEtR#A^%N4eYxu2-;} zNjp`r&~I&juei=B#vOiNrmCi*$aniIS$XP?-oLiLRgP3m>~eoUxxbS_D3=>4WxC|d z=Ija13--6c_BO^a>i8u%Le$Ip3kLhEac*Z?Q0wN;`7Sp3m7CDUE(^#f31RJB^KFs7E7dL4WM3X_LqmTJT>6E;I=7 z9Dz;HN3lq#0a}!Q4hy0GE&Gd`Yx^Nl`LiXy;Ov*tX9Z`bd;@qII0(d^)VTARYOhd->=uLVbhmyLan>i#@Jd|^zF4_RYT!*vg6!U(BK~QGFF9b0(CZF z-(UQ=`XMC0!gfS#YhGkhj&;PrhUBEBrRQd6E!)v3PD|Mt&B+xkEm!K*I{Jcn+w|l1 ziRRP&>as_UOnU})eB)brAilFP>0$I4@Dl7aY{E_;l8tr`qZ$x%Hm-RXrI}*3#zhaK zJU;_>8qG0^8Hn>jL;&IfkT~uWmqU+GJm%qY)Nm1b4O6g3=pe9AzY#R@p7{&n*tpF6 z9s5_Ae`5a%^Bv+ib5-X1#F4N3A?y{wiyhoE0p3a+`>XmL#PQ+O@BU_h_Yn8(*GnAr zU-SNtiPH>$BxA(G_pt{52~h!^dWld>Jty$s`Y{U@xfg>#$6r5#A@ zrvsmN3Nu~8@O-9j=<(!8LMHS9J)sZBbyM~zr@N!^$V~dZ?Xp|>k4sj*f8>h1#05tG zD4Bx`SGq0Tb)WZC{*a!0D4)g>ayT}mdlC2Di0&@bxQ(mYV+YODCfxD74EXqe0nV`t AjsO4v literal 0 
HcmV?d00001 diff --git a/test/tools/llvm-objdump/AArch64/Inputs/hello.exe.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/hello.exe.macho-aarch64 new file mode 100755 index 0000000000000000000000000000000000000000..c30d35824fb72f2214dea53fa257fd10712b5576 GIT binary patch literal 49416 zcmeI*KWrOy6u|M9*dc}na3sVZffToD5&zT_#E=1jgBrCJ2-1?FV)+r**T&Mtj?QNq zs{@t_f`Nfb9T_^dOpJlWMr=q37>F{V!o-5aNGhe_y}RF~XD2Xq{66WI-@V_T=ihzo ztam@%`RneK$e3u1Yn#@#RTTL|DtaaIl(y5_oLihfwebGJxp!@JIFil7L;EtTv!x52 zTUt23G(4T`?+uTM&nI*^Yqr9oKM)Q(iKU(jmulKN@ zw>%$1wROU|PV;i3S$B=*#a5CJe;*sZo_*&QmXFp*XVSm+z5c1qxs&rt^SPOx|4~)_ znC3}T)2^VMwXC*tms+{6^*`&C)9qs4qV3P(0lmoo4xI~^UAwzlYlUttj`r0n_tNzx zU%>Kwy-6L<=lA{(-ahlrTMH-8EcuOtk^9u#qT~-_UH)80^yDXO!9dq&SJM7T?X##1 zMC=|5+rGJx z-{ng>X5&)z&8{`<^X>j@&hAB|ueUvR?@W5G_qy#kSUvvcaXnw7mMoHIwX@ItV34{# z@ADPSEnd`S^Oh%{tJeeq2q1s}0tg_000IagfB*srAb(}%h;@3;k zJ9m3hzPVi}ebv|XJ6FEDIViZznM&c;uZwbC#%DHPE=t+*zfbjVS-)0D#~1Zg)!Vzf zPuQtX2VvN%Ty8~S{UOWko#M~fg+{)&cV^qWwqMoWTGzjK+pg_>=2N<}ob1-*fcR3^ z=igU;z!HWKKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0009ILKmY** z5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0;64jXA1R-;lGD|CH>|sM+^M-* zqgf9kIUDI?45IRM)vZO1W_(dIjWH(^GVWGaT(BNo=){3r36^d7;nn3vQ;yjK2#T_C z*FQucySVe))P&AFW)-yUg)M({teoUF@pzJ_&oHpuo^7E0-kzf)x$U>-8faAQx+3m? Pv$^DK*?sK0Q{DG3o$1<{ literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/AArch64/Inputs/hello.obj.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/hello.obj.macho-aarch64 new file mode 100644 index 0000000000000000000000000000000000000000..704dbab0282ed6807ec0af5bf2ddb1205bf92ba8 GIT binary patch literal 604 zcmX^A>+L@t1_nk3AOI08K%4+#C;%}KNCNQ-Fbg~33l(kvo4~*TFXn;9|66@<#R2B{YaXdNIr$3Z`9(P?Tp-&(ZUnkTR{=-^-Qyzzp>6qrG#`=!ML~R^ zpB#WR2!PZf^Fd1ak<^3Y5{V0pz}&>lJcjrJU>uaBG31ow78o%2#Onbg1Value; + uint64_t value = op_info->Value; // Make sure all fields returned are zero if we don't set them. 
memset((void *)op_info, '\0', sizeof(struct LLVMOpInfo1)); @@ -624,6 +625,83 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, op_info->AddSymbol.Value = value; return 1; } else if (Arch == Triple::aarch64) { + if (Offset != 0 || Size != 4) + return 0; + // First search the section's relocation entries (if any) for an entry + // for this section offset. + uint64_t sect_addr = info->S.getAddress(); + uint64_t sect_offset = (Pc + Offset) - sect_addr; + bool reloc_found = false; + DataRefImpl Rel; + MachO::any_relocation_info RE; + bool isExtern = false; + SymbolRef Symbol; + uint32_t r_type = 0; + for (const RelocationRef &Reloc : info->S.relocations()) { + uint64_t RelocOffset; + Reloc.getOffset(RelocOffset); + if (RelocOffset == sect_offset) { + Rel = Reloc.getRawDataRefImpl(); + RE = info->O->getRelocation(Rel); + r_type = info->O->getAnyRelocationType(RE); + if (r_type == MachO::ARM64_RELOC_ADDEND) { + DataRefImpl RelNext = Rel; + info->O->moveRelocationNext(RelNext); + MachO::any_relocation_info RENext = info->O->getRelocation(RelNext); + if (value == 0) { + value = info->O->getPlainRelocationSymbolNum(RENext); + op_info->Value = value; + } + } + // NOTE: Scattered relocations don't exist on arm64. 
+ isExtern = info->O->getPlainRelocationExternal(RE); + if (isExtern) { + symbol_iterator RelocSym = Reloc.getSymbol(); + Symbol = *RelocSym; + } + reloc_found = true; + break; + } + } + if (reloc_found && isExtern) { + StringRef SymName; + Symbol.getName(SymName); + const char *name = SymName.data(); + op_info->AddSymbol.Present = 1; + op_info->AddSymbol.Name = name; + + switch (r_type) { + case MachO::ARM64_RELOC_PAGE21: + /* @page */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_PAGE; + break; + case MachO::ARM64_RELOC_PAGEOFF12: + /* @pageoff */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_PAGEOFF; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGE21: + /* @gotpage */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_GOTPAGE; + break; + case MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12: + /* @gotpageoff */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF; + break; + case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21: + /* @tvlppage is not implemented in llvm-mc */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_TLVP; + break; + case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12: + /* @tvlppageoff is not implemented in llvm-mc */ + op_info->VariantKind = LLVMDisassembler_VariantKind_ARM64_TLVOFF; + break; + default: + case MachO::ARM64_RELOC_BRANCH26: + op_info->VariantKind = LLVMDisassembler_VariantKind_None; + break; + } + return 1; + } return 0; } else { return 0; @@ -789,9 +867,10 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue, static void method_reference(struct DisassembleInfo *info, uint64_t *ReferenceType, const char **ReferenceName) { + unsigned int Arch = info->O->getArch(); if (*ReferenceName != nullptr) { if (strcmp(*ReferenceName, "_objc_msgSend") == 0) { - if (info->selector_name != NULL) { + if (info->selector_name != nullptr) { if (info->method != nullptr) free(info->method); if (info->class_name != nullptr) { @@ -809,7 +888,12 @@ static void method_reference(struct DisassembleInfo 
*info, } else { info->method = (char *)malloc(9 + strlen(info->selector_name)); if (info->method != nullptr) { - strcpy(info->method, "-[%rdi "); + if (Arch == Triple::x86_64) + strcpy(info->method, "-[%rdi "); + else if (Arch == Triple::aarch64) + strcpy(info->method, "-[x0 "); + else + strcpy(info->method, "-[r? "); strcat(info->method, info->selector_name); strcat(info->method, "]"); *ReferenceName = info->method; @@ -819,12 +903,17 @@ static void method_reference(struct DisassembleInfo *info, info->class_name = nullptr; } } else if (strcmp(*ReferenceName, "_objc_msgSendSuper2") == 0) { - if (info->selector_name != NULL) { + if (info->selector_name != nullptr) { if (info->method != nullptr) free(info->method); info->method = (char *)malloc(17 + strlen(info->selector_name)); if (info->method != nullptr) { - strcpy(info->method, "-[[%rdi super] "); + if (Arch == Triple::x86_64) + strcpy(info->method, "-[[%rdi super] "); + else if (Arch == Triple::aarch64) + strcpy(info->method, "-[[x0 super] "); + else + strcpy(info->method, "-[[r? super] "); strcat(info->method, info->selector_name); strcat(info->method, "]"); *ReferenceName = info->method; @@ -1187,12 +1276,6 @@ uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue, const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC, uint64_t *ReferenceType, struct DisassembleInfo *info) { - // TODO: This rouine's code and the routines it calls are only work with - // x86_64 Mach-O files for now. - unsigned int Arch = info->O->getArch(); - if (Arch != Triple::x86_64) - return nullptr; - // First see if there is an external relocation entry at the ReferencePC. 
uint64_t sect_addr = info->S.getAddress(); uint64_t sect_offset = ReferencePC - sect_addr; @@ -1369,6 +1452,86 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue, method_reference(info, ReferenceType, ReferenceName); else *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + // If this is arm64 and the reference is an adrp instruction save the + // instruction, passed in ReferenceValue and the address of the instruction + // for use later if we see an add immediate instruction. + } else if (info->O->getArch() == Triple::aarch64 && + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) { + info->adrp_inst = ReferenceValue; + info->adrp_addr = ReferencePC; + SymbolName = nullptr; + *ReferenceName = nullptr; + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + // If this is arm64 and reference is an add immediate instruction and we + // have + // seen an adrp instruction just before it and the adrp's Xd register + // matches + // this add's Xn register reconstruct the value being referenced and look to + // see if it is a literal pointer. Note the add immediate instruction is + // passed in ReferenceValue. 
+ } else if (info->O->getArch() == Triple::aarch64 && + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri && + ReferencePC - 4 == info->adrp_addr && + (info->adrp_inst & 0x9f000000) == 0x90000000 && + (info->adrp_inst & 0x1f) == ((ReferenceValue >> 5) & 0x1f)) { + uint32_t addxri_inst; + uint64_t adrp_imm, addxri_imm; + + adrp_imm = + ((info->adrp_inst & 0x00ffffe0) >> 3) | ((info->adrp_inst >> 29) & 0x3); + if (info->adrp_inst & 0x0200000) + adrp_imm |= 0xfffffffffc000000LL; + + addxri_inst = ReferenceValue; + addxri_imm = (addxri_inst >> 10) & 0xfff; + if (((addxri_inst >> 22) & 0x3) == 1) + addxri_imm <<= 12; + + ReferenceValue = (info->adrp_addr & 0xfffffffffffff000LL) + + (adrp_imm << 12) + addxri_imm; + + *ReferenceName = + GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info); + if (*ReferenceName == nullptr) + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + // If this is arm64 and the reference is a load register instruction and we + // have seen an adrp instruction just before it and the adrp's Xd register + // matches this load's Xn register reconstruct the value being referenced and + // look to see if it is a literal pointer. Note the load register + // instruction is passed in ReferenceValue. 
+ } else if (info->O->getArch() == Triple::aarch64 && + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXui && + ReferencePC - 4 == info->adrp_addr && + (info->adrp_inst & 0x9f000000) == 0x90000000 && + (info->adrp_inst & 0x1f) == ((ReferenceValue >> 5) & 0x1f)) { + uint32_t ldrxui_inst; + uint64_t adrp_imm, ldrxui_imm; + + adrp_imm = + ((info->adrp_inst & 0x00ffffe0) >> 3) | ((info->adrp_inst >> 29) & 0x3); + if (info->adrp_inst & 0x0200000) + adrp_imm |= 0xfffffffffc000000LL; + + ldrxui_inst = ReferenceValue; + ldrxui_imm = (ldrxui_inst >> 10) & 0xfff; + + ReferenceValue = (info->adrp_addr & 0xfffffffffffff000LL) + + (adrp_imm << 12) + (ldrxui_imm << 3); + + *ReferenceName = + GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info); + if (*ReferenceName == nullptr) + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + } + // If this is arm64 and is a load register (PC-relative) instruction the + // ReferenceValue is the PC plus the immediate value. + else if (info->O->getArch() == Triple::aarch64 && + (*ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_LDRXl || + *ReferenceType == LLVMDisassembler_ReferenceType_In_ARM64_ADR)) { + *ReferenceName = + GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info); + if (*ReferenceName == nullptr) + *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; } #if HAVE_CXXABI_H else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) { @@ -1435,15 +1598,9 @@ static void DisassembleInputMachO2(StringRef Filename, MCPU = McpuDefault; std::unique_ptr InstrInfo(TheTarget->createMCInstrInfo()); - std::unique_ptr InstrAnalysis( - TheTarget->createMCInstrAnalysis(InstrInfo.get())); std::unique_ptr ThumbInstrInfo; - std::unique_ptr ThumbInstrAnalysis; - if (ThumbTarget) { + if (ThumbTarget) ThumbInstrInfo.reset(ThumbTarget->createMCInstrInfo()); - ThumbInstrAnalysis.reset( - ThumbTarget->createMCInstrAnalysis(ThumbInstrInfo.get())); - } // Package up features to 
be passed to target/subtarget std::string FeaturesStr; @@ -1482,9 +1639,8 @@ static void DisassembleInputMachO2(StringRef Filename, // Comment stream and backing vector. SmallString<128> CommentsToEmit; raw_svector_ostream CommentStream(CommentsToEmit); - IP->setCommentStream(CommentStream); - if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) { + if (!AsmInfo || !STI || !DisAsm || !IP) { errs() << "error: couldn't initialize disassembler for target " << TripleName << '\n'; return; @@ -1525,8 +1681,7 @@ static void DisassembleInputMachO2(StringRef Filename, ThumbIP->setPrintImmHex(PrintImmHex); } - if (ThumbTarget && (!ThumbInstrAnalysis || !ThumbAsmInfo || !ThumbSTI || - !ThumbDisAsm || !ThumbIP)) { + if (ThumbTarget && (!ThumbAsmInfo || !ThumbSTI || !ThumbDisAsm || !ThumbIP)) { errs() << "error: couldn't initialize disassembler for target " << ThumbTripleName << '\n'; return; @@ -1798,6 +1953,13 @@ static void DisassembleInputMachO2(StringRef Filename, outs() << format("\t.byte 0x%02x #bad opcode\n", *(Bytes.data() + Index) & 0xff); Size = 1; // skip exactly one illegible byte and move on. + } else if (Arch == Triple::aarch64) { + uint32_t opcode = (*(Bytes.data() + Index) & 0xff) | + (*(Bytes.data() + Index + 1) & 0xff) << 8 | + (*(Bytes.data() + Index + 2) & 0xff) << 16 | + (*(Bytes.data() + Index + 3) & 0xff) << 24; + outs() << format("\t.long\t0x%08x\n", opcode); + Size = 4; } else { errs() << "llvm-objdump: warning: invalid instruction encoding\n"; if (Size == 0) -- 2.34.1