//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
87ab83427SDimitry Andric
97ab83427SDimitry Andric #include "llvm/BinaryFormat/Magic.h"
10cfca06d7SDimitry Andric #include "llvm/ADT/StringRef.h"
11cfca06d7SDimitry Andric #include "llvm/ADT/Twine.h"
127ab83427SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
137ab83427SDimitry Andric #include "llvm/BinaryFormat/MachO.h"
147ab83427SDimitry Andric #include "llvm/Support/Endian.h"
15eb11fae6SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
167ab83427SDimitry Andric
177ab83427SDimitry Andric #if !defined(_MSC_VER) && !defined(__MINGW32__)
187ab83427SDimitry Andric #include <unistd.h>
197ab83427SDimitry Andric #else
207ab83427SDimitry Andric #include <io.h>
217ab83427SDimitry Andric #endif
227ab83427SDimitry Andric
237ab83427SDimitry Andric using namespace llvm;
247ab83427SDimitry Andric using namespace llvm::support::endian;
257ab83427SDimitry Andric using namespace llvm::sys::fs;
267ab83427SDimitry Andric
277ab83427SDimitry Andric template <size_t N>
startswith(StringRef Magic,const char (& S)[N])287ab83427SDimitry Andric static bool startswith(StringRef Magic, const char (&S)[N]) {
29312c0ed1SDimitry Andric return Magic.starts_with(StringRef(S, N - 1));
307ab83427SDimitry Andric }
317ab83427SDimitry Andric
32eb11fae6SDimitry Andric /// Identify the magic in magic.
identify_magic(StringRef Magic)337ab83427SDimitry Andric file_magic llvm::identify_magic(StringRef Magic) {
347ab83427SDimitry Andric if (Magic.size() < 4)
357ab83427SDimitry Andric return file_magic::unknown;
367ab83427SDimitry Andric switch ((unsigned char)Magic[0]) {
377ab83427SDimitry Andric case 0x00: {
387ab83427SDimitry Andric // COFF bigobj, CL.exe's LTO object file, or short import library file
397ab83427SDimitry Andric if (startswith(Magic, "\0\0\xFF\xFF")) {
407ab83427SDimitry Andric size_t MinSize =
417ab83427SDimitry Andric offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
427ab83427SDimitry Andric if (Magic.size() < MinSize)
437ab83427SDimitry Andric return file_magic::coff_import_library;
447ab83427SDimitry Andric
457ab83427SDimitry Andric const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
467ab83427SDimitry Andric if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
477ab83427SDimitry Andric return file_magic::coff_object;
487ab83427SDimitry Andric if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
497ab83427SDimitry Andric return file_magic::coff_cl_gl_object;
507ab83427SDimitry Andric return file_magic::coff_import_library;
517ab83427SDimitry Andric }
527ab83427SDimitry Andric // Windows resource file
5308bbd35aSDimitry Andric if (Magic.size() >= sizeof(COFF::WinResMagic) &&
5408bbd35aSDimitry Andric memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
557ab83427SDimitry Andric return file_magic::windows_resource;
567ab83427SDimitry Andric // 0x0000 = COFF unknown machine type
577ab83427SDimitry Andric if (Magic[1] == 0)
587ab83427SDimitry Andric return file_magic::coff_object;
597ab83427SDimitry Andric if (startswith(Magic, "\0asm"))
607ab83427SDimitry Andric return file_magic::wasm_object;
617ab83427SDimitry Andric break;
627ab83427SDimitry Andric }
63e6d15924SDimitry Andric
64e6d15924SDimitry Andric case 0x01:
65e6d15924SDimitry Andric // XCOFF format
66e6d15924SDimitry Andric if (startswith(Magic, "\x01\xDF"))
67e6d15924SDimitry Andric return file_magic::xcoff_object_32;
68e6d15924SDimitry Andric if (startswith(Magic, "\x01\xF7"))
69e6d15924SDimitry Andric return file_magic::xcoff_object_64;
70e6d15924SDimitry Andric break;
71e6d15924SDimitry Andric
72344a3780SDimitry Andric case 0x03:
73344a3780SDimitry Andric if (startswith(Magic, "\x03\xF0\x00"))
74344a3780SDimitry Andric return file_magic::goff_object;
75312c0ed1SDimitry Andric // SPIR-V format in little-endian mode.
76312c0ed1SDimitry Andric if (startswith(Magic, "\x03\x02\x23\x07"))
77312c0ed1SDimitry Andric return file_magic::spirv_object;
78312c0ed1SDimitry Andric break;
79312c0ed1SDimitry Andric
80312c0ed1SDimitry Andric case 0x07: // SPIR-V format in big-endian mode.
81312c0ed1SDimitry Andric if (startswith(Magic, "\x07\x23\x02\x03"))
82312c0ed1SDimitry Andric return file_magic::spirv_object;
83344a3780SDimitry Andric break;
84344a3780SDimitry Andric
85145449b1SDimitry Andric case 0x10:
86145449b1SDimitry Andric if (startswith(Magic, "\x10\xFF\x10\xAD"))
87145449b1SDimitry Andric return file_magic::offload_binary;
88145449b1SDimitry Andric break;
89145449b1SDimitry Andric
907ab83427SDimitry Andric case 0xDE: // 0x0B17C0DE = BC wraper
917ab83427SDimitry Andric if (startswith(Magic, "\xDE\xC0\x17\x0B"))
927ab83427SDimitry Andric return file_magic::bitcode;
937ab83427SDimitry Andric break;
947ab83427SDimitry Andric case 'B':
957ab83427SDimitry Andric if (startswith(Magic, "BC\xC0\xDE"))
967ab83427SDimitry Andric return file_magic::bitcode;
977ab83427SDimitry Andric break;
98b1c73532SDimitry Andric case 'C':
99b1c73532SDimitry Andric if (startswith(Magic, "CCOB"))
100b1c73532SDimitry Andric return file_magic::offload_bundle_compressed;
1014df029ccSDimitry Andric if (startswith(Magic, "CPCH"))
1024df029ccSDimitry Andric return file_magic::clang_ast;
103b1c73532SDimitry Andric break;
1047ab83427SDimitry Andric case '!':
1057ab83427SDimitry Andric if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
1067ab83427SDimitry Andric return file_magic::archive;
1077ab83427SDimitry Andric break;
1086f8fc217SDimitry Andric case '<':
1096f8fc217SDimitry Andric if (startswith(Magic, "<bigaf>\n"))
1106f8fc217SDimitry Andric return file_magic::archive;
1116f8fc217SDimitry Andric break;
1127ab83427SDimitry Andric case '\177':
1137ab83427SDimitry Andric if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
1147ab83427SDimitry Andric bool Data2MSB = Magic[5] == 2;
1157ab83427SDimitry Andric unsigned high = Data2MSB ? 16 : 17;
1167ab83427SDimitry Andric unsigned low = Data2MSB ? 17 : 16;
1177ab83427SDimitry Andric if (Magic[high] == 0) {
1187ab83427SDimitry Andric switch (Magic[low]) {
1197ab83427SDimitry Andric default:
1207ab83427SDimitry Andric return file_magic::elf;
1217ab83427SDimitry Andric case 1:
1227ab83427SDimitry Andric return file_magic::elf_relocatable;
1237ab83427SDimitry Andric case 2:
1247ab83427SDimitry Andric return file_magic::elf_executable;
1257ab83427SDimitry Andric case 3:
1267ab83427SDimitry Andric return file_magic::elf_shared_object;
1277ab83427SDimitry Andric case 4:
1287ab83427SDimitry Andric return file_magic::elf_core;
1297ab83427SDimitry Andric }
1307ab83427SDimitry Andric }
1317ab83427SDimitry Andric // It's still some type of ELF file.
1327ab83427SDimitry Andric return file_magic::elf;
1337ab83427SDimitry Andric }
1347ab83427SDimitry Andric break;
1357ab83427SDimitry Andric
1367ab83427SDimitry Andric case 0xCA:
1377ab83427SDimitry Andric if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
1387ab83427SDimitry Andric startswith(Magic, "\xCA\xFE\xBA\xBF")) {
1397ab83427SDimitry Andric // This is complicated by an overlap with Java class files.
1407ab83427SDimitry Andric // See the Mach-O section in /usr/share/file/magic for details.
1417ab83427SDimitry Andric if (Magic.size() >= 8 && Magic[7] < 43)
1427ab83427SDimitry Andric return file_magic::macho_universal_binary;
1437ab83427SDimitry Andric }
1447ab83427SDimitry Andric break;
1457ab83427SDimitry Andric
1467ab83427SDimitry Andric // The two magic numbers for mach-o are:
1477ab83427SDimitry Andric // 0xfeedface - 32-bit mach-o
1487ab83427SDimitry Andric // 0xfeedfacf - 64-bit mach-o
1497ab83427SDimitry Andric case 0xFE:
1507ab83427SDimitry Andric case 0xCE:
1517ab83427SDimitry Andric case 0xCF: {
1527ab83427SDimitry Andric uint16_t type = 0;
1537ab83427SDimitry Andric if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
1547ab83427SDimitry Andric startswith(Magic, "\xFE\xED\xFA\xCF")) {
1557ab83427SDimitry Andric /* Native endian */
1567ab83427SDimitry Andric size_t MinSize;
1577ab83427SDimitry Andric if (Magic[3] == char(0xCE))
1587ab83427SDimitry Andric MinSize = sizeof(MachO::mach_header);
1597ab83427SDimitry Andric else
1607ab83427SDimitry Andric MinSize = sizeof(MachO::mach_header_64);
1617ab83427SDimitry Andric if (Magic.size() >= MinSize)
1627ab83427SDimitry Andric type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
1637ab83427SDimitry Andric } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
1647ab83427SDimitry Andric startswith(Magic, "\xCF\xFA\xED\xFE")) {
1657ab83427SDimitry Andric /* Reverse endian */
1667ab83427SDimitry Andric size_t MinSize;
1677ab83427SDimitry Andric if (Magic[0] == char(0xCE))
1687ab83427SDimitry Andric MinSize = sizeof(MachO::mach_header);
1697ab83427SDimitry Andric else
1707ab83427SDimitry Andric MinSize = sizeof(MachO::mach_header_64);
1717ab83427SDimitry Andric if (Magic.size() >= MinSize)
1727ab83427SDimitry Andric type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
1737ab83427SDimitry Andric }
1747ab83427SDimitry Andric switch (type) {
1757ab83427SDimitry Andric default:
1767ab83427SDimitry Andric break;
1777ab83427SDimitry Andric case 1:
1787ab83427SDimitry Andric return file_magic::macho_object;
1797ab83427SDimitry Andric case 2:
1807ab83427SDimitry Andric return file_magic::macho_executable;
1817ab83427SDimitry Andric case 3:
1827ab83427SDimitry Andric return file_magic::macho_fixed_virtual_memory_shared_lib;
1837ab83427SDimitry Andric case 4:
1847ab83427SDimitry Andric return file_magic::macho_core;
1857ab83427SDimitry Andric case 5:
1867ab83427SDimitry Andric return file_magic::macho_preload_executable;
1877ab83427SDimitry Andric case 6:
1887ab83427SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib;
1897ab83427SDimitry Andric case 7:
1907ab83427SDimitry Andric return file_magic::macho_dynamic_linker;
1917ab83427SDimitry Andric case 8:
1927ab83427SDimitry Andric return file_magic::macho_bundle;
1937ab83427SDimitry Andric case 9:
1947ab83427SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib_stub;
1957ab83427SDimitry Andric case 10:
1967ab83427SDimitry Andric return file_magic::macho_dsym_companion;
1977ab83427SDimitry Andric case 11:
1987ab83427SDimitry Andric return file_magic::macho_kext_bundle;
199e3b55780SDimitry Andric case 12:
200e3b55780SDimitry Andric return file_magic::macho_file_set;
2017ab83427SDimitry Andric }
2027ab83427SDimitry Andric break;
2037ab83427SDimitry Andric }
2047ab83427SDimitry Andric case 0xF0: // PowerPC Windows
2057ab83427SDimitry Andric case 0x83: // Alpha 32-bit
2067ab83427SDimitry Andric case 0x84: // Alpha 64-bit
2077ab83427SDimitry Andric case 0x66: // MPS R4000 Windows
2087ab83427SDimitry Andric case 0x50: // mc68K
209145449b1SDimitry Andric if (startswith(Magic, "\x50\xed\x55\xba"))
210145449b1SDimitry Andric return file_magic::cuda_fatbinary;
211e3b55780SDimitry Andric [[fallthrough]];
212145449b1SDimitry Andric
2137ab83427SDimitry Andric case 0x4c: // 80386 Windows
2147ab83427SDimitry Andric case 0xc4: // ARMNT Windows
2157ab83427SDimitry Andric if (Magic[1] == 0x01)
2167ab83427SDimitry Andric return file_magic::coff_object;
217e3b55780SDimitry Andric [[fallthrough]];
2187ab83427SDimitry Andric
2197ab83427SDimitry Andric case 0x90: // PA-RISC Windows
2207ab83427SDimitry Andric case 0x68: // mc68K Windows
2217ab83427SDimitry Andric if (Magic[1] == 0x02)
2227ab83427SDimitry Andric return file_magic::coff_object;
2237ab83427SDimitry Andric break;
2247ab83427SDimitry Andric
225e6d15924SDimitry Andric case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
226e6d15924SDimitry Andric // Minidump file.
227044eb2f6SDimitry Andric if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
2287ab83427SDimitry Andric uint32_t off = read32le(Magic.data() + 0x3c);
2297ab83427SDimitry Andric // PE/COFF file, either EXE or DLL.
230312c0ed1SDimitry Andric if (Magic.substr(off).starts_with(
231044eb2f6SDimitry Andric StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
2327ab83427SDimitry Andric return file_magic::pecoff_executable;
2337ab83427SDimitry Andric }
234312c0ed1SDimitry Andric if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n"))
235eb11fae6SDimitry Andric return file_magic::pdb;
236e6d15924SDimitry Andric if (startswith(Magic, "MDMP"))
237e6d15924SDimitry Andric return file_magic::minidump;
2387ab83427SDimitry Andric break;
2397ab83427SDimitry Andric
2409df3605dSDimitry Andric case 0x64: // x86-64 or ARM64 Windows.
2419df3605dSDimitry Andric if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
2427ab83427SDimitry Andric return file_magic::coff_object;
2437ab83427SDimitry Andric break;
2447ab83427SDimitry Andric
2457fa27ce4SDimitry Andric case 0x2d: // YAML '-' MachO TBD.
2461d5ae102SDimitry Andric if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
2471d5ae102SDimitry Andric return file_magic::tapi_file;
2481d5ae102SDimitry Andric break;
2497fa27ce4SDimitry Andric case 0x7b: // JSON '{' MachO TBD.
2507fa27ce4SDimitry Andric return file_magic::tapi_file;
2517fa27ce4SDimitry Andric break;
2521d5ae102SDimitry Andric
253145449b1SDimitry Andric case 'D': // DirectX container file - DXBC
254145449b1SDimitry Andric if (startswith(Magic, "DXBC"))
255145449b1SDimitry Andric return file_magic::dxcontainer_object;
256145449b1SDimitry Andric break;
257145449b1SDimitry Andric
258e3b55780SDimitry Andric case 0x41: // ARM64EC windows
259e3b55780SDimitry Andric if (Magic[1] == char(0xA6))
260e3b55780SDimitry Andric return file_magic::coff_object;
261e3b55780SDimitry Andric break;
262e3b55780SDimitry Andric
2637fa27ce4SDimitry Andric case 0x4e: // ARM64X windows
2647fa27ce4SDimitry Andric if (Magic[1] == char(0xA6))
2657fa27ce4SDimitry Andric return file_magic::coff_object;
2667fa27ce4SDimitry Andric break;
2677fa27ce4SDimitry Andric
268b1c73532SDimitry Andric case '_': {
269b1c73532SDimitry Andric const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
270b1c73532SDimitry Andric if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
271b1c73532SDimitry Andric return file_magic::offload_bundle;
272b1c73532SDimitry Andric break;
273b1c73532SDimitry Andric }
274b1c73532SDimitry Andric
2757ab83427SDimitry Andric default:
2767ab83427SDimitry Andric break;
2777ab83427SDimitry Andric }
2787ab83427SDimitry Andric return file_magic::unknown;
2797ab83427SDimitry Andric }
2807ab83427SDimitry Andric
identify_magic(const Twine & Path,file_magic & Result)2817ab83427SDimitry Andric std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
282344a3780SDimitry Andric auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
283344a3780SDimitry Andric /*RequiresNullTerminator=*/false);
284eb11fae6SDimitry Andric if (!FileOrError)
285eb11fae6SDimitry Andric return FileOrError.getError();
2867ab83427SDimitry Andric
287eb11fae6SDimitry Andric std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
288eb11fae6SDimitry Andric Result = identify_magic(FileBuffer->getBuffer());
2897ab83427SDimitry Andric
2907ab83427SDimitry Andric return std::error_code();
2917ab83427SDimitry Andric }
292