xref: /src/contrib/llvm-project/llvm/lib/IR/StructuralHash.cpp (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
17fa27ce4SDimitry Andric //===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
2b60736ecSDimitry Andric //
3b60736ecSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b60736ecSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5b60736ecSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b60736ecSDimitry Andric //
7b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
8b60736ecSDimitry Andric 
9b60736ecSDimitry Andric #include "llvm/IR/StructuralHash.h"
10b1c73532SDimitry Andric #include "llvm/ADT/Hashing.h"
11b60736ecSDimitry Andric #include "llvm/IR/Function.h"
127fa27ce4SDimitry Andric #include "llvm/IR/GlobalVariable.h"
13b1c73532SDimitry Andric #include "llvm/IR/InstrTypes.h"
14b1c73532SDimitry Andric #include "llvm/IR/Instructions.h"
15b1c73532SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
16b60736ecSDimitry Andric #include "llvm/IR/Module.h"
17b60736ecSDimitry Andric 
18b60736ecSDimitry Andric using namespace llvm;
19b60736ecSDimitry Andric 
207fa27ce4SDimitry Andric namespace {
21b60736ecSDimitry Andric 
22b60736ecSDimitry Andric // Basic hashing mechanism to detect structural change to the IR, used to verify
23b1c73532SDimitry Andric // pass return status consistency with actual change. In addition to being used
24b1c73532SDimitry Andric // by the MergeFunctions pass.
25b60736ecSDimitry Andric 
267fa27ce4SDimitry Andric class StructuralHashImpl {
27b1c73532SDimitry Andric   uint64_t Hash;
28b60736ecSDimitry Andric 
hash(uint64_t V)29b1c73532SDimitry Andric   void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
30b1c73532SDimitry Andric 
31b1c73532SDimitry Andric   // This will produce different values on 32-bit and 64-bit systens as
32b1c73532SDimitry Andric   // hash_combine returns a size_t. However, this is only used for
33b1c73532SDimitry Andric   // detailed hashing which, in-tree, only needs to distinguish between
34b1c73532SDimitry Andric   // differences in functions.
hashArbitaryType(const T & V)35b1c73532SDimitry Andric   template <typename T> void hashArbitaryType(const T &V) {
36b1c73532SDimitry Andric     hash(hash_combine(V));
37b1c73532SDimitry Andric   }
38b1c73532SDimitry Andric 
hashType(Type * ValueType)39b1c73532SDimitry Andric   void hashType(Type *ValueType) {
40b1c73532SDimitry Andric     hash(ValueType->getTypeID());
41b1c73532SDimitry Andric     if (ValueType->isIntegerTy())
42b1c73532SDimitry Andric       hash(ValueType->getIntegerBitWidth());
43b1c73532SDimitry Andric   }
44b60736ecSDimitry Andric 
45b60736ecSDimitry Andric public:
StructuralHashImpl()467fa27ce4SDimitry Andric   StructuralHashImpl() : Hash(4) {}
47b60736ecSDimitry Andric 
updateOperand(Value * Operand)48b1c73532SDimitry Andric   void updateOperand(Value *Operand) {
49b1c73532SDimitry Andric     hashType(Operand->getType());
50b1c73532SDimitry Andric 
51b1c73532SDimitry Andric     // The cases enumerated below are not exhaustive and are only aimed to
52b1c73532SDimitry Andric     // get decent coverage over the function.
53b1c73532SDimitry Andric     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
54b1c73532SDimitry Andric       hashArbitaryType(ConstInt->getValue());
55b1c73532SDimitry Andric     } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
56b1c73532SDimitry Andric       hashArbitaryType(ConstFP->getValue());
57b1c73532SDimitry Andric     } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
58b1c73532SDimitry Andric       hash(Arg->getArgNo());
59b1c73532SDimitry Andric     } else if (Function *Func = dyn_cast<Function>(Operand)) {
60b1c73532SDimitry Andric       // Hashing the name will be deterministic as LLVM's hashing infrastructure
61b1c73532SDimitry Andric       // has explicit support for hashing strings and will not simply hash
62b1c73532SDimitry Andric       // the pointer.
63b1c73532SDimitry Andric       hashArbitaryType(Func->getName());
64b1c73532SDimitry Andric     }
65b1c73532SDimitry Andric   }
66b1c73532SDimitry Andric 
updateInstruction(const Instruction & Inst,bool DetailedHash)67b1c73532SDimitry Andric   void updateInstruction(const Instruction &Inst, bool DetailedHash) {
68b1c73532SDimitry Andric     hash(Inst.getOpcode());
69b1c73532SDimitry Andric 
70b1c73532SDimitry Andric     if (!DetailedHash)
71b1c73532SDimitry Andric       return;
72b1c73532SDimitry Andric 
73b1c73532SDimitry Andric     hashType(Inst.getType());
74b1c73532SDimitry Andric 
75b1c73532SDimitry Andric     // Handle additional properties of specific instructions that cause
76b1c73532SDimitry Andric     // semantic differences in the IR.
77b1c73532SDimitry Andric     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
78b1c73532SDimitry Andric       hash(ComparisonInstruction->getPredicate());
79b1c73532SDimitry Andric 
80b1c73532SDimitry Andric     for (const auto &Op : Inst.operands())
81b1c73532SDimitry Andric       updateOperand(Op);
82b1c73532SDimitry Andric   }
83b1c73532SDimitry Andric 
84b1c73532SDimitry Andric   // A function hash is calculated by considering only the number of arguments
85b1c73532SDimitry Andric   // and whether a function is varargs, the order of basic blocks (given by the
86b1c73532SDimitry Andric   // successors of each basic block in depth first order), and the order of
87b1c73532SDimitry Andric   // opcodes of each instruction within each of these basic blocks. This mirrors
88b1c73532SDimitry Andric   // the strategy FunctionComparator::compare() uses to compare functions by
89b1c73532SDimitry Andric   // walking the BBs in depth first order and comparing each instruction in
90b1c73532SDimitry Andric   // sequence. Because this hash currently does not look at the operands, it is
91b1c73532SDimitry Andric   // insensitive to things such as the target of calls and the constants used in
92b1c73532SDimitry Andric   // the function, which makes it useful when possibly merging functions which
93b1c73532SDimitry Andric   // are the same modulo constants and call targets.
94b1c73532SDimitry Andric   //
95b1c73532SDimitry Andric   // Note that different users of StructuralHash will want different behavior
96b1c73532SDimitry Andric   // out of it (i.e., MergeFunctions will want something different from PM
97b1c73532SDimitry Andric   // expensive checks for pass modification status). When modifying this
98b1c73532SDimitry Andric   // function, most changes should be gated behind an option and enabled
99b1c73532SDimitry Andric   // selectively.
update(const Function & F,bool DetailedHash)100b1c73532SDimitry Andric   void update(const Function &F, bool DetailedHash) {
1017fa27ce4SDimitry Andric     // Declarations don't affect analyses.
1027fa27ce4SDimitry Andric     if (F.isDeclaration())
103b60736ecSDimitry Andric       return;
104b60736ecSDimitry Andric 
105b1c73532SDimitry Andric     hash(0x62642d6b6b2d6b72); // Function header
1067fa27ce4SDimitry Andric 
1077fa27ce4SDimitry Andric     hash(F.isVarArg());
1087fa27ce4SDimitry Andric     hash(F.arg_size());
109b60736ecSDimitry Andric 
110b60736ecSDimitry Andric     SmallVector<const BasicBlock *, 8> BBs;
111b60736ecSDimitry Andric     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
112b60736ecSDimitry Andric 
113b1c73532SDimitry Andric     // Walk the blocks in the same order as
114b1c73532SDimitry Andric     // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
115b1c73532SDimitry Andric     // function "structure." (BB and opcode sequence)
116b60736ecSDimitry Andric     BBs.push_back(&F.getEntryBlock());
117b60736ecSDimitry Andric     VisitedBBs.insert(BBs[0]);
118b60736ecSDimitry Andric     while (!BBs.empty()) {
119b60736ecSDimitry Andric       const BasicBlock *BB = BBs.pop_back_val();
120b1c73532SDimitry Andric 
121b1c73532SDimitry Andric       // This random value acts as a block header, as otherwise the partition of
122b1c73532SDimitry Andric       // opcodes into BBs wouldn't affect the hash, only the order of the
123b1c73532SDimitry Andric       // opcodes
124b1c73532SDimitry Andric       hash(45798);
125b60736ecSDimitry Andric       for (auto &Inst : *BB)
126b1c73532SDimitry Andric         updateInstruction(Inst, DetailedHash);
127b60736ecSDimitry Andric 
1284df029ccSDimitry Andric       for (const BasicBlock *Succ : successors(BB))
1294df029ccSDimitry Andric         if (VisitedBBs.insert(Succ).second)
1304df029ccSDimitry Andric           BBs.push_back(Succ);
131b60736ecSDimitry Andric     }
132b60736ecSDimitry Andric   }
133b60736ecSDimitry Andric 
update(const GlobalVariable & GV)1347fa27ce4SDimitry Andric   void update(const GlobalVariable &GV) {
1357fa27ce4SDimitry Andric     // Declarations and used/compiler.used don't affect analyses.
1367fa27ce4SDimitry Andric     // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
1377fa27ce4SDimitry Andric     // we ignore anything with the `.llvm` prefix
1387fa27ce4SDimitry Andric     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
1397fa27ce4SDimitry Andric       return;
1407fa27ce4SDimitry Andric     hash(23456); // Global header
1417fa27ce4SDimitry Andric     hash(GV.getValueType()->getTypeID());
1427fa27ce4SDimitry Andric   }
1437fa27ce4SDimitry Andric 
update(const Module & M,bool DetailedHash)144b1c73532SDimitry Andric   void update(const Module &M, bool DetailedHash) {
1457fa27ce4SDimitry Andric     for (const GlobalVariable &GV : M.globals())
1467fa27ce4SDimitry Andric       update(GV);
147b60736ecSDimitry Andric     for (const Function &F : M)
148b1c73532SDimitry Andric       update(F, DetailedHash);
149b60736ecSDimitry Andric   }
150b60736ecSDimitry Andric 
getHash() const151b60736ecSDimitry Andric   uint64_t getHash() const { return Hash; }
152b60736ecSDimitry Andric };
153b60736ecSDimitry Andric 
1547fa27ce4SDimitry Andric } // namespace
155b60736ecSDimitry Andric 
StructuralHash(const Function & F,bool DetailedHash)156b1c73532SDimitry Andric IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
1577fa27ce4SDimitry Andric   StructuralHashImpl H;
158b1c73532SDimitry Andric   H.update(F, DetailedHash);
159b60736ecSDimitry Andric   return H.getHash();
160b60736ecSDimitry Andric }
161b60736ecSDimitry Andric 
StructuralHash(const Module & M,bool DetailedHash)162b1c73532SDimitry Andric IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
1637fa27ce4SDimitry Andric   StructuralHashImpl H;
164b1c73532SDimitry Andric   H.update(M, DetailedHash);
165b60736ecSDimitry Andric   return H.getHash();
166b60736ecSDimitry Andric }
167