xref: /src/contrib/llvm-project/clang/lib/CodeGen/CodeGenTBAA.cpp (revision 5deeebd8c6ca991269e72902a7a62cada57947f6)
//===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is the code that manages TBAA information and defines the TBAA policy
// for the optimizer to use. Relevant standards text includes:
//
//   C99 6.5p7
//   C++ [basic.lval] (p10 in n3126, p15 in some earlier versions)
//
//===----------------------------------------------------------------------===//
16bca07a45SDimitry Andric 
17bca07a45SDimitry Andric #include "CodeGenTBAA.h"
18ac9a064cSDimitry Andric #include "ABIInfoImpl.h"
19e6b73279SDimitry Andric #include "CGCXXABI.h"
20ac9a064cSDimitry Andric #include "CGRecordLayout.h"
21ac9a064cSDimitry Andric #include "CodeGenTypes.h"
22bca07a45SDimitry Andric #include "clang/AST/ASTContext.h"
23809500fcSDimitry Andric #include "clang/AST/Attr.h"
24bca07a45SDimitry Andric #include "clang/AST/Mangle.h"
25809500fcSDimitry Andric #include "clang/AST/RecordLayout.h"
26676fbe81SDimitry Andric #include "clang/Basic/CodeGenOptions.h"
27ac9a064cSDimitry Andric #include "clang/Basic/TargetInfo.h"
28809500fcSDimitry Andric #include "llvm/ADT/SmallSet.h"
29809500fcSDimitry Andric #include "llvm/IR/Constants.h"
30809500fcSDimitry Andric #include "llvm/IR/LLVMContext.h"
31809500fcSDimitry Andric #include "llvm/IR/Metadata.h"
32461a67faSDimitry Andric #include "llvm/IR/Module.h"
33809500fcSDimitry Andric #include "llvm/IR/Type.h"
34ac9a064cSDimitry Andric #include "llvm/Support/Debug.h"
35bca07a45SDimitry Andric using namespace clang;
36bca07a45SDimitry Andric using namespace CodeGen;
37bca07a45SDimitry Andric 
CodeGenTBAA(ASTContext & Ctx,CodeGenTypes & CGTypes,llvm::Module & M,const CodeGenOptions & CGO,const LangOptions & Features)38ac9a064cSDimitry Andric CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes,
39ac9a064cSDimitry Andric                          llvm::Module &M, const CodeGenOptions &CGO,
40e6b73279SDimitry Andric                          const LangOptions &Features)
41ac9a064cSDimitry Andric     : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO),
42e6b73279SDimitry Andric       Features(Features), MDHelper(M.getContext()), Root(nullptr),
43e6b73279SDimitry Andric       Char(nullptr) {}
44bca07a45SDimitry Andric 
~CodeGenTBAA()45bca07a45SDimitry Andric CodeGenTBAA::~CodeGenTBAA() {
46bca07a45SDimitry Andric }
47bca07a45SDimitry Andric 
getRoot()48bca07a45SDimitry Andric llvm::MDNode *CodeGenTBAA::getRoot() {
49bca07a45SDimitry Andric   // Define the root of the tree. This identifies the tree, so that
50bca07a45SDimitry Andric   // if our LLVM IR is linked with LLVM IR from a different front-end
51bca07a45SDimitry Andric   // (or a different version of this front-end), their TBAA trees will
52bca07a45SDimitry Andric   // remain distinct, and the optimizer will treat them conservatively.
532b6b257fSDimitry Andric   if (!Root) {
542b6b257fSDimitry Andric     if (Features.CPlusPlus)
552b6b257fSDimitry Andric       Root = MDHelper.createTBAARoot("Simple C++ TBAA");
562b6b257fSDimitry Andric     else
576b9a6e39SDimitry Andric       Root = MDHelper.createTBAARoot("Simple C/C++ TBAA");
582b6b257fSDimitry Andric   }
59bca07a45SDimitry Andric 
60bca07a45SDimitry Andric   return Root;
61bca07a45SDimitry Andric }
62bca07a45SDimitry Andric 
createScalarTypeNode(StringRef Name,llvm::MDNode * Parent,uint64_t Size)63461a67faSDimitry Andric llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name,
64461a67faSDimitry Andric                                                 llvm::MDNode *Parent,
65461a67faSDimitry Andric                                                 uint64_t Size) {
666252156dSDimitry Andric   if (CodeGenOpts.NewStructPathTBAA) {
676252156dSDimitry Andric     llvm::Metadata *Id = MDHelper.createString(Name);
686252156dSDimitry Andric     return MDHelper.createTBAATypeNode(Parent, Size, Id);
696252156dSDimitry Andric   }
706a037251SDimitry Andric   return MDHelper.createTBAAScalarTypeNode(Name, Parent);
716a037251SDimitry Andric }
726a037251SDimitry Andric 
getChar()73bca07a45SDimitry Andric llvm::MDNode *CodeGenTBAA::getChar() {
74bca07a45SDimitry Andric   // Define the root of the tree for user-accessible memory. C and C++
75bca07a45SDimitry Andric   // give special powers to char and certain similar types. However,
76bca07a45SDimitry Andric   // these special powers only cover user-accessible memory, and doesn't
77bca07a45SDimitry Andric   // include things like vtables.
78bca07a45SDimitry Andric   if (!Char)
79461a67faSDimitry Andric     Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1);
80bca07a45SDimitry Andric 
81bca07a45SDimitry Andric   return Char;
82bca07a45SDimitry Andric }
83bca07a45SDimitry Andric 
TypeHasMayAlias(QualType QTy)84bca07a45SDimitry Andric static bool TypeHasMayAlias(QualType QTy) {
85bca07a45SDimitry Andric   // Tagged types have declarations, and therefore may have attributes.
86706b4fc4SDimitry Andric   if (auto *TD = QTy->getAsTagDecl())
87706b4fc4SDimitry Andric     if (TD->hasAttr<MayAliasAttr>())
88bca07a45SDimitry Andric       return true;
89bca07a45SDimitry Andric 
90706b4fc4SDimitry Andric   // Also look for may_alias as a declaration attribute on a typedef.
91706b4fc4SDimitry Andric   // FIXME: We should follow GCC and model may_alias as a type attribute
92706b4fc4SDimitry Andric   // rather than as a declaration attribute.
93706b4fc4SDimitry Andric   while (auto *TT = QTy->getAs<TypedefType>()) {
94706b4fc4SDimitry Andric     if (TT->getDecl()->hasAttr<MayAliasAttr>())
95706b4fc4SDimitry Andric       return true;
96706b4fc4SDimitry Andric     QTy = TT->desugar();
97706b4fc4SDimitry Andric   }
98bca07a45SDimitry Andric   return false;
99bca07a45SDimitry Andric }
100bca07a45SDimitry Andric 
101461a67faSDimitry Andric /// Check if the given type is a valid base type to be used in access tags.
isValidBaseType(QualType QTy)102461a67faSDimitry Andric static bool isValidBaseType(QualType QTy) {
103461a67faSDimitry Andric   if (const RecordType *TTy = QTy->getAs<RecordType>()) {
104461a67faSDimitry Andric     const RecordDecl *RD = TTy->getDecl()->getDefinition();
105461a67faSDimitry Andric     // Incomplete types are not valid base access types.
106461a67faSDimitry Andric     if (!RD)
107461a67faSDimitry Andric       return false;
108461a67faSDimitry Andric     if (RD->hasFlexibleArrayMember())
109461a67faSDimitry Andric       return false;
110461a67faSDimitry Andric     // RD can be struct, union, class, interface or enum.
111461a67faSDimitry Andric     // For now, we only handle struct and class.
112461a67faSDimitry Andric     if (RD->isStruct() || RD->isClass())
113461a67faSDimitry Andric       return true;
114461a67faSDimitry Andric   }
115461a67faSDimitry Andric   return false;
116461a67faSDimitry Andric }
11756d91b49SDimitry Andric 
getTypeInfoHelper(const Type * Ty)118461a67faSDimitry Andric llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) {
119461a67faSDimitry Andric   uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
120bca07a45SDimitry Andric 
121bca07a45SDimitry Andric   // Handle builtin types.
122bca07a45SDimitry Andric   if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) {
123bca07a45SDimitry Andric     switch (BTy->getKind()) {
124bca07a45SDimitry Andric     // Character types are special and can alias anything.
125bca07a45SDimitry Andric     // In C++, this technically only includes "char" and "unsigned char",
126bca07a45SDimitry Andric     // and not "signed char". In C, it includes all three. For now,
127bca07a45SDimitry Andric     // the risk of exploiting this detail in C++ seems likely to outweigh
128bca07a45SDimitry Andric     // the benefit.
129bca07a45SDimitry Andric     case BuiltinType::Char_U:
130bca07a45SDimitry Andric     case BuiltinType::Char_S:
131bca07a45SDimitry Andric     case BuiltinType::UChar:
132bca07a45SDimitry Andric     case BuiltinType::SChar:
133bca07a45SDimitry Andric       return getChar();
134bca07a45SDimitry Andric 
135bca07a45SDimitry Andric     // Unsigned types can alias their corresponding signed types.
136bca07a45SDimitry Andric     case BuiltinType::UShort:
137461a67faSDimitry Andric       return getTypeInfo(Context.ShortTy);
138bca07a45SDimitry Andric     case BuiltinType::UInt:
139461a67faSDimitry Andric       return getTypeInfo(Context.IntTy);
140bca07a45SDimitry Andric     case BuiltinType::ULong:
141461a67faSDimitry Andric       return getTypeInfo(Context.LongTy);
142bca07a45SDimitry Andric     case BuiltinType::ULongLong:
143461a67faSDimitry Andric       return getTypeInfo(Context.LongLongTy);
144bca07a45SDimitry Andric     case BuiltinType::UInt128:
145461a67faSDimitry Andric       return getTypeInfo(Context.Int128Ty);
146bca07a45SDimitry Andric 
147cfca06d7SDimitry Andric     case BuiltinType::UShortFract:
148cfca06d7SDimitry Andric       return getTypeInfo(Context.ShortFractTy);
149cfca06d7SDimitry Andric     case BuiltinType::UFract:
150cfca06d7SDimitry Andric       return getTypeInfo(Context.FractTy);
151cfca06d7SDimitry Andric     case BuiltinType::ULongFract:
152cfca06d7SDimitry Andric       return getTypeInfo(Context.LongFractTy);
153cfca06d7SDimitry Andric 
154cfca06d7SDimitry Andric     case BuiltinType::SatUShortFract:
155cfca06d7SDimitry Andric       return getTypeInfo(Context.SatShortFractTy);
156cfca06d7SDimitry Andric     case BuiltinType::SatUFract:
157cfca06d7SDimitry Andric       return getTypeInfo(Context.SatFractTy);
158cfca06d7SDimitry Andric     case BuiltinType::SatULongFract:
159cfca06d7SDimitry Andric       return getTypeInfo(Context.SatLongFractTy);
160cfca06d7SDimitry Andric 
161cfca06d7SDimitry Andric     case BuiltinType::UShortAccum:
162cfca06d7SDimitry Andric       return getTypeInfo(Context.ShortAccumTy);
163cfca06d7SDimitry Andric     case BuiltinType::UAccum:
164cfca06d7SDimitry Andric       return getTypeInfo(Context.AccumTy);
165cfca06d7SDimitry Andric     case BuiltinType::ULongAccum:
166cfca06d7SDimitry Andric       return getTypeInfo(Context.LongAccumTy);
167cfca06d7SDimitry Andric 
168cfca06d7SDimitry Andric     case BuiltinType::SatUShortAccum:
169cfca06d7SDimitry Andric       return getTypeInfo(Context.SatShortAccumTy);
170cfca06d7SDimitry Andric     case BuiltinType::SatUAccum:
171cfca06d7SDimitry Andric       return getTypeInfo(Context.SatAccumTy);
172cfca06d7SDimitry Andric     case BuiltinType::SatULongAccum:
173cfca06d7SDimitry Andric       return getTypeInfo(Context.SatLongAccumTy);
174cfca06d7SDimitry Andric 
175bca07a45SDimitry Andric     // Treat all other builtin types as distinct types. This includes
176bca07a45SDimitry Andric     // treating wchar_t, char16_t, and char32_t as distinct from their
177bca07a45SDimitry Andric     // "underlying types".
178bca07a45SDimitry Andric     default:
179461a67faSDimitry Andric       return createScalarTypeNode(BTy->getName(Features), getChar(), Size);
180bca07a45SDimitry Andric     }
181bca07a45SDimitry Andric   }
182bca07a45SDimitry Andric 
183104a02fbSDimitry Andric   // C++1z [basic.lval]p10: "If a program attempts to access the stored value of
184104a02fbSDimitry Andric   // an object through a glvalue of other than one of the following types the
185104a02fbSDimitry Andric   // behavior is undefined: [...] a char, unsigned char, or std::byte type."
186104a02fbSDimitry Andric   if (Ty->isStdByteType())
187461a67faSDimitry Andric     return getChar();
188104a02fbSDimitry Andric 
189461a67faSDimitry Andric   // Handle pointers and references.
190ac9a064cSDimitry Andric   //
191ac9a064cSDimitry Andric   // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2:
192ac9a064cSDimitry Andric   //     For two pointer types to be compatible, both shall be identically
193ac9a064cSDimitry Andric   //     qualified and both shall be pointers to compatible types.
194ac9a064cSDimitry Andric   //
195ac9a064cSDimitry Andric   // This rule is impractically strict; we want to at least ignore CVR
196ac9a064cSDimitry Andric   // qualifiers. Distinguishing by CVR qualifiers would make it UB to
197ac9a064cSDimitry Andric   // e.g. cast a `char **` to `const char * const *` and dereference it,
198ac9a064cSDimitry Andric   // which is too common and useful to invalidate. C++'s similar types
199ac9a064cSDimitry Andric   // rule permits qualifier differences in these nested positions; in fact,
200ac9a064cSDimitry Andric   // C++ even allows that cast as an implicit conversion.
201ac9a064cSDimitry Andric   //
202ac9a064cSDimitry Andric   // Other qualifiers could theoretically be distinguished, especially if
203ac9a064cSDimitry Andric   // they involve a significant representation difference.  We don't
204ac9a064cSDimitry Andric   // currently do so, however.
205ac9a064cSDimitry Andric   //
206ac9a064cSDimitry Andric   // Computing the pointee type string recursively is implicitly more
207ac9a064cSDimitry Andric   // forgiving than the standards require.  Effectively, we are turning
208ac9a064cSDimitry Andric   // the question "are these types compatible/similar" into "are
209ac9a064cSDimitry Andric   // accesses to these types allowed to alias".  In both C and C++,
210ac9a064cSDimitry Andric   // the latter question has special carve-outs for signedness
211ac9a064cSDimitry Andric   // mismatches that only apply at the top level.  As a result, we are
212ac9a064cSDimitry Andric   // allowing e.g. `int *` l-values to access `unsigned *` objects.
213ac9a064cSDimitry Andric   if (Ty->isPointerType() || Ty->isReferenceType()) {
214ac9a064cSDimitry Andric     llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size);
215ac9a064cSDimitry Andric     if (!CodeGenOpts.PointerTBAA)
216ac9a064cSDimitry Andric       return AnyPtr;
217ac9a064cSDimitry Andric     // Compute the depth of the pointer and generate a tag of the form "p<depth>
218ac9a064cSDimitry Andric     // <base type tag>".
219ac9a064cSDimitry Andric     unsigned PtrDepth = 0;
220ac9a064cSDimitry Andric     do {
221ac9a064cSDimitry Andric       PtrDepth++;
222ac9a064cSDimitry Andric       Ty = Ty->getPointeeType().getTypePtr();
223ac9a064cSDimitry Andric     } while (Ty->isPointerType());
224bca07a45SDimitry Andric     // TODO: Implement C++'s type "similarity" and consider dis-"similar"
225ac9a064cSDimitry Andric     // pointers distinct for non-builtin types.
226ac9a064cSDimitry Andric     if (isa<BuiltinType>(Ty)) {
227ac9a064cSDimitry Andric       llvm::MDNode *ScalarMD = getTypeInfoHelper(Ty);
228ac9a064cSDimitry Andric       StringRef Name =
229ac9a064cSDimitry Andric           cast<llvm::MDString>(
230ac9a064cSDimitry Andric               ScalarMD->getOperand(CodeGenOpts.NewStructPathTBAA ? 2 : 0))
231ac9a064cSDimitry Andric               ->getString();
232ac9a064cSDimitry Andric       SmallString<256> OutName("p");
233ac9a064cSDimitry Andric       OutName += std::to_string(PtrDepth);
234ac9a064cSDimitry Andric       OutName += " ";
235ac9a064cSDimitry Andric       OutName += Name;
236ac9a064cSDimitry Andric       return createScalarTypeNode(OutName, AnyPtr, Size);
237ac9a064cSDimitry Andric     }
238ac9a064cSDimitry Andric     return AnyPtr;
239ac9a064cSDimitry Andric   }
240bca07a45SDimitry Andric 
2416252156dSDimitry Andric   // Accesses to arrays are accesses to objects of their element types.
2426252156dSDimitry Andric   if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType())
2436252156dSDimitry Andric     return getTypeInfo(cast<ArrayType>(Ty)->getElementType());
2446252156dSDimitry Andric 
245bca07a45SDimitry Andric   // Enum types are distinct types. In C++ they have "underlying types",
246bca07a45SDimitry Andric   // however they aren't related for TBAA.
247bca07a45SDimitry Andric   if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) {
248b1c73532SDimitry Andric     if (!Features.CPlusPlus)
249b1c73532SDimitry Andric       return getTypeInfo(ETy->getDecl()->getIntegerType());
250b1c73532SDimitry Andric 
251bca07a45SDimitry Andric     // In C++ mode, types have linkage, so we can rely on the ODR and
252bca07a45SDimitry Andric     // on their mangled names, if they're external.
253bca07a45SDimitry Andric     // TODO: Is there a way to get a program-wide unique name for a
254bca07a45SDimitry Andric     // decl with local linkage or no linkage?
255b1c73532SDimitry Andric     if (!ETy->getDecl()->isExternallyVisible())
256461a67faSDimitry Andric       return getChar();
257bca07a45SDimitry Andric 
258dbe13110SDimitry Andric     SmallString<256> OutName;
259bca07a45SDimitry Andric     llvm::raw_svector_ostream Out(OutName);
260e6b73279SDimitry Andric     CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
261e6b73279SDimitry Andric         QualType(ETy, 0), Out);
262461a67faSDimitry Andric     return createScalarTypeNode(OutName, getChar(), Size);
263bca07a45SDimitry Andric   }
264bca07a45SDimitry Andric 
26577fc4c14SDimitry Andric   if (const auto *EIT = dyn_cast<BitIntType>(Ty)) {
266cfca06d7SDimitry Andric     SmallString<256> OutName;
267cfca06d7SDimitry Andric     llvm::raw_svector_ostream Out(OutName);
268cfca06d7SDimitry Andric     // Don't specify signed/unsigned since integer types can alias despite sign
269cfca06d7SDimitry Andric     // differences.
27077fc4c14SDimitry Andric     Out << "_BitInt(" << EIT->getNumBits() << ')';
271cfca06d7SDimitry Andric     return createScalarTypeNode(OutName, getChar(), Size);
272cfca06d7SDimitry Andric   }
273cfca06d7SDimitry Andric 
274bca07a45SDimitry Andric   // For now, handle any other kind of type conservatively.
275461a67faSDimitry Andric   return getChar();
276bca07a45SDimitry Andric }
277dbe13110SDimitry Andric 
getTypeInfo(QualType QTy)278461a67faSDimitry Andric llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) {
279461a67faSDimitry Andric   // At -O0 or relaxed aliasing, TBAA is not emitted for regular types.
280461a67faSDimitry Andric   if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
281461a67faSDimitry Andric     return nullptr;
282461a67faSDimitry Andric 
283461a67faSDimitry Andric   // If the type has the may_alias attribute (even on a typedef), it is
284461a67faSDimitry Andric   // effectively in the general char alias class.
285461a67faSDimitry Andric   if (TypeHasMayAlias(QTy))
286461a67faSDimitry Andric     return getChar();
287461a67faSDimitry Andric 
288461a67faSDimitry Andric   // We need this function to not fall back to returning the "omnipotent char"
289461a67faSDimitry Andric   // type node for aggregate and union types. Otherwise, any dereference of an
290461a67faSDimitry Andric   // aggregate will result into the may-alias access descriptor, meaning all
291461a67faSDimitry Andric   // subsequent accesses to direct and indirect members of that aggregate will
292461a67faSDimitry Andric   // be considered may-alias too.
293ac9a064cSDimitry Andric   // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single
294ac9a064cSDimitry Andric   // function.
295461a67faSDimitry Andric   if (isValidBaseType(QTy))
296ac9a064cSDimitry Andric     return getValidBaseTypeInfo(QTy);
297461a67faSDimitry Andric 
298461a67faSDimitry Andric   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
299461a67faSDimitry Andric   if (llvm::MDNode *N = MetadataCache[Ty])
300461a67faSDimitry Andric     return N;
301461a67faSDimitry Andric 
302461a67faSDimitry Andric   // Note that the following helper call is allowed to add new nodes to the
303461a67faSDimitry Andric   // cache, which invalidates all its previously obtained iterators. So we
304461a67faSDimitry Andric   // first generate the node for the type and then add that node to the cache.
305461a67faSDimitry Andric   llvm::MDNode *TypeNode = getTypeInfoHelper(Ty);
306461a67faSDimitry Andric   return MetadataCache[Ty] = TypeNode;
307461a67faSDimitry Andric }
308461a67faSDimitry Andric 
getAccessInfo(QualType AccessType)30948675466SDimitry Andric TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) {
31048675466SDimitry Andric   // Pointee values may have incomplete types, but they shall never be
31148675466SDimitry Andric   // dereferenced.
31248675466SDimitry Andric   if (AccessType->isIncompleteType())
31348675466SDimitry Andric     return TBAAAccessInfo::getIncompleteInfo();
31448675466SDimitry Andric 
31548675466SDimitry Andric   if (TypeHasMayAlias(AccessType))
31648675466SDimitry Andric     return TBAAAccessInfo::getMayAliasInfo();
31748675466SDimitry Andric 
31848675466SDimitry Andric   uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity();
31948675466SDimitry Andric   return TBAAAccessInfo(getTypeInfo(AccessType), Size);
32048675466SDimitry Andric }
32148675466SDimitry Andric 
getVTablePtrAccessInfo(llvm::Type * VTablePtrType)322461a67faSDimitry Andric TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) {
323461a67faSDimitry Andric   llvm::DataLayout DL(&Module);
324461a67faSDimitry Andric   unsigned Size = DL.getPointerTypeSize(VTablePtrType);
325461a67faSDimitry Andric   return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size),
326461a67faSDimitry Andric                         Size);
327dbe13110SDimitry Andric }
32813cc256eSDimitry Andric 
32913cc256eSDimitry Andric bool
CollectFields(uint64_t BaseOffset,QualType QTy,SmallVectorImpl<llvm::MDBuilder::TBAAStructField> & Fields,bool MayAlias)33013cc256eSDimitry Andric CodeGenTBAA::CollectFields(uint64_t BaseOffset,
33113cc256eSDimitry Andric                            QualType QTy,
33213cc256eSDimitry Andric                            SmallVectorImpl<llvm::MDBuilder::TBAAStructField> &
33313cc256eSDimitry Andric                              Fields,
33413cc256eSDimitry Andric                            bool MayAlias) {
33513cc256eSDimitry Andric   /* Things not handled yet include: C++ base classes, bitfields, */
33613cc256eSDimitry Andric 
33713cc256eSDimitry Andric   if (const RecordType *TTy = QTy->getAs<RecordType>()) {
338ac9a064cSDimitry Andric     if (TTy->isUnionType()) {
339ac9a064cSDimitry Andric       uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
340ac9a064cSDimitry Andric       llvm::MDNode *TBAAType = getChar();
341ac9a064cSDimitry Andric       llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
342ac9a064cSDimitry Andric       Fields.push_back(
343ac9a064cSDimitry Andric           llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag));
344ac9a064cSDimitry Andric       return true;
345ac9a064cSDimitry Andric     }
34613cc256eSDimitry Andric     const RecordDecl *RD = TTy->getDecl()->getDefinition();
34713cc256eSDimitry Andric     if (RD->hasFlexibleArrayMember())
34813cc256eSDimitry Andric       return false;
34913cc256eSDimitry Andric 
35013cc256eSDimitry Andric     // TODO: Handle C++ base classes.
35113cc256eSDimitry Andric     if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD))
35213cc256eSDimitry Andric       if (Decl->bases_begin() != Decl->bases_end())
35313cc256eSDimitry Andric         return false;
35413cc256eSDimitry Andric 
35513cc256eSDimitry Andric     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
356ac9a064cSDimitry Andric     const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD);
35713cc256eSDimitry Andric 
35813cc256eSDimitry Andric     unsigned idx = 0;
359ac9a064cSDimitry Andric     for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
360ac9a064cSDimitry Andric          i != e; ++i, ++idx) {
361ac9a064cSDimitry Andric       if (isEmptyFieldForLayout(Context, *i))
36222989816SDimitry Andric         continue;
363ac9a064cSDimitry Andric 
364ac9a064cSDimitry Andric       uint64_t Offset =
365ac9a064cSDimitry Andric           BaseOffset + Layout.getFieldOffset(idx) / Context.getCharWidth();
366ac9a064cSDimitry Andric 
367ac9a064cSDimitry Andric       // Create a single field for consecutive named bitfields using char as
368ac9a064cSDimitry Andric       // base type.
369ac9a064cSDimitry Andric       if ((*i)->isBitField()) {
370ac9a064cSDimitry Andric         const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(*i);
371ac9a064cSDimitry Andric         // For big endian targets the first bitfield in the consecutive run is
372ac9a064cSDimitry Andric         // at the most-significant end; see CGRecordLowering::setBitFieldInfo
373ac9a064cSDimitry Andric         // for more information.
374ac9a064cSDimitry Andric         bool IsBE = Context.getTargetInfo().isBigEndian();
375ac9a064cSDimitry Andric         bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0
376ac9a064cSDimitry Andric                             : Info.Offset == 0;
377ac9a064cSDimitry Andric         if (!IsFirst)
378ac9a064cSDimitry Andric           continue;
379ac9a064cSDimitry Andric         unsigned CurrentBitFieldSize = Info.StorageSize;
380ac9a064cSDimitry Andric         uint64_t Size =
381ac9a064cSDimitry Andric             llvm::divideCeil(CurrentBitFieldSize, Context.getCharWidth());
382ac9a064cSDimitry Andric         llvm::MDNode *TBAAType = getChar();
383ac9a064cSDimitry Andric         llvm::MDNode *TBAATag =
384ac9a064cSDimitry Andric             getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
385ac9a064cSDimitry Andric         Fields.push_back(
386ac9a064cSDimitry Andric             llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
387ac9a064cSDimitry Andric         continue;
388ac9a064cSDimitry Andric       }
389ac9a064cSDimitry Andric 
39013cc256eSDimitry Andric       QualType FieldQTy = i->getType();
39113cc256eSDimitry Andric       if (!CollectFields(Offset, FieldQTy, Fields,
39213cc256eSDimitry Andric                          MayAlias || TypeHasMayAlias(FieldQTy)))
39313cc256eSDimitry Andric         return false;
39413cc256eSDimitry Andric     }
39513cc256eSDimitry Andric     return true;
39613cc256eSDimitry Andric   }
39713cc256eSDimitry Andric 
39813cc256eSDimitry Andric   /* Otherwise, treat whatever it is as a field. */
39913cc256eSDimitry Andric   uint64_t Offset = BaseOffset;
40013cc256eSDimitry Andric   uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity();
401461a67faSDimitry Andric   llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy);
402461a67faSDimitry Andric   llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size));
4036a037251SDimitry Andric   Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag));
40413cc256eSDimitry Andric   return true;
40513cc256eSDimitry Andric }
40613cc256eSDimitry Andric 
40713cc256eSDimitry Andric llvm::MDNode *
getTBAAStructInfo(QualType QTy)40813cc256eSDimitry Andric CodeGenTBAA::getTBAAStructInfo(QualType QTy) {
409ac9a064cSDimitry Andric   if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing)
410ac9a064cSDimitry Andric     return nullptr;
411ac9a064cSDimitry Andric 
41213cc256eSDimitry Andric   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
41313cc256eSDimitry Andric 
41413cc256eSDimitry Andric   if (llvm::MDNode *N = StructMetadataCache[Ty])
41513cc256eSDimitry Andric     return N;
41613cc256eSDimitry Andric 
41713cc256eSDimitry Andric   SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields;
41813cc256eSDimitry Andric   if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy)))
41913cc256eSDimitry Andric     return MDHelper.createTBAAStructNode(Fields);
42013cc256eSDimitry Andric 
42113cc256eSDimitry Andric   // For now, handle any other kind of type conservatively.
4229f4dbff6SDimitry Andric   return StructMetadataCache[Ty] = nullptr;
42313cc256eSDimitry Andric }
424809500fcSDimitry Andric 
getBaseTypeInfoHelper(const Type * Ty)425461a67faSDimitry Andric llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) {
426461a67faSDimitry Andric   if (auto *TTy = dyn_cast<RecordType>(Ty)) {
427809500fcSDimitry Andric     const RecordDecl *RD = TTy->getDecl()->getDefinition();
428809500fcSDimitry Andric     const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
4291f917f69SDimitry Andric     using TBAAStructField = llvm::MDBuilder::TBAAStructField;
4301f917f69SDimitry Andric     SmallVector<TBAAStructField, 4> Fields;
4311f917f69SDimitry Andric     if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
432e3b55780SDimitry Andric       // Handle C++ base classes. Non-virtual bases can treated a kind of
4331f917f69SDimitry Andric       // field. Virtual bases are more complex and omitted, but avoid an
4341f917f69SDimitry Andric       // incomplete view for NewStructPathTBAA.
4351f917f69SDimitry Andric       if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0)
436b1c73532SDimitry Andric         return nullptr;
4371f917f69SDimitry Andric       for (const CXXBaseSpecifier &B : CXXRD->bases()) {
4381f917f69SDimitry Andric         if (B.isVirtual())
4391f917f69SDimitry Andric           continue;
4401f917f69SDimitry Andric         QualType BaseQTy = B.getType();
4411f917f69SDimitry Andric         const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl();
4421f917f69SDimitry Andric         if (BaseRD->isEmpty())
4431f917f69SDimitry Andric           continue;
4441f917f69SDimitry Andric         llvm::MDNode *TypeNode = isValidBaseType(BaseQTy)
445ac9a064cSDimitry Andric                                      ? getValidBaseTypeInfo(BaseQTy)
4461f917f69SDimitry Andric                                      : getTypeInfo(BaseQTy);
4471f917f69SDimitry Andric         if (!TypeNode)
448b1c73532SDimitry Andric           return nullptr;
4491f917f69SDimitry Andric         uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity();
4501f917f69SDimitry Andric         uint64_t Size =
4511f917f69SDimitry Andric             Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity();
4521f917f69SDimitry Andric         Fields.push_back(
4531f917f69SDimitry Andric             llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode));
4541f917f69SDimitry Andric       }
4551f917f69SDimitry Andric       // The order in which base class subobjects are allocated is unspecified,
4561f917f69SDimitry Andric       // so may differ from declaration order. In particular, Itanium ABI will
4571f917f69SDimitry Andric       // allocate a primary base first.
4581f917f69SDimitry Andric       // Since we exclude empty subobjects, the objects are not overlapping and
4591f917f69SDimitry Andric       // their offsets are unique.
4601f917f69SDimitry Andric       llvm::sort(Fields,
4611f917f69SDimitry Andric                  [](const TBAAStructField &A, const TBAAStructField &B) {
4621f917f69SDimitry Andric                    return A.Offset < B.Offset;
4631f917f69SDimitry Andric                  });
4641f917f69SDimitry Andric     }
465461a67faSDimitry Andric     for (FieldDecl *Field : RD->fields()) {
466ac9a064cSDimitry Andric       if (Field->isZeroSize(Context) || Field->isUnnamedBitField())
46722989816SDimitry Andric         continue;
468461a67faSDimitry Andric       QualType FieldQTy = Field->getType();
469ac9a064cSDimitry Andric       llvm::MDNode *TypeNode = isValidBaseType(FieldQTy)
470ac9a064cSDimitry Andric                                    ? getValidBaseTypeInfo(FieldQTy)
471ac9a064cSDimitry Andric                                    : getTypeInfo(FieldQTy);
472461a67faSDimitry Andric       if (!TypeNode)
473b1c73532SDimitry Andric         return nullptr;
474461a67faSDimitry Andric 
475461a67faSDimitry Andric       uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex());
476461a67faSDimitry Andric       uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity();
477461a67faSDimitry Andric       uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity();
478461a67faSDimitry Andric       Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size,
479461a67faSDimitry Andric                                                         TypeNode));
480809500fcSDimitry Andric     }
481809500fcSDimitry Andric 
482809500fcSDimitry Andric     SmallString<256> OutName;
483bfef3995SDimitry Andric     if (Features.CPlusPlus) {
484bfef3995SDimitry Andric       // Don't use the mangler for C code.
485809500fcSDimitry Andric       llvm::raw_svector_ostream Out(OutName);
486e6b73279SDimitry Andric       CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName(
487e6b73279SDimitry Andric           QualType(Ty, 0), Out);
488bfef3995SDimitry Andric     } else {
489bfef3995SDimitry Andric       OutName = RD->getName();
490bfef3995SDimitry Andric     }
491461a67faSDimitry Andric 
4926252156dSDimitry Andric     if (CodeGenOpts.NewStructPathTBAA) {
4936252156dSDimitry Andric       llvm::MDNode *Parent = getChar();
4946252156dSDimitry Andric       uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity();
4956252156dSDimitry Andric       llvm::Metadata *Id = MDHelper.createString(OutName);
4966252156dSDimitry Andric       return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields);
4976252156dSDimitry Andric     }
498461a67faSDimitry Andric 
499809500fcSDimitry Andric     // Create the struct type node with a vector of pairs (offset, type).
500461a67faSDimitry Andric     SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes;
501461a67faSDimitry Andric     for (const auto &Field : Fields)
502461a67faSDimitry Andric         OffsetsAndTypes.push_back(std::make_pair(Field.Type, Field.Offset));
503461a67faSDimitry Andric     return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes);
504809500fcSDimitry Andric   }
505809500fcSDimitry Andric 
506461a67faSDimitry Andric   return nullptr;
507809500fcSDimitry Andric }
508809500fcSDimitry Andric 
getValidBaseTypeInfo(QualType QTy)509ac9a064cSDimitry Andric llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) {
510ac9a064cSDimitry Andric   assert(isValidBaseType(QTy) && "Must be a valid base type");
511461a67faSDimitry Andric 
512461a67faSDimitry Andric   const Type *Ty = Context.getCanonicalType(QTy).getTypePtr();
513461a67faSDimitry Andric 
514b1c73532SDimitry Andric   // nullptr is a valid value in the cache, so use find rather than []
515b1c73532SDimitry Andric   auto I = BaseTypeMetadataCache.find(Ty);
516b1c73532SDimitry Andric   if (I != BaseTypeMetadataCache.end())
517b1c73532SDimitry Andric     return I->second;
518b1c73532SDimitry Andric 
519b1c73532SDimitry Andric   // First calculate the metadata, before recomputing the insertion point, as
520b1c73532SDimitry Andric   // the helper can recursively call us.
521461a67faSDimitry Andric   llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty);
522b1c73532SDimitry Andric   LLVM_ATTRIBUTE_UNUSED auto inserted =
523b1c73532SDimitry Andric       BaseTypeMetadataCache.insert({Ty, TypeNode});
524b1c73532SDimitry Andric   assert(inserted.second && "BaseType metadata was already inserted");
525b1c73532SDimitry Andric 
526b1c73532SDimitry Andric   return TypeNode;
527461a67faSDimitry Andric }
528461a67faSDimitry Andric 
getBaseTypeInfo(QualType QTy)529ac9a064cSDimitry Andric llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) {
530ac9a064cSDimitry Andric   return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr;
531ac9a064cSDimitry Andric }
532ac9a064cSDimitry Andric 
getAccessTagInfo(TBAAAccessInfo Info)533461a67faSDimitry Andric llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) {
534461a67faSDimitry Andric   assert(!Info.isIncomplete() && "Access to an object of an incomplete type!");
535461a67faSDimitry Andric 
536461a67faSDimitry Andric   if (Info.isMayAlias())
537461a67faSDimitry Andric     Info = TBAAAccessInfo(getChar(), Info.Size);
538461a67faSDimitry Andric 
539461a67faSDimitry Andric   if (!Info.AccessType)
5409f4dbff6SDimitry Andric     return nullptr;
541bfef3995SDimitry Andric 
542809500fcSDimitry Andric   if (!CodeGenOpts.StructPathTBAA)
543461a67faSDimitry Andric     Info = TBAAAccessInfo(Info.AccessType, Info.Size);
544809500fcSDimitry Andric 
545461a67faSDimitry Andric   llvm::MDNode *&N = AccessTagMetadataCache[Info];
546461a67faSDimitry Andric   if (N)
547809500fcSDimitry Andric     return N;
548809500fcSDimitry Andric 
549461a67faSDimitry Andric   if (!Info.BaseType) {
550461a67faSDimitry Andric     Info.BaseType = Info.AccessType;
551461a67faSDimitry Andric     assert(!Info.Offset && "Nonzero offset for an access with no base type!");
552461a67faSDimitry Andric   }
5536252156dSDimitry Andric   if (CodeGenOpts.NewStructPathTBAA) {
5546252156dSDimitry Andric     return N = MDHelper.createTBAAAccessTag(Info.BaseType, Info.AccessType,
5556252156dSDimitry Andric                                             Info.Offset, Info.Size);
5566252156dSDimitry Andric   }
557461a67faSDimitry Andric   return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType,
558461a67faSDimitry Andric                                               Info.Offset);
559809500fcSDimitry Andric }
5606a037251SDimitry Andric 
mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,TBAAAccessInfo TargetInfo)561461a67faSDimitry Andric TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo,
562461a67faSDimitry Andric                                                  TBAAAccessInfo TargetInfo) {
563461a67faSDimitry Andric   if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias())
564461a67faSDimitry Andric     return TBAAAccessInfo::getMayAliasInfo();
565461a67faSDimitry Andric   return TargetInfo;
566461a67faSDimitry Andric }
5676a037251SDimitry Andric 
568461a67faSDimitry Andric TBAAAccessInfo
mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,TBAAAccessInfo InfoB)569461a67faSDimitry Andric CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA,
570461a67faSDimitry Andric                                                  TBAAAccessInfo InfoB) {
571461a67faSDimitry Andric   if (InfoA == InfoB)
572461a67faSDimitry Andric     return InfoA;
573461a67faSDimitry Andric 
574461a67faSDimitry Andric   if (!InfoA || !InfoB)
575461a67faSDimitry Andric     return TBAAAccessInfo();
576461a67faSDimitry Andric 
577461a67faSDimitry Andric   if (InfoA.isMayAlias() || InfoB.isMayAlias())
578461a67faSDimitry Andric     return TBAAAccessInfo::getMayAliasInfo();
579461a67faSDimitry Andric 
580461a67faSDimitry Andric   // TODO: Implement the rest of the logic here. For example, two accesses
581461a67faSDimitry Andric   // with same final access types result in an access to an object of that final
582461a67faSDimitry Andric   // access type regardless of their base types.
583461a67faSDimitry Andric   return TBAAAccessInfo::getMayAliasInfo();
5846a037251SDimitry Andric }
58548675466SDimitry Andric 
58648675466SDimitry Andric TBAAAccessInfo
mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,TBAAAccessInfo SrcInfo)58748675466SDimitry Andric CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo,
58848675466SDimitry Andric                                             TBAAAccessInfo SrcInfo) {
58948675466SDimitry Andric   if (DestInfo == SrcInfo)
59048675466SDimitry Andric     return DestInfo;
59148675466SDimitry Andric 
59248675466SDimitry Andric   if (!DestInfo || !SrcInfo)
59348675466SDimitry Andric     return TBAAAccessInfo();
59448675466SDimitry Andric 
59548675466SDimitry Andric   if (DestInfo.isMayAlias() || SrcInfo.isMayAlias())
59648675466SDimitry Andric     return TBAAAccessInfo::getMayAliasInfo();
59748675466SDimitry Andric 
59848675466SDimitry Andric   // TODO: Implement the rest of the logic here. For example, two accesses
59948675466SDimitry Andric   // with same final access types result in an access to an object of that final
60048675466SDimitry Andric   // access type regardless of their base types.
60148675466SDimitry Andric   return TBAAAccessInfo::getMayAliasInfo();
60248675466SDimitry Andric }
603