xref: /src/contrib/llvm-project/llvm/lib/Support/StringRef.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
159850d08SRoman Divacky //===-- StringRef.cpp - Lightweight String References ---------------------===//
259850d08SRoman Divacky //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
659850d08SRoman Divacky //
759850d08SRoman Divacky //===----------------------------------------------------------------------===//
859850d08SRoman Divacky 
959850d08SRoman Divacky #include "llvm/ADT/StringRef.h"
1071d5a254SDimitry Andric #include "llvm/ADT/APFloat.h"
1167a71b31SRoman Divacky #include "llvm/ADT/APInt.h"
1263faed5bSDimitry Andric #include "llvm/ADT/Hashing.h"
13044eb2f6SDimitry Andric #include "llvm/ADT/StringExtras.h"
1463faed5bSDimitry Andric #include "llvm/ADT/edit_distance.h"
15706b4fc4SDimitry Andric #include "llvm/Support/Error.h"
16d39c594dSDimitry Andric #include <bitset>
17829000e0SRoman Divacky 
1859850d08SRoman Divacky using namespace llvm;
1959850d08SRoman Divacky 
2059850d08SRoman Divacky // MSVC emits references to this into the translation units which reference it.
2159850d08SRoman Divacky #ifndef _MSC_VER
22cfca06d7SDimitry Andric constexpr size_t StringRef::npos;
2359850d08SRoman Divacky #endif
2459850d08SRoman Divacky 
25f8af5cf6SDimitry Andric // strncasecmp() is not available on non-POSIX systems, so define an
26f8af5cf6SDimitry Andric // alternative function here.
ascii_strncasecmp(const char * LHS,const char * RHS,size_t Length)27f8af5cf6SDimitry Andric static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
28f8af5cf6SDimitry Andric   for (size_t I = 0; I < Length; ++I) {
29044eb2f6SDimitry Andric     unsigned char LHC = toLower(LHS[I]);
30044eb2f6SDimitry Andric     unsigned char RHC = toLower(RHS[I]);
31907da171SRoman Divacky     if (LHC != RHC)
32907da171SRoman Divacky       return LHC < RHC ? -1 : 1;
33907da171SRoman Divacky   }
34f8af5cf6SDimitry Andric   return 0;
35f8af5cf6SDimitry Andric }
36907da171SRoman Divacky 
compare_insensitive(StringRef RHS) const37344a3780SDimitry Andric int StringRef::compare_insensitive(StringRef RHS) const {
3867c32a98SDimitry Andric   if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
39f8af5cf6SDimitry Andric     return Res;
40907da171SRoman Divacky   if (Length == RHS.Length)
41907da171SRoman Divacky     return 0;
42907da171SRoman Divacky   return Length < RHS.Length ? -1 : 1;
43907da171SRoman Divacky }
44907da171SRoman Divacky 
starts_with_insensitive(StringRef Prefix) const45e3b55780SDimitry Andric bool StringRef::starts_with_insensitive(StringRef Prefix) const {
46f8af5cf6SDimitry Andric   return Length >= Prefix.Length &&
47f8af5cf6SDimitry Andric       ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
48f8af5cf6SDimitry Andric }
49f8af5cf6SDimitry Andric 
ends_with_insensitive(StringRef Suffix) const50e3b55780SDimitry Andric bool StringRef::ends_with_insensitive(StringRef Suffix) const {
51f8af5cf6SDimitry Andric   return Length >= Suffix.Length &&
52f8af5cf6SDimitry Andric       ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
53f8af5cf6SDimitry Andric }
54f8af5cf6SDimitry Andric 
find_insensitive(char C,size_t From) const55344a3780SDimitry Andric size_t StringRef::find_insensitive(char C, size_t From) const {
56044eb2f6SDimitry Andric   char L = toLower(C);
57044eb2f6SDimitry Andric   return find_if([L](char D) { return toLower(D) == L; }, From);
58b915e9e0SDimitry Andric }
59b915e9e0SDimitry Andric 
60abdf259dSRoman Divacky /// compare_numeric - Compare strings, handle embedded numbers.
compare_numeric(StringRef RHS) const61abdf259dSRoman Divacky int StringRef::compare_numeric(StringRef RHS) const {
6267c32a98SDimitry Andric   for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
6330815c53SDimitry Andric     // Check for sequences of digits.
64044eb2f6SDimitry Andric     if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
6530815c53SDimitry Andric       // The longer sequence of numbers is considered larger.
6630815c53SDimitry Andric       // This doesn't really handle prefixed zeros well.
6730815c53SDimitry Andric       size_t J;
6830815c53SDimitry Andric       for (J = I + 1; J != E + 1; ++J) {
69044eb2f6SDimitry Andric         bool ld = J < Length && isDigit(Data[J]);
70044eb2f6SDimitry Andric         bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
71abdf259dSRoman Divacky         if (ld != rd)
72abdf259dSRoman Divacky           return rd ? -1 : 1;
73abdf259dSRoman Divacky         if (!rd)
74abdf259dSRoman Divacky           break;
75abdf259dSRoman Divacky       }
7630815c53SDimitry Andric       // The two number sequences have the same length (J-I), just memcmp them.
7730815c53SDimitry Andric       if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
7830815c53SDimitry Andric         return Res < 0 ? -1 : 1;
7930815c53SDimitry Andric       // Identical number sequences, continue search after the numbers.
8030815c53SDimitry Andric       I = J - 1;
8130815c53SDimitry Andric       continue;
82abdf259dSRoman Divacky     }
8330815c53SDimitry Andric     if (Data[I] != RHS.Data[I])
84d39c594dSDimitry Andric       return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
85abdf259dSRoman Divacky   }
86abdf259dSRoman Divacky   if (Length == RHS.Length)
87abdf259dSRoman Divacky     return 0;
88abdf259dSRoman Divacky   return Length < RHS.Length ? -1 : 1;
89abdf259dSRoman Divacky }
90abdf259dSRoman Divacky 
911e7804dbSRoman Divacky // Compute the edit distance between the two given strings.
edit_distance(llvm::StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const921e7804dbSRoman Divacky unsigned StringRef::edit_distance(llvm::StringRef Other,
93cf099d11SDimitry Andric                                   bool AllowReplacements,
94f8af5cf6SDimitry Andric                                   unsigned MaxEditDistance) const {
95e3b55780SDimitry Andric   return llvm::ComputeEditDistance(ArrayRef(data(), size()),
96e3b55780SDimitry Andric                                    ArrayRef(Other.data(), Other.size()),
9763faed5bSDimitry Andric                                    AllowReplacements, MaxEditDistance);
981e7804dbSRoman Divacky }
99829000e0SRoman Divacky 
edit_distance_insensitive(StringRef Other,bool AllowReplacements,unsigned MaxEditDistance) const100145449b1SDimitry Andric unsigned llvm::StringRef::edit_distance_insensitive(
101145449b1SDimitry Andric     StringRef Other, bool AllowReplacements, unsigned MaxEditDistance) const {
102145449b1SDimitry Andric   return llvm::ComputeMappedEditDistance(
103e3b55780SDimitry Andric       ArrayRef(data(), size()), ArrayRef(Other.data(), Other.size()),
104145449b1SDimitry Andric       llvm::toLower, AllowReplacements, MaxEditDistance);
105145449b1SDimitry Andric }
106145449b1SDimitry Andric 
10763faed5bSDimitry Andric //===----------------------------------------------------------------------===//
10863faed5bSDimitry Andric // String Operations
10963faed5bSDimitry Andric //===----------------------------------------------------------------------===//
110cf099d11SDimitry Andric 
lower() const11163faed5bSDimitry Andric std::string StringRef::lower() const {
112cfca06d7SDimitry Andric   return std::string(map_iterator(begin(), toLower),
113cfca06d7SDimitry Andric                      map_iterator(end(), toLower));
1141e7804dbSRoman Divacky }
1151e7804dbSRoman Divacky 
upper() const11663faed5bSDimitry Andric std::string StringRef::upper() const {
117cfca06d7SDimitry Andric   return std::string(map_iterator(begin(), toUpper),
118cfca06d7SDimitry Andric                      map_iterator(end(), toUpper));
1191e7804dbSRoman Divacky }
1201e7804dbSRoman Divacky 
12159850d08SRoman Divacky //===----------------------------------------------------------------------===//
12259850d08SRoman Divacky // String Searching
12359850d08SRoman Divacky //===----------------------------------------------------------------------===//
12459850d08SRoman Divacky 
12559850d08SRoman Divacky 
12659850d08SRoman Divacky /// find - Search for the first string \arg Str in the string.
12759850d08SRoman Divacky ///
1286b943ff3SDimitry Andric /// \return - The index of the first occurrence of \arg Str, or npos if not
12959850d08SRoman Divacky /// found.
find(StringRef Str,size_t From) const130907da171SRoman Divacky size_t StringRef::find(StringRef Str, size_t From) const {
131dd58ef01SDimitry Andric   if (From > Length)
13259850d08SRoman Divacky     return npos;
13363faed5bSDimitry Andric 
134b915e9e0SDimitry Andric   const char *Start = Data + From;
135b915e9e0SDimitry Andric   size_t Size = Length - From;
136b915e9e0SDimitry Andric 
137dd58ef01SDimitry Andric   const char *Needle = Str.data();
138dd58ef01SDimitry Andric   size_t N = Str.size();
139dd58ef01SDimitry Andric   if (N == 0)
140dd58ef01SDimitry Andric     return From;
141dd58ef01SDimitry Andric   if (Size < N)
142dd58ef01SDimitry Andric     return npos;
143b915e9e0SDimitry Andric   if (N == 1) {
144b915e9e0SDimitry Andric     const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
145b915e9e0SDimitry Andric     return Ptr == nullptr ? npos : Ptr - Data;
146b915e9e0SDimitry Andric   }
147dd58ef01SDimitry Andric 
148dd58ef01SDimitry Andric   const char *Stop = Start + (Size - N + 1);
149dd58ef01SDimitry Andric 
150e3b55780SDimitry Andric   if (N == 2) {
151e3b55780SDimitry Andric     // Provide a fast path for newline finding (CRLF case) in InclusionRewriter.
152e3b55780SDimitry Andric     // Not the most optimized strategy, but getting memcmp inlined should be
153e3b55780SDimitry Andric     // good enough.
154e3b55780SDimitry Andric     do {
155e3b55780SDimitry Andric       if (std::memcmp(Start, Needle, 2) == 0)
156e3b55780SDimitry Andric         return Start - Data;
157e3b55780SDimitry Andric       ++Start;
158e3b55780SDimitry Andric     } while (Start < Stop);
159e3b55780SDimitry Andric     return npos;
160e3b55780SDimitry Andric   }
161e3b55780SDimitry Andric 
16263faed5bSDimitry Andric   // For short haystacks or unsupported needles fall back to the naive algorithm
163dd58ef01SDimitry Andric   if (Size < 16 || N > 255) {
164dd58ef01SDimitry Andric     do {
165dd58ef01SDimitry Andric       if (std::memcmp(Start, Needle, N) == 0)
166dd58ef01SDimitry Andric         return Start - Data;
167dd58ef01SDimitry Andric       ++Start;
168dd58ef01SDimitry Andric     } while (Start < Stop);
16959850d08SRoman Divacky     return npos;
17059850d08SRoman Divacky   }
17159850d08SRoman Divacky 
17263faed5bSDimitry Andric   // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
17363faed5bSDimitry Andric   uint8_t BadCharSkip[256];
17463faed5bSDimitry Andric   std::memset(BadCharSkip, N, 256);
17563faed5bSDimitry Andric   for (unsigned i = 0; i != N-1; ++i)
17663faed5bSDimitry Andric     BadCharSkip[(uint8_t)Str[i]] = N-1-i;
17763faed5bSDimitry Andric 
178dd58ef01SDimitry Andric   do {
179b915e9e0SDimitry Andric     uint8_t Last = Start[N - 1];
180b915e9e0SDimitry Andric     if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
181b915e9e0SDimitry Andric       if (std::memcmp(Start, Needle, N - 1) == 0)
182dd58ef01SDimitry Andric         return Start - Data;
18363faed5bSDimitry Andric 
18463faed5bSDimitry Andric     // Otherwise skip the appropriate number of bytes.
185b915e9e0SDimitry Andric     Start += BadCharSkip[Last];
186dd58ef01SDimitry Andric   } while (Start < Stop);
18763faed5bSDimitry Andric 
18863faed5bSDimitry Andric   return npos;
18963faed5bSDimitry Andric }
19063faed5bSDimitry Andric 
find_insensitive(StringRef Str,size_t From) const191344a3780SDimitry Andric size_t StringRef::find_insensitive(StringRef Str, size_t From) const {
192b915e9e0SDimitry Andric   StringRef This = substr(From);
193b915e9e0SDimitry Andric   while (This.size() >= Str.size()) {
1947fa27ce4SDimitry Andric     if (This.starts_with_insensitive(Str))
195b915e9e0SDimitry Andric       return From;
196b915e9e0SDimitry Andric     This = This.drop_front();
197b915e9e0SDimitry Andric     ++From;
198b915e9e0SDimitry Andric   }
199b915e9e0SDimitry Andric   return npos;
200b915e9e0SDimitry Andric }
201b915e9e0SDimitry Andric 
rfind_insensitive(char C,size_t From) const202344a3780SDimitry Andric size_t StringRef::rfind_insensitive(char C, size_t From) const {
203b915e9e0SDimitry Andric   From = std::min(From, Length);
204b915e9e0SDimitry Andric   size_t i = From;
205b915e9e0SDimitry Andric   while (i != 0) {
206b915e9e0SDimitry Andric     --i;
207044eb2f6SDimitry Andric     if (toLower(Data[i]) == toLower(C))
208b915e9e0SDimitry Andric       return i;
209b915e9e0SDimitry Andric   }
210b915e9e0SDimitry Andric   return npos;
211b915e9e0SDimitry Andric }
212b915e9e0SDimitry Andric 
21359850d08SRoman Divacky /// rfind - Search for the last string \arg Str in the string.
21459850d08SRoman Divacky ///
2156b943ff3SDimitry Andric /// \return - The index of the last occurrence of \arg Str, or npos if not
21659850d08SRoman Divacky /// found.
rfind(StringRef Str) const217907da171SRoman Divacky size_t StringRef::rfind(StringRef Str) const {
218e3b55780SDimitry Andric   return std::string_view(*this).rfind(Str);
21959850d08SRoman Divacky }
22059850d08SRoman Divacky 
rfind_insensitive(StringRef Str) const221344a3780SDimitry Andric size_t StringRef::rfind_insensitive(StringRef Str) const {
222b915e9e0SDimitry Andric   size_t N = Str.size();
223b915e9e0SDimitry Andric   if (N > Length)
224b915e9e0SDimitry Andric     return npos;
225b915e9e0SDimitry Andric   for (size_t i = Length - N + 1, e = 0; i != e;) {
226b915e9e0SDimitry Andric     --i;
227344a3780SDimitry Andric     if (substr(i, N).equals_insensitive(Str))
228b915e9e0SDimitry Andric       return i;
229b915e9e0SDimitry Andric   }
230b915e9e0SDimitry Andric   return npos;
231b915e9e0SDimitry Andric }
232b915e9e0SDimitry Andric 
233907da171SRoman Divacky /// find_first_of - Find the first character in the string that is in \arg
234907da171SRoman Divacky /// Chars, or npos if not found.
235907da171SRoman Divacky ///
236d39c594dSDimitry Andric /// Note: O(size() + Chars.size())
find_first_of(StringRef Chars,size_t From) const237907da171SRoman Divacky StringRef::size_type StringRef::find_first_of(StringRef Chars,
238907da171SRoman Divacky                                               size_t From) const {
239d39c594dSDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
240f65dcba8SDimitry Andric   for (char C : Chars)
241f65dcba8SDimitry Andric     CharBits.set((unsigned char)C);
242d39c594dSDimitry Andric 
24367c32a98SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
244d39c594dSDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
24559850d08SRoman Divacky       return i;
24659850d08SRoman Divacky   return npos;
24759850d08SRoman Divacky }
24859850d08SRoman Divacky 
24959850d08SRoman Divacky /// find_first_not_of - Find the first character in the string that is not
250907da171SRoman Divacky /// \arg C or npos if not found.
find_first_not_of(char C,size_t From) const251907da171SRoman Divacky StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
252e3b55780SDimitry Andric   return std::string_view(*this).find_first_not_of(C, From);
253907da171SRoman Divacky }
254907da171SRoman Divacky 
255907da171SRoman Divacky /// find_first_not_of - Find the first character in the string that is not
256907da171SRoman Divacky /// in the string \arg Chars, or npos if not found.
257907da171SRoman Divacky ///
258d39c594dSDimitry Andric /// Note: O(size() + Chars.size())
find_first_not_of(StringRef Chars,size_t From) const259907da171SRoman Divacky StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
260907da171SRoman Divacky                                                   size_t From) const {
261d39c594dSDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
262f65dcba8SDimitry Andric   for (char C : Chars)
263f65dcba8SDimitry Andric     CharBits.set((unsigned char)C);
264d39c594dSDimitry Andric 
26567c32a98SDimitry Andric   for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
266d39c594dSDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
26759850d08SRoman Divacky       return i;
26859850d08SRoman Divacky   return npos;
26959850d08SRoman Divacky }
27059850d08SRoman Divacky 
271cf099d11SDimitry Andric /// find_last_of - Find the last character in the string that is in \arg C,
272cf099d11SDimitry Andric /// or npos if not found.
273cf099d11SDimitry Andric ///
274cf099d11SDimitry Andric /// Note: O(size() + Chars.size())
find_last_of(StringRef Chars,size_t From) const275cf099d11SDimitry Andric StringRef::size_type StringRef::find_last_of(StringRef Chars,
276cf099d11SDimitry Andric                                              size_t From) const {
277cf099d11SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
278f65dcba8SDimitry Andric   for (char C : Chars)
279f65dcba8SDimitry Andric     CharBits.set((unsigned char)C);
280cf099d11SDimitry Andric 
28167c32a98SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
282cf099d11SDimitry Andric     if (CharBits.test((unsigned char)Data[i]))
283cf099d11SDimitry Andric       return i;
284cf099d11SDimitry Andric   return npos;
285cf099d11SDimitry Andric }
28659850d08SRoman Divacky 
28758b69754SDimitry Andric /// find_last_not_of - Find the last character in the string that is not
28858b69754SDimitry Andric /// \arg C, or npos if not found.
find_last_not_of(char C,size_t From) const28958b69754SDimitry Andric StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
29067c32a98SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
29158b69754SDimitry Andric     if (Data[i] != C)
29258b69754SDimitry Andric       return i;
29358b69754SDimitry Andric   return npos;
29458b69754SDimitry Andric }
29558b69754SDimitry Andric 
29658b69754SDimitry Andric /// find_last_not_of - Find the last character in the string that is not in
29758b69754SDimitry Andric /// \arg Chars, or npos if not found.
29858b69754SDimitry Andric ///
29958b69754SDimitry Andric /// Note: O(size() + Chars.size())
find_last_not_of(StringRef Chars,size_t From) const30058b69754SDimitry Andric StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
30158b69754SDimitry Andric                                                  size_t From) const {
30258b69754SDimitry Andric   std::bitset<1 << CHAR_BIT> CharBits;
303f65dcba8SDimitry Andric   for (char C : Chars)
304f65dcba8SDimitry Andric     CharBits.set((unsigned char)C);
30558b69754SDimitry Andric 
30667c32a98SDimitry Andric   for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
30758b69754SDimitry Andric     if (!CharBits.test((unsigned char)Data[i]))
30858b69754SDimitry Andric       return i;
30958b69754SDimitry Andric   return npos;
31058b69754SDimitry Andric }
31158b69754SDimitry Andric 
split(SmallVectorImpl<StringRef> & A,StringRef Separator,int MaxSplit,bool KeepEmpty) const31263faed5bSDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A,
313dd58ef01SDimitry Andric                       StringRef Separator, int MaxSplit,
31463faed5bSDimitry Andric                       bool KeepEmpty) const {
315dd58ef01SDimitry Andric   StringRef S = *this;
31663faed5bSDimitry Andric 
317dd58ef01SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
318dd58ef01SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
319dd58ef01SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
320dd58ef01SDimitry Andric   // but that seems unlikely to be useful.
321dd58ef01SDimitry Andric   while (MaxSplit-- != 0) {
322dd58ef01SDimitry Andric     size_t Idx = S.find(Separator);
323dd58ef01SDimitry Andric     if (Idx == npos)
324dd58ef01SDimitry Andric       break;
32563faed5bSDimitry Andric 
326dd58ef01SDimitry Andric     // Push this split.
327dd58ef01SDimitry Andric     if (KeepEmpty || Idx > 0)
328dd58ef01SDimitry Andric       A.push_back(S.slice(0, Idx));
329dd58ef01SDimitry Andric 
330dd58ef01SDimitry Andric     // Jump forward.
331dd58ef01SDimitry Andric     S = S.slice(Idx + Separator.size(), npos);
33263faed5bSDimitry Andric   }
333dd58ef01SDimitry Andric 
334dd58ef01SDimitry Andric   // Push the tail.
335dd58ef01SDimitry Andric   if (KeepEmpty || !S.empty())
336dd58ef01SDimitry Andric     A.push_back(S);
337dd58ef01SDimitry Andric }
338dd58ef01SDimitry Andric 
split(SmallVectorImpl<StringRef> & A,char Separator,int MaxSplit,bool KeepEmpty) const339dd58ef01SDimitry Andric void StringRef::split(SmallVectorImpl<StringRef> &A, char Separator,
340dd58ef01SDimitry Andric                       int MaxSplit, bool KeepEmpty) const {
341dd58ef01SDimitry Andric   StringRef S = *this;
342dd58ef01SDimitry Andric 
343dd58ef01SDimitry Andric   // Count down from MaxSplit. When MaxSplit is -1, this will just split
344dd58ef01SDimitry Andric   // "forever". This doesn't support splitting more than 2^31 times
345dd58ef01SDimitry Andric   // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
346dd58ef01SDimitry Andric   // but that seems unlikely to be useful.
347dd58ef01SDimitry Andric   while (MaxSplit-- != 0) {
348dd58ef01SDimitry Andric     size_t Idx = S.find(Separator);
349dd58ef01SDimitry Andric     if (Idx == npos)
350dd58ef01SDimitry Andric       break;
351dd58ef01SDimitry Andric 
352dd58ef01SDimitry Andric     // Push this split.
353dd58ef01SDimitry Andric     if (KeepEmpty || Idx > 0)
354dd58ef01SDimitry Andric       A.push_back(S.slice(0, Idx));
355dd58ef01SDimitry Andric 
356dd58ef01SDimitry Andric     // Jump forward.
357dd58ef01SDimitry Andric     S = S.slice(Idx + 1, npos);
358dd58ef01SDimitry Andric   }
359dd58ef01SDimitry Andric 
360dd58ef01SDimitry Andric   // Push the tail.
361dd58ef01SDimitry Andric   if (KeepEmpty || !S.empty())
362dd58ef01SDimitry Andric     A.push_back(S);
36363faed5bSDimitry Andric }
36463faed5bSDimitry Andric 
36559850d08SRoman Divacky //===----------------------------------------------------------------------===//
36659850d08SRoman Divacky // Helpful Algorithms
36759850d08SRoman Divacky //===----------------------------------------------------------------------===//
36859850d08SRoman Divacky 
36959850d08SRoman Divacky /// count - Return the number of non-overlapped occurrences of \arg Str in
37059850d08SRoman Divacky /// the string.
count(StringRef Str) const371907da171SRoman Divacky size_t StringRef::count(StringRef Str) const {
37259850d08SRoman Divacky   size_t Count = 0;
373e3b55780SDimitry Andric   size_t Pos = 0;
37459850d08SRoman Divacky   size_t N = Str.size();
375e3b55780SDimitry Andric   // TODO: For an empty `Str` we return 0 for legacy reasons. Consider changing
376e3b55780SDimitry Andric   //       this to `Length + 1` which is more in-line with the function
377e3b55780SDimitry Andric   //       description.
378e3b55780SDimitry Andric   if (!N)
37959850d08SRoman Divacky     return 0;
380e3b55780SDimitry Andric   while ((Pos = find(Str, Pos)) != npos) {
38159850d08SRoman Divacky     ++Count;
382e3b55780SDimitry Andric     Pos += N;
383706b4fc4SDimitry Andric   }
38459850d08SRoman Divacky   return Count;
38559850d08SRoman Divacky }
38659850d08SRoman Divacky 
GetAutoSenseRadix(StringRef & Str)38767a71b31SRoman Divacky static unsigned GetAutoSenseRadix(StringRef &Str) {
388b915e9e0SDimitry Andric   if (Str.empty())
389b915e9e0SDimitry Andric     return 10;
390b915e9e0SDimitry Andric 
3914df029ccSDimitry Andric   if (Str.consume_front_insensitive("0x"))
39267a71b31SRoman Divacky     return 16;
39358b69754SDimitry Andric 
3944df029ccSDimitry Andric   if (Str.consume_front_insensitive("0b"))
39567a71b31SRoman Divacky     return 2;
39658b69754SDimitry Andric 
3974df029ccSDimitry Andric   if (Str.consume_front("0o"))
39858b69754SDimitry Andric     return 8;
39958b69754SDimitry Andric 
400044eb2f6SDimitry Andric   if (Str[0] == '0' && Str.size() > 1 && isDigit(Str[1])) {
401b915e9e0SDimitry Andric     Str = Str.substr(1);
40258b69754SDimitry Andric     return 8;
403b915e9e0SDimitry Andric   }
40458b69754SDimitry Andric 
40558b69754SDimitry Andric   return 10;
40667a71b31SRoman Divacky }
40767a71b31SRoman Divacky 
consumeUnsignedInteger(StringRef & Str,unsigned Radix,unsigned long long & Result)408b915e9e0SDimitry Andric bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
40959850d08SRoman Divacky                                   unsigned long long &Result) {
41059850d08SRoman Divacky   // Autosense radix if not specified.
41167a71b31SRoman Divacky   if (Radix == 0)
41267a71b31SRoman Divacky     Radix = GetAutoSenseRadix(Str);
41359850d08SRoman Divacky 
41459850d08SRoman Divacky   // Empty strings (after the radix autosense) are invalid.
41559850d08SRoman Divacky   if (Str.empty()) return true;
41659850d08SRoman Divacky 
41759850d08SRoman Divacky   // Parse all the bytes of the string given this radix.  Watch for overflow.
418b915e9e0SDimitry Andric   StringRef Str2 = Str;
41959850d08SRoman Divacky   Result = 0;
420b915e9e0SDimitry Andric   while (!Str2.empty()) {
42159850d08SRoman Divacky     unsigned CharVal;
422b915e9e0SDimitry Andric     if (Str2[0] >= '0' && Str2[0] <= '9')
423b915e9e0SDimitry Andric       CharVal = Str2[0] - '0';
424b915e9e0SDimitry Andric     else if (Str2[0] >= 'a' && Str2[0] <= 'z')
425b915e9e0SDimitry Andric       CharVal = Str2[0] - 'a' + 10;
426b915e9e0SDimitry Andric     else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
427b915e9e0SDimitry Andric       CharVal = Str2[0] - 'A' + 10;
42859850d08SRoman Divacky     else
429b915e9e0SDimitry Andric       break;
43059850d08SRoman Divacky 
431b915e9e0SDimitry Andric     // If the parsed value is larger than the integer radix, we cannot
432b915e9e0SDimitry Andric     // consume any more characters.
43359850d08SRoman Divacky     if (CharVal >= Radix)
434b915e9e0SDimitry Andric       break;
43559850d08SRoman Divacky 
43659850d08SRoman Divacky     // Add in this character.
43759850d08SRoman Divacky     unsigned long long PrevResult = Result;
43859850d08SRoman Divacky     Result = Result * Radix + CharVal;
43959850d08SRoman Divacky 
440522600a2SDimitry Andric     // Check for overflow by shifting back and seeing if bits were lost.
441522600a2SDimitry Andric     if (Result / Radix < PrevResult)
44259850d08SRoman Divacky       return true;
44359850d08SRoman Divacky 
444b915e9e0SDimitry Andric     Str2 = Str2.substr(1);
44559850d08SRoman Divacky   }
44659850d08SRoman Divacky 
447b915e9e0SDimitry Andric   // We consider the operation a failure if no characters were consumed
448b915e9e0SDimitry Andric   // successfully.
449b915e9e0SDimitry Andric   if (Str.size() == Str2.size())
450b915e9e0SDimitry Andric     return true;
451b915e9e0SDimitry Andric 
452b915e9e0SDimitry Andric   Str = Str2;
45359850d08SRoman Divacky   return false;
45459850d08SRoman Divacky }
45559850d08SRoman Divacky 
consumeSignedInteger(StringRef & Str,unsigned Radix,long long & Result)456b915e9e0SDimitry Andric bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
45763faed5bSDimitry Andric                                 long long &Result) {
45859850d08SRoman Divacky   unsigned long long ULLVal;
45959850d08SRoman Divacky 
46059850d08SRoman Divacky   // Handle positive strings first.
461ac9a064cSDimitry Andric   if (!Str.starts_with("-")) {
462b915e9e0SDimitry Andric     if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
46359850d08SRoman Divacky         // Check for value so large it overflows a signed value.
46459850d08SRoman Divacky         (long long)ULLVal < 0)
46559850d08SRoman Divacky       return true;
46659850d08SRoman Divacky     Result = ULLVal;
46759850d08SRoman Divacky     return false;
46859850d08SRoman Divacky   }
46959850d08SRoman Divacky 
47059850d08SRoman Divacky   // Get the positive part of the value.
471b915e9e0SDimitry Andric   StringRef Str2 = Str.drop_front(1);
472b915e9e0SDimitry Andric   if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
47359850d08SRoman Divacky       // Reject values so large they'd overflow as negative signed, but allow
47459850d08SRoman Divacky       // "-0".  This negates the unsigned so that the negative isn't undefined
47559850d08SRoman Divacky       // on signed overflow.
47659850d08SRoman Divacky       (long long)-ULLVal > 0)
47759850d08SRoman Divacky     return true;
47859850d08SRoman Divacky 
479b915e9e0SDimitry Andric   Str = Str2;
48059850d08SRoman Divacky   Result = -ULLVal;
48159850d08SRoman Divacky   return false;
48259850d08SRoman Divacky }
48359850d08SRoman Divacky 
484b915e9e0SDimitry Andric /// GetAsUnsignedInteger - Workhorse method that converts a integer character
485b915e9e0SDimitry Andric /// sequence of radix up to 36 to an unsigned long long value.
getAsUnsignedInteger(StringRef Str,unsigned Radix,unsigned long long & Result)486b915e9e0SDimitry Andric bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
487b915e9e0SDimitry Andric                                 unsigned long long &Result) {
488b915e9e0SDimitry Andric   if (consumeUnsignedInteger(Str, Radix, Result))
489b915e9e0SDimitry Andric     return true;
490b915e9e0SDimitry Andric 
491b915e9e0SDimitry Andric   // For getAsUnsignedInteger, we require the whole string to be consumed or
492b915e9e0SDimitry Andric   // else we consider it a failure.
493b915e9e0SDimitry Andric   return !Str.empty();
494b915e9e0SDimitry Andric }
495b915e9e0SDimitry Andric 
getAsSignedInteger(StringRef Str,unsigned Radix,long long & Result)496b915e9e0SDimitry Andric bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
497b915e9e0SDimitry Andric                               long long &Result) {
498b915e9e0SDimitry Andric   if (consumeSignedInteger(Str, Radix, Result))
499b915e9e0SDimitry Andric     return true;
500b915e9e0SDimitry Andric 
501b915e9e0SDimitry Andric   // For getAsSignedInteger, we require the whole string to be consumed or else
502b915e9e0SDimitry Andric   // we consider it a failure.
503b915e9e0SDimitry Andric   return !Str.empty();
504b915e9e0SDimitry Andric }
505b915e9e0SDimitry Andric 
consumeInteger(unsigned Radix,APInt & Result)5067fa27ce4SDimitry Andric bool StringRef::consumeInteger(unsigned Radix, APInt &Result) {
50767a71b31SRoman Divacky   StringRef Str = *this;
50867a71b31SRoman Divacky 
50967a71b31SRoman Divacky   // Autosense radix if not specified.
51067a71b31SRoman Divacky   if (Radix == 0)
51167a71b31SRoman Divacky     Radix = GetAutoSenseRadix(Str);
51267a71b31SRoman Divacky 
51367a71b31SRoman Divacky   assert(Radix > 1 && Radix <= 36);
51467a71b31SRoman Divacky 
51567a71b31SRoman Divacky   // Empty strings (after the radix autosense) are invalid.
51667a71b31SRoman Divacky   if (Str.empty()) return true;
51767a71b31SRoman Divacky 
51867a71b31SRoman Divacky   // Skip leading zeroes.  This can be a significant improvement if
51967a71b31SRoman Divacky   // it means we don't need > 64 bits.
5204df029ccSDimitry Andric   Str = Str.ltrim('0');
52167a71b31SRoman Divacky 
52267a71b31SRoman Divacky   // If it was nothing but zeroes....
52367a71b31SRoman Divacky   if (Str.empty()) {
52467a71b31SRoman Divacky     Result = APInt(64, 0);
5257fa27ce4SDimitry Andric     *this = Str;
52667a71b31SRoman Divacky     return false;
52767a71b31SRoman Divacky   }
52867a71b31SRoman Divacky 
52967a71b31SRoman Divacky   // (Over-)estimate the required number of bits.
53067a71b31SRoman Divacky   unsigned Log2Radix = 0;
53167a71b31SRoman Divacky   while ((1U << Log2Radix) < Radix) Log2Radix++;
53267a71b31SRoman Divacky   bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
53367a71b31SRoman Divacky 
53467a71b31SRoman Divacky   unsigned BitWidth = Log2Radix * Str.size();
53567a71b31SRoman Divacky   if (BitWidth < Result.getBitWidth())
53667a71b31SRoman Divacky     BitWidth = Result.getBitWidth(); // don't shrink the result
53758b69754SDimitry Andric   else if (BitWidth > Result.getBitWidth())
538cf099d11SDimitry Andric     Result = Result.zext(BitWidth);
53967a71b31SRoman Divacky 
54067a71b31SRoman Divacky   APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
54167a71b31SRoman Divacky   if (!IsPowerOf2Radix) {
54267a71b31SRoman Divacky     // These must have the same bit-width as Result.
54367a71b31SRoman Divacky     RadixAP = APInt(BitWidth, Radix);
54467a71b31SRoman Divacky     CharAP = APInt(BitWidth, 0);
54567a71b31SRoman Divacky   }
54667a71b31SRoman Divacky 
54767a71b31SRoman Divacky   // Parse all the bytes of the string given this radix.
54867a71b31SRoman Divacky   Result = 0;
54967a71b31SRoman Divacky   while (!Str.empty()) {
55067a71b31SRoman Divacky     unsigned CharVal;
55167a71b31SRoman Divacky     if (Str[0] >= '0' && Str[0] <= '9')
55267a71b31SRoman Divacky       CharVal = Str[0]-'0';
55367a71b31SRoman Divacky     else if (Str[0] >= 'a' && Str[0] <= 'z')
55467a71b31SRoman Divacky       CharVal = Str[0]-'a'+10;
55567a71b31SRoman Divacky     else if (Str[0] >= 'A' && Str[0] <= 'Z')
55667a71b31SRoman Divacky       CharVal = Str[0]-'A'+10;
55767a71b31SRoman Divacky     else
5587fa27ce4SDimitry Andric       break;
55967a71b31SRoman Divacky 
56067a71b31SRoman Divacky     // If the parsed value is larger than the integer radix, the string is
56167a71b31SRoman Divacky     // invalid.
56267a71b31SRoman Divacky     if (CharVal >= Radix)
5637fa27ce4SDimitry Andric       break;
56467a71b31SRoman Divacky 
56567a71b31SRoman Divacky     // Add in this character.
56667a71b31SRoman Divacky     if (IsPowerOf2Radix) {
56767a71b31SRoman Divacky       Result <<= Log2Radix;
56867a71b31SRoman Divacky       Result |= CharVal;
56967a71b31SRoman Divacky     } else {
57067a71b31SRoman Divacky       Result *= RadixAP;
57167a71b31SRoman Divacky       CharAP = CharVal;
57267a71b31SRoman Divacky       Result += CharAP;
57367a71b31SRoman Divacky     }
57467a71b31SRoman Divacky 
57567a71b31SRoman Divacky     Str = Str.substr(1);
57667a71b31SRoman Divacky   }
57767a71b31SRoman Divacky 
5787fa27ce4SDimitry Andric   // We consider the operation a failure if no characters were consumed
5797fa27ce4SDimitry Andric   // successfully.
5807fa27ce4SDimitry Andric   if (size() == Str.size())
5817fa27ce4SDimitry Andric     return true;
5827fa27ce4SDimitry Andric 
5837fa27ce4SDimitry Andric   *this = Str;
58467a71b31SRoman Divacky   return false;
58567a71b31SRoman Divacky }
58663faed5bSDimitry Andric 
getAsInteger(unsigned Radix,APInt & Result) const5877fa27ce4SDimitry Andric bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
5887fa27ce4SDimitry Andric   StringRef Str = *this;
5897fa27ce4SDimitry Andric   if (Str.consumeInteger(Radix, Result))
5907fa27ce4SDimitry Andric     return true;
5917fa27ce4SDimitry Andric 
5927fa27ce4SDimitry Andric   // For getAsInteger, we require the whole string to be consumed or else we
5937fa27ce4SDimitry Andric   // consider it a failure.
5947fa27ce4SDimitry Andric   return !Str.empty();
5957fa27ce4SDimitry Andric }
5967fa27ce4SDimitry Andric 
getAsDouble(double & Result,bool AllowInexact) const59771d5a254SDimitry Andric bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
59871d5a254SDimitry Andric   APFloat F(0.0);
599706b4fc4SDimitry Andric   auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
600706b4fc4SDimitry Andric   if (errorToBool(StatusOrErr.takeError()))
601706b4fc4SDimitry Andric     return true;
602706b4fc4SDimitry Andric 
603706b4fc4SDimitry Andric   APFloat::opStatus Status = *StatusOrErr;
60471d5a254SDimitry Andric   if (Status != APFloat::opOK) {
605c7dac04cSDimitry Andric     if (!AllowInexact || !(Status & APFloat::opInexact))
60671d5a254SDimitry Andric       return true;
60771d5a254SDimitry Andric   }
60871d5a254SDimitry Andric 
60971d5a254SDimitry Andric   Result = F.convertToDouble();
61071d5a254SDimitry Andric   return false;
61171d5a254SDimitry Andric }
61263faed5bSDimitry Andric 
61363faed5bSDimitry Andric // Implementation of StringRef hashing.
hash_value(StringRef S)61463faed5bSDimitry Andric hash_code llvm::hash_value(StringRef S) {
61563faed5bSDimitry Andric   return hash_combine_range(S.begin(), S.end());
61663faed5bSDimitry Andric }
6176f8fc217SDimitry Andric 
getHashValue(StringRef Val)6186f8fc217SDimitry Andric unsigned DenseMapInfo<StringRef, void>::getHashValue(StringRef Val) {
6196f8fc217SDimitry Andric   assert(Val.data() != getEmptyKey().data() &&
6206f8fc217SDimitry Andric          "Cannot hash the empty key!");
6216f8fc217SDimitry Andric   assert(Val.data() != getTombstoneKey().data() &&
6226f8fc217SDimitry Andric          "Cannot hash the tombstone key!");
6236f8fc217SDimitry Andric   return (unsigned)(hash_value(Val));
6246f8fc217SDimitry Andric }
625