xref: /src/contrib/llvm-project/llvm/lib/Support/FormattedStream.cpp (revision 439352ac8257c8419cb4a662abb7f260f31f9932)
159850d08SRoman Divacky //===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
259850d08SRoman Divacky //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
659850d08SRoman Divacky //
759850d08SRoman Divacky //===----------------------------------------------------------------------===//
859850d08SRoman Divacky //
959850d08SRoman Divacky // This file contains the implementation of formatted_raw_ostream.
1059850d08SRoman Divacky //
1159850d08SRoman Divacky //===----------------------------------------------------------------------===//
1259850d08SRoman Divacky 
1359850d08SRoman Divacky #include "llvm/Support/FormattedStream.h"
14cfca06d7SDimitry Andric #include "llvm/Support/ConvertUTF.h"
157ab83427SDimitry Andric #include "llvm/Support/Debug.h"
16cfca06d7SDimitry Andric #include "llvm/Support/Unicode.h"
175a5ac124SDimitry Andric #include "llvm/Support/raw_ostream.h"
18cf099d11SDimitry Andric #include <algorithm>
1959850d08SRoman Divacky 
2059850d08SRoman Divacky using namespace llvm;
2159850d08SRoman Divacky 
22f8af5cf6SDimitry Andric /// UpdatePosition - Examine the given char sequence and figure out which
23f8af5cf6SDimitry Andric /// column we end up in after output, and how many line breaks are contained.
24cfca06d7SDimitry Andric /// This assumes that the input string is well-formed UTF-8, and takes into
25cfca06d7SDimitry Andric /// account Unicode characters which render as multiple columns wide.
UpdatePosition(const char * Ptr,size_t Size)26cfca06d7SDimitry Andric void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
27f8af5cf6SDimitry Andric   unsigned &Column = Position.first;
28f8af5cf6SDimitry Andric   unsigned &Line = Position.second;
2959850d08SRoman Divacky 
30cfca06d7SDimitry Andric   auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
31cfca06d7SDimitry Andric     int Width = sys::unicode::columnWidthUTF8(CP);
32cfca06d7SDimitry Andric     if (Width != sys::unicode::ErrorNonPrintableCharacter)
33cfca06d7SDimitry Andric       Column += Width;
34cfca06d7SDimitry Andric 
35cfca06d7SDimitry Andric     // The only special whitespace characters we care about are single-byte.
36cfca06d7SDimitry Andric     if (CP.size() > 1)
37cfca06d7SDimitry Andric       return;
38cfca06d7SDimitry Andric 
39cfca06d7SDimitry Andric     switch (CP[0]) {
40f8af5cf6SDimitry Andric     case '\n':
41f8af5cf6SDimitry Andric       Line += 1;
42e3b55780SDimitry Andric       [[fallthrough]];
43f8af5cf6SDimitry Andric     case '\r':
4459850d08SRoman Divacky       Column = 0;
45f8af5cf6SDimitry Andric       break;
46f8af5cf6SDimitry Andric     case '\t':
4759850d08SRoman Divacky       // Assumes tab stop = 8 characters.
4859850d08SRoman Divacky       Column += (8 - (Column & 0x7)) & 0x7;
49f8af5cf6SDimitry Andric       break;
50f8af5cf6SDimitry Andric     }
51cfca06d7SDimitry Andric   };
52cfca06d7SDimitry Andric 
53cfca06d7SDimitry Andric   // If we have a partial UTF-8 sequence from the previous buffer, check that
54cfca06d7SDimitry Andric   // first.
55cfca06d7SDimitry Andric   if (PartialUTF8Char.size()) {
56cfca06d7SDimitry Andric     size_t BytesFromBuffer =
57cfca06d7SDimitry Andric         getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
58cfca06d7SDimitry Andric     if (Size < BytesFromBuffer) {
59cfca06d7SDimitry Andric       // If we still don't have enough bytes for a complete code point, just
60cfca06d7SDimitry Andric       // append what we have.
61cfca06d7SDimitry Andric       PartialUTF8Char.append(StringRef(Ptr, Size));
62cfca06d7SDimitry Andric       return;
63cfca06d7SDimitry Andric     } else {
64cfca06d7SDimitry Andric       // The first few bytes from the buffer will complete the code point.
65cfca06d7SDimitry Andric       // Concatenate them and process their effect on the line and column
66cfca06d7SDimitry Andric       // numbers.
67cfca06d7SDimitry Andric       PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
68cfca06d7SDimitry Andric       ProcessUTF8CodePoint(PartialUTF8Char);
69cfca06d7SDimitry Andric       PartialUTF8Char.clear();
70cfca06d7SDimitry Andric       Ptr += BytesFromBuffer;
71cfca06d7SDimitry Andric       Size -= BytesFromBuffer;
72cfca06d7SDimitry Andric     }
73cfca06d7SDimitry Andric   }
74cfca06d7SDimitry Andric 
75cfca06d7SDimitry Andric   // Now scan the rest of the buffer.
76cfca06d7SDimitry Andric   unsigned NumBytes;
77cfca06d7SDimitry Andric   for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
78cfca06d7SDimitry Andric     NumBytes = getNumBytesForUTF8(*Ptr);
79cfca06d7SDimitry Andric 
80cfca06d7SDimitry Andric     // The buffer might end part way through a UTF-8 code unit sequence for a
81cfca06d7SDimitry Andric     // Unicode scalar value if it got flushed. If this happens, we can't know
82cfca06d7SDimitry Andric     // the display width until we see the rest of the code point. Stash the
83cfca06d7SDimitry Andric     // bytes we do have, so that we can reconstruct the whole code point later,
84cfca06d7SDimitry Andric     // even if the buffer is being flushed.
85cfca06d7SDimitry Andric     if ((unsigned)(End - Ptr) < NumBytes) {
86cfca06d7SDimitry Andric       PartialUTF8Char = StringRef(Ptr, End - Ptr);
87cfca06d7SDimitry Andric       return;
88cfca06d7SDimitry Andric     }
89cfca06d7SDimitry Andric 
90cfca06d7SDimitry Andric     ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
91f8af5cf6SDimitry Andric   }
9259850d08SRoman Divacky }
9359850d08SRoman Divacky 
94f8af5cf6SDimitry Andric /// ComputePosition - Examine the current output and update line and column
95f8af5cf6SDimitry Andric /// counts.
ComputePosition(const char * Ptr,size_t Size)96f8af5cf6SDimitry Andric void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
9705834caeSDimitry Andric   if (DisableScan)
9805834caeSDimitry Andric     return;
9905834caeSDimitry Andric 
10059850d08SRoman Divacky   // If our previous scan pointer is inside the buffer, assume we already
10159850d08SRoman Divacky   // scanned those bytes. This depends on raw_ostream to not change our buffer
10259850d08SRoman Divacky   // in unexpected ways.
103f8af5cf6SDimitry Andric   if (Ptr <= Scanned && Scanned <= Ptr + Size)
10459850d08SRoman Divacky     // Scan all characters added since our last scan to determine the new
10559850d08SRoman Divacky     // column.
106cfca06d7SDimitry Andric     UpdatePosition(Scanned, Size - (Scanned - Ptr));
107f8af5cf6SDimitry Andric   else
108cfca06d7SDimitry Andric     UpdatePosition(Ptr, Size);
10959850d08SRoman Divacky 
11059850d08SRoman Divacky   // Update the scanning pointer.
11159850d08SRoman Divacky   Scanned = Ptr + Size;
11259850d08SRoman Divacky }
11359850d08SRoman Divacky 
11459850d08SRoman Divacky /// PadToColumn - Align the output to some column number.
11559850d08SRoman Divacky ///
11659850d08SRoman Divacky /// \param NewCol - The column to move to.
11759850d08SRoman Divacky ///
PadToColumn(unsigned NewCol)1186fe5c7aaSRoman Divacky formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
11959850d08SRoman Divacky   // Figure out what's in the buffer and add it to the column count.
120f8af5cf6SDimitry Andric   ComputePosition(getBufferStart(), GetNumBytesInBuffer());
12159850d08SRoman Divacky 
12259850d08SRoman Divacky   // Output spaces until we reach the desired column.
123f8af5cf6SDimitry Andric   indent(std::max(int(NewCol - getColumn()), 1));
1246fe5c7aaSRoman Divacky   return *this;
12559850d08SRoman Divacky }
12659850d08SRoman Divacky 
write_impl(const char * Ptr,size_t Size)12759850d08SRoman Divacky void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
12859850d08SRoman Divacky   // Figure out what's in the buffer and add it to the column count.
129f8af5cf6SDimitry Andric   ComputePosition(Ptr, Size);
13059850d08SRoman Divacky 
13159850d08SRoman Divacky   // Write the data to the underlying stream (which is unbuffered, so
13259850d08SRoman Divacky   // the data will be immediately written out).
13359850d08SRoman Divacky   TheStream->write(Ptr, Size);
13459850d08SRoman Divacky 
13559850d08SRoman Divacky   // Reset the scanning pointer.
1365ca98fd9SDimitry Andric   Scanned = nullptr;
13759850d08SRoman Divacky }
13859850d08SRoman Divacky 
13959850d08SRoman Divacky /// fouts() - This returns a reference to a formatted_raw_ostream for
14059850d08SRoman Divacky /// standard output.  Use it like: fouts() << "foo" << "bar";
fouts()14159850d08SRoman Divacky formatted_raw_ostream &llvm::fouts() {
14259850d08SRoman Divacky   static formatted_raw_ostream S(outs());
14359850d08SRoman Divacky   return S;
14459850d08SRoman Divacky }
14559850d08SRoman Divacky 
14659850d08SRoman Divacky /// ferrs() - This returns a reference to a formatted_raw_ostream for
14759850d08SRoman Divacky /// standard error.  Use it like: ferrs() << "foo" << "bar";
ferrs()14859850d08SRoman Divacky formatted_raw_ostream &llvm::ferrs() {
14959850d08SRoman Divacky   static formatted_raw_ostream S(errs());
15059850d08SRoman Divacky   return S;
15159850d08SRoman Divacky }
152829000e0SRoman Divacky 
153829000e0SRoman Divacky /// fdbgs() - This returns a reference to a formatted_raw_ostream for
154829000e0SRoman Divacky /// the debug stream.  Use it like: fdbgs() << "foo" << "bar";
fdbgs()155829000e0SRoman Divacky formatted_raw_ostream &llvm::fdbgs() {
156829000e0SRoman Divacky   static formatted_raw_ostream S(dbgs());
157829000e0SRoman Divacky   return S;
158829000e0SRoman Divacky }
159