1 //===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements support for bulk buffered stream output.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/raw_ostream.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringExtras.h"
17 #include "llvm/Config/config.h"
18 #include "llvm/Support/Compiler.h"
19 #include "llvm/Support/ErrorHandling.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/Format.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Support/NativeFormatting.h"
25 #include "llvm/Support/Process.h"
26 #include "llvm/Support/Program.h"
27 #include <algorithm>
28 #include <cctype>
29 #include <cerrno>
30 #include <cstdio>
31 #include <iterator>
32 #include <sys/stat.h>
33 #include <system_error>
34 
35 // <fcntl.h> may provide O_BINARY.
36 #if defined(HAVE_FCNTL_H)
37 # include <fcntl.h>
38 #endif
39 
40 #if defined(HAVE_UNISTD_H)
41 # include <unistd.h>
42 #endif
43 
44 #if defined(__CYGWIN__)
45 #include <io.h>
46 #endif
47 
48 #if defined(_MSC_VER)
49 #include <io.h>
50 #ifndef STDIN_FILENO
51 # define STDIN_FILENO 0
52 #endif
53 #ifndef STDOUT_FILENO
54 # define STDOUT_FILENO 1
55 #endif
56 #ifndef STDERR_FILENO
57 # define STDERR_FILENO 2
58 #endif
59 #endif
60 
61 #ifdef _WIN32
62 #include "llvm/Support/ConvertUTF.h"
63 #include "llvm/Support/Windows/WindowsSupport.h"
64 #endif
65 
66 using namespace llvm;
67 
// Out-of-line definitions of raw_ostream's static color constants. No
// initializers appear here, so the values are presumably supplied by the
// in-class declarations in raw_ostream.h -- TODO confirm against the header.
const raw_ostream::Colors raw_ostream::BLACK;
const raw_ostream::Colors raw_ostream::RED;
const raw_ostream::Colors raw_ostream::GREEN;
const raw_ostream::Colors raw_ostream::YELLOW;
const raw_ostream::Colors raw_ostream::BLUE;
const raw_ostream::Colors raw_ostream::MAGENTA;
const raw_ostream::Colors raw_ostream::CYAN;
const raw_ostream::Colors raw_ostream::WHITE;
const raw_ostream::Colors raw_ostream::SAVEDCOLOR;
const raw_ostream::Colors raw_ostream::RESET;
78 
79 raw_ostream::~raw_ostream() {
80   // raw_ostream's subclasses should take care to flush the buffer
81   // in their destructors.
82   assert(OutBufCur == OutBufStart &&
83          "raw_ostream destructor called with non-empty buffer!");
84 
85   if (BufferMode == BufferKind::InternalBuffer)
86     delete [] OutBufStart;
87 }
88 
89 size_t raw_ostream::preferred_buffer_size() const {
90   // BUFSIZ is intended to be a reasonable default.
91   return BUFSIZ;
92 }
93 
94 void raw_ostream::SetBuffered() {
95   // Ask the subclass to determine an appropriate buffer size.
96   if (size_t Size = preferred_buffer_size())
97     SetBufferSize(Size);
98   else
99     // It may return 0, meaning this stream should be unbuffered.
100     SetUnbuffered();
101 }
102 
103 void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
104                                    BufferKind Mode) {
105   assert(((Mode == BufferKind::Unbuffered && !BufferStart && Size == 0) ||
106           (Mode != BufferKind::Unbuffered && BufferStart && Size != 0)) &&
107          "stream must be unbuffered or have at least one byte");
108   // Make sure the current buffer is free of content (we can't flush here; the
109   // child buffer management logic will be in write_impl).
110   assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!");
111 
112   if (BufferMode == BufferKind::InternalBuffer)
113     delete [] OutBufStart;
114   OutBufStart = BufferStart;
115   OutBufEnd = OutBufStart+Size;
116   OutBufCur = OutBufStart;
117   BufferMode = Mode;
118 
119   assert(OutBufStart <= OutBufEnd && "Invalid size!");
120 }
121 
122 raw_ostream &raw_ostream::operator<<(unsigned long N) {
123   write_integer(*this, static_cast<uint64_t>(N), 0, IntegerStyle::Integer);
124   return *this;
125 }
126 
127 raw_ostream &raw_ostream::operator<<(long N) {
128   write_integer(*this, static_cast<int64_t>(N), 0, IntegerStyle::Integer);
129   return *this;
130 }
131 
132 raw_ostream &raw_ostream::operator<<(unsigned long long N) {
133   write_integer(*this, static_cast<uint64_t>(N), 0, IntegerStyle::Integer);
134   return *this;
135 }
136 
137 raw_ostream &raw_ostream::operator<<(long long N) {
138   write_integer(*this, static_cast<int64_t>(N), 0, IntegerStyle::Integer);
139   return *this;
140 }
141 
142 raw_ostream &raw_ostream::write_hex(unsigned long long N) {
143   llvm::write_hex(*this, N, HexPrintStyle::Lower);
144   return *this;
145 }
146 
147 raw_ostream &raw_ostream::operator<<(Colors C) {
148   if (C == Colors::RESET)
149     resetColor();
150   else
151     changeColor(C);
152   return *this;
153 }
154 
155 raw_ostream &raw_ostream::write_uuid(const uuid_t UUID) {
156   for (int Idx = 0; Idx < 16; ++Idx) {
157     *this << format("%02" PRIX32, UUID[Idx]);
158     if (Idx == 3 || Idx == 5 || Idx == 7 || Idx == 9)
159       *this << "-";
160   }
161   return *this;
162 }
163 
164 
165 raw_ostream &raw_ostream::write_escaped(StringRef Str,
166                                         bool UseHexEscapes) {
167   for (unsigned char c : Str) {
168     switch (c) {
169     case '\\':
170       *this << '\\' << '\\';
171       break;
172     case '\t':
173       *this << '\\' << 't';
174       break;
175     case '\n':
176       *this << '\\' << 'n';
177       break;
178     case '"':
179       *this << '\\' << '"';
180       break;
181     default:
182       if (isPrint(c)) {
183         *this << c;
184         break;
185       }
186 
187       // Write out the escaped representation.
188       if (UseHexEscapes) {
189         *this << '\\' << 'x';
190         *this << hexdigit((c >> 4 & 0xF));
191         *this << hexdigit((c >> 0) & 0xF);
192       } else {
193         // Always use a full 3-character octal escape.
194         *this << '\\';
195         *this << char('0' + ((c >> 6) & 7));
196         *this << char('0' + ((c >> 3) & 7));
197         *this << char('0' + ((c >> 0) & 7));
198       }
199     }
200   }
201 
202   return *this;
203 }
204 
205 raw_ostream &raw_ostream::operator<<(const void *P) {
206   llvm::write_hex(*this, (uintptr_t)P, HexPrintStyle::PrefixLower);
207   return *this;
208 }
209 
210 raw_ostream &raw_ostream::operator<<(double N) {
211   llvm::write_double(*this, N, FloatStyle::Exponent);
212   return *this;
213 }
214 
215 void raw_ostream::flush_nonempty() {
216   assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
217   size_t Length = OutBufCur - OutBufStart;
218   OutBufCur = OutBufStart;
219   write_impl(OutBufStart, Length);
220 }
221 
222 raw_ostream &raw_ostream::write(unsigned char C) {
223   // Group exceptional cases into a single branch.
224   if (LLVM_UNLIKELY(OutBufCur >= OutBufEnd)) {
225     if (LLVM_UNLIKELY(!OutBufStart)) {
226       if (BufferMode == BufferKind::Unbuffered) {
227         write_impl(reinterpret_cast<char*>(&C), 1);
228         return *this;
229       }
230       // Set up a buffer and start over.
231       SetBuffered();
232       return write(C);
233     }
234 
235     flush_nonempty();
236   }
237 
238   *OutBufCur++ = C;
239   return *this;
240 }
241 
/// Appends \p Size bytes starting at \p Ptr to the stream and returns *this.
///
/// If the bytes fit in the free part of the buffer they are simply copied in.
/// Otherwise the data is handed to write_impl directly (unbuffered streams,
/// or an empty buffer that is too small), or the buffer is topped up, flushed,
/// and the remainder is written by a recursive call.
raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) {
  // Group exceptional cases into a single branch.
  if (LLVM_UNLIKELY(size_t(OutBufEnd - OutBufCur) < Size)) {
    if (LLVM_UNLIKELY(!OutBufStart)) {
      if (BufferMode == BufferKind::Unbuffered) {
        write_impl(Ptr, Size);
        return *this;
      }
      // Set up a buffer and start over.
      SetBuffered();
      return write(Ptr, Size);
    }

    // Free space left in the buffer.
    size_t NumBytes = OutBufEnd - OutBufCur;

    // If the buffer is empty at this point we have a string that is larger
    // than the buffer. Directly write the chunk that is a multiple of the
    // preferred buffer size and put the remainder in the buffer.
    if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) {
      // The buffer is empty here, so NumBytes is the whole buffer capacity;
      // it is used as a divisor below and therefore must not be zero.
      assert(NumBytes != 0 && "undefined behavior");
      size_t BytesToWrite = Size - (Size % NumBytes);
      write_impl(Ptr, BytesToWrite);
      size_t BytesRemaining = Size - BytesToWrite;
      if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) {
        // Too much left over to copy into our buffer.
        return write(Ptr + BytesToWrite, BytesRemaining);
      }
      copy_to_buffer(Ptr + BytesToWrite, BytesRemaining);
      return *this;
    }

    // We don't have enough space in the buffer to fit the string in. Insert as
    // much as possible, flush and start over with the remainder.
    copy_to_buffer(Ptr, NumBytes);
    flush_nonempty();
    return write(Ptr + NumBytes, Size - NumBytes);
  }

  // Fast path: everything fits in the remaining buffer space.
  copy_to_buffer(Ptr, Size);

  return *this;
}
284 
/// Copies \p Size bytes from \p Ptr to the current buffer position and
/// advances OutBufCur by \p Size. The caller must ensure the bytes fit in the
/// remaining buffer space.
void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
  assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!");

  // Handle short strings specially, memcpy isn't very good at very short
  // strings.
  // Each case copies one byte and falls through to the next smaller size, so
  // entering at case N copies exactly N bytes.
  switch (Size) {
  case 4: OutBufCur[3] = Ptr[3]; LLVM_FALLTHROUGH;
  case 3: OutBufCur[2] = Ptr[2]; LLVM_FALLTHROUGH;
  case 2: OutBufCur[1] = Ptr[1]; LLVM_FALLTHROUGH;
  case 1: OutBufCur[0] = Ptr[0]; LLVM_FALLTHROUGH;
  case 0: break;
  default:
    memcpy(OutBufCur, Ptr, Size);
    break;
  }

  OutBufCur += Size;
}
303 
304 // Formatted output.
305 raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
306   // If we have more than a few bytes left in our output buffer, try
307   // formatting directly onto its end.
308   size_t NextBufferSize = 127;
309   size_t BufferBytesLeft = OutBufEnd - OutBufCur;
310   if (BufferBytesLeft > 3) {
311     size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
312 
313     // Common case is that we have plenty of space.
314     if (BytesUsed <= BufferBytesLeft) {
315       OutBufCur += BytesUsed;
316       return *this;
317     }
318 
319     // Otherwise, we overflowed and the return value tells us the size to try
320     // again with.
321     NextBufferSize = BytesUsed;
322   }
323 
324   // If we got here, we didn't have enough space in the output buffer for the
325   // string.  Try printing into a SmallVector that is resized to have enough
326   // space.  Iterate until we win.
327   SmallVector<char, 128> V;
328 
329   while (true) {
330     V.resize(NextBufferSize);
331 
332     // Try formatting into the SmallVector.
333     size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
334 
335     // If BytesUsed fit into the vector, we win.
336     if (BytesUsed <= NextBufferSize)
337       return write(V.data(), BytesUsed);
338 
339     // Otherwise, try again with a new size.
340     assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
341     NextBufferSize = BytesUsed;
342   }
343 }
344 
345 raw_ostream &raw_ostream::operator<<(const formatv_object_base &Obj) {
346   SmallString<128> S;
347   Obj.format(*this);
348   return *this;
349 }
350 
351 raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
352   if (FS.Str.size() >= FS.Width || FS.Justify == FormattedString::JustifyNone) {
353     this->operator<<(FS.Str);
354     return *this;
355   }
356   const size_t Difference = FS.Width - FS.Str.size();
357   switch (FS.Justify) {
358   case FormattedString::JustifyLeft:
359     this->operator<<(FS.Str);
360     this->indent(Difference);
361     break;
362   case FormattedString::JustifyRight:
363     this->indent(Difference);
364     this->operator<<(FS.Str);
365     break;
366   case FormattedString::JustifyCenter: {
367     int PadAmount = Difference / 2;
368     this->indent(PadAmount);
369     this->operator<<(FS.Str);
370     this->indent(Difference - PadAmount);
371     break;
372   }
373   default:
374     llvm_unreachable("Bad Justification");
375   }
376   return *this;
377 }
378 
379 raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
380   if (FN.Hex) {
381     HexPrintStyle Style;
382     if (FN.Upper && FN.HexPrefix)
383       Style = HexPrintStyle::PrefixUpper;
384     else if (FN.Upper && !FN.HexPrefix)
385       Style = HexPrintStyle::Upper;
386     else if (!FN.Upper && FN.HexPrefix)
387       Style = HexPrintStyle::PrefixLower;
388     else
389       Style = HexPrintStyle::Lower;
390     llvm::write_hex(*this, FN.HexValue, Style, FN.Width);
391   } else {
392     llvm::SmallString<16> Buffer;
393     llvm::raw_svector_ostream Stream(Buffer);
394     llvm::write_integer(Stream, FN.DecValue, 0, IntegerStyle::Integer);
395     if (Buffer.size() < FN.Width)
396       indent(FN.Width - Buffer.size());
397     (*this) << Buffer;
398   }
399   return *this;
400 }
401 
/// Prints FB.Bytes as a hex dump with up to FB.NumPerLine bytes per line.
///
/// Each line is indented by FB.IndentLevel, optionally prefixed with the
/// offset of its first byte (when FB.FirstByteOffset is set), followed by the
/// bytes in two-digit hex with a space before every FB.ByteGroupSize-th byte,
/// and optionally an ASCII column delimited by '|' (when FB.ASCII is set).
/// Lines are separated by '\n'; no newline follows the last line. Nothing is
/// printed for an empty byte range.
raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) {
  if (FB.Bytes.empty())
    return *this;

  // Number of bytes printed so far; also the offset delta of the next line.
  size_t LineIndex = 0;
  auto Bytes = FB.Bytes;
  const size_t Size = Bytes.size();
  HexPrintStyle HPS = FB.Upper ? HexPrintStyle::Upper : HexPrintStyle::Lower;
  uint64_t OffsetWidth = 0;
  if (FB.FirstByteOffset.hasValue()) {
    // Figure out how many nibbles are needed to print the largest offset
    // represented by this data set, so that we can align the offset field
    // to the right width.
    size_t Lines = Size / FB.NumPerLine;
    uint64_t MaxOffset = *FB.FirstByteOffset + Lines * FB.NumPerLine;
    unsigned Power = 0;
    if (MaxOffset > 0)
      Power = llvm::Log2_64_Ceil(MaxOffset);
    // Round the bit count up to whole nibbles, with a minimum of 4 digits.
    OffsetWidth = std::max<uint64_t>(4, llvm::alignTo(Power, 4) / 4);
  }

  // The width of a block of data including all spaces for group separators.
  unsigned NumByteGroups =
      alignTo(FB.NumPerLine, FB.ByteGroupSize) / FB.ByteGroupSize;
  unsigned BlockCharWidth = FB.NumPerLine * 2 + NumByteGroups - 1;

  while (!Bytes.empty()) {
    indent(FB.IndentLevel);

    if (FB.FirstByteOffset.hasValue()) {
      uint64_t Offset = FB.FirstByteOffset.getValue();
      llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth);
      *this << ": ";
    }

    // The bytes for this line (fewer than NumPerLine on a short last line).
    auto Line = Bytes.take_front(FB.NumPerLine);

    size_t CharsPrinted = 0;
    // Print the hex bytes for this line in groups
    for (size_t I = 0; I < Line.size(); ++I, CharsPrinted += 2) {
      if (I && (I % FB.ByteGroupSize) == 0) {
        ++CharsPrinted;
        *this << " ";
      }
      llvm::write_hex(*this, Line[I], HPS, 2);
    }

    if (FB.ASCII) {
      // Print any spaces needed for any bytes that we didn't print on this
      // line so that the ASCII bytes are correctly aligned.
      assert(BlockCharWidth >= CharsPrinted);
      indent(BlockCharWidth - CharsPrinted + 2);
      *this << "|";

      // Print the ASCII char values for each byte on this line
      for (uint8_t Byte : Line) {
        if (isPrint(Byte))
          *this << static_cast<char>(Byte);
        else
          *this << '.';
      }
      *this << '|';
    }

    Bytes = Bytes.drop_front(Line.size());
    LineIndex += Line.size();
    // Separate lines with a newline, but do not terminate the last one.
    if (LineIndex < Size)
      *this << '\n';
  }
  return *this;
}
473 
474 template <char C>
475 static raw_ostream &write_padding(raw_ostream &OS, unsigned NumChars) {
476   static const char Chars[] = {C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,
477                                C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,
478                                C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,
479                                C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C,
480                                C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C};
481 
482   // Usually the indentation is small, handle it with a fastpath.
483   if (NumChars < array_lengthof(Chars))
484     return OS.write(Chars, NumChars);
485 
486   while (NumChars) {
487     unsigned NumToWrite = std::min(NumChars,
488                                    (unsigned)array_lengthof(Chars)-1);
489     OS.write(Chars, NumToWrite);
490     NumChars -= NumToWrite;
491   }
492   return OS;
493 }
494 
495 /// indent - Insert 'NumSpaces' spaces.
496 raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
497   return write_padding<' '>(*this, NumSpaces);
498 }
499 
500 /// write_zeros - Insert 'NumZeros' nulls.
501 raw_ostream &raw_ostream::write_zeros(unsigned NumZeros) {
502   return write_padding<'\0'>(*this, NumZeros);
503 }
504 
// Intentionally empty out-of-line definition. NOTE(review): presumably the
// usual "anchor" idiom for pinning the class's vtable to this file -- confirm
// against the declaration in the header.
void raw_ostream::anchor() {}
506 
507 //===----------------------------------------------------------------------===//
508 //  Formatted Output
509 //===----------------------------------------------------------------------===//
510 
// Out of line virtual method. The body is intentionally empty; the function
// exists only so that it has a definition in this file.
void format_object_base::home() {
}
514 
515 //===----------------------------------------------------------------------===//
516 //  raw_fd_ostream
517 //===----------------------------------------------------------------------===//
518 
519 static int getFD(StringRef Filename, std::error_code &EC,
520                  sys::fs::CreationDisposition Disp, sys::fs::FileAccess Access,
521                  sys::fs::OpenFlags Flags) {
522   assert((Access & sys::fs::FA_Write) &&
523          "Cannot make a raw_ostream from a read-only descriptor!");
524 
525   // Handle "-" as stdout. Note that when we do this, we consider ourself
526   // the owner of stdout and may set the "binary" flag globally based on Flags.
527   if (Filename == "-") {
528     EC = std::error_code();
529     // If user requested binary then put stdout into binary mode if
530     // possible.
531     if (!(Flags & sys::fs::OF_Text))
532       sys::ChangeStdoutToBinary();
533     return STDOUT_FILENO;
534   }
535 
536   int FD;
537   if (Access & sys::fs::FA_Read)
538     EC = sys::fs::openFileForReadWrite(Filename, FD, Disp, Flags);
539   else
540     EC = sys::fs::openFileForWrite(Filename, FD, Disp, Flags);
541   if (EC)
542     return -1;
543 
544   return FD;
545 }
546 
/// Opens \p Filename with CD_CreateAlways, FA_Write and OF_None by delegating
/// to the five-argument constructor. Failures are reported through \p EC.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC)
    : raw_fd_ostream(Filename, EC, sys::fs::CD_CreateAlways, sys::fs::FA_Write,
                     sys::fs::OF_None) {}
550 
/// Opens \p Filename with the creation disposition \p Disp, using FA_Write
/// and OF_None for the remaining options.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
                               sys::fs::CreationDisposition Disp)
    : raw_fd_ostream(Filename, EC, Disp, sys::fs::FA_Write, sys::fs::OF_None) {}
554 
/// Opens \p Filename with the access mode \p Access, using CD_CreateAlways
/// and OF_None for the remaining options.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
                               sys::fs::FileAccess Access)
    : raw_fd_ostream(Filename, EC, sys::fs::CD_CreateAlways, Access,
                     sys::fs::OF_None) {}
559 
/// Opens \p Filename with the open flags \p Flags, using CD_CreateAlways and
/// FA_Write for the remaining options.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
                               sys::fs::OpenFlags Flags)
    : raw_fd_ostream(Filename, EC, sys::fs::CD_CreateAlways, sys::fs::FA_Write,
                     Flags) {}
564 
/// Opens \p Filename through getFD() with the given options and wraps the
/// resulting descriptor, asking for it to be closed on destruction. When
/// getFD() fails it yields -1 and sets \p EC.
raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC,
                               sys::fs::CreationDisposition Disp,
                               sys::fs::FileAccess Access,
                               sys::fs::OpenFlags Flags)
    : raw_fd_ostream(getFD(Filename, EC, Disp, Access, Flags), true) {}
570 
571 /// FD is the file descriptor that this writes to.  If ShouldClose is true, this
572 /// closes the file when the stream is destroyed.
573 raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered)
574     : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose) {
575   if (FD < 0 ) {
576     ShouldClose = false;
577     return;
578   }
579 
580   // Do not attempt to close stdout or stderr. We used to try to maintain the
581   // property that tools that support writing file to stdout should not also
582   // write informational output to stdout, but in practice we were never able to
583   // maintain this invariant. Many features have been added to LLVM and clang
584   // (-fdump-record-layouts, optimization remarks, etc) that print to stdout, so
585   // users must simply be aware that mixed output and remarks is a possibility.
586   if (FD <= STDERR_FILENO)
587     ShouldClose = false;
588 
589 #ifdef _WIN32
590   // Check if this is a console device. This is not equivalent to isatty.
591   IsWindowsConsole =
592       ::GetFileType((HANDLE)::_get_osfhandle(fd)) == FILE_TYPE_CHAR;
593 #endif
594 
595   // Get the starting position.
596   off_t loc = ::lseek(FD, 0, SEEK_CUR);
597 #ifdef _WIN32
598   // MSVCRT's _lseek(SEEK_CUR) doesn't return -1 for pipes.
599   sys::fs::file_status Status;
600   std::error_code EC = status(FD, Status);
601   SupportsSeeking = !EC && Status.type() == sys::fs::file_type::regular_file;
602 #else
603   SupportsSeeking = loc != (off_t)-1;
604 #endif
605   if (!SupportsSeeking)
606     pos = 0;
607   else
608     pos = static_cast<uint64_t>(loc);
609 }
610 
611 raw_fd_ostream::~raw_fd_ostream() {
612   if (FD >= 0) {
613     flush();
614     if (ShouldClose) {
615       if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
616         error_detected(EC);
617     }
618   }
619 
620 #ifdef __MINGW32__
621   // On mingw, global dtors should not call exit().
622   // report_fatal_error() invokes exit(). We know report_fatal_error()
623   // might not write messages to stderr when any errors were detected
624   // on FD == 2.
625   if (FD == 2) return;
626 #endif
627 
628   // If there are any pending errors, report them now. Clients wishing
629   // to avoid report_fatal_error calls should check for errors with
630   // has_error() and clear the error flag with clear_error() before
631   // destructing raw_ostream objects which may have errors.
632   if (has_error())
633     report_fatal_error("IO failure on output stream: " + error().message(),
634                        /*gen_crash_diag=*/false);
635 }
636 
637 #if defined(_WIN32)
638 // The most reliable way to print unicode in a Windows console is with
639 // WriteConsoleW. To use that, first transcode from UTF-8 to UTF-16. This
640 // assumes that LLVM programs always print valid UTF-8 to the console. The data
641 // might not be UTF-8 for two major reasons:
642 // 1. The program is printing binary (-filetype=obj -o -), in which case it
643 // would have been gibberish anyway.
644 // 2. The program is printing text in a semi-ascii compatible codepage like
645 // shift-jis or cp1252.
646 //
647 // Most LLVM programs don't produce non-ascii text unless they are quoting
648 // user source input. A well-behaved LLVM program should either validate that
649 // the input is UTF-8 or transcode from the local codepage to UTF-8 before
650 // quoting it. If they don't, this may mess up the encoding, but this is still
651 // probably the best compromise we can make.
652 static bool write_console_impl(int FD, StringRef Data) {
653   SmallVector<wchar_t, 256> WideText;
654 
655   // Fall back to ::write if it wasn't valid UTF-8.
656   if (auto EC = sys::windows::UTF8ToUTF16(Data, WideText))
657     return false;
658 
659   // On Windows 7 and earlier, WriteConsoleW has a low maximum amount of data
660   // that can be written to the console at a time.
661   size_t MaxWriteSize = WideText.size();
662   if (!RunningWindows8OrGreater())
663     MaxWriteSize = 32767;
664 
665   size_t WCharsWritten = 0;
666   do {
667     size_t WCharsToWrite =
668         std::min(MaxWriteSize, WideText.size() - WCharsWritten);
669     DWORD ActuallyWritten;
670     bool Success =
671         ::WriteConsoleW((HANDLE)::_get_osfhandle(FD), &WideText[WCharsWritten],
672                         WCharsToWrite, &ActuallyWritten,
673                         /*Reserved=*/nullptr);
674 
675     // The most likely reason for WriteConsoleW to fail is that FD no longer
676     // points to a console. Fall back to ::write. If this isn't the first loop
677     // iteration, something is truly wrong.
678     if (!Success)
679       return false;
680 
681     WCharsWritten += ActuallyWritten;
682   } while (WCharsWritten != WideText.size());
683   return true;
684 }
685 #endif
686 
/// Writes \p Size bytes starting at \p Ptr to the file descriptor.
///
/// pos is advanced by \p Size before anything is written, so it is advanced
/// even if the write later fails. On Windows consoles the data is first
/// offered to write_console_impl. Otherwise the data is passed to ::write in
/// chunks of at most MaxWriteSize until everything has been written; EINTR,
/// EAGAIN and EWOULDBLOCK are retried, any other error is recorded with
/// error_detected() and ends the loop.
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
  assert(FD >= 0 && "File already closed.");
  pos += Size;

#if defined(_WIN32)
  // If this is a Windows console device, try re-encoding from UTF-8 to UTF-16
  // and using WriteConsoleW. If that fails, fall back to plain write().
  if (IsWindowsConsole)
    if (write_console_impl(FD, StringRef(Ptr, Size)))
      return;
#endif

  // The maximum write size is limited to INT32_MAX. A write
  // greater than SSIZE_MAX is implementation-defined in POSIX,
  // and Windows _write requires 32 bit input.
  size_t MaxWriteSize = INT32_MAX;

#if defined(__linux__)
  // It is observed that Linux returns EINVAL for a very large write (>2G).
  // Make it a reasonably small value.
  MaxWriteSize = 1024 * 1024 * 1024;
#endif

  do {
    size_t ChunkSize = std::min(Size, MaxWriteSize);
    ssize_t ret = ::write(FD, Ptr, ChunkSize);

    if (ret < 0) {
      // If it's a recoverable error, swallow it and retry the write.
      //
      // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
      // raw_ostream isn't designed to do non-blocking I/O. However, some
      // programs, such as old versions of bjam, have mistakenly used
      // O_NONBLOCK. For compatibility, emulate blocking semantics by
      // spinning until the write succeeds. If you don't want spinning,
      // don't use O_NONBLOCK file descriptors with raw_ostream.
      if (errno == EINTR || errno == EAGAIN
#ifdef EWOULDBLOCK
          || errno == EWOULDBLOCK
#endif
          )
        // Ptr and Size are unchanged here, so the same chunk is retried.
        continue;

      // Otherwise it's a non-recoverable error. Note it and quit.
      error_detected(std::error_code(errno, std::generic_category()));
      break;
    }

    // The write may have written some or all of the data. Update the
    // size and buffer pointer to reflect the remainder that needs
    // to be written. If there are no bytes left, we're done.
    Ptr += ret;
    Size -= ret;
  } while (Size > 0);
}
742 
743 void raw_fd_ostream::close() {
744   assert(ShouldClose);
745   ShouldClose = false;
746   flush();
747   if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD))
748     error_detected(EC);
749   FD = -1;
750 }
751 
752 uint64_t raw_fd_ostream::seek(uint64_t off) {
753   assert(SupportsSeeking && "Stream does not support seeking!");
754   flush();
755 #ifdef _WIN32
756   pos = ::_lseeki64(FD, off, SEEK_SET);
757 #elif defined(HAVE_LSEEK64)
758   pos = ::lseek64(FD, off, SEEK_SET);
759 #else
760   pos = ::lseek(FD, off, SEEK_SET);
761 #endif
762   if (pos == (uint64_t)-1)
763     error_detected(std::error_code(errno, std::generic_category()));
764   return pos;
765 }
766 
767 void raw_fd_ostream::pwrite_impl(const char *Ptr, size_t Size,
768                                  uint64_t Offset) {
769   uint64_t Pos = tell();
770   seek(Offset);
771   write(Ptr, Size);
772   seek(Pos);
773 }
774 
775 size_t raw_fd_ostream::preferred_buffer_size() const {
776 #if defined(_WIN32)
777   // Disable buffering for console devices. Console output is re-encoded from
778   // UTF-8 to UTF-16 on Windows, and buffering it would require us to split the
779   // buffer on a valid UTF-8 codepoint boundary. Terminal buffering is disabled
780   // below on most other OSs, so do the same thing on Windows and avoid that
781   // complexity.
782   if (IsWindowsConsole)
783     return 0;
784   return raw_ostream::preferred_buffer_size();
785 #elif !defined(__minix)
786   // Minix has no st_blksize.
787   assert(FD >= 0 && "File not yet open!");
788   struct stat statbuf;
789   if (fstat(FD, &statbuf) != 0)
790     return 0;
791 
792   // If this is a terminal, don't use buffering. Line buffering
793   // would be a more traditional thing to do, but it's not worth
794   // the complexity.
795   if (S_ISCHR(statbuf.st_mode) && isatty(FD))
796     return 0;
797   // Return the preferred block size.
798   return statbuf.st_blksize;
799 #else
800   return raw_ostream::preferred_buffer_size();
801 #endif
802 }
803 
804 raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
805                                          bool bg) {
806   if (!ColorEnabled)
807     return *this;
808 
809   if (sys::Process::ColorNeedsFlush())
810     flush();
811   const char *colorcode =
812       (colors == SAVEDCOLOR)
813           ? sys::Process::OutputBold(bg)
814           : sys::Process::OutputColor(static_cast<char>(colors), bold, bg);
815   if (colorcode) {
816     size_t len = strlen(colorcode);
817     write(colorcode, len);
818     // don't account colors towards output characters
819     pos -= len;
820   }
821   return *this;
822 }
823 
824 raw_ostream &raw_fd_ostream::resetColor() {
825   if (!ColorEnabled)
826     return *this;
827 
828   if (sys::Process::ColorNeedsFlush())
829     flush();
830   const char *colorcode = sys::Process::ResetColor();
831   if (colorcode) {
832     size_t len = strlen(colorcode);
833     write(colorcode, len);
834     // don't account colors towards output characters
835     pos -= len;
836   }
837   return *this;
838 }
839 
840 raw_ostream &raw_fd_ostream::reverseColor() {
841   if (!ColorEnabled)
842     return *this;
843 
844   if (sys::Process::ColorNeedsFlush())
845     flush();
846   const char *colorcode = sys::Process::OutputReverse();
847   if (colorcode) {
848     size_t len = strlen(colorcode);
849     write(colorcode, len);
850     // don't account colors towards output characters
851     pos -= len;
852   }
853   return *this;
854 }
855 
856 bool raw_fd_ostream::is_displayed() const {
857   return sys::Process::FileDescriptorIsDisplayed(FD);
858 }
859 
860 bool raw_fd_ostream::has_colors() const {
861   return sys::Process::FileDescriptorHasColors(FD);
862 }
863 
// Intentionally empty out-of-line definition. NOTE(review): presumably the
// usual "anchor" idiom for pinning the class's vtable to this file -- confirm
// against the declaration in the header.
void raw_fd_ostream::anchor() {}
865 
866 //===----------------------------------------------------------------------===//
867 //  outs(), errs(), nulls()
868 //===----------------------------------------------------------------------===//
869 
870 /// outs() - This returns a reference to a raw_ostream for standard output.
871 /// Use it like: outs() << "foo" << "bar";
872 raw_ostream &llvm::outs() {
873   // Set buffer settings to model stdout behavior.
874   std::error_code EC;
875   static raw_fd_ostream S("-", EC, sys::fs::OF_None);
876   assert(!EC);
877   return S;
878 }
879 
880 /// errs() - This returns a reference to a raw_ostream for standard error.
881 /// Use it like: errs() << "foo" << "bar";
882 raw_ostream &llvm::errs() {
883   // Set standard error to be unbuffered by default.
884   static raw_fd_ostream S(STDERR_FILENO, false, true);
885   return S;
886 }
887 
888 /// nulls() - This returns a reference to a raw_ostream which discards output.
889 raw_ostream &llvm::nulls() {
890   static raw_null_ostream S;
891   return S;
892 }
893 
894 //===----------------------------------------------------------------------===//
895 //  raw_string_ostream
896 //===----------------------------------------------------------------------===//
897 
/// Flushes any buffered bytes so the buffer is empty before the base class
/// destructor runs (which asserts exactly that).
raw_string_ostream::~raw_string_ostream() {
  flush();
}
901 
/// Appends the \p Size bytes at \p Ptr to the underlying string OS.
void raw_string_ostream::write_impl(const char *Ptr, size_t Size) {
  OS.append(Ptr, Size);
}
905 
906 //===----------------------------------------------------------------------===//
907 //  raw_svector_ostream
908 //===----------------------------------------------------------------------===//
909 
/// The current position is the number of bytes stored in the vector OS.
uint64_t raw_svector_ostream::current_pos() const { return OS.size(); }
911 
912 void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) {
913   OS.append(Ptr, Ptr + Size);
914 }
915 
916 void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size,
917                                       uint64_t Offset) {
918   memcpy(OS.data() + Offset, Ptr, Size);
919 }
920 
921 //===----------------------------------------------------------------------===//
922 //  raw_null_ostream
923 //===----------------------------------------------------------------------===//
924 
/// In builds with assertions enabled, flushes the stream so that the
/// empty-buffer assertion in ~raw_ostream holds. In NDEBUG builds nothing is
/// done.
raw_null_ostream::~raw_null_ostream() {
#ifndef NDEBUG
  // ~raw_ostream asserts that the buffer is empty. This isn't necessary
  // with raw_null_ostream, but it's better to have raw_null_ostream follow
  // the rules than to change the rules just for raw_null_ostream.
  flush();
#endif
}
933 
/// Discards the data: the body is intentionally empty.
void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
}
936 
/// Always reports position 0, regardless of how much has been written.
uint64_t raw_null_ostream::current_pos() const {
  return 0;
}
940 
/// Discards the positioned write: the body is intentionally empty.
void raw_null_ostream::pwrite_impl(const char *Ptr, size_t Size,
                                   uint64_t Offset) {}
943 
// Intentionally empty out-of-line definition. NOTE(review): presumably the
// usual "anchor" idiom for pinning the class's vtable to this file -- confirm
// against the declaration in the header.
void raw_pwrite_stream::anchor() {}
945 
// Intentionally empty out-of-line definition. NOTE(review): presumably the
// usual "anchor" idiom for pinning the class's vtable to this file -- confirm
// against the declaration in the header.
void buffer_ostream::anchor() {}
947