/**
 * @file
 * @brief Conversions between Unicode and local charsets, string
 *        manipulation functions that act on character types.
**/
6 #pragma once
7
// Width of a string, for a C string or a string object. Only the prototypes
// live in this header.
// NOTE(review): presumably measured in display columns rather than bytes --
// confirm against the definition.
int strwidth(const char *s);
int strwidth(const string &s);

// Produces a new string from s constrained by `width`.
// NOTE(review): presumably truncates to `width` and, when `spaces` is true
// (the default), pads the result with spaces -- confirm against the
// definition.
string chop_string(const char *s, int width, bool spaces = true);
string chop_string(const string &s, int width, bool spaces = true);
12
// Conversion primitives between a single char32_t value and a char buffer;
// the result is written through the first parameter `d`.
// NOTE(review): the int return values are presumably the number of bytes
// written / consumed -- confirm against the definitions.
int wctoutf8(char *d, char32_t s);
int utf8towc(char32_t *d, const char *s);
#ifdef TARGET_OS_WINDOWS
// Windows builds: UTF-16 code units are wchar_t, and the UTF-8 <-> UTF-16
// converters are declared.
typedef wchar_t utf16_t;
wstring utf8_to_16(const char *s);
string utf16_to_8(const wchar_t *s);

// Convenience overload: hands the string's NUL-terminated buffer to the
// C-string version, so the callee only receives a pointer (no length).
static inline wstring utf8_to_16(const string &s)
{
    return utf8_to_16(s.c_str());
}

// Convenience overload: hands the wide string's NUL-terminated buffer to the
// C-string version, so the callee only receives a pointer (no length).
static inline string utf16_to_8(const wstring &s)
{
    return utf16_to_8(s.c_str());
}
#else
// Other builds: only the code-unit type is provided; no UTF-16 converters
// are declared in this branch.
typedef uint16_t utf16_t;
#endif
// Conversions between UTF-8 and "mb" text, returning a new string.
// NOTE(review): "mb" presumably means the multibyte encoding of the local
// charset mentioned in the file header -- confirm against the definitions.
string utf8_to_mb(const char *s);
string mb_to_utf8(const char *s);
33
// Convenience overload: forwards the string's NUL-terminated buffer to
// utf8_to_mb(const char *). Only a pointer is passed on, so the callee has
// no length information beyond the terminator.
static inline string utf8_to_mb(const string &s)
{
    return utf8_to_mb(s.c_str());
}
// Convenience overload: forwards the string's NUL-terminated buffer to
// mb_to_utf8(const char *). Only a pointer is passed on, so the callee has
// no length information beyond the terminator.
static inline string mb_to_utf8(const string &s)
{
    return mb_to_utf8(s.c_str());
}
42
// Length associated with a single char32_t value (defined elsewhere).
// NOTE(review): presumably the number of bytes its UTF-8 encoding needs --
// confirm against the definition.
int wclen(char32_t c);
44
// Declared only when UNIX is not defined.
// NOTE(review): presumably because UNIX builds take wcwidth from the system
// headers while other targets need a project-supplied one -- confirm.
#ifndef UNIX
int wcwidth(char32_t c);
#endif
48
// Glyph-wise navigation inside a mutable char buffer (defined elsewhere).
// NOTE(review): presumably these step back / forward by one glyph from `s`,
// with `start` bounding the backward search -- confirm against the
// definitions, including what is returned at either end of the buffer.
char *prev_glyph(char *s, char *start);
char *next_glyph(char *s);
51
// Shorthand for passing converted text straight to an output call.
// Each macro calls a converter that returns its result by value and then
// takes c_str() of that temporary, so the pointer is only valid until the end
// of the full expression the macro appears in -- never store it.
// OUTW relies on utf8_to_16, which this header declares only when
// TARGET_OS_WINDOWS is defined.
#define OUTS(x) utf8_to_mb(x).c_str()
#define OUTW(x) utf8_to_16(x).c_str()
54
/**
 * Abstract interface for a source that is read one line at a time.
 *
 * Implementations must provide eof() and get_line(); error() may be
 * overridden and reports "no error" by default.
 */
class LineInput
{
public:
    // Virtual so that implementations can be destroyed through a LineInput
    // pointer or reference.
    virtual ~LineInput() = default;

    // True once no further lines are available.
    virtual bool eof() = 0;

    // True when the source is unusable; the base implementation never
    // reports an error.
    virtual bool error() { return false; }

    // Returns the next line of input.
    virtual string get_line() = 0;
};
63
64 class FileLineInput : public LineInput
65 {
66 enum bom_type
67 {
68 BOM_NORMAL, // system locale
69 BOM_UTF8,
70 BOM_UTF16LE,
71 BOM_UTF16BE,
72 BOM_UTF32LE,
73 BOM_UTF32BE,
74 };
75 FILE *f;
76 bom_type bom;
77 bool seen_eof;
78 public:
79 FileLineInput(const char *name);
80 ~FileLineInput();
eof()81 bool eof() override { return seen_eof || !f; };
error()82 bool error() override { return !f; };
83 string get_line() override;
84 };
85
86 // The file is always UTF-8, no BOM.
87 // Just read it as-is, merely validating for a well-formed stream.
88 class UTF8FileLineInput : public LineInput
89 {
90 FILE *f;
91 bool seen_eof;
92 public:
93 UTF8FileLineInput(const char *name);
94 ~UTF8FileLineInput();
eof()95 bool eof() override { return seen_eof || !f; };
error()96 bool error() override { return !f; };
97 string get_line() override;
98 };
99