1 /**
2  * @file
3  * @brief Conversions between Unicode and local charsets, string
4  *        manipulation functions that act on character types.
5 **/
6 #pragma once
7 
// String measuring and trimming helpers; each comes in a C-string and a
// string overload.
// NOTE(review): strwidth presumably returns the display width of s in
// columns rather than its byte length -- confirm against the implementation.
int strwidth(const char *s);
int strwidth(const string &s);
// NOTE(review): chop_string presumably cuts s down to at most `width`
// columns and, when `spaces` is true (the default), pads the result with
// spaces up to `width` -- confirm against the implementation.
string chop_string(const char *s, int width, bool spaces = true);
string chop_string(const string &s, int width, bool spaces = true);
12 
// Single code point <-> UTF-8 conversions.
// NOTE(review): wctoutf8 presumably writes the UTF-8 encoding of s into d and
// returns the number of bytes written; utf8towc presumably decodes one code
// point from s into *d and returns the number of bytes consumed. The required
// size of the buffer behind d is not visible here -- confirm both against the
// implementation.
int wctoutf8(char *d, char32_t s);
int utf8towc(char32_t *d, const char *s);
15 #ifdef TARGET_OS_WINDOWS
// On Windows builds (TARGET_OS_WINDOWS) utf16_t is the platform's wchar_t.
typedef wchar_t utf16_t;
// Conversions between UTF-8 and wide strings; only declared on Windows builds.
// NOTE(review): s is presumably expected to be a NUL-terminated string, as no
// length is passed -- confirm against the implementation.
wstring utf8_to_16(const char *s);
string utf16_to_8(const wchar_t *s);
19 
utf8_to_16(const string & s)20 static inline wstring utf8_to_16(const string &s)
21 {
22     return utf8_to_16(s.c_str());
23 }
utf16_to_8(const wstring & s)24 static inline string utf16_to_8(const wstring &s)
25 {
26     return utf16_to_8(s.c_str());
27 }
28 #else
// Everywhere except Windows builds, utf16_t is a plain 16-bit unsigned integer.
typedef uint16_t utf16_t;
30 #endif
// Conversions between UTF-8 and a multibyte encoding, declared on all
// platforms.
// NOTE(review): "mb" is presumably the multibyte encoding of the current
// system locale (the file header mentions "local charsets") -- confirm
// against the implementation.
string utf8_to_mb(const char *s);
string mb_to_utf8(const char *s);
33 
/**
 * Convenience overload of utf8_to_mb() taking a string.
 *
 * Forwards the string's underlying C string to the const char * overload
 * and returns its result unchanged. Only a pointer (no length) is handed
 * over.
 *
 * @param s  The string to convert.
 * @return   Whatever utf8_to_mb(const char *) returns for s.c_str().
 */
static inline string utf8_to_mb(const string &s)
{
    const char *utf8 = s.c_str();
    return utf8_to_mb(utf8);
}
/**
 * Convenience overload of mb_to_utf8() taking a string.
 *
 * Forwards the string's underlying C string to the const char * overload
 * and returns its result unchanged. Only a pointer (no length) is handed
 * over.
 *
 * @param s  The string to convert.
 * @return   Whatever mb_to_utf8(const char *) returns for s.c_str().
 */
static inline string mb_to_utf8(const string &s)
{
    const char *multibyte = s.c_str();
    return mb_to_utf8(multibyte);
}
42 
// NOTE(review): presumably the number of bytes code point c occupies when
// encoded as UTF-8 -- confirm against the implementation.
int wclen(char32_t c);

// Only declared when UNIX is not defined.
// NOTE(review): presumably UNIX builds rely on a wcwidth() supplied by the
// system headers instead -- confirm.
#ifndef UNIX
int wcwidth(char32_t c);
#endif

// Glyph-wise navigation inside a mutable character buffer.
// NOTE(review): prev_glyph presumably steps back from s without going before
// start, and next_glyph steps forward from s; what either returns at the
// boundaries of the buffer is not visible here -- confirm against the
// implementation.
char *prev_glyph(char *s, char *start);
char *next_glyph(char *s);
51 
// Shorthands that convert x and yield a pointer to the converted characters.
//
// The conversion functions return their result by value, so the pointer
// produced by .c_str() refers to a temporary and is only valid until the end
// of the full expression containing the macro. Use these directly as a
// function argument; never store the resulting pointer.
//
// OUTW relies on utf8_to_16(), which this header only declares when
// TARGET_OS_WINDOWS is defined.
#define OUTS(x) utf8_to_mb(x).c_str()
#define OUTW(x) utf8_to_16(x).c_str()
54 
55 class LineInput
56 {
57 public:
~LineInput()58     virtual ~LineInput() {}
59     virtual bool eof() = 0;
error()60     virtual bool error() { return false; };
61     virtual string get_line() = 0;
62 };
63 
64 class FileLineInput : public LineInput
65 {
66     enum bom_type
67     {
68         BOM_NORMAL, // system locale
69         BOM_UTF8,
70         BOM_UTF16LE,
71         BOM_UTF16BE,
72         BOM_UTF32LE,
73         BOM_UTF32BE,
74     };
75     FILE *f;
76     bom_type bom;
77     bool seen_eof;
78 public:
79     FileLineInput(const char *name);
80     ~FileLineInput();
eof()81     bool eof() override { return seen_eof || !f; };
error()82     bool error() override { return !f; };
83     string get_line() override;
84 };
85 
86 // The file is always UTF-8, no BOM.
87 // Just read it as-is, merely validating for a well-formed stream.
88 class UTF8FileLineInput : public LineInput
89 {
90     FILE *f;
91     bool seen_eof;
92 public:
93     UTF8FileLineInput(const char *name);
94     ~UTF8FileLineInput();
eof()95     bool eof() override { return seen_eof || !f; };
error()96     bool error() override { return !f; };
97     string get_line() override;
98 };
99