1 #ifndef __CSUTILHXX__
2 #define __CSUTILHXX__
3 
4 // First some base level utility routines
5 
6 typedef struct {   // one 2-byte character; the element type taken by u16_u8() and written by u8_u16()
7     unsigned char l;   // presumably the low-order byte -- TODO confirm in the converters
8     unsigned char h;   // presumably the high-order byte -- TODO confirm in the converters
9 } w_char;
10 
11 // convert UTF-16 characters to UTF-8
12 char * u16_u8(char * dest, int size, const w_char * src, int srclen);
13 
14 // convert UTF-8 characters to UTF-16
15 int u8_u16(w_char * dest, int size, const char * src);
16 
17 // sort 2-byte vector
18 void flag_qsort(unsigned short flags[], int begin, int end);
19 
20 // binary search in 2-byte vector
21 int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
22 
23 // remove end of line char(s)
24 void   mychomp(char * s);
25 
26 // duplicate string
27 char * mystrdup(const char * s);
28 
29 // duplicate reverse of string
30 char * myrevstrdup(const char * s);
31 
32 // parse into tokens with char delimiter
33 char * mystrsep(char ** sptr, const char delim);
34 // parse into tokens with char delimiter
35 char * mystrsep2(char ** sptr, const char delim);
36 
37 // replace occurrences of a pattern in a string (args presumably: text, pattern, replacement -- confirm)
38 char * mystrrep(char *, const char *, const char *);
39 
40 // append s to the end of every line in text
41 void strlinecat(char * lines, const char * s);
42 
43 // tokenize into lines with new line
44    int line_tok(const char * text, char *** lines);
45 
46 // tokenize into lines with new line and uniq in place
47    char * line_uniq(char * text);
48 
49 // change \n to c in place
50    char * line_join(char * text, char c);
51 
52 // leave only last {[^}]*} pattern in string
53    char * delete_zeros(char * morphout);
54 
55 // reverse word
56    void reverseword(char *);
57 
58 // reverse word (UTF variant of reverseword)
59    void reverseword_utf(char *);
60 
61 // character encoding information: case data for one character, stored as 8-bit values
62 struct cs_info {
63   unsigned char ccase;    // presumably a case flag for the character -- TODO confirm
64   unsigned char clower;   // presumably the lowercase counterpart -- TODO confirm
65   unsigned char cupper;   // presumably the uppercase counterpart -- TODO confirm
66 };
67 
68 // Unicode character encoding information: case data stored as 16-bit values
69 struct unicode_info {
70   unsigned short c;        // presumably the character code this record describes -- TODO confirm
71   unsigned short cupper;   // presumably the uppercase counterpart of c -- TODO confirm
72   unsigned short clower;   // presumably the lowercase counterpart of c -- TODO confirm
73 };
74 
75 struct unicode_info2 {   // record type of the utfconv argument of mkallsmall_utf()
76   char cletter;            // presumably a flag marking the code as a letter -- TODO confirm
77   unsigned short cupper;   // presumably the uppercase counterpart -- TODO confirm
78   unsigned short clower;   // presumably the lowercase counterpart -- TODO confirm
79 };
80 
81 struct enc_entry {   // pairs an encoding name with its cs_info data
82   const char * enc_name;       // name of the encoding
83   struct cs_info * cs_table;   // cs_info data for that encoding (presumably an array -- confirm)
84 };
85 
86 // language to encoding default map
87 
88 struct lang_map {   // one entry of the language -> default encoding map
89   const char * lang;      // language identifier string
90   const char * def_enc;   // name of the default encoding for that language
91   int num;                // presumably the number reported by get_lang_num() -- TODO confirm
92 };
93 
94 struct cs_info * get_current_cs(const char * es);
95 
96 struct unicode_info * get_utf_cs(bool what = true);
97 
98 int get_utf_cs_len();
99 
100 const char * get_default_enc(const char * lang);
101 
102 int get_lang_num(const char * lang);
103 
104 // convert null terminated string to all caps using encoding
105 void enmkallcap(char * d, const char * p, const char * encoding);
106 
107 // convert null terminated string to all lowercase using encoding
108 void enmkallsmall(char * d, const char * p, const char * encoding);
109 
110 // convert null terminated string to have initial capital using encoding
111 void enmkinitcap(char * d, const char * p, const char * encoding);
112 
113 // convert null terminated string to all caps
114 void mkallcap(char * p, const struct cs_info * csconv);
115 
116 // convert null terminated string to all lowercase
117 void mkallsmall(char * p, const struct cs_info * csconv);
118 
119 // convert null terminated string to have initial capital
120 void mkinitcap(char * p, const struct cs_info * csconv);
121 
122 // convert first nc characters of a w_char (UTF-16) string to lowercase
123 void mkallsmall_utf(w_char * u, int nc, struct unicode_info2 * utfconv);
124 
125 #endif
126