Fix displaying of wide (CJK) characters for Win32.

diff --git a/curspriv.h b/curspriv.h
index 8b34f01..719b611 100644
--- a/curspriv.h
+++ b/curspriv.h
@@ -62,6 +62,12 @@ extern bool pdc_color_started;
 extern unsigned long pdc_key_modifiers;
 extern MOUSE_STATUS pdc_mouse_status;
 
+#ifdef PDC_WIDE
+# include <wchar.h> /* wchar_t */
+/* Column width of ucs: 0, 1 or 2, or -1 for C0/C1 control characters. */
+int compat_wcwidth(wchar_t ucs);
+#endif
+
 /*----------------------------------------------------------------------*/
 
 /* Platform implementation functions */
diff --git a/libobjs.mif b/libobjs.mif
index 6705323..f1f22e4 100644
--- a/libobjs.mif
+++ b/libobjs.mif
@@ -17,7 +17,7 @@ initscr.$(O) inopts.$(O) insch.$(O) insstr.$(O) instr.$(O) kernel.$(O) \
 keyname.$(O) mouse.$(O) move.$(O) outopts.$(O) overlay.$(O) pad.$(O) \
 panel.$(O) printw.$(O) refresh.$(O) scanw.$(O) scr_dump.$(O) scroll.$(O) \
 slk.$(O) termattr.$(O) terminfo.$(O) touch.$(O) util.$(O) window.$(O) \
-debug.$(O)
+debug.$(O) wcwidth.$(O)
 
 PDCOBJS = pdcclip.$(O) pdcdisp.$(O) pdcgetsc.$(O) pdckbd.$(O) pdcscrn.$(O) \
 pdcsetsc.$(O) pdcutil.$(O)
diff --git a/pdcurses/addch.c b/pdcurses/addch.c
index 586d1a7..6481760 100644
--- a/pdcurses/addch.c
+++ b/pdcurses/addch.c
@@ -240,7 +240,21 @@ int waddch(WINDOW *win, const chtype ch)
             win->_y[y][x] = text;
         }
 
-        if (++x >= win->_maxx)
+#ifdef PDC_WIDE
+        /* Leave the rest "width - 1" character positions untouched, just jump
+         * over them. Platform-specific code is expected to do something
+         * similar. A zero or negative width (combining or control
+         * character) must still consume the cell that was just written. */
+        {
+            const int width = compat_wcwidth((wchar_t)(text & A_CHARTEXT));
+            x += (width > 1) ? width : 1;
+        }
+        /* XXX: this might result in truncated character, might want to perform
+         * the check before setting the character. */
+#else
+        ++x;
+#endif
+        if (x >= win->_maxx)
         {
             /* wrap around test */
 
diff --git a/pdcurses/wcwidth.c b/pdcurses/wcwidth.c
new file mode 100644
index 0000000..b80201c
--- /dev/null
+++ b/pdcurses/wcwidth.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) Markus Kuhn -- 2007-05-26 (Unicode 5.0)
+ * Copyright (C) xaizek -- 2014 (Adapt for vifm)
+ * Copyright (C) xaizek -- 2016 (Adapt for pdcurses)
+ *
+ * This is an implementation of wcwidth() and wcswidth() (defined in
+ * IEEE Std 1003.1-2001) for Unicode.
+ *
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
+ *
+ * In fixed-width output devices, Latin characters all occupy a single
+ * "cell" position of equal width, whereas ideographic CJK characters
+ * occupy two such cells. Interoperability between terminal-line
+ * applications and (teletype-style) character terminals using the
+ * UTF-8 encoding requires agreement on which character should advance
+ * the cursor by how many cell positions. No established formal
+ * standards exist at present on which Unicode character shall occupy
+ * how many cell positions on character terminals. These routines are
+ * a first attempt of defining such behavior based on simple rules
+ * applied to data provided by the Unicode Consortium.
+ *
+ * For some graphical characters, the Unicode standard explicitly
+ * defines a character-cell width via the definition of the East Asian
+ * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
+ * In all these cases, there is no ambiguity about which width a
+ * terminal shall use. For characters in the East Asian Ambiguous (A)
+ * class, the width choice depends purely on a preference of backward
+ * compatibility with either historic CJK or Western practice.
+ * Choosing single-width for these characters is easy to justify as
+ * the appropriate long-term solution, as the CJK practice of
+ * displaying these characters as double-width comes from historic
+ * implementation simplicity (8-bit encoded characters were displayed
+ * single-width and 16-bit ones double-width, even for Greek,
+ * Cyrillic, etc.) and not any typographic considerations.
+ *
+ * Much less clear is the choice of width for the Not East Asian
+ * (Neutral) class. Existing practice does not dictate a width for any
+ * of these characters. It would nevertheless make sense
+ * typographically to allocate two character cells to characters such
+ * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
+ * represented adequately with a single-width glyph. The following
+ * routines at present merely assign a single-cell width to all
+ * neutral characters, in the interest of simplicity. This is not
+ * entirely satisfactory and should be reconsidered before
+ * establishing a formal standard in this area. At the moment, the
+ * decision which Not East Asian (Neutral) characters should be
+ * represented by double-width glyphs cannot yet be answered by
+ * applying a simple rule from the Unicode database content. Setting
+ * up a proper standard for the behavior of UTF-8 character terminals
+ * will require a careful analysis not only of each Unicode character,
+ * but also of each presentation form, something the author of these
+ * routines has avoided to do so far.
+ *
+ * http://www.unicode.org/unicode/reports/tr11/
+ *
+ * Permission to use, copy, modify, and distribute this software
+ * for any purpose and without fee is hereby granted. The author
+ * disclaims all warranties with regard to this software.
+ *
+ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ */
+
+#ifdef PDC_WIDE
+
+#include <curspriv.h>
+
+#include <wchar.h> /* wchar_t */
+
+typedef struct
+{
+    int first;
+    int last;
+}
+interval;
+
+static int bisearch(wchar_t ucs, const interval *table, int max);
+
+/* The following two functions define the column width of an ISO 10646
+ * character as follows:
+ * - The null character (U+0000) has a column width of 0.
+ * - Other C0/C1 control characters and DEL will lead to a return
+ *   value of -1.
+ * - Non-spacing and enclosing combining characters (general
+ *   category code Mn or Me in the Unicode database) have a
+ *   column width of 0.
+ * - SOFT HYPHEN (U+00AD) has a column width of 1.
+ * - Other format characters (general category code Cf in the Unicode
+ *   database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
+ * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
+ *   have a column width of 0.
+ * - Spacing characters in the East Asian Wide (W) or East Asian
+ *   Full-width (F) category as defined in Unicode Technical
+ *   Report #11 have a column width of 2.
+ * - All remaining characters (including all printable
+ *   ISO 8859-1 and WGL4 characters, Unicode control characters,
+ *   etc.) have a column width of 1.
+ * This implementation assumes that wchar_t characters are encoded
+ * in ISO 10646.
+ */
+int
+compat_wcwidth(wchar_t ucs)
+{
+    /* Sorted list of non-overlapping intervals of non-spacing characters
+     * generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c". */
+    static const interval combining[] =
+    {
+        { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
+        { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
+        { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
+        { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
+        { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
+        { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
+        { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
+        { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
+        { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
+        { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
+        { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
+        { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
+        { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
+        { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
+        { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
+        { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
+        { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
+        { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
+        { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
+        { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
+        { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
+        { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
+        { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
+        { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
+        { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
+        { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
+        { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
+        { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
+        { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
+        { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
+        { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
+        { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
+        { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
+        { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
+        { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
+        { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
+        { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
+        { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
+        { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
+        { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
+        { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
+        { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
+        { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
+        { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
+        { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
+        { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
+        { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
+        { 0xE0100, 0xE01EF }
+    };
+
+    /* Test for 8-bit control characters. */
+    if(ucs == 0)
+    {
+        return 0;
+    }
+    if(ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
+    {
+        return -1;
+    }
+
+    /* Binary search in table of non-spacing characters. */
+    if(bisearch(ucs, combining, sizeof(combining)/sizeof(combining[0]) - 1))
+    {
+        return 0;
+    }
+
+    /* If we arrive here, ucs is not a combining or C0/C1 control character. */
+
+    return 1 +
+        (ucs >= 0x1100 &&
+         (ucs <= 0x115f || /* Hangul Jamo init. consonants */
+          ucs == 0x2329 || ucs == 0x232a ||
+          (ucs >= 0x2e80 && ucs <= 0xa4cf &&
+           ucs != 0x303f) || /* CJK ... Yi */
+          (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
+          (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
+          (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
+          (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
+          (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
+          (ucs >= 0xffe0 && ucs <= 0xffe6) ||
+          (ucs >= 0x20000 && ucs <= 0x2fffd) ||
+          (ucs >= 0x30000 && ucs <= 0x3fffd)));
+}
+
+/* Auxiliary function for binary search in interval table. */
+static int
+bisearch(wchar_t ucs, const interval *table, int max)
+{
+    int min;
+
+    if(ucs < table[0].first || ucs > table[max].last)
+    {
+        return 0;
+    }
+
+    min = 0;
+    while(max >= min)
+    {
+        const int mid = (min + max)/2;
+        if(ucs > table[mid].last)
+        {
+            min = mid + 1;
+        }
+        else if(ucs < table[mid].first)
+        {
+            max = mid - 1;
+        }
+        else
+        {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+#endif
diff --git a/win32/pdcdisp.c b/win32/pdcdisp.c
index c2ad814..d67dc58 100644
--- a/win32/pdcdisp.c
+++ b/win32/pdcdisp.c
@@ -104,13 +104,28 @@ void PDC_transform_line(int lineno, int x, int len, const chtype *srcp)
     for (j = 0; j < len; j++)
     {
         chtype ch = srcp[j];
+        unsigned char attr = pdc_atrtab[ch >> PDC_ATTR_SHIFT];
 
-        ci[j].Attributes = pdc_atrtab[ch >> PDC_ATTR_SHIFT];
+        ci[j].Attributes = attr;
 #ifdef CHTYPE_LONG
         if (ch & A_ALTCHARSET && !(ch & 0xff80))
             ch = acs_map[ch & 0x7f];
 #endif
         ci[j].Char.UnicodeChar = ch & A_CHARTEXT;
+
+#ifdef PDC_WIDE
+        {
+            /* A wide character is followed by "width - 1" placeholder cells;
+             * stop at the end of the "len" cells being filled. */
+            int gap_width = compat_wcwidth(srcp[j] & A_CHARTEXT) - 1;
+            while (gap_width-- > 0 && j + 1 < len)
+            {
+                ++j;
+                ci[j].Attributes = attr;
+                ci[j].Char.UnicodeChar = '\0';
+            }
+        }
+#endif
     }
 
     WriteConsoleOutput(pdc_con_out, ci, bufSize, bufPos, &sr);