1Fix display of wide (CJK) characters on Win32.
2
3diff --git a/curspriv.h b/curspriv.h
4index 8b34f01..719b611 100644
5--- a/curspriv.h
6+++ b/curspriv.h
7@@ -62,6 +62,11 @@ extern bool pdc_color_started;
8 extern unsigned long pdc_key_modifiers;
9 extern MOUSE_STATUS pdc_mouse_status;
10
11+#ifdef PDC_WIDE
12+# include <wchar.h> /* wchar_t */
13+int compat_wcwidth(wchar_t ucs); /* cell width of ucs: 0, 1, 2 or -1 (control) */
14+#endif /* PDC_WIDE */
15+
16 /*----------------------------------------------------------------------*/
17
18 /* Platform implementation functions */
19diff --git a/libobjs.mif b/libobjs.mif
20index 6705323..f1f22e4 100644
21--- a/libobjs.mif
22+++ b/libobjs.mif
23@@ -17,7 +17,7 @@ initscr.$(O) inopts.$(O) insch.$(O) insstr.$(O) instr.$(O) kernel.$(O) \
24 keyname.$(O) mouse.$(O) move.$(O) outopts.$(O) overlay.$(O) pad.$(O) \
25 panel.$(O) printw.$(O) refresh.$(O) scanw.$(O) scr_dump.$(O) scroll.$(O) \
26 slk.$(O) termattr.$(O) terminfo.$(O) touch.$(O) util.$(O) window.$(O) \
27-debug.$(O)
28+debug.$(O) wcwidth.$(O)
29
30 PDCOBJS = pdcclip.$(O) pdcdisp.$(O) pdcgetsc.$(O) pdckbd.$(O) pdcscrn.$(O) \
31 pdcsetsc.$(O) pdcutil.$(O)
32diff --git a/pdcurses/addch.c b/pdcurses/addch.c
33index 586d1a7..6481760 100644
34--- a/pdcurses/addch.c
35+++ b/pdcurses/addch.c
36@@ -240,7 +240,17 @@ int waddch(WINDOW *win, const chtype ch)
37             win->_y[y][x] = text;
38         }
39
40-        if (++x >= win->_maxx)
41+#ifdef PDC_WIDE
42+        /* Leave the remaining "width - 1" character positions untouched, just
43+         * jump over them.  Platform-specific code is expected to do something
44+         * similar. */
45+        x += compat_wcwidth(text);
46+        /* XXX: this might result in a truncated character; might want to
47+         *      perform the check before setting the character. */
48+#else
49+        ++x;
50+#endif
51+        if (x >= win->_maxx)
52         {
53             /* wrap around test */
54
55diff --git a/pdcurses/wcwidth.c b/pdcurses/wcwidth.c
56new file mode 100644
57index 0000000..b80201c
58--- /dev/null
59+++ b/pdcurses/wcwidth.c
60@@ -0,0 +1,224 @@
61+/*
62+ * Copyright (C) Markus Kuhn -- 2007-05-26 (Unicode 5.0)
63+ * Copyright (C) xaizek      -- 2014       (Adapt for vifm)
64+ * Copyright (C) xaizek      -- 2016       (Adapt for pdcurses)
65+ *
66+ * This is an implementation of wcwidth() and wcswidth() (defined in
67+ * IEEE Std 1003.1-2001) for Unicode.
68+ *
69+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
70+ * http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
71+ *
72+ * In fixed-width output devices, Latin characters all occupy a single
73+ * "cell" position of equal width, whereas ideographic CJK characters
74+ * occupy two such cells. Interoperability between terminal-line
75+ * applications and (teletype-style) character terminals using the
76+ * UTF-8 encoding requires agreement on which character should advance
77+ * the cursor by how many cell positions. No established formal
78+ * standards exist at present on which Unicode character shall occupy
79+ * how many cell positions on character terminals. These routines are
80+ * a first attempt of defining such behavior based on simple rules
81+ * applied to data provided by the Unicode Consortium.
82+ *
83+ * For some graphical characters, the Unicode standard explicitly
84+ * defines a character-cell width via the definition of the East Asian
85+ * FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
86+ * In all these cases, there is no ambiguity about which width a
87+ * terminal shall use. For characters in the East Asian Ambiguous (A)
88+ * class, the width choice depends purely on a preference of backward
89+ * compatibility with either historic CJK or Western practice.
90+ * Choosing single-width for these characters is easy to justify as
91+ * the appropriate long-term solution, as the CJK practice of
92+ * displaying these characters as double-width comes from historic
93+ * implementation simplicity (8-bit encoded characters were displayed
94+ * single-width and 16-bit ones double-width, even for Greek,
95+ * Cyrillic, etc.) and not any typographic considerations.
96+ *
97+ * Much less clear is the choice of width for the Not East Asian
98+ * (Neutral) class. Existing practice does not dictate a width for any
99+ * of these characters. It would nevertheless make sense
100+ * typographically to allocate two character cells to characters such
101+ * as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
102+ * represented adequately with a single-width glyph. The following
103+ * routines at present merely assign a single-cell width to all
104+ * neutral characters, in the interest of simplicity. This is not
105+ * entirely satisfactory and should be reconsidered before
106+ * establishing a formal standard in this area. At the moment, the
107+ * decision which Not East Asian (Neutral) characters should be
108+ * represented by double-width glyphs cannot yet be answered by
109+ * applying a simple rule from the Unicode database content. Setting
110+ * up a proper standard for the behavior of UTF-8 character terminals
111+ * will require a careful analysis not only of each Unicode character,
112+ * but also of each presentation form, something the author of these
113+ * routines has so far avoided doing.
114+ *
115+ * http://www.unicode.org/unicode/reports/tr11/
116+ *
117+ * Permission to use, copy, modify, and distribute this software
118+ * for any purpose and without fee is hereby granted. The author
119+ * disclaims all warranties with regard to this software.
120+ *
121+ * Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
122+ */
123+
124+#ifdef PDC_WIDE
125+
126+#include <curspriv.h>
127+
128+#include <wchar.h> /* wchar_t */
129+
130+typedef struct
131+{
132+	int first; /* lowest code point of the range (inclusive) */
133+	int last;  /* highest code point of the range (inclusive) */
134+}
135+interval;
136+
137+static int bisearch(wchar_t ucs, const interval *table, int max);
138+
139+/* The following function defines the column width of an ISO 10646
140+ * character as follows:
141+ *  - The null character (U+0000) has a column width of 0.
142+ *  - Other C0/C1 control characters and DEL will lead to a return
143+ *    value of -1.
144+ *  - Non-spacing and enclosing combining characters (general
145+ *    category code Mn or Me in the Unicode database) have a
146+ *    column width of 0.
147+ *  - SOFT HYPHEN (U+00AD) has a column width of 1.
148+ *  - Other format characters (general category code Cf in the Unicode
149+ *    database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
150+ *  - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
151+ *    have a column width of 0.
152+ *  - Spacing characters in the East Asian Wide (W) or East Asian
153+ *    Full-width (F) category as defined in Unicode Technical
154+ *    Report #11 have a column width of 2.
155+ *  - All remaining characters (including all printable
156+ *    ISO 8859-1 and WGL4 characters, Unicode control characters,
157+ *    etc.) have a column width of 1.
158+ * This implementation assumes that wchar_t characters are encoded
159+ * in ISO 10646.
160+ */
161+int
162+compat_wcwidth(wchar_t ucs)
163+{
164+	/* Sorted list of non-overlapping intervals of non-spacing characters
165+	 * generated by "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c". */
166+	static const interval combining[] =
167+	{
168+		{ 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
169+		{ 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
170+		{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
171+		{ 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
172+		{ 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
173+		{ 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
174+		{ 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
175+		{ 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
176+		{ 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
177+		{ 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
178+		{ 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
179+		{ 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
180+		{ 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
181+		{ 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
182+		{ 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
183+		{ 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
184+		{ 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
185+		{ 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
186+		{ 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
187+		{ 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
188+		{ 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
189+		{ 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
190+		{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
191+		{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
192+		{ 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
193+		{ 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
194+		{ 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
195+		{ 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
196+		{ 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
197+		{ 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
198+		{ 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
199+		{ 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
200+		{ 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
201+		{ 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
202+		{ 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
203+		{ 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
204+		{ 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
205+		{ 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
206+		{ 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
207+		{ 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
208+		{ 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
209+		{ 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
210+		{ 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
211+		{ 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
212+		{ 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
213+		{ 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
214+		{ 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
215+		{ 0xE0100, 0xE01EF }
216+	};
217+
218+	/* NUL has width 0; other C0 controls, DEL and C1 controls yield -1. */
219+	if(ucs == 0)
220+	{
221+		return 0;
222+	}
223+	if(ucs < 32 || (ucs >= 0x7f && ucs < 0xa0))
224+	{
225+		return -1;
226+	}
227+
228+	/* Binary search in table of non-spacing characters (last index passed). */
229+	if(bisearch(ucs, combining, sizeof(combining)/sizeof(combining[0]) - 1))
230+	{
231+		return 0;
232+	}
233+
234+	/* If we arrive here, ucs is not a combining or C0/C1 control character. */
235+
236+	return 1 +
237+	  (ucs >= 0x1100 &&
238+	   (ucs <= 0x115f ||                    /* Hangul Jamo init. consonants */
239+	    ucs == 0x2329 || ucs == 0x232a ||
240+	    (ucs >= 0x2e80 && ucs <= 0xa4cf &&
241+	     ucs != 0x303f) ||                  /* CJK ... Yi */
242+	    (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */
243+	    (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */
244+	    (ucs >= 0xfe10 && ucs <= 0xfe19) || /* Vertical forms */
245+	    (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */
246+	    (ucs >= 0xff00 && ucs <= 0xff60) || /* Fullwidth Forms */
247+	    (ucs >= 0xffe0 && ucs <= 0xffe6) ||
248+	    (ucs >= 0x20000 && ucs <= 0x2fffd) ||
249+	    (ucs >= 0x30000 && ucs <= 0x3fffd)));
250+}
251+
252+/* Binary search of a sorted interval table; max is the last valid index. */
253+static int
254+bisearch(wchar_t ucs, const interval *table, int max)
255+{
256+	int min;
257+
258+	if(ucs < table[0].first || ucs > table[max].last)
259+	{
260+		return 0; /* outside the span covered by the whole table */
261+	}
262+
263+	min = 0;
264+	while(max >= min)
265+	{
266+		const int mid = (min + max)/2;
267+		if(ucs > table[mid].last)
268+		{
269+			min = mid + 1;
270+		}
271+		else if(ucs < table[mid].first)
272+		{
273+			max = mid - 1;
274+		}
275+		else
276+		{
277+			return 1; /* table[mid].first <= ucs <= table[mid].last */
278+		}
279+	}
280+
281+	return 0; /* ucs falls in a gap between intervals */
282+}
283+
284+#endif
285diff --git a/win32/pdcdisp.c b/win32/pdcdisp.c
286index c2ad814..d67dc58 100644
287--- a/win32/pdcdisp.c
288+++ b/win32/pdcdisp.c
289@@ -104,13 +104,26 @@ void PDC_transform_line(int lineno, int x, int len, const chtype *srcp)
290     for (j = 0; j < len; j++)
291     {
292         chtype ch = srcp[j];
293+        unsigned char attr = pdc_atrtab[ch >> PDC_ATTR_SHIFT];
294
295-        ci[j].Attributes = pdc_atrtab[ch >> PDC_ATTR_SHIFT];
296+        ci[j].Attributes = attr;
297 #ifdef CHTYPE_LONG
298         if (ch & A_ALTCHARSET && !(ch & 0xff80))
299             ch = acs_map[ch & 0x7f];
300 #endif
301         ci[j].Char.UnicodeChar = ch & A_CHARTEXT;
302+
303+#ifdef PDC_WIDE
304+        {
305+            int gap_width = compat_wcwidth(srcp[j] & A_CHARTEXT) - 1;
306+            while (gap_width-- > 0)
307+            {
308+                ++j;
309+                ci[j].Attributes = attr;
310+                ci[j].Char.UnicodeChar = '\0';
311+            }
312+        }
313+#endif
314     }
315
316     WriteConsoleOutput(pdc_con_out, ci, bufSize, bufPos, &sr);
317