1 /*
2  * Functions for characters and strings for CLISP
3  * Bruno Haible 1990-2008, 2016-2017
4  * Sam Steingold 1998-2009, 2012
5  * German comments translated into English: Stefan Kain 2002-09-20
6  */
7 
8 #include "lispbibl.c"
9 
10 /* character conversion tables: */
11 #if defined(ENABLE_UNICODE)
12 /* here are the registered bijective case (small<-->CAP) transformations
13  for Unicode. */
14 #elif defined(ISOLATIN_CHS)
15 /* here are the registered bijective case (small<-->CAP) transformations
16   small 61 ... 7A E0 ... F6 F8 ... FE
17   CAP   41 ... 5A C0 ... D6 D8 ... DE
18   both  aA ... zZ àÀ ... öÖ øØ ... þÞ */
19 #elif defined(HPROMAN8_CHS)
20 /* here are the registered bijective case (small<-->CAP) transformations
21   small 61 ... 7A C4 C5 D5 C6 C7 B2 C0 C1 D1 C2 C3 C8 C9 D9 CA CB
22   CAP   41 ... 5A E0 DC E5 E7 ED B1 A2 A4 A6 DF AE A1 A3 E6 E8 AD
23   which aA ... zZ áÁ éÉ íÍ óÓ úÚ ýÝ â êÊ îÎ ôÔ ûÛ àÀ èÈ ìÌ òÒ ùÙ
24   small CC CD DD CE CF EF E2 B7 EA D4 D7 D6 B5 EC E4 F1
25   CAP   D8 A5 A7 DA DB EE E1 B6 E9 D0 D3 D2 B4 EB E3 F0
26   which äÄ ëË ïÏ öÖ üÜ ÿŸ ãà ñÑ õÕ åÅ æÆ øØ çÇ šŠ ðÐ þÞ */
27 #else /* defined(ASCII_CHS) */
28 /* here are the registered bijective case (small<-->CAP) transformations
29   small 61 ... 7A
30   CAP   41 ... 5A
31   both  aA ... zZ */
32 #endif
33 
34 #ifdef ENABLE_UNICODE
35 /* No-conversion table, used by up_case_table and down_case_table. */
36 static const uint16 nop_page[256] = {
37   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
50   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
53 };
54 #endif
55 
56 /* Converts byte ch into an uppercase letter */
up_case(chart ch)57 modexp chart up_case (chart ch) {
58  #ifdef ENABLE_UNICODE
59   #include "uni_upcase.c"
60   var cint c = as_cint(ch);
61   if (c < sizeof(up_case_table)/sizeof(up_case_table[0]) << 8)
62     return as_chart(c+(sint16)up_case_table[c>>8][c&0xFF]);
63   else
64     return ch;
65  #else
66   /* table for conversion into uppercase letters: */
67   local const cint up_case_table[char_code_limit] =
68    #if defined(ISOLATIN_CHS)
69     { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
70       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
71       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
72       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
73       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
74       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
75       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
76       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
77       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
78       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
79       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
80       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
81       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
82       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
83       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
84       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xF7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xFF,
85     };
86    #elif defined(HPROMAN8_CHS)
87     { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
88       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
89       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
90       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
91       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
92       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
93       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
94       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
95       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
96       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
97       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
98       0xB0,0xB1,0xB2,0xB3,0xB4,0xB4,0xB6,0xB6,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
99       0xA2,0xA4,0xDF,0xAE,0xE0,0xDC,0xE7,0xB2,0xA1,0xA3,0xE8,0xAD,0xD8,0xA5,0xDA,0xDB,
100       0xD0,0xA6,0xD2,0xD3,0xD0,0xE5,0xD2,0xD3,0xD8,0xE6,0xDA,0xDB,0xDC,0xA7,0xDE,0xDF,
101       0xE0,0xE1,0xE1,0xE3,0xE3,0xE5,0xE6,0xE7,0xE8,0xE9,0xE9,0xEB,0xEB,0xED,0xEE,0xEE,
102       0xF0,0xF0,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
103     };
104    #else /* standard ascii conversion table: only a..z --> A..Z */
105     { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
106       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
107       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
108       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
109       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
110       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
111       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
112       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
113       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
114       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
115       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
116       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
117       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
118       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
119       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
120       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
121     };
122    #endif
123   return as_chart(up_case_table[as_cint(ch)]);
124  #endif
125 }
126 
127 /* Converts byte ch into a lowercase letter */
down_case(chart ch)128 modexp chart down_case (chart ch) {
129  #ifdef ENABLE_UNICODE
130   #include "uni_downcase.c"
131   var cint c = as_cint(ch);
132   if (c < sizeof(down_case_table)/sizeof(down_case_table[0]) << 8)
133     return as_chart(c+(sint16)down_case_table[c>>8][c&0xFF]);
134   else
135     return ch;
136  #else
137   /* table for conversion into lowercase letters: */
138   local const cint down_case_table[char_code_limit] =
139    #if defined(ISOLATIN_CHS)
140     { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
141       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
142       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
143       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
144       0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
145       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
146       0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
147       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
148       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
149       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
150       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
151       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
152       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
153       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xD7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xDF,
154       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
155       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
156     };
157    #elif defined(HPROMAN8_CHS)
158     { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
159       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
160       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
161       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
162       0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
163       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
164       0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
165       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
166       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
167       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
168       0xA0,0xC8,0xC0,0xC9,0xC1,0xCD,0xD1,0xDD,0xA8,0xA9,0xAA,0xAB,0xAC,0xCB,0xC3,0xAF,
169       0xB0,0xB2,0xB2,0xB3,0xB5,0xB5,0xB7,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
170       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
171       0xD4,0xD1,0xD6,0xD7,0xD4,0xD5,0xD6,0xD7,0xCC,0xD9,0xCE,0xCF,0xC5,0xDD,0xDE,0xC2,
172       0xC4,0xE2,0xE2,0xE4,0xE4,0xD5,0xD9,0xC6,0xCA,0xEA,0xEA,0xEC,0xEC,0xC7,0xEF,0xEF,
173       0xF1,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
174     };
175    #else /* standard ascii conversion table: only A..Z --> a..z */
176     { 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
177       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
178       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
179       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
180       0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
181       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
182       0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
183       0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
184       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
185       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
186       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
187       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
188       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
189       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
190       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
191       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
192     };
193    #endif
194   return as_chart(down_case_table[as_cint(ch)]);
195  #endif
196 }
197 
198 #ifdef ENABLE_UNICODE
199 /* Table of Unicode character attributes.
200  unicode_attribute(c)
201  > cint c: a character code
202  < result: 0 = non-graphic
203            1 = graphic, but not alphanumeric
204            2 = graphic and numeric
205            3 = graphic and alphabetic */
206 #include "uni_attribute.c"
207 #define unicode_attribute(c)                                            \
208     ((c) < sizeof(unicode_attribute_table)/sizeof(unicode_attribute_table[0]) << 10 \
209      ? (unicode_attribute_table[(c)>>10][((c)>>2)&0xFF] >> (((c)&0x3)*2)) & 0x3     \
210      : 0)
211 #endif
212 
213 /* UP: Determines, if a character is alphabetic.
214  alphap(ch)
215  > ch: character-code
216  < result: true if alphabetic, otherwise false.
217  Alphabetic characters have a code c, with */
218 #if defined(ENABLE_UNICODE)
219 /* java.lang.Character.isLetter(c) */
220 #else
221 /* $41 <= c <= $5A or $61 <= c <= $7A */
222 #if defined(ISOLATIN_CHS)
223 /* or $C0 <= c except c=$D7,$F7. */
224 #elif defined(HPROMAN8_CHS)
225 /* or $A1 <= c <= $A7 or $AD <= c <= $AE or $B1 <= c <= $B7 except c=$B3
226    or $C0 <= c <= $F1. */
227 #endif
228 #endif
229 /* Therein, all uppercase- and all lowercase-
230  characters are enclosed (see CLTL p. 236 top). */
alphap(chart ch)231 local bool alphap (chart ch)
232 {
233   var cint c = as_cint(ch);
234  #ifdef ENABLE_UNICODE
235   return (unicode_attribute(c) == 3);
236  #else
237   if (c < 0x41) goto no; if (c <= 0x5A) goto yes;
238   if (c < 0x61) goto no; if (c <= 0x7A) goto yes;
239  #if defined(ISOLATIN_CHS)
240   if (c < 0xC0) goto no;
241   if ((c == 0xD7) || (c == 0xF7)) goto no; else goto yes;
242  #elif defined(HPROMAN8_CHS)
243   if (c < 0xA1) goto no;
244   if (c > 0xF1) goto no; if (c >= 0xC0) goto yes;
245   if (c <= 0xA7) goto yes;
246   if (c < 0xB1) {
247     if (c < 0xAD) goto no; if (c <= 0xAE) goto yes; goto no;
248   } else {
249     if (c > 0xB7) goto no; if (c == 0xB3) goto no; else goto yes;
250   }
251  #endif
252  no: return false;
253  yes: return true;
254  #endif
255 }
256 
257 /* Determines, if a character is numeric.
258  numericp(ch)
259  > ch: character-code
260  < result: true if numeric, otherwise false. */
261 local bool numericp (chart ch);
262 #ifdef ENABLE_UNICODE
263   #define numericp(ch)  (unicode_attribute(as_cint(ch)) == 2)
264 #else
265   #define numericp(ch)  (('0' <= as_cint(ch)) && (as_cint(ch) <= '9'))
266 #endif
267 
268 /* Determines, if a character is alphanumeric.
269  alphanumericp(ch)
270  > ch: character-code
271  < result: true if alphanumeric, otherwise false.
272  Alphanumeric characters comprise the alphabetic characters and the digits. */
alphanumericp(chart ch)273 global bool alphanumericp (chart ch)
274 {
275  #ifdef ENABLE_UNICODE
276   var cint c = as_cint(ch);
277   return (unicode_attribute(c) >= 2);
278  #else
279   return (numericp(ch) || alphap(ch));
280  #endif
281 }
282 
283 /* Determines, if a character is a Graphic-Character ("printing") .
284  graphic_char_p(ch)
285  > ch: character-code
286  < result: true if printing, otherwise false.
287  Graphic-Characters are those with a Code c, with */
288 #if defined(ENABLE_UNICODE)
289 /*       (java.lang.Character.isDefined(c) || c == 0x20AC)
290          && !(c < 0x0020 || (0x007F <= c <= 0x009F)) */
291 #elif defined(ISOLATIN_CHS) || defined(HPROMAN8_CHS)
292 /*       $20 <= c <= $7E or $A0 <= c < $100. */
293 #else /* defined(ASCII_CHS) */
294 /*       $20 <= c <= $7E. */
295 #endif
graphic_char_p(chart ch)296 global bool graphic_char_p (chart ch) {
297   /* This would be the same as iswprint(ch), assuming wide characters were
298      Unicode. */
299   var cint c = as_cint(ch);
300  #ifdef ENABLE_UNICODE
301   return (unicode_attribute(c) == 0 ? false : true);
302  #else
303  #if defined(ISOLATIN_CHS) || defined(HPROMAN8_CHS)
304   if ((('~' >= c) && (c >= ' ')) || (c >= 0xA0)) goto yes; else goto no;
305  #else /* defined(ASCII_CHS) */
306   if (c >= ' ') goto yes; else goto no;
307  #endif
308  no: return false;
309  yes: return true;
310  #endif
311 }
312 
313 /* Returns the screen display width of a character.
314  char_width(ch)
315  > ch: character code
316  < result: number of output columns occupied by ch */
317 global uintL char_width (chart ch);
318 #ifdef ENABLE_UNICODE
319 #include "uniwidth.h"           /* from gnulib */
char_width(chart ch)320 global uintL char_width (chart ch) {
321   /* This would be the same as wcwidth(ch), assuming wide characters were
322      Unicode, except that for non-printable characters we return 0, not -1. */
323   extern const char* locale_encoding;
324   int w = uc_width(as_cint(ch),locale_encoding);
325   return (w >= 0 ? w : 0);
326 }
327 #else
char_width(chart ch)328 global uintL char_width (chart ch) {
329   return (graphic_char_p(ch) ? 1 : 0);
330 }
331 #endif
332 
333 #if !defined(ENABLE_UNICODE) || defined(HAVE_SMALL_SSTRING)
334 /* Copies an array of uint8 to an array of uint8.
335  copy_8bit_8bit(src,dest,len);
336  > uint8* src: source
337  > uint8* dest: destination
338  > uintL len: number of elements to be copied, > 0 */
copy_8bit_8bit(const uint8 * src,uint8 * dest,uintL len)339 global void copy_8bit_8bit (const uint8* src, uint8* dest, uintL len) {
340   do { *dest++ = *src++; } while (--len);
341 }
342 #endif
343 
344 #if defined(HAVE_SMALL_SSTRING)
345 /* Copies an array of uint8 to an array of uint16.
346  copy_8bit_16bit(src,dest,len);
347  > uint8* src: source
348  > uint16* dest: destination
349  > uintL len: number of elements to be copied, > 0 */
copy_8bit_16bit(const uint8 * src,uint16 * dest,uintL len)350 modexp void copy_8bit_16bit (const uint8* src, uint16* dest, uintL len) {
351   do { *dest++ = *src++; } while (--len);
352 }
353 #endif
354 
355 #if defined(HAVE_SMALL_SSTRING)
356 /* Copies an array of uint8 to an array of uint32.
357  copy_8bit_32bit(src,dest,len);
358  > uint8* src: source
359  > uint32* dest: destination
360  > uintL len: number of elements to be copied, > 0 */
copy_8bit_32bit(const uint8 * src,uint32 * dest,uintL len)361 modexp void copy_8bit_32bit (const uint8* src, uint32* dest, uintL len) {
362   do { *dest++ = *src++; } while (--len);
363 }
364 #endif
365 
366 #if defined(HAVE_SMALL_SSTRING)
367 /* Copies an array of uint16 to an array of uint8.
368  All source elements must fit into uint8.
369  copy_16bit_8bit(src,dest,len);
370  > uint16* src: source
371  > uint8* dest: destination
372  > uintL len: number of elements to be copied, > 0 */
copy_16bit_8bit(const uint16 * src,uint8 * dest,uintL len)373 modexp void copy_16bit_8bit (const uint16* src, uint8* dest, uintL len) {
374   do { *dest++ = *src++; } while (--len);
375 }
376 #endif
377 
378 #if defined(HAVE_SMALL_SSTRING)
379 /* Copies an array of uint16 to an array of uint16.
380  copy_16bit_16bit(src,dest,len);
381  > uint16* src: source
382  > uint16* dest: destination
383  > uintL len: number of elements to be copied, > 0 */
copy_16bit_16bit(const uint16 * src,uint16 * dest,uintL len)384 modexp void copy_16bit_16bit (const uint16* src, uint16* dest, uintL len) {
385   do { *dest++ = *src++; } while (--len);
386 }
387 #endif
388 
389 #if defined(HAVE_SMALL_SSTRING)
390 /* Copies an array of uint16 to an array of uint32.
391  copy_16bit_32bit(src,dest,len);
392  > uint16* src: source
393  > uint32* dest: destination
394  > uintL len: number of elements to be copied, > 0 */
copy_16bit_32bit(const uint16 * src,uint32 * dest,uintL len)395 modexp void copy_16bit_32bit (const uint16* src, uint32* dest, uintL len) {
396   do { *dest++ = *src++; } while (--len);
397 }
398 #endif
399 
400 #if defined(HAVE_SMALL_SSTRING)
401 /* Copies an array of uint32 to an array of uint8.
402  All source elements must fit into uint8.
403  copy_32bit_8bit(src,dest,len);
404  > uint32* src: source
405  > uint8* dest: destination
406  > uintL len: number of elements to be copied, > 0 */
copy_32bit_8bit(const uint32 * src,uint8 * dest,uintL len)407 modexp void copy_32bit_8bit (const uint32* src, uint8* dest, uintL len) {
408   do { *dest++ = *src++; } while (--len);
409 }
410 #endif
411 
412 #if defined(HAVE_SMALL_SSTRING)
413 /* Copies an array of uint32 to an array of uint16.
414  All source elements must fit into uint16.
415  copy_32bit_16bit(src,dest,len);
416  > uint32* src: source
417  > uint16* dest: destination
418  > uintL len: number of elements to be copied, > 0 */
copy_32bit_16bit(const uint32 * src,uint16 * dest,uintL len)419 modexp void copy_32bit_16bit (const uint32* src, uint16* dest, uintL len) {
420   do { *dest++ = *src++; } while (--len);
421 }
422 #endif
423 
424 #if defined(ENABLE_UNICODE)
425 /* Copies an array of uint32 to an array of uint32.
426  copy_32bit_32bit(src,dest,len);
427  > uint32* src: source
428  > uint32* dest: destination
429  > uintL len: number of elements to be copied, > 0 */
copy_32bit_32bit(const uint32 * src,uint32 * dest,uintL len)430 global void copy_32bit_32bit (const uint32* src, uint32* dest, uintL len) {
431   do { *dest++ = *src++; } while (--len);
432 }
433 #endif
434 
435 #ifdef HAVE_SMALL_SSTRING
436 
437 /* Determines the smallest string element type capable of holding a
438  set of 16-bit characters.
439  smallest_string_flavour16(src,len)
440  > uint16* src: source
441  > uintL len: number of characters at src
442  < result: Sstringtype_8Bit or Sstringtype_16Bit */
smallest_string_flavour16(const uint16 * src,uintL len)443 global uintBWL smallest_string_flavour16 (const uint16* src, uintL len) {
444   var uintBWL result = Sstringtype_8Bit;
445   if (len > 0) {
446     var uintL count;
447     dotimespL(count,len, {
448       if (!(*src < cint8_limit)) {
449         result = Sstringtype_16Bit;
450         break;
451       }
452       src++;
453     });
454   }
455   return result;
456 }
457 
458 /* Determines the smallest string element type capable of holding a
459  set of 32-bit characters.
460  smallest_string_flavour32(src,len)
461  > uint32* src: source
462  > uintL len: number of characters at src
463  < result: Sstringtype_8Bit or Sstringtype_16Bit or Sstringtype_32Bit */
smallest_string_flavour32(const uint32 * src,uintL len)464 global uintBWL smallest_string_flavour32 (const uint32* src, uintL len) {
465   var uintBWL result = Sstringtype_8Bit;
466   if (len > 0) {
467     var uintL count;
468     dotimespL(count,len, {
469       if (!(*src < cint8_limit))
470         result = Sstringtype_16Bit;
471       if (!(*src < cint16_limit)) {
472         result = Sstringtype_32Bit;
473         break;
474       }
475       src++;
476     });
477   }
478   return result;
479 }
480 
481 #endif
482 
483 /* UP: unpack a string
484  unpack_string_ro(string,&len,&offset)  [for read-only access]
485  > object string: a string
486  < uintL len: the fill-pointer length of the string
487  < uintL offset: offset into the datastorage vector
488  < object result: datastorage vector, a simple-string or NIL */
unpack_string_ro(object string,uintL * len,uintL * offset)489 modexp object unpack_string_ro (object string, uintL* len, uintL* offset) {
490   if (simple_string_p(string)) {
491     sstring_un_realloc(string);
492     *len = Sstring_length(string);
493     *offset = 0;
494     return string;
495   } else {
496     /* string, but not simple-string => follow the displacement
497        determine the length (like vector_length() in array.d): */
498     var uintL size;
499     {
500       var Iarray addr = TheIarray(string);
501       var uintL offset_fil = offsetofa(iarray_,dims);
502       if (iarray_flags(addr) & bit(arrayflags_dispoffset_bit))
503         offset_fil += sizeof(uintL);
504       if (iarray_flags(addr) & bit(arrayflags_fillp_bit))
505         offset_fil += sizeof(uintL);
506       size = *(uintL*)pointerplus(addr,offset_fil);
507     }
508     *len = size;
509     /* follow the displacement: */
510     *offset = 0;
511     return iarray_displace_check(string,size,offset);
512   }
513 }
514 
515 #if 0                           /* not used */
516 /* UP: unpack a string
517  unpack_string_rw(string,&len,&offset)  [for read-write access]
518  > object string: a string
519  < uintL len: the fill-pointer length of the string
520  < uintL offset: offset in the datastorage vector
521  < object result: datastorage vector, a simple-string or NIL */
522 global object unpack_string_rw (object string, uintL* len, uintL* offset) {
523   var object unpacked = unpack_string_ro(string,len,offset);
524   if (*len > 0) {
525     if (simple_nilarray_p(unpacked)) error_nilarray_access();
526     check_sstring_mutable(unpacked);
527   }
528   return unpacked;
529 }
530 #endif
531 
532 /* UP: compares two strings for equality
533  string_eq(string1,string2)
534  > string1: string
535  > string2: simple-string
536  < result: /=0, if equal */
string_eq(object string1,object string2)537 global bool string_eq (object string1, object string2) {
538   var uintL len1;
539   var uintL offset1;
540   string1 = unpack_string_ro(string1,&len1,&offset1);
541   sstring_un_realloc(string2);
542   if (len1 != Sstring_length(string2))
543     return false;
544   /* Now both strings have exactly len1 characters. Compare them. */
545   if (len1 > 0)
546     return string_eqcomp(string1,offset1,string2,0,len1);
547   return true;
548 }
549 
550 /* UP: compares two strings for equality, case-insensitive
551  string_equal(string1,string2)
552  > string1: string
553  > string2: simple-string
554  < result: /=0, if equal */
string_equal(object string1,object string2)555 modexp bool string_equal (object string1, object string2) {
556   var uintL len1;
557   var uintL offset1;
558   string1 = unpack_string_ro(string1,&len1,&offset1);
559   sstring_un_realloc(string2);
560   if (len1 != Sstring_length(string2))
561     return false;
562   /* Now both strings have exactly len1 characters. Compare them. */
563   if (len1 > 0)
564     return string_eqcomp_ci(string1,offset1,string2,0,len1);
565   return true;
566 }
567 
568 /* UP: Stores a character in a string.
569  > string: a mutable string that is or was simple
570  > index: (already checked) index into the string
571  > element: a character
572  < result: the possibly reallocated string
573  can trigger GC */
sstring_store(object string,uintL index,chart element)574 global maygc object sstring_store (object string, uintL index, chart element) {
575   var object inner_string = string;
576   sstring_un_realloc(inner_string);
577   switch (sstring_eltype(TheSstring(inner_string))) {
578   #ifdef ENABLE_UNICODE
579    #ifdef HAVE_SMALL_SSTRING
580     case Sstringtype_8Bit: /* mutable Simple-String */
581       if (as_cint(element) < cint8_limit) {
582         TheS8string(inner_string)->data[index] = as_cint(element);
583         break;
584       }
585       ASSERT(eq(string,inner_string));
586       if (as_cint(element) < cint16_limit) {
587         string = reallocate_small_string(inner_string,Sstringtype_16Bit);
588         inner_string = TheSistring(string)->data;
589         TheS16string(inner_string)->data[index] = as_cint(element);
590         break;
591       }
592       string = reallocate_small_string(inner_string,Sstringtype_32Bit);
593       inner_string = TheSistring(string)->data;
594       TheS32string(inner_string)->data[index] = as_cint(element);
595       break;
596     case Sstringtype_16Bit: /* mutable Simple-String */
597       if (as_cint(element) < cint16_limit) {
598         TheS16string(inner_string)->data[index] = as_cint(element);
599         break;
600       }
601       pushSTACK(string);
602       inner_string = reallocate_small_string(inner_string,Sstringtype_32Bit);
603       string = popSTACK();
604       inner_string = TheSistring(inner_string)->data;
605       /*FALLTHROUGH*/
606    #endif
607     case Sstringtype_32Bit: /* mutable Simple-String */
608       TheS32string(inner_string)->data[index] = as_cint(element);
609       break;
610   #else
611     case Sstringtype_8Bit: /* mutable Simple-String */
612       TheS8string(inner_string)->data[index] = as_cint(element);
613       break;
614   #endif
615     default: NOTREACHED;
616   }
617   return string;
618 }
619 
620 /* UP: Stores an array of characters in a string.
621  > string: a mutable string that is or was simple
622  > offset: (already checked) offset into the string
623  > charptr[0..len-1]: a character array, not GC affected
624  < result: the possibly reallocated string
625  can trigger GC */
sstring_store_array(object string,uintL offset,const chart * charptr,uintL len)626 global maygc object sstring_store_array (object string, uintL offset,
627                                          const chart *charptr, uintL len)
628 {
629   if (len > 0) {
630     var object inner_string = string;
631     sstring_un_realloc(inner_string);
632     switch (sstring_eltype(TheSstring(inner_string))) {
633     #ifdef ENABLE_UNICODE
634      #ifdef HAVE_SMALL_SSTRING
635       case Sstringtype_8Bit: { /* mutable Simple-String */
636         var bool fits_in_8bit = true;
637         var bool fits_in_16bit = true;
638         {
639           var uintL n = len;
640           var const chart* p = charptr;
641           do {
642             if (!(as_cint(*p) < cint8_limit))
643               fits_in_8bit = false;
644             if (!(as_cint(*p) < cint16_limit)) {
645               fits_in_16bit = false;
646               break;
647             }
648             p++;
649           } while (--n);
650         }
651         if (fits_in_8bit) {
652           var const chart* p = charptr;
653           var cint8* q = &TheS8string(inner_string)->data[offset];
654           do {
655             *q = as_cint(*p);
656             p++;
657             q++;
658           } while (--len);
659           break;
660         }
661         ASSERT(eq(string,inner_string));
662         if (fits_in_16bit) {
663           string = reallocate_small_string(inner_string,Sstringtype_16Bit);
664           inner_string = TheSistring(string)->data;
665           var const chart* p = charptr;
666           var cint16* q = &TheS16string(inner_string)->data[offset];
667           do {
668             *q = as_cint(*p);
669             p++;
670             q++;
671           } while (--len);
672           break;
673         }
674         string = reallocate_small_string(inner_string,Sstringtype_32Bit);
675         inner_string = TheSistring(string)->data;
676         var const chart* p = charptr;
677         var cint32* q = &TheS32string(inner_string)->data[offset];
678         do {
679           *q = as_cint(*p);
680           p++;
681           q++;
682         } while (--len);
683       }
684         break;
685       case Sstringtype_16Bit: { /* mutable Simple-String */
686         var bool fits_in_16bit = true;
687         {
688           var uintL n = len;
689           var const chart* p = charptr;
690           do {
691             if (!(as_cint(*p) < cint16_limit)) {
692               fits_in_16bit = false;
693               break;
694             }
695             p++;
696           } while (--n);
697         }
698         if (fits_in_16bit) {
699           var const chart* p = charptr;
700           var cint16* q = &TheS16string(inner_string)->data[offset];
701           do {
702             *q = as_cint(*p);
703             p++;
704             q++;
705           } while (--len);
706           break;
707         }
708         pushSTACK(string);
709         inner_string = reallocate_small_string(inner_string,Sstringtype_32Bit);
710         string = popSTACK();
711         inner_string = TheSistring(inner_string)->data;
712       }
713         /*FALLTHROUGH*/
714      #endif
715       case Sstringtype_32Bit: { /* mutable Simple-String */
716         var const chart* p = charptr;
717         var cint32* q = &TheS32string(inner_string)->data[offset];
718         do {
719           *q = as_cint(*p);
720           p++;
721           q++;
722         } while (--len);
723       }
724         break;
725     #else
726       case Sstringtype_8Bit: { /* mutable Simple-String */
727         var const chart* p = charptr;
728         var cint8* q = &TheS8string(inner_string)->data[offset];
729         do {
730           *q = as_cint(*p);
731           p++;
732           q++;
733         } while (--len);
734       }
735         break;
736     #endif
737       default: NOTREACHED;
738     }
739   }
740   return string;
741 }
742 
743 #ifdef ENABLE_UNICODE
744 /* UP: Creates a Simple-String with given elements.
745  stringof(len)
746  > uintL len: desired vector length
747  > on STACK: len characters, first on top
748  < result: Simple-String with these objects
749  increases STACK
750  changes STACK, can trigger GC */
stringof(uintL len)751 global maygc object stringof (uintL len) {
752   check_stringsize(len);
753   var object new_string = allocate_string(len);
754   if (len > 0) {
755     var gcv_object_t* topargptr = STACK STACKop len;
756     var gcv_object_t* argptr = topargptr;
757     var chart* ptr = &TheSnstring(new_string)->data[0];
758     var uintL count;
759     dotimespL(count,len, { *ptr++ = char_code(NEXT(argptr)); });
760     set_args_end_pointer(topargptr);
761     #ifdef HAVE_SMALL_SSTRING
762     /* We use alloca for small-simple-strings, therefore their length
763      should not be too large, or we risk an SP overflow and core dump. */
764     if (len < 0x10000) {
765       var uintBWL flavour = smallest_string_flavour(&TheSnstring(new_string)->data[0],len);
766       if (flavour == Sstringtype_8Bit) {
767         pushSTACK(new_string);
768         var object copied_string = allocate_s8string(len);
769         copy_32bit_8bit(&TheS32string(popSTACK())->data[0],
770                         &TheS8string(copied_string)->data[0],len);
771         new_string = copied_string;
772       } else if (flavour == Sstringtype_16Bit) {
773         pushSTACK(new_string);
774         var object copied_string = allocate_s16string(len);
775         copy_32bit_16bit(&TheS32string(popSTACK())->data[0],
776                          &TheS16string(copied_string)->data[0],len);
777         new_string = copied_string;
778       }
779     }
780     #endif
781   }
782   return new_string;
783 }
784 #endif
785 
786 /* UP: Copies a string and thereby turns it into a Simple-String.
787  copy_string_normal(string)
788  > string: String
789  < result: mutable Normal-Simple-String with the same characters
790  can trigger GC */
copy_string_normal(object string)791 global maygc object copy_string_normal (object string) {
792   var uintL len;
793   var uintL offset;
794   string = unpack_string_ro(string,&len,&offset);
795   pushSTACK(string); /* save string */
796   var object new_string = allocate_string(len);
797   /* new_string = new Normal-Simple-String with given length len */
798   string = popSTACK(); /* return string */
799   if (len > 0) {
800    #ifdef ENABLE_UNICODE
801     SstringCase(string,
802                 { copy_8bit_32bit(&TheS8string(string)->data[offset],
803                                   &TheS32string(new_string)->data[0],len); },
804                 { copy_16bit_32bit(&TheS16string(string)->data[offset],
805                                    &TheS32string(new_string)->data[0],len); },
806                 { copy_32bit_32bit(&TheS32string(string)->data[offset],
807                                    &TheS32string(new_string)->data[0],len); },
808                 { error_nilarray_retrieve(); });
809    #else
810     SstringCase(string, { NOTREACHED; }, { NOTREACHED; },
811                 { copy_8bit_8bit(&TheS8string(string)->data[offset],
812                                  &TheS8string(new_string)->data[0],len); },
813                 { error_nilarray_retrieve(); });
814    #endif
815   }
816   return new_string;
817 }
818 
819 #ifdef HAVE_SMALL_SSTRING
820 /* UP: Copies a string and thereby turns it into a Simple-String.
821  copy_string(string)
822  > string: String
823  < result: mutable Simple-String with the same characters
824  can trigger GC */
copy_string(object string)825 global maygc object copy_string (object string) {
826   var uintL len;
827   var uintL offset;
828   string = unpack_string_ro(string,&len,&offset);
829   var uintBWL flavour;
830   /* We use alloca for small-simple-strings, therefore their length
831    should not be too large, or we risk an SP overflow and core dump. */
832   if (len < 0x10000) {
833     SstringCase(string,
834       { flavour = smallest_string_flavour8(&TheS8string(string)->data[offset],len); },
835       { flavour = smallest_string_flavour16(&TheS16string(string)->data[offset],len); },
836       { flavour = smallest_string_flavour32(&TheS32string(string)->data[offset],len); },
837       { flavour = Sstringtype_8Bit; });
838   } else
839     flavour = Sstringtype_32Bit;
840   pushSTACK(string); /* save string */
841   var object new_string =
842     (flavour == Sstringtype_8Bit ? allocate_s8string(len) :
843      flavour == Sstringtype_16Bit ? allocate_s16string(len) :
844      allocate_s32string(len));
845   /* new_string = new Simple-String with given length len */
846   string = popSTACK(); /* return string */
847   if (len > 0) {
848     if (flavour == Sstringtype_8Bit) {
849       SstringCase(string,
850                   { copy_8bit_8bit(&TheS8string(string)->data[offset],
851                                    &TheS8string(new_string)->data[0],len); },
852                   { copy_16bit_8bit(&TheS16string(string)->data[offset],
853                                     &TheS8string(new_string)->data[0],len); },
854                   { copy_32bit_8bit(&TheS32string(string)->data[offset],
855                                     &TheS8string(new_string)->data[0],len); },
856                   { error_nilarray_retrieve(); });
857     } else if (flavour == Sstringtype_16Bit) {
858       SstringCase(string,
859                   { copy_8bit_16bit(&TheS8string(string)->data[offset],
860                                     &TheS16string(new_string)->data[0],len); },
861                   { copy_16bit_16bit(&TheS16string(string)->data[offset],
862                                      &TheS16string(new_string)->data[0],len); },
863                   { copy_32bit_16bit(&TheS32string(string)->data[offset],
864                                      &TheS16string(new_string)->data[0],len); },
865                   { NOTREACHED; });
866     } else {
867       SstringCase(string,
868                   { copy_8bit_32bit(&TheS8string(string)->data[offset],
869                                     &TheS32string(new_string)->data[0],len); },
870                   { copy_16bit_32bit(&TheS16string(string)->data[offset],
871                                      &TheS32string(new_string)->data[0],len); },
872                   { copy_32bit_32bit(&TheS32string(string)->data[offset],
873                                      &TheS32string(new_string)->data[0],len); },
874                   { NOTREACHED; });
875     }
876   }
877   return new_string;
878 }
879 #endif
880 
881 /* UP: converts a string into a Simple-String.
882  coerce_ss(obj)
883  > obj: Lisp-object, should be a string.
884  < result: Simple-String with the same characters
885  can trigger GC */
coerce_ss(object obj)886 global maygc object coerce_ss (object obj) {
887  start:
888  #ifdef TYPECODES
889   switch (typecode(obj))
890  #else
891     if (orecordp(obj))
892       switch (Record_type(obj)) {
893         case_Rectype_Sstring_above;
894         case_Rectype_ostring_above;
895         default: break;
896       }
897   switch (0)
898  #endif
899     {
900       case_sstring:
901         /* Simple-String, returned unchanged */
902         DBGREALLOC(obj);
903         return obj;
904       case_ostring:
905         /* other string, copy it */
906         return copy_string(obj);
907       default:
908         break;
909     }
910   obj = check_string(obj); goto start;
911 }
912 
913 /* UP: converts a string into an immutable Simple-String.
914  coerce_imm_ss(obj)
915  > obj: Lisp-object, should be a string.
916  < result: immutable Simple-String with the same characters
917  can trigger GC */
coerce_imm_ss(object obj)918 global maygc object coerce_imm_ss (object obj)
919 {
920  start:
921  #ifdef TYPECODES
922   switch (typecode(obj))
923  #else
924     if (orecordp(obj))
925       switch (Record_type(obj)) {
926         case_Rectype_Sstring_above;
927         case_Rectype_ostring_above;
928         default: break;
929       }
930   switch (0)
931  #endif
932     {
933       case_sstring:
934         /* Simple-String */
935         if (sstring_immutable(TheSstring(obj)))
936           /* already immutable, return unchanged */
937           return obj;
938         /*FALLTHROUGH*/
939       case_ostring:
940         { /* other string, copy it */
941           var uintL len;
942           var uintL offset;
943           var object string = unpack_string_ro(obj,&len,&offset);
944           if (simple_nilarray_p(string)) {
945             if (len > 0) error_nilarray_retrieve();
946             return allocate_imm_string(0);
947           }
948           #ifdef ENABLE_UNICODE
949           #ifdef HAVE_SMALL_SSTRING
950           if (sstring_eltype(TheSstring(string)) == Sstringtype_8Bit) {
951             pushSTACK(string);
952             var object new_string = allocate_imm_s8string(len);
953             string = popSTACK();
954             if (len > 0)
955               copy_8bit_8bit(&TheS8string(string)->data[offset],
956                              &TheS8string(new_string)->data[0],len);
957             return new_string;
958           }
959           if (sstring_eltype(TheSstring(string)) == Sstringtype_16Bit) {
960             /* Check if all characters fit into an 8-bit character string. */
961             var bool fits_in_8bit = true;
962             if (len > 0) {
963               var const cint16* ptr = &TheS16string(string)->data[offset];
964               var uintL count = len;
965               do {
966                 if (!(*ptr < cint8_limit)) {
967                   fits_in_8bit = false;
968                   break;
969                 }
970                 ptr++;
971               } while (--count);
972             }
973             pushSTACK(string);
974             var object new_string =
975               (fits_in_8bit
976                ? allocate_imm_s8string(len)
977                : allocate_imm_s16string(len));
978             string = popSTACK();
979             if (len > 0) {
980               if (fits_in_8bit)
981                 copy_16bit_8bit(&TheS16string(string)->data[offset],
982                                 &TheS8string(new_string)->data[0],len);
983               else
984                 copy_16bit_16bit(&TheS16string(string)->data[offset],
985                                  &TheS16string(new_string)->data[0],len);
986             }
987             return new_string;
988           }
989           ASSERT(sstring_eltype(TheSstring(string)) == Sstringtype_32Bit);
990           /* We use alloca for small-simple-strings, therefore their length
991              should not be too large, or we risk an SP overflow and
992              core dump. */
993           if (len < 0x10000) {
994             /* Check if all characters fit into an 8-bit or 16-bit character
995                simple string: */
996             var bool fits_in_8bit = true;
997             var bool fits_in_16bit = true;
998             if (len > 0) {
999               var const cint32* ptr = &TheS32string(string)->data[offset];
1000               var uintL count = len;
1001               do {
1002                 if (!(*ptr < cint8_limit))
1003                   fits_in_8bit = false;
1004                 if (!(*ptr < cint16_limit)) {
1005                   fits_in_16bit = false;
1006                   break;
1007                 }
1008                 ptr++;
1009               } while (--count);
1010             }
1011             if (fits_in_16bit) {
1012               pushSTACK(string);
1013               var object new_string =
1014                 (fits_in_8bit
1015                  ? allocate_imm_s8string(len)
1016                  : allocate_imm_s16string(len));
1017               string = popSTACK();
1018               if (len > 0) {
1019                 if (fits_in_8bit)
1020                   copy_32bit_8bit(&TheS32string(string)->data[offset],
1021                                   &TheS8string(new_string)->data[0],len);
1022                 else
1023                   copy_32bit_16bit(&TheS32string(string)->data[offset],
1024                                    &TheS16string(new_string)->data[0],len);
1025               }
1026               return new_string;
1027             }
1028           }
1029           #endif
1030           pushSTACK(string);
1031           var object new_string = allocate_imm_s32string(len);
1032           string = popSTACK();
1033           if (len > 0)
1034             copy_32bit_32bit(&TheS32string(string)->data[offset],
1035                              &TheS32string(new_string)->data[0],len);
1036           return new_string;
1037           #else
1038           pushSTACK(string);
1039           var object new_string = allocate_imm_string(len);
1040           string = popSTACK();
1041           if (len > 0)
1042             copy_8bit_8bit(&TheS8string(string)->data[offset],
1043                            &TheS8string(new_string)->data[0],len);
1044           return new_string;
1045           #endif
1046         }
1047       default:
1048         break;
1049     }
1050   obj = check_string(obj); goto start;
1051 }
1052 
1053 #ifdef HAVE_SMALL_SSTRING
1054 /* UP: converts a string into a Normal-Simple-String.
1055  coerce_normal_ss(obj)
1056  > obj: Lisp-object, should be a string.
1057  < result: Normal-Simple-String with the same characters
1058  can trigger GC */
coerce_normal_ss(object obj)1059 global maygc object coerce_normal_ss (object obj)
1060 {
1061  start:
1062  #ifdef TYPECODES
1063   switch (typecode(obj))
1064  #else
1065     if (orecordp(obj))
1066       switch (Record_type(obj)) {
1067         case_Rectype_Sstring_above;
1068         case_Rectype_ostring_above;
1069         default: break;
1070       }
1071   switch (0)
1072  #endif
1073     {
1074       case_sstring:
1075         sstring_un_realloc(obj);
1076         if (sstring_eltype(TheSstring(obj)) == Sstringtype_32Bit)
1077           /* already a Normal-Simple-String, return unchanged */
1078           return obj;
1079         /*FALLTHROUGH*/
1080       case_ostring:
1081         /* other string, copy it */
1082         return copy_string_normal(obj);
1083       default:
1084         break;
1085     }
1086   obj = check_string(obj); goto start;
1087 }
1088 #endif
1089 
1090 #if 0 /* unused */
1091 #ifdef HAVE_SMALL_SSTRING
1092 /* UP: converts a string into an immutable Normal-Simple-String.
1093  coerce_imm_normal_ss(obj)
1094  > obj: Lisp-object, should be a string.
1095  < result: immutable Normal-Simple-String with the same characters
1096  can trigger GC */
1097 global maygc object coerce_imm_normal_ss (object obj)
1098 {
1099  start:
1100  #ifdef TYPECODES
1101   switch (typecode(obj))
1102  #else
1103     if (orecordp(obj))
1104       switch (Record_type(obj)) {
1105         case_Rectype_Sstring_above;
1106         case_Rectype_ostring_above;
1107         default: break;
1108       }
1109   switch (0)
1110  #endif
1111     {
1112       case_sstring:
1113         if (sstring_immutable(TheSstring(obj))
1114             && sstring_eltype(TheSstring(obj)) == Sstringtype_32Bit)
1115           /* immutable Normal-Simple-String, return unchanged */
1116           return obj;
1117         /*FALLTHROUGH*/
1118       case_ostring:
1119         { /* other string, copy it */
1120           var uintL len;
1121           var uintL offset;
1122           var object string = unpack_string_ro(obj,&len,&offset);
1123           pushSTACK(string);
1124           var object new_string = allocate_imm_string(len);
1125           string = popSTACK();
1126           if (len > 0) {
1127             SstringCase(string,
1128               { copy_8bit_32bit(&TheS8string(string)->data[offset],
1129                                 &TheS32string(new_string)->data[0],len); },
1130               { copy_16bit_32bit(&TheS16string(string)->data[offset],
1131                                  &TheS32string(new_string)->data[0],len); },
1132               { copy_32bit_32bit(&TheS32string(string)->data[offset],
1133                                  &TheS32string(new_string)->data[0],len); },
1134               { error_nilarray_retrieve(); });
1135           }
1136           return new_string;
1137         }
1138       default:
1139         break;
1140     }
1141   obj = check_string(obj); goto start;
1142 }
1143 #endif
1144 #endif
1145 
1146 LISPFUNNR(string_info,1)
1147 { /* (SYS::STRING-INFO str) => char-len(8/16/32); immutable-p; realloc-p */
1148   var object str = popSTACK();
1149   if (stringp(str)) {
1150     if (!simple_string_p(str)) {
1151       if ((Iarray_flags(str) & arrayflags_atype_mask) == Atype_NIL) goto other;
1152       do {
1153         str = TheIarray(str)->data;
1154       } while (!simple_string_p(str));
1155     }
1156     value3 = NIL;
1157     while (sstring_reallocatedp(TheSstring(str))) {
1158       value3 = T;
1159       str = TheSistring(str)->data;
1160     }
1161     value2 = (sstring_immutable(TheSstring(str)) ? T : NIL);
1162     value1 = fixnum(8 << sstring_eltype(TheSstring(str)));
1163   } else
1164    other:
1165     value1 = value2 = value3 = NIL;
1166   mv_count = 3;
1167 }
1168 
1169 /* UP: conversion of an object into a character
1170  coerce_char(obj)
1171  > obj: Lisp-object
1172  < result: Character or NIL */
coerce_char(object obj)1173 global object coerce_char (object obj) {
1174   if (charp(obj)) {
1175     return obj; /* return character unchanged */
1176   } else if (symbolp(obj)) {
1177     /* obj is a symbol */
1178     obj = TheSymbol(obj)->pname; goto string;
1179   } else if (stringp(obj))
1180   string: { /* obj is a string */
1181     var uintL len;
1182     var uintL offset;
1183     var object string = unpack_string_ro(obj,&len,&offset);
1184     /* at ptr are len characters */
1185     if (len==1)
1186       return code_char(schar(string,offset));
1187   } else if (nullpSv(coerce_fixnum_char_ansi) && posfixnump(obj)) {
1188     var uintV code = posfixnum_to_V(obj);
1189     if (code < char_code_limit)
1190       /* obj is a fixnum >=0, < char_code_limit */
1191       return code_char(as_chart(code));
1192   }
1193 #if defined(KEYBOARD)
1194   else if (typep_classname(obj,S(input_character))) {
1195     /* obj is an INPUT-CHARACTER. Call (SYS::INPUT-CHARACTER-CHAR obj): */
1196     pushSTACK(obj); funcall(S(input_character_char),1);
1197     return charp(value1) ? value1 : NIL;
1198   }
1199 #endif
1200   /* was none of it -> can not be converted into a character */
1201   return NIL; /* NIL as result */
1202 }
1203 
1204 /* character-names:
1205  Only the characters with font 0 and bits 0 have names. Among these
1206  are all non-graphic characters and the space.
1207  The reader also accepts
1208  - the syntax #\A for the character A (etc. for all characters),
1209  - the syntax #\^A for the character 'A'-64 (etc. for all control-characters
1210    of the ASCII-charset) and
1211  - the syntax #\Code231 for the character with the code 231 (decimal)
1212  for all characters out of font 0. */
1213 
1214 /* table of character-names:
1215  defined in CONSTOBJ.D, */
1216 #ifdef WIN32_CHARNAMES
1217   #define charname_table_length  14  /* length of the table */
1218   #define charname_table  ((gcv_object_t*)(&object_tab.charname_0)) /* table starts with charname_0 */
1219 #endif
1220 #ifdef UNIX_CHARNAMES
1221   #define charname_table_length  48  /* length of the table */
1222   #define charname_table  ((gcv_object_t*)(&object_tab.charname_0_1)) /* table starts with charname_0_1 */
1223 #endif
1224 /* table of codes for this name: */
1225 local const uintB charname_table_codes [charname_table_length]
1226   #ifdef WIN32_CHARNAMES
1227     = { 0,BEL,BS,TAB,NL,11,PG,CR,26,ESC,' ',RUBOUT,LF,27, };
1228   #endif
1229   #ifdef UNIX_CHARNAMES
1230     = { 0,7,BS,TAB,NL,LF,LF,PG,PG,CR,27,32,RUBOUT,127,
1231         0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
1232         20,21,22,23,24,25,26,27,28,29,30,31,32,127,
1233       };
1234   #endif
1235 /* the code charname_table_codes[i] belongs to the name charname_table[i]
1236    (for 0 <= i < charname_table_length). */
1237 
1238 #ifdef ENABLE_UNICODE
1239 #include "uniname.h"            /* from gnulib */
1240 #endif
1241 
1242 /* UP: return the name of a character
1243  char_name(code)
1244  > chart code: character code
1245  < result: simple-string (the name of the character) or NIL
1246  Note that the resulting string is ready to be output unmodified; no prior
1247  STRING-CAPITALIZE or so is needed.
1248  can trigger GC */
char_name(chart code)1249 global maygc object char_name (chart code) {
1250   var cint c = as_cint(code);
1251   {
1252     var const uintB* codes_ptr = &charname_table_codes[0];
1253     var const gcv_object_t* strings_ptr = &charname_table[0];
1254     var uintC count = charname_table_length;
1255     while (count--) {
1256       if (c == *codes_ptr++) /* compare code with charname_table_codes[i] */
1257         return *strings_ptr; /* return string charname_table[i] from the table */
1258       strings_ptr++;
1259     }
1260   }
1261   /* not found */
1262  #ifdef ENABLE_UNICODE
1263   /* Try to find the long name, from UnicodeDataFull.txt. It is the second
1264      semicolon separated field from (sys::unicode-attributes-line c). */
1265   #ifdef AWFULLY_SLOW
1266   {
1267     pushSTACK(fixnum(c));
1268     funcall(S(unicode_attributes_line),1);
1269     var object line = value1;
1270     if (!nullp(line)) {
1271       var uintL len = Sstring_length(line);
1272       var uintL i1;
1273       var uintL i2;
1274       for (i1 = 0; i1 < len; i1++)
1275         if (chareq(TheSstring(line)->data[i1],ascii(';'))) {
1276           i1++;
1277           for (i2 = i1; i2 < len; i2++)
1278             if (chareq(TheSstring(line)->data[i2],ascii(';'))) {
1279               if (!chareq(TheSstring(line)->data[i1],ascii('<'))) {
1280                 var object name = subsstring(line,i1,i2);
1281                 /* Replace ' ' with '_': */
1282                 var uintL count = i2-i1;
1283                 if (count > 0) {
1284                   var chart* ptr = &TheSstring(name)->data[0];
1285                   do {
1286                     if (chareq(*ptr,ascii(' ')))
1287                       *ptr = ascii('_');
1288                     ptr++;
1289                   } while (--count);
1290                 }
1291                 return name;
1292               }
1293               break;
1294             }
1295           break;
1296         }
1297     }
1298   }
1299   #else
1300   /* Here is a much faster implementation. */
1301   {
1302     var char buf[8+UNINAME_MAX];
1303     if (unicode_character_name(c,buf+8)) {
1304       var char* name = buf+8;
1305       /* Fix collision between #\Bell = #\Code7 and #\U0001F514
1306          and between #\Page = #\Code12 and #\U0001F5CF. */
1307       if (c == 0x1F514 || c == 0x1F5CF) {
1308         copy_mem_b(buf,"UNICODE_",8);
1309         name = buf;
1310       }
1311       /* Turn the word separators into underscores. */
1312       var char* ptr = name;
1313       while (*ptr != '\0') {
1314         if (*ptr == ' ')
1315           *ptr = '_';
1316         ptr++;
1317       }
1318       return n_char_to_string(name,ptr-name,Symbol_value(S(ascii)));
1319     }
1320   }
1321   #endif /* AWFULLY_SLOW */
1322   /* CLHS (glossary "name" 5) specifies that all non-graphic characters have
1323      a name. Let's give a name to all of them, it's more uniform (and avoids
1324      printer errors). */
1325   /* if (!graphic_char_p(code)) */
1326   {
1327     local const char hex_table[] = "0123456789ABCDEF";
1328     if (c < 0x10000) {
1329       var object name = allocate_s8string(5);
1330       TheS8string(name)->data[0] = as_cint(ascii('U'));
1331       TheS8string(name)->data[1] = as_cint(ascii(hex_table[(c>>12)&0x0F]));
1332       TheS8string(name)->data[2] = as_cint(ascii(hex_table[(c>>8)&0x0F]));
1333       TheS8string(name)->data[3] = as_cint(ascii(hex_table[(c>>4)&0x0F]));
1334       TheS8string(name)->data[4] = as_cint(ascii(hex_table[c&0x0F]));
1335       return name;
1336     } else {
1337       var object name = allocate_s8string(9);
1338       TheS8string(name)->data[0] = as_cint(ascii('U'));
1339       TheS8string(name)->data[1] = as_cint(ascii('0'));
1340       TheS8string(name)->data[2] = as_cint(ascii('0'));
1341       TheS8string(name)->data[3] = as_cint(ascii(hex_table[(c>>20)&0x0F]));
1342       TheS8string(name)->data[4] = as_cint(ascii(hex_table[(c>>16)&0x0F]));
1343       TheS8string(name)->data[5] = as_cint(ascii(hex_table[(c>>12)&0x0F]));
1344       TheS8string(name)->data[6] = as_cint(ascii(hex_table[(c>>8)&0x0F]));
1345       TheS8string(name)->data[7] = as_cint(ascii(hex_table[(c>>4)&0x0F]));
1346       TheS8string(name)->data[8] = as_cint(ascii(hex_table[c&0x0F]));
1347       return name;
1348     }
1349   }
1350  #else /* no ENABLE_UNICODE */
1351   {
1352     var object name = allocate_string(1);
1353     TheSnstring(name)->data[0] = ascii(c);
1354     return name;
1355   }
1356  #endif
1357   return NIL;
1358 }
1359 
1360 /* UP: find the character with the given name
1361  name_char(string)
1362  > string: String
1363  < result: character with the name, or NIL if does not exist */
name_char(object string)1364 global object name_char (object string) {
1365   {
1366     var const uintB* codes_ptr = &charname_table_codes[0];
1367     var const gcv_object_t* strings_ptr = &charname_table[0];
1368     var uintC count = charname_table_length;
1369     while (count--) {
1370       if (string_equal(string,*strings_ptr++)) /* compare string with charname_table[i] */
1371         return code_char(as_chart(*codes_ptr)); /* return Code charname_table_codes[i] from the table */
1372       codes_ptr++;
1373     }
1374   }
1375   /* no character with the name name found */
1376  #ifdef ENABLE_UNICODE
1377   {
1378     var uintL len;
1379     var uintL offset;
1380     string = unpack_string_ro(string,&len,&offset);
1381     if (len > 1 && len < UNINAME_MAX) {
1382       var const chart* charptr;
1383       unpack_sstring_alloca(string,len,offset, charptr=);
1384       /* Test for Uxxxx or Uxxxxxxxx syntax. */
1385       if ((len == 5 || len == 9)
1386           && (chareq(charptr[0],ascii('U'))
1387               || chareq(charptr[0],ascii('u')))) {
1388         /* decode a hexadecimal number: */
1389         var uintL code = 0;
1390         var uintL index = 1;
1391         var const chart* tmpcharptr = charptr+1;
1392         while (1) {
1393           var cint c = as_cint(*tmpcharptr++); /* next character */
1394           /* should be a hexadecimal digit: */
1395           if (c > 'f') break;
1396           if (c >= 'a') { c -= 'a'-'A'; }
1397           if (c < '0') break;
1398           if (c <= '9') { c = c - '0'; }
1399           else if ((c >= 'A') && (c <= 'F')) { c = c - 'A' + 10; }
1400           else break;
1401           code = 16*code + c; /* put in the digit */
1402           /* code should be < char_code_limit: */
1403           if (code >= char_code_limit) break; /* should not occur */
1404           index++;
1405           if (index == len) {
1406             /* Character name was "Uxxxx" with code = xxxx < char_code_limit.
1407                Its length should be 5 or 9, depending on xxxx < 0x10000. */
1408             if (!(len == (code < 0x10000 ? 5 : 9)))
1409               break;
1410             /* Don't test for graphic_char_p - see comment in char_name().
1411                This also avoids special-casing the #\Uxxxx syntax in io.d. */
1412             /* if (!graphic_char_p(as_chart(code))) */
1413             return code_char(as_chart(code));
1414           }
1415         }
1416       }
1417       { /* Test for word1_word2_... syntax.
1418            Also convert to upper case on the fly. */
1419         var char buf[UNINAME_MAX];
1420         var char* ptr = buf;
1421         while (1) {
1422           var cint c = as_cint(*charptr++);
1423           if (!(c >= ' ' && c <= '~'))
1424             break;
1425           if (c >= 'a' && c <= 'z')
1426             c = c - 'a' + 'A';
1427           *ptr++ = (char)(c == '_' ? ' ' : c);
1428           if (--len == 0)
1429             goto filled_buf;
1430         }
1431         if (false) {
1432         filled_buf:
1433           *ptr = '\0';
1434           var cint32 code;
1435           /* Fix collision between #\Bell = #\Code7 and #\U0001F514
1436              and between #\Page = #\Code12 and #\U0001F5CF. */
1437           if (asciz_equal(buf,"UNICODE BELL"))
1438             code = 0x1F514;
1439           else if (asciz_equal(buf,"UNICODE PAGE"))
1440             code = 0x1F5CF;
1441           else
1442             code = unicode_name_character(buf);
1443           if (code != UNINAME_INVALID)
1444             return code_char(as_chart(code));
1445         }
1446       }
1447     }
1448   }
1449  #else /* no ENABLE_UNICODE */
1450   return coerce_char(string);
1451  #endif
1452   return NIL;
1453 }
1454 
1455 LISPFUNNF(standard_char_p,1)
1456 { /* (STANDARD-CHAR-P char), CLTL p. 234
1457   (standard-char-p char) ==
1458    (or (char= char #\Newline) (char<= #\Space char #\~))
1459  Standard-Chars have a code c, with
1460        $20 <= c <= $7E or c = NL. */
1461   var object arg = check_char(popSTACK());
1462   var chart ch = char_code(arg);
1463   var cint c = as_cint(ch);
1464   VALUES_IF(standard_cint_p(c));
1465 }
1466 
1467 LISPFUNNF(graphic_char_p,1)
1468 { /* (GRAPHIC-CHAR-P char), CLTL p. 234 */
1469   var object arg = check_char(popSTACK());
1470   VALUES_IF(graphic_char_p(char_code(arg)));
1471 }
1472 
1473 LISPFUNN(char_width,1) /* (CHAR-WIDTH char) */
1474 {
1475   var object arg = check_char(popSTACK());
1476   VALUES1(fixnum(char_width(char_code(arg))));
1477 }
1478 
1479 LISPFUNNF(string_char_p,1)
1480 { /* (STRING-CHAR-P char), CLTL p. 235 - all characters are string-chars. */
1481   var object arg = check_char(popSTACK());
1482   VALUES1(T);
1483 }
1484 
1485 #if (base_char_code_limit < char_code_limit)
1486 LISPFUNN(base_char_p,1) /* (SYSTEM::BASE-CHAR-P char) */
1487 {
1488   var object arg = check_char(popSTACK());
1489   VALUES_IF(as_cint(char_code(arg)) < base_char_code_limit);
1490 }
1491 #endif
1492 
1493 LISPFUNNF(alpha_char_p,1)
1494 { /* (ALPHA-CHAR-P char), CLTL p. 235 - test with ALPHAP. */
1495   var object arg = check_char(popSTACK());
1496   VALUES_IF(alphap(char_code(arg)));
1497 }
1498 
1499 LISPFUNNF(upper_case_p,1)
1500 { /* (UPPER-CASE-P char), CLTL p. 235: upper-case-characters are those with
1501   a code c with 0 <= c < $100, that are different from (downcase char) . */
1502   var object arg = check_char(popSTACK());
1503   var chart ch = char_code(arg);
1504   VALUES_IF(!chareq(down_case(ch),ch));
1505 }
1506 
1507 LISPFUNNF(lower_case_p,1)
1508 { /* (LOWER-CASE-P char), CLTL p. 235: lower-case-characters are those with
1509   a code c with 0 <= c < $100, that are different from (upcase char) . */
1510   var object arg = check_char(popSTACK());
1511   var chart ch = char_code(arg);
1512   VALUES_IF(!chareq(up_case(ch),ch));
1513 }
1514 
1515 LISPFUNNF(both_case_p,1)
1516 { /* (BOTH-CASE-P char), CLTL p. 235
1517  (both-case-p char) == (or (upper-case-p char) (lower-case-p char))
1518  both-case-characters are those with a code c with 0 <= c < $100.
1519  For them (downcase char) and (upcase char) are different. */
1520   var object arg = check_char(popSTACK());
1521   var chart ch = char_code(arg);
1522   VALUES_IF(!chareq(down_case(ch),up_case(ch)));
1523 }
1524 
1525 /* UP: Checks an optional radix-argument
1526  test_radix_arg()
1527  > STACK_0: argument, default is 10
1528  < result: radix, an integer >=2, <=36
1529  removes one element from STACK
1530  can trigger GC */
test_radix_arg(void)1531 local maygc uintWL test_radix_arg (void) {
1532   var object arg = popSTACK(); /* argument */
1533  restart_radix_check:
1534   if (!boundp(arg))
1535     return 10;
1536   if (posfixnump(arg)) {
1537     var uintV radix = posfixnum_to_V(arg);
1538     if ((2 <= radix) && (radix <= 36))
1539       return radix;
1540   }
1541   pushSTACK(NIL); /* no PLACE */
1542   pushSTACK(arg); /* TYPE-ERROR slot DATUM */
1543   pushSTACK(O(type_radix)); /* TYPE-ERROR slot EXPECTED-TYPE */
1544   pushSTACK(arg); pushSTACK(TheSubr(subr_self)->name);
1545   check_value(type_error,GETTEXT("~S: the radix must be an integer between 2 and 36, not ~S"));
1546   arg = value1;
1547   goto restart_radix_check;
1548 }
1549 
1550 LISPFUN(digit_char_p,seclass_foldable,1,1,norest,nokey,0,NIL)
1551 { /* (DIGIT-CHAR-P char [radix]), CLTL p. 236
1552  method:
1553  test, if radix is an integer >=2 and <=36 .
1554  char must be a character <= 'z' , otherwise return NIL as result.
1555  if radix<=10: c must be >= '0' and < '0'+radix , else NIL.
1556  if radix>=10: c must be >= '0' and <= '9' or
1557                   (upcase c) must be >= 'A' and < 'A'-10+radix , else NIL. */
1558   var uintWL radix = test_radix_arg(); /* basis >=2, <=36 */
1559   var object arg = check_char(popSTACK());
1560   var chart ch = char_code(arg);
1561   var cint c = as_cint(ch);
1562  #ifdef ENABLE_UNICODE
1563   switch (c >> 8) {
1564     case 0x00: /* ASCII */
1565       if ((c >= 0x0030) && (c <= 0x0039)) { c -= 0x0030; break; }
1566       if ((c >= 0x0041) && (c <= 0x005a)) { c -= 0x0037; break; }
1567       if ((c >= 0x0061) && (c <= 0x007a)) { c -= 0x0057; break; }
1568       goto no;
1569     case 0x06: /* (EXTENDED)?_ARABIC-INDIC_DIGIT_* */
1570       if ((c >= 0x0660) && (c <= 0x0669)) { c -= 0x0660; break; }
1571       if ((c >= 0x06f0) && (c <= 0x06f9)) { c -= 0x06f0; break; }
1572       goto no;
1573     case 0x09: /* DEVANAGARI_DIGIT_*, BENGALI_DIGIT_* */
1574       if ((c >= 0x0966) && (c <= 0x096f)) { c -= 0x0966; break; }
1575       if ((c >= 0x09e6) && (c <= 0x09ef)) { c -= 0x09e6; break; }
1576       goto no;
1577     case 0x0A: /* GURMUKHI_DIGIT_*, GUJARATI_DIGIT_* */
1578       if ((c >= 0x0a66) && (c <= 0x0a6f)) { c -= 0x0a66; break; }
1579       if ((c >= 0x0ae6) && (c <= 0x0aef)) { c -= 0x0ae6; break; }
1580       goto no;
1581     case 0x0B: /* ORIYA_DIGIT_*, TAMIL_DIGIT_* */
1582       if ((c >= 0x0b66) && (c <= 0x0b6f)) { c -= 0x0b66; break; }
1583       if ((c >= 0x0be7) && (c <= 0x0bef)) { c -= 0x0be6; break; }
1584       goto no;
1585     case 0x0C: /* TELUGU_DIGIT_*, KANNADA_DIGIT_* */
1586       if ((c >= 0x0c66) && (c <= 0x0c6f)) { c -= 0x0c66; break; }
1587       if ((c >= 0x0ce6) && (c <= 0x0cef)) { c -= 0x0ce6; break; }
1588       goto no;
1589     case 0x0D: /* MALAYALAM_DIGIT_* */
1590       if ((c >= 0x0d66) && (c <= 0x0d6f)) { c -= 0x0d66; break; }
1591       goto no;
1592     case 0x0E: /* THAI_DIGIT_*, LAO_DIGIT_* */
1593       if ((c >= 0x0e50) && (c <= 0x0e59)) { c -= 0x0e50; break; }
1594       if ((c >= 0x0ed0) && (c <= 0x0ed9)) { c -= 0x0ed0; break; }
1595       goto no;
1596     case 0x0F: /* TIBETAN_DIGIT_* */
1597       if ((c >= 0x0f20) && (c <= 0x0f29)) { c -= 0x0f20; break; }
1598       goto no;
1599     case 0x10: /* MYANMAR_DIGIT_* */
1600       if ((c >= 0x1040) && (c <= 0x1049)) { c -= 0x1040; break; }
1601       goto no;
1602     case 0x13: /* ETHIOPIC_DIGIT_* */
1603       if ((c >= 0x1369) && (c <= 0x1371)) { c -= 0x1369; break; }
1604       goto no;
1605     case 0x17: /* KHMER_DIGIT_* */
1606       if ((c >= 0x17E0) && (c <= 0x17E9)) { c -= 0x17E0; break; }
1607       goto no;
1608     case 0x18: /* MONGOLIAN_DIGIT_* */
1609       if ((c >= 0x1810) && (c <= 0x1819)) { c -= 0x1810; break; }
1610       goto no;
1611     case 0xFF: /* FULLWIDTH_DIGIT_* */
1612       if ((c >= 0xff10) && (c <= 0xff19)) { c -= 0xff10; break; }
1613       goto no;
1614     case 0x1d7: /* MATHEMATICAL_* SANS-SERIF/BOLD DOUBLE-STRUCK MONOSPACE */
1615       if ((c >= 0x1d7ce) && (c <= 0x1d7ff)) { c -= 0x1d7ce; c %= 10; break; }
1616       goto no;
1617     default:
1618       goto no;
1619   }
1620  #else
1621   if (c > 'z') goto no; /* too big -> no */
1622   if (c >= 'a') { c -= 'a'-'A'; } /* convert character >='a',<='z' into uppercase letter */
1623   /* now: $00 <= ch <= $60. */
1624   if (c < '0') goto no;
1625   /* convert $30 <= c <= $60 into number value: */
1626   if (c <= '9') { c = c - '0'; }
1627   else if (c >= 'A') { c = c - 'A' + 10; }
1628   else goto no;
1629  #endif
1630   /* now, c is the number value of the digit, >=0, <=41. */
1631   if (c >= radix) goto no; /* only valid, if 0 <= c < radix. */
1632   /* return value as fixnum: */
1633   VALUES1(fixnum(c)); return;
1634  no: VALUES1(NIL); return;
1635 }
1636 
1637 LISPFUNNF(alphanumericp,1)
1638 { /* (ALPHANUMERICP char), CLTL p. 236 alphanumeric characters are the
1639    digits '0',...,'9' and the alphabetic characters. */
1640   var object arg = check_char(popSTACK());
1641   VALUES_IF(alphanumericp(char_code(arg)));
1642 }
1643 
1644 /* comparison functions for characters:
1645  The comparisons CHAR=,... compare the entire oint (or equivalent,
1646  only the cint).
1647  The comparisons CHAR-EQUAL,... convert the codes into uppercase letters and
1648  compare those. */
1649 
1650 /* UP: tests, if all argcount+1 arguments below args_pointer
1651  are characters. if not, Error.
1652  > argcount: number of arguments - 1
1653  > args_pointer: pointer to the arguments
1654  can trigger GC */
test_char_args(uintC argcount,gcv_object_t * args_pointer)1655 local maygc void test_char_args (uintC argcount, gcv_object_t* args_pointer) {
1656   do {
1657     var gcv_object_t* argptr = &NEXT(args_pointer);
1658     var object arg = *argptr; /* next argument */
1659     if (!charp(arg)) /* must be a character */
1660       *argptr = check_char(arg);
1661   } while (argcount--); /* sic: not --argcount! */
1662 }
1663 
1664 /* UP: tests, if all argcount+1 arguments below args_pointer
1665  are characters. If not, error. Discards bits and font
1666  and transforms them into uppercase letters.
1667  > argcount: number of arguments - 1
1668  > args_pointer: pointer to the arguments
1669  can trigger GC */
test_char_args_upcase(uintC argcount,gcv_object_t * args_pointer)1670 local maygc void test_char_args_upcase (uintC argcount, gcv_object_t* args_pointer) {
1671   do {
1672     var gcv_object_t* argptr = &NEXT(args_pointer);
1673     var object arg = *argptr; /* next argument */
1674     if (!charp(arg)) /* must be a character */
1675       arg = check_char(arg);
1676     /* replace by uppercase letters: */
1677     *argptr = code_char(up_case(char_code(arg)));
1678   } while (argcount--); /* sic: not --argcount! */
1679 }
1680 
1681 /* UP: (CHAR= char {char}) for checked arguments */
char_eq(uintC argcount,gcv_object_t * args_pointer)1682 local Values char_eq (uintC argcount, gcv_object_t* args_pointer)
1683 { /* method:
1684  n+1 arguments Arg[0..n].
1685  x:=Arg[n].
1686  for i:=n-1 to 0 step -1 do ( if Arg[i]/=x then return(NIL) ), return(T). */
1687   var object x = popSTACK(); /* take last argument */
1688   while (argcount--) {
1689     if (!eq(popSTACK(),x))
1690       goto no;
1691   }
1692  yes: VALUES1(T); goto ok;
1693  no: VALUES1(NIL); goto ok;
1694  ok: set_args_end_pointer(args_pointer);
1695 }
1696 
1697 /* UP: (CHAR/= char {char}) for checked arguments */
char_noteq(uintC argcount,gcv_object_t * args_pointer)1698 local Values char_noteq (uintC argcount, gcv_object_t* args_pointer)
1699 { /* method:
1700  n+1 arguments Arg[0..n].
1701  for j:=n-1 to 0 step -1 do
1702    x:=Arg[j+1], for i:=j to 0 step -1 do
1703                    if Arg[i]=x then return(NIL),
1704  return(T). */
1705   var gcv_object_t* arg_j_ptr = args_end_pointer;
1706   var uintC j = argcount;
1707   while (j!=0) {
1708     var object x = BEFORE(arg_j_ptr); /* second last argument */
1709     /* compare with all previous arguments: */
1710     var gcv_object_t* arg_i_ptr = arg_j_ptr;
1711     var uintC i = j;
1712     do {
1713       if (eq(BEFORE(arg_i_ptr),x))
1714         goto no;
1715     } while (--i);
1716     j--;
1717   }
1718  yes: VALUES1(T); goto ok;
1719  no: VALUES1(NIL); goto ok;
1720  ok: set_args_end_pointer(args_pointer);
1721 }
1722 
1723 /* UP: (CHAR< char {char}) for checked arguments */
char_less(uintC argcount,gcv_object_t * args_pointer)1724 local Values char_less (uintC argcount, gcv_object_t* args_pointer)
1725 { /* method:
1726  n+1 Arguments Arg[0..n].
1727  for i:=n to 1 step -1 do
1728     x:=Arg[i], if x char<= Arg[i-1] then return(NIL),
1729  return(T). */
1730   while (argcount--) {
1731     var object x = popSTACK();
1732     if (as_oint(x) <= as_oint(STACK_0))
1733       goto no;
1734   }
1735  yes: VALUES1(T); goto ok;
1736  no: VALUES1(NIL); goto ok;
1737  ok: set_args_end_pointer(args_pointer);
1738 }
1739 
1740 /* UP: (CHAR> char {char}) for checked arguments */
char_greater(uintC argcount,gcv_object_t * args_pointer)1741 local Values char_greater (uintC argcount, gcv_object_t* args_pointer)
1742 { /* method:
1743  n+1 arguments Arg[0..n].
1744  for i:=n to 1 step -1 do
1745     x:=Arg[i], if x char>= Arg[i-1] then return(NIL),
1746  return(T). */
1747   while (argcount--) {
1748     var object x = popSTACK();
1749     if (as_oint(x) >= as_oint(STACK_0))
1750       goto no;
1751   }
1752  yes: VALUES1(T); goto ok;
1753  no: VALUES1(NIL); goto ok;
1754  ok: set_args_end_pointer(args_pointer);
1755 }
1756 
1757 /* UP: (CHAR<= char {char}) for checked arguments */
char_ltequal(uintC argcount,gcv_object_t * args_pointer)1758 local Values char_ltequal (uintC argcount, gcv_object_t* args_pointer)
1759 { /* method:
1760  n+1 arguments Arg[0..n].
1761  for i:=n to 1 step -1 do
1762     x:=Arg[i], if x char< Arg[i-1] then return(NIL),
1763  return(T). */
1764   while (argcount--) {
1765     var object x = popSTACK();
1766     if (as_oint(x) < as_oint(STACK_0))
1767       goto no;
1768   }
1769  yes: VALUES1(T); goto ok;
1770  no: VALUES1(NIL); goto ok;
1771  ok: set_args_end_pointer(args_pointer);
1772 }
1773 
1774 /* UP: (CHAR>= char {char}) for checked arguments */
char_gtequal(uintC argcount,gcv_object_t * args_pointer)1775 local Values char_gtequal (uintC argcount, gcv_object_t* args_pointer)
1776 { /* method:
1777  n+1 arguments Arg[0..n].
1778  for i:=n to 1 step -1 do
1779     x:=Arg[i], if x char> Arg[i-1] then return(NIL),
1780  return(T). */
1781   while (argcount--) {
1782     var object x = popSTACK();
1783     if (as_oint(x) > as_oint(STACK_0))
1784       goto no;
1785   }
1786  yes: VALUES1(T); goto ok;
1787  no: VALUES1(NIL); goto ok;
1788  ok: set_args_end_pointer(args_pointer);
1789 }
1790 
1791 LISPFUN(char_eq,seclass_foldable,1,0,rest,nokey,0,NIL)
1792 { /* (CHAR= char {char}), CLTL p. 237 */
1793   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1794   test_char_args(argcount,args_pointer);
1795   return_Values char_eq(argcount,args_pointer);
1796 }
1797 
1798 LISPFUN(char_noteq,seclass_foldable,1,0,rest,nokey,0,NIL)
1799 { /* (CHAR/= char {char}), CLTL p. 237 */
1800   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1801   test_char_args(argcount,args_pointer);
1802   return_Values char_noteq(argcount,args_pointer);
1803 }
1804 
1805 LISPFUN(char_less,seclass_foldable,1,0,rest,nokey,0,NIL)
1806 { /* (CHAR< char {char}), CLTL p. 237 */
1807   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1808   test_char_args(argcount,args_pointer);
1809   return_Values char_less(argcount,args_pointer);
1810 }
1811 
1812 LISPFUN(char_greater,seclass_foldable,1,0,rest,nokey,0,NIL)
1813 { /* (CHAR> char {char}), CLTL p. 237 */
1814   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1815   test_char_args(argcount,args_pointer);
1816   return_Values char_greater(argcount,args_pointer);
1817 }
1818 
1819 LISPFUN(char_ltequal,seclass_foldable,1,0,rest,nokey,0,NIL)
1820 { /* (CHAR<= char {char}), CLTL p. 237 */
1821   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1822   test_char_args(argcount,args_pointer);
1823   return_Values char_ltequal(argcount,args_pointer);
1824 }
1825 
1826 LISPFUN(char_gtequal,seclass_foldable,1,0,rest,nokey,0,NIL)
1827 { /* (CHAR>= char {char}), CLTL p. 237 */
1828   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1829   test_char_args(argcount,args_pointer);
1830   return_Values char_gtequal(argcount,args_pointer);
1831 }
1832 
1833 LISPFUN(char_equal,seclass_foldable,1,0,rest,nokey,0,NIL)
1834 { /* (CHAR-EQUAL char {char}), CLTL p. 239 */
1835   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1836   test_char_args_upcase(argcount,args_pointer);
1837   return_Values char_eq(argcount,args_pointer);
1838 }
1839 
1840 LISPFUN(char_not_equal,seclass_foldable,1,0,rest,nokey,0,NIL)
1841 { /* (CHAR-NOT-EQUAL char {char}), CLTL p. 239 */
1842   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1843   test_char_args_upcase(argcount,args_pointer);
1844   return_Values char_noteq(argcount,args_pointer);
1845 }
1846 
1847 LISPFUN(char_lessp,seclass_foldable,1,0,rest,nokey,0,NIL)
1848 { /* (CHAR-LESSP char {char}), CLTL p. 239 */
1849   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1850   test_char_args_upcase(argcount,args_pointer);
1851   return_Values char_less(argcount,args_pointer);
1852 }
1853 
1854 LISPFUN(char_greaterp,seclass_foldable,1,0,rest,nokey,0,NIL)
1855 { /* (CHAR-GREATERP char {char}), CLTL p. 239 */
1856   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1857   test_char_args_upcase(argcount,args_pointer);
1858   return_Values char_greater(argcount,args_pointer);
1859 }
1860 
1861 LISPFUN(char_not_greaterp,seclass_foldable,1,0,rest,nokey,0,NIL)
1862 { /* (CHAR-NOT-GREATERP char {char}), CLTL p. 239 */
1863   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1864   test_char_args_upcase(argcount,args_pointer);
1865   return_Values char_ltequal(argcount,args_pointer);
1866 }
1867 
1868 LISPFUN(char_not_lessp,seclass_foldable,1,0,rest,nokey,0,NIL)
1869 { /* (CHAR-NOT-LESSP char {char}), CLTL p. 239 */
1870   var gcv_object_t* args_pointer = rest_args_pointer STACKop 1;
1871   test_char_args_upcase(argcount,args_pointer);
1872   return_Values char_gtequal(argcount,args_pointer);
1873 }
1874 
1875 LISPFUNNF(char_code,1)
1876 { /* (CHAR-CODE char), CLTL p. 239 */
1877   var object arg = check_char(popSTACK());
1878   VALUES1(fixnum(as_cint(char_code(arg)))); /* ascii-code as fixnum */
1879 }
1880 
1881 LISPFUNNF(code_char,1)
1882 { /* (CODE-CHAR code) */
1883   var object codeobj = popSTACK(); /* code-argument */
1884   if (!integerp(codeobj)) {
1885     /* code-argument is not an integer. */
1886     pushSTACK(codeobj); /* TYPE-ERROR slot DATUM */
1887     pushSTACK(S(integer)); /* TYPE-ERROR slot EXPECTED-TYPE */
1888     pushSTACK(codeobj); pushSTACK(TheSubr(subr_self)->name);
1889     error(type_error,
1890            GETTEXT("~S: the code argument should be an integer, not ~S"));
1891   }
1892   /* codeobj is now an integer. */
1893   var uintV code;
1894   /* test, if  0 <= code < char_code_limit : */
1895   if (posfixnump(codeobj)
1896       && ((code = posfixnum_to_V(codeobj)) < char_code_limit)) {
1897     VALUES1(code_char(as_chart(code))); /* handicraft character */
1898   } else {
1899     VALUES1(NIL); /* else value NIL */
1900   }
1901 }
1902 
1903 LISPFUNNR(character,1)
1904 { /* (CHARACTER object), CLTL p. 241 */
1905   var object trial = coerce_char(STACK_0); /* convert argument into character */
1906   if (nullp(trial)) { /* unsuccessfully? */
1907     /* Argument still in STACK_0, TYPE-ERROR slot DATUM */
1908     pushSTACK(O(type_designator_character)); /* TYPE-ERROR slot EXPECTED-TYPE*/
1909     pushSTACK(STACK_1);
1910     pushSTACK(TheSubr(subr_self)->name);
1911     error(type_error,GETTEXT("~S: cannot coerce ~S to a character"));
1912   } else {
1913     VALUES1(trial); skipSTACK(1);
1914   }
1915 }
1916 
1917 LISPFUNNF(char_upcase,1)
1918 { /* (CHAR-UPCASE char), CLTL p. 241 */
1919   var object arg = check_char(popSTACK());
1920   VALUES1(code_char(up_case(char_code(arg)))); /* convert into uppercase letters */
1921 }
1922 
1923 LISPFUNNF(char_downcase,1)
1924 { /* (CHAR-DOWNCASE char), CLTL p. 241 */
1925   var object arg = check_char(popSTACK());
1926   VALUES1(code_char(down_case(char_code(arg)))); /* convert into lowercase letters */
1927 }
1928 
1929 LISPFUN(digit_char,seclass_foldable,1,1,norest,nokey,0,NIL)
1930 { /* (DIGIT-CHAR weight [radix]), CLTL2 p. 384
1931  method:
1932  all arguments have to be integers, radix between 2 and 36.
1933  if 0 <= weight < radix, construct
1934      a character from '0',...,'9','A',...,'Z' with value weight.
1935  else value NIL. */
1936   var uintWL radix = test_radix_arg(); /* radix-argument, >=2, <=36 */
1937   var object weightobj = popSTACK(); /* weight-argument */
1938   if (!integerp(weightobj)) {
1939     /* weight-Argument is not an integer. */
1940     pushSTACK(weightobj); /* TYPE-ERROR slot DATUM */
1941     pushSTACK(S(integer)); /* TYPE-ERROR slot EXPECTED-TYPE */
1942     pushSTACK(weightobj); pushSTACK(TheSubr(subr_self)->name);
1943     error(type_error,
1944            GETTEXT("~S: the weight argument should be an integer, not ~S"));
1945   }
1946   /* weightobj is now an integer. */
1947   /* test, if 0<=weight<radix, else NIL: */
1948   var uintV weight;
1949   if (posfixnump(weightobj)
1950       && ((weight = posfixnum_to_V(weightobj)) < radix)) {
1951     weight = weight + '0'; /* convert into digit */
1952     if (weight > '9')
1953       weight += 'A'-'0'-10; /* or turn it into a letter */
1954     VALUES1(ascii_char(weight)); /* handicraft character */
1955   } else
1956     VALUES1(NIL);
1957 }
1958 
1959 LISPFUNNF(char_int,1)
1960 { /* (CHAR-INT char), CLTL p. 242 */
1961   var object arg = check_char(popSTACK());
1962   VALUES1(fixnum(as_cint(char_code(arg))));
1963 }
1964 
1965 LISPFUNNF(int_char,1)
1966 { /* (INT-CHAR integer), CLTL p. 242 */
1967   var object arg = popSTACK(); /* integer-Argument */
1968   if (integerp(arg)) {
1969     /* turn into a character if 0 <= arg < char_code_limit, else NIL */
1970     var uintV i;
1971     if ((posfixnump(arg)) && ((i = posfixnum_to_V(arg)) < char_code_limit)) {
1972       VALUES1(code_char(as_chart(i)));
1973     } else {
1974       VALUES1(NIL);
1975     }
1976   } else { /* arg not an integer -> error: */
1977     pushSTACK(arg); /* TYPE-ERROR slot DATUM */
1978     pushSTACK(S(integer)); /* TYPE-ERROR slot EXPECTED-TYPE */
1979     pushSTACK(arg); pushSTACK(TheSubr(subr_self)->name);
1980     error(type_error,GETTEXT("~S: argument should be an integer, not ~S"));
1981   }
1982 }
1983 
1984 LISPFUNNF(char_name,1)
1985 { /* (CHAR-NAME char), CLTL p. 242 */
1986   var object arg = check_char(popSTACK());
1987   VALUES1(char_name(char_code(arg)));
1988 }
1989 
1990 
1991 /* Support for case-inverted packages. */
1992 
1993 /* Converts a character to opposite case.
1994  invert_case(ch)
1995  > ch: a character
1996  < result: a character, either ch or up_case(ch) or down_case(ch)
1997  Note that always invert_case(invert_case(ch)) == ch. */
invert_case(chart ch)1998 global chart invert_case (chart ch) {
1999   var chart up = up_case(ch);
2000   if (!chareq(ch,up))
2001     return up;
2002   var chart down = down_case(ch);
2003   if (!chareq(ch,down))
2004     return down;
2005   return ch;
2006 }
2007 
2008 LISPFUNNF(char_invertcase,1)
2009 { /* (EXT:CHAR-INVERTCASE char) */
2010   var object arg = check_char(popSTACK());
2011   VALUES1(code_char(invert_case(char_code(arg))));
2012 }
2013 
2014 /* UP: compares two strings of equal length for equality modulo case-invert
2015  > string1,offset1: here are the addressed characters in string1
2016  > string2,offset2: here are the addressed characters in string2
2017  > len: number of addressed characters in String1 and in String2, > 0
2018  < result: true if equal, else false. */
string_eqcomp_inverted(object string1,uintL offset1,object string2,uintL offset2,uintL len)2019 local bool string_eqcomp_inverted (object string1, uintL offset1, object string2,
2020                                    uintL offset2, uintL len) {
2021   SstringDispatch(string1,X1, {
2022     var const cintX1* charptr1 = &((SstringX1)TheVarobject(string1))->data[offset1];
2023     SstringDispatch(string2,X2, {
2024       var const cintX2* charptr2 = &((SstringX2)TheVarobject(string2))->data[offset2];
2025       do {
2026         if (!chareq(invert_case(as_chart(*charptr1++)),as_chart(*charptr2++)))
2027           goto no;
2028       } while (--len);
2029     });
2030   });
2031   return true;
2032  no: return false;
2033 }
2034 
2035 /* UP: compares two strings for equality modulo case-invert
2036  string_eq_inverted(string1,string2)
2037  > string1: string
2038  > string2: simple-string
2039  < result: /=0, if equal modulo case-invert */
string_eq_inverted(object string1,object string2)2040 global bool string_eq_inverted (object string1, object string2) {
2041   var uintL len1;
2042   var uintL offset1;
2043   string1 = unpack_string_ro(string1,&len1,&offset1);
2044   sstring_un_realloc(string2);
2045   if (len1 != Sstring_length(string2))
2046     return false;
2047   /* Now both strings have exactly len1 characters. Compare them. */
2048   if (len1 > 0)
2049     return string_eqcomp_inverted(string1,offset1,string2,0,len1);
2050   return true;
2051 }
2052 
2053 /* UP: converts a string piece to opposite case, uppercase characters to
2054  lowercase and lowercase characters to uppercase.
2055  nstring_invertcase(dv,offset,len);
2056  > object dv: the character storage vector
2057  > uintL offset: index of first affected character
2058  > uintL len: number of affected characters
2059  can trigger GC */
nstring_invertcase(object dv,uintL offset,uintL len)2060 local maygc void nstring_invertcase (object dv, uintL offset, uintL len) {
2061   restart_it:
2062   if (len > 0) {
2063     SstringCase(dv,{
2064       do {
2065         var chart ch = invert_case(as_chart(TheS8string(dv)->data[offset]));
2066         if (as_cint(ch) < cint8_limit) {
2067           TheS8string(dv)->data[offset] = as_cint(ch);
2068           offset++;
2069           len--;
2070         } else {
2071           dv = sstring_store(dv,offset,ch);
2072           offset++;
2073           len--;
2074           if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
2075             dv = TheSistring(dv)->data;
2076             goto restart_it;
2077           }
2078         }
2079       } while (len > 0);
2080     },{
2081       do {
2082         var chart ch = invert_case(as_chart(TheS16string(dv)->data[offset]));
2083         if (as_cint(ch) < cint16_limit) {
2084           TheS16string(dv)->data[offset] = as_cint(ch);
2085           offset++;
2086           len--;
2087         } else {
2088           dv = sstring_store(dv,offset,ch);
2089           offset++;
2090           len--;
2091           if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
2092             dv = TheSistring(dv)->data;
2093             goto restart_it;
2094           }
2095         }
2096       } while (len > 0);
2097     },{
2098       var cint32* charptr = &TheS32string(dv)->data[offset];
2099       do { *charptr = as_cint(invert_case(as_chart(*charptr))); charptr++;
2100       } while (--len);
2101     },{
2102       error_nilarray_retrieve();
2103     });
2104   }
2105 }
2106 
2107 /* UP: converts a string to opposite case
2108  string_invertcase(string)
2109  > string: string
2110  < result: new normal-simple-string
2111  can trigger GC */
string_invertcase(object string)2112 global maygc object string_invertcase (object string) {
2113   string = copy_string_normal(string); /* copy and turn into a normal-simple-string */
2114   pushSTACK(string);
2115   nstring_invertcase(string,0,Sstring_length(string)); /* convert */
2116   string = popSTACK();
2117   DBGREALLOC(string);
2118   return string;
2119 }
2120 
2121 /* error, if index-argument is not an integer. */
error_int(object kw,object obj)2122 local _Noreturn void error_int (object kw, object obj) {
2123   pushSTACK(obj); /* TYPE-ERROR slot DATUM */
2124   pushSTACK(S(integer)); /* TYPE-ERROR slot EXPECTED-TYPE */
2125   pushSTACK(obj);
2126   if (eq(kw,nullobj)) {
2127     pushSTACK(TheSubr(subr_self)->name);
2128     error(type_error,GETTEXT("~S: index should be an integer, not ~S"));
2129   } else {
2130     pushSTACK(kw); pushSTACK(TheSubr(subr_self)->name);
2131     error(type_error,GETTEXT("~S: ~S-index should be an integer, not ~S"));
2132   }
2133 }
2134 
2135 /* error, if index-argument is not an integer or NIL. */
error_int_null(object kw,object obj)2136 local _Noreturn void error_int_null (object kw, object obj) {
2137   pushSTACK(obj); /* TYPE-ERROR slot DATUM */
2138   pushSTACK(O(type_end_index)); /* TYPE-ERROR slot EXPECTED-TYPE */
2139   pushSTACK(obj);
2140   if (eq(kw,nullobj)) {
2141     pushSTACK(TheSubr(subr_self)->name);
2142     error(type_error,GETTEXT("~S: index should be NIL or an integer, not ~S"));
2143   } else {
2144     pushSTACK(kw); pushSTACK(TheSubr(subr_self)->name);
2145     error(type_error,GETTEXT("~S: ~S-index should be NIL or an integer, not ~S"));
2146   }
2147 }
2148 
2149 /* error, if index-argument is not <= limit. */
error_cmp_inclusive(object kw,object obj,uintL grenze)2150 local _Noreturn void error_cmp_inclusive (object kw, object obj, uintL grenze) {
2151   pushSTACK(obj); /* TYPE-ERROR slot DATUM */
2152   pushSTACK(NIL);
2153   pushSTACK(obj);
2154   {
2155     var object tmp;
2156     pushSTACK(S(integer)); pushSTACK(Fixnum_0); pushSTACK(UL_to_I(grenze));
2157     tmp = listof(3);
2158     STACK_1 = tmp; /* TYPE-ERROR slot EXPECTED-TYPE */
2159   }
2160   if (eq(kw,nullobj)) {
2161     pushSTACK(TheSubr(subr_self)->name);
2162     error(type_error,GETTEXT("~S: index ~S should not be greater than the length of the string"));
2163   } else {
2164     pushSTACK(kw); pushSTACK(TheSubr(subr_self)->name);
2165     error(type_error,GETTEXT("~S: ~S-index ~S should not be greater than the length of the string"));
2166   }
2167 }
2168 
2169 /* error, if index-argument is not < limit. */
error_cmp_exclusive(object kw,object obj,uintL grenze)2170 local _Noreturn void error_cmp_exclusive (object kw, object obj, uintL grenze) {
2171   pushSTACK(obj); /* TYPE-ERROR slot DATUM */
2172   pushSTACK(NIL);
2173   pushSTACK(obj);
2174   {
2175     var object tmp;
2176     pushSTACK(S(integer)); pushSTACK(Fixnum_0); pushSTACK(UL_to_I(grenze));
2177     tmp = listof(1); pushSTACK(tmp); tmp = listof(3);
2178     STACK_1 = tmp; /* TYPE-ERROR slot EXPECTED-TYPE */
2179   }
2180   if (eq(kw,nullobj)) {
2181     pushSTACK(TheSubr(subr_self)->name);
2182     error(type_error,
2183            GETTEXT("~S: index ~S should be less than the length of the string"));
2184   } else {
2185     pushSTACK(kw); pushSTACK(TheSubr(subr_self)->name);
2186     error(type_error,GETTEXT("~S: ~S-index ~S should be less than the length of the string"));
2187   }
2188 }
2189 
2190 /* Macro: checks an index-argument
2191  test_index(from,to_setter,def,default,uplimit_cmp,upper_limit,ucname,lcname)
2192  from : expression, where the index (as object) comes from.
2193  to_setter : assigns the result (as uintV) .
2194  def : 0 if we do not have to test for default values,
2195        1 if the default is set in on unbound,
2196        2 if the default is set in on unbound or NIL.
2197  default : expression, that serves as default value in this case.
2198  upper_limit : upper limit
2199  uplimit_cmp : comparison with upper limit
2200  kw : keyword, that identifies the index, or nullobj */
2201 #define test_index(from,to_setter,def,default,uplimit_cmp,upper_limit,kw) \
2202   { var object index = from; /* index-argument */                       \
2203     if (def && (!boundp(index) || (def == 2 && nullp(index))))          \
2204       { to_setter default; }                                            \
2205     else { /* must be an integer: */                                    \
2206       if (!integerp(index))                                             \
2207         { if (def==2) error_int_null(kw,index); else error_int(kw,index); } \
2208       /* index is an integer. */                                        \
2209       if (!(positivep(index)))                                          \
2210         { error_pos_integer(kw,index); }                                \
2211       /* index is >=0. */                                               \
2212       if (!((posfixnump(index)) &&                                      \
2213             ((to_setter posfixnum_to_V(index)) uplimit_cmp upper_limit))) { \
2214         if (0 uplimit_cmp 0)                                            \
2215           /* "<= upper_limit" - comparison not satisfied (upper_limit == limit) */ \
2216           { error_cmp_inclusive(kw,index,upper_limit); }                \
2217         else                                                            \
2218           /* "< upper_limit" - comparison not satisfied (upper_limit == limit) */ \
2219           { error_cmp_exclusive(kw,index,upper_limit); }                \
2220       }                                                                 \
2221     }}
2222 
2223 /* UP: check the index argument for string functions
2224  > STACK_0: Argument
2225  > len: length of the strings (< array-total-size-limit)
2226  < return: index in the string */
test_index_arg(uintL len)2227 local uintL test_index_arg (uintL len)
2228 {
2229   var uintV i;
2230   /* i := Index STACK_0, no default value, must be <len: */
2231   test_index(STACK_0,i=,0,0,<,len,nullobj);
2232   return i;
2233 }
2234 
2235 LISPFUNNR(char,2)
2236 { /* (CHAR string index), CLTL p. 300 */
2237   var object string = check_string(STACK_1);
2238   var uintL len;
2239   var uintL offset = 0;
2240   /* Don't use unpack_string_ro() -- we need (array-dimension string 0),
2241      not (length string). */
2242   if (simple_string_p(string)) {
2243     sstring_un_realloc(string);
2244     len = Sstring_length(string);
2245   } else {
2246     len = TheIarray(string)->totalsize;
2247     string = iarray_displace_check(string,len,&offset);
2248   }
2249   var uintL index = test_index_arg(len);
2250   VALUES1(code_char(schar(string,offset+index)));
2251   skipSTACK(2);
2252 }
2253 
2254 LISPFUNNR(schar,2)
2255 { /* (SCHAR string integer), CLTL p. 300 */
2256   var object string = STACK_1;
2257   if (!simple_string_p(string)) { /* must be a simple-string */
2258     if (stringp(string)
2259         && (Iarray_flags(string) & arrayflags_atype_mask) == Atype_NIL)
2260       error_nilarray_store();
2261     else
2262       error_sstring(string);
2263   }
2264   sstring_un_realloc(string);
2265   var uintL index = test_index_arg(Sstring_length(string));
2266   VALUES1(code_char(schar(string,index)));
2267   skipSTACK(2);
2268 }
2269 
2270 LISPFUNN(store_char,3)
2271 { /* (SYSTEM::STORE-CHAR string index newchar)
2272    = (SETF (CHAR string index) newchar), CLTL p. 300 */
2273   STACK_2 = check_string(STACK_2); /* string-argument */
2274   var object newchar = check_char(popSTACK()); /* newchar-Argument */
2275   var object string = STACK_1; /* string-argument */
2276   var uintL len;
2277   var uintL offset = 0;
2278   /* Don't use unpack_string_rw() -- we need (array-dimension string 0),
2279      not (length string). */
2280   if (simple_string_p(string)) {
2281     sstring_un_realloc(string);
2282     len = Sstring_length(string);
2283   } else {
2284     len = TheIarray(string)->totalsize;
2285     string = iarray_displace_check(string,len,&offset);
2286     if (simple_nilarray_p(string)) error_nilarray_store();
2287   }
2288   check_sstring_mutable(string);
2289   offset += test_index_arg(len); /* go to the element addressed by index */
2290   sstring_store(string,offset,char_code(newchar)); /* put in the character */
2291   VALUES1(newchar);
2292   skipSTACK(2);
2293 }
2294 
2295 LISPFUNN(store_schar,3)
2296 { /* (SYSTEM::STORE-SCHAR simple-string index newchar)
2297    = (SETF (SCHAR simple-string index) newchar), CLTL p. 300 */
2298   var object newchar = check_char(popSTACK()); /* newchar-argument */
2299   var object string = STACK_1; /* string-argument */
2300   if (!simple_string_p(string)) { /* must be a simple-string */
2301     if (stringp(string)
2302         && (Iarray_flags(string) & arrayflags_atype_mask) == Atype_NIL)
2303       error_nilarray_store();
2304     else
2305       error_sstring(string);
2306   }
2307   sstring_un_realloc(string);
2308   check_sstring_mutable(string);
2309   var uintL offset = test_index_arg(Sstring_length(string)); /* go to the element addressed by index */
2310   sstring_store(string,offset,char_code(newchar)); /* put in the character */
2311   VALUES1(newchar);
2312   skipSTACK(2);
2313 }
2314 
2315 /* UP: checks :START and :END limits for a vector argument
2316  > STACK_1: optional :start-argument
2317  > STACK_0: optional :end-argument
2318  > stringarg arg: arg.string its data vector,
2319                   [arg.offset .. arg.offset+arg.len-1] the range within the
2320                   data vector corresponding to the entire vector-argument
2321  < stringarg arg: arg.string and arg.offset unchanged,
2322                   [arg.offset+arg.index .. arg.offset+arg.index+arg.len-1] the
2323                   range within the data vector corresponding to the selected
2324                   vector slice
2325  removes 2 elements from STACK */
test_vector_limits(stringarg * arg)2326 modexp void test_vector_limits (stringarg* arg) {
2327   if (arg->len > 0 && simple_nilarray_p(arg->string))
2328     error_nilarray_retrieve();
2329   var uintV start;
2330   var uintV end;
2331   /* arg->len is the length (<2^oint_data_len).
2332      check :START-argument:
2333      start := Index STACK_1, default value 0, must be <=len : */
2334   test_index(STACK_1,start=,1,0,<=,arg->len,S(Kstart));
2335   /* start is now the value of the :START-argument.
2336      check :END-argument:
2337      end := Index STACK_0, default value len, musst be <=len : */
2338   test_index(STACK_0,end=,2,arg->len,<=,arg->len,S(Kend));
2339   /* end is now the value of the :END-argument.
2340      compare :START and :END arguments: */
2341   if (start > end) {
2342     pushSTACK(STACK_0); /* :END index */
2343     pushSTACK(STACK_(1+1)); /* :START index */
2344     pushSTACK(TheSubr(subr_self)->name);
2345     error(error_condition,GETTEXT("~S: :START-index ~S must not be greater than :END-index ~S"));
2346   }
2347   skipSTACK(2);
2348   /* fill results: */
2349   arg->index = start; arg->len = end-start;
2350 }
2351 
2352 /* UP: checks the limits for a string-argument
2353  test_string_limits_ro(&arg)  [for read-only access]
2354  > STACK_2: string-argument
2355  > STACK_1: optional :start-argument
2356  > STACK_0: optional :end-argument
2357  < stringarg arg: description of the argument
2358  < result: string-argument
2359  increases STACK by 3
2360  can trigger GC */
test_string_limits_ro(stringarg * arg)2361 global maygc object test_string_limits_ro (stringarg* arg) {
2362   /* check string-argument: */
2363   STACK_2 = check_string(STACK_2);
2364   arg->string = unpack_string_ro(STACK_2,&arg->len,&arg->offset);
2365   test_vector_limits(arg);
2366   return popSTACK();
2367 }
2368 
2369 /* UP: checks the limits for a string-argument
2370  test_string_limits_rw(&arg)  [for read-write access]
2371  > STACK_2: string-argument
2372  > STACK_1: optional :start-argument
2373  > STACK_0: optional :end-argument
2374  < stringarg arg: description of the argument
2375  < result: string-argument
2376  increases STACK by 3
2377  can trigger GC */
test_string_limits_rw(stringarg * arg)2378 local maygc object test_string_limits_rw (stringarg* arg) {
2379   var object string = test_string_limits_ro(arg);
2380   if (arg->len > 0) {
2381     if (simple_nilarray_p(arg->string)) error_nilarray_access();
2382     check_sstring_mutable(arg->string);
2383   }
2384   return string;
2385 }
2386 
2387 /* UP: checks a string/symbol/character-argument
2388  test_stringsymchar_arg(obj,invert)
2389  > obj: argument
2390  > invert: whether to implicitly case-invert a symbol's printname
2391  < result: argument as string
2392  can trigger GC */
test_stringsymchar_arg(object obj,bool invert)2393 global maygc object test_stringsymchar_arg (object obj, bool invert) {
2394  restart_stringsymchar:
2395   if (stringp(obj)) /* string: return unchanged */
2396     return obj;
2397   if (symbolp(obj)) { /* symbol: use print name */
2398     obj = TheSymbol(obj)->pname;
2399     return (invert ? string_invertcase(obj) : obj);
2400   }
2401   if (charp(obj)) { /* character: turn it into a one-element string: */
2402     var object new_string = allocate_string(1);
2403     TheSnstring(new_string)->data[0] = char_code(obj);
2404     return new_string;
2405   }
2406   pushSTACK(NIL); /* no PLACE */
2407   pushSTACK(obj); /* TYPE-ERROR slot DATUM */
2408   pushSTACK(O(type_stringsymchar)); /* TYPE-ERROR slot EXPECTED-TYPE */
2409   pushSTACK(obj); pushSTACK(TheSubr(subr_self)->name);
2410   check_value(type_error,GETTEXT("~S: argument ~S should be a string, a symbol or a character"));
2411   obj = value1;
2412   goto restart_stringsymchar;
2413 }
2414 
2415 /* UP: checks the limits for 1 string/symbol-argument and copies it
2416  test_1_stringsym_limits(invert,&string,&len)
2417  > STACK_2: string/symbol-argument
2418  > STACK_1: optional :start-argument
2419  > STACK_0: optional :end-argument
2420  > invert: whether to implicitly case-invert a symbol's printname
2421  < object string: copy of the string
2422  < uintL offset: index of first affected character
2423  < uintL len: number of affected characters
2424  increases STACK by 3
2425  can trigger GC */
test_1_stringsym_limits(bool invert,object * string_,uintL * offset_,uintL * len_)2426 local maygc void test_1_stringsym_limits (bool invert, object* string_,
2427                                           uintL* offset_, uintL* len_) {
2428   var object string;
2429   var uintL len;
2430   var uintV start;
2431   var uintV end;
2432   /* check string/symbol-argument: */
2433   string = test_stringsymchar_arg(STACK_2,invert);
2434   len = vector_length(string);
2435   /* now, len is the length (<2^oint_data_len).
2436      check :START-argument:
2437      start := Index STACK_1, default value 0, must be <=len : */
2438   test_index(STACK_1,start=,1,0,<=,len,S(Kstart));
2439   /* start is now the value of the :START-argument.
2440      check :END-argument:
2441      end := Index STACK_0, default value len, must be <=len : */
2442   test_index(STACK_0,end=,2,len,<=,len,S(Kend));
2443   /* end is now the value of the :END-argument.
2444      compare :START and :END arguments: */
2445   if (!(start <= end)) {
2446     pushSTACK(STACK_0); /* :END-Index */
2447     pushSTACK(STACK_2); /* :START-Index */
2448     pushSTACK(TheSubr(subr_self)->name);
2449     error(error_condition,GETTEXT("~S: :START-index ~S must not be greater than :END-index ~S"));
2450   }
2451   skipSTACK(3);
2452   /* copy string and issue results: */
2453   *string_ = copy_string(string); /* copy string */
2454   *offset_ = start; *len_ = end-start;
2455 }
2456 
2457 /* UP: checks the limits for 2 string/symbol-arguments
2458  test_2_stringsym_limits(invert,&arg1,&arg2)
2459  > STACK_5: string/symbol-argument1
2460  > STACK_4: string/symbol-argument2
2461  > STACK_3: optional :start1-argument
2462  > STACK_2: optional :end1-argument
2463  > STACK_1: optional :start2-argument
2464  > STACK_0: optional :end2-argument
2465  > invert: whether to implicitly case-invert a symbol's printname
2466  < stringarg arg1: description of argument1
2467  < stringarg arg2: description of argument2
2468  increases STACK by 6 */
test_2_stringsym_limits(bool invert,stringarg * arg1,stringarg * arg2)2469 local void test_2_stringsym_limits (bool invert, stringarg* arg1, stringarg* arg2) {
2470   var uintL len1;
2471   var uintL len2;
2472   { /* check string/symbol-argument1: */
2473     var object string1 = test_stringsymchar_arg(STACK_5,invert);
2474     pushSTACK(string1); /* save string1 */
2475     /* check string/symbol-argument2: */
2476     var object string2 = test_stringsymchar_arg(STACK_(4+1),invert);
2477     arg2->string = unpack_string_ro(string2,&len2,&arg2->offset);
2478     /* now, len2 is the length (<2^oint_data_len) of string2. */
2479     string1 = popSTACK(); /* restore string1 */
2480     arg1->string = unpack_string_ro(string1,&len1,&arg1->offset);
2481     /* now, len1 is the length (<2^oint_data_len) of string1. */
2482   }
2483   { /* check :START1 and :END1: */
2484     var uintV start1;
2485     var uintV end1;
2486     /* check :START1-argument:
2487        start1 := Index STACK_3, default value 0, must be <=len1: */
2488     test_index(STACK_3,start1=,1,0,<=,len1,S(Kstart1));
2489     /* start1 is now the value of the :START1-argument.
2490        check :END1-argument:
2491        end1 := Index STACK_2, default value len1, must be <=len1: */
2492     test_index(STACK_2,end1=,2,len1,<=,len1,S(Kend1));
2493     /* end1 is now the value of the :END1-argument.
2494        compare :START1 and :END1 arguments: */
2495     if (!(start1 <= end1)) {
2496       pushSTACK(STACK_2); /* :END1-Index */
2497       pushSTACK(STACK_4); /* :START1-Index */
2498       pushSTACK(TheSubr(subr_self)->name);
2499       error(error_condition,GETTEXT("~S: :START1-index ~S must not be greater than :END1-index ~S"));
2500     }
2501     /* issue the results for string1: */
2502     arg1->index = start1; arg1->len = end1-start1;
2503     if (arg1->len > 0 && simple_nilarray_p(arg1->string))
2504       error_nilarray_retrieve();
2505   }
2506   { /* check :START2 and :END2: */
2507     var uintV start2;
2508     var uintV end2;
2509     /* check :START2-argument:
2510        start2 := Index STACK_1, default value 0, must be <=len2: */
2511     test_index(STACK_1,start2=,1,0,<=,len2,S(Kstart2));
2512     /* start2 is now the value of the :START2-argument.
2513        check :END2-argument:
2514        end2 := Index STACK_0, default value len2, must be <=len2: */
2515     test_index(STACK_0,end2=,2,len2,<=,len2,S(Kend2));
2516     /* end2 is now the value of the :END2-argument.
2517        compare :START2 and :END2 arguments: */
2518     if (!(start2 <= end2)) {
2519       pushSTACK(STACK_0); /* :END2-Index */
2520       pushSTACK(STACK_2); /* :START2-Index */
2521       pushSTACK(TheSubr(subr_self)->name);
2522       error(error_condition,GETTEXT("~S: :START2-index ~S must not be greater than :END2-index ~S"));
2523     }
2524     /* issue the results for string2: */
2525     arg2->index = start2; arg2->len = end2-start2;
2526     if (arg2->len > 0 && simple_nilarray_p(arg2->string))
2527       error_nilarray_retrieve();
2528   }
2529   /* done. */
2530   skipSTACK(6);
2531 }
2532 
2533 /* UP: compares two strings of equal length for equality
2534  > string1,offset1: here are the addressed characters in string1
2535  > string2,offset2: here are the addressed characters in string2
2536  > len: number of addressed characters in String1 and in String2, > 0
2537  < result: true if equal, else false. */
string_eqcomp(object string1,uintL offset1,object string2,uintL offset2,uintL len)2538 global bool string_eqcomp (object string1, uintL offset1, object string2,
2539                            uintL offset2, uintL len) {
2540   SstringDispatch(string1,X1, {
2541     var const cintX1* charptr1 = &((SstringX1)TheVarobject(string1))->data[offset1];
2542     SstringDispatch(string2,X2, {
2543       var const cintX2* charptr2 = &((SstringX2)TheVarobject(string2))->data[offset2];
2544       do {
2545         if (!chareq(as_chart(*charptr1++),as_chart(*charptr2++)))
2546           goto no;
2547       } while (--len);
2548     });
2549   });
2550   return true;
2551  no: return false;
2552 }
2553 
2554 /* UP: compares two strings
2555  > arg1: here are the addressed characters in string1
2556  > arg2: here are the addressed characters in string2
2557  < arg1.index: location of the first difference i string1
2558  < result: 0 if equal,
2559              -1 if string1 is genuinely lesser than string2,
2560              +1 if string1 is genuinely bigger than string2. */
string_comp(stringarg * arg1,const stringarg * arg2)2561 local signean string_comp (stringarg* arg1, const stringarg* arg2) {
2562   var uintL len1 = arg1->len;
2563   var uintL len2 = arg2->len;
2564   SstringCase(arg1->string, {
2565     var const cint8* charptr1_0 = &TheS8string(arg1->string)->data[arg1->offset];
2566     var const cint8* charptr1 = &charptr1_0[arg1->index];
2567     /* one of the strings empty ? */
2568     if (len1==0) goto A_string1_end;
2569     if (len2==0) goto A_string2_end;
2570     SstringDispatch(arg2->string,X2, {
2571       var const cintX2* charptr2 = &((SstringX2)TheVarobject(arg2->string))->data[arg2->offset+arg2->index];
2572       while (1) {
2573         /* compare next characters: */
2574         if (!chareq(as_chart(*charptr1++),as_chart(*charptr2++))) break;
2575         /* decrease both counters: */
2576         len1--; len2--;
2577         /* one of the strings finished ? */
2578         if (len1==0) goto A_string1_end;
2579         if (len2==0) goto A_string2_end;
2580       }
2581       /* two different characters are found */
2582       arg1->index = --charptr1 - charptr1_0;
2583       if (charlt(as_chart(*charptr1),as_chart(*--charptr2)))
2584         return signean_minus; /* string1 < string2 */
2585       else
2586         return signean_plus; /* string1 > string2 */
2587     });
2588   A_string1_end: /* string1 finished */
2589     arg1->index = charptr1 - charptr1_0;
2590     if (len2==0)
2591       return signean_null; /* string1 = string2 */
2592     else /* String1 is a genuine starting piece of string2 */
2593       return signean_minus;
2594   A_string2_end: /* string2 is finished, string1 is not yet finished */
2595     arg1->index = charptr1 - charptr1_0;
2596     return signean_plus; /* string2 is a genuine starting piece of string1 */
2597   }, {
2598     var const cint16* charptr1_0 = &TheS16string(arg1->string)->data[arg1->offset];
2599     var const cint16* charptr1 = &charptr1_0[arg1->index];
2600     /* one of the strings empty ? */
2601     if (len1==0) goto B_string1_end;
2602     if (len2==0) goto B_string2_end;
2603     SstringDispatch(arg2->string,X2, {
2604       var const cintX2* charptr2 = &((SstringX2)TheVarobject(arg2->string))->data[arg2->offset+arg2->index];
2605       while (1) {
2606         /* compare next characters: */
2607         if (!chareq(as_chart(*charptr1++),as_chart(*charptr2++))) break;
2608         /* decrease both counters: */
2609         len1--; len2--;
2610         /* one of the strings finished ? */
2611         if (len1==0) goto B_string1_end;
2612         if (len2==0) goto B_string2_end;
2613       }
2614       /* two different characters are found */
2615       arg1->index = --charptr1 - charptr1_0;
2616       if (charlt(as_chart(*charptr1),as_chart(*--charptr2)))
2617         return signean_minus; /* string1 < string2 */
2618       else
2619         return signean_plus; /* string1 > string2 */
2620     });
2621   B_string1_end: /* string1 finished */
2622     arg1->index = charptr1 - charptr1_0;
2623     if (len2==0)
2624       return signean_null; /* string1 = string2 */
2625     else /* String1 is a genuine starting piece of string2 */
2626       return signean_minus;
2627   B_string2_end: /* string2 is finished, string1 is not yet finished */
2628     arg1->index = charptr1 - charptr1_0;
2629     return signean_plus; /* string2 is a genuine starting piece of string1 */
2630   }, {
2631     var const cint32* charptr1_0 = &TheS32string(arg1->string)->data[arg1->offset];
2632     var const cint32* charptr1 = &charptr1_0[arg1->index];
2633     /* one of the strings empty ? */
2634     if (len1==0) goto C_string1_end;
2635     if (len2==0) goto C_string2_end;
2636     SstringDispatch(arg2->string,X2, {
2637       var const cintX2* charptr2 = &((SstringX2)TheVarobject(arg2->string))->data[arg2->offset+arg2->index];
2638       while (1) {
2639         /* compare next characters: */
2640         if (!chareq(as_chart(*charptr1++),as_chart(*charptr2++))) break;
2641         /* decrease both counters: */
2642         len1--; len2--;
2643         /* one of the strings finished ? */
2644         if (len1==0) goto C_string1_end;
2645         if (len2==0) goto C_string2_end;
2646       }
2647       /* two different characters are found */
2648       arg1->index = --charptr1 - charptr1_0;
2649       if (charlt(as_chart(*charptr1),as_chart(*--charptr2)))
2650         return signean_minus; /* string1 < string2 */
2651       else
2652         return signean_plus; /* string1 > string2 */
2653     });
2654   C_string1_end: /* string1 finished */
2655     arg1->index = charptr1 - charptr1_0;
2656     if (len2==0)
2657       return signean_null; /* string1 = string2 */
2658     else /* string1 is a genuine starting piece of string2 */
2659       return signean_minus;
2660   C_string2_end: /* string2 is finished, string1 is not yet finished */
2661     arg1->index = charptr1 - charptr1_0;
2662     return signean_plus; /* string2 is a genuine starting piece of string1 */
2663   }, {
2664     /* one of the strings empty ? */
2665     if (len1==0) goto D_string1_end;
2666     if (len2==0) goto D_string2_end;
2667     error_nilarray_retrieve();
2668   D_string1_end: /* string1 finished */
2669     arg1->index = 0;
2670     if (len2==0)
2671       return signean_null; /* string1 = string2 */
2672     else /* string1 is a genuine starting piece of string2 */
2673       return signean_minus;
2674   D_string2_end: /* string2 is finished, string1 is not yet finished */
2675     arg1->index = 0;
2676     return signean_plus; /* string2 is a genuine starting piece of string1 */
2677   });
2678 }
2679 
2680 /* (STRING= string1 string2 :start1 :end1 :start2 :end2), CLTL p. 300 */
2681 LISPFUN(string_eq,seclass_read,2,0,norest,key,4,
2682         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2683 {
2684   var stringarg arg1;
2685   var stringarg arg2;
2686   /* check arguments: */
2687   test_2_stringsym_limits(false,&arg1,&arg2);
2688   /* compare: */
2689   VALUES_IF((arg1.len==arg2.len)
2690             && ((arg1.len==0)
2691                 || string_eqcomp(arg1.string,arg1.offset+arg1.index,
2692                                  arg2.string,arg2.offset+arg2.index,
2693                                  arg1.len)));
2694 }
2695 
2696 /* (CS-COMMON-LISP:STRING= string1 string2 :start1 :end1 :start2 :end2) */
2697 LISPFUN(cs_string_eq,seclass_read,2,0,norest,key,4,
2698         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2699 {
2700   var stringarg arg1;
2701   var stringarg arg2;
2702   /* check arguments: */
2703   test_2_stringsym_limits(true,&arg1,&arg2);
2704   /* compare: */
2705   VALUES_IF((arg1.len==arg2.len)
2706             && ((arg1.len==0)
2707                 || string_eqcomp(arg1.string,arg1.offset+arg1.index,
2708                                  arg2.string,arg2.offset+arg2.index,
2709                                  arg1.len)));
2710 }
2711 
2712 /* (STRING/= string1 string2 :start1 :end1 :start2 :end2), CLTL p. 301 */
2713 LISPFUN(string_noteq,seclass_read,2,0,norest,key,4,
2714         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2715 {
2716   var stringarg arg1;
2717   var stringarg arg2;
2718   /* check arguments: */
2719   test_2_stringsym_limits(false,&arg1,&arg2);
2720   /* compare: */
2721   VALUES1(string_comp(&arg1,&arg2)==0 ? NIL : fixnum(arg1.index));
2722 }
2723 
2724 /* (CS-COMMON-LISP:STRING/= string1 string2 :start1 :end1 :start2 :end2) */
2725 LISPFUN(cs_string_noteq,seclass_read,2,0,norest,key,4,
2726         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2727 {
2728   var stringarg arg1;
2729   var stringarg arg2;
2730   /* check arguments: */
2731   test_2_stringsym_limits(true,&arg1,&arg2);
2732   /* compare: */
2733   VALUES1(string_comp(&arg1,&arg2)==0 ? NIL : fixnum(arg1.index));
2734 }
2735 
2736 /* (STRING< string1 string2 :start1 :end1 :start2 :end2), CLTL p. 301 */
2737 LISPFUN(string_less,seclass_read,2,0,norest,key,4,
2738         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2739 {
2740   var stringarg arg1;
2741   var stringarg arg2;
2742   /* check arguments: */
2743   test_2_stringsym_limits(false,&arg1,&arg2);
2744   /* compare: */
2745   VALUES1(string_comp(&arg1,&arg2)<0 ? fixnum(arg1.index) : NIL);
2746 }
2747 
2748 /* (CS-COMMON-LISP:STRING< string1 string2 :start1 :end1 :start2 :end2) */
2749 LISPFUN(cs_string_less,seclass_read,2,0,norest,key,4,
2750         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2751 {
2752   var stringarg arg1;
2753   var stringarg arg2;
2754   /* check arguments: */
2755   test_2_stringsym_limits(true,&arg1,&arg2);
2756   /* compare: */
2757   VALUES1(string_comp(&arg1,&arg2)<0 ? fixnum(arg1.index) : NIL);
2758 }
2759 
2760 /* (STRING> string1 string2 :start1 :end1 :start2 :end2), CLTL p. 301 */
2761 LISPFUN(string_greater,seclass_read,2,0,norest,key,4,
2762         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2763 {
2764   var stringarg arg1;
2765   var stringarg arg2;
2766   /* check arguments: */
2767   test_2_stringsym_limits(false,&arg1,&arg2);
2768   /* compare: */
2769   VALUES1(string_comp(&arg1,&arg2)>0 ? fixnum(arg1.index) : NIL);
2770 }
2771 
2772 /* (CS-COMMON-LISP:STRING> string1 string2 :start1 :end1 :start2 :end2) */
2773 LISPFUN(cs_string_greater,seclass_read,2,0,norest,key,4,
2774         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2775 {
2776   var stringarg arg1;
2777   var stringarg arg2;
2778   /* check arguments: */
2779   test_2_stringsym_limits(true,&arg1,&arg2);
2780   /* compare: */
2781   VALUES1(string_comp(&arg1,&arg2)>0 ? fixnum(arg1.index) : NIL);
2782 }
2783 
2784 /* (STRING<= string1 string2 :start1 :end1 :start2 :end2), CLTL p. 301 */
2785 LISPFUN(string_ltequal,seclass_read,2,0,norest,key,4,
2786         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2787 {
2788   var stringarg arg1;
2789   var stringarg arg2;
2790   /* check arguments: */
2791   test_2_stringsym_limits(false,&arg1,&arg2);
2792   /* compare: */
2793   VALUES1(string_comp(&arg1,&arg2)<=0 ? fixnum(arg1.index) : NIL);
2794 }
2795 
2796 /* (CS-COMMON-LISP:STRING<= string1 string2 :start1 :end1 :start2 :end2) */
2797 LISPFUN(cs_string_ltequal,seclass_read,2,0,norest,key,4,
2798         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2799 {
2800   var stringarg arg1;
2801   var stringarg arg2;
2802   /* check arguments: */
2803   test_2_stringsym_limits(true,&arg1,&arg2);
2804   /* compare: */
2805   VALUES1(string_comp(&arg1,&arg2)<=0 ? fixnum(arg1.index) : NIL);
2806 }
2807 
2808 /* (STRING>= string1 string2 :start1 :end1 :start2 :end2), CLTL p. 301 */
2809 LISPFUN(string_gtequal,seclass_read,2,0,norest,key,4,
2810         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2811 {
2812   var stringarg arg1;
2813   var stringarg arg2;
2814   /* check arguments: */
2815   test_2_stringsym_limits(false,&arg1,&arg2);
2816   /* compare: */
2817   VALUES1(string_comp(&arg1,&arg2)>=0 ? fixnum(arg1.index) : NIL);
2818 }
2819 
2820 /* (CS-COMMON-LISP:STRING>= string1 string2 :start1 :end1 :start2 :end2) */
2821 LISPFUN(cs_string_gtequal,seclass_read,2,0,norest,key,4,
2822         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2823 {
2824   var stringarg arg1;
2825   var stringarg arg2;
2826   /* check arguments: */
2827   test_2_stringsym_limits(true,&arg1,&arg2);
2828   /* compare: */
2829   VALUES1(string_comp(&arg1,&arg2)>=0 ? fixnum(arg1.index) : NIL);
2830 }
2831 
2832 /* UP: compares two strings of equal length for equality, case-insensitive
2833  > string1,offset1: here are the addressed characters in string1
2834  > string2,offset2: here are the addressed characters in string2
2835  > len: number of addressed characters in String1 and in String2, > 0
2836  < result: true if equal, else false. */
string_eqcomp_ci(object string1,uintL offset1,object string2,uintL offset2,uintL len)2837 global bool string_eqcomp_ci (object string1, uintL offset1, object string2,
2838                               uintL offset2, uintL len) {
2839   SstringDispatch(string1,X1, {
2840     var const cintX1* charptr1 = &((SstringX1)TheVarobject(string1))->data[offset1];
2841     SstringDispatch(string2,X2, {
2842       var const cintX2* charptr2 = &((SstringX2)TheVarobject(string2))->data[offset2];
2843       do {
2844         if (!chareq(up_case(as_chart(*charptr1++)),up_case(as_chart(*charptr2++))))
2845           goto no;
2846       } while (--len);
2847     });
2848   });
2849   return true;
2850  no: return false;
2851 }
2852 
2853 /* UP: compares two strings, case-insensitive
2854  > arg1: here are the addressed characters in string1
2855  > arg2: here are the addressed characters in string2
2856  < arg1.index: location of the first difference i string1
2857  < result: 0 if equal,
2858              -1 if string1 is genuinely lesser than string2,
2859              +1 if string1 is genuinely bigger than string2. */
string_comp_ci(stringarg * arg1,const stringarg * arg2)2860 local signean string_comp_ci (stringarg* arg1, const stringarg* arg2)
2861 {
2862   var uintL len1 = arg1->len;
2863   var uintL len2 = arg2->len;
2864   SstringCase(arg1->string,{
2865     var const cint8* charptr1_0 = &TheS8string(arg1->string)->data[arg1->offset];
2866     var const cint8* charptr1 = &charptr1_0[arg1->index];
2867     var chart ch1;
2868     var chart ch2;
2869     /* one of the strings empty ? */
2870     if (len1==0) goto A_string1_end;
2871     if (len2==0) goto A_string2_end;
2872     SstringDispatch(arg2->string,X2, {
2873       var const cintX2* charptr2 = &((SstringX2)TheVarobject(arg2->string))->data[arg2->offset+arg2->index];
2874       while (1) {
2875         /* compare next characters: */
2876         if (!chareq(ch1 = up_case(as_chart(*charptr1++)), ch2 = up_case(as_chart(*charptr2++)))) break;
2877         /* decrease both counters: */
2878         len1--; len2--;
2879         /* is one of the strings finished ? */
2880         if (len1==0) goto A_string1_end;
2881         if (len2==0) goto A_string2_end;
2882       }
2883     });
2884     /* two different characters are found */
2885     arg1->index = --charptr1 - charptr1_0;
2886     if (charlt(ch1,ch2))
2887       return signean_minus; /* string1 < string2 */
2888     else
2889       return signean_plus; /* string1 > string2 */
2890    A_string1_end: /* string1 finished */
2891     arg1->index = charptr1 - charptr1_0;
2892     if (len2==0)
2893       return signean_null; /* string1 = string2 */
2894     else
2895       return signean_minus; /* string1 is a genuine starting piece of string2 */
2896    A_string2_end: /* string2 is finished, string1 is not yet finished */
2897     arg1->index = charptr1 - charptr1_0;
2898     return signean_plus; /* string2 is a genuine starting piece of string1 */
2899   },{
2900     var const cint16* charptr1_0 = &TheS16string(arg1->string)->data[arg1->offset];
2901     var const cint16* charptr1 = &charptr1_0[arg1->index];
2902     var chart ch1;
2903     var chart ch2;
2904     /* one of the strings empty ? */
2905     if (len1==0) goto B_string1_end;
2906     if (len2==0) goto B_string2_end;
2907     SstringDispatch(arg2->string,X2, {
2908       var const cintX2* charptr2 = &((SstringX2)TheVarobject(arg2->string))->data[arg2->offset+arg2->index];
2909       while (1) {
2910         /* compare next characters: */
2911         if (!chareq(ch1 = up_case(as_chart(*charptr1++)), ch2 = up_case(as_chart(*charptr2++)))) break;
2912         /* decrease both counters: */
2913         len1--; len2--;
2914         /* is one of the strings finished ? */
2915         if (len1==0) goto B_string1_end;
2916         if (len2==0) goto B_string2_end;
2917       }
2918     });
2919     /* two different characters are found */
2920     arg1->index = --charptr1 - charptr1_0;
2921     if (charlt(ch1,ch2))
2922       return signean_minus; /* string1 < string2 */
2923     else
2924       return signean_plus; /* string1 > string2 */
2925    B_string1_end: /* string1 finished */
2926     arg1->index = charptr1 - charptr1_0;
2927     if (len2==0)
2928       return signean_null; /* string1 = string2 */
2929     else
2930       return signean_minus; /* string1 is a genuine starting piece of string2 */
2931    B_string2_end: /* string2 is finished, string1 is not yet finished */
2932     arg1->index = charptr1 - charptr1_0;
2933     return signean_plus; /* string2 is a genuine starting piece of string1 */
2934   },{
2935     var const cint32* charptr1_0 = &TheS32string(arg1->string)->data[arg1->offset];
2936     var const cint32* charptr1 = &charptr1_0[arg1->index];
2937     var chart ch1;
2938     var chart ch2;
2939     /* one of the strings empty ? */
2940     if (len1==0) goto C_string1_end;
2941     if (len2==0) goto C_string2_end;
2942     SstringDispatch(arg2->string,X2, {
2943       var const cintX2* charptr2 = &((SstringX2)TheVarobject(arg2->string))->data[arg2->offset+arg2->index];
2944       while (1) {
2945         /* compare next characters: */
2946         if (!chareq(ch1 = up_case(as_chart(*charptr1++)), ch2 = up_case(as_chart(*charptr2++)))) break;
2947         /* decrease both counters: */
2948         len1--; len2--;
2949         /* is one of the strings finished ? */
2950         if (len1==0) goto C_string1_end;
2951         if (len2==0) goto C_string2_end;
2952       }
2953     });
2954     /* two different characters are found */
2955     arg1->index = --charptr1 - charptr1_0;
2956     if (charlt(ch1,ch2))
2957       return signean_minus; /* string1 < string2 */
2958     else
2959       return signean_plus; /* string1 > string2 */
2960    C_string1_end: /* String1 finished */
2961     arg1->index = charptr1 - charptr1_0;
2962     if (len2==0)
2963       return signean_null; /* string1 = string2 */
2964     else
2965       return signean_minus; /* string1 is a genuine starting piece of string2 */
2966    C_string2_end: /* string2 is finished, string1 is not yet finished */
2967     arg1->index = charptr1 - charptr1_0;
2968     return signean_plus; /* string2 is a genuine starting piece of string1 */
2969   }, {
2970     /* one of the strings empty ? */
2971     if (len1==0) goto D_string1_end;
2972     if (len2==0) goto D_string2_end;
2973     error_nilarray_retrieve();
2974   D_string1_end: /* string1 finished */
2975     arg1->index = 0;
2976     if (len2==0)
2977       return signean_null; /* string1 = string2 */
2978     else /* string1 is a genuine starting piece of string2 */
2979       return signean_minus;
2980   D_string2_end: /* string2 is finished, string1 is not yet finished */
2981     arg1->index = 0;
2982     return signean_plus; /* string2 is a genuine starting piece of string1 */
2983   });
2984 }
2985 
2986 /* (STRING-EQUAL string1 string2 :start1 :end1 :start2 :end2), CLTL p. 301 */
2987 LISPFUN(string_equal,seclass_read,2,0,norest,key,4,
2988         (kw(start1),kw(end1),kw(start2),kw(end2)) )
2989 {
2990   var stringarg arg1;
2991   var stringarg arg2;
2992   /* check arguments: */
2993   test_2_stringsym_limits(false,&arg1,&arg2);
2994   /* compare: */
2995   VALUES_IF((arg1.len==arg2.len)
2996             && ((arg1.len==0)
2997                 || string_eqcomp_ci(arg1.string,arg1.offset+arg1.index,
2998                                     arg2.string,arg2.offset+arg2.index,
2999                                     arg1.len)));
3000 }
3001 
3002 /* (STRING-NOT-EQUAL string1 string2 :start1 :end1 :start2 :end2),
3003    CLTL p. 302 */
3004 LISPFUN(string_not_equal,seclass_read,2,0,norest,key,4,
3005         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3006 {
3007   var stringarg arg1;
3008   var stringarg arg2;
3009   /* check arguments: */
3010   test_2_stringsym_limits(false,&arg1,&arg2);
3011   /* compare: */
3012   VALUES1(string_comp_ci(&arg1,&arg2)==0 ? NIL : fixnum(arg1.index));
3013 }
3014 
3015 /* (STRING-LESSP string1 string2 :start1 :end1 :start2 :end2), CLTL p. 302 */
3016 LISPFUN(string_lessp,seclass_read,2,0,norest,key,4,
3017         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3018 {
3019   var stringarg arg1;
3020   var stringarg arg2;
3021   /* check arguments: */
3022   test_2_stringsym_limits(false,&arg1,&arg2);
3023   /* compare: */
3024   VALUES1(string_comp_ci(&arg1,&arg2)<0 ? fixnum(arg1.index) : NIL);
3025 }
3026 
3027 /* (STRING-GREATERP string1 string2 :start1 :end1 :start2 :end2),
3028    CLTL p. 302 */
3029 LISPFUN(string_greaterp,seclass_read,2,0,norest,key,4,
3030         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3031 {
3032   var stringarg arg1;
3033   var stringarg arg2;
3034   /* check arguments: */
3035   test_2_stringsym_limits(false,&arg1,&arg2);
3036   /* compare: */
3037   VALUES1(string_comp_ci(&arg1,&arg2)>0 ? fixnum(arg1.index) : NIL);
3038 }
3039 
3040 /* (STRING-NOT-GREATERP string1 string2 :start1 :end1 :start2 :end2),
3041    CLTL p. 302 */
3042 LISPFUN(string_not_greaterp,seclass_read,2,0,norest,key,4,
3043         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3044 {
3045   var stringarg arg1;
3046   var stringarg arg2;
3047   /* check arguments: */
3048   test_2_stringsym_limits(false,&arg1,&arg2);
3049   /* compare: */
3050   VALUES1(string_comp_ci(&arg1,&arg2)<=0 ? fixnum(arg1.index) : NIL);
3051 }
3052 
3053 /* (STRING-NOT-LESSP string1 string2 :start1 :end1 :start2 :end2),
3054    CLTL p. 302 */
3055 LISPFUN(string_not_lessp,seclass_read,2,0,norest,key,4,
3056         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3057 {
3058   var stringarg arg1;
3059   var stringarg arg2;
3060   /* check arguments: */
3061   test_2_stringsym_limits(false,&arg1,&arg2);
3062   /* compare: */
3063   VALUES1(string_comp_ci(&arg1,&arg2)>=0 ? fixnum(arg1.index) : NIL);
3064 }
3065 
3066 /* UP: searches a string string1 in another string string2
3067  > arg1: here are the addressed characters in string1
3068  > arg2: here are the addressed characters in string2
3069  > eqcomp: comparison function, &string_eqcomp or &string_eqcomp_ci
3070  < result: NIL if not found,
3071              position in string2 (as fixnum) if found.
3072  let eqcomp_fun_t be the type of such a comparison function: */
3073 typedef bool (*eqcomp_fun_t) (object string1, uintL offset1,
3074                               object string2, uintL offset2, uintL len);
string_search(const stringarg * arg1,const stringarg * arg2,eqcomp_fun_t eqcomp)3075 local object string_search (const stringarg* arg1, const stringarg* arg2,
3076                             eqcomp_fun_t eqcomp)
3077 {
3078   var uintL len1 = arg1->len;
3079   var uintL len2 = arg2->len;
3080   if (len1 > len2) goto notfound; /* Only if len1<=len2, can string1 occur in string2. */
3081   /* loop:
3082      for i=0..len2-len1:
3083      compare string1 with the len1 characters at charptr2[i].
3084      Thereto, pass through loop len2-len1+1 times,
3085      growing charptr2 and start2. */
3086   {
3087     var object string1 = arg1->string;
3088     var uintL offset1 = arg1->offset + arg1->index;
3089     var object string2 = arg2->string;
3090     var uintL offset2 = arg2->offset + arg2->index;
3091     var uintL count = len2-len1+1;
3092     if (len1==0) goto found;
3093     do {
3094       if ((*eqcomp)(string1,offset1,string2,offset2,len1)) /* compare */
3095         goto found;
3096       offset2++;
3097     } while (--count);
3098     goto notfound;
3099    found: return fixnum(offset2 - arg2->offset);
3100   }
3101  notfound: return NIL;
3102 }
3103 
3104 LISPFUN(search_string_eq,seclass_read,2,0,norest,key,4,
3105         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3106 { /* (SYS::SEARCH-STRING= string1 string2 [:start1] [:end1] [:start2] [:end2])
3107    = (search string1 string2 :test #'char= [:start1] [:end1] [:start2] [:end2]) */
3108   var stringarg arg1;
3109   var stringarg arg2;
3110   /* check arguments: */
3111   test_2_stringsym_limits(false,&arg1,&arg2);
3112   /* search string1 in string2: */
3113   VALUES1(string_search(&arg1,&arg2,&string_eqcomp));
3114 }
3115 
3116 LISPFUN(search_string_equal,seclass_read,2,0,norest,key,4,
3117         (kw(start1),kw(end1),kw(start2),kw(end2)) )
3118 { /* (SYS::SEARCH-STRING-EQUAL string1 string2 [:start1] [:end1] [:start2] [:end2])
3119    = (search string1 string2 :test #'char-equal [:start1] [:end1] [:start2] [:end2]) */
3120   var stringarg arg1;
3121   var stringarg arg2;
3122   /* check arguments: */
3123   test_2_stringsym_limits(false,&arg1,&arg2);
3124   /* search string1 in string2: */
3125   VALUES1(string_search(&arg1,&arg2,&string_eqcomp_ci));
3126 }
3127 
3128 LISPFUN(make_string,seclass_no_se,1,0,norest,key,2,
3129         (kw(initial_element),kw(element_type)) )
3130 { /* (MAKE-STRING size :initial-element :element-type) */
3131   var uintV size;
3132   /* check size: */
3133   if (!posfixnump(STACK_2)) { /* size must be fixnum >= 0 */
3134     pushSTACK(STACK_2); /* TYPE-ERROR slot DATUM */
3135     pushSTACK(O(type_posfixnum)); /* TYPE-ERROR slot EXPECTED-TYPE */
3136     pushSTACK(STACK_(2+2)); pushSTACK(TheSubr(subr_self)->name);
3137     error(type_error,
3138            GETTEXT("~S: the string length ~S should be nonnegative fixnum"));
3139   }
3140   size = posfixnum_to_V(STACK_2);
3141   check_stringsize(size);
3142   /* check element-type: */
3143   if (boundp(STACK_0)) {
3144     var object eltype = STACK_0;
3145     if (!eq(eltype,S(character))) {
3146       /* Verify (SUBTYPEP eltype 'CHARACTER): */
3147       pushSTACK(eltype); pushSTACK(S(character)); funcall(S(subtypep),2);
3148       if (nullp(value1)) {
3149         pushSTACK(STACK_0); /* eltype */
3150         pushSTACK(S(character)); /* CHARACTER */
3151         pushSTACK(S(Kelement_type)); /* :ELEMENT-TYPE */
3152         pushSTACK(S(make_string));
3153         error(error_condition,GETTEXT("~S: ~S argument must be a subtype of ~S, not ~S"));
3154       }
3155     }
3156   }
3157   var object new_string;
3158   /* maybe fill with initial-element: */
3159   var object initial_element = STACK_1;
3160   if (!boundp(initial_element)) {
3161     /* Allocate a small-sstring, to save memory in the most frequent case.
3162      It will become wider as needed automatically. */
3163     new_string = allocate_s8string(size);
3164   } else {
3165     if (!charp(initial_element)) { /* must be a character */
3166       pushSTACK(initial_element); /* TYPE-ERROR slot DATUM */
3167       pushSTACK(S(character)); /* TYPE-ERROR slot EXPECTED-TYPE */
3168       pushSTACK(S(character)); pushSTACK(initial_element);
3169       pushSTACK(S(Kinitial_element)); pushSTACK(TheSubr(subr_self)->name);
3170       error(type_error,GETTEXT("~S: ~S argument ~S should be of type ~S"));
3171     } else {
3172       var chart ch = char_code(initial_element);
3173      #ifdef HAVE_SMALL_SSTRING
3174       var cint c = as_cint(ch);
3175       if (c < cint8_limit) {
3176         new_string = allocate_s8string(size);
3177         if (size !=0) {
3178           var cint8* pdata = TheS8string(new_string)->data;
3179           do { *pdata++ = c; } while (--size);
3180         }
3181       } else if (c < cint16_limit) {
3182         new_string = allocate_s16string(size);
3183         if (size !=0) {
3184           var cint16* pdata = TheS16string(new_string)->data;
3185           do { *pdata++ = c; } while (--size);
3186         }
3187       } else {
3188         new_string = allocate_s32string(size);
3189         if (size !=0) {
3190           var cint32* pdata = TheS32string(new_string)->data;
3191           do { *pdata++ = c; } while (--size);
3192         }
3193       }
3194      #else
3195       new_string = allocate_string(size);
3196       if (size!=0) {
3197         var chart* charptr = &TheSnstring(new_string)->data[0];
3198         do { *charptr++ = ch; }  while (--size);
3199       }
3200      #endif
3201     }
3202   }
3203   DBGREALLOC(new_string);
3204   VALUES1(new_string); skipSTACK(3);
3205 }
3206 
3207 LISPFUNNR(string_both_trim,4)
3208 /* (SYS::STRING-BOTH-TRIM character-bag-left character-bag-right string invertp)
3209  basic function for
3210  STRING-TRIM, STRING-LEFT-TRIM, STRING-RIGHT-TRIM, CLTL p. 302
3211  method:
3212  (let ((l (length string)))
3213    (do ((i 0 (1+ i)))
3214        (nil)
3215      (when (or (= i l)
3216                (not (find (char string i) character-bag-left)))
3217        (do ((j l (1- j)))
3218            (nil)
3219          (when (or (= i j)
3220                    (not (find (char string (1- j)) character-bag-right)))
3221            (return (if (and (= i 0) (= j l)) string
3222                        (substring string i j)))))))) */
3223 {
3224   var object invertp = popSTACK();
3225   var object string = test_stringsymchar_arg(popSTACK(),!nullp(invertp)); /* convert argument into string */
3226   pushSTACK(string); /* and back into stack again */
3227   pushSTACK(fixnum(vector_length(string))); /* length as fixnum into the stack */
3228   pushSTACK(Fixnum_0); /* i := 0 */
3229   /* stack layout: bag-left, bag-right, string, l, i */
3230   while (!eq(STACK_0,STACK_1)) { /* for i = l (both fixnums): loop done */
3231     /* determine (char string i) : */
3232     pushSTACK(STACK_2); pushSTACK(STACK_1); funcall(L(char),2);
3233     /* determine (find (char ...) character-bag-left) : */
3234     pushSTACK(value1); pushSTACK(STACK_5); funcall(L(find),2);
3235     if (nullp(value1)) break; /* char not in character-bag-left -> loop done */
3236     STACK_0 = fixnum_inc(STACK_0,1); /* i := (1+ i) */
3237   }
3238   pushSTACK(STACK_1); /* j := l */
3239   /* stack layout: bag-left, bag-right, string, l, i, j */
3240   while (!eq(STACK_0,STACK_1)) { /* for j = i (both fixnums): loop done */
3241     /* determine (char string (1- j)) : */
3242     pushSTACK(STACK_3); pushSTACK(fixnum_inc(STACK_1,-1)); funcall(L(char),2);
3243     /* determine (find (char ...) character-bag-right) : */
3244     pushSTACK(value1); pushSTACK(STACK_5); funcall(L(find),2);
3245     if (nullp(value1)) break; /* char not in character-bag-right -> loop done */
3246     STACK_0 = fixnum_inc(STACK_0,-1); /* j := (1- j) */
3247   }
3248   /* stack layout: bag-left, bag-right, string, l, i, j
3249      throw away the characters of the string with index <i or >=j : */
3250   var object j = popSTACK();
3251   var object i = popSTACK();
3252   var object l = popSTACK();
3253   string = popSTACK();
3254   skipSTACK(2);
3255   if (eq(i,Fixnum_0) && eq(j,l)) {
3256     value1 = string; /* for i=0 and j=l, nothing to do, string as value */
3257   } else {
3258     /* copy sub part of the indices >=i, <j :
3259        (substring string i j) as value */
3260     pushSTACK(string); pushSTACK(i); pushSTACK(j); funcall(L(substring),3);
3261   }
3262   mv_count=1;
3263 }
3264 
3265 LISPFUN(string_width,seclass_default,1,0,norest,key,2, (kw(start),kw(end)) )
3266 {
3267   var stringarg arg;
3268   test_string_limits_ro(&arg);
3269   var uintL width = 0;
3270   var uintL len = arg.len;
3271   if (len > 0) {
3272     SstringDispatch(arg.string,X, {
3273       var const cintX* charptr =
3274         &((SstringX)TheVarobject(arg.string))->data[arg.offset+arg.index];
3275       do { width += char_width(as_chart(*charptr)); charptr++;
3276       } while (--len);
3277     });
3278   }
3279   /* width <= 2*arg.len. */
3280   VALUES1(UL_to_I(width));
3281 }
3282 
3283 /* UP: converts the characters of a string piece into uppercase letters
3284  nstring_upcase(dv,offset,len);
3285  > object dv: the character storage vector
3286  > uintL offset: index of first affected character
3287  > uintL len: number of affected characters
3288  can trigger GC */
nstring_upcase(object dv,uintL offset,uintL len)3289 global maygc void nstring_upcase (object dv, uintL offset, uintL len) {
3290  restart_it:
3291   if (len > 0) {
3292     SstringCase(dv,{
3293       do {
3294         dv = sstring_store(dv,offset,up_case(as_chart(TheS8string(dv)->data[offset])));
3295         offset++;
3296         len--;
3297         if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
3298           dv = TheSistring(dv)->data;
3299           goto restart_it;
3300         }
3301       } while (len > 0);
3302     },{
3303       do {
3304         dv = sstring_store(dv,offset,up_case(as_chart(TheS16string(dv)->data[offset])));
3305         offset++;
3306         len--;
3307         if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
3308           dv = TheSistring(dv)->data;
3309           goto restart_it;
3310         }
3311       } while (len > 0);
3312     },{
3313       var cint32* charptr = &TheS32string(dv)->data[offset];
3314       do { *charptr = as_cint(up_case(as_chart(*charptr))); charptr++;
3315       } while (--len);
3316     },{
3317       error_nilarray_retrieve();
3318     });
3319   }
3320 }
3321 
3322 /* UP: converts a string into uppercase letters
3323  string_upcase(string)
3324  > string: string
3325  < result: new normal-simple-string, in uppercase letters
3326  can trigger GC */
string_upcase(object string)3327 global maygc object string_upcase (object string) {
3328   string = copy_string_normal(string); /* copy and turn into a normal-simple-string */
3329   pushSTACK(string);
3330   nstring_upcase(string,0,Sstring_length(string)); /* convert */
3331   string = popSTACK();
3332   DBGREALLOC(string);
3333   return string;
3334 }
3335 
3336 LISPFUN(nstring_upcase,seclass_default,1,0,norest,key,2, (kw(start),kw(end)) )
3337 { /* (NSTRING-UPCASE string :start :end), CLTL p. 304 */
3338   var stringarg arg;
3339   var object string = test_string_limits_rw(&arg);
3340   pushSTACK(string);
3341   nstring_upcase(arg.string,arg.offset+arg.index,arg.len);
3342   VALUES1(popSTACK());
3343 }
3344 
3345 LISPFUN(string_upcase,seclass_read,1,0,norest,key,2, (kw(start),kw(end)) )
3346 { /* (STRING-UPCASE string :start :end), CLTL p. 303 */
3347   var object string;
3348   var uintL offset;
3349   var uintL len;
3350   test_1_stringsym_limits(false,&string,&offset,&len);
3351   pushSTACK(string);
3352   nstring_upcase(string,offset,len);
3353   string = popSTACK();
3354   sstring_un_realloc(string);
3355   VALUES1(string);
3356 }
3357 
3358 /* UP: converts the characters of a string piece into lowercase letters
3359  nstring_downcase(dv,offset,len);
3360  > object dv: the character storage vector
3361  > uintL offset: index of first affected character
3362  > uintL len: number of affected characters
3363  can trigger GC */
nstring_downcase(object dv,uintL offset,uintL len)3364 global maygc void nstring_downcase (object dv, uintL offset, uintL len) {
3365  restart_it:
3366   if (len > 0) {
3367     SstringCase(dv,{
3368       do {
3369         dv = sstring_store(dv,offset,down_case(as_chart(TheS8string(dv)->data[offset])));
3370         offset++;
3371         len--;
3372         if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
3373           dv = TheSistring(dv)->data;
3374           goto restart_it;
3375         }
3376       } while (len > 0);
3377     },{
3378       do {
3379         dv = sstring_store(dv,offset,down_case(as_chart(TheS16string(dv)->data[offset])));
3380         offset++;
3381         len--;
3382         if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
3383           dv = TheSistring(dv)->data;
3384           goto restart_it;
3385         }
3386       } while (len > 0);
3387     },{
3388       var cint32* charptr = &TheS32string(dv)->data[offset];
3389       do { *charptr = as_cint(down_case(as_chart(*charptr))); charptr++;
3390       } while (--len);
3391     },{
3392       error_nilarray_retrieve();
3393     });
3394   }
3395 }
3396 
3397 /* UP: converts a string into lowercase letters
3398  string_downcase(string)
3399  > string: string
3400  < result: new normal-simple-string, in lowercase letters
3401  can trigger GC */
string_downcase(object string)3402 global maygc object string_downcase (object string) {
3403   string = copy_string_normal(string); /* copy and turn into a normal-simple-string */
3404   pushSTACK(string);
3405   nstring_downcase(string,0,Sstring_length(string)); /* convert */
3406   string = popSTACK();
3407   DBGREALLOC(string);
3408   return string;
3409 }
3410 
3411 LISPFUN(nstring_downcase,seclass_default,1,0,norest,key,2,
3412         (kw(start),kw(end)) )
3413 { /* (NSTRING-DOWNCASE string :start :end), CLTL p. 304 */
3414   var stringarg arg;
3415   var object string = test_string_limits_rw(&arg);
3416   pushSTACK(string);
3417   nstring_downcase(arg.string,arg.offset+arg.index,arg.len);
3418   VALUES1(popSTACK());
3419 }
3420 
3421 LISPFUN(string_downcase,seclass_read,1,0,norest,key,2, (kw(start),kw(end)) )
3422 { /* (STRING-DOWNCASE string :start :end), CLTL p. 303 */
3423   var object string;
3424   var uintL offset;
3425   var uintL len;
3426   test_1_stringsym_limits(false,&string,&offset,&len);
3427   pushSTACK(string);
3428   nstring_downcase(string,offset,len);
3429   string = popSTACK();
3430   sstring_un_realloc(string);
3431   VALUES1(string);
3432 }
3433 
3434 /* UP: converts the words of a string piece into words, that
3435  that start with a capital and continue with lowercase letters.
3436  nstring_capitalize(dv,offset,len);
3437  > object dv: the character storage vector
3438  > uintL offset: index of first affected character
3439  > uintL len: number of affected characters
3440  method:
3441   alternately, seach for beginning of a word (and do not convert)
3442   resp. search for end of word (and do convert).
3443  can trigger GC */
nstring_capitalize(object dv,uintL offset,uintL len)3444 global maygc void nstring_capitalize (object dv, uintL offset, uintL len) {
3445   if (len > 0) {
3446     var chart ch;
3447     SstringCase(dv,{
3448       /* Search the start of a word. */
3449      search_wordstart_8:
3450       while (len!=0) {
3451         ch = as_chart(TheS8string(dv)->data[offset]);
3452         if (alphanumericp(ch))
3453           goto wordstart_8;
3454         offset++; len--;
3455       }
3456       return; /* len = 0 -> string terminated */
3457       /* Found the start of a word. */
3458      wordstart_8:
3459       dv = sstring_store(dv,offset,up_case(ch));
3460       while (1) {
3461         offset++;
3462         if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
3463           dv = TheSistring(dv)->data;
3464           SstringCase(dv, NOTREACHED;, goto in_word_16;, goto in_word_32;, NOTREACHED; );
3465         }
3466        in_word_8:
3467         if (--len==0)
3468           break;
3469         ch = as_chart(TheS8string(dv)->data[offset]);
3470         if (!alphanumericp(ch))
3471           goto search_wordstart_8;
3472         dv = sstring_store(dv,offset,down_case(ch));
3473       }
3474       return; /* len = 0 -> string terminated */
3475     },{
3476       /* Search the start of a word. */
3477      search_wordstart_16:
3478       while (len!=0) {
3479         ch = as_chart(TheS16string(dv)->data[offset]);
3480         if (alphanumericp(ch))
3481           goto wordstart_16;
3482         offset++; len--;
3483       }
3484       return; /* len = 0 -> string terminated */
3485       /* Found the start of a word. */
3486      wordstart_16:
3487       dv = sstring_store(dv,offset,up_case(ch));
3488       while (1) {
3489         offset++;
3490         if (sstring_reallocatedp(TheSstring(dv))) { /* has it been reallocated? */
3491           dv = TheSistring(dv)->data;
3492           SstringCase(dv, NOTREACHED;, NOTREACHED;, goto in_word_32;, NOTREACHED; );
3493         }
3494       in_word_16:
3495         if (--len==0)
3496           break;
3497         ch = as_chart(TheS16string(dv)->data[offset]);
3498         if (!alphanumericp(ch))
3499           goto search_wordstart_16;
3500         dv = sstring_store(dv,offset,down_case(ch));
3501       }
3502       return; /* len = 0 -> string terminated */
3503     },{
3504       /* Search the start of a word. */
3505      search_wordstart_32:
3506       while (len!=0) {
3507         ch = as_chart(TheS32string(dv)->data[offset]);
3508         if (alphanumericp(ch))
3509           goto wordstart_32;
3510         offset++; len--;
3511       }
3512       return; /* len = 0 -> string terminated */
3513       /* Found the start of a word. */
3514      wordstart_32:
3515       TheS32string(dv)->data[offset] = as_cint(up_case(ch));
3516       while (1) {
3517         offset++;
3518       in_word_32:
3519         if (--len==0)
3520           break;
3521         ch = as_chart(TheS32string(dv)->data[offset]);
3522         if (!alphanumericp(ch))
3523           goto search_wordstart_32;
3524         TheS32string(dv)->data[offset] = as_cint(down_case(ch));
3525       }
3526       return; /* len = 0 -> string terminated */
3527     },{
3528       error_nilarray_retrieve();
3529     });
3530   }
3531 }
3532 
3533 LISPFUN(nstring_capitalize,seclass_default,1,0,norest,key,2,
3534         (kw(start),kw(end)) )
3535 { /* (NSTRING-CAPITALIZE string :start :end), CLTL p. 304 */
3536   var stringarg arg;
3537   var object string = test_string_limits_rw(&arg);
3538   pushSTACK(string);
3539   nstring_capitalize(arg.string,arg.offset+arg.index,arg.len);
3540   VALUES1(popSTACK());
3541 }
3542 
3543 LISPFUN(string_capitalize,seclass_read,1,0,norest,key,2, (kw(start),kw(end)) )
3544 { /* (STRING-CAPITALIZE string :start :end), CLTL p. 303 */
3545   var object string;
3546   var uintL offset;
3547   var uintL len;
3548   test_1_stringsym_limits(false,&string,&offset,&len);
3549   pushSTACK(string);
3550   nstring_capitalize(string,offset,len);
3551   string = popSTACK();
3552   sstring_un_realloc(string);
3553   VALUES1(string);
3554 }
3555 
3556 LISPFUN(nstring_invertcase,seclass_default,1,0,norest,key,2,
3557         (kw(start),kw(end)) )
3558 { /* (EXT:NSTRING-INVERTCASE string :start :end) */
3559   var stringarg arg;
3560   var object string = test_string_limits_rw(&arg);
3561   pushSTACK(string);
3562   nstring_invertcase(arg.string,arg.offset+arg.index,arg.len);
3563   VALUES1(popSTACK());
3564 }
3565 
3566 LISPFUN(string_invertcase,seclass_read,1,0,norest,key,2, (kw(start),kw(end)) )
3567 { /* (EXT:STRING-INVERTCASE string :start :end) */
3568   var object string;
3569   var uintL offset;
3570   var uintL len;
3571   test_1_stringsym_limits(false,&string,&offset,&len);
3572   pushSTACK(string);
3573   nstring_invertcase(string,offset,len);
3574   string = popSTACK();
3575   sstring_un_realloc(string);
3576   VALUES1(string);
3577 }
3578 
3579 /* (STRING object), CLTL p. 304 */
3580 LISPFUNNR(string,1)
3581 {
3582   VALUES1(test_stringsymchar_arg(popSTACK(),false));
3583 }
3584 
3585 /* (CS-COMMON-LISP:STRING object) */
3586 LISPFUNNR(cs_string,1)
3587 {
3588   VALUES1(test_stringsymchar_arg(popSTACK(),true));
3589 }
3590 
3591 /* (NAME-CHAR name), CLTL p. 243 */
3592 LISPFUNNR(name_char,1)
3593 { /* Convert argument into a string. (Case is not significant here.)
3594      Then search a character with this name: */
3595   VALUES1(name_char(test_stringsymchar_arg(popSTACK(),false)));
3596 }
3597 
3598 /* UP: Returns a substring of a simple-string.
3599  subsstring(string,start,end)
3600  > object string: a simple-string
3601  > uintL start: start index
3602  > uintL end: end index
3603  with 0 <= start <= end <= Sstring_length(string)
3604  < object result: (subseq string start end),
3605            a freshly created normal-simple-string
3606  can trigger GC */
subsstring(object string,uintL start,uintL end)3607 global maygc object subsstring (object string, uintL start, uintL end) {
3608   var uintL count = end - start;
3609   pushSTACK(string);
3610   var object new_string = allocate_string(count);
3611   string = popSTACK();
3612   if (count > 0) {
3613    #ifdef ENABLE_UNICODE
3614     SstringCase(string,
3615       { copy_8bit_32bit(&TheS8string(string)->data[start],
3616                         &TheS32string(new_string)->data[0],count); },
3617       { copy_16bit_32bit(&TheS16string(string)->data[start],
3618                          &TheS32string(new_string)->data[0],count); },
3619       { copy_32bit_32bit(&TheS32string(string)->data[start],
3620                          &TheS32string(new_string)->data[0],count); },
3621       { error_nilarray_retrieve(); });
3622    #else
3623     SstringCase(string, { NOTREACHED; }, { NOTREACHED; },
3624       { copy_8bit_8bit(&TheS8string(string)->data[start],
3625                        &TheS8string(new_string)->data[0],count); },
3626       { error_nilarray_retrieve(); });
3627    #endif
3628   }
3629   DBGREALLOC(new_string);
3630   return new_string;
3631 }
3632 
3633 LISPFUN(substring,seclass_read,2,1,norest,nokey,0,NIL)
3634 { /* (SUBSTRING string start [end]) like SUBSEQ, but only for strings */
3635   var object string;
3636   var uintL len;
3637   var uintV start;
3638   var uintV end;
3639   /* check string/symbol-argument: */
3640   /* FIXME: This does the wrong thing in a case-sensitive package. */
3641   string = test_stringsymchar_arg(STACK_2,false);
3642   len = vector_length(string);
3643   /* now, len is the length (<2^oint_data_len).
3644      check :START-argument:
3645      start := Index STACK_1, default value 0, must be <=len: */
3646   test_index(STACK_1,start=,1,0,<=,len,S(Kstart));
3647   /* start is now the value of the :START-argument.
3648      check :end-argument:
3649      end := Index STACK_0, default value len, must be <=len: */
3650   test_index(STACK_0,end=,2,len,<=,len,S(Kend));
3651   /* end is now the value of the :END-argument.
3652      compare :START and :END arguments: */
3653   if (start > end) {
3654     pushSTACK(STACK_0); /* :END-Index */
3655     pushSTACK(STACK_2); /* :START-Index */
3656     pushSTACK(TheSubr(subr_self)->name);
3657     error(error_condition,GETTEXT("~S: :START-index ~S must not be greater than :END-index ~S"));
3658   }
3659   skipSTACK(3);
3660   /* extract substring: */
3661   pushSTACK(string); /* save old string */
3662   var uintL count = end-start; /* number of characters to be copied */
3663   var object new_string = allocate_string(count); /* new string */
3664   string = popSTACK(); /* old string */
3665   if (count > 0) {
3666     var uintL len; /* again the length of the old string */
3667     var uintL offset;
3668     string = unpack_string_ro(string,&len,&offset);
3669    #ifdef ENABLE_UNICODE
3670     SstringCase(string,
3671       { copy_8bit_32bit(&TheS8string(string)->data[offset+start],
3672                         &TheS32string(new_string)->data[0],count); },
3673       { copy_16bit_32bit(&TheS16string(string)->data[offset+start],
3674                          &TheS32string(new_string)->data[0],count); },
3675       { copy_32bit_32bit(&TheS32string(string)->data[offset+start],
3676                          &TheS32string(new_string)->data[0],count); },
3677       { error_nilarray_retrieve(); });
3678    #else
3679     SstringCase(string, { NOTREACHED; }, { NOTREACHED; },
3680       { copy_8bit_8bit(&TheS8string(string)->data[offset+start],
3681                        &TheS8string(new_string)->data[0],count); },
3682       { error_nilarray_retrieve(); });
3683    #endif
3684   }
3685   DBGREALLOC(new_string);
3686   VALUES1(new_string);
3687 }
3688 
3689 /* UP: concatenates several strings.
3690  string_concat(argcount)
3691  > uintC argcount: number of arguments
3692  > on the STACK: the arguments (should be strings)
3693  < result: total string, freshly created
3694  < STACK: cleaned up
3695  can trigger GC */
string_concat(uintC argcount)3696 modexp maygc object string_concat (uintC argcount) {
3697   var gcv_object_t* args_pointer = (args_end_pointer STACKop argcount);
3698   /* args_pointer = pointer to the arguments
3699      check, if they are all strings, and add the lengths: */
3700   var uintL total_length = 0;
3701   if (argcount > 0) {
3702     var gcv_object_t* argptr = args_pointer;
3703     var uintC count = argcount;
3704     do {
3705       var gcv_object_t *arg = &(NEXT(argptr)); /* next argument */
3706       if (!stringp(*arg)) *arg = check_string(*arg);
3707       total_length += vector_length(*arg);
3708     } while (--count);
3709   }
3710   /* total_length is now the total length. */
3711   check_stringsize(total_length);
3712   var object new_string = allocate_string(total_length); /* new string */
3713   if (argcount > 0) {
3714     var cint32* charptr2 = &TheS32string(new_string)->data[0];
3715     var gcv_object_t* argptr = args_pointer;
3716     do {
3717       var object arg = NEXT(argptr); /* next argument-string */
3718       var uintL len; /* its length */
3719       var uintL offset;
3720       var object string = unpack_string_ro(arg,&len,&offset);
3721       if (len > 0) { /* copy len characters from string to charptr2: */
3722        #ifdef ENABLE_UNICODE
3723         SstringCase(string,
3724           { copy_8bit_32bit(&TheS8string(string)->data[offset],
3725                             charptr2,len); },
3726           { copy_16bit_32bit(&TheS16string(string)->data[offset],
3727                              charptr2,len); },
3728           { copy_32bit_32bit(&TheS32string(string)->data[offset],
3729                              charptr2,len); },
3730           { error_nilarray_retrieve(); });
3731        #else
3732         SstringCase(string, { NOTREACHED; }, { NOTREACHED; },
3733           { copy_8bit_8bit(&TheS8string(string)->data[offset],charptr2,len); },
3734           { error_nilarray_retrieve(); });
3735        #endif
3736         charptr2 += len;
3737       }
3738     } while (--argcount > 0);
3739   }
3740   set_args_end_pointer(args_pointer); /* clean up STACK */
3741   DBGREALLOC(new_string);
3742   return new_string;
3743 }
3744 
3745 LISPFUN(string_concat,seclass_read,0,0,rest,nokey,0,NIL)
3746 { /* (STRING-CONCAT {string})
3747      creates a string by concatenating the arguments */
3748   VALUES1(string_concat(argcount));
3749 }
3750