1 /* 2 * string.h - Public API for Scheme strings 3 * 4 * Copyright (c) 2000-2020 Shiro Kawai <shiro@acm.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the authors nor the names of its contributors 18 * may be used to endorse or promote products derived from this 19 * software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 27 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 /* This file is included from gauche.h */ 35 36 #ifndef GAUCHE_STRING_H 37 #define GAUCHE_STRING_H 38 39 /* [String mutation and MT safety] 40 * Scheme String is mutable (unfortunately). The fields of a string 41 * may be altered by another thread while you're reading it. For MT 42 * safety, it is important that we ensure the atomicity in retrieving 43 * string length/size and content. 44 * 45 * It isn't practical to use mutex for every string access, so we use 46 * atomicity of pointer dereference and assignments. The actual string 47 * fields are stored in immutable structure, ScmStringBody, and a Scheme 48 * string, ScmString, has a pointer to it. When mutation occurs, a new 49 * ScmStringBody is allocated, and the pointer is altered. So, as far 50 * as the client retrieves ScmStringBody first, then use its field values, 51 * the client won't see inconsistent state. 52 * Alternatively, the client can use Scm_GetStringContent(), which 53 * retrieves length, size and char array atomically. 54 * 55 * We further use an assumption that mutation of strings is rare. 56 * So ScmString is allocated with initial body. The 'body' pointer 57 * of ScmString is NULL when it is created, which indicates the initial 58 * body is used (this convention allows static definition of ScmString). 59 * Once the string is mutated, the 'body' pointer points to a 60 * fresh ScmStringBody. Note that it means the initial content of the 61 * string, pointed by initialBody.start, won't be GC-ed as far as the 62 * ScmString is alive, even if its content is mutated and the initial 63 * content isn't used. Another reason to avoid string mutations. 64 */ 65 /* NB: We used to have lengh and size as 'u_int'. It effectively limited 66 the length and size of strings up to max(INT_MAX, SCM_SMALL_INT_MAX), 67 for we accept fixnum range in Scheme level. For 32bit architecture 68 it was OK, but for 64bit architecture it became an issue. 69 As of 0.9.7, we switch to use ScmSmallInt for length and size, breaking 70 the backward compatibility. 71 */ 72 /* The 'index' slot may contain an index vector to realize O(1) random-access 73 * of the string. Building index costs time and space, so it is only 74 * constructed when explicitly asked. Srfi-135 (Immutable Texts) is 75 * really an immutable string with an indexed body. 76 * The user should treat index field as a opaque pointer. 77 * See priv/stringP.h for the details. 78 */ 79 80 typedef struct ScmStringBodyRec { 81 u_long flags; 82 ScmSmallInt length; /* in characters */ 83 ScmSmallInt size; /* in bytes */ 84 const char *start; 85 const void *index; 86 } ScmStringBody; 87 88 #if SIZEOF_LONG == 4 89 #define SCM_STRING_MAX_SIZE SCM_SMALL_INT_MAX 90 #define SCM_STRING_MAX_LENGTH SCM_SMALL_INT_MAX 91 #else /*SIZEOF_LONG > 4*/ 92 #define SCM_STRING_MAX_SIZE INT_MAX 93 #define SCM_STRING_MAX_LENGTH INT_MAX 94 #endif 95 96 struct ScmStringRec { 97 SCM_HEADER; 98 const ScmStringBody *body; /* may be NULL if we use initial body. */ 99 ScmStringBody initialBody; /* initial body */ 100 }; 101 102 /* The flag value. Some of them can be specified at construction time 103 (marked 'C'). Some of them are kept in "flags" field of the string body 104 (marked 'R). */ 105 enum ScmStringFlags { 106 SCM_STRING_IMMUTABLE = (1L<<0), /* [C,R] The string is immutable. */ 107 SCM_STRING_INCOMPLETE = (1L<<1), /* [C,R] The string is incomplete. */ 108 SCM_STRING_TERMINATED = (1L<<2), /* [R] The string content is 109 NUL-terminated. This flag is used 110 internally. */ 111 SCM_STRING_COPYING = (1L<<16), /* [C] Need to copy the content 112 given to the constructor. */ 113 }; 114 #define SCM_STRING_FLAG_MASK (0xffff) 115 116 SCM_CLASS_DECL(Scm_StringClass); 117 #define SCM_CLASS_STRING (&Scm_StringClass) 118 119 #define SCM_STRINGP(obj) SCM_XTYPEP(obj, SCM_CLASS_STRING) 120 #define SCM_STRING(obj) ((ScmString*)(obj)) 121 #define SCM_STRING_BODY(obj) \ 122 ((const ScmStringBody*)(SCM_STRING(obj)->body?SCM_STRING(obj)->body:&SCM_STRING(obj)->initialBody)) 123 124 /* Accessor macros for string body */ 125 #define SCM_STRING_BODY_LENGTH(body) ((body)->length) 126 #define SCM_STRING_BODY_SIZE(body) ((body)->size) 127 #define SCM_STRING_BODY_START(body) ((body)->start) 128 #define SCM_STRING_BODY_END(body) ((body)->start + (body)->size) 129 #define SCM_STRING_BODY_FLAGS(body) ((body)->flags) 130 131 #define SCM_STRING_BODY_HAS_FLAG(body, flag) \ 132 (SCM_STRING_BODY_FLAGS(body)&(flag)) 133 #define SCM_STRING_BODY_INCOMPLETE_P(body) \ 134 SCM_STRING_BODY_HAS_FLAG(body, SCM_STRING_INCOMPLETE) 135 #define SCM_STRING_BODY_IMMUTABLE_P(body) \ 136 SCM_STRING_BODY_HAS_FLAG(body, SCM_STRING_IMMUTABLE) 137 #define SCM_STRING_BODY_SINGLE_BYTE_P(body) \ 138 (SCM_STRING_BODY_SIZE(body)==SCM_STRING_BODY_LENGTH(body)) 139 140 /* This is MT-safe, for string immutability won't change */ 141 #define SCM_STRING_IMMUTABLE_P(obj) \ 142 SCM_STRING_BODY_IMMUTABLE_P(SCM_STRING_BODY(obj)) 143 144 #define SCM_STRING_NULL_P(obj) \ 145 (SCM_STRING_BODY_SIZE(SCM_STRING_BODY(obj)) == 0) 146 147 /* Macros for backward compatibility. Use of these are deprecated, 148 since they are not MT-safe. Use SCM_STRING_BODY_* macros or 149 Scm_GetString* API. */ 150 #define SCM_STRING_LENGTH(obj) (SCM_STRING_BODY(obj)->length) 151 #define SCM_STRING_SIZE(obj) (SCM_STRING_BODY(obj)->size) 152 #define SCM_STRING_START(obj) (SCM_STRING_BODY(obj)->start) 153 #define SCM_STRING_INCOMPLETE_P(obj) \ 154 (SCM_STRING_BODY_INCOMPLETE_P(SCM_STRING_BODY(obj))) 155 #define SCM_STRING_SINGLE_BYTE_P(obj) \ 156 (SCM_STRING_SIZE(obj)==SCM_STRING_LENGTH(obj)) 157 158 159 /* OBSOLETED. Kept for backward compatibility */ 160 #define SCM_MAKSTR_INCOMPLETE SCM_STRING_INCOMPLETE 161 #define SCM_MAKSTR_IMMUTABLE SCM_STRING_IMMUTABLE 162 #define SCM_MAKSTR_COPYING SCM_STRING_COPYING 163 164 /* 165 * String cursors (srfi-130) 166 */ 167 168 /* If offset is small, we use immediate value (see gauche.h). 169 * When the offset doesn't fit, we fall back to ScmStringCursorLarge. 170 * The two types of cursors are handled transparently in string.c and 171 * users shouldn't worry about the distinction. 172 * The actual definition is in priv/stringP.h. 173 */ 174 typedef struct ScmStringCursorLargeRec ScmStringCursorLarge; 175 176 SCM_CLASS_DECL(Scm_StringCursorClass); 177 SCM_CLASS_DECL(Scm_StringCursorLargeClass); 178 #define SCM_CLASS_STRING_CURSOR (&Scm_StringCursorClass) 179 #define SCM_CLASS_STRING_CURSOR_LARGE (&Scm_StringCursorLargeClass) 180 181 SCM_EXTERN int Scm_StringCursorP(ScmObj obj); 182 SCM_EXTERN ScmObj Scm_MakeStringCursorFromIndex(ScmString *src, ScmSmallInt index); 183 SCM_EXTERN ScmObj Scm_MakeStringCursorEnd(ScmString *src); 184 SCM_EXTERN ScmObj Scm_StringCursorIndex(ScmString *s, ScmObj sc); 185 SCM_EXTERN ScmObj Scm_StringCursorStart(ScmString* s); 186 SCM_EXTERN ScmObj Scm_StringCursorEnd(ScmString* s); 187 SCM_EXTERN ScmObj Scm_StringCursorForward(ScmString* s, ScmObj cursor, int nchars); 188 SCM_EXTERN ScmObj Scm_StringCursorBack(ScmString* s, ScmObj cursor, int nchars); 189 SCM_EXTERN ScmChar Scm_StringRefCursor(ScmString* s, ScmObj sc, int range_error); 190 SCM_EXTERN ScmObj Scm_SubstringCursor(ScmString *str, ScmObj start, ScmObj end); 191 SCM_EXTERN int Scm_StringCursorCompare(ScmObj sc1, ScmObj sc2, int (*numcmp)(ScmObj, ScmObj)); 192 193 194 /* 195 * Constructors 196 */ 197 198 SCM_EXTERN ScmObj Scm_MakeString(const char *str, 199 ScmSmallInt size, ScmSmallInt len, 200 u_long flags); 201 SCM_EXTERN ScmObj Scm_MakeFillString(ScmSmallInt len, ScmChar fill); 202 SCM_EXTERN ScmObj Scm_CopyStringWithFlags(ScmString *str, 203 u_long flags, u_long mask); 204 205 #define SCM_MAKE_STR(cstr) \ 206 Scm_MakeString(cstr, -1, -1, 0) 207 #define SCM_MAKE_STR_COPYING(cstr) \ 208 Scm_MakeString(cstr, -1, -1, SCM_STRING_COPYING) 209 #define SCM_MAKE_STR_IMMUTABLE(cstr) \ 210 Scm_MakeString(cstr, -1, -1, SCM_STRING_IMMUTABLE) 211 212 #define Scm_CopyString(str) \ 213 Scm_CopyStringWithFlags(str, 0, SCM_STRING_IMMUTABLE) 214 215 /* 216 * C String extraction 217 */ 218 219 SCM_EXTERN char* Scm_GetString(ScmString *str); 220 SCM_EXTERN const char* Scm_GetStringConst(ScmString *str); 221 SCM_EXTERN const char* Scm_GetStringConstUnsafe(ScmString *str); 222 SCM_EXTERN const char* Scm_GetStringContent(ScmString *str, 223 ScmSmallInt *psize, 224 ScmSmallInt *plen, 225 u_long *pflags); 226 227 #define SCM_STRING_CONST_CSTRING(obj) Scm_GetStringConst(SCM_STRING(obj)) 228 #define SCM_STRING_CONST_CSTRING_SAFE(obj) Scm_GetStringConstSafe(SCM_STRING(obj)) 229 230 /* 231 * Conversions 232 */ 233 234 SCM_EXTERN ScmObj Scm_CStringArrayToList(const char **array, 235 ScmSmallInt size, u_long flags); 236 SCM_EXTERN const char **Scm_ListToConstCStringArray(ScmObj lis, 237 int errp); 238 SCM_EXTERN char **Scm_ListToCStringArray(ScmObj lis, int errp, 239 void *(*alloc)(size_t)); 240 SCM_EXTERN ScmObj Scm_StringToList(ScmString *str); 241 SCM_EXTERN ScmObj Scm_ListToString(ScmObj chars); 242 243 /* OBSOLETED */ 244 SCM_EXTERN ScmObj Scm_StringCompleteToIncomplete(ScmString *); 245 SCM_EXTERN ScmObj Scm_StringIncompleteToComplete(ScmString *, int, ScmChar); 246 247 /* 248 * Comparisons 249 */ 250 251 SCM_EXTERN int Scm_StringEqual(ScmString *x, ScmString *y); 252 SCM_EXTERN int Scm_StringCmp(ScmString *x, ScmString *y); 253 SCM_EXTERN int Scm_StringCiCmp(ScmString *x, ScmString *y); 254 255 /* 256 * Accessors and modifiers 257 */ 258 259 SCM_EXTERN ScmChar Scm_StringRef(ScmString *str, 260 ScmSmallInt k, 261 int range_error); 262 SCM_EXTERN int Scm_StringByteRef(ScmString *str, 263 ScmSmallInt k, 264 int range_error); 265 SCM_EXTERN ScmObj Scm_Substring(ScmString *x, 266 ScmSmallInt start, 267 ScmSmallInt end, 268 int byterange); 269 SCM_EXTERN ScmObj Scm_StringReplaceBody(ScmString *x, const ScmStringBody *b); 270 271 /* 272 * Concatenation 273 */ 274 275 SCM_EXTERN ScmObj Scm_StringAppend2(ScmString *x, ScmString *y); 276 SCM_EXTERN ScmObj Scm_StringAppendC(ScmString *x, const char *s, 277 ScmSmallInt size, ScmSmallInt len); 278 SCM_EXTERN ScmObj Scm_StringAppend(ScmObj strs); 279 SCM_EXTERN ScmObj Scm_StringJoin(ScmObj strs, ScmString *delim, int grammar); 280 281 282 /* grammar spec for StringJoin (see SRFI-13) */ 283 enum { 284 SCM_STRING_JOIN_INFIX, 285 SCM_STRING_JOIN_STRICT_INFIX, 286 SCM_STRING_JOIN_SUFFIX, 287 SCM_STRING_JOIN_PREFIX 288 }; 289 290 /* 291 * Searching 292 */ 293 294 /* Note: On 1.0 release, let StringSplitByChar have limit arg. For 0.9.x 295 series we use a separate Scm_StringSplitByCharWithLimit in order to keep 296 ABI compatibility. */ 297 SCM_EXTERN ScmObj Scm_StringSplitByChar(ScmString *str, ScmChar ch); 298 SCM_EXTERN ScmObj Scm_StringSplitByCharWithLimit(ScmString *str, ScmChar ch, 299 int limit); 300 SCM_EXTERN ScmObj Scm_StringScan(ScmString *s1, ScmString *s2, int retmode); 301 SCM_EXTERN ScmObj Scm_StringScanChar(ScmString *s1, ScmChar ch, int retmode); 302 SCM_EXTERN ScmObj Scm_StringScanRight(ScmString *s1, ScmString *s2, int retmode); 303 SCM_EXTERN ScmObj Scm_StringScanCharRight(ScmString *s1, ScmChar ch, int retmode); 304 305 /* "retmode" argument for string scan */ 306 enum { 307 SCM_STRING_SCAN_INDEX, /* return index */ 308 SCM_STRING_SCAN_BEFORE, /* return substring of s1 before s2 */ 309 SCM_STRING_SCAN_AFTER, /* return substring of s1 after s2 */ 310 SCM_STRING_SCAN_BEFORE2, /* return substr of s1 before s2 and rest */ 311 SCM_STRING_SCAN_AFTER2, /* return substr of s1 up to s2 and rest */ 312 SCM_STRING_SCAN_BOTH, /* return substr of s1 before and after s2 */ 313 SCM_STRING_SCAN_CURSOR, /* return <string-cursor> 314 TRANSIENT: This may be more reasonable 315 to be after INDEX, but we have it here 316 until 1.0 to keep ABI compatibility */ 317 SCM_STRING_SCAN_NUM_RETMODES 318 }; 319 320 /* 321 * Miscellaneous 322 */ 323 SCM_EXTERN ScmSmallInt Scm_MBLen(const char *str, const char *stop); 324 325 /* INTERNAL */ 326 SCM_EXTERN const char *Scm_StringPosition(ScmString *str, ScmSmallInt k); /*DEPRECATED*/ 327 SCM_EXTERN const char *Scm_StringBodyPosition(const ScmStringBody *str, ScmSmallInt k); 328 SCM_EXTERN ScmObj Scm_MaybeSubstring(ScmString *x, ScmObj start, ScmObj end); 329 SCM_EXTERN int Scm_StringBodyFastIndexableP(const ScmStringBody *sb); 330 331 /* 332 * Static initializer 333 */ 334 /* You can allocate a constant string statically, if you calculate 335 the length by yourself. These macros are mainly used in machine- 336 generated code. 337 SCM_DEFINE_STRING_CONST can be used to define a static string, 338 and SCM_STRING_CONST_INITIALIZER can be used inside static array 339 of strings. */ 340 341 #define SCM_STRING_CONST_INITIALIZER(str, len, siz) \ 342 { { SCM_CLASS_STATIC_TAG(Scm_StringClass) }, NULL, \ 343 { SCM_STRING_IMMUTABLE|SCM_STRING_TERMINATED, (len), (siz), (str), NULL } } 344 345 #define SCM_DEFINE_STRING_CONST(name, str, len, siz) \ 346 ScmString name = SCM_STRING_CONST_INITIALIZER(str, len, siz) 347 348 /* 349 * DStrings 350 * Auxiliary structure to construct a string of unknown length. 351 * This is not an ScmObj. See string.c for details. 352 */ 353 #define SCM_DSTRING_INIT_CHUNK_SIZE 32 354 355 typedef struct ScmDStringChunkRec { 356 ScmSmallInt bytes; /* actual bytes stored in this chunk. 357 Note that this is set when the next 358 chunk is allocated, or by Scm_DStringSize.*/ 359 char data[SCM_DSTRING_INIT_CHUNK_SIZE]; /* variable length, indeed. */ 360 } ScmDStringChunk; 361 362 typedef struct ScmDStringChainRec { 363 struct ScmDStringChainRec *next; 364 ScmDStringChunk *chunk; 365 } ScmDStringChain; 366 367 struct ScmDStringRec { 368 ScmDStringChunk init; /* initial chunk */ 369 ScmDStringChain *anchor; /* chain of extra chunks */ 370 ScmDStringChain *tail; /* current chunk */ 371 char *current; /* current ptr */ 372 char *end; /* end of current chunk */ 373 ScmSmallInt lastChunkSize; /* size of the last chunk */ 374 ScmSmallInt length; /* # of chars written */ 375 }; 376 377 SCM_EXTERN void Scm_DStringInit(ScmDString *dstr); 378 SCM_EXTERN ScmSmallInt Scm_DStringSize(ScmDString *dstr); 379 SCM_EXTERN ScmObj Scm_DStringGet(ScmDString *dstr, u_long flags); 380 SCM_EXTERN const char *Scm_DStringGetz(ScmDString *dstr); 381 SCM_EXTERN void Scm_DStringWeld(ScmDString *dstr); 382 SCM_EXTERN const char *Scm_DStringPeek(ScmDString *dstr, 383 ScmSmallInt *size, 384 ScmSmallInt *len); 385 SCM_EXTERN void Scm_DStringPutz(ScmDString *dstr, const char *str, 386 ScmSmallInt siz); 387 SCM_EXTERN void Scm_DStringAdd(ScmDString *dstr, ScmString *str); 388 SCM_EXTERN void Scm_DStringPutb(ScmDString *dstr, char byte); 389 SCM_EXTERN void Scm_DStringPutc(ScmDString *dstr, ScmChar ch); 390 SCM_EXTERN ScmSmallInt Scm_DStringTruncate(ScmDString *dstr, 391 ScmSmallInt newsize); 392 393 #define SCM_DSTRING_SIZE(dstr) Scm_DStringSize(dstr); 394 395 #define SCM_DSTRING_PUTB(dstr, byte) \ 396 do { \ 397 if ((dstr)->current >= (dstr)->end) Scm__DStringRealloc(dstr, 1);\ 398 *(dstr)->current++ = (char)(byte); \ 399 (dstr)->length = -1; /* may be incomplete */ \ 400 } while (0) 401 402 #define SCM_DSTRING_PUTC(dstr, ch) \ 403 do { \ 404 ScmChar ch_DSTR = (ch); \ 405 ScmDString *d_DSTR = (dstr); \ 406 ScmSmallInt siz_DSTR = SCM_CHAR_NBYTES(ch_DSTR);\ 407 if (d_DSTR->current + siz_DSTR > d_DSTR->end) \ 408 Scm__DStringRealloc(d_DSTR, siz_DSTR); \ 409 SCM_CHAR_PUT(d_DSTR->current, ch_DSTR); \ 410 d_DSTR->current += siz_DSTR; \ 411 if (d_DSTR->length >= 0) d_DSTR->length++; \ 412 } while (0) 413 414 SCM_EXTERN void Scm__DStringRealloc(ScmDString *dstr, ScmSmallInt min_incr); 415 416 /* 417 * Utility. Returns NUL-terminated string (SRC doesn't need to be 418 * NUL-terminated, but must be longer than SIZE). 419 */ 420 SCM_EXTERN char *Scm_StrdupPartial(const char *src, size_t size); 421 422 #if GAUCHE_STRING_POINTER 423 /* 424 * String pointers (OBSOLETED) 425 */ 426 427 /* Efficient way to access string from Scheme */ 428 typedef struct ScmStringPointerRec { 429 SCM_HEADER; 430 ScmSmallInt length; 431 ScmSmallInt size; 432 const char *start; 433 ScmSmallInt index; 434 const char *current; 435 } ScmStringPointer; 436 437 SCM_CLASS_DECL(Scm_StringPointerClass); 438 #define SCM_CLASS_STRING_POINTER (&Scm_StringPointerClass) 439 #define SCM_STRING_POINTERP(obj) SCM_XTYPEP(obj, SCM_CLASS_STRING_POINTER) 440 #define SCM_STRING_POINTER(obj) ((ScmStringPointer*)obj) 441 442 SCM_EXTERN ScmObj Scm_MakeStringPointer(ScmString *src, ScmSmallInt index, 443 ScmSmallInt start, ScmSmallInt end); 444 SCM_EXTERN ScmObj Scm_StringPointerRef(ScmStringPointer *sp); 445 SCM_EXTERN ScmObj Scm_StringPointerNext(ScmStringPointer *sp); 446 SCM_EXTERN ScmObj Scm_StringPointerPrev(ScmStringPointer *sp); 447 SCM_EXTERN ScmObj Scm_StringPointerSet(ScmStringPointer *sp, 448 ScmSmallInt index); 449 SCM_EXTERN ScmObj Scm_StringPointerSubstring(ScmStringPointer *sp, int beforep); 450 SCM_EXTERN ScmObj Scm_StringPointerCopy(ScmStringPointer *sp); 451 SCM_EXTERN void Scm_StringPointerDump(ScmStringPointer *sp); 452 453 #endif /*GAUCHE_STRING_POINTER*/ 454 455 #endif /* GAUCHE_STRING_H */ 456