1 /**
2 ** @file mruby/string.h - String class
3 **
4 ** See Copyright Notice in mruby.h
5 */
6 
7 #ifndef MRUBY_STRING_H
8 #define MRUBY_STRING_H
9 
10 #include "common.h"
11 
12 /**
13  * String class
14  */
15 MRB_BEGIN_DECL
16 
/*
 * Character table defined in another translation unit; its contents are not
 * visible from this header.
 * NOTE(review): presumably the digit characters used for integer <-> string
 * conversion -- confirm against the definition.
 */
extern const char mrb_digitmap[];
18 
/*
 * Capacity reported by RSTR_CAPA() for an embedded string, and the upper
 * bound tested by RSTR_EMBEDDABLE_P().
 *
 * Computed as four pointer widths minus 32 bits minus one byte.
 * NOTE(review): the 32 bits presumably account for the object header flags
 * and the final byte for a terminating NUL -- confirm against
 * MRB_OBJECT_HEADER.
 */
#define RSTRING_EMBED_LEN_MAX \
  ((mrb_int)(sizeof(void*) * 4 - 32 / CHAR_BIT - 1))
21 
/**
 * String object.
 *
 * While RSTR_EMBED_P() is false the RSTR_* accessors read the `as.heap`
 * members below. While it is true the same object is reinterpreted as
 * `struct RStringEmbed` (see RSTR_EMBED_PTR) and the length lives in the
 * flag bits (see RSTR_EMBED_LEN).
 */
struct RString {
  MRB_OBJECT_HEADER;  /* presumably supplies the `flags` member the RSTR_* macros use -- confirm */
  union {
    struct {
      mrb_ssize len;  /* length of a non-embedded string; read by RSTR_LEN() */
      union {
        mrb_ssize capa;  /* capacity of a non-embedded string; read by RSTR_CAPA() */
        struct mrb_shared_string *shared;  /* presumably valid while MRB_STR_SHARED is set -- confirm */
        struct RString *fshared;  /* presumably valid while MRB_STR_FSHARED is set -- confirm */
      } aux;
      char *ptr;  /* buffer of a non-embedded string; read by RSTR_PTR() */
    } heap;
  } as;
};
/**
 * Alternate view of a string object whose MRB_STR_EMBED flag is set.
 *
 * RSTR_EMBED_PTR() casts a `struct RString*` to this type and returns `ary`,
 * i.e. the bytes that directly follow the object header.
 */
struct RStringEmbed {
  MRB_OBJECT_HEADER;
  char ary[];  /* inline character storage (flexible array member) */
};
40 
/* Clears every type flag together with the embedded-length bits of `s`. */
#define RSTR_UNSET_TYPE_FLAG(s) \
  ((s)->flags &= ~(MRB_STR_EMBED_LEN_MASK | MRB_STR_TYPE_MASK))

/*
 * Resets the type/embedded-length bits of `s`, then raises the single flag
 * MRB_STR_<type>. `type` is pasted onto the `MRB_STR_` prefix, so pass the
 * bare suffix (e.g. SHARED, EMBED).
 */
#define RSTR_SET_TYPE_FLAG(s, type) \
  (RSTR_UNSET_TYPE_FLAG(s), (s)->flags |= MRB_STR_##type)
43 
/* Non-zero when `s` keeps its bytes inline (MRB_STR_EMBED is set). */
#define RSTR_EMBED_P(s) ((s)->flags & MRB_STR_EMBED)
/* Marks `s` as embedded; the embedded length bits are left untouched. */
#define RSTR_SET_EMBED_FLAG(s) ((s)->flags |= MRB_STR_EMBED)
/* Clears the embed flag and the embedded length bits in one step. */
#define RSTR_UNSET_EMBED_FLAG(s) ((s)->flags &= ~(MRB_STR_EMBED_LEN_MASK|MRB_STR_EMBED))
/*
 * Stores `n` in the embedded-length bit field of `s->flags`.
 * `n` is evaluated once; it is not range-checked against the field width.
 */
#define RSTR_SET_EMBED_LEN(s, n) do {\
  size_t embed_len_ = (n);\
  (s)->flags = ((s)->flags & ~MRB_STR_EMBED_LEN_MASK) |\
               (embed_len_ << MRB_STR_EMBED_LEN_SHIFT);\
} while (0)
/*
 * Stores the length `n` wherever `s` currently keeps it: `as.heap.len` for
 * a non-embedded string, the flag bits for an embedded one.
 */
#define RSTR_SET_LEN(s, n) do {\
  if (!RSTR_EMBED_P(s)) {\
    (s)->as.heap.len = (mrb_ssize)(n);\
  }\
  else {\
    RSTR_SET_EMBED_LEN((s),(n));\
  }\
} while (0)
/* Inline buffer of `s`, obtained by viewing it as `struct RStringEmbed`. */
#define RSTR_EMBED_PTR(s) (((struct RStringEmbed*)(s))->ary)
/* Length kept in the flag bits of `s`, as an mrb_int. */
#define RSTR_EMBED_LEN(s)\
  (mrb_int)(((s)->flags & MRB_STR_EMBED_LEN_MASK) >> MRB_STR_EMBED_LEN_SHIFT)
/* True when `len` does not exceed RSTRING_EMBED_LEN_MAX. */
#define RSTR_EMBEDDABLE_P(len) (RSTRING_EMBED_LEN_MAX >= (len))
64 
/* Byte buffer of `s`: `as.heap.ptr`, or the inline storage when embedded. */
#define RSTR_PTR(s) (!RSTR_EMBED_P(s) ? (s)->as.heap.ptr : RSTR_EMBED_PTR(s))
/* Length of `s`: `as.heap.len`, or the flag-encoded length when embedded. */
#define RSTR_LEN(s) (!RSTR_EMBED_P(s) ? (s)->as.heap.len : RSTR_EMBED_LEN(s))
/* Capacity of `s`: `as.heap.aux.capa`, or RSTRING_EMBED_LEN_MAX when embedded. */
#define RSTR_CAPA(s) (!RSTR_EMBED_P(s) ? (s)->as.heap.aux.capa : RSTRING_EMBED_LEN_MAX)
68 
/* MRB_STR_SHARED: test (non-zero when set), set, clear. */
#define RSTR_SHARED_P(s)            (MRB_STR_SHARED & (s)->flags)
#define RSTR_SET_SHARED_FLAG(s)     ((s)->flags |= MRB_STR_SHARED)
#define RSTR_UNSET_SHARED_FLAG(s)   ((s)->flags &= ~MRB_STR_SHARED)

/* MRB_STR_FSHARED: test (non-zero when set), set, clear. */
#define RSTR_FSHARED_P(s)           (MRB_STR_FSHARED & (s)->flags)
#define RSTR_SET_FSHARED_FLAG(s)    ((s)->flags |= MRB_STR_FSHARED)
#define RSTR_UNSET_FSHARED_FLAG(s)  ((s)->flags &= ~MRB_STR_FSHARED)

/* MRB_STR_NOFREE: test (non-zero when set), set, clear. */
#define RSTR_NOFREE_P(s)            (MRB_STR_NOFREE & (s)->flags)
#define RSTR_SET_NOFREE_FLAG(s)     ((s)->flags |= MRB_STR_NOFREE)
#define RSTR_UNSET_NOFREE_FLAG(s)   ((s)->flags &= ~MRB_STR_NOFREE)
80 
/*
 * MRB_STR_ASCII flag helpers.
 *
 * Without MRB_UTF8_STRING every helper expands to `(void)0`: the arguments
 * are dropped unevaluated and the result cannot be used as a value.
 * With MRB_UTF8_STRING they test / set / clear MRB_STR_ASCII in `s->flags`;
 * RSTR_WRITE_ASCII_FLAG clears the flag and then ORs in `v`, and
 * RSTR_COPY_ASCII_FLAG writes the flag state of `src` into `dst`.
 */
#ifndef MRB_UTF8_STRING
# define RSTR_ASCII_P(s) (void)0
# define RSTR_SET_ASCII_FLAG(s) (void)0
# define RSTR_UNSET_ASCII_FLAG(s) (void)0
# define RSTR_WRITE_ASCII_FLAG(s, v) (void)0
# define RSTR_COPY_ASCII_FLAG(dst, src) (void)0
#else
# define RSTR_ASCII_P(s) ((s)->flags & MRB_STR_ASCII)
# define RSTR_SET_ASCII_FLAG(s) ((s)->flags |= MRB_STR_ASCII)
# define RSTR_UNSET_ASCII_FLAG(s) ((s)->flags &= ~MRB_STR_ASCII)
# define RSTR_WRITE_ASCII_FLAG(s, v) (RSTR_UNSET_ASCII_FLAG(s), (s)->flags |= (v))
# define RSTR_COPY_ASCII_FLAG(dst, src) RSTR_WRITE_ASCII_FLAG(dst, RSTR_ASCII_P(src))
#endif
94 
/* MRB_STR_POOL: test (non-zero when set) and set. No clearing macro is provided here. */
#define RSTR_POOL_P(s)         (MRB_STR_POOL & (s)->flags)
#define RSTR_SET_POOL_FLAG(s)  ((s)->flags |= MRB_STR_POOL)
97 
/**
 * Accessors that take an `mrb_value` holding a string.
 *
 * mrb_str_ptr() / RSTRING() cast the object pointer of the value to
 * `struct RString*`; the remaining RSTRING_* macros apply the matching
 * RSTR_* macro to that pointer. Several of them evaluate `s` more than
 * once, so do not pass expressions with side effects.
 */
#define mrb_str_ptr(s)       ((struct RString*)(mrb_ptr(s)))
#define RSTRING(s)           mrb_str_ptr(s)
#define RSTRING_PTR(s)       RSTR_PTR(RSTRING(s))
#define RSTRING_EMBED_LEN(s) RSTR_EMBED_LEN(RSTRING(s))
#define RSTRING_LEN(s)       RSTR_LEN(RSTRING(s))
#define RSTRING_CAPA(s)      RSTR_CAPA(RSTRING(s))
/* One past the last byte: RSTRING_PTR(s) + RSTRING_LEN(s). */
#define RSTRING_END(s)       (RSTRING_PTR(s) + RSTRING_LEN(s))
/* NOTE(review): presumably the C-string length of the given string -- confirm in the implementation. */
MRB_API mrb_int mrb_str_strlen(mrb_state*, struct RString*);
/* Shorthand for mrb_string_cstr(mrb, s). */
#define RSTRING_CSTR(mrb,s)  mrb_string_cstr(mrb, s)
110 
/*
 * Bit layout of `flags` for string objects.
 *
 *   0x01..0x08  type flags (covered by MRB_STR_TYPE_MASK)
 *   0x10..0x20  status flags
 *   bits 6..10  length of an embedded string (MRB_STR_EMBED_LEN_MASK)
 */
#define MRB_STR_SHARED    0x01
#define MRB_STR_FSHARED   0x02
#define MRB_STR_NOFREE    0x04
#define MRB_STR_EMBED     0x08  /* type flags up to here */
#define MRB_STR_POOL      0x10  /* status flags from here */
#define MRB_STR_ASCII     0x20
/* Width and position of the embedded-length bit field. */
#define MRB_STR_EMBED_LEN_BIT 5
#define MRB_STR_EMBED_LEN_SHIFT 6
#define MRB_STR_EMBED_LEN_MASK (((1 << MRB_STR_EMBED_LEN_BIT) - 1) << MRB_STR_EMBED_LEN_SHIFT)
/* Every bit below the first status flag. */
#define MRB_STR_TYPE_MASK (MRB_STR_POOL - 1)
121 
122 
/*
 * NOTE(review): declared without MRB_API, unlike most functions in this
 * header; presumably an internal hook that releases a string's storage when
 * the object is collected -- confirm in the implementation.
 */
void mrb_gc_free_str(mrb_state*, struct RString*);

/*
 * NOTE(review): presumably makes `s` safe to modify in place (and, given the
 * variant below, does not preserve the ASCII flag) -- confirm in the
 * implementation.
 */
MRB_API void mrb_str_modify(mrb_state *mrb, struct RString *s);
/* Same as mrb_str_modify(), but keeps the ASCII flag if it is set. */
MRB_API void mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s);
128 
/**
 * Finds the index of a substring in a string.
 *
 * @param mrb The current mruby state.
 * @param str Ruby string to search in.
 * @param p Pointer to the bytes to search for.
 * @param len Number of bytes at p.
 * @param offset Position in str at which the search starts.
 * @return [mrb_int] Index of the match.
 *   NOTE(review): the value returned when nothing matches is not visible
 *   from this header -- confirm in the implementation.
 */
MRB_API mrb_int mrb_str_index(mrb_state *mrb, mrb_value str, const char *p, mrb_int len, mrb_int offset);
/*
 * mrb_str_index() for a string literal: the length is taken from the literal
 * with mrb_strlen_lit(), so `lit` appears twice in the expansion.
 * The expansion deliberately has no trailing semicolon so the macro can be
 * used inside expressions and in unbraced if/else bodies.
 */
#define mrb_str_index_lit(mrb, str, lit, off) mrb_str_index(mrb, str, lit, mrb_strlen_lit(lit), off)
134 
/**
 * Appends other to self. self receives the concatenated result; the
 * function itself returns nothing.
 *
 *
 * Example:
 *
 *     int
 *     main(int argc,
 *          char **argv)
 *     {
 *       // Variable declarations.
 *       mrb_value str1;
 *       mrb_value str2;
 *
 *       mrb_state *mrb = mrb_open();
 *       if (!mrb)
 *       {
 *          // handle error
 *       }
 *
 *       // Creates new Ruby strings.
 *       str1 = mrb_str_new_lit(mrb, "abc");
 *       str2 = mrb_str_new_lit(mrb, "def");
 *
 *       // Appends str2 to str1.
 *       mrb_str_concat(mrb, str1, str2);
 *
 *       // Prints the concatenated Ruby string.
 *       mrb_p(mrb, str1);
 *
 *       mrb_close(mrb);
 *       return 0;
 *     }
 *
 * Result:
 *
 *     => "abcdef"
 *
 * @param mrb The current mruby state.
 * @param self String that other is appended to.
 * @param other String to append to self.
 */
MRB_API void mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other);
179 
/**
 * Adds two strings together and returns the result.
 *
 *
 * Example:
 *
 *     int
 *     main(int argc,
 *          char **argv)
 *     {
 *       // Variable declarations.
 *       mrb_value a;
 *       mrb_value b;
 *       mrb_value c;
 *
 *       mrb_state *mrb = mrb_open();
 *       if (!mrb)
 *       {
 *          // handle error
 *       }
 *
 *       // Creates two Ruby strings from the passed in C strings.
 *       a = mrb_str_new_lit(mrb, "abc");
 *       b = mrb_str_new_lit(mrb, "def");
 *
 *       // Prints both Ruby strings.
 *       mrb_p(mrb, a);
 *       mrb_p(mrb, b);
 *
 *       // Concatenates both Ruby strings.
 *       c = mrb_str_plus(mrb, a, b);
 *
 *       // Prints the new concatenated Ruby string.
 *       mrb_p(mrb, c);
 *
 *       mrb_close(mrb);
 *       return 0;
 *     }
 *
 *
 * Result:
 *
 *     => "abc"  # First string
 *     => "def"  # Second string
 *     => "abcdef" # First & Second concatenated.
 *
 * @param mrb The current mruby state.
 * @param a First string to concatenate.
 * @param b Second string to concatenate.
 * @return [mrb_value] Returns a new String containing a followed by b.
 */
MRB_API mrb_value mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b);
232 
/**
 * Converts a pointer into a Ruby string.
 *
 * NOTE(review): the textual format of the result is not visible from this
 * header -- confirm in the implementation.
 *
 * @param mrb The current mruby state.
 * @param p The pointer to convert to a Ruby string.
 * @return [mrb_value] Returns a new Ruby String.
 */
MRB_API mrb_value mrb_ptr_to_str(mrb_state *mrb, void *p);
241 
/**
 * Returns an object as a Ruby string.
 *
 * @param mrb The current mruby state.
 * @param obj The object to return as a Ruby string.
 * @return [mrb_value] The object as a Ruby string.
 */
MRB_API mrb_value mrb_obj_as_string(mrb_state *mrb, mrb_value obj);
250 
/**
 * Resizes the string to the length given by len.
 *
 * Example:
 *
 *     int
 *     main(int argc,
 *          char **argv)
 *     {
 *         // Variable declaration.
 *         mrb_value str;
 *
 *         mrb_state *mrb = mrb_open();
 *         if (!mrb)
 *         {
 *            // handle error
 *         }
 *         // Creates a new string.
 *         str = mrb_str_new_lit(mrb, "Hello, world!");
 *         // Resizes str to a length of 5.
 *         mrb_str_resize(mrb, str, 5);
 *         mrb_p(mrb, str);
 *
 *         mrb_close(mrb);
 *         return 0;
 *      }
 *
 * Result:
 *
 *      => "Hello"
 *
 * @param mrb The current mruby state.
 * @param str The Ruby string to resize.
 * @param len The new length.
 * @return [mrb_value] An object as a Ruby string.
 */
MRB_API mrb_value mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len);
289 
/**
 * Returns a sub-string of len characters starting at beg.
 *
 * Example:
 *
 *     int
 *     main(int argc,
 *     char const **argv)
 *     {
 *       // Variable declarations.
 *       mrb_value str1;
 *       mrb_value str2;
 *
 *       mrb_state *mrb = mrb_open();
 *       if (!mrb)
 *       {
 *         // handle error
 *       }
 *       // Creates new string.
 *       str1 = mrb_str_new_lit(mrb, "Hello, world!");
 *       // Returns the sub-string of length 2 that starts at index 0.
 *       str2 = mrb_str_substr(mrb, str1, 0, 2);
 *
 *       // Prints sub-string.
 *       mrb_p(mrb, str2);
 *
 *       mrb_close(mrb);
 *       return 0;
 *     }
 *
 * Result:
 *
 *     => "He"
 *
 * @param mrb The current mruby state.
 * @param str Ruby string.
 * @param beg The beginning point of the sub-string.
 * @param len The length of the sub-string (not its end point).
 * @return [mrb_value] An object as a Ruby sub-string.
 */
MRB_API mrb_value mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
331 
/**
 * Returns the given value as a Ruby string.
 *
 * NOTE(review): how a non-string argument is handled (conversion vs. raising)
 * is not visible from this header -- confirm in the implementation.
 *
 * @param mrb The current mruby state.
 * @param str Ruby string.
 * @return [mrb_value] A Ruby string.
 */
MRB_API mrb_value mrb_ensure_string_type(mrb_state *mrb, mrb_value str);
/* NOTE(review): presumably the checking counterpart of mrb_ensure_string_type() -- confirm its result for non-strings. */
MRB_API mrb_value mrb_check_string_type(mrb_state *mrb, mrb_value str);
/* obsolete: use mrb_ensure_string_type() instead */
MRB_API mrb_value mrb_string_type(mrb_state *mrb, mrb_value str);
344 
345 
/*
 * String constructors taking a capacity.
 * NOTE(review): presumably both return an empty string with room for `capa`
 * bytes; how the two differ is not visible from this header -- confirm.
 */
MRB_API mrb_value mrb_str_new_capa(mrb_state *mrb, size_t capa);
MRB_API mrb_value mrb_str_buf_new(mrb_state *mrb, size_t capa);
348 
/* NUL-terminated C string from mrb_value */
MRB_API const char *mrb_string_cstr(mrb_state *mrb, mrb_value str);
/* NUL-terminated C string from mrb_value; `str` will be updated */
MRB_API const char *mrb_string_value_cstr(mrb_state *mrb, mrb_value *str);
/* obsolete: use RSTRING_PTR() */
MRB_API const char *mrb_string_value_ptr(mrb_state *mrb, mrb_value str);
/* obsolete: use RSTRING_LEN() */
MRB_API mrb_int mrb_string_value_len(mrb_state *mrb, mrb_value str);
357 
/**
 * Duplicates a string object.
 *
 * @param mrb The current mruby state.
 * @param str Ruby string to duplicate.
 * @return [mrb_value] Duplicated Ruby string.
 */
MRB_API mrb_value mrb_str_dup(mrb_state *mrb, mrb_value str);
367 
/**
 * Returns the symbol that corresponds to the passed-in Ruby string.
 *
 * @param mrb The current mruby state.
 * @param self Ruby string.
 * @return [mrb_value] A symbol.
 */
MRB_API mrb_value mrb_str_intern(mrb_state *mrb, mrb_value self);
376 
/*
 * String-to-number conversions, each available for a Ruby string (`str`)
 * and for a C string (`s`).
 * NOTE(review): presumably `base` is the radix and `badcheck` selects strict
 * validation of the input -- neither is visible from this header; confirm
 * in the implementation.
 */
MRB_API mrb_value mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck);
MRB_API mrb_value mrb_cstr_to_inum(mrb_state *mrb, const char *s, mrb_int base, mrb_bool badcheck);
MRB_API double mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck);
MRB_API double mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck);
381 
/**
 * Returns the given value converted to a string.
 * For type checking, the non-converting `mrb_to_str` is recommended.
 */
MRB_API mrb_value mrb_str_to_str(mrb_state *mrb, mrb_value str);
387 
/**
 * Returns true if the strings match and false if the strings don't match.
 *
 * @param mrb The current mruby state.
 * @param str1 Ruby string to compare.
 * @param str2 Ruby string to compare.
 * @return [mrb_bool] boolean value.
 */
MRB_API mrb_bool mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2);
397 
/**
 * Returns a concatenated string comprised of a Ruby string and the first
 * len bytes at ptr.
 *
 * @param mrb The current mruby state.
 * @param str Ruby string.
 * @param ptr A C string.
 * @param len Length of the C string.
 * @return [mrb_value] A Ruby string.
 * @see mrb_str_cat_cstr
 */
MRB_API mrb_value mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len);
409 
/**
 * Returns a concatenated string comprised of a Ruby string and a C string.
 *
 * @param mrb The current mruby state.
 * @param str Ruby string.
 * @param ptr A C string.
 * @return [mrb_value] A Ruby string.
 * @see mrb_str_cat
 */
MRB_API mrb_value mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr);
/* NOTE(review): presumably the same operation with a Ruby string (str2) as the right-hand side -- confirm. */
MRB_API mrb_value mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2);
/* mrb_str_cat() for a string literal; the length comes from mrb_strlen_lit(lit). */
#define mrb_str_cat_lit(mrb, str, lit) mrb_str_cat(mrb, str, lit, mrb_strlen_lit(lit))
422 
/**
 * Adds str2 to the end of str.
 */
MRB_API mrb_value mrb_str_append(mrb_state *mrb, mrb_value str, mrb_value str2);
427 
/**
 * Compares two Ruby strings.
 *
 * Returns 0 if both Ruby strings are equal, a value < 0 if str1 is less
 * than str2, and a value > 0 if str1 is greater than str2.
 */
MRB_API int mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2);
432 
/**
 * Returns a newly allocated C string from a Ruby string.
 * This is a utility function to pass a Ruby string to C library functions.
 *
 * - Returned string does not contain any NUL characters (but terminator).
 * - It raises an ArgumentError exception if Ruby string contains
 *   NUL characters.
 * - Returned string will be freed automatically on next GC.
 * - Caller can modify returned string without affecting Ruby string
 *   (e.g. it can be used for mkstemp(3)).
 *
 * @param mrb The current mruby state.
 * @param str Ruby string. Must be an instance of String.
 * @return [char *] A newly allocated C string.
 */
MRB_API char *mrb_str_to_cstr(mrb_state *mrb, mrb_value str);
449 
/*
 * Declared without MRB_API, unlike most functions in this header.
 * NOTE(review): presumably internal helpers (pooled-string creation, string
 * hashing, dump formatting) -- their contracts are not visible from this
 * header; confirm in the implementation.
 */
mrb_value mrb_str_pool(mrb_state *mrb, const char *s, mrb_int len, mrb_bool nofree);
uint32_t mrb_str_hash(mrb_state *mrb, mrb_value str);
mrb_value mrb_str_dump(mrb_state *mrb, mrb_value str);
453 
/**
 * Returns a printable version of str, surrounded by quote marks, with
 * special characters escaped.
 */
mrb_value mrb_str_inspect(mrb_state *mrb, mrb_value str);
458 
/*
 * Backward-compatibility aliases: each old name forwards its arguments
 * unchanged to the current function.
 */
#define mrb_str_cat2(mrb, s, cstr) \
  mrb_str_cat_cstr(mrb, s, cstr)
#define mrb_str_buf_cat(mrb, s, bytes, n) \
  mrb_str_cat(mrb, s, bytes, n)
#define mrb_str_buf_append(mrb, s, other) \
  mrb_str_cat_str(mrb, s, other)
463 
/*
 * Declared without MRB_API.
 * NOTE(review): presumably mrb_str_beg_len() normalizes *begp / *lenp
 * against str_len and reports whether the range is valid, and
 * mrb_str_byte_subseq() returns the byte range [beg, beg + len) of str --
 * confirm in the implementation.
 */
mrb_bool mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp);
mrb_value mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len);
466 
/* UTF-8 helpers; only declared when MRB_UTF8_STRING is defined. */
#ifdef MRB_UTF8_STRING
/* NOTE(review): presumably the byte length of the UTF-8 character at str, bounded by end -- confirm. */
mrb_int mrb_utf8len(const char *str, const char *end);
/* NOTE(review): presumably the number of UTF-8 characters in the first byte_len bytes of str -- confirm. */
mrb_int mrb_utf8_strlen(const char *str, mrb_int byte_len);
#endif
471 
472 MRB_END_DECL
473 
474 #endif  /* MRUBY_STRING_H */
475