1 /* udstr.c:
2  *
3  ****************************************************************
4  * Copyright (C) 2004 Tom Lord
5  *
6  * See the file "COPYING" for further information about
7  * the copyright and warranty status of this work.
8  */
9 
10 
11 #include "hackerlab/mem/mem.h"
12 #include "hackerlab/bugs/panic.h"
13 #include "hackerlab/uni/invariant.h"
14 #include "hackerlab/strings/udstr.h"
15 
16 
17 
18 /************************************************************************
19  *(h1 "Dynamic Unicode String Functions"
20  *     :includes ("hackerlab/strings/udstr.h"))
21  *
22  *
23  */
24 
25 /*(c t_udstr :category type)
26  * typedef <unspecified> t_udstr;
27  *
28  * `t_udstr' values are mutable, resizable Unicode strings.
29  *
30  * Internally, strings of this type may be of any supported encoding
31  * form and may contain any codepoint valid in that encoding.  The
32  * length of an udstr (pronounced "uhd-stir") in both encoding values
33  * and code-points is explicitly recorded.
34  */
35 struct udstr_handle
36 {
37   int refs;
38 
39   uni_string str;
40   enum uni_encoding_scheme enc;
41   ustr_cv_index_t cv_len;
42   ustr_cp_index_t cp_len;
43 
44   alloc_limits limits;
45 };
46 
47 
48 /*(menu)
49  */
50 
51 
52 
53 /* __STDC__ prototypes for static functions */
54 static enum uni_encoding_scheme pick_fw_of (enum uni_encoding_scheme a,
55                                             enum uni_encoding_scheme b);
56 static void take_new_data (t_udstr dstr, t_udstr new_data);
57 
58 
/************************************************************************
 *(h2 "udstr Naming Conventions")
 *
 * Some `udstr' functions require users to pass string indexes or lengths
 * as parameters.   Such parameters are always expressed in units of
 * code values within the relevant encoding form.
 *
 * Functions having the suffix `_x' work by modifying their first string
 * argument rather than by returning a newly allocated string.
 *
 * Finally, `_fw' functions are the "full width" variants (see below).
 *
 */
72 
/************************************************************************
 *(h2 "Conservative Width vs. Full Width Unicode String Functions")
 *
 * Normally, `udstr' functions choose the encoding form of their
 * output strings (or the strings they modify) by copying the encoding
 * of the first string argument.  Thus, for example, concatenating a
 * UTF-8 string (on the left) and a UTF-16 string (on the right) produces
 * a UTF-8 string.
 *
 * Many `udstr' functions also have a variant whose name contains the
 * suffix `_fw' ("full width").  These choose output encoding forms by
 * choosing the _narrowest_ encoding wide enough so that each
 * codepoint in the string occupies exactly one coding value.  For example,
 * the concatenation of a full-width UTF-8 string (which must contain
 * only codepoints in the range 0..127) with a full-width UTF-16 string
 * (which can contain no surrogate pair codepoints or codepoints larger
 * than 16 bits) may be either UTF-8 or UTF-16, depending on whether the
 * UTF-16 argument contains any codepoints outside of the range 0..127.
 *
 * [Add table of precise conversion rules.]
 */
94 
95 
96 
97 /************************************************************************
98  *(h2 "Constructors")
99  *
100  */
101 
102 
103 /*(c udstr_save)
104  * t_udstr udstr_save (alloc_limits limits,
105  *                     uni_string str,
106  *                     enum uni_encoding_scheme enc);
107  *
108  * Create a `t_udstr' from a 0-terminated unicode string
109  * in the indicated encoding form.
110  *
111  * The internal encoding form of the resulting `t_udstr'
112  * is the same as the encoding of `str', namely, `enc'.
113  */
114 t_udstr
udstr_save(alloc_limits limits,uni_string str,enum uni_encoding_scheme enc)115 udstr_save (alloc_limits limits,
116             uni_string str,
117             enum uni_encoding_scheme enc)
118 {
119   ustr_cv_index_t len;
120   ustr_cp_index_t cp_len;
121 
122   len = ustr_lengths (&cp_len, 0, str, enc);
123   return udstr_save_generic (limits, enc, len, str, enc, len, cp_len);
124 }
125 
126 
127 /*(c udstr_save_n)
128  * t_udstr udstr_save_n (alloc_limits limits,
129  *                       uni_string str,
130  *                       enum uni_encoding_scheme enc,
131  *                       ustr_cv_index_t len);
132  *
133  *
134  * Create a `t_udstr' from a unicode string
135  * in the indicated encoding form and of the
136  * indicated length (in code values).
137  *
138  * The internal encoding form of the resulting `t_udstr'
139  * is the same as the encoding of `str', namely, `enc'.
140  */
141 t_udstr
udstr_save_n(alloc_limits limits,uni_string str,enum uni_encoding_scheme enc,ustr_cv_index_t len)142 udstr_save_n (alloc_limits limits,
143               uni_string str,
144               enum uni_encoding_scheme enc,
145               ustr_cv_index_t len)
146 {
147   ustr_cp_index_t cp_len;
148 
149   cp_len = ustr_cp_length_n (0, str, enc, len);
150   return udstr_save_generic (limits, enc, len, str, enc, len, cp_len);
151 }
152 
153 
154 
155 /*(c udstr_save_fw)
156  * t_udstr udstr_save_fw (alloc_limits limits,
157  *                        uni_string str,
158  *                        enum uni_encoding_scheme enc);
159  *
160  *
161  * Create a `t_udstr' from a 0-terminated unicode string
162  * in the indicated encoding form.
163  *
164  * The internal encoding form of the resulting `t_udstr'
165  * is the narrowest among:
166  *
167  *	uni_iso8859_1
168  * 	uni_utf16
169  * 	uni_utf32
170  * 	uni_bogus32
171  *
172  * in which the data from `str' can be represented with
173  * exactly one code value per coding point.
174  */
175 t_udstr
udstr_save_fw(alloc_limits limits,uni_string str,enum uni_encoding_scheme enc)176 udstr_save_fw (alloc_limits limits,
177                uni_string str,
178                enum uni_encoding_scheme enc)
179 {
180   ustr_cv_index_t len;
181   ustr_cp_index_t cp_len;
182   ustr_cv_index_t dest_len;
183   enum uni_encoding_scheme fw_enc;
184 
185   len = ustr_lengths (&cp_len, &fw_enc, str, enc);
186   dest_len.cv = cp_len.cp;
187   return udstr_save_generic (limits, fw_enc, dest_len, str, enc, len, cp_len);
188 }
189 
190 
191 /*(c udstr_save_fw_n)
192  * t_udstr udstr_save_fw_n (alloc_limits limits,
193  *                          uni_string str,
194  *                          enum uni_encoding_scheme enc,
195  *                          ustr_cv_index_t len);
196  *
197  * Create a `t_udstr' from a unicode string
198  * in the indicated encoding form and of the
199  * indicated length (in code values).
200  *
201  * The internal encoding form of the resulting `t_udstr'
202  * is the narrowest among:
203  *
204  *	uni_iso8859_1
205  * 	uni_utf16
206  * 	uni_utf32
207  * 	uni_bogus32
208  *
209  * in which the data from `str' can be represented with
210  * exactly one code value per coding point.
211  */
212 t_udstr
udstr_save_fw_n(alloc_limits limits,uni_string str,enum uni_encoding_scheme enc,ustr_cv_index_t len)213 udstr_save_fw_n (alloc_limits limits,
214                  uni_string str,
215                  enum uni_encoding_scheme enc,
216                  ustr_cv_index_t len)
217 {
218   ustr_cp_index_t cp_len;
219   ustr_cv_index_t dest_len;
220   enum uni_encoding_scheme fw_enc;
221 
222   (void)ustr_lengths_n (&cp_len, &fw_enc, str, enc, len);
223   dest_len.cv = cp_len.cp;
224   return udstr_save_generic (limits, fw_enc, dest_len, str, enc, len, cp_len);
225 }
226 
227 
228 
229 
230 t_udstr
udstr_save_generic(alloc_limits limits,enum uni_encoding_scheme dest_enc,ustr_cv_index_t dest_len,uni_string str,enum uni_encoding_scheme enc,ustr_cv_index_t len,ustr_cp_index_t cp_len)231 udstr_save_generic (alloc_limits limits,
232                     enum uni_encoding_scheme dest_enc,
233                     ustr_cv_index_t dest_len,
234                     uni_string str,
235                     enum uni_encoding_scheme enc,
236                     ustr_cv_index_t len,
237                     ustr_cp_index_t cp_len)
238 {
239   t_udstr answer;
240   size_t dest_cv_sizeof;
241 
242   answer = (t_udstr)lim_malloc (limits, sizeof (*answer));
243   if (!answer)
244     return 0;
245 
246   answer->refs = 1;
247   answer->enc = dest_enc;
248   answer->cv_len = dest_len;
249   answer->cp_len = cp_len;
250 
251   dest_cv_sizeof = uni_cv_sizeof (dest_enc);
252 
253   answer->str.raw = lim_malloc (limits, (1 + dest_len.cv) * dest_cv_sizeof);
254   if (!answer->str.raw)
255     {
256       lim_free (limits, (t_uchar *)answer);
257       answer = 0;
258     }
259   else
260     {
261       if (dest_enc == enc)
262         {
263           mem_move (answer->str.raw, str.raw, len.cv * dest_cv_sizeof);
264           mem_set0 (answer->str.raw + (dest_len.cv * dest_cv_sizeof), dest_cv_sizeof);
265         }
266       else
267         {
268           ustr_copy_n (answer->str, dest_enc, dest_len, str, enc, len);
269         }
270     }
271 
272   return answer;
273 }
274 
275 
276 
277 /************************************************************************
278  *(h2 "udstr Memory Management")
279  *
280  * `t_udstr' values are reference counted objects.
281  * Constructors return objects with a reference count
282  * of 1.
283  *
284  */
285 
286 /*(c udstr_ref)
287  * void udstr_ref (t_udstr d);
288  *
289  * Increment the reference count of `d'.
290  */
291 void
udstr_ref(t_udstr d)292 udstr_ref (t_udstr d)
293 {
294   if (!d)
295     return;
296 
297   ++d->refs;
298 }
299 
300 
301 /*(c udstr_unref)
302  * void udstr_unref (t_udstr d);
303  *
304  * Decrement the reference count of `d'.
305  * If it drops to 0, free all storage associated
306  * wtih `d' (thus invalidating all references
307  * to `d').
308  */
309 void
udstr_unref(t_udstr d)310 udstr_unref (t_udstr d)
311 {
312   if (!d)
313     return;
314 
315   if (d->refs > 1)
316     --d->refs;
317   else
318     {
319       lim_free (d->limits, d->str.raw);
320     }
321 }
322 
323 
324 
325 /************************************************************************
326  *(h2 "udstr Deconstruction")
327  *
328  *
329  *
330  */
331 
332 /*(c udstr_cv_length)
333  * ustr_cv_index_t udstr_cv_length (t_udstr dstr);
334  *
335  * Return the length of `dstr', measured in coding values.
336  */
337 ustr_cv_index_t
udstr_cv_length(t_udstr dstr)338 udstr_cv_length (t_udstr dstr)
339 {
340   return dstr->cv_len;
341 }
342 
343 
344 /*(c udstr_cp_length)
345  * ustr_cp_index_t udstr_cp_length (t_udstr dstr);
346  *
347  * Return the length of `dstr', measured in codepoints.
348  */
349 ustr_cp_index_t
udstr_cp_length(t_udstr dstr)350 udstr_cp_length (t_udstr dstr)
351 {
352   return dstr->cp_len;
353 }
354 
355 
356 /*(c udstr_encoding)
357  * enum uni_encoding_scheme udstr_encoding (t_udstr dstr);
358  *
359  * Return the encoding form used internally for `dstr'.
360  */
361 enum uni_encoding_scheme
udstr_encoding(t_udstr dstr)362 udstr_encoding (t_udstr dstr)
363 {
364   return dstr->enc;
365 }
366 
367 
368 /*(c udstr_str)
369  * uni_string udstr_str (t_udstr dstr);
370  *
371  * Return a pointer to the string data used internally for `dstr'.
372  * Subsequent calls to `udstr' functions with `dstr' as a parameter
373  * can invalidate the return value of this function.
374  */
375 uni_string
udstr_str(t_udstr dstr)376 udstr_str (t_udstr dstr)
377 {
378   return dstr->str;
379 }
380 
381 
382 /*(c udstr_cv_ref)
383  * t_unicode udstr_cv_ref (ustr_cv_index_t * pos_after,
384  *                         t_udstr dstr,
385  *                         ustr_cv_index_t pos);
386  *
387  * Return the codepoint found at the indicated
388  * code value index in `dstr'.  Optionally return the
389  * codevalue index of the subsequent character.
390  */
391 t_unicode
udstr_cv_ref(ustr_cv_index_t * pos_after,t_udstr dstr,ustr_cv_index_t pos)392 udstr_cv_ref (ustr_cv_index_t * pos_after,
393               t_udstr dstr,
394               ustr_cv_index_t pos)
395 {
396   return ustr_cv_ref_n (pos_after, dstr->str, dstr->enc, dstr->cv_len, pos);
397 }
398 
399 
400 /*(c udstr_cp_ref)
401  * t_unicode udstr_cp_ref (ustr_cv_index_t * pos_after,
402  *                         t_udstr dstr,
403  *                         ustr_cp_index_t pos);
404  *
405  * Return the codepoint found at the indicated
406  * codepoint index in `dstr'.  Optionally return the
407  * codevalue index of the subsequent character.
408  */
409 t_unicode
udstr_cp_ref(ustr_cv_index_t * pos_after,t_udstr dstr,ustr_cp_index_t pos)410 udstr_cp_ref (ustr_cv_index_t * pos_after,
411               t_udstr dstr,
412               ustr_cp_index_t pos)
413 {
414   uni_string pos_str;
415   ustr_cv_index_t pos_cv;
416 
417   if (pos.cp >= dstr->cp_len.cp)
418     {
419       return 0;
420     }
421   else
422     {
423       pos_str = ustr_cp_offset_n (dstr->str, dstr->enc, dstr->cv_len, pos);
424       pos_cv = ustr_str_subtract (pos_str, dstr->str, dstr->enc);
425       return udstr_cv_ref (pos_after, dstr, pos_cv);
426     }
427 }
428 
429 
430 /*(c udstr_cv_set)
431  * t_udstr udstr_cv_set (alloc_limits limits,
432  *                       t_udstr s,
433  *                       ustr_cv_index_t x,
434  *                       t_unicode c);
435  *
436  * Return a new copy of `s' with the character at the
437  * indicated code value replaced by `c'.
438  */
439 t_udstr
udstr_cv_set(alloc_limits limits,t_udstr s,ustr_cv_index_t x,t_unicode c)440 udstr_cv_set (alloc_limits limits,
441               t_udstr s,
442               ustr_cv_index_t x,
443               t_unicode c)
444 {
445   struct udstr_handle tmp;
446   ustr_cv_index_t end;
447 
448   tmp.refs = 1;
449   tmp.str.utf32 = &c;
450   tmp.enc = uni_bogus32;
451   tmp.cv_len = ustr_cv_index (1);
452   tmp.cp_len = ustr_cp_index (1);
453   tmp.limits = 0;
454 
455   end = udstr_cv_inc (s, x);
456 
457   return udstr_cv_replace (limits,
458                            s, x, end,
459                            &tmp, ustr_cv_index (0), ustr_cv_index (1));
460 }
461 
462 
463 /*(c udstr_cv_set_fw)
464  * t_udstr udstr_cv_set_fw (alloc_limits limits,
465  *                          t_udstr s,
466  *                          ustr_cv_index_t x,
467  *                          t_unicode c);
468  *
469  *
470  * Return a new copy of `s' with the character at the
471  * indicated code value replaced by `c'.
472  *
473  * The returned string uses a full-width encoding.
474  */
475 t_udstr
udstr_cv_set_fw(alloc_limits limits,t_udstr s,ustr_cv_index_t x,t_unicode c)476 udstr_cv_set_fw (alloc_limits limits,
477                  t_udstr s,
478                  ustr_cv_index_t x,
479                  t_unicode c)
480 {
481   struct udstr_handle tmp;
482   ustr_cv_index_t end;
483 
484   tmp.refs = 1;
485   tmp.str.utf32 = &c;
486   tmp.enc = uni_bogus32;
487   tmp.cv_len = ustr_cv_index (1);
488   tmp.cp_len = ustr_cp_index (1);
489   tmp.limits = 0;
490 
491   end = udstr_cv_inc (s, x);
492 
493   return udstr_cv_replace_fw (limits,
494                               s, x, end,
495                               &tmp, ustr_cv_index (0), ustr_cv_index (1));
496 }
497 
498 
499 /*(c udstr_cp_set)
500  * t_udstr udstr_cp_set (alloc_limits limits,
501  *                       t_udstr s,
502  *                       ustr_cp_index_t x,
503  *                       t_unicode c);
504  *
505  *
506  * Return a new copy of `s' with the character at the
507  * indicated code value replaced by `c'.
508  */
509 t_udstr
udstr_cp_set(alloc_limits limits,t_udstr s,ustr_cp_index_t x,t_unicode c)510 udstr_cp_set (alloc_limits limits,
511               t_udstr s,
512               ustr_cp_index_t x,
513               t_unicode c)
514 {
515   struct udstr_handle tmp;
516 
517   tmp.refs = 1;
518   tmp.str.utf32 = &c;
519   tmp.enc = uni_bogus32;
520   tmp.cv_len = ustr_cv_index (1);
521   tmp.cp_len = ustr_cp_index (1);
522   tmp.limits = 0;
523 
524   return udstr_cp_replace (limits,
525                            s, x, ustr_cp_index (x.cp + 1),
526                            &tmp, ustr_cp_index (0), ustr_cp_index (1));
527 }
528 
529 
530 /*(c udstr_cp_set_fw)
531  * t_udstr udstr_cp_set_fw (alloc_limits limits,
532  *                          t_udstr s,
533  *                          ustr_cp_index_t x,
534  *                          t_unicode c);
535  *
536  *
537  * Return a new copy of `s' with the character at the
538  * indicated code value replaced by `c'.
539  *
540  * The returned string uses a full-width encoding.
541  */
542 t_udstr
udstr_cp_set_fw(alloc_limits limits,t_udstr s,ustr_cp_index_t x,t_unicode c)543 udstr_cp_set_fw (alloc_limits limits,
544                  t_udstr s,
545                  ustr_cp_index_t x,
546                  t_unicode c)
547 {
548   struct udstr_handle tmp;
549 
550   tmp.refs = 1;
551   tmp.str.utf32 = &c;
552   tmp.enc = uni_bogus32;
553   tmp.cv_len = ustr_cv_index (1);
554   tmp.cp_len = ustr_cp_index (1);
555   tmp.limits = 0;
556 
557   return udstr_cp_replace_fw (limits,
558                               s, x, ustr_cp_index (x.cp + 1),
559                               &tmp, ustr_cp_index (0), ustr_cp_index (1));
560 }
561 
562 
563 
564 
565 
566 /************************************************************************
567  *(h2 "Full-width Conversion")
568  *
569  */
570 
571 
572 /*(c udstr_fw_x)
573  * t_udstr udstr_fw_x (t_udstr d);
574  *
575  * Modify (if necessary) `d' to be of a narrowest encoding such that
576  * each codepoint in `d' occupies exactly one code value.
577  */
578 t_udstr
udstr_fw_x(t_udstr d)579 udstr_fw_x (t_udstr d)
580 {
581   enum uni_encoding_scheme fw_enc;
582 
583   (void)ustr_cp_length_n (&fw_enc, d->str, d->enc, d->cv_len);
584 
585   if (d->enc == fw_enc)
586     return d;
587 
588   {
589     ustr_cv_index_t fw_len;
590     t_udstr new_data = 0;
591 
592     fw_len = ustr_cv_length_in_encoding_n (fw_enc, d->str, d->enc, d->cv_len);
593 
594     new_data = udstr_save_generic (d->limits, fw_enc, fw_len, d->str, d->enc, d->cv_len, d->cp_len);
595     if (!new_data)
596       return 0;
597 
598     take_new_data (d, new_data);
599 
600     return d;
601   }
602 }
603 
604 
605 
606 /************************************************************************
607  *(h2 "udstr String Copying")
608  *
609  *
610  *
611  */
612 
613 /*(c udstr_copy)
614  * t_udstr udstr_copy (alloc_limits limits, t_udstr dstr);
615  *
616  * Allocate a fresh copy of `dstr'.
617  */
618 t_udstr
udstr_copy(alloc_limits limits,t_udstr dstr)619 udstr_copy (alloc_limits limits, t_udstr dstr)
620 {
621   return udstr_save_generic (limits, dstr->enc, dstr->cv_len, dstr->str, dstr->enc, dstr->cv_len, dstr->cp_len);
622 }
623 
624 
625 /*(c udstr_copy_fw)
626  * t_udstr udstr_copy_fw (alloc_limits limits, t_udstr dstr);
627  *
628  * Allocate a fresh copy of `dstr', converting it (if necessary)
629  * to a full-width encoding.
630  */
631 t_udstr
udstr_copy_fw(alloc_limits limits,t_udstr dstr)632 udstr_copy_fw (alloc_limits limits, t_udstr dstr)
633 {
634   return udstr_save_fw_n (limits, dstr->str, dstr->enc, dstr->cv_len);
635 }
636 
637 
638 /************************************************************************
639  *(h2 "udstr Substrings")
640  *
641  *
642  *
643  */
644 
645 
646 /*(c udstr_cv_substr)
647  * t_udstr udstr_cv_substr (alloc_limits limits,
648  *                          t_udstr dstr,
649  *                          ustr_cv_index_t from,
650  *                          ustr_cv_index_t to);
651  *
652  * Return a freshly allocated substring of `dstr'
653  * containing the indicated half-open range of characters
654  * (measured in code values).
655  */
656 t_udstr
udstr_cv_substr(alloc_limits limits,t_udstr dstr,ustr_cv_index_t from,ustr_cv_index_t to)657 udstr_cv_substr (alloc_limits limits,
658                  t_udstr dstr,
659                  ustr_cv_index_t from,
660                  ustr_cv_index_t to)
661 {
662   uni_string str;
663   ustr_cv_index_t len;
664 
665   str = ustr_cv_offset (dstr->str, dstr->enc, from);
666   len = ustr_cv_index (to.cv - from.cv);
667 
668   return udstr_save_n (limits, str, dstr->enc, len);
669 }
670 
671 
672 /*(c udstr_cv_substr_x)
673  * t_udstr udstr_cv_substr_x (t_udstr dstr,
674  *                            ustr_cv_index_t from,
675  *                            ustr_cv_index_t to);
676  *
677  * Modify `dstr' to contain only its indicated
678  * substring.
679  */
680 t_udstr
udstr_cv_substr_x(t_udstr dstr,ustr_cv_index_t from,ustr_cv_index_t to)681 udstr_cv_substr_x (t_udstr dstr,
682                    ustr_cv_index_t from,
683                    ustr_cv_index_t to)
684 {
685   t_udstr almost_answer;
686 
687   almost_answer = udstr_cv_substr (dstr->limits, dstr, from, to);
688 
689   if (!almost_answer)
690     {
691       return 0;
692     }
693   else
694     {
695       take_new_data (dstr, almost_answer);
696 
697       return dstr;
698     }
699 }
700 
701 
702 /*(c udstr_cv_substr_fw)
703  * t_udstr udstr_cv_substr_fw (alloc_limits limits,
704  *                             t_udstr dstr,
705  *                             ustr_cv_index_t from,
706  *                             ustr_cv_index_t to);
707  *
708  *
709  * Return a freshly allocated substring of `dstr'
710  * containing the indicated half-open range of characters
711  * (measured in code values).
712  *
713  * The returned string uses a full-width encoding (all of
714  * it's codepoints fit in exactly one code value).
715  */
716 t_udstr
udstr_cv_substr_fw(alloc_limits limits,t_udstr dstr,ustr_cv_index_t from,ustr_cv_index_t to)717 udstr_cv_substr_fw (alloc_limits limits,
718                     t_udstr dstr,
719                     ustr_cv_index_t from,
720                     ustr_cv_index_t to)
721 {
722   uni_string str;
723   ustr_cv_index_t len;
724 
725   str = ustr_cv_offset (dstr->str, dstr->enc, from);
726   len = ustr_cv_index (to.cv - from.cv);
727 
728   return udstr_save_fw_n (limits, str, dstr->enc, len);
729 }
730 
731 
732 /*(c udstr_cv_substr_fw_x)
733  * t_udstr udstr_cv_substr_fw_x (t_udstr dstr,
734  *                               ustr_cv_index_t from,
735  *                               ustr_cv_index_t to);
736  *
737  * Modify `dstr' to contain only its indicated substring
738  * and to be in a full-width encoding.
739  */
740 t_udstr
udstr_cv_substr_fw_x(t_udstr dstr,ustr_cv_index_t from,ustr_cv_index_t to)741 udstr_cv_substr_fw_x (t_udstr dstr,
742                       ustr_cv_index_t from,
743                       ustr_cv_index_t to)
744 {
745   t_udstr almost_answer;
746 
747   almost_answer = udstr_cv_substr_fw (dstr->limits, dstr, from, to);
748 
749   if (!almost_answer)
750     {
751       return 0;
752     }
753   else
754     {
755       take_new_data (dstr, almost_answer);
756 
757       return dstr;
758     }
759 }
760 
761 
762 /*(c udstr_cp_substr)
763  * t_udstr udstr_cp_substr (alloc_limits limits,
764  *                          t_udstr dstr,
765  *                          ustr_cp_index_t from,
766  *                          ustr_cp_index_t to);
767  *
768  * Return a freshly allocated substring of `dstr'
769  * containing the indicated half-open range of characters
770  * (measured in codepoints).
771  */
772 t_udstr
udstr_cp_substr(alloc_limits limits,t_udstr dstr,ustr_cp_index_t from,ustr_cp_index_t to)773 udstr_cp_substr (alloc_limits limits,
774                  t_udstr dstr,
775                  ustr_cp_index_t from,
776                  ustr_cp_index_t to)
777 {
778   ustr_cv_index_t f_v;
779   ustr_cv_index_t t_v;
780 
781   udstr_cp_to_cv_range (&f_v, &t_v, dstr, from, to);
782   return udstr_cv_substr (limits, dstr, f_v, t_v);
783 }
784 
785 
786 /*(c udstr_cp_substr_x)
787  * t_udstr udstr_cp_substr_x (t_udstr dstr,
788  *                            ustr_cp_index_t from,
789  *                            ustr_cp_index_t to);
790  *
791  * Modify `dstr' to contain only its indicated substring.
792  */
793 t_udstr
udstr_cp_substr_x(t_udstr dstr,ustr_cp_index_t from,ustr_cp_index_t to)794 udstr_cp_substr_x (t_udstr dstr,
795                    ustr_cp_index_t from,
796                    ustr_cp_index_t to)
797 {
798   t_udstr almost_answer;
799 
800   almost_answer = udstr_cp_substr (dstr->limits, dstr, from, to);
801 
802   if (!almost_answer)
803     {
804       return 0;
805     }
806   else
807     {
808       take_new_data (dstr, almost_answer);
809 
810       return dstr;
811     }
812 }
813 
814 
815 /*(c udstr_cp_substr_fw)
816  * t_udstr udstr_cp_substr_fw (alloc_limits limits,
817  *                             t_udstr dstr,
818  *                             ustr_cp_index_t from,
819  *                             ustr_cp_index_t to);
820  *
821  * Return a freshly allocated substring of `dstr'
822  * containing the indicated half-open range of characters
823  * (measured in codepoints).
824  *
825  * The returned string uses a full-width encoding (all of
826  * it's codepoints fit in exactly one code value).
827  */
828 t_udstr
udstr_cp_substr_fw(alloc_limits limits,t_udstr dstr,ustr_cp_index_t from,ustr_cp_index_t to)829 udstr_cp_substr_fw (alloc_limits limits,
830                     t_udstr dstr,
831                     ustr_cp_index_t from,
832                     ustr_cp_index_t to)
833 {
834   ustr_cv_index_t f_v;
835   ustr_cv_index_t t_v;
836 
837   udstr_cp_to_cv_range (&f_v, &t_v, dstr, from, to);
838   return udstr_cv_substr_fw (limits, dstr, f_v, t_v);
839 }
840 
841 
842 /*(c udstr_cp_substr_fw_x)
843  * t_udstr udstr_cp_substr_fw_x (t_udstr dstr,
844  *                               ustr_cp_index_t from,
845  *                               ustr_cp_index_t to);
846  *
847  * Modify `dstr' to contain only its indicated substring
848  * and to be in a full-width encoding.
849  */
850 t_udstr
udstr_cp_substr_fw_x(t_udstr dstr,ustr_cp_index_t from,ustr_cp_index_t to)851 udstr_cp_substr_fw_x (t_udstr dstr,
852                       ustr_cp_index_t from,
853                       ustr_cp_index_t to)
854 {
855   t_udstr almost_answer;
856 
857   almost_answer = udstr_cp_substr_fw (dstr->limits, dstr, from, to);
858 
859   if (!almost_answer)
860     {
861       return 0;
862     }
863   else
864     {
865       take_new_data (dstr, almost_answer);
866 
867       return dstr;
868     }
869 }
870 
871 
872 
873 
874 
/************************************************************************
 *(h2 "udstr String Concatenation")
 *
 *
 *
 */
881 
882 
883 
884 /*(c udstr_append)
885  * t_udstr udstr_append (alloc_limits limits,
886  *                       t_udstr a_dstr,
887  *                       t_udstr b_dstr);
888  *
889  * Return a freshly allocated string containing
890  * the concatenation of the argument strings.
891  */
892 t_udstr
udstr_append(alloc_limits limits,t_udstr a_dstr,t_udstr b_dstr)893 udstr_append (alloc_limits limits,
894               t_udstr a_dstr,
895               t_udstr b_dstr)
896 {
897   t_udstr answer = 0;
898 
899   answer = udstr_copy (limits, a_dstr);
900 
901   if (answer)
902     {
903       if (!udstr_append_x (answer, b_dstr))
904         {
905           udstr_unref (answer);
906           answer = 0;
907         }
908     }
909 
910   return answer;
911 }
912 
913 
914 /*(c udstr_append_x)
915  * t_udstr udstr_append_x (t_udstr a_dstr,
916  *                         t_udstr b_dstr);
917  *
918  * Modify `a_dstr' to contain the concatenation
919  * of `a_dstr' and `b_dstr'.
920  */
921 t_udstr
udstr_append_x(t_udstr a_dstr,t_udstr b_dstr)922 udstr_append_x (t_udstr a_dstr,
923                 t_udstr b_dstr)
924 {
925   ustr_cv_index_t b_len_in_a_enc;
926   ustr_cv_index_t total_len;
927   size_t a_enc_size;
928   size_t proper_a_size;
929   uni_string b_dest;
930 
931   if (a_dstr->enc == b_dstr->enc)
932     {
933       b_len_in_a_enc = b_dstr->cv_len;
934     }
935   else
936     {
937       b_len_in_a_enc = ustr_cv_length_in_encoding_n (a_dstr->enc, b_dstr->str, b_dstr->enc, b_dstr->cv_len);
938     }
939 
940   total_len = ustr_cv_index (b_len_in_a_enc.cv + a_dstr->cv_len.cv);
941 
942   a_enc_size = uni_cv_sizeof (a_dstr->enc);
943   proper_a_size = a_dstr->cv_len.cv * a_enc_size;
944 
945   {
946     t_uchar * resized;
947 
948     resized = lim_realloc (a_dstr->limits, a_dstr->str.raw, proper_a_size);
949     if (!resized)
950       return 0;
951 
952     a_dstr->str.raw = resized;
953   }
954 
955   b_dest = ustr_cv_offset_n (0, a_dstr->str, a_dstr->enc, a_dstr->cv_len, a_dstr->cv_len);
956 
957   ustr_copy_n (b_dest, a_dstr->enc, b_len_in_a_enc, b_dstr->str, b_dstr->enc, b_dstr->cv_len);
958 
959   a_dstr->cp_len = ustr_cp_length_n (0, a_dstr->str, a_dstr->enc, a_dstr->cv_len);
960 
961   return a_dstr;
962 }
963 
964 
965 /*(c udstr_append_fw)
966  * t_udstr udstr_append_fw (alloc_limits limits,
967  *                          t_udstr a_dstr,
968  *                          t_udstr b_dstr);
969  *
970  * Return a freshly allocated string containing
971  * the concatenation of the argument strings.
972  *
973  * The new string uses a full-width encoding.
974  */
975 t_udstr
udstr_append_fw(alloc_limits limits,t_udstr a_dstr,t_udstr b_dstr)976 udstr_append_fw (alloc_limits limits,
977                  t_udstr a_dstr,
978                  t_udstr b_dstr)
979 {
980   t_udstr answer = 0;
981 
982   answer = udstr_copy (limits, a_dstr);
983 
984   if (answer)
985     {
986       if (!udstr_append_fw_x (answer, b_dstr))
987         {
988           udstr_unref (answer);
989           answer = 0;
990         }
991     }
992 
993   return answer;
994 }
995 
996 
997 /*(c udstr_append_fw_x)
998  * t_udstr udstr_append_fw_x (t_udstr a_dstr,
999  *                            t_udstr b_dstr);
1000  *
1001  * Modify `a_dstr' to contain the concatenation
1002  * of `a_dstr' and `b_dstr' and to use a full-width
1003  * encoding.
1004  */
1005 t_udstr
udstr_append_fw_x(t_udstr a_dstr,t_udstr b_dstr)1006 udstr_append_fw_x (t_udstr a_dstr,
1007                    t_udstr b_dstr)
1008 {
1009   enum uni_encoding_scheme a_fw;
1010   enum uni_encoding_scheme b_fw;
1011   enum uni_encoding_scheme best_encoding;
1012 
1013   ustr_lengths_n (0, &a_fw, a_dstr->str, a_dstr->enc, a_dstr->cv_len);
1014   ustr_lengths_n (0, &b_fw, a_dstr->str, a_dstr->enc, a_dstr->cv_len);
1015 
1016   best_encoding = pick_fw_of (a_fw, b_fw);
1017 
1018   if (a_dstr->enc != best_encoding)
1019     {
1020       t_udstr new_a;
1021 
1022       new_a = udstr_save_generic (a_dstr->limits,
1023                                   best_encoding,
1024                                   ustr_cv_index (a_dstr->cp_len.cp),
1025                                   a_dstr->str,
1026                                   a_dstr->enc,
1027                                   a_dstr->cv_len,
1028                                   a_dstr->cp_len);
1029 
1030       if (!new_a)
1031         return 0;
1032 
1033       a_dstr->str.raw = new_a->str.raw;
1034       a_dstr->enc = new_a->enc;
1035 
1036       lim_free (new_a->limits, new_a);
1037 
1038       /* new_a dropped deliberately --- a_dstr took it over
1039        */
1040     }
1041 
1042   return udstr_append_x (a_dstr, b_dstr);
1043 }
1044 
1045 
1046 
1047 
1048 
1049 /************************************************************************
1050  *(h2 "udstr Substring Deletion")
1051  *
1052  *
1053  *
1054  */
1055 
1056 
1057 /*(c udstr_cv_delete)
1058  * t_udstr udstr_cv_delete (alloc_limits limits,
1059  *                          t_udstr d,
1060  *                          ustr_cv_index_t from,
1061  *                          ustr_cv_index_t to);
1062  *
1063  * Return a new string which is a copy of `d' with
1064  * code values in the half-open range `from' to `to'
1065  * removed.
1066  */
1067 t_udstr
udstr_cv_delete(alloc_limits limits,t_udstr d,ustr_cv_index_t from,ustr_cv_index_t to)1068 udstr_cv_delete (alloc_limits limits,
1069                  t_udstr d,
1070                  ustr_cv_index_t from,
1071                  ustr_cv_index_t to)
1072 {
1073   uni_string right_source;
1074   ustr_cv_index_t right_len;
1075   ustr_cv_index_t total_len;
1076   ustr_cv_index_t left_len;
1077   uni_string right_dest;
1078   t_udstr answer = 0;
1079 
1080 
1081   right_source = ustr_cv_offset_n (&right_len, d->str, d->enc, d->cv_len, to);
1082   left_len = from;
1083   total_len = ustr_cv_index (left_len.cv + right_len.cv);
1084 
1085 
1086   answer = udstr_save_generic (limits, d->enc, total_len, d->str, d->enc, d->cv_len, d->cp_len);
1087   if (!answer)
1088     return 0;
1089   /*
1090    * answer is missing half of its data and has the wrong cp_len now.
1091    */
1092 
1093   right_dest = ustr_cv_offset_n (0, answer->str, answer->enc, answer->cv_len, left_len);
1094   ustr_copy_n (right_dest, answer->enc, right_len, right_source, d->enc, right_len);
1095 
1096   answer->cp_len = ustr_cp_length_n (0, answer->str, answer->enc, answer->cv_len);
1097 
1098   return answer;
1099 }
1100 
1101 
1102 /*(c udstr_cp_delete)
1103  * t_udstr udstr_cp_delete (alloc_limits limits,
1104  *                          t_udstr d,
1105  *                          ustr_cp_index_t from,
1106  *                          ustr_cp_index_t to);
1107  *
1108  * Return a new string which is a copy of `d' with
1109  * codepoints in the half-open range `from' to `to'
1110  * removed.
1111  */
1112 t_udstr
udstr_cp_delete(alloc_limits limits,t_udstr d,ustr_cp_index_t from,ustr_cp_index_t to)1113 udstr_cp_delete (alloc_limits limits,
1114                  t_udstr d,
1115                  ustr_cp_index_t from,
1116                  ustr_cp_index_t to)
1117 {
1118   ustr_cv_index_t fv;
1119   ustr_cv_index_t tv;
1120 
1121   udstr_cp_to_cv_range (&fv, &tv, d, from, to);
1122   return udstr_cv_delete (limits, d, fv, tv);
1123 }
1124 
1125 
1126 /*(c udstr_cv_delete_x)
1127  * t_udstr udstr_cv_delete_x (t_udstr d,
1128  *                            ustr_cv_index_t from,
1129  *                            ustr_cv_index_t to);
1130  *
1131  * Modify `d' by removing code values in the half-open range `from' to
1132  * `to'.
1133  */
1134 t_udstr
udstr_cv_delete_x(t_udstr d,ustr_cv_index_t from,ustr_cv_index_t to)1135 udstr_cv_delete_x (t_udstr d,
1136                    ustr_cv_index_t from,
1137                    ustr_cv_index_t to)
1138 {
1139   size_t cv_sizeof;
1140   size_t from_offset;
1141   size_t to_offset;
1142   size_t current_length;
1143 
1144   cv_sizeof = uni_cv_sizeof (d->enc);
1145 
1146   from_offset = from.cv * cv_sizeof;
1147   to_offset = to.cv * cv_sizeof;
1148   current_length = d->cv_len.cv * cv_sizeof;
1149 
1150   mem_move (d->str.raw + from_offset, d->str.raw + to_offset, current_length - to_offset);
1151 
1152   d->str.raw = lim_realloc (d->limits, d->str.raw, current_length - (to_offset - from_offset));
1153   d->cv_len.cv = (ssize_t)(current_length - (to_offset - from_offset));
1154 
1155   return d;
1156 }
1157 
1158 
1159 /*(c udstr_cp_delete_x)
1160  * t_udstr udstr_cp_delete_x (t_udstr d,
1161  *                            ustr_cp_index_t from,
1162  *                            ustr_cp_index_t to);
1163  *
1164  * Modify `d' by removing codepoints in the half-open range `from' to
1165  * `to'.
1166  */
1167 t_udstr
udstr_cp_delete_x(t_udstr d,ustr_cp_index_t from,ustr_cp_index_t to)1168 udstr_cp_delete_x (t_udstr d,
1169                    ustr_cp_index_t from,
1170                    ustr_cp_index_t to)
1171 {
1172   ustr_cv_index_t fv;
1173   ustr_cv_index_t tv;
1174 
1175   udstr_cp_to_cv_range (&fv, &tv, d, from, to);
1176   return udstr_cv_delete_x (d, fv, tv);
1177 }
1178 
1179 
1180 /*(c udstr_cv_delete_fw)
1181  * t_udstr udstr_cv_delete_fw (alloc_limits limits,
1182  *                             t_udstr d,
1183  *                             ustr_cv_index_t from,
1184  *                             ustr_cv_index_t to);
1185  *
1186  * Return a new string which is a copy of `d' with
1187  * code values in the half-open range `from' to `to'
1188  * removed.
1189  *
1190  * The new string uses a full-width encoding.
1191  */
1192 t_udstr
udstr_cv_delete_fw(alloc_limits limits,t_udstr d,ustr_cv_index_t from,ustr_cv_index_t to)1193 udstr_cv_delete_fw (alloc_limits limits,
1194                     t_udstr d,
1195                     ustr_cv_index_t from,
1196                     ustr_cv_index_t to)
1197 {
1198   uni_string right_source;
1199   ustr_cv_index_t right_len;
1200   ustr_cp_index_t right_cp_len;
1201   enum uni_encoding_scheme right_fw;
1202   ustr_cp_index_t left_cp_len;
1203   enum uni_encoding_scheme left_fw;
1204   enum uni_encoding_scheme actual_fw;
1205   ustr_cv_index_t right_len_fw;
1206   ustr_cv_index_t left_len_fw;
1207   ustr_cv_index_t len_fw;
1208   ustr_cp_index_t cp_len;
1209   t_udstr answer = 0;
1210   uni_string right_dest;
1211   size_t d_cv_sizeof;
1212   size_t d_right_sizeof;
1213 
1214 
1215   right_source = ustr_cv_offset_n (&right_len, d->str, d->enc, d->cv_len, to);
1216   right_cp_len = ustr_cp_length_n (&right_fw, right_source, d->enc, right_len);
1217 
1218   left_cp_len = ustr_cp_length_n (&left_fw, d->str, d->enc, from);
1219 
1220   actual_fw = pick_fw_of (right_fw, left_fw);
1221 
1222   if (actual_fw == d->enc)
1223     {
1224       right_len_fw = right_len;
1225       left_len_fw = from;
1226     }
1227   else
1228     {
1229       right_len_fw = ustr_cv_length_in_encoding_n (actual_fw, right_source, d->enc, right_len);
1230       left_len_fw = ustr_cv_length_in_encoding_n (actual_fw, d->str, d->enc, from);
1231     }
1232 
1233   len_fw = ustr_cv_index (right_len_fw.cv + left_len_fw.cv);
1234   cp_len = ustr_cp_index (right_cp_len.cp + left_cp_len.cp);
1235 
1236   answer = udstr_save_generic (limits, actual_fw, len_fw, d->str, d->enc, from, left_cp_len);
1237   if (!answer)
1238     return 0;
1239 
1240   answer->cp_len = cp_len;
1241 
1242   right_dest = ustr_cv_offset_n (0, answer->str, answer->enc, answer->cv_len, left_len_fw);
1243   d_cv_sizeof = uni_cv_sizeof (d->enc);
1244   d_right_sizeof = right_len.cv * d_cv_sizeof;
1245   mem_move (right_dest.raw, right_source.raw, d_right_sizeof);
1246 
1247   return answer;
1248 }
1249 
1250 
1251 /*(c udstr_cp_delete_fw)
1252  * t_udstr udstr_cp_delete_fw (alloc_limits limits,
1253  *                             t_udstr d,
1254  *                             ustr_cp_index_t from,
1255  *                             ustr_cp_index_t to);
1256  *
1257  * Return a new string which is a copy of `d' with
1258  * codepoints in the half-open range `from' to `to'
1259  * removed.
1260  *
1261  * The new string uses a full-width encoding.
1262  */
1263 t_udstr
udstr_cp_delete_fw(alloc_limits limits,t_udstr d,ustr_cp_index_t from,ustr_cp_index_t to)1264 udstr_cp_delete_fw (alloc_limits limits,
1265                     t_udstr d,
1266                     ustr_cp_index_t from,
1267                     ustr_cp_index_t to)
1268 {
1269   ustr_cv_index_t fv;
1270   ustr_cv_index_t tv;
1271 
1272   udstr_cp_to_cv_range (&fv, &tv, d, from, to);
1273   return udstr_cv_delete_fw (limits, d, fv, tv);
1274 }
1275 
1276 
1277 /*(c udstr_cv_delete_fw_x)
1278  * t_udstr udstr_cv_delete_fw_x (t_udstr d,
1279  *                               ustr_cv_index_t from,
1280  *                               ustr_cv_index_t to);
1281  *
1282  * Modify `d' by removing code values in the half-open range `from' to
1283  * `to' and ensuring that `d' uses a full-width encoding.
1284  */
1285 t_udstr
udstr_cv_delete_fw_x(t_udstr d,ustr_cv_index_t from,ustr_cv_index_t to)1286 udstr_cv_delete_fw_x (t_udstr d,
1287                       ustr_cv_index_t from,
1288                       ustr_cv_index_t to)
1289 {
1290   t_udstr almost_answer = 0;
1291 
1292   almost_answer = udstr_cv_delete_fw (d->limits, d, from, to);
1293 
1294   if (!almost_answer)
1295     return 0;
1296 
1297   take_new_data (d, almost_answer);
1298 
1299   return d;
1300 }
1301 
1302 
1303 /*(c udstr_cp_delete_fw_x)
1304  * t_udstr udstr_cp_delete_fw_x (t_udstr d,
1305  *                               ustr_cp_index_t from,
1306  *                               ustr_cp_index_t to);
1307  *
1308  * Modify `d' by removing codepoints in the half-open range `from' to
1309  * `to' and ensuring that `d' uses a full-width encoding.
1310  */
1311 t_udstr
udstr_cp_delete_fw_x(t_udstr d,ustr_cp_index_t from,ustr_cp_index_t to)1312 udstr_cp_delete_fw_x (t_udstr d,
1313                       ustr_cp_index_t from,
1314                       ustr_cp_index_t to)
1315 {
1316   ustr_cv_index_t fv;
1317   ustr_cv_index_t tv;
1318 
1319   udstr_cp_to_cv_range (&fv, &tv, d, from, to);
1320   return udstr_cv_delete_fw_x (d, fv, tv);
1321 }
1322 
1323 
1324 
1325 
1326 
1327 /************************************************************************
1328  *(h2 "udstr Substring Replacement")
1329  *
1330  *
1331  *
1332  */
1333 
1334 
1335 /*(c udstr_cv_replace)
1336  * t_udstr udstr_cv_replace (alloc_limits limits,
1337  *                           t_udstr into,
1338  *                           ustr_cv_index_t i_from,
1339  *                           ustr_cv_index_t i_to,
1340  *                           t_udstr from,
1341  *                           ustr_cv_index_t f_from,
1342  *                           ustr_cv_index_t f_to);
1343  *
1344  * Return a new string in which the indicated substring
1345  * of `into' is replaced by the indicated substring of `from'.
1346  */
1347 t_udstr
udstr_cv_replace(alloc_limits limits,t_udstr into,ustr_cv_index_t i_from,ustr_cv_index_t i_to,t_udstr from,ustr_cv_index_t f_from,ustr_cv_index_t f_to)1348 udstr_cv_replace (alloc_limits limits,
1349                   t_udstr into,
1350                   ustr_cv_index_t i_from,
1351                   ustr_cv_index_t i_to,
1352                   t_udstr from,
1353                   ustr_cv_index_t f_from,
1354                   ustr_cv_index_t f_to)
1355 {
1356   t_udstr answer = 0;
1357   t_udstr middle = 0;
1358   t_udstr end = 0;
1359 
1360   answer = udstr_cv_substr (limits, into, ustr_cv_index (0), i_from);
1361   middle = udstr_cv_substr (limits, from, f_from, f_to);
1362   end = udstr_cv_substr (limits, into, i_to, into->cv_len);
1363 
1364   if (!(answer && middle && end)
1365       || !udstr_append_x (answer, middle)
1366       || !udstr_append_x (answer, end))
1367     {
1368       udstr_unref (answer);
1369       udstr_unref (middle);
1370       udstr_unref (end);
1371       answer = 0;
1372     }
1373   else
1374     {
1375       udstr_unref (middle);
1376       udstr_unref (end);
1377     }
1378 
1379   return answer;
1380 }
1381 
1382 
1383 /*(c udstr_cp_replace)
1384  * t_udstr udstr_cp_replace (alloc_limits limits,
1385  *                           t_udstr into,
1386  *                           ustr_cp_index_t i_from,
1387  *                           ustr_cp_index_t i_to,
1388  *                           t_udstr from,
1389  *                           ustr_cp_index_t f_from,
1390  *                           ustr_cp_index_t f_to);
1391  *
1392  * Return a new string in which the indicated substring
1393  * of `into' is replaced by the indicated substring of `from'.
1394  */
1395 t_udstr
udstr_cp_replace(alloc_limits limits,t_udstr into,ustr_cp_index_t i_from,ustr_cp_index_t i_to,t_udstr from,ustr_cp_index_t f_from,ustr_cp_index_t f_to)1396 udstr_cp_replace (alloc_limits limits,
1397                   t_udstr into,
1398                   ustr_cp_index_t i_from,
1399                   ustr_cp_index_t i_to,
1400                   t_udstr from,
1401                   ustr_cp_index_t f_from,
1402                   ustr_cp_index_t f_to)
1403 {
1404   ustr_cv_index_t i_f;
1405   ustr_cv_index_t i_t;
1406   ustr_cv_index_t f_f;
1407   ustr_cv_index_t f_t;
1408 
1409   udstr_cp_to_cv_range (&i_f, &i_t, into, i_from, i_to);
1410   udstr_cp_to_cv_range (&f_f, &f_t, from, f_from, f_to);
1411 
1412   return udstr_cv_replace (limits, into, i_f, i_t, from, f_f, f_t);
1413 }
1414 
1415 
1416 
1417 
1418 /*(c udstr_cv_replace_x)
1419  * t_udstr udstr_cv_replace_x (t_udstr into,
1420  *                             ustr_cv_index_t i_from,
1421  *                             ustr_cv_index_t i_to,
1422  *                             t_udstr from,
1423  *                             ustr_cv_index_t f_from,
1424  *                             ustr_cv_index_t f_to);
1425  *
1426  * Modify `into' by replacing the indicated substring with
1427  * the indicated substring of `from'.
1428  */
1429 t_udstr
udstr_cv_replace_x(t_udstr into,ustr_cv_index_t i_from,ustr_cv_index_t i_to,t_udstr from,ustr_cv_index_t f_from,ustr_cv_index_t f_to)1430 udstr_cv_replace_x (t_udstr into,
1431                     ustr_cv_index_t i_from,
1432                     ustr_cv_index_t i_to,
1433                     t_udstr from,
1434                     ustr_cv_index_t f_from,
1435                     ustr_cv_index_t f_to)
1436 {
1437   t_udstr almost_answer;
1438 
1439   almost_answer = udstr_cv_replace (into->limits, into, i_from, i_to, from, f_from, f_to);
1440 
1441   if (!almost_answer)
1442     return 0;
1443 
1444   take_new_data (into, almost_answer);
1445   return into;
1446 }
1447 
1448 
1449 /*(c udstr_cp_replace_x)
1450  * t_udstr udstr_cp_replace_x (t_udstr into,
1451  *                             ustr_cp_index_t i_from,
1452  *                             ustr_cp_index_t i_to,
1453  *                             t_udstr from,
1454  *                             ustr_cp_index_t f_from,
1455  *                             ustr_cp_index_t f_to);
1456  *
1457  * Modify `into' by replacing the indicated substring with
1458  * the indicated substring of `from'.
1459  */
1460 t_udstr
udstr_cp_replace_x(t_udstr into,ustr_cp_index_t i_from,ustr_cp_index_t i_to,t_udstr from,ustr_cp_index_t f_from,ustr_cp_index_t f_to)1461 udstr_cp_replace_x (t_udstr into,
1462                     ustr_cp_index_t i_from,
1463                     ustr_cp_index_t i_to,
1464                     t_udstr from,
1465                     ustr_cp_index_t f_from,
1466                     ustr_cp_index_t f_to)
1467 {
1468   ustr_cv_index_t i_f;
1469   ustr_cv_index_t i_t;
1470   ustr_cv_index_t f_f;
1471   ustr_cv_index_t f_t;
1472 
1473   udstr_cp_to_cv_range (&i_f, &i_t, into, i_from, i_to);
1474   udstr_cp_to_cv_range (&f_f, &f_t, from, f_from, f_to);
1475 
1476   return udstr_cv_replace_x (into, i_f, i_t, from, f_f, f_t);
1477 }
1478 
1479 
1480 /*(c udstr_cv_replace_fw)
1481  * t_udstr udstr_cv_replace_fw (alloc_limits limits,
1482  *                              t_udstr into,
1483  *                              ustr_cv_index_t i_from,
1484  *                              ustr_cv_index_t i_to,
1485  *                              t_udstr from,
1486  *                              ustr_cv_index_t f_from,
1487  *                              ustr_cv_index_t f_to);
1488  *
1489  * Return a new string in which the indicated substring
1490  * of `into' is replaced by the indicated substring of `from'.
1491  *
1492  * The returned string uses a full-width encoding.
1493  */
1494 t_udstr
udstr_cv_replace_fw(alloc_limits limits,t_udstr into,ustr_cv_index_t i_from,ustr_cv_index_t i_to,t_udstr from,ustr_cv_index_t f_from,ustr_cv_index_t f_to)1495 udstr_cv_replace_fw (alloc_limits limits,
1496                      t_udstr into,
1497                      ustr_cv_index_t i_from,
1498                      ustr_cv_index_t i_to,
1499                      t_udstr from,
1500                      ustr_cv_index_t f_from,
1501                      ustr_cv_index_t f_to)
1502 {
1503   t_udstr answer = 0;
1504   t_udstr middle = 0;
1505   t_udstr end = 0;
1506 
1507   answer = udstr_cv_substr_fw (limits, into, ustr_cv_index (0), i_from);
1508   middle = udstr_cv_substr (limits, from, f_from, f_to);
1509   end = udstr_cv_substr (limits, into, i_to, into->cv_len);
1510 
1511   if (!(answer && middle && end)
1512       || !udstr_append_fw_x (answer, middle)
1513       || !udstr_append_fw_x (answer, end))
1514     {
1515       udstr_unref (answer);
1516       udstr_unref (middle);
1517       udstr_unref (end);
1518       answer = 0;
1519     }
1520   else
1521     {
1522       udstr_unref (middle);
1523       udstr_unref (end);
1524     }
1525 
1526   return answer;
1527 }
1528 
1529 
1530 /*(c udstr_cp_replace_fw)
1531  * t_udstr udstr_cp_replace_fw (alloc_limits limits,
1532  *                              t_udstr into,
1533  *                              ustr_cp_index_t i_from,
1534  *                              ustr_cp_index_t i_to,
1535  *                              t_udstr from,
1536  *                              ustr_cp_index_t f_from,
1537  *                              ustr_cp_index_t f_to);
1538  *
1539  * Return a new string in which the indicated substring
1540  * of `into' is replaced by the indicated substring of `from'.
1541  *
1542  * The returned string uses a full-width encoding.
1543  */
1544 t_udstr
udstr_cp_replace_fw(alloc_limits limits,t_udstr into,ustr_cp_index_t i_from,ustr_cp_index_t i_to,t_udstr from,ustr_cp_index_t f_from,ustr_cp_index_t f_to)1545 udstr_cp_replace_fw (alloc_limits limits,
1546                      t_udstr into,
1547                      ustr_cp_index_t i_from,
1548                      ustr_cp_index_t i_to,
1549                      t_udstr from,
1550                      ustr_cp_index_t f_from,
1551                      ustr_cp_index_t f_to)
1552 {
1553   ustr_cv_index_t i_f;
1554   ustr_cv_index_t i_t;
1555   ustr_cv_index_t f_f;
1556   ustr_cv_index_t f_t;
1557 
1558   udstr_cp_to_cv_range (&i_f, &i_t, into, i_from, i_to);
1559   udstr_cp_to_cv_range (&f_f, &f_t, from, f_from, f_to);
1560 
1561   return udstr_cv_replace_fw (limits, into, i_f, i_t, from, f_f, f_t);
1562 }
1563 
1564 
1565 /*(c udstr_cv_replace_fw_x)
1566  * t_udstr udstr_cv_replace_fw_x (t_udstr into,
1567  *                                ustr_cv_index_t i_from,
1568  *                                ustr_cv_index_t i_to,
1569  *                                t_udstr from,
1570  *                                ustr_cv_index_t f_from,
1571  *                                ustr_cv_index_t f_to);
1572  *
1573  * Modify `into' by replacing the indicated substring with
1574  * the indicated substring of `from' and ensuring that
1575  * `into' uses a full-width encoding.
1576  */
1577 t_udstr
udstr_cv_replace_fw_x(t_udstr into,ustr_cv_index_t i_from,ustr_cv_index_t i_to,t_udstr from,ustr_cv_index_t f_from,ustr_cv_index_t f_to)1578 udstr_cv_replace_fw_x (t_udstr into,
1579                        ustr_cv_index_t i_from,
1580                        ustr_cv_index_t i_to,
1581                        t_udstr from,
1582                        ustr_cv_index_t f_from,
1583                        ustr_cv_index_t f_to)
1584 {
1585   t_udstr almost_answer;
1586 
1587   almost_answer = udstr_cv_replace_fw (into->limits, into, i_from, i_to, from, f_from, f_to);
1588 
1589   if (!almost_answer)
1590     return 0;
1591 
1592   take_new_data (into, almost_answer);
1593   return into;
1594 }
1595 
1596 
1597 /*(c udstr_cp_replace_fw_x)
1598  * t_udstr udstr_cp_replace_fw_x (t_udstr into,
1599  *                                ustr_cp_index_t i_from,
1600  *                                ustr_cp_index_t i_to,
1601  *                                t_udstr from,
1602  *                                ustr_cp_index_t f_from,
1603  *                                ustr_cp_index_t f_to);
1604  *
1605  * Modify `into' by replacing the indicated substring with
1606  * the indicated substring of `from' and ensuring that
1607  * `into' uses a full-width encoding.
1608  */
1609 t_udstr
udstr_cp_replace_fw_x(t_udstr into,ustr_cp_index_t i_from,ustr_cp_index_t i_to,t_udstr from,ustr_cp_index_t f_from,ustr_cp_index_t f_to)1610 udstr_cp_replace_fw_x (t_udstr into,
1611                        ustr_cp_index_t i_from,
1612                        ustr_cp_index_t i_to,
1613                        t_udstr from,
1614                        ustr_cp_index_t f_from,
1615                        ustr_cp_index_t f_to)
1616 {
1617   ustr_cv_index_t i_f;
1618   ustr_cv_index_t i_t;
1619   ustr_cv_index_t f_f;
1620   ustr_cv_index_t f_t;
1621 
1622   udstr_cp_to_cv_range (&i_f, &i_t, into, i_from, i_to);
1623   udstr_cp_to_cv_range (&f_f, &f_t, from, f_from, f_to);
1624 
1625   return udstr_cv_replace_fw_x (into, i_f, i_t, from, f_f, f_t);
1626 }
1627 
1628 
1629 
1630 
1631 
1632 /************************************************************************
1633  *(h2 "udstr Index Normalization")
1634  *
1635  *
1636  *
1637  */
1638 
1639 
1640 /*(c udstr_cv_normalize)
1641  * ustr_cv_index_t udstr_cv_normalize (t_udstr dstr,
1642  *                                     ustr_cv_index_t orig_index);
1643  *
1644  * Return a code value index derived from `orig_index' by adjusting
1645  * it to point to the first code value in its codepoint.
1646  */
1647 ustr_cv_index_t
udstr_cv_normalize(t_udstr dstr,ustr_cv_index_t orig_index)1648 udstr_cv_normalize (t_udstr dstr,
1649                     ustr_cv_index_t orig_index)
1650 {
1651   ustr_cv_index_t index = orig_index;
1652 
1653 
1654   switch (dstr->enc)
1655     {
1656     default:
1657     case uni_iso8859_1:
1658     case uni_utf32:
1659     case uni_utf32be:
1660     case uni_utf32le:
1661     case uni_bogus32:
1662     case uni_bogus32be:
1663     case uni_bogus32le:
1664       {
1665         if (index.cv > dstr->cv_len.cv)
1666           index.cv = dstr->cv_len.cv;
1667         else if (index.cv < 0)
1668           index.cv = 0;
1669 
1670         return index;
1671       }
1672 
1673     case uni_utf8:
1674       {
1675         if (index.cv > dstr->cv_len.cv)
1676           {
1677             index.cv = dstr->cv_len.cv;
1678             return index;
1679           }
1680         else if (index.cv <= 0)
1681           {
1682             index.cv = 0;
1683             return index;
1684           }
1685         else if (!(dstr->str.utf8[index.cv] & 0x80))
1686           {
1687             return index;
1688           }
1689         else
1690           {
1691             if (0x80 == (0xC0 & dstr->str.utf8[index.cv]))
1692               {
1693                 int n_suffix_bytes = 1;
1694 
1695                 while (1)
1696                   {
1697                     /* invarients:
1698                      *
1699                      * index.cv >= 1
1700                      *
1701                      * str[index.cv] is some 10xxxxxx character
1702                      *
1703                      * 1 <= n_suffix_bytes <= 4
1704                      */
1705 
1706                     /* Look at the previous character to decide
1707                      * what to do.
1708                      */
1709                     switch (0xc0 & dstr->str.utf8[index.cv - 1])
1710                       {
1711                       case 0x80:
1712                         {
1713                           /* preceeding is also a non-first multi-byte sequence
1714                            * code value.  This preserves one of three loop
1715                            * invarients.
1716                            */
1717 
1718                           if (n_suffix_bytes == 4)
1719                             {
1720                               /* Then the one to the left means that there are 5 or
1721                                * more, which is illegal, so, our original index is
1722                                * just pointing at an ill-formed sequence.
1723                                * This preserves the second loop invarient while
1724                                * n_suffix_bytes counts up to detect ill-formed sequences.
1725                                */
1726                               return orig_index;
1727                             }
1728                           else if (index.cv == 1)
1729                             {
1730                               /* The string starts (at index 0) in the middle of
1731                                * a multi-char sequence.   So, once again, our
1732                                * original index is pointing at an ill-formed sequence.
1733                                * This preserves the final loop invarient.
1734                                */
1735                               return orig_index;
1736                             }
1737                           else
1738                             {
1739                               /* All invarients being preserved and having had made
1740                                * progress towards finding the start of the character
1741                                * sequence:
1742                                */
1743                               ++n_suffix_bytes;
1744                               --index.cv;
1745                               continue;
1746                             }
1747                         }
1748                       case 0x40:
1749                       case 0x00:
1750                         {
1751                           /* The place we started at is part of an ill-formed sequence (no
1752                            * 0xc0 starting character.   This preserves our loop invarients.
1753                            */
1754                           return orig_index;
1755                         }
1756 
1757                       case 0xC0:
1758                         {
1759                           /* aha!  the first character of a multi-byte sequence.
1760                            */
1761                           --index.cv;
1762                           return index;
1763                         }
1764                       }
1765                   }
1766               }
1767           }
1768       }
1769     case uni_utf16:
1770       {
1771         if (index.cv > dstr->cv_len.cv)
1772           {
1773             index.cv = dstr->cv_len.cv;
1774             return index;
1775           }
1776         else if (index.cv <= 0)
1777           {
1778             index.cv = 0;
1779             return index;
1780           }
1781         else if (uni_is_low_surrogate (dstr->str.utf16[index.cv]))
1782           {
1783             if (uni_is_high_surrogate (dstr->str.utf16[index.cv - 1]))
1784               {
1785                 --index.cv;
1786               }
1787             return index;
1788           }
1789         else
1790           return index;
1791       }
1792     case uni_utf16be:
1793       {
1794         size_t pos;
1795         t_unicode c;
1796 
1797         if (index.cv > dstr->cv_len.cv)
1798           {
1799             index.cv = dstr->cv_len.cv;
1800             return index;
1801           }
1802         else if (index.cv <= 0)
1803           {
1804             index.cv = 0;
1805             return index;
1806           }
1807 
1808         pos = 0;
1809         c = uni_utf16be_iscan ((t_uchar *)(dstr->str.utf16 + index.cv), &pos, (size_t)2);
1810 
1811         if (uni_is_low_surrogate (c))
1812           {
1813             pos = 0;
1814             c = uni_utf16be_iscan ((t_uchar *)(dstr->str.utf16 + index.cv - 1), &pos, (size_t)2);
1815 
1816             if (uni_is_high_surrogate (c))
1817               {
1818                 --index.cv;
1819               }
1820             return index;
1821           }
1822         else
1823           return index;
1824       }
1825     case uni_utf16le:
1826       {
1827         size_t pos;
1828         t_unicode c;
1829 
1830         if (index.cv > dstr->cv_len.cv)
1831           {
1832             index.cv = dstr->cv_len.cv;
1833             return index;
1834           }
1835         else if (index.cv <= 0)
1836           {
1837             index.cv = 0;
1838             return index;
1839           }
1840 
1841         pos = 0;
1842         c = uni_utf16le_iscan ((t_uchar *)(dstr->str.utf16 + index.cv), &pos, (size_t)2);
1843 
1844         if (uni_is_low_surrogate (c))
1845           {
1846             pos = 0;
1847             c = uni_utf16le_iscan ((t_uchar *)(dstr->str.utf16 + index.cv - 1), &pos, (size_t)2);
1848 
1849             if (uni_is_high_surrogate (c))
1850               {
1851                 --index.cv;
1852               }
1853             return index;
1854           }
1855         else
1856           return index;
1857       }
1858     }
1859 }
1860 
1861 
1862 
1863 /************************************************************************
1864  *(h2 "udstr String Iteration")
1865  *
1866  */
1867 
1868 
1869 
1870 
1871 /*(c udstr_cv_inc)
1872  * ustr_cv_index_t udstr_cv_inc (t_udstr dstr,
1873  *                               ustr_cv_index_t orig_index);
1874  *
1875  * Increment `orig_index' (presumed to be the code value index
1876  * of the first code value of a possibly multi-code value codepoint)
1877  * to be the code value index of the beginning of the _next_ codepoint.
1878  * (!)
1879  */
1880 ustr_cv_index_t
udstr_cv_inc(t_udstr dstr,ustr_cv_index_t orig_index)1881 udstr_cv_inc (t_udstr dstr,
1882               ustr_cv_index_t orig_index)
1883 {
1884   if (orig_index.cv >= dstr->cv_len.cv)
1885     return dstr->cv_len;
1886 
1887   if (orig_index.cv < 0)
1888     return ustr_cv_index (0);
1889 
1890   switch (dstr->enc)
1891     {
1892     default:
1893     case uni_utf32:
1894     case uni_utf32be:
1895     case uni_utf32le:
1896     case uni_bogus32:
1897     case uni_bogus32be:
1898     case uni_bogus32le:
1899     case uni_iso8859_1:
1900       {
1901         return ustr_cv_index (orig_index.cv + 1);
1902       }
1903 
1904 #undef CASE_FOR
1905 
1906 #define CASE_FOR(X) \
1907     case uni_ ## X: \
1908       { \
1909         size_t pos; \
1910         size_t len; \
1911         \
1912         pos = orig_index.cv; \
1913         len = dstr->cv_len.cv; \
1914         uni_ ## X ## _scan (dstr->str.X, &pos, len); \
1915         return ustr_cv_index ((ssize_t)pos); \
1916       }
1917 
1918    CASE_FOR(utf8);
1919    CASE_FOR(utf16);
1920 
1921 
1922 #define ICASE_FOR(X) \
1923     case uni_ ## X: \
1924       { \
1925         size_t pos; \
1926         size_t len; \
1927         \
1928         pos = orig_index.cv * 2; \
1929         len = dstr->cv_len.cv * 2; \
1930         uni_ ## X ## _iscan (dstr->str.X, &pos, len); \
1931         return ustr_cv_index ((ssize_t)(pos / 2)); \
1932       }
1933 
1934    ICASE_FOR(utf16be);
1935    ICASE_FOR(utf16le);
1936     }
1937 }
1938 
1939 
1940 
1941 /*(c udstr_cv_inc)
1942  * ustr_cv_index_t udstr_cv_inc (t_udstr dstr,
1943  *                               ustr_cv_index_t orig_index);
1944  *
1945  * Decrement `orig_index' (presumed to be the code value index
1946  * of the first code value of a possibly multi-code value codepoint)
1947  * to be the code value index of the beginning of the _previous_ codepoint.
1948  * (!)
1949  */
1950 ustr_cv_index_t
udstr_cv_dec(t_udstr dstr,ustr_cv_index_t orig_index)1951 udstr_cv_dec (t_udstr dstr,
1952               ustr_cv_index_t orig_index)
1953 {
1954   if (orig_index.cv > dstr->cv_len.cv)
1955     return dstr->cv_len;
1956 
1957   if (orig_index.cv <= 0)
1958     return ustr_cv_index (0);
1959 
1960   return udstr_cv_normalize (dstr, ustr_cv_index (orig_index.cv - 1));
1961 }
1962 
1963 
1964 /************************************************************************
 *(h2 "udstr Code Value and Codepoint Index Conversion")
1966  *
1967  *
1968  *
1969  */
1970 
1971 
1972 /*(c udstr_cp_to_cv)
1973  * ustr_cv_index_t udstr_cp_to_cv (t_udstr dstr,
1974  *                                 ustr_cp_index_t cpi);
1975  *
1976  * Return the code value index of the indicated codepoint.
1977  */
1978 ustr_cv_index_t
udstr_cp_to_cv(t_udstr dstr,ustr_cp_index_t cpi)1979 udstr_cp_to_cv (t_udstr dstr,
1980                 ustr_cp_index_t cpi)
1981 {
1982   uni_string addressed;
1983 
1984   if (cpi.cp < 0)
1985     return ustr_cv_index (0);
1986 
1987   if (cpi.cp >= dstr->cp_len.cp)
1988     return dstr->cv_len;
1989 
1990   addressed = ustr_cp_offset_n (dstr->str, dstr->enc, dstr->cv_len, cpi);
1991   return ustr_str_subtract (addressed, dstr->str, dstr->enc);
1992 }
1993 
1994 
1995 /*(c udstr_cp_to_cv_range)
1996  * void udstr_cp_to_cv_range (ustr_cv_index_t * from_v,
1997  *                            ustr_cv_index_t * to_v,
1998  *                            t_udstr d,
1999  *                            ustr_cp_index_t from,
2000  *                            ustr_cp_index_t to);
2001  *
2002  * Return the code value indices of the indicated codepoint range.
2003  *
2004  */
2005 void
udstr_cp_to_cv_range(ustr_cv_index_t * from_v,ustr_cv_index_t * to_v,t_udstr d,ustr_cp_index_t from,ustr_cp_index_t to)2006 udstr_cp_to_cv_range (ustr_cv_index_t * from_v,
2007                       ustr_cv_index_t * to_v,
2008                       t_udstr d,
2009                       ustr_cp_index_t from,
2010                       ustr_cp_index_t to)
2011 {
2012   uni_string f_str;
2013   ustr_cv_index_t f_v;
2014   uni_string t_str;
2015   ustr_cv_index_t t_v;
2016 
2017   invariant (from.cp <= to.cp);
2018 
2019   f_str = ustr_cp_offset_n (d->str, d->enc, d->cv_len, from);
2020   f_v = ustr_str_subtract (f_str, d->str, d->enc);
2021   t_str = ustr_cp_offset_n (f_str,
2022                             d->enc,
2023                             ustr_cv_index (d->cv_len.cv - f_v.cv),
2024                             ustr_cp_index (to.cp - from.cp));
2025   t_v = ustr_str_subtract (t_str, f_str, d->enc);
2026 
2027   *from_v = f_v;
2028   *to_v = t_v;
2029 }
2030 
2031 
2032 /*(c udstr_cv_to_cp)
2033  * ustr_cp_index_t udstr_cv_to_cp (t_udstr dstr,
2034  *                                 ustr_cv_index_t cvi);
2035  *
2036  * Return the codepoint index of the indicated code value.
2037  */
2038 ustr_cp_index_t
udstr_cv_to_cp(t_udstr dstr,ustr_cv_index_t cvi)2039 udstr_cv_to_cp (t_udstr dstr,
2040                 ustr_cv_index_t cvi)
2041 
2042 {
2043   if (cvi.cv < 0)
2044     return ustr_cp_index (0);
2045 
2046   if (cvi.cv >= dstr->cv_len.cv)
2047     return dstr->cp_len;
2048 
2049   return ustr_cp_length_n (0, dstr->str, dstr->enc, cvi);
2050 }
2051 
2052 
2053 
2054 
2055 
2056 #if 0
2057 cv_set
2058 cv_set_x
2059 cp_set
2060 cp_set_x
2061 
2062 #endif
2063 
2064 
2065 static enum uni_encoding_scheme
pick_fw_of(enum uni_encoding_scheme a,enum uni_encoding_scheme b)2066 pick_fw_of (enum uni_encoding_scheme a,
2067             enum uni_encoding_scheme b)
2068 {
2069   size_t a_size;
2070   size_t b_size;
2071 
2072   if (a == b)
2073     return a;
2074 
2075   a_size = uni_cv_sizeof (a);
2076   b_size = uni_cv_sizeof (b);
2077 
2078   if (a_size > b_size)
2079     return a;
2080   if (b_size > a_size)
2081     return b;
2082 
2083   switch (a)
2084     {
2085     case uni_iso8859_1: return uni_iso8859_1;
2086 
2087     case uni_utf8: return uni_utf8;
2088 
2089     case uni_utf16be:
2090     case uni_utf16le:
2091     case uni_utf16: return uni_utf16;
2092 
2093     case uni_utf32be:
2094     case uni_utf32le:
2095     case uni_utf32: return uni_utf32;
2096 
2097     default:
2098     case uni_bogus32be:
2099     case uni_bogus32le:
2100     case uni_bogus32: return uni_bogus32;
2101     }
2102 }
2103 
2104 static void
take_new_data(t_udstr dstr,t_udstr new_data)2105 take_new_data (t_udstr dstr, t_udstr new_data)
2106 {
2107     new_data->refs = dstr->refs;
2108     lim_free (dstr->limits, dstr->str.raw);
2109     *dstr = *new_data;
2110     lim_free (new_data->limits, (void *)new_data);
2111 }
2112 
2113 
2114 
2115 /* tag: Tom Lord Fri Jan  2 08:47:21 2004 (udstr.c)
2116  */
2117