1 /*
2     $Id: strscan.c 62429 2018-02-16 08:39:48Z nobu $
3 
4     Copyright (c) 1999-2006 Minero Aoki
5 
6     This program is free software.
7     You can distribute/modify this program under the terms of
8     the Ruby License. For details, see the file COPYING.
9 */
10 
11 #include "ruby/ruby.h"
12 #include "ruby/re.h"
13 #include "ruby/encoding.h"
14 #include "regint.h"
15 
16 #define STRSCAN_VERSION "0.7.0"
17 
18 /* =======================================================================
19                          Data Type Definitions
20    ======================================================================= */
21 
22 static VALUE StringScanner;
23 static VALUE ScanError;
24 static ID id_byteslice;
25 
26 struct strscanner
27 {
28     /* multi-purpose flags */
29     unsigned long flags;
30 #define FLAG_MATCHED (1 << 0)
31 
32     /* the string to scan */
33     VALUE str;
34 
35     /* scan pointers */
36     long prev;   /* legal only when MATCHED_P(s) */
37     long curr;   /* always legal */
38 
39     /* the regexp register; legal only when MATCHED_P(s) */
40     struct re_registers regs;
41 
42     /* regexp used for last scan */
43     VALUE regex;
44 };
45 
/* Match-status helpers; `s` is a pointer to struct strscanner.
 * Each expansion is fully parenthesized so it is safe inside larger
 * expressions. */
#define MATCHED_P(s)          ((s)->flags & FLAG_MATCHED)
#define MATCHED(s)            ((s)->flags |= FLAG_MATCHED)
#define CLEAR_MATCH_STATUS(s) ((s)->flags &= ~FLAG_MATCHED)

/* Accessors for the scanned string's buffer and the scan position. */
#define S_PBEG(s)    (RSTRING_PTR((s)->str))
#define S_LEN(s)     (RSTRING_LEN((s)->str))
#define S_PEND(s)    (S_PBEG(s) + S_LEN(s))
#define CURPTR(s)    (S_PBEG(s) + (s)->curr)
#define S_RESTLEN(s) (S_LEN(s) - (s)->curr)

/* True when the scan position is at or past the end of the string.
 * Uses the macro argument only, so it does not depend on the caller
 * having a variable named `p`. */
#define EOS_P(s) ((s)->curr >= RSTRING_LEN((s)->str))

/* Fetch the scanner struct for `obj` into `var`; raises ArgumentError
 * when no string has been attached yet (str is still nil). */
#define GET_SCANNER(obj,var) do {\
    (var) = check_strscan(obj);\
    if (NIL_P((var)->str)) rb_raise(rb_eArgError, "uninitialized StringScanner object");\
} while (0)
62 
63 /* =======================================================================
64                             Function Prototypes
65    ======================================================================= */
66 
67 static inline long minl _((const long n, const long x));
68 static VALUE infect _((VALUE str, struct strscanner *p));
69 static VALUE extract_range _((struct strscanner *p, long beg_i, long end_i));
70 static VALUE extract_beg_len _((struct strscanner *p, long beg_i, long len));
71 
72 static struct strscanner *check_strscan _((VALUE obj));
73 static void strscan_mark _((void *p));
74 static void strscan_free _((void *p));
75 static size_t strscan_memsize _((const void *p));
76 static VALUE strscan_s_allocate _((VALUE klass));
77 static VALUE strscan_initialize _((int argc, VALUE *argv, VALUE self));
78 static VALUE strscan_init_copy _((VALUE vself, VALUE vorig));
79 
80 static VALUE strscan_s_mustc _((VALUE self));
81 static VALUE strscan_terminate _((VALUE self));
82 static VALUE strscan_clear _((VALUE self));
83 static VALUE strscan_get_string _((VALUE self));
84 static VALUE strscan_set_string _((VALUE self, VALUE str));
85 static VALUE strscan_concat _((VALUE self, VALUE str));
86 static VALUE strscan_get_pos _((VALUE self));
87 static VALUE strscan_set_pos _((VALUE self, VALUE pos));
88 static VALUE strscan_do_scan _((VALUE self, VALUE regex,
89                                 int succptr, int getstr, int headonly));
90 static VALUE strscan_scan _((VALUE self, VALUE re));
91 static VALUE strscan_match_p _((VALUE self, VALUE re));
92 static VALUE strscan_skip _((VALUE self, VALUE re));
93 static VALUE strscan_check _((VALUE self, VALUE re));
94 static VALUE strscan_scan_full _((VALUE self, VALUE re,
95                                   VALUE succp, VALUE getp));
96 static VALUE strscan_scan_until _((VALUE self, VALUE re));
97 static VALUE strscan_skip_until _((VALUE self, VALUE re));
98 static VALUE strscan_check_until _((VALUE self, VALUE re));
99 static VALUE strscan_search_full _((VALUE self, VALUE re,
100                                     VALUE succp, VALUE getp));
101 static void adjust_registers_to_matched _((struct strscanner *p));
102 static VALUE strscan_getch _((VALUE self));
103 static VALUE strscan_get_byte _((VALUE self));
104 static VALUE strscan_getbyte _((VALUE self));
105 static VALUE strscan_peek _((VALUE self, VALUE len));
106 static VALUE strscan_peep _((VALUE self, VALUE len));
107 static VALUE strscan_unscan _((VALUE self));
108 static VALUE strscan_bol_p _((VALUE self));
109 static VALUE strscan_eos_p _((VALUE self));
110 static VALUE strscan_empty_p _((VALUE self));
111 static VALUE strscan_rest_p _((VALUE self));
112 static VALUE strscan_matched_p _((VALUE self));
113 static VALUE strscan_matched _((VALUE self));
114 static VALUE strscan_matched_size _((VALUE self));
115 static VALUE strscan_aref _((VALUE self, VALUE idx));
116 static VALUE strscan_pre_match _((VALUE self));
117 static VALUE strscan_post_match _((VALUE self));
118 static VALUE strscan_rest _((VALUE self));
119 static VALUE strscan_rest_size _((VALUE self));
120 
121 static VALUE strscan_inspect _((VALUE self));
122 static VALUE inspect1 _((struct strscanner *p));
123 static VALUE inspect2 _((struct strscanner *p));
124 
125 /* =======================================================================
126                                    Utils
127    ======================================================================= */
128 
129 static VALUE
infect(VALUE str,struct strscanner * p)130 infect(VALUE str, struct strscanner *p)
131 {
132     OBJ_INFECT(str, p->str);
133     return str;
134 }
135 
136 static VALUE
str_new(struct strscanner * p,const char * ptr,long len)137 str_new(struct strscanner *p, const char *ptr, long len)
138 {
139     VALUE str = rb_str_new(ptr, len);
140     rb_enc_copy(str, p->str);
141     return str;
142 }
143 
static inline long
minl(const long x, const long y)
{
    /* Smaller of the two arguments. */
    if (x < y) {
        return x;
    }
    return y;
}
149 
150 static VALUE
extract_range(struct strscanner * p,long beg_i,long end_i)151 extract_range(struct strscanner *p, long beg_i, long end_i)
152 {
153     if (beg_i > S_LEN(p)) return Qnil;
154     end_i = minl(end_i, S_LEN(p));
155     return infect(str_new(p, S_PBEG(p) + beg_i, end_i - beg_i), p);
156 }
157 
158 static VALUE
extract_beg_len(struct strscanner * p,long beg_i,long len)159 extract_beg_len(struct strscanner *p, long beg_i, long len)
160 {
161     if (beg_i > S_LEN(p)) return Qnil;
162     len = minl(len, S_LEN(p) - beg_i);
163     return infect(str_new(p, S_PBEG(p) + beg_i, len), p);
164 }
165 
166 /* =======================================================================
167                                Constructor
168    ======================================================================= */
169 
170 static void
strscan_mark(void * ptr)171 strscan_mark(void *ptr)
172 {
173     struct strscanner *p = ptr;
174     rb_gc_mark(p->str);
175 }
176 
177 static void
strscan_free(void * ptr)178 strscan_free(void *ptr)
179 {
180     struct strscanner *p = ptr;
181     onig_region_free(&(p->regs), 0);
182     ruby_xfree(p);
183 }
184 
185 static size_t
strscan_memsize(const void * ptr)186 strscan_memsize(const void *ptr)
187 {
188     const struct strscanner *p = ptr;
189     return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
190 }
191 
192 static const rb_data_type_t strscanner_type = {
193     "StringScanner",
194     {strscan_mark, strscan_free, strscan_memsize},
195     0, 0, RUBY_TYPED_FREE_IMMEDIATELY
196 };
197 
198 static VALUE
strscan_s_allocate(VALUE klass)199 strscan_s_allocate(VALUE klass)
200 {
201     struct strscanner *p;
202     VALUE obj = TypedData_Make_Struct(klass, struct strscanner, &strscanner_type, p);
203 
204     CLEAR_MATCH_STATUS(p);
205     onig_region_init(&(p->regs));
206     p->str = Qnil;
207     return obj;
208 }
209 
210 /*
211  * call-seq: StringScanner.new(string, dup = false)
212  *
213  * Creates a new StringScanner object to scan over the given +string+.
214  * +dup+ argument is obsolete and not used now.
215  */
216 static VALUE
strscan_initialize(int argc,VALUE * argv,VALUE self)217 strscan_initialize(int argc, VALUE *argv, VALUE self)
218 {
219     struct strscanner *p;
220     VALUE str, need_dup;
221 
222     p = check_strscan(self);
223     rb_scan_args(argc, argv, "11", &str, &need_dup);
224     StringValue(str);
225     p->str = str;
226 
227     return self;
228 }
229 
230 static struct strscanner *
check_strscan(VALUE obj)231 check_strscan(VALUE obj)
232 {
233     return rb_check_typeddata(obj, &strscanner_type);
234 }
235 
236 /*
237  * call-seq:
238  *   dup
239  *   clone
240  *
241  * Duplicates a StringScanner object.
242  */
243 static VALUE
strscan_init_copy(VALUE vself,VALUE vorig)244 strscan_init_copy(VALUE vself, VALUE vorig)
245 {
246     struct strscanner *self, *orig;
247 
248     self = check_strscan(vself);
249     orig = check_strscan(vorig);
250     if (self != orig) {
251 	self->flags = orig->flags;
252 	self->str = orig->str;
253 	self->prev = orig->prev;
254 	self->curr = orig->curr;
255 	if (rb_reg_region_copy(&self->regs, &orig->regs))
256 	    rb_memerror();
257 	RB_GC_GUARD(vorig);
258     }
259 
260     return vself;
261 }
262 
263 /* =======================================================================
264                           Instance Methods
265    ======================================================================= */
266 
267 /*
268  * call-seq: StringScanner.must_C_version
269  *
270  * This method is defined for backward compatibility.
271  */
272 static VALUE
strscan_s_mustc(VALUE self)273 strscan_s_mustc(VALUE self)
274 {
275     return self;
276 }
277 
278 /*
279  * Reset the scan pointer (index 0) and clear matching data.
280  */
281 static VALUE
strscan_reset(VALUE self)282 strscan_reset(VALUE self)
283 {
284     struct strscanner *p;
285 
286     GET_SCANNER(self, p);
287     p->curr = 0;
288     CLEAR_MATCH_STATUS(p);
289     return self;
290 }
291 
292 /*
293  * call-seq:
294  *   terminate
295  *   clear
296  *
297  * Set the scan pointer to the end of the string and clear matching data.
298  */
299 static VALUE
strscan_terminate(VALUE self)300 strscan_terminate(VALUE self)
301 {
302     struct strscanner *p;
303 
304     GET_SCANNER(self, p);
305     p->curr = S_LEN(p);
306     CLEAR_MATCH_STATUS(p);
307     return self;
308 }
309 
310 /*
311  * Equivalent to #terminate.
312  * This method is obsolete; use #terminate instead.
313  */
314 static VALUE
strscan_clear(VALUE self)315 strscan_clear(VALUE self)
316 {
317     rb_warning("StringScanner#clear is obsolete; use #terminate instead");
318     return strscan_terminate(self);
319 }
320 
321 /*
322  * Returns the string being scanned.
323  */
324 static VALUE
strscan_get_string(VALUE self)325 strscan_get_string(VALUE self)
326 {
327     struct strscanner *p;
328 
329     GET_SCANNER(self, p);
330     return p->str;
331 }
332 
333 /*
334  * call-seq: string=(str)
335  *
336  * Changes the string being scanned to +str+ and resets the scanner.
337  * Returns +str+.
338  */
339 static VALUE
strscan_set_string(VALUE self,VALUE str)340 strscan_set_string(VALUE self, VALUE str)
341 {
342     struct strscanner *p = check_strscan(self);
343 
344     StringValue(str);
345     p->str = str;
346     p->curr = 0;
347     CLEAR_MATCH_STATUS(p);
348     return str;
349 }
350 
351 /*
352  * call-seq:
353  *   concat(str)
354  *   <<(str)
355  *
356  * Appends +str+ to the string being scanned.
357  * This method does not affect scan pointer.
358  *
359  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
360  *   s.scan(/Fri /)
361  *   s << " +1000 GMT"
362  *   s.string            # -> "Fri Dec 12 1975 14:39 +1000 GMT"
363  *   s.scan(/Dec/)       # -> "Dec"
364  */
365 static VALUE
strscan_concat(VALUE self,VALUE str)366 strscan_concat(VALUE self, VALUE str)
367 {
368     struct strscanner *p;
369 
370     GET_SCANNER(self, p);
371     StringValue(str);
372     rb_str_append(p->str, str);
373     return self;
374 }
375 
376 /*
377  * Returns the byte position of the scan pointer.  In the 'reset' position, this
378  * value is zero.  In the 'terminated' position (i.e. the string is exhausted),
379  * this value is the bytesize of the string.
380  *
381  * In short, it's a 0-based index into bytes of the string.
382  *
383  *   s = StringScanner.new('test string')
384  *   s.pos               # -> 0
385  *   s.scan_until /str/  # -> "test str"
386  *   s.pos               # -> 8
387  *   s.terminate         # -> #<StringScanner fin>
388  *   s.pos               # -> 11
389  */
390 static VALUE
strscan_get_pos(VALUE self)391 strscan_get_pos(VALUE self)
392 {
393     struct strscanner *p;
394 
395     GET_SCANNER(self, p);
396     return INT2FIX(p->curr);
397 }
398 
399 /*
400  * Returns the character position of the scan pointer.  In the 'reset' position, this
401  * value is zero.  In the 'terminated' position (i.e. the string is exhausted),
402  * this value is the size of the string.
403  *
404  * In short, it's a 0-based index into the string.
405  *
406  *   s = StringScanner.new("abcädeföghi")
407  *   s.charpos           # -> 0
408  *   s.scan_until(/ä/)   # -> "abcä"
409  *   s.pos               # -> 5
410  *   s.charpos           # -> 4
411  */
412 static VALUE
strscan_get_charpos(VALUE self)413 strscan_get_charpos(VALUE self)
414 {
415     struct strscanner *p;
416     VALUE substr;
417 
418     GET_SCANNER(self, p);
419 
420     substr = rb_funcall(p->str, id_byteslice, 2, INT2FIX(0), INT2NUM(p->curr));
421 
422     return rb_str_length(substr);
423 }
424 
425 /*
426  * call-seq: pos=(n)
427  *
428  * Set the byte position of the scan pointer.
429  *
430  *   s = StringScanner.new('test string')
431  *   s.pos = 7            # -> 7
432  *   s.rest               # -> "ring"
433  */
434 static VALUE
strscan_set_pos(VALUE self,VALUE v)435 strscan_set_pos(VALUE self, VALUE v)
436 {
437     struct strscanner *p;
438     long i;
439 
440     GET_SCANNER(self, p);
441     i = NUM2INT(v);
442     if (i < 0) i += S_LEN(p);
443     if (i < 0) rb_raise(rb_eRangeError, "index out of range");
444     if (i > S_LEN(p)) rb_raise(rb_eRangeError, "index out of range");
445     p->curr = i;
446     return INT2NUM(i);
447 }
448 
449 static VALUE
strscan_do_scan(VALUE self,VALUE regex,int succptr,int getstr,int headonly)450 strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
451 {
452     regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
453     struct strscanner *p;
454     regex_t *re;
455     long ret;
456     int tmpreg;
457 
458     Check_Type(regex, T_REGEXP);
459     GET_SCANNER(self, p);
460 
461     CLEAR_MATCH_STATUS(p);
462     if (S_RESTLEN(p) < 0) {
463         return Qnil;
464     }
465 
466     p->regex = regex;
467     re = rb_reg_prepare_re(regex, p->str);
468     tmpreg = re != RREGEXP_PTR(regex);
469     if (!tmpreg) RREGEXP(regex)->usecnt++;
470 
471     if (headonly) {
472         ret = onig_match(re, (UChar* )CURPTR(p),
473                          (UChar* )(CURPTR(p) + S_RESTLEN(p)),
474                          (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
475     }
476     else {
477         ret = onig_search(re,
478                           (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
479                           (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
480                           &(p->regs), ONIG_OPTION_NONE);
481     }
482     if (!tmpreg) RREGEXP(regex)->usecnt--;
483     if (tmpreg) {
484         if (RREGEXP(regex)->usecnt) {
485             onig_free(re);
486         }
487         else {
488             onig_free(RREGEXP_PTR(regex));
489             RREGEXP_PTR(regex) = re;
490         }
491     }
492 
493     if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
494     if (ret < 0) {
495         /* not matched */
496         return Qnil;
497     }
498 
499     MATCHED(p);
500     p->prev = p->curr;
501     if (succptr) {
502         p->curr += p->regs.end[0];
503     }
504     if (getstr) {
505         return extract_beg_len(p, p->prev, p->regs.end[0]);
506     }
507     else {
508         return INT2FIX(p->regs.end[0]);
509     }
510 }
511 
512 /*
513  * call-seq: scan(pattern) => String
514  *
515  * Tries to match with +pattern+ at the current position. If there's a match,
516  * the scanner advances the "scan pointer" and returns the matched string.
517  * Otherwise, the scanner returns +nil+.
518  *
519  *   s = StringScanner.new('test string')
520  *   p s.scan(/\w+/)   # -> "test"
521  *   p s.scan(/\w+/)   # -> nil
522  *   p s.scan(/\s+/)   # -> " "
523  *   p s.scan(/\w+/)   # -> "string"
524  *   p s.scan(/./)     # -> nil
525  *
526  */
527 static VALUE
strscan_scan(VALUE self,VALUE re)528 strscan_scan(VALUE self, VALUE re)
529 {
530     return strscan_do_scan(self, re, 1, 1, 1);
531 }
532 
533 /*
534  * call-seq: match?(pattern)
535  *
536  * Tests whether the given +pattern+ is matched from the current scan pointer.
537  * Returns the length of the match, or +nil+.  The scan pointer is not advanced.
538  *
539  *   s = StringScanner.new('test string')
540  *   p s.match?(/\w+/)   # -> 4
541  *   p s.match?(/\w+/)   # -> 4
542  *   p s.match?(/\s+/)   # -> nil
543  */
544 static VALUE
strscan_match_p(VALUE self,VALUE re)545 strscan_match_p(VALUE self, VALUE re)
546 {
547     return strscan_do_scan(self, re, 0, 0, 1);
548 }
549 
550 /*
551  * call-seq: skip(pattern)
552  *
553  * Attempts to skip over the given +pattern+ beginning with the scan pointer.
554  * If it matches, the scan pointer is advanced to the end of the match, and the
555  * length of the match is returned.  Otherwise, +nil+ is returned.
556  *
557  * It's similar to #scan, but without returning the matched string.
558  *
559  *   s = StringScanner.new('test string')
560  *   p s.skip(/\w+/)   # -> 4
561  *   p s.skip(/\w+/)   # -> nil
562  *   p s.skip(/\s+/)   # -> 1
563  *   p s.skip(/\w+/)   # -> 6
564  *   p s.skip(/./)     # -> nil
565  *
566  */
567 static VALUE
strscan_skip(VALUE self,VALUE re)568 strscan_skip(VALUE self, VALUE re)
569 {
570     return strscan_do_scan(self, re, 1, 0, 1);
571 }
572 
573 /*
574  * call-seq: check(pattern)
575  *
576  * This returns the value that #scan would return, without advancing the scan
577  * pointer.  The match register is affected, though.
578  *
579  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
580  *   s.check /Fri/               # -> "Fri"
581  *   s.pos                       # -> 0
582  *   s.matched                   # -> "Fri"
583  *   s.check /12/                # -> nil
584  *   s.matched                   # -> nil
585  *
586  * Mnemonic: it "checks" to see whether a #scan will return a value.
587  */
588 static VALUE
strscan_check(VALUE self,VALUE re)589 strscan_check(VALUE self, VALUE re)
590 {
591     return strscan_do_scan(self, re, 0, 1, 1);
592 }
593 
594 /*
595  * call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
596  *
597  * Tests whether the given +pattern+ is matched from the current scan pointer.
598  * Advances the scan pointer if +advance_pointer_p+ is true.
599  * Returns the matched string if +return_string_p+ is true.
600  * The match register is affected.
601  *
602  * "full" means "#scan with full parameters".
603  */
604 static VALUE
strscan_scan_full(VALUE self,VALUE re,VALUE s,VALUE f)605 strscan_scan_full(VALUE self, VALUE re, VALUE s, VALUE f)
606 {
607     return strscan_do_scan(self, re, RTEST(s), RTEST(f), 1);
608 }
609 
610 /*
611  * call-seq: scan_until(pattern)
612  *
613  * Scans the string _until_ the +pattern+ is matched.  Returns the substring up
614  * to and including the end of the match, advancing the scan pointer to that
615  * location. If there is no match, +nil+ is returned.
616  *
617  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
618  *   s.scan_until(/1/)        # -> "Fri Dec 1"
619  *   s.pre_match              # -> "Fri Dec "
620  *   s.scan_until(/XYZ/)      # -> nil
621  */
622 static VALUE
strscan_scan_until(VALUE self,VALUE re)623 strscan_scan_until(VALUE self, VALUE re)
624 {
625     return strscan_do_scan(self, re, 1, 1, 0);
626 }
627 
628 /*
629  * call-seq: exist?(pattern)
630  *
631  * Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
632  * without advancing the scan pointer.  This predicates whether a #scan_until
633  * will return a value.
634  *
635  *   s = StringScanner.new('test string')
636  *   s.exist? /s/            # -> 3
637  *   s.scan /test/           # -> "test"
638  *   s.exist? /s/            # -> 2
639  *   s.exist? /e/            # -> nil
640  */
641 static VALUE
strscan_exist_p(VALUE self,VALUE re)642 strscan_exist_p(VALUE self, VALUE re)
643 {
644     return strscan_do_scan(self, re, 0, 0, 0);
645 }
646 
647 /*
648  * call-seq: skip_until(pattern)
649  *
650  * Advances the scan pointer until +pattern+ is matched and consumed.  Returns
651  * the number of bytes advanced, or +nil+ if no match was found.
652  *
 * Look ahead to match +pattern+, and advance the scan pointer to the _end_
 * of the match.  Return the number of bytes advanced, or +nil+ if the
 * match was unsuccessful.
656  *
657  * It's similar to #scan_until, but without returning the intervening string.
658  *
659  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
660  *   s.skip_until /12/           # -> 10
661  *   s                           #
662  */
663 static VALUE
strscan_skip_until(VALUE self,VALUE re)664 strscan_skip_until(VALUE self, VALUE re)
665 {
666     return strscan_do_scan(self, re, 1, 0, 0);
667 }
668 
669 /*
670  * call-seq: check_until(pattern)
671  *
672  * This returns the value that #scan_until would return, without advancing the
673  * scan pointer.  The match register is affected, though.
674  *
675  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
676  *   s.check_until /12/          # -> "Fri Dec 12"
677  *   s.pos                       # -> 0
 *   s.matched                   # -> "12"
679  *
680  * Mnemonic: it "checks" to see whether a #scan_until will return a value.
681  */
682 static VALUE
strscan_check_until(VALUE self,VALUE re)683 strscan_check_until(VALUE self, VALUE re)
684 {
685     return strscan_do_scan(self, re, 0, 1, 0);
686 }
687 
688 /*
689  * call-seq: search_full(pattern, advance_pointer_p, return_string_p)
690  *
691  * Scans the string _until_ the +pattern+ is matched.
692  * Advances the scan pointer if +advance_pointer_p+, otherwise not.
693  * Returns the matched string if +return_string_p+ is true, otherwise
694  * returns the number of bytes advanced.
695  * This method does affect the match register.
696  */
697 static VALUE
strscan_search_full(VALUE self,VALUE re,VALUE s,VALUE f)698 strscan_search_full(VALUE self, VALUE re, VALUE s, VALUE f)
699 {
700     return strscan_do_scan(self, re, RTEST(s), RTEST(f), 0);
701 }
702 
703 static void
adjust_registers_to_matched(struct strscanner * p)704 adjust_registers_to_matched(struct strscanner *p)
705 {
706     onig_region_clear(&(p->regs));
707     onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
708 }
709 
710 /*
711  * Scans one character and returns it.
712  * This method is multibyte character sensitive.
713  *
714  *   s = StringScanner.new("ab")
715  *   s.getch           # => "a"
716  *   s.getch           # => "b"
717  *   s.getch           # => nil
718  *
719  *   $KCODE = 'EUC'
720  *   s = StringScanner.new("\244\242")
721  *   s.getch           # => "\244\242"   # Japanese hira-kana "A" in EUC-JP
722  *   s.getch           # => nil
723  */
724 static VALUE
strscan_getch(VALUE self)725 strscan_getch(VALUE self)
726 {
727     struct strscanner *p;
728     long len;
729 
730     GET_SCANNER(self, p);
731     CLEAR_MATCH_STATUS(p);
732     if (EOS_P(p))
733         return Qnil;
734 
735     len = rb_enc_mbclen(CURPTR(p), S_PEND(p), rb_enc_get(p->str));
736     len = minl(len, S_RESTLEN(p));
737     p->prev = p->curr;
738     p->curr += len;
739     MATCHED(p);
740     adjust_registers_to_matched(p);
741     return extract_range(p, p->prev + p->regs.beg[0],
742                             p->prev + p->regs.end[0]);
743 }
744 
745 /*
746  * Scans one byte and returns it.
747  * This method is not multibyte character sensitive.
748  * See also: #getch.
749  *
750  *   s = StringScanner.new('ab')
751  *   s.get_byte         # => "a"
752  *   s.get_byte         # => "b"
753  *   s.get_byte         # => nil
754  *
755  *   $KCODE = 'EUC'
756  *   s = StringScanner.new("\244\242")
757  *   s.get_byte         # => "\244"
758  *   s.get_byte         # => "\242"
759  *   s.get_byte         # => nil
760  */
761 static VALUE
strscan_get_byte(VALUE self)762 strscan_get_byte(VALUE self)
763 {
764     struct strscanner *p;
765 
766     GET_SCANNER(self, p);
767     CLEAR_MATCH_STATUS(p);
768     if (EOS_P(p))
769         return Qnil;
770 
771     p->prev = p->curr;
772     p->curr++;
773     MATCHED(p);
774     adjust_registers_to_matched(p);
775     return extract_range(p, p->prev + p->regs.beg[0],
776                             p->prev + p->regs.end[0]);
777 }
778 
779 /*
780  * Equivalent to #get_byte.
781  * This method is obsolete; use #get_byte instead.
782  */
783 static VALUE
strscan_getbyte(VALUE self)784 strscan_getbyte(VALUE self)
785 {
786     rb_warning("StringScanner#getbyte is obsolete; use #get_byte instead");
787     return strscan_get_byte(self);
788 }
789 
790 /*
791  * call-seq: peek(len)
792  *
793  * Extracts a string corresponding to <tt>string[pos,len]</tt>, without
794  * advancing the scan pointer.
795  *
796  *   s = StringScanner.new('test string')
797  *   s.peek(7)          # => "test st"
798  *   s.peek(7)          # => "test st"
799  *
800  */
801 static VALUE
strscan_peek(VALUE self,VALUE vlen)802 strscan_peek(VALUE self, VALUE vlen)
803 {
804     struct strscanner *p;
805     long len;
806 
807     GET_SCANNER(self, p);
808 
809     len = NUM2LONG(vlen);
810     if (EOS_P(p))
811         return infect(str_new(p, "", 0), p);
812 
813     len = minl(len, S_RESTLEN(p));
814     return extract_beg_len(p, p->curr, len);
815 }
816 
817 /*
818  * Equivalent to #peek.
819  * This method is obsolete; use #peek instead.
820  */
821 static VALUE
strscan_peep(VALUE self,VALUE vlen)822 strscan_peep(VALUE self, VALUE vlen)
823 {
824     rb_warning("StringScanner#peep is obsolete; use #peek instead");
825     return strscan_peek(self, vlen);
826 }
827 
828 /*
829  * Set the scan pointer to the previous position.  Only one previous position is
830  * remembered, and it changes with each scanning operation.
831  *
832  *   s = StringScanner.new('test string')
833  *   s.scan(/\w+/)        # => "test"
834  *   s.unscan
835  *   s.scan(/../)         # => "te"
836  *   s.scan(/\d/)         # => nil
837  *   s.unscan             # ScanError: unscan failed: previous match record not exist
838  */
839 static VALUE
strscan_unscan(VALUE self)840 strscan_unscan(VALUE self)
841 {
842     struct strscanner *p;
843 
844     GET_SCANNER(self, p);
845     if (! MATCHED_P(p))
846         rb_raise(ScanError, "unscan failed: previous match record not exist");
847     p->curr = p->prev;
848     CLEAR_MATCH_STATUS(p);
849     return self;
850 }
851 
852 /*
853  * Returns +true+ iff the scan pointer is at the beginning of the line.
854  *
855  *   s = StringScanner.new("test\ntest\n")
856  *   s.bol?           # => true
857  *   s.scan(/te/)
858  *   s.bol?           # => false
859  *   s.scan(/st\n/)
860  *   s.bol?           # => true
861  *   s.terminate
862  *   s.bol?           # => true
863  */
864 static VALUE
strscan_bol_p(VALUE self)865 strscan_bol_p(VALUE self)
866 {
867     struct strscanner *p;
868 
869     GET_SCANNER(self, p);
870     if (CURPTR(p) > S_PEND(p)) return Qnil;
871     if (p->curr == 0) return Qtrue;
872     return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
873 }
874 
875 /*
876  * Returns +true+ if the scan pointer is at the end of the string.
877  *
878  *   s = StringScanner.new('test string')
879  *   p s.eos?          # => false
880  *   s.scan(/test/)
881  *   p s.eos?          # => false
882  *   s.terminate
883  *   p s.eos?          # => true
884  */
885 static VALUE
strscan_eos_p(VALUE self)886 strscan_eos_p(VALUE self)
887 {
888     struct strscanner *p;
889 
890     GET_SCANNER(self, p);
891     return EOS_P(p) ? Qtrue : Qfalse;
892 }
893 
894 /*
895  * Equivalent to #eos?.
896  * This method is obsolete, use #eos? instead.
897  */
898 static VALUE
strscan_empty_p(VALUE self)899 strscan_empty_p(VALUE self)
900 {
901     rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
902     return strscan_eos_p(self);
903 }
904 
905 /*
906  * Returns true iff there is more data in the string.  See #eos?.
907  * This method is obsolete; use #eos? instead.
908  *
909  *   s = StringScanner.new('test string')
910  *   s.eos?              # These two
911  *   s.rest?             # are opposites.
912  */
913 static VALUE
strscan_rest_p(VALUE self)914 strscan_rest_p(VALUE self)
915 {
916     struct strscanner *p;
917 
918     GET_SCANNER(self, p);
919     return EOS_P(p) ? Qfalse : Qtrue;
920 }
921 
922 /*
923  * Returns +true+ iff the last match was successful.
924  *
925  *   s = StringScanner.new('test string')
926  *   s.match?(/\w+/)     # => 4
927  *   s.matched?          # => true
928  *   s.match?(/\d+/)     # => nil
929  *   s.matched?          # => false
930  */
931 static VALUE
strscan_matched_p(VALUE self)932 strscan_matched_p(VALUE self)
933 {
934     struct strscanner *p;
935 
936     GET_SCANNER(self, p);
937     return MATCHED_P(p) ? Qtrue : Qfalse;
938 }
939 
940 /*
941  * Returns the last matched string.
942  *
943  *   s = StringScanner.new('test string')
944  *   s.match?(/\w+/)     # -> 4
945  *   s.matched           # -> "test"
946  */
947 static VALUE
strscan_matched(VALUE self)948 strscan_matched(VALUE self)
949 {
950     struct strscanner *p;
951 
952     GET_SCANNER(self, p);
953     if (! MATCHED_P(p)) return Qnil;
954     return extract_range(p, p->prev + p->regs.beg[0],
955                             p->prev + p->regs.end[0]);
956 }
957 
958 /*
959  * Returns the size of the most recent match (see #matched), or +nil+ if there
960  * was no recent match.
961  *
962  *   s = StringScanner.new('test string')
963  *   s.check /\w+/           # -> "test"
964  *   s.matched_size          # -> 4
965  *   s.check /\d+/           # -> nil
966  *   s.matched_size          # -> nil
967  */
968 static VALUE
strscan_matched_size(VALUE self)969 strscan_matched_size(VALUE self)
970 {
971     struct strscanner *p;
972 
973     GET_SCANNER(self, p);
974     if (! MATCHED_P(p)) return Qnil;
975     return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
976 }
977 
978 static int
name_to_backref_number(struct re_registers * regs,VALUE regexp,const char * name,const char * name_end,rb_encoding * enc)979 name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end, rb_encoding *enc)
980 {
981     int num;
982 
983     num = onig_name_to_backref_number(RREGEXP_PTR(regexp),
984 	(const unsigned char* )name, (const unsigned char* )name_end, regs);
985     if (num >= 1) {
986 	return num;
987     }
988     else {
989 	rb_enc_raise(enc, rb_eIndexError, "undefined group name reference: %.*s",
990 					  rb_long2int(name_end - name), name);
991     }
992 
993     UNREACHABLE;
994 }
995 
996 /*
997  * call-seq: [](n)
998  *
999  * Returns the n-th subgroup in the most recent match.
1000  *
1001  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
1002  *   s.scan(/(\w+) (\w+) (\d+) /)       # -> "Fri Dec 12 "
1003  *   s[0]                               # -> "Fri Dec 12 "
1004  *   s[1]                               # -> "Fri"
1005  *   s[2]                               # -> "Dec"
1006  *   s[3]                               # -> "12"
1007  *   s.post_match                       # -> "1975 14:39"
1008  *   s.pre_match                        # -> ""
1009  *
1010  *   s.reset
1011  *   s.scan(/(?<wday>\w+) (?<month>\w+) (?<day>\d+) /)       # -> "Fri Dec 12 "
1012  *   s[0]                               # -> "Fri Dec 12 "
1013  *   s[1]                               # -> "Fri"
1014  *   s[2]                               # -> "Dec"
1015  *   s[3]                               # -> "12"
1016  *   s[:wday]                           # -> "Fri"
1017  *   s[:month]                          # -> "Dec"
1018  *   s[:day]                            # -> "12"
1019  *   s.post_match                       # -> "1975 14:39"
1020  *   s.pre_match                        # -> ""
1021  */
1022 static VALUE
strscan_aref(VALUE self,VALUE idx)1023 strscan_aref(VALUE self, VALUE idx)
1024 {
1025     const char *name;
1026     struct strscanner *p;
1027     long i;
1028 
1029     GET_SCANNER(self, p);
1030     if (! MATCHED_P(p))        return Qnil;
1031 
1032     switch (TYPE(idx)) {
1033         case T_SYMBOL:
1034             idx = rb_sym2str(idx);
1035             /* fall through */
1036         case T_STRING:
1037             if (!p->regex) return Qnil;
1038             RSTRING_GETMEM(idx, name, i);
1039             i = name_to_backref_number(&(p->regs), p->regex, name, name + i, rb_enc_get(idx));
1040             break;
1041         default:
1042             i = NUM2LONG(idx);
1043     }
1044 
1045     if (i < 0)
1046         i += p->regs.num_regs;
1047     if (i < 0)                 return Qnil;
1048     if (i >= p->regs.num_regs) return Qnil;
1049     if (p->regs.beg[i] == -1)  return Qnil;
1050 
1051     return extract_range(p, p->prev + p->regs.beg[i],
1052                             p->prev + p->regs.end[i]);
1053 }
1054 
1055 /*
1056  * call-seq: size
1057  *
1058  * Returns the amount of subgroups in the most recent match.
1059  * The full match counts as a subgroup.
1060  *
1061  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
1062  *   s.scan(/(\w+) (\w+) (\d+) /)       # -> "Fri Dec 12 "
1063  *   s.size                             # -> 4
1064  */
1065 static VALUE
strscan_size(VALUE self)1066 strscan_size(VALUE self)
1067 {
1068     struct strscanner *p;
1069 
1070     GET_SCANNER(self, p);
1071     if (! MATCHED_P(p))        return Qnil;
1072     return INT2FIX(p->regs.num_regs);
1073 }
1074 
1075 /*
1076  * call-seq: captures
1077  *
1078  * Returns the subgroups in the most recent match (not including the full match).
1079  * If nothing was priorly matched, it returns nil.
1080  *
1081  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
1082  *   s.scan(/(\w+) (\w+) (\d+) /)       # -> "Fri Dec 12 "
1083  *   s.captures                         # -> ["Fri", "Dec", "12"]
1084  *   s.scan(/(\w+) (\w+) (\d+) /)       # -> nil
1085  *   s.captures                         # -> nil
1086  */
1087 static VALUE
strscan_captures(VALUE self)1088 strscan_captures(VALUE self)
1089 {
1090     struct strscanner *p;
1091     int   i, num_regs;
1092     VALUE new_ary;
1093 
1094     GET_SCANNER(self, p);
1095     if (! MATCHED_P(p))        return Qnil;
1096 
1097     num_regs = p->regs.num_regs;
1098     new_ary  = rb_ary_new2(num_regs);
1099 
1100     for (i = 1; i < num_regs; i++) {
1101         VALUE str = extract_range(p, p->prev + p->regs.beg[i],
1102                                      p->prev + p->regs.end[i]);
1103         rb_ary_push(new_ary, str);
1104     }
1105 
1106     return new_ary;
1107 }
1108 
1109 /*
1110  *  call-seq:
1111  *     scanner.values_at( i1, i2, ... iN )   -> an_array
1112  *
1113  * Returns the subgroups in the most recent match at the given indices.
1114  * If nothing was priorly matched, it returns nil.
1115  *
1116  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
1117  *   s.scan(/(\w+) (\w+) (\d+) /)       # -> "Fri Dec 12 "
1118  *   s.values_at 0, -1, 5, 2            # -> ["Fri Dec 12 ", "12", nil, "Dec"]
1119  *   s.scan(/(\w+) (\w+) (\d+) /)       # -> nil
1120  *   s.values_at 0, -1, 5, 2            # -> nil
1121  */
1122 
1123 static VALUE
strscan_values_at(int argc,VALUE * argv,VALUE self)1124 strscan_values_at(int argc, VALUE *argv, VALUE self)
1125 {
1126     struct strscanner *p;
1127     long i;
1128     VALUE new_ary;
1129 
1130     GET_SCANNER(self, p);
1131     if (! MATCHED_P(p))        return Qnil;
1132 
1133     new_ary = rb_ary_new2(argc);
1134     for (i = 0; i<argc; i++) {
1135         rb_ary_push(new_ary, strscan_aref(self, argv[i]));
1136     }
1137 
1138     return new_ary;
1139 }
1140 
1141 /*
1142  * Returns the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
1143  *
1144  *   s = StringScanner.new('test string')
1145  *   s.scan(/\w+/)           # -> "test"
1146  *   s.scan(/\s+/)           # -> " "
1147  *   s.pre_match             # -> "test"
1148  *   s.post_match            # -> "string"
1149  */
1150 static VALUE
strscan_pre_match(VALUE self)1151 strscan_pre_match(VALUE self)
1152 {
1153     struct strscanner *p;
1154 
1155     GET_SCANNER(self, p);
1156     if (! MATCHED_P(p)) return Qnil;
1157     return extract_range(p, 0, p->prev + p->regs.beg[0]);
1158 }
1159 
1160 /*
1161  * Returns the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
1162  *
1163  *   s = StringScanner.new('test string')
1164  *   s.scan(/\w+/)           # -> "test"
1165  *   s.scan(/\s+/)           # -> " "
1166  *   s.pre_match             # -> "test"
1167  *   s.post_match            # -> "string"
1168  */
1169 static VALUE
strscan_post_match(VALUE self)1170 strscan_post_match(VALUE self)
1171 {
1172     struct strscanner *p;
1173 
1174     GET_SCANNER(self, p);
1175     if (! MATCHED_P(p)) return Qnil;
1176     return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
1177 }
1178 
1179 /*
1180  * Returns the "rest" of the string (i.e. everything after the scan pointer).
1181  * If there is no more data (eos? = true), it returns <tt>""</tt>.
1182  */
1183 static VALUE
strscan_rest(VALUE self)1184 strscan_rest(VALUE self)
1185 {
1186     struct strscanner *p;
1187 
1188     GET_SCANNER(self, p);
1189     if (EOS_P(p)) {
1190         return infect(str_new(p, "", 0), p);
1191     }
1192     return extract_range(p, p->curr, S_LEN(p));
1193 }
1194 
1195 /*
1196  * <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
1197  */
1198 static VALUE
strscan_rest_size(VALUE self)1199 strscan_rest_size(VALUE self)
1200 {
1201     struct strscanner *p;
1202     long i;
1203 
1204     GET_SCANNER(self, p);
1205     if (EOS_P(p)) {
1206         return INT2FIX(0);
1207     }
1208     i = S_RESTLEN(p);
1209     return INT2FIX(i);
1210 }
1211 
1212 /*
1213  * <tt>s.restsize</tt> is equivalent to <tt>s.rest_size</tt>.
1214  * This method is obsolete; use #rest_size instead.
1215  */
1216 static VALUE
strscan_restsize(VALUE self)1217 strscan_restsize(VALUE self)
1218 {
1219     rb_warning("StringScanner#restsize is obsolete; use #rest_size instead");
1220     return strscan_rest_size(self);
1221 }
1222 
1223 #define INSPECT_LENGTH 5
1224 
1225 /*
1226  * Returns a string that represents the StringScanner object, showing:
1227  * - the current position
1228  * - the size of the string
1229  * - the characters surrounding the scan pointer
1230  *
1231  *   s = StringScanner.new("Fri Dec 12 1975 14:39")
1232  *   s.inspect            # -> '#<StringScanner 0/21 @ "Fri D...">'
1233  *   s.scan_until /12/    # -> "Fri Dec 12"
1234  *   s.inspect            # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
1235  */
1236 static VALUE
strscan_inspect(VALUE self)1237 strscan_inspect(VALUE self)
1238 {
1239     struct strscanner *p;
1240     VALUE a, b;
1241 
1242     p = check_strscan(self);
1243     if (NIL_P(p->str)) {
1244 	a = rb_sprintf("#<%"PRIsVALUE" (uninitialized)>", rb_obj_class(self));
1245 	return infect(a, p);
1246     }
1247     if (EOS_P(p)) {
1248 	a = rb_sprintf("#<%"PRIsVALUE" fin>", rb_obj_class(self));
1249 	return infect(a, p);
1250     }
1251     if (p->curr == 0) {
1252 	b = inspect2(p);
1253 	a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld @ %"PRIsVALUE">",
1254 		       rb_obj_class(self),
1255 		       p->curr, S_LEN(p),
1256 		       b);
1257 	return infect(a, p);
1258     }
1259     a = inspect1(p);
1260     b = inspect2(p);
1261     a = rb_sprintf("#<%"PRIsVALUE" %ld/%ld %"PRIsVALUE" @ %"PRIsVALUE">",
1262 		   rb_obj_class(self),
1263 		   p->curr, S_LEN(p),
1264 		   a, b);
1265     return infect(a, p);
1266 }
1267 
1268 static VALUE
inspect1(struct strscanner * p)1269 inspect1(struct strscanner *p)
1270 {
1271     VALUE str;
1272     long len;
1273 
1274     if (p->curr == 0) return rb_str_new2("");
1275     if (p->curr > INSPECT_LENGTH) {
1276 	str = rb_str_new_cstr("...");
1277 	len = INSPECT_LENGTH;
1278     }
1279     else {
1280 	str = rb_str_new(0, 0);
1281 	len = p->curr;
1282     }
1283     rb_str_cat(str, CURPTR(p) - len, len);
1284     return rb_str_dump(str);
1285 }
1286 
1287 static VALUE
inspect2(struct strscanner * p)1288 inspect2(struct strscanner *p)
1289 {
1290     VALUE str;
1291     long len;
1292 
1293     if (EOS_P(p)) return rb_str_new2("");
1294     len = S_RESTLEN(p);
1295     if (len > INSPECT_LENGTH) {
1296 	str = rb_str_new(CURPTR(p), INSPECT_LENGTH);
1297 	rb_str_cat2(str, "...");
1298     }
1299     else {
1300 	str = rb_str_new(CURPTR(p), len);
1301     }
1302     return rb_str_dump(str);
1303 }
1304 
1305 /* =======================================================================
1306                               Ruby Interface
1307    ======================================================================= */
1308 
1309 /*
1310  * Document-class: StringScanner
1311  *
1312  * StringScanner provides for lexical scanning operations on a String.  Here is
1313  * an example of its usage:
1314  *
1315  *   s = StringScanner.new('This is an example string')
1316  *   s.eos?               # -> false
1317  *
1318  *   p s.scan(/\w+/)      # -> "This"
1319  *   p s.scan(/\w+/)      # -> nil
1320  *   p s.scan(/\s+/)      # -> " "
1321  *   p s.scan(/\s+/)      # -> nil
1322  *   p s.scan(/\w+/)      # -> "is"
1323  *   s.eos?               # -> false
1324  *
1325  *   p s.scan(/\s+/)      # -> " "
1326  *   p s.scan(/\w+/)      # -> "an"
1327  *   p s.scan(/\s+/)      # -> " "
1328  *   p s.scan(/\w+/)      # -> "example"
1329  *   p s.scan(/\s+/)      # -> " "
1330  *   p s.scan(/\w+/)      # -> "string"
1331  *   s.eos?               # -> true
1332  *
1333  *   p s.scan(/\s+/)      # -> nil
1334  *   p s.scan(/\w+/)      # -> nil
1335  *
1336  * Scanning a string means remembering the position of a <i>scan pointer</i>,
1337  * which is just an index.  The point of scanning is to move forward a bit at
1338  * a time, so matches are sought after the scan pointer; usually immediately
1339  * after it.
1340  *
1341  * Given the string "test string", here are the pertinent scan pointer
1342  * positions:
1343  *
1344  *     t e s t   s t r i n g
1345  *   0 1 2 ...             1
1346  *                         0
1347  *
1348  * When you #scan for a pattern (a regular expression), the match must occur
1349  * at the character after the scan pointer.  If you use #scan_until, then the
1350  * match can occur anywhere after the scan pointer.  In both cases, the scan
1351  * pointer moves <i>just beyond</i> the last character of the match, ready to
1352  * scan again from the next character onwards.  This is demonstrated by the
1353  * example above.
1354  *
1355  * == Method Categories
1356  *
1357  * There are other methods besides the plain scanners.  You can look ahead in
1358  * the string without actually scanning.  You can access the most recent match.
1359  * You can modify the string being scanned, reset or terminate the scanner,
1360  * find out or change the position of the scan pointer, skip ahead, and so on.
1361  *
1362  * === Advancing the Scan Pointer
1363  *
1364  * - #getch
1365  * - #get_byte
1366  * - #scan
1367  * - #scan_until
1368  * - #skip
1369  * - #skip_until
1370  *
1371  * === Looking Ahead
1372  *
1373  * - #check
1374  * - #check_until
1375  * - #exist?
1376  * - #match?
1377  * - #peek
1378  *
1379  * === Finding Where we Are
1380  *
1381  * - #beginning_of_line? (#bol?)
1382  * - #eos?
1383  * - #rest?
1384  * - #rest_size
1385  * - #pos
1386  *
1387  * === Setting Where we Are
1388  *
1389  * - #reset
1390  * - #terminate
1391  * - #pos=
1392  *
 * === Match Data
 *
 * - #matched
 * - #matched?
 * - #matched_size
 * - []
 * - #pre_match
 * - #post_match
 * - #size
 * - #captures
 * - #values_at
1401  *
1402  * === Miscellaneous
1403  *
1404  * - <<
1405  * - #concat
1406  * - #string
1407  * - #string=
1408  * - #unscan
1409  *
1410  * There are aliases to several of the methods.
1411  */
1412 void
Init_strscan(void)1413 Init_strscan(void)
1414 {
1415 #undef rb_intern
1416     ID id_scanerr = rb_intern("ScanError");
1417     VALUE tmp;
1418 
1419     id_byteslice = rb_intern("byteslice");
1420 
1421     StringScanner = rb_define_class("StringScanner", rb_cObject);
1422     ScanError = rb_define_class_under(StringScanner, "Error", rb_eStandardError);
1423     if (!rb_const_defined(rb_cObject, id_scanerr)) {
1424 	rb_const_set(rb_cObject, id_scanerr, ScanError);
1425     }
1426     tmp = rb_str_new2(STRSCAN_VERSION);
1427     rb_obj_freeze(tmp);
1428     rb_const_set(StringScanner, rb_intern("Version"), tmp);
1429     tmp = rb_str_new2("$Id: strscan.c 62429 2018-02-16 08:39:48Z nobu $");
1430     rb_obj_freeze(tmp);
1431     rb_const_set(StringScanner, rb_intern("Id"), tmp);
1432 
1433     rb_define_alloc_func(StringScanner, strscan_s_allocate);
1434     rb_define_private_method(StringScanner, "initialize", strscan_initialize, -1);
1435     rb_define_private_method(StringScanner, "initialize_copy", strscan_init_copy, 1);
1436     rb_define_singleton_method(StringScanner, "must_C_version", strscan_s_mustc, 0);
1437     rb_define_method(StringScanner, "reset",       strscan_reset,       0);
1438     rb_define_method(StringScanner, "terminate",   strscan_terminate,   0);
1439     rb_define_method(StringScanner, "clear",       strscan_clear,       0);
1440     rb_define_method(StringScanner, "string",      strscan_get_string,  0);
1441     rb_define_method(StringScanner, "string=",     strscan_set_string,  1);
1442     rb_define_method(StringScanner, "concat",      strscan_concat,      1);
1443     rb_define_method(StringScanner, "<<",          strscan_concat,      1);
1444     rb_define_method(StringScanner, "pos",         strscan_get_pos,     0);
1445     rb_define_method(StringScanner, "pos=",        strscan_set_pos,     1);
1446     rb_define_method(StringScanner, "charpos",     strscan_get_charpos, 0);
1447     rb_define_method(StringScanner, "pointer",     strscan_get_pos,     0);
1448     rb_define_method(StringScanner, "pointer=",    strscan_set_pos,     1);
1449 
1450     rb_define_method(StringScanner, "scan",        strscan_scan,        1);
1451     rb_define_method(StringScanner, "skip",        strscan_skip,        1);
1452     rb_define_method(StringScanner, "match?",      strscan_match_p,     1);
1453     rb_define_method(StringScanner, "check",       strscan_check,       1);
1454     rb_define_method(StringScanner, "scan_full",   strscan_scan_full,   3);
1455 
1456     rb_define_method(StringScanner, "scan_until",  strscan_scan_until,  1);
1457     rb_define_method(StringScanner, "skip_until",  strscan_skip_until,  1);
1458     rb_define_method(StringScanner, "exist?",      strscan_exist_p,     1);
1459     rb_define_method(StringScanner, "check_until", strscan_check_until, 1);
1460     rb_define_method(StringScanner, "search_full", strscan_search_full, 3);
1461 
1462     rb_define_method(StringScanner, "getch",       strscan_getch,       0);
1463     rb_define_method(StringScanner, "get_byte",    strscan_get_byte,    0);
1464     rb_define_method(StringScanner, "getbyte",     strscan_getbyte,     0);
1465     rb_define_method(StringScanner, "peek",        strscan_peek,        1);
1466     rb_define_method(StringScanner, "peep",        strscan_peep,        1);
1467 
1468     rb_define_method(StringScanner, "unscan",      strscan_unscan,      0);
1469 
1470     rb_define_method(StringScanner, "beginning_of_line?", strscan_bol_p, 0);
1471     rb_alias(StringScanner, rb_intern("bol?"), rb_intern("beginning_of_line?"));
1472     rb_define_method(StringScanner, "eos?",        strscan_eos_p,       0);
1473     rb_define_method(StringScanner, "empty?",      strscan_empty_p,     0);
1474     rb_define_method(StringScanner, "rest?",       strscan_rest_p,      0);
1475 
1476     rb_define_method(StringScanner, "matched?",    strscan_matched_p,   0);
1477     rb_define_method(StringScanner, "matched",     strscan_matched,     0);
1478     rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
1479     rb_define_method(StringScanner, "[]",          strscan_aref,        1);
1480     rb_define_method(StringScanner, "pre_match",   strscan_pre_match,   0);
1481     rb_define_method(StringScanner, "post_match",  strscan_post_match,  0);
1482     rb_define_method(StringScanner, "size",        strscan_size,        0);
1483     rb_define_method(StringScanner, "captures",    strscan_captures,    0);
1484     rb_define_method(StringScanner, "values_at",   strscan_values_at,  -1);
1485 
1486     rb_define_method(StringScanner, "rest",        strscan_rest,        0);
1487     rb_define_method(StringScanner, "rest_size",   strscan_rest_size,   0);
1488     rb_define_method(StringScanner, "restsize",    strscan_restsize,    0);
1489 
1490     rb_define_method(StringScanner, "inspect",     strscan_inspect,     0);
1491 }
1492