1 #include "ml_string.h"
2 #include "minilang.h"
3 #include "ml_macros.h"
4 #include "ml_iterfns.h"
5 #include <string.h>
6 #include <ctype.h>
7 #include <inttypes.h>
8 #include <math.h>
9 #include <float.h>
10 #include <printf.h>
11 #include <gc/gc_typed.h>
12 #ifdef ML_TRE
13 #include <tre/regex.h>
14 #else
15 #include <regex.h>
16 #endif
17
18 extern ml_type_t MLSomeT[];
19
ML_FUNCTION(MLBuffer)20 ML_FUNCTION(MLBuffer) {
21 //!buffer
22 //@buffer
23 //<Length
24 //>buffer
25 ML_CHECK_ARG_COUNT(1);
26 ML_CHECK_ARG_TYPE(0, MLIntegerT);
27 long Size = ml_integer_value_fast(Args[0]);
28 if (Size < 0) return ml_error("ValueError", "Buffer size must be non-negative");
29 ml_buffer_t *Buffer = new(ml_buffer_t);
30 Buffer->Type = MLBufferT;
31 Buffer->Length = Size;
32 Buffer->Value = GC_MALLOC_ATOMIC(Size);
33 return (ml_value_t *)Buffer;
34 }
35
36 ML_TYPE(MLBufferT, (), "buffer",
37 //!buffer
38 .Constructor = (ml_value_t *)MLBuffer
39 );
40
ml_buffer(const char * Value,int Length)41 ml_value_t *ml_buffer(const char *Value, int Length) {
42 ml_string_t *Buffer = new(ml_string_t);
43 Buffer->Type = MLBufferT;
44 Buffer->Value = Value;
45 Buffer->Length = Length;
46 return (ml_value_t *)Buffer;
47 }
48
ml_buffer_value(const ml_value_t * Value)49 const char *ml_buffer_value(const ml_value_t *Value) {
50 return ((ml_buffer_t *)Value)->Value;
51 }
52
ml_buffer_length(const ml_value_t * Value)53 size_t ml_buffer_length(const ml_value_t *Value) {
54 return ((ml_buffer_t *)Value)->Length;
55 }
56
57 ML_METHOD("+", MLBufferT, MLIntegerT) {
58 //!buffer
59 //<Buffer
60 //<Offset
61 //>buffer
62 ml_buffer_t *Buffer = (ml_buffer_t *)Args[0];
63 long Offset = ml_integer_value_fast(Args[1]);
64 if (Offset > Buffer->Length) return ml_error("ValueError", "Offset larger than buffer");
65 ml_buffer_t *Buffer2 = new(ml_buffer_t);
66 Buffer2->Type = MLBufferT;
67 Buffer2->Value = Buffer->Value + Offset;
68 Buffer2->Length = Buffer->Length - Offset;
69 return (ml_value_t *)Buffer2;
70 }
71
72 ML_METHOD("-", MLBufferT, MLBufferT) {
73 //!buffer
74 //<Buffer/1
75 //<Buffer/2
76 //>integer
77 ml_buffer_t *Buffer1 = (ml_buffer_t *)Args[0];
78 ml_buffer_t *Buffer2 = (ml_buffer_t *)Args[1];
79 return ml_integer(Buffer1->Value - Buffer2->Value);
80 }
81
ml_string_hash(ml_string_t * String,ml_hash_chain_t * Chain)82 static long ml_string_hash(ml_string_t *String, ml_hash_chain_t *Chain) {
83 long Hash = String->Hash;
84 if (!Hash) {
85 Hash = 5381;
86 for (int I = 0; I < String->Length; ++I) Hash = ((Hash << 5) + Hash) + String->Value[I];
87 String->Hash = Hash;
88 }
89 return Hash;
90 }
91
92 ML_TYPE(MLStringT, (MLBufferT, MLIteratableT), "string",
93 .hash = (void *)ml_string_hash
94 );
95
ML_METHOD(MLIterCount,MLStringT)96 ML_METHOD(MLIterCount, MLStringT) {
97 return ml_integer(ml_string_length(Args[0]));
98 }
99
ML_METHOD(MLStringT,MLBufferT)100 ML_METHOD(MLStringT, MLBufferT) {
101 //!buffer
102 ml_buffer_t *Buffer = (ml_buffer_t *)Args[0];
103 return ml_string_format("#%" PRIxPTR ":%ld", (uintptr_t)Buffer->Value, Buffer->Length);
104 }
105
ml_string(const char * Value,int Length)106 ml_value_t *ml_string(const char *Value, int Length) {
107 ml_string_t *String = new(ml_string_t);
108 String->Type = MLStringT;
109 if (Length >= 0) {
110 if (Value[Length]) {
111 char *Copy = snew(Length + 1);
112 memcpy(Copy, Value, Length);
113 Copy[Length] = 0;
114 Value = Copy;
115 }
116 } else {
117 Length = Value ? strlen(Value) : 0;
118 }
119 String->Value = Value;
120 String->Length = Length;
121 return (ml_value_t *)String;
122 }
123
ml_string_value(const ml_value_t * Value)124 const char *ml_string_value(const ml_value_t *Value) {
125 return ((ml_string_t *)Value)->Value;
126 }
127
ml_string_length(const ml_value_t * Value)128 size_t ml_string_length(const ml_value_t *Value) {
129 return ((ml_string_t *)Value)->Length;
130 }
131
ml_string_format(const char * Format,...)132 ml_value_t *ml_string_format(const char *Format, ...) {
133 va_list Args;
134 va_start(Args, Format);
135 char *Value;
136 int Length = vasprintf(&Value, Format, Args);
137 va_end(Args);
138 return ml_string(Value, Length);
139 }
140
141
ML_METHOD(MLStringT,MLNilT)142 ML_METHOD(MLStringT, MLNilT) {
143 return ml_cstring("nil");
144 }
145
ML_METHOD(MLStringT,MLSomeT)146 ML_METHOD(MLStringT, MLSomeT) {
147 return ml_cstring("some");
148 }
149
ML_METHOD(MLStringT,MLBooleanT)150 ML_METHOD(MLStringT, MLBooleanT) {
151 //!boolean
152 ml_boolean_t *Boolean = (ml_boolean_t *)Args[0];
153 return ml_string(Boolean->Name, -1);
154 }
155
ML_METHOD(MLStringT,MLIntegerT)156 ML_METHOD(MLStringT, MLIntegerT) {
157 //!number
158 char *Value;
159 int Length = asprintf(&Value, "%ld", ml_integer_value_fast(Args[0]));
160 return ml_string(Value, Length);
161 }
162
ML_METHOD(MLStringT,MLIntegerT,MLIntegerT)163 ML_METHOD(MLStringT, MLIntegerT, MLIntegerT) {
164 //!number
165 int64_t Value = ml_integer_value_fast(Args[0]);
166 int Base = ml_integer_value_fast(Args[1]);
167 if (Base < 2 || Base > 36) return ml_error("RangeError", "Invalid base");
168 int Max = 65;
169 char *P = GC_MALLOC_ATOMIC(Max + 1) + Max, *Q = P;
170 *P = '\0';
171 int64_t Neg = Value < 0 ? Value : -Value;
172 do {
173 *--P = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[-(Neg % Base)];
174 Neg /= Base;
175 } while (Neg);
176 if (Value < 0) *--P = '-';
177 return ml_string(P, Q - P);
178 }
179
180 static regex_t IntFormat[1];
181 static regex_t LongFormat[1];
182 static regex_t RealFormat[1];
183
ML_METHOD(MLStringT,MLIntegerT,MLStringT)184 ML_METHOD(MLStringT, MLIntegerT, MLStringT) {
185 const char *Format = ml_string_value(Args[1]);
186 int64_t Value = ml_integer_value_fast(Args[0]);
187 char *String;
188 int Length;
189 if (!regexec(IntFormat, Format, 0, NULL, 0)) {
190 Length = asprintf(&String, Format, (int)Value);
191 } else if (!regexec(LongFormat, Format, 0, NULL, 0)) {
192 Length = asprintf(&String, Format, (long)Value);
193 } else if (!regexec(RealFormat, Format, 0, NULL, 0)) {
194 Length = asprintf(&String, Format, (double)Value);
195 } else {
196 return ml_error("FormatError", "Invalid format string");
197 }
198 return ml_string(String, Length);
199 }
200
ML_METHOD(MLStringT,MLDoubleT)201 ML_METHOD(MLStringT, MLDoubleT) {
202 //!number
203 char *String;
204 int Length = asprintf(&String, "%g", ml_double_value_fast(Args[0]));
205 return ml_string(String, Length);
206 }
207
ML_METHOD(MLStringT,MLDoubleT,MLStringT)208 ML_METHOD(MLStringT, MLDoubleT, MLStringT) {
209 const char *Format = ml_string_value(Args[1]);
210 double Value = ml_double_value_fast(Args[0]);
211 char *String;
212 int Length;
213 if (!regexec(IntFormat, Format, 0, NULL, 0)) {
214 Length = asprintf(&String, Format, (int)Value);
215 } else if (!regexec(LongFormat, Format, 0, NULL, 0)) {
216 Length = asprintf(&String, Format, (long)Value);
217 } else if (!regexec(RealFormat, Format, 0, NULL, 0)) {
218 Length = asprintf(&String, Format, (double)Value);
219 } else {
220 return ml_error("FormatError", "Invalid format string");
221 }
222 return ml_string(String, Length);
223 }
224
225 #ifdef ML_COMPLEX
226
ML_METHOD(MLStringT,MLComplexT)227 ML_METHOD(MLStringT, MLComplexT) {
228 //!number
229 complex double Complex = ml_complex_value_fast(Args[0]);
230 char *String;
231 int Length;
232 double Real = creal(Complex);
233 double Imag = cimag(Complex);
234 if (fabs(Real) <= DBL_EPSILON) {
235 if (fabs(Imag - 1) <= DBL_EPSILON) {
236 String = "i";
237 Length = 1;
238 } else if (fabs(Imag) <= DBL_EPSILON) {
239 String = "0";
240 Length = 1;
241 } else {
242 Length = asprintf(&String, "%gi", Imag);
243 }
244 } else if (fabs(Imag) <= DBL_EPSILON) {
245 Length = asprintf(&String, "%g", Real);
246 } else if (Imag < 0) {
247 if (fabs(Imag + 1) <= DBL_EPSILON) {
248 Length = asprintf(&String, "%g - i", Real);
249 } else {
250 Length = asprintf(&String, "%g - %gi", Real, -Imag);
251 }
252 } else {
253 if (fabs(Imag - 1) <= DBL_EPSILON) {
254 Length = asprintf(&String, "%g + i", Real);
255 } else {
256 Length = asprintf(&String, "%g + %gi", Real, Imag);
257 }
258 }
259 return ml_string(String, Length);
260 }
261
ML_METHOD(MLStringT,MLComplexT,MLStringT)262 ML_METHOD(MLStringT, MLComplexT, MLStringT) {
263 const char *Format = ml_string_value(Args[1]);
264 if (regexec(RealFormat, Format, 0, NULL, 0)) {
265 return ml_error("FormatError", "Invalid format string");
266 }
267 complex double Complex = ml_complex_value_fast(Args[0]);
268 double Real = creal(Complex);
269 double Imag = cimag(Complex);
270 char *ComplexFormat;
271 if (Imag < 0) {
272 Imag = -Imag;
273 asprintf(&ComplexFormat, "%s - %si", Format, Format);
274 } else {
275 asprintf(&ComplexFormat, "%s + %si", Format, Format);
276 }
277 return ml_string_format(ComplexFormat, Real, Imag);
278 }
279
280 #endif
281
ML_METHOD(MLIntegerT,MLStringT)282 ML_METHOD(MLIntegerT, MLStringT) {
283 //!number
284 const char *Start = ml_string_value(Args[0]);
285 char *End;
286 long Value = strtol(Start, &End, 10);
287 if (End - Start == ml_string_length(Args[0])) {
288 return ml_integer(Value);
289 } else {
290 return ml_error("ValueError", "Error parsing integer");
291 }
292 }
293
ML_METHOD(MLIntegerT,MLStringT,MLIntegerT)294 ML_METHOD(MLIntegerT, MLStringT, MLIntegerT) {
295 //!number
296 const char *Start = ml_string_value(Args[0]);
297 char *End;
298 long Value = strtol(Start, &End, ml_integer_value_fast(Args[1]));
299 if (End - Start == ml_string_length(Args[0])) {
300 return ml_integer(Value);
301 } else {
302 return ml_error("ValueError", "Error parsing integer");
303 }
304 }
305
ML_METHOD(MLDoubleT,MLStringT)306 ML_METHOD(MLDoubleT, MLStringT) {
307 //!number
308 const char *Start = ml_string_value(Args[0]);
309 char *End;
310 double Value = strtod(Start, &End);
311 if (End - Start == ml_string_length(Args[0])) {
312 return ml_real(Value);
313 } else {
314 return ml_error("ValueError", "Error parsing real");
315 }
316 }
317
ML_METHOD(MLRealT,MLStringT)318 ML_METHOD(MLRealT, MLStringT) {
319 //!number
320 const char *Start = ml_string_value(Args[0]);
321 char *End;
322 double Value = strtod(Start, &End);
323 if (End - Start == ml_string_length(Args[0])) {
324 return ml_real(Value);
325 } else {
326 return ml_error("ValueError", "Error parsing real");
327 }
328 }
329
330 #ifdef ML_COMPLEX
331
ML_METHOD(MLComplexT,MLStringT)332 ML_METHOD(MLComplexT, MLStringT) {
333 //!number
334 const char *Start = ml_string_value(Args[0]);
335 int Length = ml_string_length(Args[0]);
336 char *End = (char *)Start;
337 #ifdef ML_COMPLEX
338 if (End[0] == 'i') {
339 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
340 return ml_complex(_Complex_I);
341 }
342 #endif
343 long Integer = strtol(Start, &End, 10);
344 #ifdef ML_COMPLEX
345 if (End[0] == 'i') {
346 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
347 return ml_complex(Integer * _Complex_I);
348 }
349 #endif
350 if (End - Start == Length) return ml_complex(Integer);
351 double Real = strtod(Start, &End);
352 #ifdef ML_COMPLEX
353 if (End[0] == 'i') {
354 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
355 return ml_complex(Real * _Complex_I);
356 }
357 #endif
358 if (End - Start == Length) return ml_complex(Real);
359 #ifdef ML_COMPLEX
360 if (End[0] == ' ') ++End;
361 if (End[0] == '+') {
362 ++End;
363 if (End[0] == ' ') ++End;
364 if (End[0] == 'i') {
365 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
366 return ml_complex(Real + _Complex_I);
367 }
368 double Imag = strtod(End, &End);
369 if (End[0] == 'i') {
370 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
371 return ml_complex(Real + Imag * _Complex_I);
372 }
373 } else if (End[0] == '-') {
374 ++End;
375 if (End[0] == ' ') ++End;
376 if (End[0] == 'i') {
377 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
378 return ml_complex(Real - _Complex_I);
379 }
380 double Imag = strtod(End, &End);
381 if (End[0] == 'i') {
382 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
383 return ml_complex(Real - Imag * _Complex_I);
384 }
385 }
386 #endif
387 return ml_error("ValueError", "Error parsing number");
388 }
389
390 #endif
391
ML_METHOD(MLNumberT,MLStringT)392 ML_METHOD(MLNumberT, MLStringT) {
393 //!number
394 const char *Start = ml_string_value(Args[0]);
395 int Length = ml_string_length(Args[0]);
396 char *End = (char *)Start;
397 #ifdef ML_COMPLEX
398 if (End[0] == 'i') {
399 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
400 return ml_complex(_Complex_I);
401 }
402 #endif
403 long Integer = strtol(Start, &End, 10);
404 #ifdef ML_COMPLEX
405 if (End[0] == 'i') {
406 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
407 return ml_complex(Integer * _Complex_I);
408 }
409 #endif
410 if (End - Start == Length) return ml_integer(Integer);
411 double Real = strtod(Start, &End);
412 #ifdef ML_COMPLEX
413 if (End[0] == 'i') {
414 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
415 return ml_complex(Real * _Complex_I);
416 }
417 #endif
418 if (End - Start == Length) return ml_real(Real);
419 #ifdef ML_COMPLEX
420 if (End[0] == ' ') ++End;
421 if (End[0] == '+') {
422 ++End;
423 if (End[0] == ' ') ++End;
424 if (End[0] == 'i') {
425 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
426 return ml_complex(Real + _Complex_I);
427 }
428 double Imag = strtod(End, &End);
429 if (End[0] == 'i') {
430 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
431 return ml_complex(Real + Imag * _Complex_I);
432 }
433 } else if (End[0] == '-') {
434 ++End;
435 if (End[0] == ' ') ++End;
436 if (End[0] == 'i') {
437 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
438 return ml_complex(Real - _Complex_I);
439 }
440 double Imag = strtod(End, &End);
441 if (End[0] == 'i') {
442 if (++End - Start != Length) return ml_error("ValueError", "Error parsing number");
443 return ml_complex(Real - Imag * _Complex_I);
444 }
445 }
446 #endif
447 return ml_error("ValueError", "Error parsing number");
448 }
449
450 typedef struct {
451 ml_type_t *Type;
452 const char *Value;
453 int Index, Length;
454 } ml_string_iterator_t;
455
456 ML_TYPE(MLStringIteratorT, (), "string-iterator");
457 //!internal
458
ML_TYPED_FN(ml_iter_next,MLStringIteratorT,ml_state_t * Caller,ml_string_iterator_t * Iter)459 static void ML_TYPED_FN(ml_iter_next, MLStringIteratorT, ml_state_t *Caller, ml_string_iterator_t *Iter) {
460 if (++Iter->Index > Iter->Length) ML_RETURN(MLNil);
461 ++Iter->Value;
462 ML_RETURN(Iter);
463 }
464
ML_TYPED_FN(ml_iter_value,MLStringIteratorT,ml_state_t * Caller,ml_string_iterator_t * Iter)465 static void ML_TYPED_FN(ml_iter_value, MLStringIteratorT, ml_state_t *Caller, ml_string_iterator_t *Iter) {
466 ML_RETURN(ml_string(Iter->Value, 1));
467 }
468
ML_TYPED_FN(ml_iter_key,MLStringIteratorT,ml_state_t * Caller,ml_string_iterator_t * Iter)469 static void ML_TYPED_FN(ml_iter_key, MLStringIteratorT, ml_state_t *Caller, ml_string_iterator_t *Iter) {
470 ML_RETURN(ml_integer(Iter->Index));
471 }
472
ML_TYPED_FN(ml_iterate,MLStringT,ml_state_t * Caller,ml_value_t * String)473 static void ML_TYPED_FN(ml_iterate, MLStringT, ml_state_t *Caller, ml_value_t *String) {
474 int Length = ml_string_length(String);
475 if (!Length) ML_RETURN(MLNil);
476 ml_string_iterator_t *Iter = new(ml_string_iterator_t);
477 Iter->Type = MLStringIteratorT;
478 Iter->Index = 1;
479 Iter->Length = Length;
480 Iter->Value = ml_string_value(String);
481 ML_RETURN(Iter);
482 }
483
484 typedef struct ml_regex_t ml_regex_t;
485
486 typedef struct ml_regex_t {
487 ml_type_t *Type;
488 const char *Pattern;
489 regex_t Value[1];
490 } ml_regex_t;
491
ml_regex_hash(ml_regex_t * Regex,ml_hash_chain_t * Chain)492 static long ml_regex_hash(ml_regex_t *Regex, ml_hash_chain_t *Chain) {
493 long Hash = 5381;
494 const char *Pattern = Regex->Pattern;
495 while (*Pattern) Hash = ((Hash << 5) + Hash) + *(Pattern++);
496 return Hash;
497 }
498
ML_FUNCTION(MLRegex)499 ML_FUNCTION(MLRegex) {
500 //@regex
501 //<String
502 //>regex | error
503 // Compiles :mini:`String` as a regular expression. Returns an error if :mini:`String` is not a valid regular expression.
504 ML_CHECK_ARG_COUNT(1);
505 ML_CHECK_ARG_TYPE(0, MLStringT);
506 const char *Pattern = ml_string_value(Args[0]);
507 int Length = ml_string_length(Args[0]);
508 if (Pattern[Length]) return ml_error("ValueError", "Regex pattern must be proper string");
509 return ml_regex(Pattern, Length);
510 }
511
512 ML_TYPE(MLRegexT, (), "regex",
513 .hash = (void *)ml_regex_hash,
514 .Constructor = (ml_value_t *)MLRegex
515 );
516
ml_regex(const char * Pattern,int Length)517 ml_value_t *ml_regex(const char *Pattern, int Length) {
518 ml_regex_t *Regex = new(ml_regex_t);
519 Regex->Type = MLRegexT;
520 Regex->Pattern = Pattern;
521 #ifdef ML_TRE
522 int Error = regncomp(Regex->Value, Pattern, Length, REG_EXTENDED);
523 #else
524 int Error = regcomp(Regex->Value, Pattern, REG_EXTENDED);
525 #endif
526 if (Error) {
527 size_t ErrorSize = regerror(Error, Regex->Value, NULL, 0);
528 char *ErrorMessage = snew(ErrorSize + 1);
529 regerror(Error, Regex->Value, ErrorMessage, ErrorSize);
530 return ml_error("RegexError", "regex error: %s", ErrorMessage);
531 }
532 return (ml_value_t *)Regex;
533 }
534
ml_regexi(const char * Pattern,int Length)535 ml_value_t *ml_regexi(const char *Pattern, int Length) {
536 ml_regex_t *Regex = new(ml_regex_t);
537 Regex->Type = MLRegexT;
538 Regex->Pattern = Pattern;
539 #ifdef ML_TRE
540 int Error = regncomp(Regex->Value, Pattern, Length, REG_EXTENDED | REG_ICASE);
541 #else
542 int Error = regcomp(Regex->Value, Pattern, REG_EXTENDED | REG_ICASE);
543 #endif
544 if (Error) {
545 size_t ErrorSize = regerror(Error, Regex->Value, NULL, 0);
546 char *ErrorMessage = snew(ErrorSize + 1);
547 regerror(Error, Regex->Value, ErrorMessage, ErrorSize);
548 return ml_error("RegexError", "regex error: %s", ErrorMessage);
549 }
550 return (ml_value_t *)Regex;
551 }
552
ml_regex_value(const ml_value_t * Value)553 regex_t *ml_regex_value(const ml_value_t *Value) {
554 ml_regex_t *Regex = (ml_regex_t *)Value;
555 return Regex->Value;
556 }
557
ml_regex_pattern(const ml_value_t * Value)558 const char *ml_regex_pattern(const ml_value_t *Value) {
559 ml_regex_t *Regex = (ml_regex_t *)Value;
560 return Regex->Pattern;
561 }
562
563 #ifdef ML_NANBOXING
564
565 #define NegOne ml_int32(-1)
566 #define One ml_int32(1)
567 #define Zero ml_int32(0)
568
569 #else
570
571 static ml_integer_t One[1] = {{MLIntegerT, 1}};
572 static ml_integer_t NegOne[1] = {{MLIntegerT, -1}};
573 static ml_integer_t Zero[1] = {{MLIntegerT, 0}};
574
575 #endif
576
577 ML_METHOD("<>", MLRegexT, MLRegexT) {
578 const char *PatternA = ml_regex_pattern(Args[0]);
579 const char *PatternB = ml_regex_pattern(Args[1]);
580 int Compare = strcmp(PatternA, PatternB);
581 if (Compare < 0) return (ml_value_t *)NegOne;
582 if (Compare > 0) return (ml_value_t *)One;
583 return (ml_value_t *)Zero;
584 }
585
586 #define ml_comp_method_regex_regex(NAME, SYMBOL) \
587 ML_METHOD(NAME, MLRegexT, MLRegexT) { \
588 const char *PatternA = ml_regex_pattern(Args[0]); \
589 const char *PatternB = ml_regex_pattern(Args[1]); \
590 int Compare = strcmp(PatternA, PatternB); \
591 return Compare SYMBOL 0 ? Args[1] : MLNil; \
592 }
593
594 ml_comp_method_regex_regex("=", ==)
595 ml_comp_method_regex_regex("!=", !=)
596 ml_comp_method_regex_regex("<", <)
597 ml_comp_method_regex_regex(">", >)
598 ml_comp_method_regex_regex("<=", <=)
599 ml_comp_method_regex_regex(">=", >=)
600
601 typedef struct {
602 ml_value_t *Index;
603 ml_string_t *String;
604 ml_regex_t *Regex;
605 } ml_string_case_t;
606
607 typedef struct {
608 ml_type_t *Type;
609 ml_string_case_t Cases[];
610 } ml_string_switch_t;
611
ml_string_switch(ml_state_t * Caller,ml_string_switch_t * Switch,int Count,ml_value_t ** Args)612 static void ml_string_switch(ml_state_t *Caller, ml_string_switch_t *Switch, int Count, ml_value_t **Args) {
613 ML_CHECKX_ARG_COUNT(1);
614 ML_CHECKX_ARG_TYPE(0, MLStringT);
615 const char *Subject = ml_string_value(Args[0]);
616 size_t Length = ml_string_length(Args[0]);
617 for (ml_string_case_t *Case = Switch->Cases;; ++Case) {
618 if (Case->String) {
619 if (Case->String->Length == Length) {
620 if (!memcmp(Subject, Case->String->Value, Length)) ML_RETURN(Case->Index);
621 }
622 } else if (Case->Regex) {
623 #ifdef ML_TRE
624 int Length = ml_string_length(Args[0]);
625 if (!regnexec(Case->Regex->Value, Subject, Length, 0, NULL, 0)) {
626
627 #else
628 if (!regexec(Case->Regex->Value, Subject, 0, NULL, 0)) {
629 #endif
630 ML_RETURN(Case->Index);
631 }
632 } else {
633 ML_RETURN(Case->Index);
634 }
635 }
636 ML_RETURN(MLNil);
637 }
638
639 ML_TYPE(MLStringSwitchT, (MLFunctionT), "string-switch",
640 //!internal
641 .call = (void *)ml_string_switch
642 );
643
644 ML_FUNCTION(MLStringSwitch) {
645 //!internal
646 int Total = 1;
647 for (int I = 0; I < Count; ++I) Total += ml_list_length(Args[I]);
648 ml_string_switch_t *Switch = xnew(ml_string_switch_t, Total, ml_string_case_t);
649 Switch->Type = MLStringSwitchT;
650 ml_string_case_t *Case = Switch->Cases;
651 for (int I = 0; I < Count; ++I) {
652 ML_CHECK_ARG_TYPE(I, MLListT);
653 ML_LIST_FOREACH(Args[I], Iter) {
654 ml_value_t *Value = Iter->Value;
655 if (ml_is(Value, MLStringT)) {
656 Case->String = (ml_string_t *)Value;
657 } else if (ml_is(Value, MLRegexT)) {
658 Case->Regex = (ml_regex_t *)Value;
659 } else {
660 return ml_error("ValueError", "Unsupported value in string case");
661 }
662 Case->Index = ml_integer(I);
663 ++Case;
664 }
665 }
666 Case->Index = ml_integer(Count);
667 return (ml_value_t *)Switch;
668 }
669
670
671 ml_value_t *ml_stringbuffer() {
672 ml_stringbuffer_t *Buffer = new(ml_stringbuffer_t);
673 Buffer->Type = MLStringBufferT;
674 return (ml_value_t *)Buffer;
675 }
676
677 ML_FUNCTION(MLStringBuffer) {
678 //@stringbuffer
679 return ml_stringbuffer();
680 }
681
682 ML_TYPE(MLStringBufferT, (), "stringbuffer",
683 .Constructor = (ml_value_t *)MLStringBuffer
684 );
685
686 struct ml_stringbuffer_node_t {
687 ml_stringbuffer_node_t *Next;
688 char Chars[ML_STRINGBUFFER_NODE_SIZE];
689 };
690
691 static GC_descr StringBufferDesc = 0;
692
693 ssize_t ml_stringbuffer_add(ml_stringbuffer_t *Buffer, const char *String, size_t Length) {
694 size_t Remaining = Length;
695 ml_stringbuffer_node_t *Node = Buffer->Tail ?: (ml_stringbuffer_node_t *)&Buffer->Head;
696 while (Buffer->Space < Remaining) {
697 memcpy(Node->Chars + ML_STRINGBUFFER_NODE_SIZE - Buffer->Space, String, Buffer->Space);
698 String += Buffer->Space;
699 Remaining -= Buffer->Space;
700 ml_stringbuffer_node_t *Next = (ml_stringbuffer_node_t *)GC_MALLOC_EXPLICITLY_TYPED(sizeof(ml_stringbuffer_node_t), StringBufferDesc);
701 //printf("Allocating stringbuffer: %d in total\n", ++NumStringBuffers);
702 Node->Next = Next;
703 Node = Next;
704 Buffer->Space = ML_STRINGBUFFER_NODE_SIZE;
705 }
706 memcpy(Node->Chars + ML_STRINGBUFFER_NODE_SIZE - Buffer->Space, String, Remaining);
707 Buffer->Space -= Remaining;
708 Buffer->Length += Length;
709 Buffer->Tail = Node;
710 return Length;
711 }
712
713 ssize_t ml_stringbuffer_addf(ml_stringbuffer_t *Buffer, const char *Format, ...) {
714 char *String;
715 va_list Args;
716 va_start(Args, Format);
717 size_t Length = vasprintf(&String, Format, Args);
718 va_end(Args);
719 return ml_stringbuffer_add(Buffer, String, Length);
720 }
721
722 static void ml_stringbuffer_finish(ml_stringbuffer_t *Buffer, char *String) {
723 char *P = String;
724 ml_stringbuffer_node_t *Node = Buffer->Head;
725 while (Node->Next) {
726 memcpy(P, Node->Chars, ML_STRINGBUFFER_NODE_SIZE);
727 P += ML_STRINGBUFFER_NODE_SIZE;
728 Node = Node->Next;
729 }
730 memcpy(P, Node->Chars, ML_STRINGBUFFER_NODE_SIZE - Buffer->Space);
731 P += ML_STRINGBUFFER_NODE_SIZE - Buffer->Space;
732 *P++ = 0;
733 Buffer->Head = Buffer->Tail = NULL;
734 Buffer->Length = Buffer->Space = 0;
735 }
736
737 char *ml_stringbuffer_get(ml_stringbuffer_t *Buffer) {
738 if (Buffer->Length == 0) return "";
739 char *String = snew(Buffer->Length + 1);
740 ml_stringbuffer_finish(Buffer, String);
741 return String;
742 }
743
744 char *ml_stringbuffer_get_uncollectable(ml_stringbuffer_t *Buffer) {
745 if (Buffer->Length == 0) return "";
746 char *String = GC_MALLOC_ATOMIC_UNCOLLECTABLE(Buffer->Length + 1);
747 ml_stringbuffer_finish(Buffer, String);
748 return String;
749 }
750
751 ml_value_t *ml_stringbuffer_value(ml_stringbuffer_t *Buffer) {
752 size_t Length = Buffer->Length;
753 if (Length == 0) {
754 return ml_cstring("");
755 } else {
756 char *Chars = snew(Length + 1);
757 ml_stringbuffer_finish(Buffer, Chars);
758 return ml_string(Chars, Length);
759 }
760 }
761
762 ML_METHOD("get", MLStringBufferT) {
763 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
764 return ml_stringbuffer_value(Buffer);
765 }
766
767 int ml_stringbuffer_foreach(ml_stringbuffer_t *Buffer, void *Data, int (*callback)(void *, const char *, size_t)) {
768 ml_stringbuffer_node_t *Node = Buffer->Head;
769 if (!Node) return 0;
770 while (Node->Next) {
771 if (callback(Data, Node->Chars, ML_STRINGBUFFER_NODE_SIZE)) return 1;
772 Node = Node->Next;
773 }
774 return callback(Data, Node->Chars, ML_STRINGBUFFER_NODE_SIZE - Buffer->Space);
775 }
776
777 static ML_METHOD_DECL(AppendMethod, "append");
778
779 ml_value_t *ml_stringbuffer_append(ml_stringbuffer_t *Buffer, ml_value_t *Value) {
780 ml_hash_chain_t *Chain = Buffer->Chain;
781 for (ml_hash_chain_t *Link = Chain; Link; Link = Link->Previous) {
782 if (Link->Value == Value) {
783 ml_stringbuffer_addf(Buffer, "<%s@%ld>", ml_typeof(Value)->Name, Link->Index);
784 return (ml_value_t *)Buffer;
785 }
786 }
787 ml_hash_chain_t NewChain[1] = {{Chain, Value, Chain ? Chain->Index + 1 : 1}};
788 Buffer->Chain = NewChain;
789 ml_value_t *Result = ml_simple_inline(AppendMethod, 2, Buffer, Value);
790 Buffer->Chain = Chain;
791 return Result;
792 }
793
794 ML_METHODV("append", MLStringBufferT, MLAnyT) {
795 ml_value_t *String = ml_simple_call((ml_value_t *)MLStringT, Count - 1, Args + 1);
796 if (ml_is_error(String)) return String;
797 if (!ml_is(String, MLStringT)) return ml_error("TypeError", "String expected, not %s", ml_typeof(String)->Name);
798 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
799 int Length = ml_string_length(String);
800 if (Length) {
801 ml_stringbuffer_add(Buffer, ml_string_value(String), Length);
802 return MLSome;
803 } else {
804 return MLNil;
805 }
806 }
807
808 ML_METHODV("write", MLStringBufferT, MLAnyT) {
809 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
810 for (int I = 1; I < Count; ++I) {
811 ml_value_t *Result = ml_stringbuffer_append(Buffer, Args[I]);
812 if (ml_is_error(Result)) return Result;
813 }
814 return Args[0];
815 }
816
817 ML_METHOD("append", MLStringBufferT, MLNilT) {
818 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
819 ml_stringbuffer_add(Buffer, "nil", 3);
820 return MLSome;
821 }
822
823 ML_METHOD("append", MLStringBufferT, MLSomeT) {
824 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
825 ml_stringbuffer_add(Buffer, "some", 4);
826 return MLSome;
827 }
828
829 ML_METHOD("append", MLStringBufferT, MLIntegerT) {
830 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
831 ml_stringbuffer_addf(Buffer, "%ld", ml_integer_value_fast(Args[1]));
832 return MLSome;
833 }
834
835 ML_METHOD("append", MLStringBufferT, MLDoubleT) {
836 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
837 ml_stringbuffer_addf(Buffer, "%g", ml_double_value_fast(Args[1]));
838 return MLSome;
839 }
840
841 ML_METHOD("append", MLStringBufferT, MLStringT) {
842 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
843 int Length = ml_string_length(Args[1]);
844 if (Length) {
845 ml_stringbuffer_add(Buffer, ml_string_value(Args[1]), Length);
846 return MLSome;
847 } else {
848 return MLNil;
849 }
850 }
851
852 ML_METHOD("[]", MLStringT, MLIntegerT) {
853 const char *Chars = ml_string_value(Args[0]);
854 int Length = ml_string_length(Args[0]);
855 int Index = ml_integer_value_fast(Args[1]);
856 if (Index <= 0) Index += Length + 1;
857 if (Index <= 0) return MLNil;
858 if (Index > Length) return MLNil;
859 return ml_string(Chars + (Index - 1), 1);
860 }
861
862 ML_METHOD("[]", MLStringT, MLIntegerT, MLIntegerT) {
863 const char *Chars = ml_string_value(Args[0]);
864 int Length = ml_string_length(Args[0]);
865 int Lo = ml_integer_value_fast(Args[1]);
866 int Hi = ml_integer_value_fast(Args[2]);
867 if (Lo <= 0) Lo += Length + 1;
868 if (Hi <= 0) Hi += Length + 1;
869 if (Lo <= 0) return MLNil;
870 if (Hi > Length + 1) return MLNil;
871 if (Hi < Lo) return MLNil;
872 int Length2 = Hi - Lo;
873 return ml_string(Chars + Lo - 1, Length2);
874 }
875
876 ML_METHOD("+", MLStringT, MLStringT) {
877 int Length1 = ml_string_length(Args[0]);
878 int Length2 = ml_string_length(Args[1]);
879 int Length = Length1 + Length2;
880 char *Chars = GC_MALLOC_ATOMIC(Length + 1);
881 memcpy(Chars, ml_string_value(Args[0]), Length1);
882 memcpy(Chars + Length1, ml_string_value(Args[1]), Length2);
883 Chars[Length] = 0;
884 return ml_string(Chars, Length);
885 }
886
887 ML_METHOD("trim", MLStringT) {
888 const unsigned char *Start = (const unsigned char *)ml_string_value(Args[0]);
889 const unsigned char *End = Start + ml_string_length(Args[0]);
890 while (Start < End && Start[0] <= ' ') ++Start;
891 while (Start < End && End[-1] <= ' ') --End;
892 int Length = End - Start;
893 return ml_string((const char *)Start, Length);
894 }
895
896 ML_METHOD("trim", MLStringT, MLStringT) {
897 char Trim[256] = {0,};
898 const unsigned char *P = (const unsigned char *)ml_string_value(Args[1]);
899 for (int Length = ml_string_length(Args[1]); --Length >= 0; ++P) Trim[*P] = 1;
900 const unsigned char *Start = (const unsigned char *)ml_string_value(Args[0]);
901 const unsigned char *End = Start + ml_string_length(Args[0]);
902 while (Start < End && Trim[Start[0]]) ++Start;
903 while (Start < End && Trim[End[-1]]) --End;
904 int Length = End - Start;
905 return ml_string((const char *)Start, Length);
906 }
907
908 ML_METHOD("ltrim", MLStringT) {
909 const unsigned char *Start = (const unsigned char *)ml_string_value(Args[0]);
910 const unsigned char *End = Start + ml_string_length(Args[0]);
911 while (Start < End && Start[0] <= ' ') ++Start;
912 int Length = End - Start;
913 return ml_string((const char *)Start, Length);
914 }
915
916 ML_METHOD("ltrim", MLStringT, MLStringT) {
917 char Trim[256] = {0,};
918 const unsigned char *P = (const unsigned char *)ml_string_value(Args[1]);
919 for (int Length = ml_string_length(Args[1]); --Length >= 0; ++P) Trim[*P] = 1;
920 const unsigned char *Start = (const unsigned char *)ml_string_value(Args[0]);
921 const unsigned char *End = Start + ml_string_length(Args[0]);
922 while (Start < End && Trim[Start[0]]) ++Start;
923 int Length = End - Start;
924 return ml_string((const char *)Start, Length);
925 }
926
927 ML_METHOD("rtrim", MLStringT) {
928 const unsigned char *Start = (const unsigned char *)ml_string_value(Args[0]);
929 const unsigned char *End = Start + ml_string_length(Args[0]);
930 while (Start < End && End[-1] <= ' ') --End;
931 int Length = End - Start;
932 return ml_string((const char *)Start, Length);
933 }
934
935 ML_METHOD("rtrim", MLStringT, MLStringT) {
936 char Trim[256] = {0,};
937 const unsigned char *P = (const unsigned char *)ml_string_value(Args[1]);
938 for (int Length = ml_string_length(Args[1]); --Length >= 0; ++P) Trim[*P] = 1;
939 const unsigned char *Start = (const unsigned char *)ml_string_value(Args[0]);
940 const unsigned char *End = Start + ml_string_length(Args[0]);
941 while (Start < End && Trim[End[-1]]) --End;
942 int Length = End - Start;
943 return ml_string((const char *)Start, Length);
944 }
945
946 ML_METHOD("length", MLStringT) {
947 return ml_integer(ml_string_length(Args[0]));
948 }
949
950 ML_METHOD("count", MLStringT) {
951 return ml_integer(ml_string_length(Args[0]));
952 }
953
954 ML_METHOD("<>", MLStringT, MLStringT) {
955 const char *StringA = ml_string_value(Args[0]);
956 const char *StringB = ml_string_value(Args[1]);
957 int LengthA = ml_string_length(Args[0]);
958 int LengthB = ml_string_length(Args[1]);
959 if (LengthA < LengthB) {
960 int Compare = memcmp(StringA, StringB, LengthA);
961 if (Compare > 1) return (ml_value_t *)One;
962 return (ml_value_t *)NegOne;
963 } else if (LengthA > LengthB) {
964 int Compare = memcmp(StringA, StringB, LengthB);
965 if (Compare < 1) return (ml_value_t *)NegOne;
966 return (ml_value_t *)One;
967 } else {
968 int Compare = memcmp(StringA, StringB, LengthA);
969 if (Compare < 0) return (ml_value_t *)NegOne;
970 if (Compare > 0) return (ml_value_t *)One;
971 return (ml_value_t *)Zero;
972 }
973 }
974
975 #define ml_comp_method_string_string(NAME, SYMBOL) \
976 ML_METHOD(NAME, MLStringT, MLStringT) { \
977 const char *StringA = ml_string_value(Args[0]); \
978 const char *StringB = ml_string_value(Args[1]); \
979 int LengthA = ml_string_length(Args[0]); \
980 int LengthB = ml_string_length(Args[1]); \
981 int Compare; \
982 if (LengthA < LengthB) { \
983 Compare = memcmp(StringA, StringB, LengthA) ?: -1; \
984 } else if (LengthA > LengthB) { \
985 Compare = memcmp(StringA, StringB, LengthB) ?: 1; \
986 } else { \
987 Compare = memcmp(StringA, StringB, LengthA); \
988 } \
989 return Compare SYMBOL 0 ? Args[1] : MLNil; \
990 }
991
992 ml_comp_method_string_string("=", ==)
993 ml_comp_method_string_string("!=", !=)
994 ml_comp_method_string_string("<", <)
995 ml_comp_method_string_string(">", >)
996 ml_comp_method_string_string("<=", <=)
997 ml_comp_method_string_string(">=", >=)
998
999 #define SWAP(A, B) { \
1000 typeof(A) Temp = A; \
1001 A = B; \
1002 B = Temp; \
1003 }
1004
1005 ML_METHOD("~", MLStringT, MLStringT) {
1006 const char *CharsA, *CharsB;
1007 int LenA = ml_string_length(Args[0]);
1008 int LenB = ml_string_length(Args[1]);
1009 if (LenA < LenB) {
1010 SWAP(LenA, LenB);
1011 CharsA = ml_string_value(Args[1]);
1012 CharsB = ml_string_value(Args[0]);
1013 } else {
1014 CharsA = ml_string_value(Args[0]);
1015 CharsB = ml_string_value(Args[1]);
1016 }
1017 int *Row0 = alloca((LenB + 1) * sizeof(int));
1018 int *Row1 = alloca((LenB + 1) * sizeof(int));
1019 int *Row2 = alloca((LenB + 1) * sizeof(int));
1020 const int Insert = 1, Replace = 1, Swap = 1, Delete = 1;
1021 for (int J = 0; J <= LenB; ++J) Row1[J] = J * Insert;
1022 char PrevA = 0, PrevB;
1023 for (int I = 0; I < LenA; ++I) {
1024 Row2[0] = (I + 1) * Delete;
1025 for (int J = 0; J < LenB; ++J) {
1026 int Min = Row1[J] + Replace * (CharsA[I] != CharsB[J]);
1027 if (I > 0 && J > 0 && PrevA == CharsB[J] && CharsA[I] == PrevB && Min > Row0[J - 1] + Swap) {
1028 Min = Row0[J - 1] + Swap;
1029 }
1030 if (Min > Row1[J + 1] + Delete) Min = Row1[J + 1] + Delete;
1031 if (Min > Row2[J] + Insert) Min = Row2[J] + Insert;
1032 Row2[J + 1] = Min;
1033 PrevB = CharsB[J];
1034 }
1035 int *Dummy = Row0;
1036 Row0 = Row1;
1037 Row1 = Row2;
1038 Row2 = Dummy;
1039 PrevA = CharsA[I];
1040 }
1041 return ml_integer(Row1[LenB]);
1042 }
1043
1044 ML_METHOD("~>", MLStringT, MLStringT) {
1045 int LenA = ml_string_length(Args[0]);
1046 int LenB = ml_string_length(Args[1]);
1047 const char *CharsA = ml_string_value(Args[0]);
1048 const char *CharsB = ml_string_value(Args[1]);
1049 int *Row0 = alloca((LenB + 1) * sizeof(int));
1050 int *Row1 = alloca((LenB + 1) * sizeof(int));
1051 int *Row2 = alloca((LenB + 1) * sizeof(int));
1052 int Best = LenB;
1053 const int Insert = 1, Replace = 1, Swap = 1, Delete = 1;
1054 for (int J = 0; J <= LenB; ++J) Row1[J] = J * Insert;
1055 char PrevA = 0, PrevB;
1056 for (int I = 0; I < 2 * LenB; ++I) {
1057 Row2[0] = (I + 1) * Delete;
1058 char CharA = I < LenA ? CharsA[I] : 0;
1059 for (int J = 0; J < LenB; ++J) {
1060 int Min = Row1[J] + Replace * (CharA != CharsB[J]);
1061 if (I > 0 && J > 0 && PrevA == CharsB[J] && CharA == PrevB && Min > Row0[J - 1] + Swap) {
1062 Min = Row0[J - 1] + Swap;
1063 }
1064 if (Min > Row1[J + 1] + Delete) Min = Row1[J + 1] + Delete;
1065 if (Min > Row2[J] + Insert) Min = Row2[J] + Insert;
1066 Row2[J + 1] = Min;
1067 PrevB = CharsB[J];
1068 }
1069 int *Dummy = Row0;
1070 Row0 = Row1;
1071 Row1 = Row2;
1072 Row2 = Dummy;
1073 PrevA = CharA;
1074 if (Row1[LenB] < Best) Best = Row1[LenB];
1075 }
1076 return ml_integer(Best);
1077 }
1078
1079 ML_METHOD("/", MLStringT, MLStringT) {
1080 ml_value_t *Results = ml_list();
1081 const char *Subject = ml_string_value(Args[0]);
1082 const char *Pattern = ml_string_value(Args[1]);
1083 size_t Length = strlen(Pattern);
1084 for (;;) {
1085 const char *Next = strstr(Subject, Pattern);
1086 while (Next == Subject) {
1087 Subject += Length;
1088 Next = strstr(Subject, Pattern);
1089 }
1090 if (!Subject[0]) return Results;
1091 if (Next) {
1092 size_t MatchLength = Next - Subject;
1093 char *Match = snew(MatchLength + 1);
1094 memcpy(Match, Subject, MatchLength);
1095 Match[MatchLength] = 0;
1096 ml_list_put(Results, ml_string(Match, MatchLength));
1097 Subject = Next + Length;
1098 } else {
1099 ml_list_put(Results, ml_string(Subject, strlen(Subject)));
1100 break;
1101 }
1102 }
1103 return Results;
1104 }
1105
1106 ML_METHOD("/", MLStringT, MLRegexT) {
1107 ml_value_t *Results = ml_list();
1108 const char *Subject = ml_string_value(Args[0]);
1109 int SubjectLength = ml_string_length(Args[0]);
1110 const char *SubjectEnd = Subject + SubjectLength;
1111 ml_regex_t *Pattern = (ml_regex_t *)Args[1];
1112 int Index = Pattern->Value->re_nsub ? 1 : 0;
1113 regmatch_t Matches[2];
1114 for (;;) {
1115 #ifdef ML_TRE
1116 switch (regnexec(Pattern->Value, Subject, SubjectLength, Index + 1, Matches, 0)) {
1117 #else
1118 switch (regexec(Pattern->Value, Subject, Index + 1, Matches, 0)) {
1119 #endif
1120 case REG_NOMATCH: {
1121 if (SubjectEnd > Subject) ml_list_put(Results, ml_string(Subject, SubjectEnd - Subject));
1122 return Results;
1123 }
1124 case REG_ESPACE: {
1125 size_t ErrorSize = regerror(REG_ESPACE, Pattern->Value, NULL, 0);
1126 char *ErrorMessage = snew(ErrorSize + 1);
1127 regerror(REG_ESPACE, Pattern->Value, ErrorMessage, ErrorSize);
1128 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1129 }
1130 default: {
1131 regoff_t Start = Matches[Index].rm_so;
1132 if (Start > 0) ml_list_put(Results, ml_string(Subject, Start));
1133 Subject += Matches[Index].rm_eo;
1134 SubjectLength -= Matches[Index].rm_eo;
1135 }
1136 }
1137 }
1138 return Results;
1139 }
1140
1141 ML_METHOD("/", MLStringT, MLRegexT, MLIntegerT) {
1142 ml_value_t *Results = ml_list();
1143 const char *Subject = ml_string_value(Args[0]);
1144 int SubjectLength = ml_string_length(Args[0]);
1145 const char *SubjectEnd = Subject + SubjectLength;
1146 ml_regex_t *Pattern = (ml_regex_t *)Args[1];
1147 int Index = ml_integer_value(Args[2]);
1148 if (Index < 0 || Index >= Pattern->Value->re_nsub) return ml_error("RegexError", "Invalid regex group");
1149
1150 regmatch_t Matches[2];
1151 for (;;) {
1152 #ifdef ML_TRE
1153 switch (regnexec(Pattern->Value, Subject, SubjectLength, Index + 1, Matches, 0)) {
1154 #else
1155 switch (regexec(Pattern->Value, Subject, Index + 1, Matches, 0)) {
1156 #endif
1157 case REG_NOMATCH: {
1158 if (SubjectEnd > Subject) ml_list_put(Results, ml_string(Subject, SubjectEnd - Subject));
1159 return Results;
1160 }
1161 case REG_ESPACE: {
1162 size_t ErrorSize = regerror(REG_ESPACE, Pattern->Value, NULL, 0);
1163 char *ErrorMessage = snew(ErrorSize + 1);
1164 regerror(REG_ESPACE, Pattern->Value, ErrorMessage, ErrorSize);
1165 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1166 }
1167 default: {
1168 regoff_t Start = Matches[Index].rm_so;
1169 if (Start > 0) ml_list_put(Results, ml_string(Subject, Start));
1170 Subject += Matches[Index].rm_eo;
1171 SubjectLength -= Matches[Index].rm_eo;
1172 }
1173 }
1174 }
1175 return Results;
1176 }
1177
1178 ML_METHOD("/*", MLStringT, MLStringT) {
1179 const char *Subject = ml_string_value(Args[0]);
1180 const char *End = Subject + ml_string_length(Args[0]);
1181 const char *Pattern = ml_string_value(Args[1]);
1182 size_t Length = strlen(Pattern);
1183 ml_value_t *Results = ml_tuple(2);
1184 const char *Next = strstr(Subject, Pattern);
1185 if (Next) {
1186 ml_tuple_set(Results, 1, ml_string(Subject, Next - Subject));
1187 Next += Length;
1188 ml_tuple_set(Results, 2, ml_string(Next, End - Next));
1189 } else {
1190 ml_tuple_set(Results, 1, Args[0]);
1191 ml_tuple_set(Results, 2, ml_cstring(""));
1192 }
1193 return Results;
1194 }
1195
1196 ML_METHOD("/*", MLStringT, MLRegexT) {
1197 const char *Subject = ml_string_value(Args[0]);
1198 int SubjectLength = ml_string_length(Args[0]);
1199 ml_regex_t *Pattern = (ml_regex_t *)Args[1];
1200 ml_value_t *Results = ml_tuple(2);
1201 regmatch_t Matches[2];
1202 #ifdef ML_TRE
1203 switch (regnexec(Pattern->Value, Subject, SubjectLength, 1, Matches, 0)) {
1204 #else
1205 switch (regexec(Pattern->Value, Subject, 1, Matches, 0)) {
1206 #endif
1207 case REG_NOMATCH:
1208 ml_tuple_set(Results, 1, Args[0]);
1209 ml_tuple_set(Results, 2, ml_cstring(""));
1210 return Results;
1211 case REG_ESPACE: {
1212 size_t ErrorSize = regerror(REG_ESPACE, Pattern->Value, NULL, 0);
1213 char *ErrorMessage = snew(ErrorSize + 1);
1214 regerror(REG_ESPACE, Pattern->Value, ErrorMessage, ErrorSize);
1215 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1216 }
1217 default: {
1218 ml_tuple_set(Results, 1, ml_string(Subject, Matches[0].rm_so));
1219 const char *Next = Subject + Matches[0].rm_eo;
1220 ml_tuple_set(Results, 2, ml_string(Next, Subject + SubjectLength - Next));
1221 return Results;
1222 }
1223 }
1224 }
1225
1226 ML_METHOD("*/", MLStringT, MLStringT) {
1227 const char *Subject = ml_string_value(Args[0]);
1228 const char *End = Subject + ml_string_length(Args[0]);
1229 const char *Pattern = ml_string_value(Args[1]);
1230 size_t Length = strlen(Pattern);
1231 ml_value_t *Results = ml_tuple(2);
1232 const char *Next = End - Length;
1233 while (Next >= Subject) {
1234 if (!memcmp(Next, Pattern, Length)) {
1235 ml_tuple_set(Results, 1, ml_string(Subject, Next - Subject));
1236 Next += Length;
1237 ml_tuple_set(Results, 2, ml_string(Next, End - Next));
1238 return Results;
1239 }
1240 --Next;
1241 }
1242 ml_tuple_set(Results, 1, Args[0]);
1243 ml_tuple_set(Results, 2, ml_cstring(""));
1244 return Results;
1245 }
1246
1247 ML_METHOD("*/", MLStringT, MLRegexT) {
1248 const char *Subject = ml_string_value(Args[0]);
1249 const char *End = Subject + ml_string_length(Args[0]);
1250 ml_regex_t *Pattern = (ml_regex_t *)Args[1];
1251 ml_value_t *Results = ml_tuple(2);
1252 regmatch_t Matches[2];
1253 const char *Next = End - 1;
1254 int NextLength = 1;
1255 while (Next >= Subject) {
1256 #ifdef ML_TRE
1257 switch (regnexec(Pattern->Value, Next, NextLength, 1, Matches, 0)) {
1258 #else
1259 switch (regexec(Pattern->Value, Next, 1, Matches, 0)) {
1260 #endif
1261 case REG_NOMATCH:
1262 --Next;
1263 ++NextLength;
1264 break;
1265 case REG_ESPACE: {
1266 size_t ErrorSize = regerror(REG_ESPACE, Pattern->Value, NULL, 0);
1267 char *ErrorMessage = snew(ErrorSize + 1);
1268 regerror(REG_ESPACE, Pattern->Value, ErrorMessage, ErrorSize);
1269 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1270 }
1271 default: {
1272 ml_tuple_set(Results, 1, ml_string(Subject, Next - Subject));
1273 Next += Matches[0].rm_eo;
1274 ml_tuple_set(Results, 2, ml_string(Next, End - Next));
1275 return Results;
1276 }
1277 }
1278 }
1279 ml_tuple_set(Results, 1, Args[0]);
1280 ml_tuple_set(Results, 2, ml_cstring(""));
1281 return Results;
1282 }
1283
1284 ML_METHOD("lower", MLStringT) {
1285 const char *Source = ml_string_value(Args[0]);
1286 int Length = ml_string_length(Args[0]);
1287 char *Target = snew(Length + 1);
1288 for (int I = 0; I < Length; ++I) Target[I] = tolower(Source[I]);
1289 return ml_string(Target, Length);
1290 }
1291
1292 ML_METHOD("upper", MLStringT) {
1293 const char *Source = ml_string_value(Args[0]);
1294 int Length = ml_string_length(Args[0]);
1295 char *Target = snew(Length + 1);
1296 for (int I = 0; I < Length; ++I) Target[I] = toupper(Source[I]);
1297 return ml_string(Target, Length);
1298 }
1299
1300 ML_METHOD("find", MLStringT, MLStringT) {
1301 const char *Haystack = ml_string_value(Args[0]);
1302 const char *Needle = ml_string_value(Args[1]);
1303 const char *Match = strstr(Haystack, Needle);
1304 if (Match) {
1305 return ml_integer(1 + Match - Haystack);
1306 } else {
1307 return MLNil;
1308 }
1309 }
1310
1311 ML_METHOD("find2", MLStringT, MLStringT) {
1312 const char *Haystack = ml_string_value(Args[0]);
1313 const char *Needle = ml_string_value(Args[1]);
1314 const char *Match = strstr(Haystack, Needle);
1315 if (Match) {
1316 ml_value_t *Result = ml_tuple(2);
1317 ml_tuple_set(Result, 1, ml_integer(1 + Match - Haystack));
1318 ml_tuple_set(Result, 2, Args[1]);
1319 return Result;
1320 } else {
1321 return MLNil;
1322 }
1323 }
1324
1325 ML_METHOD("find", MLStringT, MLStringT, MLIntegerT) {
1326 const char *Haystack = ml_string_value(Args[0]);
1327 size_t HaystackLength = ml_string_length(Args[0]);
1328 const char *Needle = ml_string_value(Args[1]);
1329 int Start = ml_integer_value_fast(Args[2]);
1330 if (Start <= 0) Start += HaystackLength + 1;
1331 if (Start <= 0) return MLNil;
1332 if (Start > HaystackLength) return MLNil;
1333 Haystack += Start - 1;
1334 HaystackLength -= (Start - 1);
1335 const char *Match = strstr(Haystack, Needle);
1336 if (Match) {
1337 return ml_integer(Start + Match - Haystack);
1338 } else {
1339 return MLNil;
1340 }
1341 }
1342
1343 ML_METHOD("find2", MLStringT, MLStringT, MLIntegerT) {
1344 const char *Haystack = ml_string_value(Args[0]);
1345 size_t HaystackLength = ml_string_length(Args[0]);
1346 const char *Needle = ml_string_value(Args[1]);
1347 int Start = ml_integer_value_fast(Args[2]);
1348 if (Start <= 0) Start += HaystackLength + 1;
1349 if (Start <= 0) return MLNil;
1350 if (Start > HaystackLength) return MLNil;
1351 Haystack += Start - 1;
1352 HaystackLength -= (Start - 1);
1353 const char *Match = strstr(Haystack, Needle);
1354 if (Match) {
1355 ml_value_t *Result = ml_tuple(2);
1356 ml_tuple_set(Result, 1, ml_integer(1 + Match - Haystack));
1357 ml_tuple_set(Result, 2, Args[1]);
1358 return Result;
1359 } else {
1360 return MLNil;
1361 }
1362 }
1363
1364 ML_METHOD("find", MLStringT, MLRegexT) {
1365 const char *Haystack = ml_string_value(Args[0]);
1366 regex_t *Regex = ml_regex_value(Args[1]);
1367 regmatch_t Matches[1];
1368 #ifdef ML_TRE
1369 int Length = ml_string_length(Args[0]);
1370 switch (regnexec(Regex, Haystack, Length, 1, Matches, 0)) {
1371 #else
1372 switch (regexec(Regex, Haystack, 1, Matches, 0)) {
1373 #endif
1374 case REG_NOMATCH:
1375 return MLNil;
1376 case REG_ESPACE: {
1377 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1378 char *ErrorMessage = snew(ErrorSize + 1);
1379 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1380 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1381 }
1382 }
1383 return ml_integer(1 + Matches->rm_so);
1384 }
1385
1386 ML_METHOD("find2", MLStringT, MLRegexT) {
1387 const char *Haystack = ml_string_value(Args[0]);
1388 regex_t *Regex = ml_regex_value(Args[1]);
1389 regmatch_t Matches[Regex->re_nsub + 1];
1390 #ifdef ML_TRE
1391 int Length = ml_string_length(Args[0]);
1392 switch (regnexec(Regex, Haystack, Length, Regex->re_nsub + 1, Matches, 0)) {
1393 #else
1394 switch (regexec(Regex, Haystack, Regex->re_nsub + 1, Matches, 0)) {
1395 #endif
1396 case REG_NOMATCH:
1397 return MLNil;
1398 case REG_ESPACE: {
1399 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1400 char *ErrorMessage = snew(ErrorSize + 1);
1401 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1402 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1403 }
1404 }
1405 ml_value_t *Result = ml_tuple(Regex->re_nsub + 2);
1406 ml_tuple_set(Result, 1, ml_integer(1 + Matches->rm_so));
1407 for (int I = 0; I < Regex->re_nsub + 1; ++I) {
1408 regoff_t Start = Matches[I].rm_so;
1409 if (Start >= 0) {
1410 size_t Length = Matches[I].rm_eo - Start;
1411 ml_tuple_set(Result, I + 2, ml_string(Haystack + Start, Length));
1412 } else {
1413 ml_tuple_set(Result, I + 2, MLNil);
1414 }
1415 }
1416 return Result;
1417 }
1418
1419 ML_METHOD("find", MLStringT, MLRegexT, MLIntegerT) {
1420 const char *Haystack = ml_string_value(Args[0]);
1421 int Length = ml_string_length(Args[0]);
1422 regex_t *Regex = ml_regex_value(Args[1]);
1423 int Start = ml_integer_value_fast(Args[2]);
1424 if (Start <= 0) Start += Length + 1;
1425 if (Start <= 0) return MLNil;
1426 if (Start > Length) return MLNil;
1427 Haystack += Start - 1;
1428 Length -= (Start - 1);
1429 regmatch_t Matches[1];
1430 #ifdef ML_TRE
1431 switch (regnexec(Regex, Haystack, Length, 1, Matches, 0)) {
1432 #else
1433 switch (regexec(Regex, Haystack, 1, Matches, 0)) {
1434 #endif
1435 case REG_NOMATCH:
1436 return MLNil;
1437 case REG_ESPACE: {
1438 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1439 char *ErrorMessage = snew(ErrorSize + 1);
1440 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1441 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1442 }
1443 }
1444 return ml_integer(Start + Matches->rm_so);
1445 }
1446
1447 ML_METHOD("find2", MLStringT, MLRegexT, MLIntegerT) {
1448 const char *Haystack = ml_string_value(Args[0]);
1449 int Length = ml_string_length(Args[0]);
1450 regex_t *Regex = ml_regex_value(Args[1]);
1451 int Start = ml_integer_value_fast(Args[2]);
1452 if (Start <= 0) Start += Length + 1;
1453 if (Start <= 0) return MLNil;
1454 if (Start > Length) return MLNil;
1455 Haystack += Start - 1;
1456 Length -= (Start - 1);
1457 regmatch_t Matches[Regex->re_nsub + 1];
1458 #ifdef ML_TRE
1459 switch (regnexec(Regex, Haystack, Length, Regex->re_nsub + 1, Matches, 0)) {
1460 #else
1461 switch (regexec(Regex, Haystack, Regex->re_nsub + 1, Matches, 0)) {
1462 #endif
1463 case REG_NOMATCH:
1464 return MLNil;
1465 case REG_ESPACE: {
1466 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1467 char *ErrorMessage = snew(ErrorSize + 1);
1468 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1469 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1470 }
1471 }
1472 ml_value_t *Result = ml_tuple(Regex->re_nsub + 2);
1473 ml_tuple_set(Result, 1, ml_integer(Start + Matches->rm_so));
1474 for (int I = 0; I < Regex->re_nsub + 1; ++I) {
1475 regoff_t Start = Matches[I].rm_so;
1476 if (Start >= 0) {
1477 size_t Length = Matches[I].rm_eo - Start;
1478 ml_tuple_set(Result, I + 2, ml_string(Haystack + Start, Length));
1479 } else {
1480 ml_tuple_set(Result, I + 2, MLNil);
1481 }
1482 }
1483 return Result;
1484 }
1485
1486 ML_METHOD("%", MLStringT, MLRegexT) {
1487 const char *Subject = ml_string_value(Args[0]);
1488 regex_t *Regex = ml_regex_value(Args[1]);
1489 regmatch_t Matches[Regex->re_nsub + 1];
1490 #ifdef ML_TRE
1491 int Length = ml_string_length(Args[0]);
1492 switch (regnexec(Regex, Subject, Length, Regex->re_nsub + 1, Matches, 0)) {
1493
1494 #else
1495 switch (regexec(Regex, Subject, Regex->re_nsub + 1, Matches, 0)) {
1496 #endif
1497 case REG_NOMATCH:
1498 return MLNil;
1499 case REG_ESPACE: {
1500 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1501 char *ErrorMessage = snew(ErrorSize + 1);
1502 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1503 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1504 }
1505 default: {
1506 ml_value_t *Results = ml_tuple(Regex->re_nsub + 1);
1507 for (int I = 0; I < Regex->re_nsub + 1; ++I) {
1508 regoff_t Start = Matches[I].rm_so;
1509 if (Start >= 0) {
1510 size_t Length = Matches[I].rm_eo - Start;
1511 ml_tuple_set(Results, I + 1, ml_string(Subject + Start, Length));
1512 } else {
1513 ml_tuple_set(Results, I + 1, MLNil);
1514 }
1515 }
1516 return Results;
1517 }
1518 }
1519 }
1520
1521 int ml_regex_match(ml_value_t *Value, const char *Subject, int Length) {
1522 regex_t *Regex = ml_regex_value(Value);
1523 #ifdef ML_TRE
1524 switch (regnexec(Regex, Subject, Length, 0, NULL, 0)) {
1525 #else
1526 switch (regexec(Regex, Subject, 0, NULL, 0)) {
1527 #endif
1528 case REG_NOMATCH: return 1;
1529 case REG_ESPACE: return -1;
1530 default: return 0;
1531 }
1532 }
1533
1534 ML_METHOD("?", MLStringT, MLRegexT) {
1535 const char *Subject = ml_string_value(Args[0]);
1536 regex_t *Regex = ml_regex_value(Args[1]);
1537 regmatch_t Matches[Regex->re_nsub + 1];
1538 #ifdef ML_TRE
1539 int Length = ml_string_length(Args[0]);
1540 switch (regnexec(Regex, Subject, Length, Regex->re_nsub + 1, Matches, 0)) {
1541
1542 #else
1543 switch (regexec(Regex, Subject, Regex->re_nsub + 1, Matches, 0)) {
1544 #endif
1545 case REG_NOMATCH:
1546 return MLNil;
1547 case REG_ESPACE: {
1548 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1549 char *ErrorMessage = snew(ErrorSize + 1);
1550 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1551 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1552 }
1553 default: {
1554 regoff_t Start = Matches[0].rm_so;
1555 if (Start >= 0) {
1556 size_t Length = Matches[0].rm_eo - Start;
1557 return ml_string(Subject + Start, Length);
1558 } else {
1559 return MLNil;
1560 }
1561 }
1562 }
1563 }
1564
1565 ML_METHOD("starts", MLStringT, MLStringT) {
1566 const char *Subject = ml_string_value(Args[0]);
1567 const char *Prefix = ml_string_value(Args[1]);
1568 int Length = ml_string_length(Args[1]);
1569 if (Length > ml_string_length(Args[0])) return MLNil;
1570 if (memcmp(Subject, Prefix, Length)) return MLNil;
1571 return Args[1];
1572 }
1573
1574 ML_METHOD("starts", MLStringT, MLRegexT) {
1575 const char *Subject = ml_string_value(Args[0]);
1576 regex_t *Regex = ml_regex_value(Args[1]);
1577 regmatch_t Matches[Regex->re_nsub + 1];
1578 #ifdef ML_TRE
1579 int Length = ml_string_length(Args[0]);
1580 switch (regnexec(Regex, Subject, Length, Regex->re_nsub + 1, Matches, 0)) {
1581
1582 #else
1583 switch (regexec(Regex, Subject, Regex->re_nsub + 1, Matches, 0)) {
1584 #endif
1585 case REG_NOMATCH:
1586 return MLNil;
1587 case REG_ESPACE: {
1588 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1589 char *ErrorMessage = snew(ErrorSize + 1);
1590 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1591 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1592 }
1593 default: {
1594 regoff_t Start = Matches[0].rm_so;
1595 if (Start == 0) {
1596 size_t Length = Matches[0].rm_eo - Start;
1597 return ml_string(Subject + Start, Length);
1598 } else {
1599 return MLNil;
1600 }
1601 }
1602 }
1603 }
1604
1605 ML_METHOD("ends", MLStringT, MLStringT) {
1606 const char *Subject = ml_string_value(Args[0]);
1607 const char *Suffix = ml_string_value(Args[1]);
1608 int Length = ml_string_length(Args[1]);
1609 int Length0 = ml_string_length(Args[0]);
1610 if (Length > Length0) return MLNil;
1611 if (memcmp(Subject + Length0 - Length, Suffix, Length)) return MLNil;
1612 return Args[1];
1613 }
1614
1615 ML_METHOD("after", MLStringT, MLStringT) {
1616 const char *Haystack = ml_string_value(Args[0]);
1617 size_t HaystackLength = ml_string_length(Args[0]);
1618 const char *Needle = ml_string_value(Args[1]);
1619 size_t NeedleLength = ml_string_length(Args[1]);
1620 const char *Match = strstr(Haystack, Needle);
1621 if (Match) {
1622 Match += NeedleLength;
1623 int Length = HaystackLength - (Match - Haystack);
1624 return ml_string(Match, Length);
1625 } else {
1626 return MLNil;
1627 }
1628 }
1629
1630 ML_METHOD("after", MLStringT, MLStringT, MLIntegerT) {
1631 const char *Haystack = ml_string_value(Args[0]);
1632 size_t HaystackLength = ml_string_length(Args[0]);
1633 const char *HaystackEnd = Haystack + HaystackLength;
1634 const char *Needle = ml_string_value(Args[1]);
1635 size_t NeedleLength = ml_string_length(Args[1]);
1636 int Index = ml_integer_value(Args[2]);
1637 if (Index > 0) {
1638 for (;;) {
1639 const char *Match = strstr(Haystack, Needle);
1640 if (!Match) return MLNil;
1641 if (--Index) {
1642 Haystack = Match + NeedleLength;
1643 } else {
1644 Match += NeedleLength;
1645 int Length = HaystackEnd - Match;
1646 return ml_string(Match, Length);
1647 }
1648 }
1649 } else if (Index < 0) {
1650 for (int I = HaystackLength - NeedleLength; I >= 0; --I) {
1651 const char *Match = Haystack + I;
1652 if (!memcmp(Match, Needle, NeedleLength)) {
1653 if (++Index) {
1654 I -= NeedleLength;
1655 } else {
1656 Match += NeedleLength;
1657 int Length = HaystackEnd - Match;
1658 return ml_string(Match, Length);
1659 }
1660 }
1661 }
1662 return MLNil;
1663 } else {
1664 return Args[0];
1665 }
1666 }
1667
1668 ML_METHOD("before", MLStringT, MLStringT) {
1669 const char *Haystack = ml_string_value(Args[0]);
1670 const char *Needle = ml_string_value(Args[1]);
1671 const char *Match = strstr(Haystack, Needle);
1672 if (Match) {
1673 return ml_string(Haystack, Match - Haystack);
1674 } else {
1675 return MLNil;
1676 }
1677 }
1678
1679 ML_METHOD("before", MLStringT, MLStringT, MLIntegerT) {
1680 const char *Haystack = ml_string_value(Args[0]);
1681 size_t HaystackLength = ml_string_length(Args[0]);
1682 const char *Needle = ml_string_value(Args[1]);
1683 size_t NeedleLength = ml_string_length(Args[1]);
1684 int Index = ml_integer_value(Args[2]);
1685 if (Index > 0) {
1686 for (;;) {
1687 const char *Match = strstr(Haystack, Needle);
1688 if (!Match) return MLNil;
1689 if (--Index) {
1690 Haystack = Match + NeedleLength;
1691 } else {
1692 const char *Haystack = ml_string_value(Args[0]);
1693 return ml_string(Haystack, Match - Haystack);
1694 }
1695 }
1696 } else if (Index < 0) {
1697 for (int I = HaystackLength - NeedleLength; I >= 0; --I) {
1698 if (!memcmp(Haystack + I, Needle, NeedleLength)) {
1699 if (++Index) {
1700 I -= NeedleLength;
1701 } else {
1702 return ml_string(Haystack, I);
1703 }
1704 }
1705 }
1706 return MLNil;
1707 } else {
1708 return Args[0];
1709 }
1710 }
1711
1712 ML_METHOD("replace", MLStringT, MLStringT, MLStringT) {
1713 const char *Subject = ml_string_value(Args[0]);
1714 const char *SubjectEnd = Subject + ml_string_length(Args[0]);
1715 const char *Pattern = ml_string_value(Args[1]);
1716 int PatternLength = ml_string_length(Args[1]);
1717 const char *Replace = ml_string_value(Args[2]);
1718 int ReplaceLength = ml_string_length(Args[2]);
1719 ml_stringbuffer_t Buffer[1] = {ML_STRINGBUFFER_INIT};
1720 const char *Find = strstr(Subject, Pattern);
1721 while (Find) {
1722 if (Find > Subject) ml_stringbuffer_add(Buffer, Subject, Find - Subject);
1723 ml_stringbuffer_add(Buffer, Replace, ReplaceLength);
1724 Subject = Find + PatternLength;
1725 Find = strstr(Subject, Pattern);
1726 }
1727 if (SubjectEnd > Subject) {
1728 ml_stringbuffer_add(Buffer, Subject, SubjectEnd - Subject);
1729 }
1730 return ml_stringbuffer_value(Buffer);
1731 }
1732
1733 ML_METHOD("replace", MLStringT, MLRegexT, MLStringT) {
1734 const char *Subject = ml_string_value(Args[0]);
1735 int SubjectLength = ml_string_length(Args[0]);
1736 regex_t *Regex = ml_regex_value(Args[1]);
1737 const char *Replace = ml_string_value(Args[2]);
1738 int ReplaceLength = ml_string_length(Args[2]);
1739 regmatch_t Matches[1];
1740 ml_stringbuffer_t Buffer[1] = {ML_STRINGBUFFER_INIT};
1741 for (;;) {
1742 #ifdef ML_TRE
1743 switch (regnexec(Regex, Subject, SubjectLength, 1, Matches, 0)) {
1744
1745 #else
1746 switch (regexec(Regex, Subject, 1, Matches, 0)) {
1747 #endif
1748 case REG_NOMATCH:
1749 if (SubjectLength) ml_stringbuffer_add(Buffer, Subject, SubjectLength);
1750 return ml_stringbuffer_value(Buffer);
1751 case REG_ESPACE: {
1752 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1753 char *ErrorMessage = snew(ErrorSize + 1);
1754 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1755 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1756 }
1757 default: {
1758 regoff_t Start = Matches[0].rm_so;
1759 if (Start > 0) ml_stringbuffer_add(Buffer, Subject, Start);
1760 ml_stringbuffer_add(Buffer, Replace, ReplaceLength);
1761 Subject += Matches[0].rm_eo;
1762 SubjectLength -= Matches[0].rm_eo;
1763 }
1764 }
1765 }
1766 return 0;
1767 }
1768
1769 ML_METHOD("replace", MLStringT, MLRegexT, MLFunctionT) {
1770 const char *Subject = ml_string_value(Args[0]);
1771 int SubjectLength = ml_string_length(Args[0]);
1772 regex_t *Regex = ml_regex_value(Args[1]);
1773 ml_value_t *Replacer = Args[2];
1774 int NumSub = Regex->re_nsub + 1;
1775 regmatch_t Matches[NumSub];
1776 ml_value_t *SubArgs[NumSub];
1777 ml_stringbuffer_t Buffer[1] = {ML_STRINGBUFFER_INIT};
1778 for (;;) {
1779 #ifdef ML_TRE
1780 switch (regnexec(Regex, Subject, SubjectLength, NumSub, Matches, 0)) {
1781
1782 #else
1783 switch (regexec(Regex, Subject, NumSub, Matches, 0)) {
1784 #endif
1785 case REG_NOMATCH:
1786 if (SubjectLength) ml_stringbuffer_add(Buffer, Subject, SubjectLength);
1787 return ml_stringbuffer_value(Buffer);
1788 case REG_ESPACE: {
1789 size_t ErrorSize = regerror(REG_ESPACE, Regex, NULL, 0);
1790 char *ErrorMessage = snew(ErrorSize + 1);
1791 regerror(REG_ESPACE, Regex, ErrorMessage, ErrorSize);
1792 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1793 }
1794 default: {
1795 regoff_t Start = Matches[0].rm_so;
1796 if (Start > 0) ml_stringbuffer_add(Buffer, Subject, Start);
1797 for (int I = 0; I < NumSub; ++I) {
1798 SubArgs[I] = ml_string(Subject + Matches[I].rm_so, Matches[I].rm_eo - Matches[I].rm_so);
1799 }
1800 ml_value_t *Replace = ml_simple_call(Replacer, NumSub, SubArgs);
1801 if (ml_is_error(Replace)) return Replace;
1802 if (!ml_is(Replace, MLStringT)) return ml_error("TypeError", "expected string, not %s", ml_typeof(Replace)->Name);
1803 ml_stringbuffer_add(Buffer, ml_string_value(Replace), ml_string_length(Replace));
1804 Subject += Matches[0].rm_eo;
1805 SubjectLength -= Matches[0].rm_eo;
1806 }
1807 }
1808 }
1809 return 0;
1810 }
1811
1812 typedef struct {
1813 union {
1814 const char *String;
1815 regex_t *Regex;
1816 } Pattern;
1817 union {
1818 const char *String;
1819 ml_value_t *Function;
1820 } Replacement;
1821 int PatternLength;
1822 int ReplacementLength;
1823 } ml_replacement_t;
1824
1825 ML_METHOD("replace", MLStringT, MLMapT) {
1826 int NumPatterns = ml_map_size(Args[1]);
1827 ml_replacement_t Replacements[NumPatterns], *Last = Replacements + NumPatterns;
1828 int I = 0, MaxSub = 0;
1829 ML_MAP_FOREACH(Args[1], Iter) {
1830 if (ml_is(Iter->Key, MLStringT)) {
1831 Replacements[I].Pattern.String = ml_string_value(Iter->Key);
1832 Replacements[I].PatternLength = ml_string_length(Iter->Key);
1833 } else if (ml_is(Iter->Key, MLRegexT)) {
1834 regex_t *Regex = ml_regex_value(Iter->Key);
1835 Replacements[I].Pattern.Regex = Regex;
1836 Replacements[I].PatternLength = -1;
1837 if (MaxSub <= Regex->re_nsub) MaxSub = Regex->re_nsub + 1;
1838 } else {
1839 return ml_error("TypeError", "Unsupported pattern type: <%s>", ml_typeof(Iter->Key)->Name);
1840 }
1841 if (ml_is(Iter->Value, MLStringT)) {
1842 Replacements[I].Replacement.String = ml_string_value(Iter->Value);
1843 Replacements[I].ReplacementLength = ml_string_length(Iter->Value);
1844 } else if (ml_is(Iter->Value, MLFunctionT)) {
1845 Replacements[I].Replacement.Function = Iter->Value;
1846 Replacements[I].ReplacementLength = -1;
1847 } else {
1848 return ml_error("TypeError", "Unsupported replacement type: <%s>", ml_typeof(Iter->Value)->Name);
1849 }
1850 ++I;
1851 }
1852 const char *Subject = ml_string_value(Args[0]);
1853 int SubjectLength = ml_string_length(Args[0]);
1854 regmatch_t Matches[MaxSub];
1855 ml_value_t *SubArgs[MaxSub];
1856 ml_stringbuffer_t Buffer[1] = {ML_STRINGBUFFER_INIT};
1857 for (;;) {
1858 int MatchStart = SubjectLength, MatchEnd, SubCount;
1859 ml_replacement_t *Match = NULL;
1860 for (ml_replacement_t *Replacement = Replacements; Replacement < Last; ++Replacement) {
1861 if (Replacement->PatternLength < 0) {
1862 regex_t *Regex = Replacement->Pattern.Regex;
1863 int NumSub = Replacement->Pattern.Regex->re_nsub + 1;
1864 #ifdef ML_TRE
1865 switch (regnexec(Regex, Subject, SubjectLength, NumSub, Matches, 0)) {
1866
1867 #else
1868 switch (regexec(Regex, Subject, NumSub, Matches, 0)) {
1869 #endif
1870 case REG_NOMATCH:
1871 break;
1872 case REG_ESPACE: {
1873 size_t ErrorSize = regerror(REG_ESPACE, Replacement->Pattern.Regex, NULL, 0);
1874 char *ErrorMessage = snew(ErrorSize + 1);
1875 regerror(REG_ESPACE, Replacement->Pattern.Regex, ErrorMessage, ErrorSize);
1876 return ml_error("RegexError", "regex error: %s", ErrorMessage);
1877 }
1878 default: {
1879 if (Matches[0].rm_so < MatchStart) {
1880 MatchStart = Matches[0].rm_so;
1881 for (int I = 0; I < NumSub; ++I) {
1882 SubArgs[I] = ml_string(Subject + Matches[I].rm_so, Matches[I].rm_eo - Matches[I].rm_so);
1883 }
1884 SubCount = NumSub;
1885 MatchEnd = Matches[0].rm_eo;
1886 Match = Replacement;
1887 }
1888 }
1889 }
1890 } else {
1891 const char *Find = strstr(Subject, Replacement->Pattern.String);
1892 if (Find) {
1893 int Start = Find - Subject;
1894 if (Start < MatchStart) {
1895 MatchStart = Start;
1896 SubCount = 0;
1897 MatchEnd = Start + Replacement->PatternLength;
1898 Match = Replacement;
1899 }
1900 }
1901 }
1902 }
1903 if (!Match) break;
1904 if (MatchStart) ml_stringbuffer_add(Buffer, Subject, MatchStart);
1905 if (Match->ReplacementLength < 0) {
1906 ml_value_t *Replace = ml_simple_call(Match->Replacement.Function, SubCount, SubArgs);
1907 if (ml_is_error(Replace)) return Replace;
1908 if (!ml_is(Replace, MLStringT)) return ml_error("TypeError", "expected string, not %s", ml_typeof(Replace)->Name);
1909 ml_stringbuffer_add(Buffer, ml_string_value(Replace), ml_string_length(Replace));
1910 } else {
1911 ml_stringbuffer_add(Buffer, Match->Replacement.String, Match->ReplacementLength);
1912 }
1913 Subject += MatchEnd;
1914 SubjectLength -= MatchEnd;
1915 }
1916 if (SubjectLength) ml_stringbuffer_add(Buffer, Subject, SubjectLength);
1917 return ml_stringbuffer_value(Buffer);
1918 }
1919
1920 ML_METHOD(MLStringT, MLRegexT) {
1921 return ml_string_format("/%s/", ml_regex_pattern(Args[0]));
1922 }
1923
1924 ML_METHOD("append", MLStringBufferT, MLRegexT) {
1925 ml_stringbuffer_t *Buffer = (ml_stringbuffer_t *)Args[0];
1926 ml_stringbuffer_addf(Buffer, "/%s/", ml_regex_pattern(Args[1]));
1927 return MLSome;
1928 }
1929
1930 void ml_string_init() {
1931 GC_word StringBufferLayout[] = {1};
1932 StringBufferDesc = GC_make_descriptor(StringBufferLayout, 1);
1933 regcomp(IntFormat, "^%[-+ #'0]*[.0-9]*[dioxX]$", REG_NOSUB);
1934 regcomp(LongFormat, "^%[-+ #'0]*[.0-9]*l[dioxX]$", REG_NOSUB);
1935 regcomp(RealFormat, "^%[-+ #'0]*[.0-9]*[aefgAEG]$", REG_NOSUB);
1936 stringmap_insert(MLStringT->Exports, "switch", MLStringSwitch);
1937 #include "ml_string_init.c"
1938 ml_method_by_value(MLStringT->Constructor, NULL, ml_identity, MLStringT, NULL);
1939 }
1940