1 /*
2 * Copyright (C) Volition, Inc. 1999. All rights reserved.
3 *
4 * All source code herein is the property of Volition, Inc. You may not sell
5 * or otherwise commercially exploit the source or things you created based on the
6 * source.
7 *
8 */
9
10 #include <cstdio>
11 #include <cstdlib>
12 #include <cstring>
13 #include <cassert>
14 #include <cstdarg>
15 #include <csetjmp>
16
17 #include <cctype>
18 #include "globalincs/version.h"
19 #include "localization/fhash.h"
20 #include "localization/localize.h"
21 #include "mission/missionparse.h"
22 #include "parse/encrypt.h"
23 #include "parse/parselo.h"
24 #include "parse/sexp.h"
25 #include "ship/ship.h"
26 #include "weapon/weapon.h"
27 #include "mod_table/mod_table.h"
28
29 #include "utils/encoding.h"
30 #include "utils/unicode.h"
31
32 #include <utf8.h>
33
34 using namespace parse;
35
36
// Size of Error_str; next_tokens() copies at most ERROR_LENGTH-1 characters into it.
37 #define ERROR_LENGTH 64
// Number of attempts required_string() and its relatives make before giving up.
38 #define RS_MAX_TRIES 5
// Byte value 0xDF as a (possibly signed) char -- presumably the Latin-1 sharp-s; TODO confirm against its users.
39 #define SHARP_S (char)-33
40
41 // Flag indicating that a modular table is currently being parsed
42 bool Parsing_modular_table = false;
43
// Name of the file being parsed; printed in error_display() messages.
44 char Current_filename[MAX_PATH_LEN];
45 char Current_filename_sub[MAX_PATH_LEN]; // Last file we attempted to load; it may or may not exist.
// Scratch buffer returned by next_tokens().
46 char Error_str[ERROR_LENGTH];
// Running totals, incremented by error_display().
47 int Warning_count, Error_count;
// When nonzero, required_string_fred()/optional_string_fred() return 0 without searching;
// required_string_either_fred() assigns it.
48 int fred_parse_flag = 0;
// Set to 1/0 by the *_fred search functions to report whether the token was found.
49 int Token_found_flag;
50
// Start of the text buffer that get_line_num() scans.
51 char *Parse_text = nullptr;
52 char *Parse_text_raw = nullptr;
// Mp is the current parse position; every helper in this file reads and advances it.
53 char *Mp = NULL, *Mp_save = NULL;
// Most recent token recorded by the required_string*/optional_string_fred helpers.
54 const char *token_found;
55
56 SCP_vector<Bookmark> Bookmarks; // Stack of previously paused parsing positions
57
58 // Text buffer allocation
59 void allocate_parse_text(size_t size);
60 static size_t Parse_text_size = 0;
61
62
63 // Return true if this character is white space, else false.
is_white_space(char ch)64 int is_white_space(char ch)
65 {
66 return ((ch == ' ') || (ch == '\t') || (ch == EOLN) || (ch == CARRIAGE_RETURN));
67 }
is_white_space(unicode::codepoint_t cp)68 int is_white_space(unicode::codepoint_t cp)
69 {
70 return ((cp == UNICODE_CHAR(' ')) || (cp == UNICODE_CHAR('\t')) || (cp == (unicode::codepoint_t)EOLN) || (cp == (unicode::codepoint_t)CARRIAGE_RETURN));
71 }
72
// Gray space is the subset of white space that stays on the current line:
// a space or a tab. Yields 1 for gray space and 0 otherwise.
int is_gray_space(char ch)
{
	const bool gray = (ch == ' ') || (ch == '\t');
	return gray ? 1 : 0;
}
78
is_gray_space(unicode::codepoint_t cp)79 bool is_gray_space(unicode::codepoint_t cp) {
80 return cp == UNICODE_CHAR(' ') || cp == UNICODE_CHAR('\t');
81 }
82
// Yields 1 when ch is an opening or closing round parenthesis, 0 otherwise.
int is_parenthesis(char ch)
{
	switch (ch) {
		case '(':
		case ')':
			return 1;

		default:
			return 0;
	}
}
87
88 // Advance global Mp (mission pointer) past all current white space.
89 // Leaves Mp pointing at first non white space character.
ignore_white_space(const char ** pp)90 void ignore_white_space(const char **pp)
91 {
92 if (pp == nullptr)
93 pp = const_cast<const char**>(&Mp);
94
95 while ((**pp != '\0') && is_white_space(**pp))
96 (*pp)++;
97 }
98
ignore_gray_space(const char ** pp)99 void ignore_gray_space(const char **pp)
100 {
101 if (pp == nullptr)
102 pp = const_cast<const char**>(&Mp);
103
104 while ((**pp != '\0') && is_gray_space(**pp))
105 (*pp)++;
106 }
107
108 // Truncate *str, eliminating all trailing white space.
109 // Eg: "abc " becomes "abc"
110 // "abc abc " becomes "abc abc"
111 // "abc \t" becomes "abc"
drop_trailing_white_space(char * str)112 void drop_trailing_white_space(char *str)
113 {
114 auto len = strlen(str);
115 if (len == 0)
116 {
117 // Nothing to do here
118 return;
119 }
120 auto i = len - 1;
121 while (i != INVALID_SIZE && is_white_space(str[i]))
122 {
123 --i;
124 }
125 str[i + 1] = '\0';
126 }
127
128 // Ditto for SCP_string
drop_trailing_white_space(SCP_string & str)129 void drop_trailing_white_space(SCP_string &str)
130 {
131 if (str.empty())
132 {
133 // Nothing to do here
134 return;
135 }
136 auto i = str.size() - 1;
137 while (i != INVALID_SIZE && is_white_space(str[i]))
138 {
139 --i;
140 }
141 str.resize(i + 1);
142 }
143
144 // Eliminate any leading whitespace in str
drop_leading_white_space(char * str)145 void drop_leading_white_space(char *str)
146 {
147 auto len = strlen(str);
148 size_t first = 0;
149
150 // find first non-whitespace
151 while ((first < len) && is_white_space(str[first]))
152 first++;
153
154 // quick out
155 if (first == 0)
156 return;
157
158 memmove(str, str+first, len-first);
159 str[len-first] = 0;
160 }
161
162 // Ditto for SCP_string
drop_leading_white_space(SCP_string & str)163 void drop_leading_white_space(SCP_string &str)
164 {
165 auto len = str.length();
166 size_t first = 0;
167
168 // find first non-whitespace
169 while ((first < len) && is_white_space(str[first]))
170 first++;
171
172 // quick out
173 if (first == 0)
174 return;
175
176 // Assign the found substring to the string
177 str = str.substr(first, len - first);
178 }
179
180 // eliminates all leading and trailing white space from a string. Returns pointer passed in.
drop_white_space(char * str)181 char *drop_white_space(char *str)
182 {
183 drop_trailing_white_space(str);
184 drop_leading_white_space(str);
185
186 return str;
187 }
188
189 // ditto for SCP_string
drop_white_space(SCP_string & str)190 void drop_white_space(SCP_string &str)
191 {
192 drop_trailing_white_space(str);
193 drop_leading_white_space(str);
194 }
195
196 // Advances Mp past current token.
skip_token()197 void skip_token()
198 {
199 ignore_white_space();
200
201 while ((*Mp != '\0') && !is_white_space(*Mp))
202 Mp++;
203 }
204
205 // Display a diagnostic message if Verbose is set.
206 // (Verbose is set if -v command line switch is present.)
diag_printf(const char * format,...)207 void diag_printf(const char *format, ...)
208 {
209 #ifndef NDEBUG
210 SCP_string buffer;
211 va_list args;
212
213 va_start(args, format);
214 vsprintf(buffer, format, args);
215 va_end(args);
216
217 nprintf(("Parse", "%s", buffer.c_str()));
218 #endif
219 }
220
221 // Grab and return (a pointer to) a bunch of tokens, terminating at
222 // ERROR_LENGTH chars, or end of line.
next_tokens(bool terminate_before_parenthesis_or_comma)223 char *next_tokens(bool terminate_before_parenthesis_or_comma)
224 {
225 int count = 0;
226 char *pstr = Mp;
227 char ch;
228
229 while (((ch = *pstr++) != EOLN) && (ch != '\0') && (count < ERROR_LENGTH-1))
230 Error_str[count++] = ch;
231
232 if (terminate_before_parenthesis_or_comma && (Error_str[count-1] == ',' || Error_str[count - 1] == ')'))
233 --count;
234
235 Error_str[count] = 0;
236 return Error_str;
237 }
238
239 // Return the line number given by the current mission pointer, ie Mp.
240 // A very slow function (scans all processed text), but who cares how long
241 // an error reporting function takes?
get_line_num()242 int get_line_num()
243 {
244 int count = 1;
245 bool inquote = false;
246 int incomment = false;
247 int multiline = false;
248 char *stoploc;
249 char *p;
250
251 p = Parse_text;
252 stoploc = Mp;
253
254 while (p < stoploc)
255 {
256 if (*p == '\0') {
257 Warning(LOCATION, "Unexpected end-of-file while looking for line number!");
258 break;
259 }
260
261 if ( !incomment && (*p == '\"') )
262 inquote = !inquote;
263
264 if ( !incomment && !inquote && (*p == COMMENT_CHAR) )
265 incomment = true;
266
267 if ( !incomment && (*p == '/') && (*(p+1) == '*') ) {
268 multiline = true;
269 incomment = true;
270 }
271
272 if ( incomment )
273 stoploc++;
274
275 if ( multiline && (*(p-1) == '*') && (*p == '/') ) {
276 multiline = false;
277 incomment = false;
278 }
279
280 if (*p++ == EOLN) {
281 if ( !multiline && incomment )
282 incomment = false;
283 count++;
284 }
285 }
286
287 return count;
288 }
289
290 // Call this function to display an error message.
291 // error_level == 0 means this is just a warning.
292 // !0 means it's an error message.
293 // Prints line number and other useful information.
294 extern int Cmdline_noparseerrors;
error_display(int error_level,const char * format,...)295 void error_display(int error_level, const char *format, ...)
296 {
297 char type[8];
298 SCP_string error_text;
299 va_list args;
300
301 if (error_level == 0) {
302 strcpy_s(type, "Warning");
303 Warning_count++;
304 } else {
305 strcpy_s(type, "Error");
306 Error_count++;
307 }
308
309 va_start(args, format);
310 vsprintf(error_text, format, args);
311 va_end(args);
312
313 nprintf((type, "%s(line %i): %s: %s\n", Current_filename, get_line_num(), type, error_text.c_str()));
314
315 if(error_level == 0 || Cmdline_noparseerrors)
316 Warning(LOCATION, "%s(line %i):\n%s: %s", Current_filename, get_line_num(), type, error_text.c_str());
317 else
318 Error(LOCATION, "%s(line %i):\n%s: %s", Current_filename, get_line_num(), type, error_text.c_str());
319 }
320
321 // Advance Mp to the next eoln character.
advance_to_eoln(const char * more_terminators)322 void advance_to_eoln(const char *more_terminators)
323 {
324 char terminators[128];
325
326 Assert((more_terminators == NULL) || (strlen(more_terminators) < 125));
327
328 terminators[0] = EOLN;
329 terminators[1] = 0;
330 if (more_terminators != NULL)
331 strcat_s(terminators, more_terminators);
332
333 while (strchr(terminators, *Mp) == NULL)
334 Mp++;
335 }
336
337 // Advance Mp to the next white space (ignoring white space inside of " marks)
advance_to_next_white()338 void advance_to_next_white()
339 {
340 bool in_quotes = false;
341
342 while ((*Mp != EOLN) && (*Mp != '\0')) {
343 if (*Mp == '\"')
344 in_quotes = !in_quotes;
345
346 if (!in_quotes && is_white_space(*Mp))
347 break;
348
349 if (!in_quotes && is_parenthesis(*Mp))
350 break;
351
352 Mp++;
353 }
354 }
355
356 // Search for specified string, skipping everything up to that point. Returns 1 if found,
357 // 0 if string wasn't found (and hit end of file), or -1 if not found, but end of checking
358 // block was reached.
skip_to_string(const char * pstr,const char * end)359 int skip_to_string(const char *pstr, const char *end)
360 {
361 ignore_white_space();
362 auto len = strlen(pstr);
363 size_t len2 = 0;
364
365 if (end)
366 len2 = strlen(end);
367
368 while ((*Mp != '\0') && strnicmp(pstr, Mp, len) != 0) {
369 if (end && *Mp == '#')
370 return 0;
371
372 if (end && !strnicmp(end, Mp, len2))
373 return -1;
374
375 advance_to_eoln(NULL);
376 ignore_white_space();
377 }
378
379 if (!Mp || *Mp == '\0')
380 return 0;
381
382 Mp += strlen(pstr);
383 return 1;
384 }
385
386 // Goober5000
387 // Advance to start of pstr. Return 0 is successful, otherwise return !0
skip_to_start_of_string(const char * pstr,const char * end)388 int skip_to_start_of_string(const char *pstr, const char *end)
389 {
390 ignore_white_space();
391 auto len = strlen(pstr);
392 size_t endlen;
393 if(end)
394 endlen = strlen(end);
395 else
396 endlen = 0;
397
398 while ( (*Mp != '\0') && strnicmp(pstr, Mp, len) != 0 ) {
399 if (end && *Mp == '#')
400 return 0;
401
402 if (end && !strnicmp(end, Mp, endlen))
403 return 0;
404
405 advance_to_eoln(NULL);
406 ignore_white_space();
407 }
408
409 if (!Mp || *Mp == '\0')
410 return 0;
411
412 return 1;
413 }
414
415 // Advance to start of either pstr1 or pstr2. Return 0 is successful, otherwise return !0
skip_to_start_of_string_either(const char * pstr1,const char * pstr2,const char * end)416 int skip_to_start_of_string_either(const char *pstr1, const char *pstr2, const char *end)
417 {
418 size_t len1, len2, endlen;
419
420 ignore_white_space();
421 len1 = strlen(pstr1);
422 len2 = strlen(pstr2);
423 if(end)
424 endlen = strlen(end);
425 else
426 endlen = 0;
427
428 while ( (*Mp != '\0') && strnicmp(pstr1, Mp, len1) != 0 && strnicmp(pstr2, Mp, len2) != 0 ) {
429 if (end && *Mp == '#')
430 return 0;
431
432 if (end && !strnicmp(end, Mp, endlen))
433 return 0;
434
435 advance_to_eoln(NULL);
436 ignore_white_space();
437 }
438
439 if (!Mp || *Mp == '\0')
440 return 0;
441
442 return 1;
443 }
444
445 // Find a required string.
446 // If not found, display an error message, but try up to RS_MAX_TRIES times
447 // to find the string. (This is the groundwork for ignoring non-understood
448 // lines.
449 // If unable to find the required string after RS_MAX_TRIES tries, then
450 // abort using longjmp to parse_abort.
required_string(const char * pstr)451 int required_string(const char *pstr)
452 {
453 int count = 0;
454
455 ignore_white_space();
456
457 while (strnicmp(pstr, Mp, strlen(pstr)) != 0 && (count < RS_MAX_TRIES)) {
458 error_display(1, "Missing required token: [%s]. Found [%.32s] instead.\n", pstr, next_tokens());
459 advance_to_eoln(NULL);
460 ignore_white_space();
461 count++;
462 }
463
464 if (count == RS_MAX_TRIES) {
465 throw parse::ParseException("Required string not found");
466 }
467
468 Mp += strlen(pstr);
469 diag_printf("Found required string [%s]\n", token_found = pstr);
470 return 1;
471 }
472
check_for_eof_raw()473 int check_for_eof_raw()
474 {
475 if (*Mp == '\0')
476 return 1;
477
478 return 0;
479 }
480
check_for_eof()481 int check_for_eof()
482 {
483 ignore_white_space();
484
485 return check_for_eof_raw();
486 }
487
488 /**
489 Returns 1 if it finds a newline character precded by any amount of grayspace.
490 */
check_for_eoln()491 int check_for_eoln()
492 {
493 ignore_gray_space();
494
495 if(*Mp == EOLN)
496 return 1;
497 else
498 return 0;
499 }
500
501 // similar to optional_string, but just checks if next token is a match.
502 // It doesn't advance Mp except to skip past white space.
check_for_string(const char * pstr)503 int check_for_string(const char *pstr)
504 {
505 ignore_white_space();
506
507 if (!strnicmp(pstr, Mp, strlen(pstr)))
508 return 1;
509
510 return 0;
511 }
512
513 // like check for string, but doesn't skip past any whitespace
check_for_string_raw(const char * pstr)514 int check_for_string_raw(const char *pstr)
515 {
516 if (!strnicmp(pstr, Mp, strlen(pstr)))
517 return 1;
518
519 return 0;
520 }
521
522 // Find an optional string.
523 // If found, return 1, else return 0.
524 // If found, point past string, else don't update pointer.
optional_string(const char * pstr)525 int optional_string(const char *pstr)
526 {
527 ignore_white_space();
528
529 if (!strnicmp(pstr, Mp, strlen(pstr))) {
530 Mp += strlen(pstr);
531 return 1;
532 }
533
534 return 0;
535 }
536
optional_string_either(const char * str1,const char * str2)537 int optional_string_either(const char *str1, const char *str2)
538 {
539 ignore_white_space();
540
541 if ( !strnicmp(str1, Mp, strlen(str1)) ) {
542 Mp += strlen(str1);
543 return 0;
544 } else if ( !strnicmp(str2, Mp, strlen(str2)) ) {
545 Mp += strlen(str2);
546 return 1;
547 }
548
549 return -1;
550 }
551
552 // generic parallel to required_string_one_of
optional_string_one_of(int arg_count,...)553 int optional_string_one_of(int arg_count, ...)
554 {
555 Assertion(arg_count > 0, "optional_string_one_of() called with arg_count of %d; get a coder!\n", arg_count);
556 int idx, found = -1;
557 char *pstr;
558 va_list vl;
559
560 ignore_white_space();
561
562 va_start(vl, arg_count);
563 for (idx = 0; idx < arg_count; idx++)
564 {
565 pstr = va_arg(vl, char*);
566
567 if ( !strnicmp(pstr, Mp, strlen(pstr)) )
568 {
569 Mp += strlen(pstr);
570 found = idx;
571 break;
572 }
573 }
574 va_end(vl);
575
576 return found;
577 }
578
required_string_fred(const char * pstr,const char * end)579 int required_string_fred(const char *pstr, const char *end)
580 {
581 char *backup = Mp;
582
583 token_found = pstr;
584 if (fred_parse_flag)
585 return 0;
586
587 ignore_white_space();
588 while (*Mp != '\0' && strnicmp(pstr, Mp, strlen(pstr)) != 0) {
589 if ((*Mp == '#') || (end && !strnicmp(end, Mp, strlen(end)))) {
590 Mp = NULL;
591 break;
592 }
593
594 advance_to_eoln(NULL);
595 ignore_white_space();
596 }
597
598 if (!Mp || *Mp == '\0') {
599 diag_printf("Required string [%s] not found\n", pstr);
600 Mp = backup;
601 Token_found_flag = 0;
602 return 0;
603 }
604
605 Mp += strlen(pstr);
606 diag_printf("Found required string [%s]\n", pstr);
607 Token_found_flag = 1;
608 return 1;
609 }
610
611 // attempt to find token in buffer. It might not exist, however, in which case we don't need
612 // to do anything. If it is found, then we advance the pointer to just after the token. To
613 // further complicate things, we should only search to a certain point, since we don't want
614 // a token that belongs to another section which might match the token we want. Thus, we
615 // also pass in an ending token, which marks the point we should stop looking at.
optional_string_fred(const char * pstr,const char * end,const char * end2)616 int optional_string_fred(const char *pstr, const char *end, const char *end2)
617 {
618 char *mp_save = Mp;
619
620 token_found = pstr;
621 if (fred_parse_flag)
622 return 0;
623
624 ignore_white_space();
625 while ((*Mp != '\0') && strnicmp(pstr, Mp, strlen(pstr)) != 0) {
626 if ((*Mp == '#') || (end && !strnicmp(end, Mp, strlen(end))) ||
627 (end2 && !strnicmp(end2, Mp, strlen(end2)))) {
628 Mp = NULL;
629 break;
630 }
631
632 advance_to_eoln(NULL);
633 ignore_white_space();
634 }
635
636 if (!Mp || *Mp == '\0') {
637 diag_printf("Optional string [%s] not found\n", pstr);
638 Mp = mp_save;
639 Token_found_flag = 0;
640 return 0;
641 }
642
643 Mp += strlen(pstr);
644 diag_printf("Found optional string [%s]\n", pstr);
645 Token_found_flag = 1;
646 return 1;
647 }
648
649 /**
650 * @brief Checks for one of two required strings
651 *
652 * @retval 0 for str1 match
653 * @retval 1 for str2 match
654 * @throws parse::ParseException If neither strings were found
655 *
656 * @details Advances the Mp until a string is found or exceeds RS_MAX_TRIES. Once a string is found, Mp is located at
657 * the start of the found string.
658 */
required_string_either(const char * str1,const char * str2)659 int required_string_either(const char *str1, const char *str2)
660 {
661 ignore_white_space();
662
663 for (int count = 0; count < RS_MAX_TRIES; ++count) {
664 if (strnicmp(str1, Mp, strlen(str1)) == 0) {
665 // Mp += strlen(str1);
666 diag_printf("Found required string [%s]\n", token_found = str1);
667 return 0;
668 } else if (strnicmp(str2, Mp, strlen(str2)) == 0) {
669 // Mp += strlen(str2);
670 diag_printf("Found required string [%s]\n", token_found = str2);
671 return 1;
672 }
673
674 error_display(1, "Required token = [%s] or [%s], found [%.32s].\n", str1, str2, next_tokens());
675
676 advance_to_eoln(NULL);
677 ignore_white_space();
678 }
679
680 throw parse::ParseException("Required string not found");
681 }
682
683 /**
684 * @brief Checks for one of any of the given required strings.
685 *
686 * @returns The index number of the found string, if it was found
687 * @returns -1 if a string was not found
688 *
689 * @details By ngld, with some tweaks by MageKing17.
690 */
required_string_one_of(int arg_count,...)691 int required_string_one_of(int arg_count, ...)
692 {
693 Assertion(arg_count > 0, "required_string_one_of() called with arg_count of %d; get a coder!\n", arg_count);
694 int count = 0;
695 int idx;
696 char *expected;
697 SCP_string message = "";
698 va_list vl;
699
700 ignore_white_space();
701
702 while (count < RS_MAX_TRIES) {
703 va_start(vl, arg_count);
704 for (idx = 0; idx < arg_count; idx++) {
705 expected = va_arg(vl, char*);
706 if (strnicmp(expected, Mp, strlen(expected)) == 0) {
707 diag_printf("Found required string [%s]", token_found = expected);
708 va_end(vl);
709 return idx;
710 }
711 }
712 va_end(vl);
713
714 if (message.empty()) {
715 va_start(vl, arg_count);
716 message = "Required token = ";
717 for (idx = 0; idx < arg_count; idx++) {
718 message += "[";
719 message += va_arg(vl, char*);
720 message += "]";
721 if (arg_count == 2 && idx == 0) {
722 message += " or ";
723 } else if (idx == arg_count - 2) {
724 message += ", or ";
725 } else if (idx < arg_count - 2) {
726 message += ", ";
727 }
728 }
729 va_end(vl);
730 }
731
732 error_display(1, "%s, found [%.32s]\n", message.c_str(), next_tokens());
733 advance_to_eoln(NULL);
734 ignore_white_space();
735 count++;
736 }
737
738 return -1;
739 }
740
required_string_either_fred(const char * str1,const char * str2)741 int required_string_either_fred(const char *str1, const char *str2)
742 {
743 ignore_white_space();
744
745 while (*Mp != '\0') {
746 if (!strnicmp(str1, Mp, strlen(str1))) {
747 // Mp += strlen(str1);
748 diag_printf("Found required string [%s]\n", token_found = str1);
749 return fred_parse_flag = 0;
750
751 } else if (!strnicmp(str2, Mp, strlen(str2))) {
752 // Mp += strlen(str2);
753 diag_printf("Found required string [%s]\n", token_found = str2);
754 return fred_parse_flag = 1;
755 }
756
757 advance_to_eoln(NULL);
758 ignore_white_space();
759 }
760
761 if (*Mp == '\0')
762 diag_printf("Unable to find either required token [%s] or [%s]\n", str1, str2);
763
764 return -1;
765 }
766
767 // Copy characters from instr to outstr until eoln is found, or until max
768 // characters have been copied (including terminator).
copy_to_eoln(char * outstr,const char * more_terminators,const char * instr,int max)769 void copy_to_eoln(char *outstr, const char *more_terminators, const char *instr, int max)
770 {
771 int count = 0;
772 char ch;
773 char terminators[128];
774
775 Assert((more_terminators == NULL) || (strlen(more_terminators) < 125));
776
777 terminators[0] = EOLN;
778 terminators[1] = 0;
779 if (more_terminators != NULL)
780 strcat_s(terminators, more_terminators);
781
782 while (((ch = *instr++) != 0) && (strchr(terminators, ch) == NULL) && (count < max)) {
783 *outstr++ = ch;
784 count++;
785 }
786
787 if (count >= max)
788 error_display(0, "Token too long: [%s]. Length = " SIZE_T_ARG ". Max is %i.\n", next_tokens(), strlen(next_tokens()), max);
789
790 *outstr = 0;
791 }
792
793 // Ditto for SCP_string.
copy_to_eoln(SCP_string & outstr,const char * more_terminators,const char * instr)794 void copy_to_eoln(SCP_string &outstr, const char *more_terminators, const char *instr)
795 {
796 char ch;
797 char terminators[128];
798
799 Assert((more_terminators == NULL) || (strlen(more_terminators) < 125));
800
801 terminators[0] = EOLN;
802 terminators[1] = 0;
803 if (more_terminators != NULL)
804 strcat_s(terminators, more_terminators);
805
806 outstr = "";
807 while (((ch = *instr++) != 0) && (strchr(terminators, ch) == NULL)) {
808 outstr.append(1, ch);
809 }
810 }
811
812 // Copy characters from instr to outstr until next white space is found, or until max
813 // characters have been copied (including terminator).
copy_to_next_white(char * outstr,const char * instr,int max)814 void copy_to_next_white(char *outstr, const char *instr, int max)
815 {
816 int count = 0;
817 bool in_quotes = false;
818 char ch;
819
820 while (((ch = *instr++)>0) && (ch != EOLN) && (ch != '\0') && (count < max)) {
821 if ( ch == '\"' ) {
822 in_quotes = !in_quotes;
823 continue;
824 }
825
826 if ( !in_quotes && is_white_space(ch) ) // not in quotes, white space terminates string
827 break;
828
829 if ( !in_quotes && is_parenthesis(ch) ) // not in quotes, parentheses are important for parsing so we don't want to copy them
830 break;
831
832 *outstr++ = ch;
833 count++;
834 }
835
836 if (count >= max)
837 error_display(0, "Token too long: [%s]. Length = " SIZE_T_ARG ". Max is %i.\n", next_tokens(), strlen(next_tokens()), max);
838
839 *outstr = 0;
840 }
841
842 // Ditto for SCP_string.
copy_to_next_white(SCP_string & outstr,const char * instr)843 void copy_to_next_white(SCP_string &outstr, const char *instr)
844 {
845 bool in_quotes = false;
846 char ch;
847
848 outstr = "";
849 while (((ch = *instr++)>0) && (ch != EOLN) && (ch != '\0')) {
850 if ( ch == '\"' ) {
851 in_quotes = !in_quotes;
852 continue;
853 }
854
855 if ( !in_quotes && is_white_space(ch) ) // not in quotes, white space terminates string
856 break;
857
858 if ( !in_quotes && is_parenthesis(ch) ) // not in quotes, parentheses are important for parsing so we don't want to copy them
859 break;
860
861 outstr.append(1, ch);
862 }
863 }
864
865 //Returns a null-terminated character string allocated with vm_malloc() with the data
alloc_text_until(const char * instr,const char * endstr)866 char* alloc_text_until(const char* instr, const char* endstr)
867 {
868 Assert(instr && endstr);
869 auto foundstr = stristr(instr, endstr);
870
871 if(foundstr == NULL)
872 {
873 Error(LOCATION, "Missing [%s] in file", endstr);
874 throw parse::ParseException("End string not found");
875 }
876 else
877 {
878 if ( (foundstr - instr) <= 0 ) {
879 Int3(); // since this really shouldn't ever happen
880 return NULL;
881 }
882
883 char* rstr = NULL;
884 rstr = (char*) vm_malloc((foundstr - instr + 1)*sizeof(char));
885
886 if(rstr != NULL) {
887 strncpy(rstr, instr, foundstr-instr);
888 rstr[foundstr-instr] = '\0';
889 } else {
890 Error(LOCATION, "Could not allocate enough memory in alloc_text_until");
891 }
892
893 return rstr;
894 }
895 }
896
897 // Copy text until a certain string is matched.
898 // For example, this is used to copy mission notes, scanning until $END NOTES:
899 // is found.
copy_text_until(char * outstr,const char * instr,const char * endstr,int max_chars)900 void copy_text_until(char *outstr, const char *instr, const char *endstr, int max_chars)
901 {
902 Assert(outstr && instr && endstr);
903
904 auto foundstr = stristr(instr, endstr);
905
906 if (foundstr == NULL) {
907 nprintf(("Error", "Error. Looking for [%s], but never found it.\n", endstr));
908 throw parse::ParseException("End string not found");
909 }
910
911 if (foundstr - instr + strlen(endstr) < (uint) max_chars) {
912 strncpy(outstr, instr, foundstr - instr);
913 outstr[foundstr - instr] = 0;
914
915 } else {
916 nprintf(("Error", "Error. Too much text (" SIZE_T_ARG " chars, %i allowed) before %s\n",
917 foundstr - instr + strlen(endstr), max_chars, endstr));
918
919 throw parse::ParseException("Too much text found");
920 }
921
922 diag_printf("Here's the partial wad of text:\n%.30s\n", outstr);
923 }
924
925 // Ditto for SCP_string.
copy_text_until(SCP_string & outstr,const char * instr,const char * endstr)926 void copy_text_until(SCP_string &outstr, const char *instr, const char *endstr)
927 {
928 Assert(instr && endstr);
929
930 auto foundstr = stristr(instr, endstr);
931
932 if (foundstr == NULL) {
933 nprintf(("Error", "Error. Looking for [%s], but never found it.\n", endstr));
934 throw parse::ParseException("End string not found");
935 }
936
937 outstr.assign(instr, foundstr - instr);
938
939 diag_printf("Here's the partial wad of text:\n%.30s\n", outstr.c_str());
940 }
941
942 // stuffs a string into a buffer. Can get a string between " marks and stops
943 // when whitespace is encounted -- not to end of line
stuff_string_white(char * outstr,int len)944 void stuff_string_white(char *outstr, int len)
945 {
946 if(!len)
947 len = NAME_LENGTH-1;
948
949 ignore_white_space();
950 copy_to_next_white(outstr, Mp, len);
951 advance_to_next_white();
952 }
953
954 // ditto for SCP_string
stuff_string_white(SCP_string & outstr)955 void stuff_string_white(SCP_string &outstr)
956 {
957 ignore_white_space();
958 copy_to_next_white(outstr, Mp);
959 advance_to_next_white();
960 }
961
962 // Goober5000
stuff_string_until(char * outstr,const char * endstr,int len)963 void stuff_string_until(char *outstr, const char *endstr, int len)
964 {
965 if(!len)
966 len = NAME_LENGTH-1;
967
968 ignore_gray_space();
969 copy_text_until(outstr, Mp, endstr, len);
970 Mp += strlen(outstr);
971 drop_trailing_white_space(outstr);
972 }
973
974 // Goober5000
stuff_string_until(SCP_string & outstr,const char * endstr)975 void stuff_string_until(SCP_string &outstr, const char *endstr)
976 {
977 ignore_gray_space();
978 copy_text_until(outstr, Mp, endstr);
979 Mp += outstr.length();
980 drop_trailing_white_space(outstr);
981 }
982
983 //WMC
984 //Used for allocating large blocks, eg of Python code
985 //Returns a null-terminated string allocated with vm_malloc(),
986 //or NULL on failure
987 //Does depth checks for the start and end strings
988 //extra_chars indicates extra malloc space that should be allocated.
alloc_block(const char * startstr,const char * endstr,int extra_chars)989 char* alloc_block(const char* startstr, const char* endstr, int extra_chars)
990 {
991 Assert(startstr != NULL && endstr != NULL);
992 Assert(stricmp(startstr, endstr));
993
994 char* rval = NULL;
995 auto elen = strlen(endstr);
996 auto slen = strlen(startstr);
997 size_t flen = 0;
998
999 //Skip the opening thing and any extra stuff
1000 required_string(startstr);
1001 ignore_white_space();
1002
1003 //Allocate it
1004 char* pos = Mp;
1005
1006 //Depth checking
1007 int level = 1;
1008 while(*pos != '\0')
1009 {
1010 if(!strnicmp(pos, startstr, slen))
1011 {
1012 level++;
1013 }
1014 else if(!strnicmp(pos, endstr, elen))
1015 {
1016 level--;
1017 }
1018
1019 if(level<=0)
1020 {
1021 break;
1022 }
1023
1024 pos++;
1025 }
1026
1027 //Check that we left the file
1028 if(level > 0)
1029 {
1030 Error(LOCATION, "Unclosed pair of \"%s\" and \"%s\" on line %d in file", startstr, endstr, get_line_num());
1031 throw parse::ParseException("End string not found");
1032 }
1033 else
1034 {
1035 //Set final length for faster calcs
1036 flen = pos-Mp;
1037
1038 //Allocate the memory
1039 //WMC - Don't forget the null character that's added later on.
1040 rval = (char*) vm_malloc((flen + extra_chars + 1)*sizeof(char));
1041
1042 //Copy the text (if memory was allocated)
1043 if(rval != NULL) {
1044 strncpy(rval, Mp, flen);
1045 rval[flen] = '\0';
1046 } else {
1047 return NULL;
1048 }
1049 }
1050
1051 //Skip the copied stuff
1052 Mp += flen;
1053 required_string(endstr);
1054 return rval;
1055 }
1056
1057 // Karajorma - Stuffs the provided char array with either the contents of a quoted string or the name of a string
1058 // variable. Returns PARSING_FOUND_STRING if a string was found or PARSING_FOUND_VARIABLE if a variable was present.
get_string_or_variable(char * str)1059 int get_string_or_variable (char *str)
1060 {
1061 int result = -1;
1062
1063 ignore_white_space();
1064
1065 // Variable
1066 if (*Mp == '@')
1067 {
1068 Mp++;
1069 stuff_string_white(str);
1070 int sexp_variable_index = get_index_sexp_variable_name(str);
1071
1072 // We only want String variables
1073 Assertion (sexp_variable_index != -1, "Didn't find variable name \"%s\"", str);
1074 Assert (Sexp_variables[sexp_variable_index].type & SEXP_VARIABLE_STRING);
1075
1076 result = PARSING_FOUND_VARIABLE;
1077 }
1078 // Quoted string
1079 else if (*Mp == '"')
1080 {
1081 get_string(str);
1082 result = PARSING_FOUND_STRING;
1083 }
1084 else
1085 {
1086 get_string(str);
1087 Error(LOCATION, "Invalid entry \"%s\" found in get_string_or_variable. Must be a quoted string or a string variable name.", str);
1088 }
1089
1090 return result;
1091 }
1092
1093 // ditto for SCP_string
get_string_or_variable(SCP_string & str)1094 int get_string_or_variable (SCP_string &str)
1095 {
1096 int result = -1;
1097
1098 ignore_white_space();
1099
1100 // Variable
1101 if (*Mp == '@')
1102 {
1103 Mp++;
1104 stuff_string_white(str);
1105 int sexp_variable_index = get_index_sexp_variable_name(str);
1106
1107 // We only want String variables
1108 Assertion (sexp_variable_index != -1, "Didn't find variable name \"%s\"", str.c_str());
1109 Assert (Sexp_variables[sexp_variable_index].type & SEXP_VARIABLE_STRING);
1110
1111 result = PARSING_FOUND_VARIABLE;
1112 }
1113 // Quoted string
1114 else if (*Mp == '"')
1115 {
1116 get_string(str);
1117 result = PARSING_FOUND_STRING;
1118 }
1119 else
1120 {
1121 get_string(str);
1122 Error(LOCATION, "Invalid entry \"%s\" found in get_string_or_variable. Must be a quoted string or a string variable name.", str.c_str());
1123 }
1124
1125 return result;
1126 }
1127
1128 /**
1129 * Stuff a string (" chars ") into *str, return length.
1130 * Accepts an optional max length parameter. If it is omitted or negative, then no max length is enforced.
1131 */
get_string(char * str,int max)1132 int get_string(char *str, int max)
1133 {
1134 auto len = strcspn(Mp + 1, "\"");
1135
1136 if (max >= 0 && len >= (size_t)max)
1137 error_display(0, "String too long. Length = " SIZE_T_ARG ". Max is %i.\n", len, max);
1138
1139 strncpy(str, Mp + 1, len);
1140 str[len] = 0;
1141
1142 Mp += len + 2;
1143 return (int)len;
1144 }
1145
1146 /**
1147 * Stuff a string (" chars ") into str.
1148 */
get_string(SCP_string & str)1149 void get_string(SCP_string &str)
1150 {
1151 auto len = strcspn(Mp + 1, "\"");
1152 str.assign(Mp + 1, len);
1153
1154 Mp += len + 2;
1155 }
1156
1157 // Stuff a string into a string buffer.
1158 // Supports various FreeSpace primitive types. If 'len' is supplied, it will override
1159 // the default string length if using the F_NAME case.
stuff_string(char * outstr,int type,int len,const char * terminators)1160 void stuff_string(char *outstr, int type, int len, const char *terminators)
1161 {
1162 char read_str[PARSE_BUF_SIZE] = "";
1163 int read_len = PARSE_BUF_SIZE;
1164 int final_len = len - 1;
1165 int tag_id;
1166
1167 // make sure we have enough room
1168 Assert( final_len > 0 );
1169
1170 // make sure it's zero'd out
1171 memset( outstr, 0, len );
1172
1173 switch (type) {
1174 case F_RAW:
1175 case F_LNAME:
1176 case F_NAME:
1177 case F_DATE:
1178 case F_FILESPEC:
1179 case F_PATHNAME:
1180 case F_MESSAGE:
1181 ignore_gray_space();
1182 copy_to_eoln(read_str, terminators, Mp, read_len);
1183 drop_trailing_white_space(read_str);
1184 advance_to_eoln(terminators);
1185 break;
1186
1187 case F_NOTES:
1188 ignore_white_space();
1189 copy_text_until(read_str, Mp, "$End Notes:", read_len);
1190 Mp += strlen(read_str);
1191 required_string("$End Notes:");
1192 break;
1193
1194 // F_MULTITEXTOLD keeping for backwards compatability with old missions
1195 // can be deleted once all missions are using new briefing format
1196
1197 case F_MULTITEXTOLD:
1198 ignore_white_space();
1199 copy_text_until(read_str, Mp, "$End Briefing Text:", read_len);
1200 Mp += strlen(read_str);
1201 required_string("$End Briefing Text:");
1202 break;
1203
1204 case F_MULTITEXT:
1205 ignore_white_space();
1206 copy_text_until(read_str, Mp, "$end_multi_text", read_len);
1207 Mp += strlen(read_str);
1208 drop_trailing_white_space(read_str);
1209 required_string("$end_multi_text");
1210 break;
1211
1212 default:
1213 Error(LOCATION, "Unhandled string type %d in stuff_string!", type);
1214 }
1215
1216 if (type == F_FILESPEC) {
1217 // Make sure that the passed string looks like a good filename
1218 if (strlen(read_str) == 0) {
1219 // Empty file name is probably not valid!
1220 error_display(0, "A file name was expected but no name was supplied! This is probably a mistake.");
1221 }
1222 }
1223
1224 // now we want to do any final localization
1225 if(type != F_RAW && type != F_LNAME)
1226 {
1227 lcl_ext_localize(read_str, outstr, final_len, &tag_id);
1228
1229 // if the hash localized text hash table is active and we have a valid external string - hash it
1230 if(fhash_active() && (tag_id > -2)){
1231 fhash_add_str(outstr, tag_id);
1232 }
1233 }
1234 else
1235 {
1236 if ( strlen(read_str) > (uint)final_len )
1237 error_display(0, "Token too long: [%s]. Length = " SIZE_T_ARG ". Max is %i.\n", read_str, strlen(read_str), final_len);
1238
1239 strncpy(outstr, read_str, final_len);
1240 }
1241
1242 diag_printf("Stuffed string = [%.30s]\n", outstr);
1243 }
1244
1245 // Stuff a string into a string buffer.
1246 // Supports various FreeSpace primitive types.
stuff_string(SCP_string & outstr,int type,const char * terminators)1247 void stuff_string(SCP_string &outstr, int type, const char *terminators)
1248 {
1249 SCP_string read_str;
1250 int tag_id;
1251
1252 // make sure it's zero'd out
1253 outstr = "";
1254
1255 switch (type) {
1256 case F_RAW:
1257 case F_LNAME:
1258 case F_NAME:
1259 case F_DATE:
1260 case F_FILESPEC:
1261 case F_PATHNAME:
1262 case F_MESSAGE:
1263 ignore_gray_space();
1264 copy_to_eoln(read_str, terminators, Mp);
1265 drop_trailing_white_space(read_str);
1266 advance_to_eoln(terminators);
1267 break;
1268
1269 case F_NOTES:
1270 ignore_white_space();
1271 copy_text_until(read_str, Mp, "$End Notes:");
1272 Mp += read_str.length();
1273 required_string("$End Notes:");
1274 break;
1275
1276 // F_MULTITEXTOLD keeping for backwards compatability with old missions
1277 // can be deleted once all missions are using new briefing format
1278
1279 case F_MULTITEXTOLD:
1280 ignore_white_space();
1281 copy_text_until(read_str, Mp, "$End Briefing Text:");
1282 Mp += read_str.length();
1283 required_string("$End Briefing Text:");
1284 break;
1285
1286 case F_MULTITEXT:
1287 ignore_white_space();
1288 copy_text_until(read_str, Mp, "$end_multi_text");
1289 Mp += read_str.length();
1290 drop_trailing_white_space(read_str);
1291 required_string("$end_multi_text");
1292 break;
1293
1294 default:
1295 Error(LOCATION, "Unhandled string type %d in stuff_string!", type);
1296 }
1297
1298 if (type == F_FILESPEC) {
1299 // Make sure that the passed string looks like a good filename
1300 if (read_str.empty()) {
1301 // Empty file name is not valid!
1302 error_display(1, "A file name was expected but no name was supplied!\n");
1303 }
1304 }
1305
1306 // now we want to do any final localization
1307 if(type != F_RAW && type != F_LNAME)
1308 {
1309 lcl_ext_localize(read_str, outstr, &tag_id);
1310
1311 // if the hash localized text hash table is active and we have a valid external string - hash it
1312 if(fhash_active() && (tag_id > -2)){
1313 fhash_add_str(outstr.c_str(), tag_id);
1314 }
1315 }
1316 else
1317 {
1318 outstr = read_str;
1319 }
1320
1321 diag_printf("Stuffed string = [%.30s]\n", outstr.c_str());
1322 }
1323
1324 // stuff a string, but only until the end of a line. don't ignore leading whitespace. close analog of fgets()/cfgets()
stuff_string_line(char * outstr,int len)1325 void stuff_string_line(char *outstr, int len)
1326 {
1327 char read_str[PARSE_BUF_SIZE] = "";
1328 int read_len = PARSE_BUF_SIZE;
1329 int final_len = len - 1;
1330 int tag_id;
1331
1332 Assert( final_len > 0 );
1333
1334 // read in a line
1335 copy_to_eoln(read_str, "\n", Mp, read_len);
1336 drop_trailing_white_space(read_str);
1337 advance_to_eoln("");
1338 Mp++;
1339
1340 // now we want to do any final localization
1341 lcl_ext_localize(read_str, outstr, final_len, &tag_id);
1342
1343 // if the hash localized text hash table is active and we have a valid external string - hash it
1344 if(fhash_active() && (tag_id > -2)){
1345 fhash_add_str(outstr, tag_id);
1346 }
1347
1348 diag_printf("Stuffed string = [%.30s]\n", outstr);
1349 }
1350
1351 // ditto for SCP_string
stuff_string_line(SCP_string & outstr)1352 void stuff_string_line(SCP_string &outstr)
1353 {
1354 SCP_string read_str;
1355 int tag_id;
1356
1357 // read in a line
1358 copy_to_eoln(read_str, "\n", Mp);
1359 drop_trailing_white_space(read_str);
1360 advance_to_eoln("");
1361 Mp++;
1362
1363 // now we want to do any final localization
1364 lcl_ext_localize(read_str, outstr, &tag_id);
1365
1366 // if the hash localized text hash table is active and we have a valid external string - hash it
1367 if(fhash_active() && (tag_id > -2)){
1368 fhash_add_str(outstr.c_str(), tag_id);
1369 }
1370
1371 diag_printf("Stuffed string = [%.30s]\n", outstr.c_str());
1372 }
1373
1374 // Exactly the same as stuff string only Malloc's the buffer.
1375 // Supports various FreeSpace primitive types. If 'len' is supplied, it will override
1376 // the default string length if using the F_NAME case.
stuff_and_malloc_string(int type,const char * terminators)1377 char *stuff_and_malloc_string(int type, const char *terminators)
1378 {
1379 SCP_string tmp_result;
1380
1381 stuff_string(tmp_result, type, terminators);
1382 drop_white_space(tmp_result);
1383
1384 if (tmp_result.empty())
1385 return NULL;
1386
1387 return vm_strdup(tmp_result.c_str());
1388 }
1389
stuff_malloc_string(char ** dest,int type,const char * terminators)1390 void stuff_malloc_string(char **dest, int type, const char *terminators)
1391 {
1392 Assert(dest != NULL); //wtf?
1393
1394 char *new_val = stuff_and_malloc_string(type, terminators);
1395
1396 if(new_val != NULL)
1397 {
1398 if((*dest) != NULL) {
1399 vm_free(*dest);
1400 }
1401
1402 (*dest) = new_val;
1403 }
1404 }
1405
// Collapse a multitext string into a single paragraph, in place:
// every CR is dropped and every LF becomes a space (i.e. as in MS-Word).
void compact_multitext_string(char *str)
{
	char *writer = str;
	const char *reader = str;

	for (; *reader != '\0'; ++reader)
	{
		const char ch = *reader;

		// CRs simply vanish; the writer falls behind the reader from here on
		if (ch == '\r')
			continue;

		if (ch == '\n')
			*writer = ' ';
		else if (writer != reader)
			*writer = ch;	// only shuffle characters once a CR has been dropped

		++writer;
	}

	// re-terminate only if the string actually got shorter
	if (writer != reader)
		*writer = 0;
}
1435
// SCP_string flavour of compact_multitext_string: collapse a multitext string
// into a single paragraph, in place. Every CR is dropped and every LF becomes a
// space; the string shrinks by the number of CRs removed.
void compact_multitext_string(SCP_string &str)
{
	auto p_dest = str.begin();

	for (auto p_src = str.begin(); p_src != str.end(); ++p_src)
	{
		// skip CR
		if (*p_src == '\r')
			continue;

		// convert LF to space, copy everything else down over the gaps left by CRs
		*p_dest = (*p_src == '\n') ? ' ' : *p_src;
		++p_dest;
	}

	// Drop the whole stale tail. The single-iterator erase() overload removes just
	// one character, which left garbage behind whenever more than one CR was removed.
	str.erase(p_dest, str.end());
}
1464
// Converts a character from Windows-1252 to CP437.
// Values without an entry below are returned unchanged.
// The case labels are negative: callers in this file pass plain 'char' values, so
// bytes >= 0x80 only match when 'char' is signed on the platform.
int maybe_convert_foreign_character(int ch)
{
	switch (ch) {
		// CP437 128 - 143
		case -57:	return 128;
		case -4:	return 129;
		case -23:	return 130;
		case -30:	return 131;
		case -28:	return 132;
		case -32:	return 133;
		case -27:	return 134;
		case -25:	return 135;
		case -22:	return 136;
		case -21:	return 137;
		case -24:	return 138;
		case -17:	return 139;
		case -18:	return 140;
		case -20:	return 141;
		case -60:	return 142;
		case -59:	return 143;

		// CP437 144 - 159 (158 has no source character here)
		case -55:	return 144;
		case -26:	return 145;
		case -58:	return 146;
		case -12:	return 147;
		case -10:	return 148;
		case -14:	return 149;
		case -5:	return 150;
		case -7:	return 151;
		case -1:	return 152;
		case -42:	return 153;
		case -36:	return 154;
		case -94:	return 155;
		case -93:	return 156;
		case -91:	return 157;
		case -125:	return 159;

		// CP437 160 - 175 (169 has no source character here)
		case -31:	return 160;
		case -19:	return 161;
		case -13:	return 162;
		case -6:	return 163;
		case -15:	return 164;
		case -47:	return 165;
		case -86:	return 166;
		case -70:	return 167;
		case -65:	return 168;
		case -84:	return 170;
		case -67:	return 171;
		case -68:	return 172;
		case -95:	return 173;
		case -85:	return 174;
		case -69:	return 175;

		// scattered entries in the upper CP437 range
		case -33:	return 225;
		case -75:	return 230;
		case -79:	return 241;
		case -9:	return 246;
		case -80:	return 248;
		case -73:	return 250;
		case -78:	return 253;
		case -96:	return 255;

		// everything else passes through untouched
		default:	return ch;
	}
}
1689
1690 // Goober5000
1691 // Yarn - The capacity of out must be at least the value returned by
1692 // get_converted_string_length(in) (plus one if add_null is true).
1693 // Returns the number of characters written to out.
maybe_convert_foreign_characters(const char * in,char * out,bool add_null)1694 size_t maybe_convert_foreign_characters(const char *in, char *out, bool add_null)
1695 {
1696 if (Fred_running) {
1697 size_t len = strlen(in);
1698
1699 if (add_null) {
1700 strcpy(out, in);
1701 return len + 1;
1702 } else {
1703 strncpy(out, in, len);
1704 return len;
1705 }
1706 } else {
1707 auto inp = in;
1708 auto outp = out;
1709
1710 while (*inp != '\0') {
1711 if (*inp == SHARP_S) {
1712 *outp++ = 's';
1713 *outp++ = 's';
1714 } else if (Lcl_pl) {
1715 *outp++ = *inp;
1716 } else {
1717 *outp++ = (char) maybe_convert_foreign_character(*inp);
1718 }
1719 inp++;
1720 }
1721
1722 if (add_null) {
1723 *outp++ = '\0';
1724 }
1725
1726 return outp - out;
1727 }
1728 }
1729
1730 // Goober5000
maybe_convert_foreign_characters(SCP_string & text)1731 void maybe_convert_foreign_characters(SCP_string &text)
1732 {
1733 if (!Fred_running) {
1734 for (SCP_string::iterator ii = text.begin(); ii != text.end(); ++ii) {
1735 text.reserve(get_converted_string_length(text));
1736
1737 if (*ii == SHARP_S) {
1738 text.replace(ii, ii + 1, "ss");
1739 ++ii;
1740 } else if (!Lcl_pl) {
1741 *ii = (char) maybe_convert_foreign_character(*ii);
1742 }
1743 }
1744 }
1745 }
1746
1747 // Yarn - Returns what the length of the text will be after it's processed by
1748 // maybe_convert_foreign_characters, not including the null terminator.
get_converted_string_length(const char * text)1749 size_t get_converted_string_length(const char *text)
1750 {
1751 if (Fred_running) {
1752 return strlen(text);
1753 } else {
1754 size_t count = 0;
1755 auto s = strchr(text, SHARP_S);
1756 while (s != nullptr) {
1757 count++;
1758 s = strchr(s + 1, SHARP_S);
1759 }
1760 return strlen(text) + count;
1761 }
1762 }
1763
1764 // Yarn - Returns what the length of the text will be after it's processed by
1765 // maybe_convert_foreign_characters.
get_converted_string_length(const SCP_string & text)1766 size_t get_converted_string_length(const SCP_string &text)
1767 {
1768 if (Fred_running) {
1769 return text.size();
1770 } else {
1771 size_t count = 0;
1772 for (auto ii = text.begin(); ii != text.end(); ++ii) {
1773 if (*ii == SHARP_S) {
1774 count++;
1775 }
1776 }
1777 return text.size() + count;
1778 }
1779 }
1780
// Goober5000
// Parse the run of decimal digits at the start of 'text' that is immediately
// followed by 'separator'.
//   number       - receives the parsed value (0 if the separator comes first)
//   number_chars - receives how many digits were consumed
// Returns false, leaving both outputs untouched, if a non-digit or the end of
// the text is reached before the separator, or if there are more than 7 digits.
bool get_number_before_separator(int &number, int &number_chars, const char *text, char separator)
{
	char buf[8];
	int len = 0;

	for (const char *ch = text; ; ++ch)
	{
		// didn't find separator (ran out of text, or out of room in buf)
		if (*ch == '\0' || len == 8)
			return false;

		// found separator
		if (*ch == separator)
			break;

		// found nondigit
		// (isdigit() is only defined for unsigned char values, and text may hold
		// bytes >= 0x80 that are negative as plain char)
		if (!isdigit(static_cast<unsigned char>(*ch)))
			return false;

		// copying in progress
		buf[len++] = *ch;
	}

	// got an integer
	buf[len] = '\0';
	number = atoi(buf);
	number_chars = len;
	return true;
}
1814
// Goober5000
// SCP_string flavour: parse the run of decimal digits starting at text_pos that
// is immediately followed by 'separator'.
//   number       - receives the parsed value (0 if the separator comes first)
//   number_chars - receives how many digits were consumed
//   text_pos     - position inside 'text' at which to start reading
// Returns false, leaving both outputs untouched, if a non-digit or the end of
// the text is reached before the separator, or if there are more than 7 digits.
bool get_number_before_separator(int &number, int &number_chars, const SCP_string &text, SCP_string::iterator text_pos, char separator)
{
	char buf[8];
	int len = 0;

	for (SCP_string::iterator ch = text_pos; ; ++ch)
	{
		// didn't find separator (ran out of text, or out of room in buf)
		if (ch == text.end() || len == 8)
			return false;

		// found separator
		if (*ch == separator)
			break;

		// found nondigit
		// (isdigit() is only defined for unsigned char values, and text may hold
		// bytes >= 0x80 that are negative as plain char)
		if (!isdigit(static_cast<unsigned char>(*ch)))
			return false;

		// copying in progress
		buf[len++] = *ch;
	}

	// got an integer
	buf[len] = '\0';
	number = atoi(buf);
	number_chars = len;
	return true;
}
1848
matches_version_specific_tag(const char * line_start,bool & compatible_version,int & tag_len)1849 bool matches_version_specific_tag(const char *line_start, bool &compatible_version, int &tag_len)
1850 {
1851 // special version-specific comment
1852 // formatted like e.g. ;;FSO 3.7.0;;
1853 // Should now support anything from ;;FSO 3;; to ;;FSO 3.7.3.20151106;; -MageKing17
1854 if (strnicmp(line_start, ";;FSO ", 6) != 0)
1855 return false;
1856
1857 int major, minor, build, revis;
1858 int s_num = scan_fso_version_string(line_start, &major, &minor, &build, &revis);
1859
1860 if (s_num == 0)
1861 return false;
1862
1863 // hack for releases
1864 if (s_num == 4 && FS_VERSION_REVIS < 1000) {
1865 s_num = 3;
1866 }
1867
1868 const char *ch = line_start + 6;
1869 while ((*ch) != ';') {
1870 Assertion((*ch) != '\0', "String that was already guaranteed to end with semicolons did not end with semicolons; it's possible we have fallen into an alternate universe. Failing string: [%s]\n", line_start);
1871 ch++;
1872 }
1873 ch++;
1874 Assertion((*ch) == ';', "String that was guaranteed to have double semicolons did not; it's possible we have fallen into an alternate universe. Failing string: [%s]\n", line_start);
1875 ch++;
1876
1877 tag_len = (int)(ch - line_start);
1878 compatible_version = true;
1879
1880 // check whether major, minor, and build line up with this version
1881 if (major > FS_VERSION_MAJOR)
1882 {
1883 compatible_version = false;
1884 }
1885 else if (major == FS_VERSION_MAJOR && s_num > 1)
1886 {
1887 if (minor > FS_VERSION_MINOR)
1888 {
1889 compatible_version = false;
1890 }
1891 else if (minor == FS_VERSION_MINOR && s_num > 2)
1892 {
1893 if (build > FS_VERSION_BUILD)
1894 {
1895 compatible_version = false;
1896 }
1897 else if (build == FS_VERSION_BUILD && s_num > 3)
1898 {
1899 if (revis > FS_VERSION_REVIS)
1900 {
1901 compatible_version = false;
1902 }
1903 }
1904 }
1905 }
1906
1907 // true for tag match
1908 return true;
1909 }
1910
1911 // Strip comments from a line of input.
1912 // Goober5000 - rewritten for the second time
strip_comments(char * line,bool & in_quote,bool & in_multiline_comment_a,bool & in_multiline_comment_b)1913 void strip_comments(char *line, bool &in_quote, bool &in_multiline_comment_a, bool &in_multiline_comment_b)
1914 {
1915 char *writep = line;
1916 char *readp = line;
1917
1918 // copy all characters from read to write, unless they're commented
1919 while (*readp != '\r' && *readp != '\n' && *readp != '\0')
1920 {
1921 // only check for comments if not quoting
1922 if (!in_quote)
1923 {
1924 bool compatible_version;
1925 int tag_len;
1926
1927 // see what sort of comment characters we recognize
1928 if (!strncmp(readp, "/*", 2))
1929 {
1930 // comment styles are mutually exclusive
1931 if (!in_multiline_comment_b)
1932 in_multiline_comment_a = true;
1933 }
1934 else if (!strncmp(readp, "!*", 2))
1935 {
1936 // comment styles are mutually exclusive
1937 if (!in_multiline_comment_a)
1938 in_multiline_comment_b = true;
1939 }
1940 else if (!strncmp(readp, "*/", 2))
1941 {
1942 if (in_multiline_comment_a)
1943 {
1944 in_multiline_comment_a = false;
1945 readp += 2;
1946 continue;
1947 }
1948 }
1949 else if (!strncmp(readp, "*!", 2))
1950 {
1951 if (in_multiline_comment_b)
1952 {
1953 in_multiline_comment_b = false;
1954 readp += 2;
1955 continue;
1956 }
1957 }
1958 // special version-specific comment
1959 // formatted like e.g. ;;FSO 3.7.0;;
1960 else if (matches_version_specific_tag(readp, compatible_version, tag_len))
1961 {
1962 // comment passes, so advance pass the tag and keep reading
1963 if (compatible_version)
1964 {
1965 readp += tag_len;
1966 continue;
1967 }
1968 // comment does not pass, so ignore the line
1969 else
1970 {
1971 break;
1972 }
1973 }
1974 // standard comment
1975 else if (*readp == ';')
1976 {
1977 break;
1978 }
1979 }
1980
1981 // maybe toggle quoting
1982 if (*readp == '\"')
1983 in_quote = !in_quote;
1984
1985 // if not inside a comment, copy the characters
1986 if (!in_multiline_comment_a && !in_multiline_comment_b)
1987 {
1988 if (writep != readp)
1989 *writep = *readp;
1990
1991 writep++;
1992 }
1993
1994 // read the next character
1995 readp++;
1996 }
1997
1998 // if we moved any characters, or if we haven't reached the end of the string, then mark end-of-line and terminate string
1999 if (writep != readp || *readp != '\0')
2000 {
2001 writep[0] = EOLN;
2002 writep[1] = '\0';
2003 }
2004 }
2005
// Copy one line of text, starting at 'cur', into lineout.
//   lineout      - destination; always NUL-terminated on return
//   max_line_len - size of lineout; at most max_line_len - 1 characters are stored
//   start        - beginning of the whole text 'cur' points into
//   max_size     - number of valid characters available from 'start'
//   cur          - position to start reading from
// Carriage returns (13) are skipped; the line feed ending the line is stored.
// Returns the number of characters consumed from the input (skipped CRs included),
// or 0 if the end of the text was reached before anything was stored.
int parse_get_line(char *lineout, int max_line_len, const char *start, int max_size, const char *cur)
{
	char *const line_begin = lineout;
	int consumed = 0;

	for (int stored = 0; stored < max_line_len - 1; ++stored) {
		char c;

		// fetch the next character that is not a carriage return
		do {
			// ran out of input: close the line and report what we have
			if ( (cur - start) >= max_size ) {
				*lineout = 0;
				return (lineout > line_begin) ? consumed : 0;
			}

			c = *cur++;
			++consumed;
		} while ( c == 13 );

		*lineout++ = c;

		if ( c == '\n' )
			break;
	}

	*lineout = 0;
	return consumed;
}
2033
2034 // Read mission text, stripping comments.
2035 // When a comment is found, it is removed. If an entire line
2036 // consisted of a comment, a blank line is left in the input file.
2037 // Goober5000 - added ability to read somewhere other than Parse_text
read_file_text(const char * filename,int mode,char * processed_text,char * raw_text)2038 void read_file_text(const char *filename, int mode, char *processed_text, char *raw_text)
2039 {
2040 // copy the filename
2041 if (!filename)
2042 throw parse::ParseException("Invalid filename");
2043
2044 strcpy_s(Current_filename_sub, filename);
2045
2046 // if we are paused then processed_text and raw_text must not be NULL!!
2047 if ( !Bookmarks.empty() && ((processed_text == NULL) || (raw_text == NULL)) ) {
2048 Error(LOCATION, "ERROR: Neither processed_text nor raw_text may be NULL when parsing is paused!!\n");
2049 }
2050
2051 // read the raw text
2052 read_raw_file_text(filename, mode, raw_text);
2053
2054 if (processed_text == NULL)
2055 processed_text = Parse_text;
2056
2057 if (raw_text == NULL)
2058 raw_text = Parse_text_raw;
2059
2060 // process it (strip comments)
2061 process_raw_file_text(processed_text, raw_text);
2062 }
2063
2064 // Goober5000
read_file_text_from_default(const default_file & file,char * processed_text,char * raw_text)2065 void read_file_text_from_default(const default_file& file, char *processed_text, char *raw_text)
2066 {
2067 // we have no filename, so copy a substitute
2068 strcpy_s(Current_filename_sub, "internal default file");
2069
2070 // if we are paused then processed_text and raw_text must not be NULL!!
2071 if ( !Bookmarks.empty() && ((processed_text == NULL) || (raw_text == NULL)) ) {
2072 Error(LOCATION, "ERROR: Neither \"processed_text\" nor \"raw_text\" may be NULL when parsing is paused!!\n");
2073 }
2074
2075 // make sure to do this before anything else
2076 allocate_parse_text(file.size + 1);
2077
2078 // if we have no raw buffer, set it as the default raw text area
2079 if (raw_text == NULL)
2080 raw_text = Parse_text_raw;
2081
2082 auto text = reinterpret_cast<const char*>(file.data);
2083
2084 // copy text in the array (but only if the raw text and the array are not the same)
2085 if (raw_text != file.data)
2086 {
2087 // Copy the file contents into the array and null-terminate it
2088 // We have to make sure to adjust the size if the size of a char is more than 1
2089 strncpy(raw_text, text, file.size / sizeof(char));
2090 raw_text[file.size / sizeof(char)] = '\0';
2091 }
2092
2093 if (processed_text == NULL)
2094 processed_text = Parse_text;
2095
2096 // process the text
2097 process_raw_file_text(processed_text, raw_text);
2098 }
2099
stop_parse()2100 void stop_parse()
2101 {
2102 Assert( Bookmarks.empty() );
2103
2104 if (Parse_text != nullptr) {
2105 vm_free(Parse_text);
2106 Parse_text = nullptr;
2107 }
2108
2109 if (Parse_text_raw != nullptr) {
2110 vm_free(Parse_text_raw);
2111 Parse_text_raw = nullptr;
2112 }
2113
2114 Parse_text_size = 0;
2115 }
2116
allocate_parse_text(size_t size)2117 void allocate_parse_text(size_t size)
2118 {
2119 Assert( size > 0 );
2120
2121 // Make sure that there is space for the terminating null character
2122 size += 1;
2123
2124 if (size <= Parse_text_size) {
2125 // Make sure that a new parsing session does not use uninitialized data.
2126 memset( Parse_text, 0, sizeof(char) * Parse_text_size );
2127 memset( Parse_text_raw, 0, sizeof(char) * Parse_text_size);
2128 return;
2129 }
2130
2131 static ubyte parse_atexit = 0;
2132
2133 if (!parse_atexit) {
2134 atexit(stop_parse);
2135 parse_atexit = 1;
2136 }
2137
2138 if (Parse_text != nullptr) {
2139 vm_free(Parse_text);
2140 Parse_text = nullptr;
2141 }
2142
2143 if (Parse_text_raw != nullptr) {
2144 vm_free(Parse_text_raw);
2145 Parse_text_raw = nullptr;
2146 }
2147
2148 Parse_text = (char *) vm_malloc(sizeof(char) * size, memory::quiet_alloc);
2149 Parse_text_raw = (char *) vm_malloc(sizeof(char) * size, memory::quiet_alloc);
2150
2151 if ( (Parse_text == nullptr) || (Parse_text_raw == nullptr) ) {
2152 Error(LOCATION, "Unable to allocate enough memory for Parse_text! Aborting...\n");
2153 }
2154
2155 memset( Parse_text, 0, sizeof(char) * size );
2156 memset( Parse_text_raw, 0, sizeof(char) * size);
2157
2158 Parse_text_size = size;
2159 }
2160
2161 // Goober5000
read_raw_file_text(const char * filename,int mode,char * raw_text)2162 void read_raw_file_text(const char *filename, int mode, char *raw_text)
2163 {
2164 CFILE *mf;
2165 int file_is_encrypted;
2166
2167 Assert(filename);
2168
2169 mf = cfopen(filename, "rb", CFILE_NORMAL, mode);
2170 if (mf == NULL)
2171 {
2172 nprintf(("Error", "Wokka! Error opening file (%s)!\n", filename));
2173 throw parse::ParseException("Failed to open file");
2174 }
2175
2176 // read the entire file in
2177 int file_len = cfilelength(mf);
2178
2179 if(!file_len) {
2180 nprintf(("Error", "Oh noes!! File is empty! (%s)!\n", filename));
2181 throw parse::ParseException("Failed to open file");
2182 }
2183
2184 // For the possible Latin1 -> UTF-8 conversion we need to reallocate the raw_text at some point and we can only do
2185 // that if we have control over the raw_text pointer which is only the case if it's null.
2186 auto can_reallocate = raw_text == nullptr;
2187 if (raw_text == nullptr) {
2188 // allocate, or reallocate, memory for Parse_text and Parse_text_raw based on size we need now
2189 allocate_parse_text((size_t) (file_len + 1));
2190 // NOTE: this always has to be done *after* the allocate_mission_text() call!!
2191 raw_text = Parse_text_raw;
2192 }
2193
2194 // read first 10 bytes to determine if file is encrypted
2195 cfread(raw_text, MIN(file_len, 10), 1, mf);
2196 file_is_encrypted = is_encrypted(raw_text);
2197 cfseek(mf, 0, CF_SEEK_SET);
2198
2199 file_len = util::check_encoding_and_skip_bom(mf, filename);
2200
2201 if ( file_is_encrypted )
2202 {
2203 int unscrambled_len;
2204 char *scrambled_text;
2205 scrambled_text = (char*)vm_malloc(file_len+1);
2206 Assert(scrambled_text);
2207 cfread(scrambled_text, file_len, 1, mf);
2208 // unscramble text
2209 unencrypt(scrambled_text, file_len, raw_text, &unscrambled_len);
2210 file_len = unscrambled_len;
2211 vm_free(scrambled_text);
2212 }
2213 else
2214 {
2215 cfread(raw_text, file_len, 1, mf);
2216 }
2217
2218 //WMC - Slap a NULL character on here for the odd error where we forgot a #End
2219 raw_text[file_len] = '\0';
2220
2221 if (Unicode_text_mode) {
2222 // Validate the UTF-8 encoding
2223 auto invalid = utf8::find_invalid(raw_text, raw_text + file_len);
2224 if (invalid != raw_text + file_len) {
2225 auto isLatin1 = util::guessLatin1Encoding(raw_text, (size_t) file_len);
2226
2227 // We do the additional can_reallocate check here since we need control over raw_text to reencode the file
2228 if (isLatin1 && can_reallocate) {
2229 // Latin1 is the encoding of retail data and for legacy reasons we convert that to UTF-8.
2230 // We still output a warning though...
2231 Warning(LOCATION, "Found Latin-1 encoded file %s. This file will be automatically converted to UTF-8 but "
2232 "it may cause parsing issues with retail FS2 files since those contained invalid data.\n"
2233 "To silence this warning you must convert the files to UTF-8, e.g. by using a program like iconv.",
2234 filename);
2235
2236 // SDL2 has iconv functionality so we use that to convert from Latin1 to UTF-8
2237
2238 // We need the raw_text as fallback so we first need to copy the current
2239 SCP_string input_str = raw_text;
2240
2241 SCP_string buffer;
2242 bool success = unicode::convert_encoding(buffer, raw_text, unicode::Encoding::Encoding_iso8859_1, unicode::Encoding::Encoding_utf8);
2243
2244 if (Parse_text_size < buffer.length()) {
2245 allocate_parse_text(buffer.length());
2246 }
2247
2248 if (success) {
2249 strncpy(Parse_text_raw, buffer.c_str(), buffer.length());
2250 }
2251 else {
2252 Warning(LOCATION, "File reencoding failed!\n"
2253 "You will probably encounter encoding issues.");
2254
2255 // Copy the original data back to the mission text pointer so that we don't loose any data here
2256 strcpy(Parse_text_raw, input_str.c_str());
2257 }
2258 } else {
2259 Warning(LOCATION, "Found invalid UTF-8 encoding in file %s at position " PTRDIFF_T_ARG "!\n"
2260 "This may cause parsing errors and should be fixed!", filename, invalid - raw_text);
2261 }
2262 }
2263 }
2264
2265 cfclose(mf);
2266 }
2267
2268 // Goober5000, based partly on above iconv usage
coerce_to_utf8(SCP_string & buffer,const char * str)2269 void coerce_to_utf8(SCP_string &buffer, const char *str)
2270 {
2271 auto len = strlen(str);
2272
2273 // Validate the UTF-8 encoding
2274 auto invalid = utf8::find_invalid(str, str + len);
2275 if (invalid == str + len)
2276 {
2277 // turns out this is valid UTF-8
2278 buffer.assign(str);
2279 return;
2280 }
2281
2282 bool isLatin1 = util::guessLatin1Encoding(str, len);
2283
2284 // we can convert it
2285 if (isLatin1)
2286 {
2287 unicode::convert_encoding(buffer, str, unicode::Encoding::Encoding_iso8859_1, unicode::Encoding::Encoding_utf8);
2288 }
2289
2290 // unknown encoding, so just truncate
2291 buffer.assign(str, invalid - str);
2292 Warning(LOCATION, "Truncating non-UTF-8 string '%s' to '%s'!\n", str, buffer.c_str());
2293 }
2294
2295 // Goober5000
process_raw_file_text(char * processed_text,char * raw_text)2296 void process_raw_file_text(char* processed_text, char* raw_text)
2297 {
2298 SCP_string parse_exception_1402;
2299 unicode::convert_encoding(parse_exception_1402, "1402, \"Sie haben IPX-Protokoll als Protokoll ausgew\xE4hlt, aber dieses Protokoll ist auf Ihrer Maschine nicht installiert.\".\"\n", unicode::Encoding::Encoding_iso8859_1);
2300 SCP_string parse_exception_1117;
2301 unicode::convert_encoding(parse_exception_1117, "1117, \"\\r\\n\"Aucun web browser trouva. Del\xE0 isn't on emm\xE9nagea ou if \\r\\non est emm\xE9nagea, ca isn't set pour soient la default browser.\\r\\n\\r\\n\"\n", unicode::Encoding::Encoding_iso8859_1);
2302 SCP_string parse_exception_1337;
2303 unicode::convert_encoding(parse_exception_1337, "1337, \"(fr)Loading\"\n", unicode::Encoding::Encoding_iso8859_1);
2304 SCP_string parse_exception_3966;
2305 unicode::convert_encoding(parse_exception_3966, "3966, \"Es sieht so aus, als habe Staffel Kappa Zugriff auf die GTVA-Zugangscodes f\xFCr das System gehabt. Das ist ein ernstes Sicherheitsleck. Ihre IFF-Kennung erschien als \"verb\xFCndet\", so da\xDF sie sich dem Konvoi ungehindert n\xE4hern konnten. Zum Gl\xFC\x63k flogen Sie und Alpha 2 Geleitschutz und lie\xDF\x65n den Schwindel auffliegen, bevor Kappa ihren Befehl ausf\xFChren konnte.\"\n", unicode::Encoding::Encoding_iso8859_1);
2306
2307 char* mp;
2308 char* mp_raw;
2309 char outbuf[PARSE_BUF_SIZE];
2310 bool in_quote = false;
2311 bool in_multiline_comment_a = false;
2312 bool in_multiline_comment_b = false;
2313 int raw_text_len = (int)strlen(raw_text);
2314
2315 if (processed_text == NULL)
2316 processed_text = Parse_text;
2317
2318 if (raw_text == NULL)
2319 raw_text = Parse_text_raw;
2320
2321 Assert(processed_text != NULL);
2322 Assert(raw_text != NULL);
2323
2324 mp = processed_text;
2325 mp_raw = raw_text;
2326
2327 // strip comments from raw text, reading into file_text
2328 int num_chars_read = 0;
2329 while ((num_chars_read = parse_get_line(outbuf, PARSE_BUF_SIZE, raw_text, raw_text_len, mp_raw)) != 0) {
2330 mp_raw += num_chars_read;
2331
2332 // stupid hacks to make retail data work with fixed parser, per Mantis #3072
2333 if (!strcmp(outbuf, parse_exception_1402.c_str())) {
2334
2335 int offset = Unicode_text_mode ? 1 : 0;
2336 outbuf[121 + offset] = ' ';
2337 outbuf[122 + offset] = ' ';
2338 }
2339 else if (!strcmp(outbuf, parse_exception_1117.c_str())) {
2340 char* ch = &outbuf[11];
2341 do {
2342 *ch = *(ch + 1);
2343 ++ch;
2344 } while (*ch);
2345 }
2346 else if (!strcmp(outbuf, parse_exception_1337.c_str())) {
2347 outbuf[3] = '6';
2348 }
2349 else if (!strcmp(outbuf, parse_exception_3966.c_str())) {
2350 int offset = Unicode_text_mode ? 1 : 0;
2351 outbuf[171 + offset] = '\'';
2352 outbuf[181 + offset * 2] = '\'';
2353 }
2354
2355 strip_comments(outbuf, in_quote, in_multiline_comment_a, in_multiline_comment_b);
2356
2357 if (Unicode_text_mode) {
2358 // In unicode mode we simply assume that the text is already properly encoded in UTF-8
2359 // Also, since we don't know how big mp actually is since we get the pointer from the outside we can't use one of
2360 // the "safe" strcpy variants here...
2361 strcpy(mp, outbuf);
2362 mp += strlen(outbuf);
2363 } else {
2364 mp += maybe_convert_foreign_characters(outbuf, mp, false);
2365 }
2366 }
2367
2368 // Make sure the string is terminated properly
2369 *mp = *mp_raw = '\0';
2370 /*
2371 while (cfgets(outbuf, PARSE_BUF_SIZE, mf) != NULL) {
2372 if (strlen(outbuf) >= PARSE_BUF_SIZE-1)
2373 error_display(0, "Input string too long. Max is %i characters.\n%.256s\n", PARSE_BUF_SIZE, outbuf);
2374
2375 // If you hit this assert, it is probably telling you the obvious. The file
2376 // you are trying to read is truly too large. Look at *filename to see the file name.
2377 Assert(mp_raw - file_text_raw + strlen(outbuf) < PARSE_TEXT_SIZE);
2378 strcpy_s(mp_raw, outbuf);
2379 mp_raw += strlen(outbuf);
2380
2381 in_comment = strip_comments(outbuf, in_comment);
2382 strcpy_s(mp, outbuf);
2383 mp += strlen(outbuf);
2384 }
2385
2386 *mp = *mp_raw = EOF_CHAR;
2387 */
2388
2389 }
2390
debug_show_mission_text()2391 void debug_show_mission_text()
2392 {
2393 char *mp = Parse_text;
2394 char ch;
2395
2396 while ((ch = *mp++) != '\0')
2397 printf("%c", ch);
2398 }
2399
unexpected_numeric_char(char ch)2400 bool unexpected_numeric_char(char ch)
2401 {
2402 return (ch != '\0') && (ch != ',') && (ch != ')') && !is_white_space(ch);
2403 }
2404
2405 // Stuff a floating point value pointed at by Mp.
2406 // Advances past float characters.
stuff_float(float * f,bool optional)2407 int stuff_float(float *f, bool optional)
2408 {
2409 char *str_start = Mp;
2410 char *str_end;
2411
2412 // since strtof ignores white space anyway, might as well make it explicit
2413 ignore_white_space();
2414
2415 auto result = strtof(Mp, &str_end);
2416 bool success = false, comma = false;
2417 int retval = 0;
2418
2419 // no float found?
2420 if (result == 0.0f && str_end == Mp)
2421 {
2422 if (!optional)
2423 error_display(1, "Expected float, found [%.32s].\n", next_tokens());
2424 }
2425 else
2426 {
2427 *f = result;
2428 success = true;
2429 }
2430
2431 if (success)
2432 Mp = str_end;
2433
2434 // if an unexpected character is part of the number, the number parsing should fail
2435 if (success && unexpected_numeric_char(*Mp))
2436 {
2437 Mp = str_start;
2438 success = false;
2439 error_display(1, "Expected float, found [%.32s].\n", next_tokens(true));
2440 }
2441
2442 if (*Mp == ',')
2443 {
2444 comma = true;
2445 Mp++;
2446 }
2447
2448 if (optional && !success)
2449 Mp = str_start;
2450
2451 if (success)
2452 retval = 2;
2453 else if (optional)
2454 retval = comma ? 1 : 0;
2455 else
2456 skip_token();
2457
2458 diag_printf("Stuffed float: %f\n", *f);
2459 return retval;
2460 }
2461
2462 // Stuff an integer value pointed at by Mp.
2463 // Advances past integer characters.
stuff_int(int * i,bool optional)2464 int stuff_int(int *i, bool optional)
2465 {
2466 char *str_start = Mp;
2467
2468 // since atoi ignores white space anyway, might as well make it explicit
2469 ignore_white_space();
2470
2471 // this is a bit cumbersome
2472 size_t span;
2473 if (*Mp == '+' || *Mp == '-')
2474 {
2475 span = strspn(Mp + 1, "0123456789");
2476
2477 // account for the sign symbol, but not if it's the only valid character
2478 if (span > 0)
2479 ++span;
2480 }
2481 else
2482 span = strspn(Mp, "0123456789");
2483
2484 auto result = atoi(Mp);
2485 bool success = false, comma = false;
2486 int retval = 0;
2487
2488 // no int found?
2489 if (result == 0 && span == 0)
2490 {
2491 if (!optional)
2492 error_display(1, "Expected int, found [%.32s].\n", next_tokens());
2493 }
2494 else
2495 {
2496 *i = result;
2497 success = true;
2498 }
2499
2500 if (success)
2501 Mp += span;
2502
2503 // if an unexpected character is part of the number, the number parsing should fail
2504 if (success && unexpected_numeric_char(*Mp))
2505 {
2506 Mp = str_start;
2507 success = false;
2508 error_display(1, "Expected int, found [%.32s].\n", next_tokens(true));
2509 }
2510
2511 if (*Mp == ',')
2512 {
2513 comma = true;
2514 Mp++;
2515 }
2516
2517 if (optional && !success)
2518 Mp = str_start;
2519
2520 if (success)
2521 {
2522 retval = 2;
2523 diag_printf("Stuffed int: %d\n", *i);
2524 }
2525 else if (optional)
2526 retval = comma ? 1 : 0;
2527 else
2528 skip_token();
2529
2530 return retval;
2531 }
2532
2533 // Stuff a long value pointed at by Mp.
2534 // Advances past integer characters.
stuff_long(long * l,bool optional)2535 int stuff_long(long *l, bool optional)
2536 {
2537 char *str_start = Mp;
2538
2539 // since atol ignores white space anyway, might as well make it explicit
2540 ignore_white_space();
2541
2542 // this is a bit cumbersome
2543 size_t span;
2544 if (*Mp == '+' || *Mp == '-')
2545 {
2546 span = strspn(Mp + 1, "0123456789");
2547
2548 // account for the sign symbol, but not if it's the only valid character
2549 if (span > 0)
2550 ++span;
2551 }
2552 else
2553 span = strspn(Mp, "0123456789");
2554
2555 auto result = atol(Mp);
2556 bool success = false, comma = false;
2557 int retval = 0;
2558
2559 // no long found?
2560 if (result == 0 && span == 0)
2561 {
2562 if (!optional)
2563 error_display(1, "Expected long, found [%.32s].\n", next_tokens());
2564 }
2565 else
2566 {
2567 *l = result;
2568 success = true;
2569 }
2570
2571 if (success)
2572 Mp += span;
2573
2574 // if an unexpected character is part of the number, the number parsing should fail
2575 if (success && unexpected_numeric_char(*Mp))
2576 {
2577 Mp = str_start;
2578 success = false;
2579 error_display(1, "Expected long, found [%.32s].\n", next_tokens(true));
2580 }
2581
2582 if (*Mp == ',')
2583 {
2584 comma = true;
2585 Mp++;
2586 }
2587
2588 if (optional && !success)
2589 Mp = str_start;
2590
2591 if (success)
2592 {
2593 retval = 2;
2594 diag_printf("Stuffed long: %ld\n", *l);
2595 }
2596 else if (optional)
2597 retval = comma ? 1 : 0;
2598 else
2599 skip_token();
2600
2601 return retval;
2602 }
2603
stuff_float_optional(float * f)2604 int stuff_float_optional(float *f)
2605 {
2606 return stuff_float(f, true);
2607 }
2608
stuff_int_optional(int * i)2609 int stuff_int_optional(int *i)
2610 {
2611 return stuff_int(i, true);
2612 }
2613
2614 // Stuff an integer value pointed at by Mp. If a variable is found instead, stuff the value of that variable and record the
2615 // index of the variable in the following slot.
stuff_int_or_variable(int * i,int * var_index,bool need_positive_value)2616 void stuff_int_or_variable(int *i, int *var_index, bool need_positive_value)
2617 {
2618 if (*Mp == '@')
2619 {
2620 Mp++;
2621 int value = -1;
2622 SCP_string str;
2623 stuff_string(str, F_NAME);
2624
2625 int index = get_index_sexp_variable_name(str);
2626
2627 if (index > -1 && index < MAX_SEXP_VARIABLES)
2628 {
2629 if (Sexp_variables[index].type & SEXP_VARIABLE_NUMBER)
2630 {
2631 value = atoi(Sexp_variables[index].text);
2632 }
2633 else
2634 {
2635 error_display(1, "Invalid variable type \"%s\" found in mission. Variable must be a number variable!", str.c_str());
2636 }
2637 }
2638 else
2639 {
2640
2641 error_display(1, "Invalid variable name \"%s\" found.", str.c_str());
2642 }
2643
2644 // zero negative values if requested
2645 if (need_positive_value && value < 0)
2646 {
2647 value = 0;
2648 }
2649
2650 // Record the value of the index for FreeSpace
2651 *i = value;
2652 // Record the index itself because we may need it later.
2653 *var_index = index;
2654 }
2655 else
2656 {
2657 stuff_int(i);
2658 // Since we have a numerical value we don't have a SEXP variable index to add for next slot.
2659 *var_index = NOT_SET_BY_SEXP_VARIABLE;
2660 }
2661 }
2662
2663 //Stuffs boolean value.
2664 //Passes things off to stuff_boolean(bool)
stuff_boolean(int * i,bool a_to_eol)2665 void stuff_boolean(int *i, bool a_to_eol)
2666 {
2667 bool tempb;
2668 stuff_boolean(&tempb, a_to_eol);
2669 if(tempb)
2670 *i = 1;
2671 else
2672 *i = 0;
2673 }
2674
stuff_boolean_flag(int * i,int flag,bool a_to_eol)2675 void stuff_boolean_flag(int *i, int flag, bool a_to_eol)
2676 {
2677 bool temp;
2678 stuff_boolean(&temp, a_to_eol);
2679 if(temp)
2680 *i |= flag;
2681 else
2682 *i &= ~(flag);
2683 }
2684
2685 // Stuffs a boolean value pointed at by Mp.
2686 // YES/NO (supporting 1/0 now as well)
2687 // Now supports localization :) -WMC
2688
stuff_boolean(bool * b,bool a_to_eol)2689 void stuff_boolean(bool *b, bool a_to_eol)
2690 {
2691 char token[32];
2692 stuff_string_white(token, sizeof(token)/sizeof(char));
2693 if(a_to_eol)
2694 advance_to_eoln(NULL);
2695
2696 if( isdigit(token[0]))
2697 {
2698 if(token[0] != '0')
2699 *b = true;
2700 else
2701 *b = false;
2702 }
2703 else
2704 {
2705 if(!stricmp(token, "yes")
2706 || !stricmp(token, "true")
2707 || !stricmp(token, "ja") //German
2708 || !stricmp(token, "Oui") //French
2709 || !stricmp(token, "si") //Spanish
2710 || !stricmp(token, "ita vero") //Latin
2711 || !stricmp(token, "HIja'") || !stricmp(token, "HISlaH")) //Klingon
2712 {
2713 *b = true;
2714 }
2715 else if(!stricmp(token, "no")
2716 || !stricmp(token, "false")
2717 || !stricmp(token, "nein") //German
2718 || !stricmp(token, "Non") //French
2719 //I don't know spanish for "no"
2720 //But according to altavista, spanish for "No" is "no"
2721 //Go figure.
2722 || !stricmp(token, "minime") //Latin
2723 || !stricmp(token, "ghobe'")) //Klingon
2724 {
2725 *b = false;
2726 }
2727 else
2728 {
2729 *b = false;
2730 error_display(0, "Boolean '%s' type unknown; assuming 'no/false'",token);
2731 }
2732 }
2733
2734 diag_printf("Stuffed bool: %s\n", (b) ? NOX("true") : NOX("false"));
2735 }
2736
2737 // Stuff an integer value (cast to a ubyte) pointed at by Mp.
2738 // Advances past integer characters.
stuff_ubyte(ubyte * i)2739 void stuff_ubyte(ubyte *i)
2740 {
2741 int temp;
2742 stuff_int(&temp);
2743 *i = (ubyte)temp;
2744 }
2745
2746 template <typename T, typename F>
stuff_token_list(SCP_vector<T> & list,F stuff_one_token,const char * type_as_string)2747 void stuff_token_list(SCP_vector<T> &list, F stuff_one_token, const char *type_as_string)
2748 {
2749 list.clear();
2750
2751 ignore_white_space();
2752
2753 if (*Mp != '(')
2754 {
2755 error_display(1, "Reading %s list. Found [%c]. Expected '('.\n", type_as_string, *Mp);
2756 throw parse::ParseException("Syntax error");
2757 }
2758 Mp++;
2759
2760 ignore_white_space();
2761
2762 while (*Mp != ')')
2763 {
2764 T item;
2765 if (stuff_one_token(&item))
2766 list.push_back(std::move(item));
2767
2768 ignore_white_space();
2769
2770 if (*Mp == ',')
2771 {
2772 Mp++;
2773 ignore_white_space();
2774 }
2775 }
2776 Mp++;
2777 }
2778
2779 template <typename T, typename F>
stuff_token_list(T * listp,size_t list_max,F stuff_one_token,const char * type_as_string)2780 size_t stuff_token_list(T *listp, size_t list_max, F stuff_one_token, const char *type_as_string)
2781 {
2782 SCP_vector<T> list;
2783 stuff_token_list(list, stuff_one_token, type_as_string);
2784
2785 if (list_max < list.size())
2786 {
2787 error_display(0, "Too many items in %s list. Found " SIZE_T_ARG "; max is " SIZE_T_ARG ". List has been truncated.", type_as_string, list.size(), list_max);
2788 list.resize(list_max);
2789 }
2790
2791 size_t i = 0;
2792 for (const auto &item : list)
2793 listp[i++] = item;
2794
2795 Assert(i == list.size());
2796 return i;
2797 }
2798
2799 // If this data is going to be parsed multiple times (like for mission load), then the dest variable
2800 // needs to be set to zero in between parses, otherwise we keep bad data.
2801 // For tbm files, it must not be reset.
parse_string_flag_list(int * dest,flag_def_list defs[],size_t defs_size)2802 void parse_string_flag_list(int *dest, flag_def_list defs[], size_t defs_size)
2803 {
2804 Assert(dest!=NULL); //wtf?
2805
2806 SCP_vector<SCP_string> slp;
2807 stuff_string_list(slp);
2808
2809 for (auto &str : slp)
2810 {
2811 for (size_t j = 0; j < defs_size; j++)
2812 {
2813 if (!stricmp(str.c_str(), defs[j].name)) {
2814 (*dest) |= defs[j].def;
2815 }
2816 }
2817 }
2818 }
2819
stuff_bool_list(bool * blp,size_t max_bools)2820 size_t stuff_bool_list(bool *blp, size_t max_bools)
2821 {
2822 return stuff_token_list(blp, max_bools, [](bool *b)->bool {
2823 stuff_boolean(b, false);
2824 return true;
2825 }, "bool");
2826 }
2827
stuff_string_list(SCP_vector<SCP_string> & slp)2828 void stuff_string_list(SCP_vector<SCP_string> &slp)
2829 {
2830 stuff_token_list(slp, [](SCP_string *buf)->bool {
2831 if (*Mp != '\"') {
2832 error_display(0, "Missing quotation marks in string list.");
2833 // Since this is a bad token, skip characters until we find a comma, parenthesis, or EOLN
2834 advance_to_eoln(",)");
2835 return false;
2836 }
2837
2838 *buf = "";
2839 get_string(*buf);
2840
2841 return true;
2842 }, "string");
2843 }
2844
stuff_string_list(char slp[][NAME_LENGTH],size_t max_strings)2845 size_t stuff_string_list(char slp[][NAME_LENGTH], size_t max_strings)
2846 {
2847 SCP_vector<SCP_string> list;
2848 stuff_string_list(list);
2849
2850 if (max_strings < list.size())
2851 {
2852 error_display(0, "Too many items in %s list. Found " SIZE_T_ARG "; max is " SIZE_T_ARG ". List has been truncated.", "string", list.size(), max_strings);
2853 list.resize(max_strings);
2854 }
2855
2856 for (size_t i = 0; i < list.size(); ++i)
2857 {
2858 if (list[i].size() >= NAME_LENGTH)
2859 {
2860 Warning(LOCATION, "'%s' is too long and will be truncated. Max length is %d.", list[i].c_str(), NAME_LENGTH - 1);
2861 list[i].resize(NAME_LENGTH - 1);
2862 }
2863
2864 strcpy_s(slp[i], list[i].c_str());
2865 }
2866
2867 return list.size();
2868 }
2869
get_lookup_type_name(int lookup_type)2870 const char* get_lookup_type_name(int lookup_type)
2871 {
2872 switch (lookup_type) {
2873 case SHIP_TYPE:
2874 return "Ships";
2875 case SHIP_INFO_TYPE:
2876 return "Ship Classes";
2877 case WEAPON_POOL_TYPE:
2878 return "Weapon Pool";
2879 case WEAPON_LIST_TYPE:
2880 return "Weapon Types";
2881 case RAW_INTEGER_TYPE:
2882 return "Untyped integer list";
2883 case MISSION_LOADOUT_SHIP_LIST:
2884 return "Mission Loadout Ships";
2885 case MISSION_LOADOUT_WEAPON_LIST:
2886 return "Mission Loadout Weapons";
2887 case CAMPAIGN_LOADOUT_SHIP_LIST:
2888 return "Campaign Loadout Ships";
2889 case CAMPAIGN_LOADOUT_WEAPON_LIST:
2890 return "Campaign Loadout Weapons";
2891 }
2892
2893 return "Unknown lookup type, tell a coder!";
2894 }
2895
2896 // Stuffs an integer list.
2897 // This is of the form ( i* )
2898 // where i is an integer.
2899 // For example, (1) () (1 2 3) ( 1 ) are legal integer lists.
stuff_int_list(int * ilp,size_t max_ints,int lookup_type)2900 size_t stuff_int_list(int *ilp, size_t max_ints, int lookup_type)
2901 {
2902 return stuff_token_list(ilp, max_ints, [&](int *buf)->bool {
2903 if (*Mp == '"') {
2904 int num = 0;
2905 bool valid_negative = false;
2906 SCP_string str;
2907 get_string(str);
2908
2909 switch (lookup_type) {
2910 case SHIP_TYPE:
2911 num = ship_name_lookup(str.c_str()); // returns index of Ship[] entry with name
2912 if (num < 0)
2913 error_display(0, "Unable to find ship %s in stuff_int_list!", str.c_str());
2914 break;
2915
2916 case SHIP_INFO_TYPE:
2917 num = ship_info_lookup(str.c_str()); // returns index of Ship_info[] entry with name
2918 if (num < 0)
2919 error_display(0, "Unable to find ship class %s in stuff_int_list!", str.c_str());
2920 break;
2921
2922 case WEAPON_POOL_TYPE:
2923 num = weapon_info_lookup(str.c_str());
2924 if (num < 0)
2925 error_display(0, "Unable to find weapon class %s in stuff_int_list!", str.c_str());
2926 break;
2927
2928 case WEAPON_LIST_TYPE:
2929 num = weapon_info_lookup(str.c_str());
2930 if (str.empty())
2931 valid_negative = true;
2932 else if (num < 0)
2933 error_display(0, "Unable to find weapon class %s in stuff_int_list!", str.c_str());
2934 break;
2935
2936 case RAW_INTEGER_TYPE:
2937 num = atoi(str.c_str());
2938 valid_negative = true;
2939 break;
2940
2941 default:
2942 error_display(1, "Unknown lookup_type %d in stuff_int_list", lookup_type);
2943 break;
2944 }
2945
2946 if (num < 0 && !valid_negative)
2947 return false;
2948
2949 *buf = num;
2950 } else {
2951 stuff_int(buf);
2952 }
2953
2954 return true;
2955 }, get_lookup_type_name(lookup_type));
2956 }
2957
2958 // Karajorma/Goober5000 - Stuffs a loadout list by parsing a list of ship or weapon choices.
2959 // Unlike stuff_int_list it can deal with variables
stuff_loadout_list(SCP_vector<loadout_row> & list,int lookup_type)2960 void stuff_loadout_list(SCP_vector<loadout_row> &list, int lookup_type)
2961 {
2962 stuff_token_list(list, [&](loadout_row *buf)->bool {
2963 SCP_string str;
2964 int variable_found = get_string_or_variable(str);
2965
2966 // if we've got a variable get the variable index and copy its value into str so that regardless of whether we found
2967 // a variable or not it now holds the name of the ship or weapon we're interested in.
2968 if (variable_found) {
2969 Assert(lookup_type != CAMPAIGN_LOADOUT_SHIP_LIST);
2970 buf->index_sexp_var = get_index_sexp_variable_name(str);
2971
2972 if (buf->index_sexp_var < 0) {
2973 error_display(1, "Invalid SEXP variable name \"%s\" found in stuff_loadout_list.", str.c_str());
2974 }
2975
2976 str = Sexp_variables[buf->index_sexp_var].text;
2977 }
2978
2979 switch (lookup_type) {
2980 case MISSION_LOADOUT_SHIP_LIST:
2981 case CAMPAIGN_LOADOUT_SHIP_LIST:
2982 buf->index = ship_info_lookup(str.c_str());
2983 break;
2984
2985 case MISSION_LOADOUT_WEAPON_LIST:
2986 case CAMPAIGN_LOADOUT_WEAPON_LIST:
2987 buf->index = weapon_info_lookup(str.c_str());
2988 break;
2989
2990 default:
2991 Assertion(false, "Unsupported lookup type %d", lookup_type);
2992 return false;
2993 }
2994
2995 bool skip_this_entry = false;
2996
2997 // Complain if this isn't a valid ship or weapon and we are loading a mission. Campaign files can be loaded containing
2998 // no ships from the current tables (when swapping mods) so don't report that as an error.
2999 if (buf->index < 0 && (lookup_type == MISSION_LOADOUT_SHIP_LIST || lookup_type == MISSION_LOADOUT_WEAPON_LIST)) {
3000 error_display(0, "Invalid type \"%s\" found in loadout of mission file...skipping", str.c_str());
3001 skip_this_entry = true;
3002
3003 // increment counter for release FRED builds.
3004 Num_unknown_loadout_classes++;
3005 }
3006 else if ((Game_mode & GM_MULTIPLAYER) && (lookup_type == MISSION_LOADOUT_WEAPON_LIST) && (Weapon_info[buf->index].maximum_children_spawned > 300)){
3007 Warning(LOCATION, "Weapon '%s' has more than 300 possible spawned weapons over its lifetime! This can cause issues for Multiplayer.", Weapon_info[buf->index].name);
3008 }
3009
3010 if (!skip_this_entry) {
3011 // similarly, complain if this is a valid ship or weapon class that the player can't use
3012 if ((lookup_type == MISSION_LOADOUT_SHIP_LIST) && (!(Ship_info[buf->index].flags[Ship::Info_Flags::Player_ship])) ) {
3013 error_display(0, "Ship type \"%s\" found in loadout of mission file. This class is not marked as a player ship...skipping", str.c_str());
3014 skip_this_entry = true;
3015 }
3016 else if ((lookup_type == MISSION_LOADOUT_WEAPON_LIST) && (!(Weapon_info[buf->index].wi_flags[Weapon::Info_Flags::Player_allowed])) ) {
3017 nprintf(("Warning", "Warning: Weapon type %s found in loadout of mission file. This class is not marked as a player allowed weapon...skipping\n", str.c_str()));
3018 if ( !Is_standalone )
3019 error_display(0, "Weapon type \"%s\" found in loadout of mission file. This class is not marked as a player allowed weapon...skipping", str.c_str());
3020 skip_this_entry = true;
3021 }
3022 }
3023
3024 // Loadout counts are only needed for missions
3025 if (lookup_type == MISSION_LOADOUT_SHIP_LIST || lookup_type == MISSION_LOADOUT_WEAPON_LIST)
3026 {
3027 ignore_white_space();
3028
3029 // Now read in the number of this type available. The number must be positive
3030 stuff_int_or_variable(&buf->count, &buf->count_sexp_var, true);
3031 }
3032
3033 return !skip_this_entry;
3034 }, get_lookup_type_name(lookup_type));
3035 }
3036
3037 //Stuffs an float list like stuff_int_list.
stuff_float_list(float * flp,size_t max_floats)3038 size_t stuff_float_list(float* flp, size_t max_floats)
3039 {
3040 return stuff_token_list(flp, max_floats, [](float *f)->bool {
3041 stuff_float(f);
3042 return true;
3043 }, "float");
3044 }
3045
3046 // ditto the above, but a vector of floats...
stuff_float_list(SCP_vector<float> & flp)3047 void stuff_float_list(SCP_vector<float>& flp)
3048 {
3049 stuff_token_list(flp, [](float* buf)->bool {
3050 stuff_float(buf);
3051 return true;
3052 }, "float");
3053 }
3054
3055 // Stuff a vec3d struct, which is 3 floats.
stuff_vec3d(vec3d * vp)3056 void stuff_vec3d(vec3d *vp)
3057 {
3058 stuff_float(&vp->xyz.x);
3059 stuff_float(&vp->xyz.y);
3060 stuff_float(&vp->xyz.z);
3061 }
3062
stuff_parenthesized_vec3d(vec3d * vp)3063 void stuff_parenthesized_vec3d(vec3d *vp)
3064 {
3065 ignore_white_space();
3066
3067 if (*Mp != '(') {
3068 error_display(1, "Reading parenthesized vec3d. Found [%c]. Expected '('.\n", *Mp);
3069 throw parse::ParseException("Syntax error");
3070 } else {
3071 Mp++;
3072 stuff_vec3d(vp);
3073 ignore_white_space();
3074 if (*Mp != ')') {
3075 error_display(1, "Reading parenthesized vec3d. Found [%c]. Expected ')'.\n", *Mp);
3076 throw parse::ParseException("Syntax error");
3077 }
3078 Mp++;
3079 }
3080 }
3081
3082 // Stuffs vec3d list. *vlp is an array of vec3ds.
3083 // This is of the form ( (vec3d)* )
3084 // (where * is a kleene star, not a pointer indirection)
3085 // For example, ( (1 2 3) (2 3 4) (2 3 5) )
3086 // is a list of three vec3ds.
stuff_vec3d_list(vec3d * vlp,size_t max_vecs)3087 size_t stuff_vec3d_list(vec3d *vlp, size_t max_vecs)
3088 {
3089 return stuff_token_list(vlp, max_vecs, [](vec3d *buf)->bool {
3090 stuff_parenthesized_vec3d(buf);
3091 return true;
3092 }, "vec3d");
3093 }
3094
3095 // ditto the above, but a vector of vec3ds...
stuff_vec3d_list(SCP_vector<vec3d> & vec_list)3096 void stuff_vec3d_list(SCP_vector<vec3d> &vec_list)
3097 {
3098 stuff_token_list(vec_list, [](vec3d *buf)->bool {
3099 stuff_parenthesized_vec3d(buf);
3100 return true;
3101 }, "vec3d");
3102 }
3103
3104 // Stuff a matrix, which is 3 vec3ds.
stuff_matrix(matrix * mp)3105 void stuff_matrix(matrix *mp)
3106 {
3107 stuff_vec3d(&mp->vec.rvec);
3108 stuff_vec3d(&mp->vec.uvec);
3109 stuff_vec3d(&mp->vec.fvec);
3110 }
3111
3112 /**
3113 * @brief Given a string, find it in a string array.
3114 *
3115 * @param str1 is the string to be found.
3116 * @param strlist is the list of strings to search.
3117 * @param max is the number of entries in *strlist to scan.
3118 * @param description is only used for diagnostics in case it can't be found.
3119 * @param say_errors @c true if errors should be reported
3120 * @return
3121 */
string_lookup(const char * str1,const char * const * strlist,size_t max,const char * description,bool say_errors)3122 int string_lookup(const char *str1, const char* const *strlist, size_t max, const char *description, bool say_errors) {
3123 for (size_t i=0; i<max; i++) {
3124 Assert(strlen(strlist[i]) != 0); //-V805
3125
3126 if (!stricmp(str1, strlist[i]))
3127 return (int)i;
3128 }
3129
3130 if (say_errors)
3131 error_display(0, "Unable to find [%s] in %s list.\n", str1, description);
3132
3133 return -1;
3134 }
3135
3136 // Find a required string (*id), then stuff the text of type f_type that
3137 // follows it at *addr. *strlist[] contains the strings it should try to
3138 // match.
find_and_stuff(const char * id,int * addr,int f_type,const char * strlist[],size_t max,const char * description)3139 void find_and_stuff(const char *id, int *addr, int f_type, const char *strlist[], size_t max, const char *description)
3140 {
3141 char token[128];
3142 int checking_ship_classes = (stricmp(id, "$class:") == 0);
3143
3144 // Goober5000 - don't say errors when we're checking classes because 1) we have more checking to do; and 2) we will say a redundant error later
3145 required_string(id);
3146 stuff_string(token, f_type, sizeof(token));
3147 *addr = string_lookup(token, strlist, max, description, !checking_ship_classes);
3148
3149 // Goober5000 - handle certain FSPort idiosyncracies with ship classes
3150 if (*addr < 0 && checking_ship_classes)
3151 {
3152 int idx = ship_info_lookup(token);
3153
3154 if (idx >= 0)
3155 *addr = string_lookup(Ship_info[idx].name, strlist, max, description, 0);
3156 else
3157 *addr = -1;
3158 }
3159 }
3160
find_and_stuff_optional(const char * id,int * addr,int f_type,const char * const * strlist,size_t max,const char * description)3161 void find_and_stuff_optional(const char *id, int *addr, int f_type, const char * const *strlist, size_t max, const char *description)
3162 {
3163 char token[128];
3164
3165 if(optional_string(id))
3166 {
3167 stuff_string(token, f_type, sizeof(token));
3168 *addr = string_lookup(token, strlist, max, description, 1);
3169 }
3170 }
3171
3172 // Mp points at a string.
3173 // Find the string in the list of strings *strlist[].
3174 // Returns the index of the match, -1 if none.
match_and_stuff(int f_type,const char * const * strlist,int max,const char * description)3175 int match_and_stuff(int f_type, const char * const *strlist, int max, const char *description)
3176 {
3177 char token[128];
3178
3179 stuff_string(token, f_type, sizeof(token));
3180 return string_lookup(token, strlist, max, description, 0);
3181 }
3182
find_and_stuff_or_add(const char * id,int * addr,int f_type,char * strlist[],int * total,int max,const char * description)3183 void find_and_stuff_or_add(const char *id, int *addr, int f_type, char *strlist[], int *total,
3184 int max, const char *description)
3185 {
3186 char token[128];
3187
3188 *addr = -1;
3189 required_string(id);
3190 stuff_string(token, f_type, sizeof(token));
3191 if (*total)
3192 *addr = string_lookup(token, strlist, *total, description, 0);
3193
3194 if (*addr == -1) // not in list, so lets try and add it.
3195 {
3196 Assert(*total < max);
3197 strcpy(strlist[*total], token);
3198 *addr = (*total)++;
3199 }
3200 }
3201
3202 // pause current parsing so that some else can be parsed without interfering
3203 // with the currently parsing file
pause_parse()3204 void pause_parse()
3205 {
3206 Bookmark Mark;
3207
3208 Mark.filename = Current_filename;
3209 Mark.Mp = Mp;
3210 Mark.Warning_count = Warning_count;
3211 Mark.Error_count = Error_count;
3212
3213 Bookmarks.push_back(Mark);
3214 }
3215
3216 // unpause parsing to continue with previously parsing file
unpause_parse()3217 void unpause_parse()
3218 {
3219 Assert( !Bookmarks.empty() );
3220 if (Bookmarks.empty())
3221 return;
3222
3223 Bookmark Mark = Bookmarks.back();
3224
3225 Mp = Mark.Mp;
3226 Warning_count = Mark.Warning_count;
3227 Error_count = Mark.Error_count;
3228
3229 strcpy_s(Current_filename, Mark.filename.c_str());
3230
3231 Bookmarks.pop_back();
3232 }
3233
reset_parse(char * text)3234 void reset_parse(char *text)
3235 {
3236 if (text != NULL) {
3237 Mp = text;
3238 } else {
3239 Mp = Parse_text;
3240 }
3241
3242 Warning_count = 0;
3243 Error_count = 0;
3244
3245 strcpy_s(Current_filename, Current_filename_sub);
3246 }
3247
// Display number of warnings and errors at the end of a parse.
// Both lines are written with nprintf under the "Parse" category and report the current
// values of Error_count and Warning_count.
void display_parse_diagnostics()
{
	nprintf(("Parse", "\nParse complete.\n"));
	nprintf(("Parse", "%i errors. %i warnings.\n", Error_count, Warning_count));
}
3254
3255 // Splits a string into 2 lines if the string is wider than max_pixel_w pixels. A null
3256 // terminator is placed where required to make the first line <= max_pixel_w. The remaining
3257 // text is returned (leading whitespace removed). If the line doesn't need to be split,
3258 // NULL is returned.
split_str_once(char * src,int max_pixel_w)3259 char *split_str_once(char *src, int max_pixel_w)
3260 {
3261 char *brk = nullptr;
3262 int i, w, len;
3263 bool last_was_white = false;
3264
3265 Assert(src);
3266 Assert(max_pixel_w > 0);
3267
3268 gr_get_string_size(&w, nullptr, src);
3269 if ( (w <= max_pixel_w) && !strstr(src, "\n") ) {
3270 return nullptr; // string doesn't require a cut
3271 }
3272
3273 len = (int)strlen(src);
3274 for (i=0; i<len; i++) {
3275 gr_get_string_size(&w, nullptr, src, i);
3276
3277 if (w <= max_pixel_w) {
3278 if (src[i] == '\n') { // reached natural end of line
3279 src[i] = 0;
3280 return src + i + 1;
3281 }
3282 }
3283
3284 if (is_white_space(src[i])) {
3285 if (!last_was_white) {
3286 // only update the line break if:
3287 // a) we don't have a line break yet;
3288 // b) we're still within the required real estate
3289 // (basically we want the latest line break that doesn't go off the edge of the screen,
3290 // but if the *first* line break is off the end of the screen, we want that)
3291 if (brk == nullptr || w <= max_pixel_w) {
3292 brk = src + i;
3293 }
3294 }
3295
3296 last_was_white = true;
3297
3298 } else {
3299 last_was_white = false;
3300 }
3301 }
3302
3303 // if we are over max pixel width and weren't able to come up with a good non-word
3304 // split then just return the original src text and the calling function should
3305 // have to handle the result
3306 if ( (w > max_pixel_w) && ((i == 0) || !brk) ) {
3307 return src;
3308 }
3309
3310 if (!brk) {
3311 brk = src + i;
3312 }
3313
3314 *brk = 0;
3315 src = brk + 1;
3316 while (is_white_space(*src))
3317 src++;
3318
3319 if (!*src)
3320 return nullptr; // end of the string anyway
3321
3322 if (*src == '\n')
3323 src++;
3324
3325 return src;
3326 }
3327
// Size, in chars, of the local scratch buffer that split_str() declares.
#define SPLIT_STR_BUFFER_SIZE 512
3329
// --------------------------------------------------------------------------------------
// split_str()
//
// A general function that will split a string into several lines.  Lines are allowed up
// to max_pixel_w pixels.  Breaks are found in white space.  The source text is not
// modified; the results are reported as (pointer, length) pairs into src.
//
// Supports \n's in the strings!
//
// parameters:		src							=>		source string to be broken up
//						max_pixel_w					=>		max width of line in pixels
//						n_chars						=>		output array that will hold number of characters (bytes) in each line
//						p_str							=>		output array of pointers to start of lines within src
//						max_lines					=>		limit of number of lines to break src up into
//						max_line_length			=>		a line is also split once this many encoded bytes have been
//																accumulated for it, regardless of pixel width
//						ignore_char					=>		Ignore words starting with this character (skipped up to the next whitespace).
//																This is useful when you want to ignore embedded control information that starts
//																with a specific character, like $ or #
//																(the original comment calls this OPTIONAL with default -1; the default
//																 is not visible in this file)
//						strip_leading_whitespace	=>		if true, white space at the start of src and gray space
//																(per is_gray_space()) at the start of every line is skipped
//
//	returns:			number of lines src is broken into
//						(the original comment also promises -1 on error, but no path in this function returns -1)
//
int split_str(const char *src, int max_pixel_w, int *n_chars, const char **p_str, int max_lines, int max_line_length, unicode::codepoint_t ignore_char, bool strip_leading_whitespace)
{
	// encoded copy of the line currently being built; only used to measure its width
	char buffer[SPLIT_STR_BUFFER_SIZE];
	// position (inside src) of the most recent usable split point on the current line
	const char *breakpoint = NULL;
	int sw, new_line = 1, line_num = 0, last_was_white = 0;
	int ignore_until_whitespace, buf_index;

	// check our assumptions..
	Assert(src != NULL);
	Assert(n_chars != NULL);
	Assert(p_str != NULL);
	Assert(max_lines > 0);
	Assert(max_pixel_w > 0);

	Assertion(max_line_length > 0, "Max line length should be >0, not %d; get a coder!\n", max_line_length);

	memset(buffer, 0, sizeof(buffer));
	buf_index = 0;
	ignore_until_whitespace = 0;

	// get rid of any leading whitespace
	while (strip_leading_whitespace && is_white_space(*src))
		src++;

	new_line = 1;
	p_str[0] = NULL;

	// iterate through chars in line, keeping track of most recent "white space" location that can be used
	// as a line splitting point if necessary
	unicode::codepoint_range range(src);
	auto end_iter = std::end(range);
	auto iter = std::begin(range);
	for (; iter != end_iter; ++iter) {
		auto cp = *iter;

		if (line_num >= max_lines)
			return line_num;			// time to bail out

		// starting a new line of text, init stuff for that
		if (new_line) {
			p_str[line_num] = NULL;
			// the line does not start until the first non-gray-space codepoint
			if (strip_leading_whitespace && is_gray_space(cp))
				continue;

			p_str[line_num] = iter.pos();
			breakpoint = NULL;
			new_line = 0;
		}

		// we are inside a word that began with ignore_char: skip everything up to and
		// including the whitespace that ends it
		// NOTE(review): unlike the SCP_vector overload of split_str, a '\n' that ends the
		// ignored word is consumed here and therefore does not start a new line -- confirm
		// whether that difference is intended
		if (ignore_until_whitespace) {
			if ( is_white_space(cp) )
				ignore_until_whitespace = 0;

			continue;
		}

		// if we have a newline, split the line here
		if (cp == UNICODE_CHAR('\n')) {
			n_chars[line_num] = (int)(iter.pos() - p_str[line_num]);	// track length of line
			line_num++;
			if (line_num < max_lines) {
				p_str[line_num] = NULL;
			}
			new_line = 1;

			memset(buffer, 0, SPLIT_STR_BUFFER_SIZE);
			buf_index = 0;
			continue;
		}

		// start of a word to be ignored; neither it nor the rest of the word is measured
		if (cp == ignore_char) {
			ignore_until_whitespace = 1;
			continue;
		}

		if (is_gray_space(cp)) {
			if (!last_was_white)		// track at first whitespace in a series of whitespace
				breakpoint = iter.pos();

			last_was_white = 1;

		} else {
			// indicate next time around that this wasn't a whitespace character
			last_was_white = 0;
		}

		// NOTE(review): this guard is an Assertion only; presumably it compiles out of
		// release builds, in which case nothing stops the writes below from running past
		// the end of buffer -- confirm
		auto encoded_width = unicode::encoded_size(cp);
		Assertion(buf_index + encoded_width < SPLIT_STR_BUFFER_SIZE,
			"buffer overflow in split_str: screen width causes this text to be longer than %d characters!",
			SPLIT_STR_BUFFER_SIZE - 1);

		// throw it in our buffer
		unicode::encode(cp, &buffer[buf_index]);
		buf_index += (int)encoded_width;
		buffer[buf_index] = 0;  // null terminate it

		// measure the line so far (sw is compared against max_pixel_w below)
		gr_get_string_size(&sw, NULL, buffer);
		if (sw >= max_pixel_w || buf_index >= max_line_length) {
			const char *end;

			if (breakpoint) {
				// split at the remembered whitespace and rewind the iterator to it; the
				// loop's ++iter then steps past that whitespace codepoint
				end = breakpoint;
				iter = unicode::text_iterator(breakpoint, src, src + strlen(src));

			} else {
				end = iter.pos();  // force a split here since there is no whitespace
				--iter;  // reuse this character in next line
			}

			n_chars[line_num] = (int)(end - p_str[line_num]);	// track length of line
			Assert(n_chars[line_num]);
			line_num++;
			if (line_num < max_lines) {
				p_str[line_num] = NULL;
			}
			new_line = 1;

			memset(buffer, 0, sizeof(buffer));
			buf_index = 0;
			continue;
		}
	}	// end for

	// the text ended in the middle of a line: record that final, partial line
	if (!new_line && p_str[line_num]) {
		n_chars[line_num] = (int)(iter.pos() - p_str[line_num]);	// track length of line
		Assert(n_chars[line_num]);
		line_num++;
	}

	return line_num;
}
3482
split_str(const char * src,int max_pixel_w,SCP_vector<int> & n_chars,SCP_vector<const char * > & p_str,int max_line_length,unicode::codepoint_t ignore_char,bool strip_leading_whitespace)3483 int split_str(const char *src, int max_pixel_w, SCP_vector<int> &n_chars, SCP_vector<const char*> &p_str, int max_line_length, unicode::codepoint_t ignore_char, bool strip_leading_whitespace)
3484 {
3485 char buffer[SPLIT_STR_BUFFER_SIZE];
3486 const char *breakpoint = NULL;
3487 int sw, new_line = 1, line_num = 0, last_was_white = 0;
3488 int ignore_until_whitespace = 0, buf_index = 0;
3489
3490 // check our assumptions..
3491 Assert(src != NULL);
3492 Assert(max_pixel_w > 0);
3493
3494 Assertion(max_line_length > 0, "Max line length should be >0, not %d; get a coder!\n", max_line_length);
3495
3496 memset(buffer, 0, sizeof(buffer));
3497
3498 // get rid of any leading whitespace
3499 while (strip_leading_whitespace && is_white_space(*src))
3500 src++;
3501
3502 p_str.clear();
3503
3504 // iterate through chars in line, keeping track of most recent "white space" location that can be used
3505 // as a line splitting point if necessary
3506 unicode::codepoint_range range(src);
3507 auto end_iter = std::end(range);
3508 auto iter = std::begin(range);
3509 for (; iter != end_iter; ++iter) {
3510 auto cp = *iter;
3511
3512 // starting a new line of text, init stuff for that
3513 if (new_line) {
3514 if (strip_leading_whitespace && is_gray_space(cp))
3515 continue;
3516
3517 p_str.push_back(iter.pos());
3518 breakpoint = NULL;
3519 new_line = 0;
3520 }
3521
3522 // maybe skip leading whitespace
3523 if (ignore_until_whitespace) {
3524 if ( is_white_space(cp) ) {
3525 ignore_until_whitespace = 0;
3526
3527 // don't eat the newline
3528 if (cp == EOLN)
3529 --iter;
3530 }
3531
3532 continue;
3533 }
3534
3535 // if we have a newline, split the line here
3536 if (cp == UNICODE_CHAR('\n')) {
3537 n_chars.push_back((int)(iter.pos() - p_str[line_num])); // track length of line
3538 line_num++;
3539 new_line = 1;
3540
3541 memset(buffer, 0, SPLIT_STR_BUFFER_SIZE);
3542 buf_index = 0;
3543 continue;
3544 }
3545
3546 if (cp == ignore_char) {
3547 ignore_until_whitespace = 1;
3548 continue;
3549 }
3550
3551 if (is_gray_space(cp)) {
3552 if (!last_was_white) // track at first whitespace in a series of whitespace
3553 breakpoint = iter.pos();
3554
3555 last_was_white = 1;
3556
3557 } else {
3558 // indicate next time around that this wasn't a whitespace character
3559 last_was_white = 0;
3560 }
3561
3562 auto encoded_width = unicode::encoded_size(cp);
3563 Assertion(buf_index + encoded_width < SPLIT_STR_BUFFER_SIZE,
3564 "buffer overflow in split_str: screen width causes this text to be longer than %d characters!",
3565 SPLIT_STR_BUFFER_SIZE - 1);
3566
3567 // throw it in our buffer
3568 unicode::encode(cp, &buffer[buf_index]);
3569 buf_index += (int)encoded_width;
3570 buffer[buf_index] = 0; // null terminate it
3571
3572 gr_get_string_size(&sw, NULL, buffer);
3573 if (sw >= max_pixel_w || buf_index >= max_line_length) {
3574 const char *end;
3575
3576 if (breakpoint) {
3577 end = breakpoint;
3578 iter = unicode::text_iterator(breakpoint, src, src + strlen(src));
3579
3580 } else {
3581 end = iter.pos(); // force a split here since to whitespace
3582 --iter; // reuse this character in next line
3583 }
3584
3585 n_chars.push_back((int)(end - p_str[line_num])); // track length of line
3586 Assert(n_chars[line_num]);
3587 line_num++;
3588 new_line = 1;
3589
3590 memset(buffer, 0, sizeof(buffer));
3591 buf_index = 0;
3592 continue;
3593 }
3594 } // end for
3595
3596 if (!new_line && p_str[line_num]) {
3597 n_chars.push_back((int)(iter.pos() - p_str[line_num])); // track length of line
3598 Assert(n_chars[line_num]);
3599 line_num++;
3600 }
3601
3602 return line_num;
3603 }
3604
3605 // Goober5000
3606 // accounts for the dumb communications != communication, etc.
subsystem_stricmp(const char * str1,const char * str2)3607 int subsystem_stricmp(const char *str1, const char *str2)
3608 {
3609 Assert(str1 && str2);
3610
3611 // ensure len-1 will be valid
3612 if (!*str1 || !*str2)
3613 return stricmp(str1, str2);
3614
3615 // calc lengths
3616 auto len1 = (int)strlen(str1);
3617 auto len2 = (int)strlen(str2);
3618
3619 // get rid of trailing s on s1?
3620 if (SCP_tolower(*(str1+len1-1)) == 's')
3621 len1--;
3622
3623 // get rid of trailing s on s2?
3624 if (SCP_tolower(*(str2+len2-1)) == 's')
3625 len2--;
3626
3627 // once we remove the trailing s on both names, they should be the same length
3628 if (len1 == len2)
3629 return strnicmp(str1, str2, len1);
3630
3631 // if not, just do a regular comparison
3632 return stricmp(str1, str2);
3633 }
3634
3635 // Goober5000
3636 // current algorithm adapted from http://www.codeproject.com/string/stringsearch.asp
stristr(const char * str,const char * substr)3637 const char *stristr(const char *str, const char *substr)
3638 {
3639 // check for null and insanity
3640 Assert(str);
3641 Assert(substr);
3642 if (str == NULL || substr == NULL || *substr == '\0')
3643 return NULL;
3644
3645 // save both a lowercase and an uppercase version of the first character of substr
3646 char substr_ch_lower = SCP_tolower(*substr);
3647 char substr_ch_upper = SCP_toupper(*substr);
3648
3649 // find the maximum distance to search
3650 const char *upper_bound = str + strlen(str) - strlen(substr);
3651
3652 // loop through every character of str
3653 for (const char *start = str; start <= upper_bound; start++)
3654 {
3655 // check first character of substr
3656 if ((*start == substr_ch_upper) || (*start == substr_ch_lower))
3657 {
3658 // first character matched, so check the rest
3659 for (const char *str_ch = start+1, *substr_ch = substr+1; *substr_ch != '\0'; str_ch++, substr_ch++)
3660 {
3661 // character match?
3662 if (*str_ch == *substr_ch)
3663 continue;
3664
3665 // converted character match?
3666 if (SCP_tolower(*str_ch) == SCP_tolower(*substr_ch))
3667 continue;
3668
3669 // mismatch
3670 goto stristr_continue_outer_loop;
3671 }
3672
3673 // finished inner loop with success!
3674 return start;
3675 }
3676
3677 stristr_continue_outer_loop:
3678 /* NO-OP */ ;
3679 }
3680
3681 // no match
3682 return NULL;
3683 }
3684
3685 // non-const version
stristr(char * str,const char * substr)3686 char *stristr(char *str, const char *substr)
3687 {
3688 // check for null and insanity
3689 Assert(str);
3690 Assert(substr);
3691 if (str == NULL || substr == NULL || *substr == '\0')
3692 return NULL;
3693
3694 // save both a lowercase and an uppercase version of the first character of substr
3695 char substr_ch_lower = SCP_tolower(*substr);
3696 char substr_ch_upper = SCP_toupper(*substr);
3697
3698 // find the maximum distance to search
3699 const char *upper_bound = str + strlen(str) - strlen(substr);
3700
3701 // loop through every character of str
3702 for (char *start = str; start <= upper_bound; start++)
3703 {
3704 // check first character of substr
3705 if ((*start == substr_ch_upper) || (*start == substr_ch_lower))
3706 {
3707 // first character matched, so check the rest
3708 for (const char *str_ch = start+1, *substr_ch = substr+1; *substr_ch != '\0'; str_ch++, substr_ch++)
3709 {
3710 // character match?
3711 if (*str_ch == *substr_ch)
3712 continue;
3713
3714 // converted character match?
3715 if (SCP_tolower(*str_ch) == SCP_tolower(*substr_ch))
3716 continue;
3717
3718 // mismatch
3719 goto stristr_continue_outer_loop;
3720 }
3721
3722 // finished inner loop with success!
3723 return start;
3724 }
3725
3726 stristr_continue_outer_loop:
3727 /* NO-OP */ ;
3728 }
3729
3730 // no match
3731 return NULL;
3732 }
3733
// Goober5000
// Returns true if text consists of an optional leading '+' or '-' followed only by
// decimal digits.  The empty string is accepted (it evaluates to 0), and so is a lone sign.
bool can_construe_as_integer(const char *text)
{
	// isdigit() is only defined for values representable as unsigned char (or EOF), so
	// route every character through unsigned char; plain char may be negative for
	// non-ASCII bytes
	auto is_digit = [](char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; };

	// trivial case; evaluates to 0
	if (*text == '\0')
		return true;

	// number sign or digit for first char
	if ((*text != '+') && (*text != '-') && !is_digit(*text))
		return false;

	// check digits for rest
	for (const char *p = text + 1; *p != '\0'; ++p)
	{
		if (!is_digit(*p))
			return false;
	}

	return true;
}
3754
3755 // Goober5000
3756 // yoinked gratefully from dbugfile.cpp
vsprintf(SCP_string & dest,const char * format,va_list ap)3757 void vsprintf(SCP_string &dest, const char *format, va_list ap)
3758 {
3759 va_list copy;
3760
3761 #if defined(_MSC_VER) && _MSC_VER < 1800
3762 // Only Visual Studio >= 2013 supports va_copy
3763 // This isn't portable but should work for Visual Studio
3764 copy = ap;
3765 #else
3766 va_copy(copy, ap);
3767 #endif
3768
3769 int needed_length = vsnprintf(nullptr, 0, format, copy);
3770 va_end(copy);
3771
3772 if (needed_length < 0) {
3773 // Error
3774 return;
3775 }
3776
3777 dest.resize(static_cast<size_t>(needed_length));
3778 vsnprintf(&dest[0], dest.size() + 1, format, ap);
3779 }
3780
sprintf(SCP_string & dest,const char * format,...)3781 void sprintf(SCP_string &dest, const char *format, ...)
3782 {
3783 va_list args;
3784 va_start(args, format);
3785 vsprintf(dest, format, args);
3786 va_end(args);
3787 }
3788
3789 // Goober5000
end_string_at_first_hash_symbol(char * src,bool ignore_doubled_hash)3790 bool end_string_at_first_hash_symbol(char *src, bool ignore_doubled_hash)
3791 {
3792 char *p;
3793 Assert(src);
3794
3795 p = get_pointer_to_first_hash_symbol(src, ignore_doubled_hash);
3796 if (p)
3797 {
3798 while ((p != src) && (*(p-1) == ' '))
3799 p--;
3800
3801 *p = '\0';
3802 return true;
3803 }
3804
3805 return false;
3806 }
3807
3808 // Goober5000
end_string_at_first_hash_symbol(SCP_string & src,bool ignore_doubled_hash)3809 bool end_string_at_first_hash_symbol(SCP_string &src, bool ignore_doubled_hash)
3810 {
3811 int index = get_index_of_first_hash_symbol(src, ignore_doubled_hash);
3812 if (index >= 0)
3813 {
3814 while (index > 0 && src[index-1] == ' ')
3815 index--;
3816
3817 src.resize(index);
3818 return true;
3819 }
3820
3821 return false;
3822 }
3823
3824 // Goober5000
get_pointer_to_first_hash_symbol(char * src,bool ignore_doubled_hash)3825 char *get_pointer_to_first_hash_symbol(char *src, bool ignore_doubled_hash)
3826 {
3827 Assert(src);
3828
3829 if (ignore_doubled_hash)
3830 {
3831 for (auto ch = src; *ch; ++ch)
3832 {
3833 if (*ch == '#')
3834 {
3835 if (*(ch + 1) == '#')
3836 ++ch;
3837 else
3838 return ch;
3839 }
3840 }
3841 return nullptr;
3842 }
3843 else
3844 return strchr(src, '#');
3845 }
3846
3847 // Goober5000
get_pointer_to_first_hash_symbol(const char * src,bool ignore_doubled_hash)3848 const char *get_pointer_to_first_hash_symbol(const char *src, bool ignore_doubled_hash)
3849 {
3850 Assert(src);
3851
3852 if (ignore_doubled_hash)
3853 {
3854 for (auto ch = src; *ch; ++ch)
3855 {
3856 if (*ch == '#')
3857 {
3858 if (*(ch + 1) == '#')
3859 ++ch;
3860 else
3861 return ch;
3862 }
3863 }
3864 return nullptr;
3865 }
3866 else
3867 return strchr(src, '#');
3868 }
3869
3870 // Goober5000
get_index_of_first_hash_symbol(SCP_string & src,bool ignore_doubled_hash)3871 int get_index_of_first_hash_symbol(SCP_string &src, bool ignore_doubled_hash)
3872 {
3873 if (ignore_doubled_hash)
3874 {
3875 for (auto ch = src.begin(); ch != src.end(); ++ch)
3876 {
3877 if (*ch == '#')
3878 {
3879 if ((ch + 1) != src.end() && *(ch + 1) == '#')
3880 ++ch;
3881 else
3882 return (int)std::distance(src.begin(), ch);
3883 }
3884 }
3885 return -1;
3886 }
3887 else
3888 {
3889 size_t pos = src.find('#');
3890 return (pos == SCP_string::npos) ? -1 : (int)pos;
3891 }
3892 }
3893
// Goober5000
// Used for escape sequences: ## to #, !! to !, etc.
// Rewrites src in place so that every pair of adjacent ch characters becomes a single ch.
// Pairs are consumed left to right without overlapping, so a run of four collapses to two
// and a run of three to two (one pair plus the leftover single character).
void consolidate_double_characters(char *src, char ch)
{
	// dest trails (or equals) src, so every write lands on a position already read
	char *dest = src;

	while (*src != '\0')
	{
		// on a doubled character, drop the first half and let the copy below keep the second
		if (*src == ch && *(src + 1) == ch)
			++src;

		*dest++ = *src++;
	}

	*dest = '\0';
}
3911
3912 // Goober5000
replace_one(char * str,const char * oldstr,const char * newstr,size_t max_len,ptrdiff_t range)3913 ptrdiff_t replace_one(char *str, const char *oldstr, const char *newstr, size_t max_len, ptrdiff_t range)
3914 {
3915 Assert(str && oldstr && newstr);
3916
3917 // search
3918 char *ch = stristr(str, oldstr);
3919
3920 // found?
3921 if (ch)
3922 {
3923 // not found within bounds?
3924 if ((range > 0) && ((ch - str) > range))
3925 {
3926 return 0;
3927 }
3928
3929 // determine if replacement will exceed max len
3930 if (strlen(str) + strlen(newstr) - strlen(oldstr) > max_len)
3931 {
3932 return -1;
3933 }
3934
3935 // allocate temp string to hold extra stuff
3936 char *temp = (char *) vm_malloc(sizeof(char) * max_len);
3937
3938 // ensure allocation was successful
3939 if (temp)
3940 {
3941 // save remainder of string
3942 strcpy_s(temp, sizeof(char)*max_len, ch + strlen(oldstr));
3943
3944 // replace
3945 strcpy(ch, newstr);
3946
3947 // append rest of string
3948 strcpy(ch + strlen(newstr), temp);
3949 }
3950
3951 // free temp string
3952 vm_free(temp);
3953 }
3954 // not found
3955 else
3956 {
3957 return 0;
3958 }
3959
3960 // return pos of replacement
3961 return (ch - str);
3962 }
3963
3964 // Goober5000
replace_all(char * str,const char * oldstr,const char * newstr,size_t max_len,ptrdiff_t range)3965 ptrdiff_t replace_all(char *str, const char *oldstr, const char *newstr, size_t max_len, ptrdiff_t range)
3966 {
3967 ptrdiff_t val, tally = 0;
3968
3969 while ((val = replace_one(str, oldstr, newstr, max_len, range)) > 0)
3970 {
3971 tally++;
3972
3973 // adjust range (if we have one), because the text length might have changed
3974 if (range) {
3975 range += strlen(newstr) - strlen(oldstr);
3976 }
3977 }
3978
3979 return (val < 0) ? val : tally;
3980 }
3981
replace_one(SCP_string & context,const SCP_string & from,const SCP_string & to)3982 SCP_string& replace_one(SCP_string& context, const SCP_string& from, const SCP_string& to)
3983 {
3984 size_t foundHere;
3985 if ((foundHere = context.find(from, 0)) != SCP_string::npos)
3986 {
3987 context.replace(foundHere, from.length(), to);
3988 }
3989 return context;
3990 }
3991
replace_one(SCP_string & context,const char * from,const char * to)3992 SCP_string& replace_one(SCP_string& context, const char* from, const char* to)
3993 {
3994 size_t foundHere;
3995 if ((foundHere = context.find(from, 0)) != SCP_string::npos)
3996 {
3997 context.replace(foundHere, strlen(from), to);
3998 }
3999 return context;
4000 }
4001
4002 // http://www.cppreference.com/wiki/string/replace
replace_all(SCP_string & context,const SCP_string & from,const SCP_string & to)4003 SCP_string& replace_all(SCP_string& context, const SCP_string& from, const SCP_string& to)
4004 {
4005 size_t from_len = from.length();
4006 size_t to_len = to.length();
4007
4008 size_t lookHere = 0;
4009 size_t foundHere;
4010 while ((foundHere = context.find(from, lookHere)) != SCP_string::npos)
4011 {
4012 context.replace(foundHere, from_len, to);
4013 lookHere = foundHere + to_len;
4014 }
4015 return context;
4016 }
4017
4018 // http://www.cppreference.com/wiki/string/replace
replace_all(SCP_string & context,const char * from,const char * to)4019 SCP_string& replace_all(SCP_string& context, const char* from, const char* to)
4020 {
4021 size_t from_len = strlen(from);
4022 size_t to_len = strlen(to);
4023
4024 size_t lookHere = 0;
4025 size_t foundHere;
4026 while ((foundHere = context.find(from, lookHere)) != SCP_string::npos)
4027 {
4028 context.replace(foundHere, from_len, to);
4029 lookHere = foundHere + to_len;
4030 }
4031 return context;
4032 }
4033
4034 // WMC
4035 // Compares two strings, ignoring (last) extension
4036 // Returns 0 if equal, nonzero if not
strextcmp(const char * s1,const char * s2)4037 int strextcmp(const char *s1, const char *s2)
4038 {
4039 // sanity check
4040 Assert( (s1 != NULL) && (s2 != NULL) );
4041
4042 // find last '.' in both strings
4043 char *s1_end = (char *)strrchr(s1, '.');
4044 char *s2_end = (char *)strrchr(s2, '.');
4045
4046 // get length
4047 size_t s1_len, s2_len;
4048
4049 if (s1_end != NULL)
4050 s1_len = (s1_end - s1);
4051 else
4052 s1_len = strlen(s1);
4053
4054 if (s2_end != NULL)
4055 s2_len = (s2_end - s2);
4056 else
4057 s2_len = strlen(s2);
4058
4059 // if the lengths aren't the same then it's deffinitely not the same name
4060 if (s2_len != s1_len)
4061 return 1;
4062
4063 return strnicmp(s1, s2, s1_len);
4064 }
4065
// Goober5000
// Cuts str off at its last '.', removing the extension in place.
// Returns true if a '.' was found (and the string shortened), false otherwise.
bool drop_extension(char *str)
{
	char *const dot = strrchr(str, '.');
	if (dot == nullptr)
		return false;

	*dot = '\0';
	return true;
}
4078
4079 // Goober5000
drop_extension(SCP_string & str)4080 bool drop_extension(SCP_string &str)
4081 {
4082 size_t pos = str.rfind('.');
4083 if (pos != SCP_string::npos)
4084 {
4085 str.resize(pos);
4086 return true;
4087 }
4088
4089 return false;
4090 }
4091
4092 //WMC
backspace(char * src)4093 void backspace(char* src)
4094 {
4095 Assert(src!= NULL); //this would be bad
4096
4097 char *dest = src;
4098 src++;
4099
4100 while(*src != '\0') {
4101 *dest++ = *src++;
4102 }
4103
4104 *dest = '\0';
4105 }
4106
// Goober5000
// Writes the decimal representation of integer into buf with a comma between every group
// of three digits, e.g. 1234567 -> "1,234,567" and -1234567 -> "-1,234,567".
//
// parameters:	buf									=>	output buffer; must have room for the digits, the commas, an
//															optional '-' and the terminator (15 bytes cover any 32-bit int)
//					integer								=>	value to format; negative values get a leading '-' and are
//															grouped exactly like their magnitude
//					use_comma_with_four_digits	=>	whether four-digit values get a comma ("1,234") or not ("1234");
//															values with five or more digits always get commas
void format_integer_with_commas(char *buf, int integer, bool use_comma_with_four_digits)
{
	char reversed_digits[32];
	int num_digits = 0;

	// work on the magnitude as an unsigned value so that the most negative int,
	// whose magnitude does not fit in an int, is handled as well
	unsigned int magnitude = (integer < 0) ? (0u - static_cast<unsigned int>(integer)) : static_cast<unsigned int>(integer);

	// peel off the digits, least significant first
	do
	{
		reversed_digits[num_digits++] = static_cast<char>('0' + (magnitude % 10));
		magnitude /= 10;
	} while (magnitude != 0);

	// up to three digits never need a comma; four digits only on request
	const bool use_commas = (num_digits > 4) || (num_digits == 4 && use_comma_with_four_digits);

	char *out = buf;
	if (integer < 0)
		*out++ = '-';

	// emit most significant digit first; i is also the number of digits still to come,
	// so a comma is due whenever that count is a non-zero multiple of three
	for (int i = num_digits - 1; i >= 0; --i)
	{
		*out++ = reversed_digits[i];

		if (use_commas && i > 0 && (i % 3) == 0)
			*out++ = ',';
	}

	*out = '\0';
}
4147
// Goober5000
// Parses an ";;FSO major.minor.build.revis;;" version tag.  The longest form is tried
// first, then progressively shorter ones; components missing from the form that matched
// are set to 0.
// Returns the number of components read (4, 3, 2 or 1), or 0 -- with all four outputs
// zeroed -- if text matches none of the forms.
// there's probably a better way to do this, but this way works and is clear and short
int scan_fso_version_string(const char *text, int *major, int *minor, int *build, int *revis)
{
	// major.minor.build.revis
	if (sscanf(text, ";;FSO %i.%i.%i.%i;;", major, minor, build, revis) == 4)
		return 4;

	// from here on there is no revision
	*revis = 0;

	// major.minor.build
	if (sscanf(text, ";;FSO %i.%i.%i;;", major, minor, build) == 3)
		return 3;

	// ...nor a build
	*build = 0;

	// major.minor
	if (sscanf(text, ";;FSO %i.%i;;", major, minor) == 2)
		return 2;

	// ...nor a minor version
	*minor = 0;

	// major only
	if (sscanf(text, ";;FSO %i;;", major) == 1)
		return 1;

	// nothing recognisable at all
	*major = 0;
	return 0;
}
4176
4177 // Goober5000 - used for long Warnings, Errors, and FRED error messages with SEXPs
truncate_message_lines(SCP_string & text,int num_allowed_lines)4178 void truncate_message_lines(SCP_string &text, int num_allowed_lines)
4179 {
4180 Assert(num_allowed_lines > 0);
4181 size_t find_from = 0;
4182
4183 while (find_from < text.size())
4184 {
4185 if (num_allowed_lines <= 0)
4186 {
4187 text.resize(find_from);
4188 text.append("[...]");
4189 break;
4190 }
4191
4192 size_t pos = text.find('\n', find_from);
4193 if (pos == SCP_string::npos)
4194 break;
4195
4196 num_allowed_lines--;
4197 find_from = pos + 1;
4198 }
4199 }
4200
4201 // Goober5000 - ugh, I can't see why they didn't just use stuff_*_list for these;
4202 // the only difference is the lack of parentheses
4203
4204 // from aicode.cpp
4205 // Stuff a list of floats at *plist.
parse_float_list(float * plist,size_t size)4206 void parse_float_list(float *plist, size_t size)
4207 {
4208 for (size_t i=0; i<size; i++)
4209 {
4210 stuff_float(&plist[i]);
4211 }
4212 }
4213
4214 // from aicode.cpp
4215 // Stuff a list of ints at *plist.
parse_int_list(int * ilist,size_t size)4216 void parse_int_list(int *ilist, size_t size)
4217 {
4218 for (size_t i=0; i<size; i++)
4219 {
4220 stuff_int(&ilist[i]);
4221 }
4222 }
4223
4224 // parse a modular table of type "name_check" and parse it using the specified function callback
parse_modular_table(const char * name_check,void (* parse_callback)(const char * filename),int path_type,int sort_type)4225 int parse_modular_table(const char *name_check, void (*parse_callback)(const char *filename), int path_type, int sort_type)
4226 {
4227 SCP_vector<SCP_string> tbl_file_names;
4228 int i, num_files = 0;
4229
4230 if ( (name_check == NULL) || (parse_callback == NULL) || ((*name_check) != '*') ) {
4231 UNREACHABLE("parse_modular_table() called with invalid arguments; get a coder!\n");
4232 return 0;
4233 }
4234
4235 num_files = cf_get_file_list(tbl_file_names, path_type, name_check, sort_type);
4236
4237 Parsing_modular_table = true;
4238
4239 const auto ext = strrchr(name_check, '.');
4240
4241 for (i = 0; i < num_files; i++){
4242 if (ext != nullptr) {
4243 tbl_file_names[i] += ext;
4244 }
4245 mprintf(("TBM => Starting parse of '%s' ...\n", tbl_file_names[i].c_str()));
4246 (*parse_callback)(tbl_file_names[i].c_str());
4247 }
4248
4249 Parsing_modular_table = false;
4250
4251 return num_files;
4252 }
4253