1
2 // pgn.c
3
4 // includes
5
6 #include <ctype.h>
7 #include <errno.h>
8 #include <stdio.h>
9 #include <string.h>
10
11 #include "pgn.h"
12 #include "util.h"
13
14 // constants
15
16 static const bool DispMove = FALSE;
17 static const bool DispToken = FALSE;
18 static const bool DispChar = FALSE;
19
20 static const int TAB_SIZE = 8;
21
22 static const int CHAR_EOF = 256;
23
24 // types
25
// Token kinds. Single-character tokens use their own character code as
// token type, so the named kinds are numbered from 256 upwards.

enum token_t {
   TOKEN_ERROR = -1,
   TOKEN_EOF = 256,
   TOKEN_SYMBOL,  // 257
   TOKEN_STRING,  // 258
   TOKEN_INTEGER, // 259
   TOKEN_NAG,     // 260
   TOKEN_RESULT   // 261
};
35
36 // prototypes
37
38 static void pgn_token_read (pgn_t * pgn);
39 static void pgn_token_unread (pgn_t * pgn);
40
41 static void pgn_read_token (pgn_t * pgn);
42
43 static bool is_symbol_start (int c);
44 static bool is_symbol_next (int c);
45
46 static void pgn_skip_blanks (pgn_t * pgn);
47
48 static void pgn_char_read (pgn_t * pgn);
49 static void pgn_char_unread (pgn_t * pgn);
50
51 // functions
52
53 // pgn_open()
54
// Opens file_name for reading and resets the reader state stored in *pgn.
// A file that cannot be opened is reported through my_fatal() together with
// the strerror() text for errno.
//
// NOTE(review): pgn->game_nb is printed by the error messages in this file
// but is not assigned anywhere in the visible code - confirm the caller
// initialises it.
void pgn_open(pgn_t * pgn, const char file_name[]) {

   ASSERT(pgn!=NULL);
   ASSERT(file_name!=NULL);

   // input stream

   pgn->file = fopen(file_name,"r");

   if (pgn->file == NULL) {
      my_fatal("pgn_open(): can't open file \"%s\": %s\n",file_name,strerror(errno));
   }

   // character reader: nothing read yet, nothing pushed back, position at
   // line 1, column 0

   pgn->char_first = TRUE;
   pgn->char_unread = FALSE;
   pgn->char_line = 1;
   pgn->char_column = 0;
   pgn->char_hack = CHAR_EOF; // DEBUG

   // token reader: nothing read yet, nothing pushed back; the remaining
   // fields only hold placeholder values

   pgn->token_first = TRUE;
   pgn->token_unread = FALSE;
   pgn->token_type = TOKEN_ERROR; // DEBUG
   strcpy(pgn->token_string,"?"); // DEBUG
   pgn->token_length = -1; // DEBUG
   pgn->token_line = -1; // DEBUG
   pgn->token_column = -1; // DEBUG

   // game and move data, placeholders as well

   strcpy(pgn->result,"?"); // DEBUG
   strcpy(pgn->fen,"?"); // DEBUG

   pgn->move_line = -1; // DEBUG
   pgn->move_column = -1; // DEBUG
}
83
84 // pgn_close()
85
// Closes the stream held in pgn->file. The result of fclose() is discarded.
void pgn_close(pgn_t * pgn) {

   ASSERT(pgn!=NULL);

   (void) fclose(pgn->file);
}
92
93 // pgn_next_game()
94
pgn_next_game(pgn_t * pgn)95 bool pgn_next_game(pgn_t * pgn) {
96
97 char name[PGN_STRING_SIZE];
98 char value[PGN_STRING_SIZE];
99
100 ASSERT(pgn!=NULL);
101
102 // init
103
104 strcpy(pgn->result,"*");
105 strcpy(pgn->fen,"");
106
107 // loop
108
109 while (TRUE) {
110
111 pgn_token_read(pgn);
112
113 if (pgn->token_type != '[') break;
114
115 // tag
116
117 pgn_token_read(pgn);
118 if (pgn->token_type != TOKEN_SYMBOL) {
119 my_fatal("pgn_next_game(): malformed tag at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
120 }
121 strcpy(name,pgn->token_string);
122
123 pgn_token_read(pgn);
124 if (pgn->token_type != TOKEN_STRING) {
125 my_fatal("pgn_next_game(): malformed tag at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
126 }
127 strcpy(value,pgn->token_string);
128
129 pgn_token_read(pgn);
130 if (pgn->token_type != ']') {
131 my_fatal("pgn_next_game(): malformed tag at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
132 }
133
134 // special tag?
135
136 if (FALSE) {
137 } else if (my_string_equal(name,"Result")) {
138 strcpy(pgn->result,value);
139 } else if (my_string_equal(name,"FEN")) {
140 strcpy(pgn->fen,value);
141 }
142 }
143
144 if (pgn->token_type == TOKEN_EOF) return FALSE;
145
146 pgn_token_unread(pgn);
147
148 return TRUE;
149 }
150
151 // pgn_next_move()
152
pgn_next_move(pgn_t * pgn,char string[],int size)153 bool pgn_next_move(pgn_t * pgn, char string[], int size) {
154
155 int depth;
156
157 ASSERT(pgn!=NULL);
158 ASSERT(string!=NULL);
159 ASSERT(size>=PGN_STRING_SIZE);
160
161 // init
162
163 pgn->move_line = -1; // DEBUG
164 pgn->move_column = -1; // DEBUG
165
166 // loop
167
168 depth = 0;
169
170 while (TRUE) {
171
172 pgn_token_read(pgn);
173
174 if (FALSE) {
175
176 } else if (pgn->token_type == '(') {
177
178 // open RAV
179
180 depth++;
181
182 } else if (pgn->token_type == ')') {
183
184 // close RAV
185
186 if (depth == 0) {
187 my_fatal("pgn_next_move(): malformed variation at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
188 }
189
190 depth--;
191 ASSERT(depth>=0);
192
193 } else if (pgn->token_type == TOKEN_RESULT) {
194
195 // game finished
196
197 if (depth > 0) {
198 my_fatal("pgn_next_move(): malformed variation at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
199 }
200
201 return FALSE;
202
203 } else {
204
205 // skip optional move number
206
207 if (pgn->token_type == TOKEN_INTEGER) {
208 do pgn_token_read(pgn); while (pgn->token_type == '.');
209 }
210
211 // move must be a symbol
212
213 if (pgn->token_type != TOKEN_SYMBOL) {
214 my_fatal("pgn_next_move(): malformed move at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
215 }
216
217 // store move for later use
218
219 if (depth == 0) {
220
221 if (pgn->token_length >= size) {
222 my_fatal("pgn_next_move(): move too long at line %d, column %d, game %d\n",pgn->token_line,pgn->token_column,pgn->game_nb);
223 }
224
225 strcpy(string,pgn->token_string);
226 pgn->move_line = pgn->token_line;
227 pgn->move_column = pgn->token_column;
228 }
229
230 // skip optional NAGs
231
232 do pgn_token_read(pgn); while (pgn->token_type == TOKEN_NAG);
233 pgn_token_unread(pgn);
234
235 // return move
236
237 if (depth == 0) {
238 if (DispMove) printf("move=\"%s\"\n",string);
239 return TRUE;
240 }
241 }
242 }
243
244 ASSERT(FALSE);
245
246 return FALSE;
247 }
248
249 // pgn_token_read()
250
// Makes the next token current (pgn->token_type, token_string, ...).
// After pgn_token_unread() the token already stored in *pgn is handed out
// once more instead of reading the file. A token of type TOKEN_ERROR is
// reported through my_fatal().
static void pgn_token_read(pgn_t * pgn) {

   ASSERT(pgn!=NULL);

   // pushed-back token: it is still in place, just clear the flag

   if (pgn->token_unread) {
      pgn->token_unread = FALSE;
      return;
   }

   // except on the very first call there is a current token, and it must
   // be one that can be followed by another

   if (!pgn->token_first) {
      ASSERT(pgn->token_type!=TOKEN_ERROR);
      ASSERT(pgn->token_type!=TOKEN_EOF);
   }

   pgn->token_first = FALSE;

   // run the lexer

   pgn_read_token(pgn);

   if (pgn->token_type == TOKEN_ERROR) {
      my_fatal("pgn_token_read(): lexical error at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
   }

   if (DispToken) {
      printf("< L%d C%d \"%s\" (%03X)\n",pgn->token_line,pgn->token_column,pgn->token_string,pgn->token_type);
   }
}
278
279 // pgn_token_unread()
280
// Pushes the current token back so that the next pgn_token_read() returns
// it again. Only one token can be pending, and a token must have been read
// before.
static void pgn_token_unread(pgn_t * pgn) {

   ASSERT(pgn!=NULL);

   ASSERT(!pgn->token_unread); // nothing pending yet
   ASSERT(!pgn->token_first);  // there is a token to push back

   pgn->token_unread = TRUE;
}
290
291 // pgn_read_token()
292
// Lexer core: skips blanks and comments, then reads exactly one token into
// pgn->token_type / token_string / token_length and records where it
// started in pgn->token_line / token_column.
//
// Token types produced:
//   TOKEN_EOF      end of file
//   the character  for each of . [ ] ( ) < >
//   TOKEN_RESULT   "*", "1-0", "0-1" or "1/2-1/2"
//   TOKEN_NAG      "$" followed by 1 to 3 digits, or one of "!", "?", "!!",
//                  "??", "!?", "?!" translated to "1", "2", "3", "4", "5",
//                  "6"; token_string holds the number only
//   TOKEN_INTEGER  a symbol consisting of digits only
//   TOKEN_SYMBOL   any other symbol
//   TOKEN_STRING   a double-quoted string without the quotes, with \" and
//                  \\ unescaped
//
// Over-long symbols, strings and NAGs, EOF inside a string, "$" without
// digits and characters that start no token go to my_fatal(). A NUL byte
// leaves token_type at TOKEN_ERROR.
static void pgn_read_token(pgn_t * pgn) {

   int first;        // first character of a "!" / "?" annotation
   const char * nag; // NAG number equivalent to that annotation

   ASSERT(pgn!=NULL);

   // skip white-space characters and comments; the first significant
   // character (or CHAR_EOF) is left in pgn->char_hack

   pgn_skip_blanks(pgn);

   // init

   pgn->token_type = TOKEN_ERROR;
   strcpy(pgn->token_string,"");
   pgn->token_length = 0;
   pgn->token_line = pgn->char_line;
   pgn->token_column = pgn->char_column;

   // determine token type

   if (pgn->char_hack == CHAR_EOF) {

      pgn->token_type = TOKEN_EOF;

   } else if (pgn->char_hack == '\0') {

      // NUL byte in the input. It has to be caught before the strchr()
      // based tests below, because strchr() also matches the terminator
      // of the set it searches. token_type stays TOKEN_ERROR, which
      // pgn_token_read() reports as a lexical error.

   } else if (strchr(".[]()<>",pgn->char_hack) != NULL) {

      // single-character token, its type is the character itself

      pgn->token_type = pgn->char_hack;
      sprintf(pgn->token_string,"%c",pgn->char_hack);
      pgn->token_length = 1;

   } else if (pgn->char_hack == '*') {

      // "game in progress" result

      pgn->token_type = TOKEN_RESULT;
      sprintf(pgn->token_string,"%c",pgn->char_hack);
      pgn->token_length = 1;

   } else if (pgn->char_hack == '!' || pgn->char_hack == '?') {

      // move annotation of one or two characters, stored as NAG number

      first = pgn->char_hack;

      pgn_char_read(pgn);

      if (pgn->char_hack == first) { // "!!" or "??"

         nag = (first == '!') ? "3" : "4";

      } else if (pgn->char_hack == '!' || pgn->char_hack == '?') { // "!?" or "?!"

         nag = (first == '!') ? "5" : "6";

      } else { // "!" or "?" alone; the look-ahead character is not ours

         pgn_char_unread(pgn);

         nag = (first == '!') ? "1" : "2";
      }

      pgn->token_type = TOKEN_NAG;
      strcpy(pgn->token_string,nag);
      pgn->token_length = 1;

   } else if (is_symbol_start(pgn->char_hack)) {

      // symbol, integer, or result; assume integer until a non-digit
      // shows up

      pgn->token_type = TOKEN_INTEGER;
      pgn->token_length = 0;

      do {

         if (pgn->token_length >= PGN_STRING_SIZE-1) {
            my_fatal("pgn_read_token(): symbol too long at line %d, column %d,game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
         }

         if (!isdigit(pgn->char_hack)) pgn->token_type = TOKEN_SYMBOL;

         pgn->token_string[pgn->token_length++] = pgn->char_hack;

         pgn_char_read(pgn);

         // the EOF marker and NUL are not characters of any symbol and are
         // never handed to the classifier, so a symbol that is the last
         // thing in the file ends here

      } while (pgn->char_hack != CHAR_EOF
            && pgn->char_hack != '\0'
            && is_symbol_next(pgn->char_hack));

      pgn_char_unread(pgn);

      ASSERT(pgn->token_length>0&&pgn->token_length<PGN_STRING_SIZE);
      pgn->token_string[pgn->token_length] = '\0';

      // the three decisive results look like symbols

      if (my_string_equal(pgn->token_string,"1-0")
       || my_string_equal(pgn->token_string,"0-1")
       || my_string_equal(pgn->token_string,"1/2-1/2")) {
         pgn->token_type = TOKEN_RESULT;
      }

   } else if (pgn->char_hack == '"') {

      // string

      pgn->token_type = TOKEN_STRING;
      pgn->token_length = 0;

      while (TRUE) {

         pgn_char_read(pgn);

         if (pgn->char_hack == CHAR_EOF) {
            my_fatal("pgn_read_token(): EOF in string at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
         }

         if (pgn->char_hack == '"') break; // closing quote

         if (pgn->char_hack == '\\') {

            // escape: \" and \\ stand for the second character alone

            pgn_char_read(pgn);

            if (pgn->char_hack == CHAR_EOF) {
               my_fatal("pgn_read_token(): EOF in string at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
            }

            if (pgn->char_hack != '"' && pgn->char_hack != '\\') {

               // bad escape, ignore: keep the backslash as ordinary text

               if (pgn->token_length >= PGN_STRING_SIZE-1) {
                  my_fatal("pgn_read_token(): string too long at line %d, column %d,game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
               }

               pgn->token_string[pgn->token_length++] = '\\';
            }
         }

         if (pgn->token_length >= PGN_STRING_SIZE-1) {
            my_fatal("pgn_read_token(): string too long at line %d, column %d,game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
         }

         pgn->token_string[pgn->token_length++] = pgn->char_hack;
      }

      ASSERT(pgn->token_length>=0&&pgn->token_length<PGN_STRING_SIZE);
      pgn->token_string[pgn->token_length] = '\0';

   } else if (pgn->char_hack == '$') {

      // NAG: '$' followed by 1 to 3 digits, only the digits are stored

      pgn->token_type = TOKEN_NAG;
      pgn->token_length = 0;

      while (TRUE) {

         pgn_char_read(pgn);

         // CHAR_EOF is tested first: it is neither EOF nor an unsigned
         // char value, so passing it to isdigit() would be undefined

         if (pgn->char_hack == CHAR_EOF || !isdigit(pgn->char_hack)) break;

         if (pgn->token_length >= 3) {
            my_fatal("pgn_read_token(): NAG too long at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
         }

         pgn->token_string[pgn->token_length++] = pgn->char_hack;
      }

      pgn_char_unread(pgn);

      if (pgn->token_length == 0) {
         my_fatal("pgn_read_token(): malformed NAG at line %d, column %d,game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
      }

      ASSERT(pgn->token_length>0&&pgn->token_length<=3);
      pgn->token_string[pgn->token_length] = '\0';

   } else {

      // unknown token

      my_fatal("lexical error at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
   }
}
500
501 // pgn_skip_blanks()
502
// Reads characters until one that is neither white space nor part of a
// comment is current, or until end of file; that character (or CHAR_EOF)
// is left in pgn->char_hack.
//
// Three comment forms are skipped: ';' up to the end of the line, '%' in
// column 0 up to the end of the line, and '{' up to the next '}'.
// End of file inside any of them goes to my_fatal().
static void pgn_skip_blanks(pgn_t * pgn) {

   ASSERT(pgn!=NULL);

   for (;;) {

      pgn_char_read(pgn);

      if (pgn->char_hack == CHAR_EOF) return;

      // white space

      if (isspace(pgn->char_hack)) continue;

      // ';' comment, runs to EOL

      if (pgn->char_hack == ';') {

         for (;;) {

            pgn_char_read(pgn);

            if (pgn->char_hack == CHAR_EOF) {
               my_fatal("pgn_skip_blanks(): EOF in comment at line %d, column %d,game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
            }

            if (pgn->char_hack == '\n') break;
         }

         continue;
      }

      // '%' comment, only recognised at the start of a line, runs to EOL

      if (pgn->char_hack == '%' && pgn->char_column == 0) {

         for (;;) {

            pgn_char_read(pgn);

            if (pgn->char_hack == CHAR_EOF) {
               my_fatal("pgn_skip_blanks(): EOF in comment at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
            }

            if (pgn->char_hack == '\n') break;
         }

         continue;
      }

      // '{' comment, runs to the next '}' (no nesting)

      if (pgn->char_hack == '{') {

         for (;;) {

            pgn_char_read(pgn);

            if (pgn->char_hack == CHAR_EOF) {
               my_fatal("pgn_skip_blanks(): EOF in comment at line %d, column %d, game %d\n",pgn->char_line,pgn->char_column,pgn->game_nb);
            }

            if (pgn->char_hack == '}') break;
         }

         continue;
      }

      // first character of a token

      return;
   }
}
565
566 // is_symbol_start()
567
// Tells whether c can be the first character of a symbol token: an ASCII
// letter or digit.
//
// The range test is required. strchr() converts c to char and also matches
// the terminating '\0' of the set, so without it both 0 and 256 (the value
// of CHAR_EOF in this file) would be reported as symbol characters.
static bool is_symbol_start(int c) {

   return c > 0
       && c <= 0x7F
       && strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",c) != NULL;
}
572
573 // is_symbol_next()
574
// Tells whether c can continue a symbol token: an ASCII letter or digit, or
// one of _ + # = : - /
//
// The range test is required. strchr() converts c to char and also matches
// the terminating '\0' of the set, so without it both 0 and 256 (the value
// of CHAR_EOF in this file) would be reported as symbol characters and a
// symbol at the very end of the input would never terminate.
static bool is_symbol_next(int c) {

   return c > 0
       && c <= 0x7F
       && strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_+#=:-/",c) != NULL;
}
579
580 // pgn_char_read()
581
// Makes the next character of the file current in pgn->char_hack, with
// CHAR_EOF standing for end of file. After pgn_char_unread() the character
// already stored is handed out once more instead of reading the file.
//
// pgn->char_line / char_column are advanced past the previous character
// before the new one is read, so they give the position of the current
// character. A read error goes to my_fatal().
static void pgn_char_read(pgn_t * pgn) {

   ASSERT(pgn!=NULL);

   // pushed-back character: it is still in place, just clear the flag

   if (pgn->char_unread) {
      pgn->char_unread = FALSE;
      return;
   }

   // step the position over the character being consumed; on the very
   // first call there is none

   if (!pgn->char_first) {

      ASSERT(pgn->char_hack!=CHAR_EOF);

      switch (pgn->char_hack) {

      case '\n': // start of the next line
         pgn->char_line++;
         pgn->char_column = 0;
         break;

      case '\t': // jump to the next tab stop
         pgn->char_column += TAB_SIZE - (pgn->char_column % TAB_SIZE);
         break;

      default:
         pgn->char_column++;
         break;
      }
   }

   pgn->char_first = FALSE;

   // fetch the new character; fgetc() returns EOF both at end of file and
   // on error, ferror() tells them apart

   pgn->char_hack = fgetc(pgn->file);

   if (pgn->char_hack == EOF) {

      if (ferror(pgn->file)) {
         my_fatal("pgn_char_read(): fgetc(): %s\n",strerror(errno));
      }

      pgn->char_hack = CHAR_EOF;
   }

   if (DispChar) {
      printf("< L%d C%d '%c' (%02X)\n",pgn->char_line,pgn->char_column,pgn->char_hack,pgn->char_hack);
   }
}
627
628 // pgn_char_unread()
629
// Pushes the current character back so that the next pgn_char_read()
// returns it again. Only one character can be pending, and a character
// must have been read before.
static void pgn_char_unread(pgn_t * pgn) {

   ASSERT(pgn!=NULL);

   ASSERT(!pgn->char_unread); // nothing pending yet
   ASSERT(!pgn->char_first);  // there is a character to push back

   pgn->char_unread = TRUE;
}
639
// end of pgn.c
641
642