1 /* Copyright 2010-2019 Free Software Foundation, Inc.
2
3 This program is free software: you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation, either version 3 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program. If not, see <http://www.gnu.org/licenses/>. */
15
16 #ifdef HAVE_CONFIG_H
17 #include <config.h>
18 #endif
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <locale.h>
23 #ifndef _WIN32
24 #include <langinfo.h>
25 #else /* _WIN32 */
26 /* Workaround for problems caused in mingw.org's MinGW build by
27 Gnulib's wchar.h overriding the wint_t type definition, which
28 causes compilation errors when perl.h is included below, because
29 perl.h includes ctype.h. */
30 #include <ctype.h>
31 #endif
32 #include <wchar.h>
33 #include <wctype.h>
34
35 /* See "How do I use all this in extensions" in 'man perlguts'. */
36 #define PERL_NO_GET_CONTEXT
37
38 #include "EXTERN.h"
39 #include "perl.h"
40 #include "XSUB.h"
41
42 #include "ppport.h"
43
44 #include "miscxs.h"
45
/* Set of bytes counted as whitespace when xs_merge_text measures the
   leading blank run of a text chunk with strspn.  */
const char *whitespace_chars = " \t\f\v\r\n";
47
48 int
xs_abort_empty_line(HV * self,HV * current,SV * additional_spaces_in)49 xs_abort_empty_line (HV *self, HV *current, SV *additional_spaces_in)
50 {
51 char *additional_spaces;
52 AV *contents_array;
53 SV **svp;
54 int contents_num;
55 HV *spaces_elt;
56 //char *key;
57 HV *test_extra = 0;
58 HV *command_extra = 0;
59
60 HV *owning_elt = 0;
61 char *type;
62 SV *existing_text_sv;
63
64 dTHX;
65
66 /* Get additional text in UTF-8. */
67 if (additional_spaces_in)
68 {
69 STRLEN len;
70 static char *new_string;
71 additional_spaces = SvPV (additional_spaces_in, len);
72 if (!SvUTF8 (additional_spaces_in))
73 {
74 Safefree (new_string);
75 new_string = bytes_to_utf8 (additional_spaces, &len);
76 additional_spaces = new_string;
77 }
78 }
79 else
80 additional_spaces = "";
81
82 svp = hv_fetch (current, "contents", strlen("contents"), 0);
83 if (!svp)
84 return 0;
85 contents_array = (AV *)SvRV(*svp);
86
87 contents_num = av_len(contents_array) + 1;
88 if (contents_num == 0)
89 return 0;
90
91 spaces_elt = (HV *) SvRV (*av_fetch (contents_array, contents_num - 1, 0));
92
93 svp = hv_fetch (spaces_elt, "type", strlen ("type"), 0);
94 if (!svp)
95 return 0;
96
97 type = SvPV_nolen (*svp);
98 if (!type)
99 return 0;
100
101 /* Must be one of these types to continue. */
102 if (strcmp (type, "empty_line")
103 && strcmp (type, "empty_line_after_command")
104 && strcmp (type, "empty_spaces_before_argument")
105 && strcmp (type, "empty_spaces_after_close_brace"))
106 {
107 return 0;
108 }
109
110 //fprintf (stderr, "ABORT EMPTY\n");
111
112 svp = hv_fetch (spaces_elt, "extra", strlen ("extra"), 0);
113 if (svp)
114 {
115 test_extra = (HV *) SvRV (*svp);
116 svp = hv_fetch (test_extra, "command",
117 strlen ("command"), 0);
118 if (svp)
119 {
120 owning_elt = (HV *) SvRV (*svp);
121 svp = hv_fetch (owning_elt, "extra", strlen ("extra"), 0);
122 if (svp)
123 command_extra = (HV *) SvRV (*svp);
124 }
125 }
126
127 svp = hv_fetch (spaces_elt, "text", strlen ("text"), 0);
128 if (!svp)
129 return 0; /* or create it? change last arg from 0 to 1 */
130 existing_text_sv = *svp;
131
132 /* Append the 'additional_spaces' argument. */
133 sv_utf8_upgrade (existing_text_sv);
134 sv_catpv (existing_text_sv, additional_spaces);
135
136 if (!*SvPV_nolen (existing_text_sv)) /* existing text is empty */
137 {
138 /* Remove spaces_elt */
139 av_pop (contents_array);
140 }
141 else if (!strcmp (type, "empty_line"))
142 {
143 char *current_type;
144 AV *context_stack;
145 SV *top_context_sv;
146 char *top_context;
147 int top_index;
148
149 svp = hv_fetch (current, "type", strlen ("type"), 0);
150 if (!svp)
151 current_type = 0;
152 else
153 current_type = SvPV_nolen (*svp);
154
155 /* "Types with paragraphs". Remove the type unless we are inside
156 one of these types. */
157 if (current_type
158 && strcmp (current_type, "before_item")
159 && strcmp (current_type, "text_root")
160 && strcmp (current_type, "document_root")
161 && strcmp (current_type, "brace_command_context"))
162 goto delete_type;
163
164 /* Check the context stack. */
165 svp = hv_fetch (self, "context_stack", strlen ("context_stack"), 0);
166 if (!svp)
167 goto delete_type; /* shouldn't happen */
168 context_stack = (AV *) SvRV (*svp);
169 top_index = av_len (context_stack);
170 if (top_index < 0)
171 goto delete_type; /* shouldn't happen */
172 svp = av_fetch (context_stack, top_index, 0);
173 if (!svp)
174 goto delete_type; /* shouldn't happen */
175 top_context_sv = *svp;
176 top_context = SvPV_nolen (top_context_sv);
177
178 /* Change type to "empty_spaces_before_paragraph" unless we are in
179 one of these contexts. */
180 if (strcmp (top_context, "math")
181 && strcmp (top_context, "menu")
182 && strcmp (top_context, "preformatted")
183 && strcmp (top_context, "rawpreformatted")
184 && strcmp (top_context, "def")
185 && strcmp (top_context, "inlineraw"))
186 {
187 hv_store (spaces_elt, "type", strlen ("type"),
188 newSVpv ("empty_spaces_before_paragraph", 0), 0);
189 }
190 else
191 {
192 delete_type:
193 hv_delete (spaces_elt, "type", strlen ("type"), G_DISCARD);
194 }
195 }
196 else if (!strcmp (type, "empty_line_after_command")
197 || !strcmp (type, "empty_spaces_before_argument"))
198 {
199 STRLEN len;
200 char *ptr;
201
202 if (owning_elt)
203 {
204 /* Remove spaces_elt */
205 av_pop (contents_array);
206
207 ptr = SvPV(existing_text_sv, len);
208 /* Replace element reference with a simple string. */
209 if (!command_extra)
210 {
211 command_extra = newHV ();
212 hv_store (owning_elt, "extra", strlen ("extra"),
213 newRV_inc((SV *)command_extra), 0);
214 }
215 hv_store (command_extra,
216 "spaces_before_argument",
217 strlen ("spaces_before_argument"),
218 newSVpv(ptr, len),
219 0);
220 }
221 else
222 {
223 hv_store (spaces_elt, "type", strlen ("type"),
224 newSVpv ("empty_spaces_after_command", 0), 0);
225
226 }
227 }
228 return 1;
229 }
230
231 HV *
xs_merge_text(HV * self,HV * current,SV * text_in)232 xs_merge_text (HV *self, HV *current, SV *text_in)
233 {
234 AV *contents_array;
235
236 int no_merge_with_following_text = 0;
237 char *text;
238 int leading_spaces;
239 SV *leading_spaces_sv = 0;
240 int call_ret;
241 SV *returned_sv;
242
243 SV *contents_ref;
244 int contents_num;
245 HV *last_elt;
246 SV *existing_text_sv;
247 char *existing_text;
248 SV **svp;
249
250 dTHX;
251
252 dSP;
253
254 /* Get text in UTF-8. */
255 {
256 STRLEN len;
257 static char *new_string;
258 text = SvPV (text_in, len);
259 if (!SvUTF8 (text_in))
260 {
261 Safefree (new_string);
262 new_string = bytes_to_utf8 (text, &len);
263 text = new_string;
264 }
265 }
266
267 leading_spaces = strspn (text, whitespace_chars);
268 if (text[leading_spaces])
269 {
270 int contents_num;
271
272 if (leading_spaces > 0)
273 {
274 leading_spaces_sv = newSVpv (text, leading_spaces);
275 }
276
277 svp = hv_fetch (current,
278 "contents", strlen ("contents"), 0);
279 contents_array = (AV *)SvRV(*svp);
280
281 contents_num = av_len(contents_array) + 1;
282 if (contents_num > 0)
283 {
284 HV *last_elt;
285 char *type = 0;
286
287 last_elt = (HV *)
288 SvRV (*av_fetch (contents_array, contents_num - 1, 0));
289
290 svp = hv_fetch (last_elt, "type", strlen ("type"), 0);
291 if (svp)
292 type = SvPV_nolen (*svp);
293 if (type
294 && (!strcmp (type, "empty_line_after_command")
295 || !strcmp (type, "empty_spaces_after_command")
296 || !strcmp (type, "empty_spaces_before_argument")
297 || !strcmp (type, "empty_spaces_after_close_brace")))
298 {
299 no_merge_with_following_text = 1;
300 }
301 }
302
303 if (xs_abort_empty_line(self, current, leading_spaces_sv))
304 {
305 text += leading_spaces;
306 }
307
308 /************************/
309 /* See 'perlcall' man page. */
310 ENTER;
311 SAVETMPS;
312
313
314 PUSHMARK(SP);
315 XPUSHs(sv_2mortal(newRV_inc((SV *)self)));
316 XPUSHs(sv_2mortal(newRV_inc((SV *)current)));
317 PUTBACK;
318
319 call_ret = call_pv ("Texinfo::Parser::_begin_paragraph", G_SCALAR);
320
321 SPAGAIN;
322
323 returned_sv = POPs;
324
325 /************************/
326
327 if (returned_sv && SvRV(returned_sv))
328 {
329 current = (HV *)SvRV(returned_sv);
330 }
331
332 FREETMPS;
333 LEAVE;
334 }
335
336 svp = hv_fetch (current, "contents", strlen ("contents"), 0);
337 if (!svp)
338 {
339 contents_array = newAV ();
340 contents_ref = newRV_inc ((SV *) contents_array);
341 hv_store (current, "contents", strlen ("contents"),
342 contents_ref, 0);
343 fprintf (stderr, "NEW CONTENTS %p\n", contents_array);
344 goto NEW_TEXT;
345 }
346 else
347 {
348 contents_ref = *svp;
349 contents_array = (AV *)SvRV(contents_ref);
350 }
351
352 if (no_merge_with_following_text)
353 goto NEW_TEXT;
354
355 contents_num = av_len(contents_array) + 1;
356 if (contents_num == 0)
357 goto NEW_TEXT;
358
359 last_elt = (HV *)
360 SvRV (*av_fetch (contents_array, contents_num - 1, 0));
361 svp = hv_fetch (last_elt, "text", strlen ("text"), 0);
362 if (!svp)
363 goto NEW_TEXT;
364 existing_text_sv = *svp;
365 existing_text = SvPV_nolen (existing_text_sv);
366 if (strchr (existing_text, '\n'))
367 goto NEW_TEXT;
368
369 MERGED_TEXT:
370 sv_catpv (existing_text_sv, text);
371 //fprintf (stderr, "MERGED TEXT: %s|||\n", text);
372
373 if (0)
374 {
375 HV *hv;
376 SV *sv;
377 NEW_TEXT:
378 hv = newHV ();
379 sv = newSVpv (text, 0);
380 hv_store (hv, "text", strlen ("text"), sv, 0);
381 SvUTF8_on (sv);
382 hv_store (hv, "parent", strlen ("parent"),
383 newRV_inc ((SV *)current), 0);
384 av_push (contents_array, newRV_inc ((SV *)hv));
385 //fprintf (stderr, "NEW TEXT: %s|||\n", text);
386 }
387
388 return current;
389 }
390
/* Return a copy of TEXT with the ASCII typographic conventions
   simplified:

     "---" -> "--"      "--" -> "-"
     "``"  -> "\""      "''" -> "\""
     "`"   -> "'"

   All other bytes are copied unchanged.  The result lives in a static
   buffer that is reused by the next call; the caller must not free
   it.  The process is aborted if the buffer cannot be allocated.  */
char *
xs_process_text (char *text)
{
  static char *new;
  char *grown;
  const char *in;
  char *out;

  dTHX;

  /* No replacement is longer than what it replaces, so the result
     always fits in a buffer the size of the input. */
  grown = realloc (new, strlen (text) + 1);
  if (!grown)
    abort ();
  new = grown;

  in = text;
  out = new;
  while (*in)
    {
      if (in[0] == '-' && in[1] == '-')
        {
          *out++ = '-';
          if (in[2] == '-')
            {
              *out++ = '-';
              in += 3;
            }
          else
            in += 2;
        }
      else if (in[0] == '\'' && in[1] == '\'')
        {
          *out++ = '"';
          in += 2;
        }
      else if (in[0] == '`')
        {
          if (in[1] == '`')
            {
              *out++ = '"';
              in += 2;
            }
          else
            {
              *out++ = '\'';
              in += 1;
            }
        }
      else
        *out++ = *in++;
    }
  *out = '\0';

  return new;
}
445
/* Return TEXT with the ASCII typographic conventions replaced by the
   UTF-8 encoding of the corresponding Unicode characters:

     "---" -> U+2014 (em dash)       "--" -> U+2013 (en dash)
     "``"  -> U+201C                 "''" -> U+201D
     "`"   -> U+2018                 "'"  -> U+2019

   All other bytes, including a lone "-", are copied unchanged.

   If IN_CODE is non-zero, TEXT itself is returned unmodified.
   Otherwise the result lives in a static buffer that is reused by the
   next call; the caller must not free it.  The process is aborted if
   the buffer cannot be allocated.  */
char *
xs_unicode_text (char *text, int in_code)
{
  static const char em_dash[] = "\xE2\x80\x94";            /* U+2014 */
  static const char en_dash[] = "\xE2\x80\x93";            /* U+2013 */
  static const char left_double_quote[] = "\xE2\x80\x9C";  /* U+201C */
  static const char right_double_quote[] = "\xE2\x80\x9D"; /* U+201D */
  static const char left_single_quote[] = "\xE2\x80\x98";  /* U+2018 */
  static const char right_single_quote[] = "\xE2\x80\x99"; /* U+2019 */

  static char *new;
  const char *p, *q;
  const char *replacement;
  char *out, *grown;
  size_t text_len, plain_len;

  dTHX; /* Perl boilerplate. */

  if (in_code)
    return text;

  /* Every replacement is three bytes long and consumes at least one
     input byte, so three times the input length is always enough. */
  text_len = strlen (text);
  if (text_len > ((size_t) -1 - 1) / 3)
    abort ();
  grown = realloc (new, 3 * text_len + 1);
  if (!grown)
    abort ();
  new = grown;

  p = text;
  out = new;
  while (1)
    {
      /* Copy the run of ordinary bytes up to the next special one. */
      plain_len = strcspn (p, "-`'");
      memcpy (out, p, plain_len);
      out += plain_len;
      q = p + plain_len;
      if (!*q)
        break;

      /* q[1] may be the terminating NUL; q[2] is only looked at once
         q[1] is known not to be, so nothing past the NUL is read. */
      replacement = 0;
      switch (*q)
        {
        case '-':
          if (q[1] == '-' && q[2] == '-')
            {
              p = q + 3;
              replacement = em_dash;
            }
          else if (q[1] == '-')
            {
              p = q + 2;
              replacement = en_dash;
            }
          else
            {
              p = q + 1;
              *out++ = '-';
            }
          break;
        case '`':
          if (q[1] == '`')
            {
              p = q + 2;
              replacement = left_double_quote;
            }
          else
            {
              p = q + 1;
              replacement = left_single_quote;
            }
          break;
        case '\'':
          if (q[1] == '\'')
            {
              p = q + 2;
              replacement = right_double_quote;
            }
          else
            {
              p = q + 1;
              replacement = right_single_quote;
            }
          break;
        }

      if (replacement)
        {
          memcpy (out, replacement, 3);
          out += 3;
        }
    }

  *out = '\0';
  return new;
}
547
548 /* Return list ($at_command, $open_brace, $asterisk, $single_letter_command,
549 $separator_match) */
xs_parse_texi_regex(SV * text_in,char ** at_command,char ** open_brace,char ** asterisk,char ** single_letter_command,char ** separator_match,char ** new_text)550 void xs_parse_texi_regex (SV *text_in,
551 char **at_command,
552 char **open_brace,
553 char **asterisk,
554 char **single_letter_command,
555 char **separator_match,
556 char **new_text)
557 {
558 char *text;
559
560 dTHX;
561
562 /* Make sure the input is in UTF8. */
563 if (!SvUTF8 (text_in))
564 sv_utf8_upgrade (text_in);
565 text = SvPV_nolen (text_in);
566
567 *at_command = *open_brace = *asterisk = *single_letter_command
568 = *separator_match = *new_text = 0;
569
570 if (*text == '@' && isalnum(text[1]))
571 {
572 char *p, *q;
573 static char *s;
574
575 p = text + 1;
576 q = text + 2;
577 while (isalnum (*q) || *q == '-' || *q == '_')
578 q++;
579
580 s = realloc (s, q - p + 1);
581 memcpy (s, p, q - p);
582 s[q - p] = '\0';
583 *at_command = s;
584 }
585 else
586 {
587 if (*text == '{')
588 {
589 *open_brace = "{";
590 *separator_match = "{";
591 }
592
593 else if (*text == '@'
594 && text[1] && strchr ("([\"'~@&}{,.!?"
595 " \t\n"
596 "*-^`=:|/\\",
597 text[1]))
598 {
599 static char a[2];
600 *single_letter_command = a;
601 a[0] = text[1];
602 a[1] = '\0';
603 }
604
605 else if (strchr ("{}@,:\t.\f", *text))
606 {
607 static char a[2];
608 *separator_match = a;
609 a[0] = *text;
610 a[1] = '\0';
611 }
612
613 else
614 {
615 char *p;
616
617 if (*text == '*')
618 *asterisk = "*";
619
620 p = text;
621 p += strcspn (p, "{}@,:\t.\n\f");
622 if (p > text)
623 {
624 static char *s;
625 s = realloc (s, p - text + 1);
626 memcpy (s, text, p - text);
627 s[p - text] = '\0';
628 *new_text = s;
629 }
630 }
631 }
632
633 return;
634 }
635