/* Copyright 2010-2019 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
#include
#endif
#include
#include
#include
#include
#ifndef _WIN32
#include
#else /* _WIN32 */
/* Workaround for problems caused in mingw.org's MinGW build by
Gnulib's wchar.h overriding the wint_t type definition, which
causes compilation errors when perl.h is included below, because
perl.h includes ctype.h. */
#include
#endif
#include
#include
/* See "How do I use all this in extensions" in 'man perlguts'. */
#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"
#include "miscxs.h"
/* Set of characters counted as whitespace; xs_merge_text uses it with
   strspn to measure the leading whitespace of incoming text. */
const char *whitespace_chars = " \t\f\v\r\n";
/* Deal with a pending "empty" element at the end of CURRENT's contents.

   SELF and CURRENT are Perl hashes.  The last element of
   CURRENT->{contents} is examined; nothing is done (and 0 is returned)
   unless its "type" is one of "empty_line", "empty_line_after_command",
   "empty_spaces_before_argument" or "empty_spaces_after_close_brace" and
   it has a "text" entry.

   ADDITIONAL_SPACES_IN may be null; otherwise its string value is appended
   to the element's text.  After that:
     - if the text is empty, the element is removed from the contents;
     - an "empty_line" is retyped "empty_spaces_before_paragraph" or has
       its type deleted, depending on CURRENT's type and on the top of
       SELF->{context_stack};
     - an "empty_line_after_command" or "empty_spaces_before_argument"
       whose extra->{command} names an owning element is removed, and its
       text is stored in that element's extra->{spaces_before_argument};
       without an owning element it is retyped
       "empty_spaces_after_command".

   Return 1 if an element was processed, 0 otherwise. */
int
xs_abort_empty_line (HV *self, HV *current, SV *additional_spaces_in)
{
  char *additional_spaces;
  AV *contents_array;
  SV **svp;
  int contents_num;
  HV *spaces_elt;
  HV *test_extra = 0;
  HV *command_extra = 0;
  HV *owning_elt = 0;
  char *type;
  SV *existing_text_sv;

  dTHX;

  /* Get additional text in UTF-8. */
  if (additional_spaces_in)
    {
      STRLEN len;
      /* Keeps the converted copy alive after we return; it is released the
         next time a conversion is needed. */
      static char *new_string;

      additional_spaces = SvPV (additional_spaces_in, len);
      if (!SvUTF8 (additional_spaces_in))
        {
          Safefree (new_string);
          new_string = bytes_to_utf8 (additional_spaces, &len);
          additional_spaces = new_string;
        }
    }
  else
    additional_spaces = "";

  /* Locate the last element of the contents array. */
  svp = hv_fetch (current, "contents", strlen ("contents"), 0);
  if (!svp)
    return 0;
  contents_array = (AV *) SvRV (*svp);

  contents_num = av_len (contents_array) + 1;
  if (contents_num == 0)
    return 0;

  /* av_fetch returns a null pointer for a slot that was never filled. */
  svp = av_fetch (contents_array, contents_num - 1, 0);
  if (!svp)
    return 0;
  spaces_elt = (HV *) SvRV (*svp);

  svp = hv_fetch (spaces_elt, "type", strlen ("type"), 0);
  if (!svp)
    return 0;
  type = SvPV_nolen (*svp);

  /* Must be one of these types to continue. */
  if (strcmp (type, "empty_line")
      && strcmp (type, "empty_line_after_command")
      && strcmp (type, "empty_spaces_before_argument")
      && strcmp (type, "empty_spaces_after_close_brace"))
    return 0;

  /* Look up the element that owns these spaces, and its "extra" hash,
     if they are recorded. */
  svp = hv_fetch (spaces_elt, "extra", strlen ("extra"), 0);
  if (svp)
    {
      test_extra = (HV *) SvRV (*svp);
      svp = hv_fetch (test_extra, "command", strlen ("command"), 0);
      if (svp)
        {
          owning_elt = (HV *) SvRV (*svp);
          svp = hv_fetch (owning_elt, "extra", strlen ("extra"), 0);
          if (svp)
            command_extra = (HV *) SvRV (*svp);
        }
    }

  svp = hv_fetch (spaces_elt, "text", strlen ("text"), 0);
  if (!svp)
    return 0; /* or create it?  change last arg from 0 to 1 */
  existing_text_sv = *svp;

  /* Append the 'additional_spaces' argument. */
  sv_utf8_upgrade (existing_text_sv);
  sv_catpv (existing_text_sv, additional_spaces);

  if (!*SvPV_nolen (existing_text_sv)) /* existing text is empty */
    {
      /* Remove spaces_elt.
         NOTE(review): av_pop hands its caller a reference that is not
         released here; this looks like a leak -- confirm. */
      av_pop (contents_array);
    }
  else if (!strcmp (type, "empty_line"))
    {
      char *current_type;
      int before_paragraph = 0;

      svp = hv_fetch (current, "type", strlen ("type"), 0);
      current_type = svp ? SvPV_nolen (*svp) : 0;

      /* "Types with paragraphs".  The context stack is only consulted when
         CURRENT has no type or has one of these types. */
      if (!current_type
          || !strcmp (current_type, "before_item")
          || !strcmp (current_type, "text_root")
          || !strcmp (current_type, "document_root")
          || !strcmp (current_type, "brace_command_context"))
        {
          svp = hv_fetch (self, "context_stack",
                          strlen ("context_stack"), 0);
          if (svp) /* a missing stack shouldn't happen */
            {
              AV *context_stack = (AV *) SvRV (*svp);
              int top_index = av_len (context_stack);

              if (top_index >= 0) /* an empty stack shouldn't happen */
                {
                  svp = av_fetch (context_stack, top_index, 0);
                  if (svp) /* a missing entry shouldn't happen */
                    {
                      char *top_context = SvPV_nolen (*svp);

                      /* Paragraph spaces unless we are in one of these
                         contexts. */
                      if (strcmp (top_context, "math")
                          && strcmp (top_context, "menu")
                          && strcmp (top_context, "preformatted")
                          && strcmp (top_context, "rawpreformatted")
                          && strcmp (top_context, "def")
                          && strcmp (top_context, "inlineraw"))
                        before_paragraph = 1;
                    }
                }
            }
        }

      if (before_paragraph)
        hv_store (spaces_elt, "type", strlen ("type"),
                  newSVpv ("empty_spaces_before_paragraph", 0), 0);
      else
        hv_delete (spaces_elt, "type", strlen ("type"), G_DISCARD);
    }
  else if (!strcmp (type, "empty_line_after_command")
           || !strcmp (type, "empty_spaces_before_argument"))
    {
      if (owning_elt)
        {
          STRLEN len;
          char *ptr;

          /* Remove spaces_elt.
             NOTE(review): as above, the reference returned by av_pop is
             not released -- confirm whether that is intended. */
          av_pop (contents_array);

          ptr = SvPV (existing_text_sv, len);

          /* Replace element reference with a simple string. */
          if (!command_extra)
            {
              /* NOTE(review): newRV_inc on a freshly created hash leaves
                 its reference count at 2 -- probably a leak; confirm. */
              command_extra = newHV ();
              hv_store (owning_elt, "extra", strlen ("extra"),
                        newRV_inc ((SV *) command_extra), 0);
            }
          hv_store (command_extra,
                    "spaces_before_argument",
                    strlen ("spaces_before_argument"),
                    newSVpv (ptr, len),
                    0);
        }
      else
        {
          hv_store (spaces_elt, "type", strlen ("type"),
                    newSVpv ("empty_spaces_after_command", 0), 0);
        }
    }

  return 1;
}
/* Add TEXT_IN to the contents of CURRENT, merging it into the preceding
   text element when possible.

   SELF and CURRENT are Perl hashes.  If TEXT_IN contains anything besides
   leading whitespace, xs_abort_empty_line is called first (it is passed the
   leading whitespace, which is then skipped if that call returns true), and
   then Texinfo::Parser::_begin_paragraph is called with references to SELF
   and CURRENT; if it returns a reference, the referenced hash becomes
   CURRENT.

   The text is appended to the "text" entry of the last element of
   CURRENT->{contents} when that entry exists and contains no newline, and
   when the last element seen before xs_abort_empty_line ran was not one of
   the "empty_..." types listed below.  Otherwise a new element with "text"
   and "parent" entries is pushed onto the contents, creating the contents
   array if it is missing.

   Return CURRENT, possibly replaced as described above. */
HV *
xs_merge_text (HV *self, HV *current, SV *text_in)
{
  AV *contents_array;
  int no_merge_with_following_text = 0;
  char *text;
  int leading_spaces;
  SV *leading_spaces_sv = 0;
  SV *returned_sv;
  SV *existing_text_sv = 0;   /* Text to merge into, if any. */
  SV **svp;

  dTHX;
  dSP;

  /* Get text in UTF-8. */
  {
    STRLEN len;
    /* Keeps the converted copy alive for the rest of this call; it is
       released the next time a conversion is needed. */
    static char *new_string;

    text = SvPV (text_in, len);
    if (!SvUTF8 (text_in))
      {
        Safefree (new_string);
        new_string = bytes_to_utf8 (text, &len);
        text = new_string;
      }
  }

  leading_spaces = strspn (text, whitespace_chars);
  if (text[leading_spaces])
    {
      /* Mortal, so that the temporary is released by the caller's scope
         instead of leaking. */
      if (leading_spaces > 0)
        leading_spaces_sv = sv_2mortal (newSVpv (text, leading_spaces));

      /* "contents" may be missing here, just as it may be further down. */
      svp = hv_fetch (current, "contents", strlen ("contents"), 0);
      if (svp)
        {
          int last_index;

          contents_array = (AV *) SvRV (*svp);
          last_index = av_len (contents_array);
          svp = last_index >= 0
                  ? av_fetch (contents_array, last_index, 0) : 0;
          if (svp)
            {
              HV *last_elt = (HV *) SvRV (*svp);
              char *type = 0;

              svp = hv_fetch (last_elt, "type", strlen ("type"), 0);
              if (svp)
                type = SvPV_nolen (*svp);
              if (type
                  && (!strcmp (type, "empty_line_after_command")
                      || !strcmp (type, "empty_spaces_after_command")
                      || !strcmp (type, "empty_spaces_before_argument")
                      || !strcmp (type, "empty_spaces_after_close_brace")))
                no_merge_with_following_text = 1;
            }
        }

      if (xs_abort_empty_line (self, current, leading_spaces_sv))
        text += leading_spaces;

      /* Call back into Perl.  See 'perlcall' man page. */
      ENTER;
      SAVETMPS;

      PUSHMARK (SP);
      XPUSHs (sv_2mortal (newRV_inc ((SV *) self)));
      XPUSHs (sv_2mortal (newRV_inc ((SV *) current)));
      PUTBACK;

      call_pv ("Texinfo::Parser::_begin_paragraph", G_SCALAR);

      SPAGAIN;
      returned_sv = POPs;
      if (returned_sv && SvRV (returned_sv))
        current = (HV *) SvRV (returned_sv);

      /* Publish the stack pointer again after popping the result. */
      PUTBACK;
      FREETMPS;
      LEAVE;
    }

  svp = hv_fetch (current, "contents", strlen ("contents"), 0);
  if (!svp)
    {
      /* The array is made mortal and then referenced with newRV_inc, so the
         new reference ends up as its only owner. */
      contents_array = (AV *) sv_2mortal ((SV *) newAV ());
      hv_store (current, "contents", strlen ("contents"),
                newRV_inc ((SV *) contents_array), 0);
      fprintf (stderr, "NEW CONTENTS %p\n", (void *) contents_array);
    }
  else
    {
      contents_array = (AV *) SvRV (*svp);
      if (!no_merge_with_following_text)
        {
          int last_index = av_len (contents_array);

          svp = last_index >= 0
                  ? av_fetch (contents_array, last_index, 0) : 0;
          if (svp)
            {
              HV *last_elt = (HV *) SvRV (*svp);

              /* Only merge into text that has no newline in it. */
              svp = hv_fetch (last_elt, "text", strlen ("text"), 0);
              if (svp && !strchr (SvPV_nolen (*svp), '\n'))
                existing_text_sv = *svp;
            }
        }
    }

  if (existing_text_sv)
    {
      /* NOTE(review): unlike xs_abort_empty_line, the existing text is not
         upgraded to UTF-8 before the UTF-8 bytes are appended -- confirm
         that it is always already flagged as UTF-8. */
      sv_catpv (existing_text_sv, text);
    }
  else
    {
      /* Mortal plus newRV_inc: the reference pushed onto the contents
         becomes the only owner of the new element. */
      HV *hv = (HV *) sv_2mortal ((SV *) newHV ());
      SV *sv = newSVpv (text, 0);

      hv_store (hv, "text", strlen ("text"), sv, 0);
      SvUTF8_on (sv);
      hv_store (hv, "parent", strlen ("parent"),
                newRV_inc ((SV *) current), 0);
      av_push (contents_array, newRV_inc ((SV *) hv));
    }

  return current;
}
/* Return a copy of TEXT with Texinfo ASCII conventions collapsed:
     "---" becomes "--", "--" becomes "-",
     "''" and "``" become '"', and a lone "`" becomes "'".

   The result lives in a static buffer that is reused by the next call, so
   the caller must not free it and must copy it if it is needed for longer.
   Return a null pointer if memory cannot be allocated (the buffer from the
   previous call is then left untouched). */
char *
xs_process_text (char *text)
{
  static char *buffer;
  char *resized;
  const char *in;
  char *out;

  dTHX;

  /* No replacement is longer than what it replaces, so the output never
     needs more room than the input. */
  resized = realloc (buffer, strlen (text) + 1);
  if (!resized)
    return NULL;
  buffer = resized;

  in = text;
  out = buffer;
  while (*in)
    {
      if (in[0] == '-' && in[1] == '-')
        {
          if (in[2] == '-')
            {
              *out++ = '-';
              *out++ = '-';
              in += 3;
            }
          else
            {
              *out++ = '-';
              in += 2;
            }
        }
      else if (in[0] == '\'' && in[1] == '\'')
        {
          *out++ = '"';
          in += 2;
        }
      else if (in[0] == '`')
        {
          if (in[1] == '`')
            {
              *out++ = '"';
              in += 2;
            }
          else
            {
              *out++ = '\'';
              in += 1;
            }
        }
      else
        {
          *out++ = *in++;
        }
    }
  *out = '\0';

  return buffer;
}
/* Return a copy of TEXT with Texinfo ASCII conventions replaced by the
   UTF-8 encoding of the corresponding Unicode characters:
     "---" -> U+2014 (em dash)        "--" -> U+2013 (en dash)
     "``"  -> U+201C                  "`"  -> U+2018
     "''"  -> U+201D                  "'"  -> U+2019
   A lone "-" is copied unchanged.

   If IN_CODE is non-zero, TEXT itself is returned unmodified.  Otherwise
   the result lives in a static buffer that is reused by the next call, so
   the caller must not free it.  Return a null pointer if memory cannot be
   allocated. */
char *
xs_unicode_text (char *text, int in_code)
{
  static char *buffer;
  char *resized;
  const char *in;
  char *out;
  size_t length;

  dTHX; /* Perl boilerplate. */

  if (in_code)
    return text;

  /* Every input byte yields at most three output bytes (a lone quote
     becomes a three-byte sequence), so 3 * length + 1 always suffices. */
  length = strlen (text);
  if (length > ((size_t) -1 - 1) / 3)
    return NULL;
  resized = realloc (buffer, 3 * length + 1);
  if (!resized)
    return NULL;
  buffer = resized;

  in = text;
  out = buffer;
  while (1)
    {
      const char *replacement = NULL;
      size_t plain = strcspn (in, "-`'");

      /* Copy the run of ordinary characters verbatim. */
      memcpy (out, in, plain);
      out += plain;
      in += plain;

      if (!*in)
        break;

      /* The characters are tested one at a time so that nothing beyond the
         terminating null byte is ever read. */
      switch (*in)
        {
        case '-':
          if (in[1] == '-' && in[2] == '-')
            {
              /* Unicode em dash U+2014 (0xE2 0x80 0x94) */
              replacement = "\xE2\x80\x94";
              in += 3;
            }
          else if (in[1] == '-')
            {
              /* Unicode en dash U+2013 (0xE2 0x80 0x93) */
              replacement = "\xE2\x80\x93";
              in += 2;
            }
          else
            {
              *out++ = '-';
              in += 1;
            }
          break;

        case '`':
          if (in[1] == '`')
            {
              /* U+201C E2 80 9C */
              replacement = "\xE2\x80\x9C";
              in += 2;
            }
          else
            {
              /* U+2018 E2 80 98 */
              replacement = "\xE2\x80\x98";
              in += 1;
            }
          break;

        case '\'':
          if (in[1] == '\'')
            {
              /* U+201D E2 80 9D */
              replacement = "\xE2\x80\x9D";
              in += 2;
            }
          else
            {
              /* U+2019 E2 80 99 */
              replacement = "\xE2\x80\x99";
              in += 1;
            }
          break;

        default:
          break;
        }

      if (replacement)
        {
          memcpy (out, replacement, 3);
          out += 3;
        }
    }
  *out = '\0';

  return buffer;
}
/* Classify the start of TEXT_IN and report the result through the output
   arguments, which stand for the list ($at_command, $open_brace,
   $asterisk, $single_letter_command, $separator_match) plus NEW_TEXT.

   TEXT_IN is upgraded to UTF-8 in place if it is not already.  Every
   output is first set to null; then:
     - "@" followed by an alphanumeric character: *AT_COMMAND is the
       command name (alphanumerics, '-' and '_');
     - "{": *OPEN_BRACE and *SEPARATOR_MATCH are both "{";
     - "@" followed by one of the punctuation or whitespace characters
       listed below: *SINGLE_LETTER_COMMAND is that character;
     - one of "{}@,:\t.\f": *SEPARATOR_MATCH is that character;
     - anything else: *ASTERISK is "*" if the text starts with '*', and
       *NEW_TEXT is the non-empty leading run of characters not in
       "{}@,:\t.\n\f".

   The strings returned are literals or static buffers reused by later
   calls; the caller must not free them.  If memory cannot be allocated for
   a result, that output is left null. */
void xs_parse_texi_regex (SV *text_in,
                          char **at_command,
                          char **open_brace,
                          char **asterisk,
                          char **single_letter_command,
                          char **separator_match,
                          char **new_text)
{
  char *text;

  dTHX;

  /* Make sure the input is in UTF8. */
  if (!SvUTF8 (text_in))
    sv_utf8_upgrade (text_in);
  text = SvPV_nolen (text_in);

  *at_command = *open_brace = *asterisk = *single_letter_command
    = *separator_match = *new_text = 0;

  /* The <ctype.h> functions require a value representable as unsigned
     char; the bytes of UTF-8 text may be negative as plain char. */
  if (*text == '@' && isalnum ((unsigned char) text[1]))
    {
      static char *name;
      char *resized;
      char *start = text + 1;
      char *end = text + 2;

      while (isalnum ((unsigned char) *end) || *end == '-' || *end == '_')
        end++;

      resized = realloc (name, end - start + 1);
      if (!resized)
        return;
      name = resized;
      memcpy (name, start, end - start);
      name[end - start] = '\0';
      *at_command = name;
    }
  else if (*text == '{')
    {
      *open_brace = "{";
      *separator_match = "{";
    }
  else if (*text == '@'
           && text[1] && strchr ("([\"'~@&}{,.!?"
                                 " \t\n"
                                 "*-^`=:|/\\",
                                 text[1]))
    {
      static char a[2];

      *single_letter_command = a;
      a[0] = text[1];
      a[1] = '\0';
    }
  /* strchr also matches the terminating null byte, so empty text has to be
     excluded explicitly. */
  else if (*text && strchr ("{}@,:\t.\f", *text))
    {
      static char a[2];

      *separator_match = a;
      a[0] = *text;
      a[1] = '\0';
    }
  else
    {
      char *end;

      if (*text == '*')
        *asterisk = "*";

      end = text + strcspn (text, "{}@,:\t.\n\f");
      if (end > text)
        {
          static char *plain;
          char *resized = realloc (plain, end - text + 1);

          if (!resized)
            return;
          plain = resized;
          memcpy (plain, text, end - text);
          plain[end - text] = '\0';
          *new_text = plain;
        }
    }

  return;
}