1 /* ------------------------------------------------------------------------
2 @NAME       : btxs_support.c
3 @DESCRIPTION: Support functions needed by the XSUBs in BibTeX.xs.
4 @GLOBALS    :
5 @CREATED    : 1997/11/16, Greg Ward (from code in BibTeX.xs)
6 @MODIFIED   :
7 @VERSION    : $Id$
8 @COPYRIGHT  : Copyright (c) 1997-2000 by Gregory P. Ward.  All rights reserved.
9 -------------------------------------------------------------------------- */
10 #include "EXTERN.h"
11 #include "perl.h"
12 #include "XSUB.h"
13 
14 #define BT_DEBUG 0
15 
16 #include "btparse.h"
17 #include "btxs_support.h"
18 
19 
20 static char *nodetype_names[] =
21 {
22    "entry", "macrodef", "text", "key", "field", "string", "number", "macro"
23 };
24 
25 
26 /* ----------------------------------------------------------------------
27  * Miscellaneous stuff
28  */
29 
30 int
constant(char * name,IV * arg)31 constant (char * name, IV * arg)
32 {
33    int   ok = FALSE;
34 
35    DBG_ACTION (1, printf ("constant: name=%s\n", name));
36 
37    if (! (name[0] == 'B' && name[1] == 'T')) /* should not happen! */
38       croak ("Illegal constant name \"%s\"", name);
39 
40    switch (name[2])
41    {
42       case 'E':                         /* entry metatypes */
43          if (strEQ (name, "BTE_UNKNOWN"))  { *arg = BTE_UNKNOWN;  ok = TRUE; }
44          if (strEQ (name, "BTE_REGULAR"))  { *arg = BTE_REGULAR;  ok = TRUE; }
45          if (strEQ (name, "BTE_COMMENT"))  { *arg = BTE_COMMENT;  ok = TRUE; }
46          if (strEQ (name, "BTE_PREAMBLE")) { *arg = BTE_PREAMBLE; ok = TRUE; }
47          if (strEQ (name, "BTE_MACRODEF")) { *arg = BTE_MACRODEF; ok = TRUE; }
48          break;
49       case 'A':                         /* AST nodetypes (not all of them) */
50          if (strEQ (name, "BTAST_STRING")) { *arg = BTAST_STRING; ok = TRUE; }
51          if (strEQ (name, "BTAST_NUMBER")) { *arg = BTAST_NUMBER; ok = TRUE; }
52          if (strEQ (name, "BTAST_MACRO"))  { *arg = BTAST_MACRO;  ok = TRUE; }
53          break;
54       case 'N':                         /* name parts */
55          if (strEQ (name, "BTN_FIRST")) { *arg = BTN_FIRST; ok = TRUE; }
56          if (strEQ (name, "BTN_VON"))   { *arg = BTN_VON;   ok = TRUE; }
57          if (strEQ (name, "BTN_LAST"))  { *arg = BTN_LAST;  ok = TRUE; }
58          if (strEQ (name, "BTN_JR"))    { *arg = BTN_JR;    ok = TRUE; }
59          if (strEQ (name, "BTN_NONE"))  { *arg = BTN_NONE;  ok = TRUE; }
60          break;
61       case 'J':                         /* token join methods */
62          if (strEQ (name, "BTJ_MAYTIE"))   { *arg = BTJ_MAYTIE;   ok = TRUE; }
63          if (strEQ (name, "BTJ_SPACE"))    { *arg = BTJ_SPACE;    ok = TRUE; }
64          if (strEQ (name, "BTJ_FORCETIE")) { *arg = BTJ_FORCETIE; ok = TRUE; }
65          if (strEQ (name, "BTJ_NOTHING"))  { *arg = BTJ_NOTHING;  ok = TRUE; }
66          break;
67       default:
68          break;
69    }
70 
71    return ok;
72 }
73 
74 
75 /* ----------------------------------------------------------------------
76  * Stuff for converting a btparse entry AST to a Perl structure:
77  *   convert_value() [private]
78  *   convert_assigned_entry() [private]
79  *   convert_value_entry() [private]
80  *   ast_to_hash()
81  */
82 
83 static SV *
convert_value(char * field_name,AST * field,boolean preserve)84 convert_value (char * field_name, AST * field, boolean preserve)
85 {
86    AST *  value;
87    bt_nodetype
88           nodetype;
89    char * text;
90    SV *   sv_field_value;
91 
92    value = bt_next_value (field, NULL, &nodetype, &text);
93    if (preserve)
94    {
95       HV * val_stash;                   /* stash for Text::BibTeX::Value pkg */
96       HV * sval_stash;                  /* and for Text::BibTeX::SimpleValue */
97       AV * compound_value;              /* list of simple values */
98       SV * sval_contents[2];            /* type and text */
99       AV * simple_value;                /* list of (type, text) */
100       SV * simple_value_ref;            /* ref to simple_value */
101 
102       /*
103        * Get the stashes for the two classes into which we'll be
104        * blessing things.
105        */
106       val_stash  = gv_stashpv ("Text::BibTeX::Value",       TRUE);
107       sval_stash = gv_stashpv ("Text::BibTeX::SimpleValue", TRUE);
108 
109       if (val_stash == NULL || sval_stash == NULL) {
110           croak ("unable to get stash for one or both of "
111                  "Text::BibTeX::Value or Text::BibTeX::SimpleValue");
112       }
113 
114       /* Start the compound value as an empty list */
115       compound_value = newAV ();
116 
117       /* Walk the list of simple values */
118       while (value)
119       {
120          /*
121           * Convert the nodetype and text to SVs and save them in what will
122           * soon become a Text::BibTeX::SimpleValue object.
123           */
124          sval_contents[0] = newSViv ((IV) nodetype);
125          sval_contents[1] = newSVpv (text, 0);
126          simple_value = av_make (2, sval_contents);
127 
128          /*
129           * We're done with these two SVs (they're saved in the
130           * simple_value AV), so decrement them out of existence
131           */
132          SvREFCNT_dec (sval_contents[0]);
133          SvREFCNT_dec (sval_contents[1]);
134 
135          /* Create the SimpleValue object by blessing a reference */
136          simple_value_ref = newRV_noinc ((SV *) simple_value);
137          sv_bless (simple_value_ref, sval_stash);
138 
139          /* Push this SimpleValue object onto the main list */
140          av_push (compound_value, simple_value_ref);
141 
142          /* And find the next simple value in this field */
143          value = bt_next_value (field, value, &nodetype, &text);
144       }
145 
146       /* Make a Text::BibTeX::Value object from our list of SimpleValues */
147       sv_field_value  = newRV_noinc ((SV *) compound_value);
148       sv_bless (sv_field_value, val_stash);
149    }
150    else
151    {
152       if (value &&
153           (nodetype != BTAST_STRING ||
154            bt_next_value (field, value, NULL, NULL) != NULL))
155       {
156          croak ("BibTeX.xs: internal error in entry post-processing--"
157                 "value for field %s is not a simple string",
158                 field_name);
159       }
160 
161       DBG_ACTION (2, printf ("  field=%s, value=\"%s\"\n",
162                              field_name, text));
163       sv_field_value = text ? newSVpv (text, 0) : &PL_sv_undef;
164    }
165 
166    return sv_field_value;
167 }  /* convert_value () */
168 
169 
170 static void
convert_assigned_entry(AST * top,HV * entry,boolean preserve)171 convert_assigned_entry (AST *top, HV *entry, boolean preserve)
172 {
173    AV *    flist;                 /* the field list -- put into entry */
174    HV *    values;                /* the field values -- put into entry */
175    HV *    lines;                 /* line numbers of entry and its fields */
176    AST *   field;
177    char *  field_name;
178    AST *   item;
179    char *  item_text;
180    int     prev_line;
181 
182    /*
183     * Start the line number hash.  It will contain (num_fields)+2 elements;
184     * one for each field (keyed on the field name), and the `start' and
185     * `stop' lines for the entry as a whole.  (Currently, the `stop' line
186     * number is the same as the line number of the last field.  This isn't
187     * strictly correct, but by the time we get our hands on the AST, that
188     * closing brace or parenthesis is long lost -- so this is the best we
189     * get.  I just want to put this redundant line number in in case some
190     * day I get ambitious and keep track of its true value.)
191     */
192 
193    lines = newHV ();
194    hv_store (lines, "START", 5, newSViv (top->line), 0);
195 
196    /*
197     * Now loop over all fields in the entry.   As we loop, we build
198     * three structures: the list of field names, the hash relating
199     * field names to (fully expanded) values, and the list of line
200     * numbers.
201     */
202 
203    DBG_ACTION (2, printf ("  creating field list, value hash\n"));
204    flist = newAV ();
205    values = newHV ();
206 
207    DBG_ACTION (2, printf ("  getting fields and values\n"));
208    field = bt_next_field (top, NULL, &field_name);
209    while (field)
210    {
211       SV *   sv_field_name;
212       SV *   sv_field_value;
213 
214       if (!field_name)                  /* this shouldn't happen -- but if */
215          continue;                      /* it does, skipping the field seems */
216                                         /* reasonable to me */
217 
218       /* Convert the field name to an SV (for storing in the entry hash) */
219       sv_field_name = newSVpv (field_name, 0);
220 
221       /*
222        * Convert the field value to an SV; this might be just a string, or
223        * it might be a reference to a Text::BibTeX::Value object (if
224        * 'preserve' is true).
225        */
226       sv_field_value = convert_value (field_name, field, preserve);
227 
228       /*
229        * Push the field name onto the field list, add the field value to
230        * the values hash, and add the line number onto the line number
231        * hash.
232        */
233       av_push (flist, sv_field_name);
234       hv_store (values, field_name, strlen (field_name), sv_field_value, 0);
235       hv_store (lines, field_name, strlen (field_name),
236                 newSViv (field->line), 0);
237       prev_line = field->line;          /* so we can duplicate last line no. */
238 
239       field = bt_next_field (top, field, &field_name);
240       DBG_ACTION (2, printf ("  stored field/value; next will be %s\n",
241                              field_name));
242    }
243 
244 
245    /*
246     * Duplicate the last element of `lines' (kludge until we keep track of
247     * the true end-of-entry line number).
248     */
249    hv_store (lines, "STOP", 4, newSViv (prev_line), 0);
250 
251 
252    /* Put refs to field list, value hash, and line list into the main hash */
253 
254    DBG_ACTION (2, printf ("  got all fields; storing list/hash refs\n"));
255    hv_store (entry, "fields", 6, newRV ((SV *) flist), 0);
256    hv_store (entry, "values", 6, newRV ((SV *) values), 0);
257    hv_store (entry, "lines", 5, newRV ((SV *) lines), 0);
258 
259 } /* convert_assigned_entry () */
260 
261 
262 static void
convert_value_entry(AST * top,HV * entry,boolean preserve)263 convert_value_entry (AST *top, HV *entry, boolean preserve)
264 {
265    HV *    lines;                 /* line numbers of entry and its fields */
266    AST *   item,
267        *   prev_item = NULL;
268    int     last_line;
269    char *  value;
270    SV *    sv_value;
271 
272    /*
273     * Start the line number hash.  For "value" entries, it's a bit simpler --
274     * just a `start' and `stop' line number.  Again, the `stop' line is
275     * inaccurate; it's just the line number of the last value in the
276     * entry.
277     */
278    lines = newHV ();
279    hv_store (lines, "START", 5, newSViv (top->line), 0);
280 
281    /* Walk the list of values to find the last one (for its line number) */
282    item = NULL;
283    while ((item = bt_next_value (top, item, NULL, NULL)))
284       prev_item = item;
285 
286    if (prev_item) {
287       last_line = prev_item->line;
288       hv_store (lines, "STOP", 4, newSViv (last_line), 0);
289 
290       /* Store the line number hash in the entry hash */
291       hv_store (entry, "lines", 5, newRV ((SV *) lines), 0);
292    }
293 
294    /* And get the value of the entry as a single string (fully processed) */
295 
296    if (preserve)
297    {
298       sv_value = convert_value (NULL, top, TRUE);
299    }
300    else
301    {
302       value = bt_get_text (top);
303       sv_value = value ? newSVpv (value, 0) : &PL_sv_undef;
304    }
305    hv_store (entry, "value", 5, sv_value, 0);
306 
307 } /* convert_value_entry () */
308 
309 
310 void
ast_to_hash(SV * entry_ref,AST * top,boolean parse_status,boolean preserve)311 ast_to_hash (SV *    entry_ref,
312              AST *   top,
313              boolean parse_status,
314              boolean preserve)
315 {
316    char *  type;
317    char *  key;
318    bt_metatype
319            metatype;
320    btshort options;                     /* post-processing options */
321    HV *    entry;                       /* the main hash -- build and return */
322 
323    DBG_ACTION (1, printf ("ast_to_hash: entry\n"));
324 
325    /* printf ("checking that entry_ref is a ref and a hash ref\n"); */
326    if (! (SvROK (entry_ref) && (SvTYPE (SvRV (entry_ref)) == SVt_PVHV)))
327       croak ("entry_ref must be a hash ref");
328    entry = (HV *) SvRV (entry_ref);
329 
330    /*
331     * Clear out all hash values that might not be replaced in this
332     * conversion (in case the user parses into an existing
333     * Text::BibTeX::Entry object).  (We don't blow the hash away with
334     * hv_clear() in case higher-up code has put interesting stuff into it.)
335     */
336 
337    hv_delete (entry, "key",    3, G_DISCARD);
338    hv_delete (entry, "fields", 6, G_DISCARD);
339    hv_delete (entry, "lines",  5, G_DISCARD);
340    hv_delete (entry, "values", 6, G_DISCARD);
341    hv_delete (entry, "value",  5, G_DISCARD);
342 
343    /*
344     * Perform entry post-processing.  How exactly we post-process depends on
345     * 1) the entry type, and 2) the 'preserve' flag.
346     */
347 
348    metatype = bt_entry_metatype (top);
349    if (preserve)                        /* if true, then entry type */
350    {                                    /* doesn't matter */
351       options = BTO_MINIMAL;
352    }
353    else
354    {
355       if (metatype == BTE_MACRODEF)
356          options = BTO_MACRO;
357       else
358          options = BTO_FULL;
359    }
360 
361    /*
362     * Postprocess the entry, with the string-processing options we just
363     * determined plus "no store macros" turned on.  (That's because
364     * macros will already have been stored by the postprocessing done
365     * by bt_parse*; we don't want to do it again and generate spurious
366     * warnings!
367     */
368    bt_postprocess_entry (top, options | BTO_NOSTORE);
369 
370 
371    /*
372     * Start filling in the hash; all entries have a type and metatype,
373     * and we'll do the key here (even though it's not in all entries)
374     * for good measure.
375     */
376 
377    type = bt_entry_type (top);
378    key = bt_entry_key (top);
379    DBG_ACTION (2, printf ("  inserting type (%s), metatype (%d)\n",
380                           type ? type : "*none*", bt_entry_metatype (top)));
381    DBG_ACTION (2, printf ("        ... key (%s) status (%d)\n",
382                           key ? key : "*none*", parse_status));
383 
384    if (!type)
385       croak ("entry has no type");
386    hv_store (entry, "type", 4, newSVpv (type, 0), 0);
387    hv_store (entry, "metatype", 8, newSViv (bt_entry_metatype (top)), 0);
388 
389    if (key)
390       hv_store (entry, "key", 3, newSVpv (key, 0), 0);
391 
392    hv_store (entry, "status", 6, newSViv ((IV) parse_status), 0);
393 
394 
395    switch (metatype)
396    {
397       case BTE_MACRODEF:
398       case BTE_REGULAR:
399          convert_assigned_entry (top, entry, preserve);
400          break;
401 
402       case BTE_COMMENT:
403       case BTE_PREAMBLE:
404          convert_value_entry (top, entry, preserve);
405          break;
406 
407       default:                          /* this should never happen! */
408          croak ("unknown entry metatype (%d)\n", bt_entry_metatype (top));
409    }
410 
411    /*
412     * If 'preserve' was true, then the user is going to need the
413     * Text::BibTeX::Value module!
414     *
415     * XXX this doesn't work!  Why?!?!
416     */
417 /*
418    if (preserve)
419    {
420       printf ("requiring Text::BibTeX::Value...\n");
421       perl_require_pv ("Text::BibTeX::Value");
422    }
423 */
424 
425    /* And finally, free up the AST */
426 
427    bt_free_ast (top);
428 
429 /*   hv_store (entry, "ast", 3, newSViv ((IV) top), 0); */
430 
431    DBG_ACTION (1, printf ("ast_to_hash: exit\n"));
432 }  /* ast_to_hash () */
433 
434 
435 /* ----------------------------------------------------------------------
436  * Stuff for converting a list of C strings to Perl
437  *   convert_stringlist()   [private]
438  *   store_stringlist()
439  */
440 
441 static SV *
convert_stringlist(char ** list,int num_strings)442 convert_stringlist (char **list, int num_strings)
443 {
444    int    i;
445    AV *   perl_list;
446    SV *   sv_string;
447 
448    perl_list = newAV ();
449    for (i = 0; i < num_strings; i++)
450    {
451       sv_string = newSVpv (list[i], 0);
452       av_push (perl_list, sv_string);
453    }
454 
455    return newRV ((SV *) perl_list);
456 
457 } /* convert_stringlist() */
458 
459 
460 void
store_stringlist(HV * hash,char * key,char ** list,int num_strings)461 store_stringlist (HV *hash, char *key, char **list, int num_strings)
462 {
463    SV *  listref;
464 
465    if (list)
466    {
467       DBG_ACTION (2,
468       {
469          int i;
470 
471          printf ("store_stringlist(): hash=%p, key=%s, list=(",
472                  hash, key);
473          for (i = 0; i < num_strings; i++)
474             printf ("%s%c", list[i], (i == num_strings-1) ? ')' : ',');
475          printf ("\n");
476       })
477 
478       listref = convert_stringlist (list, num_strings);
479       hv_store (hash, key, strlen (key), listref, 0);
480    }
481    else
482    {
483       DBG_ACTION (2, printf ("store_stringlist(): hash=%p, key=%s: deleting\n",
484                              hash, key))
485       hv_delete (hash, key, strlen (key), G_DISCARD);
486    }
487 
488 } /* store_stringlist() */
489