1 /* ------------------------------------------------------------------------
2 @NAME : btxs_support.c
3 @DESCRIPTION: Support functions needed by the XSUBs in BibTeX.xs.
4 @GLOBALS :
5 @CREATED : 1997/11/16, Greg Ward (from code in BibTeX.xs)
6 @MODIFIED :
7 @VERSION : $Id$
8 @COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved.
9 -------------------------------------------------------------------------- */
10 #include "EXTERN.h"
11 #include "perl.h"
12 #include "XSUB.h"
13
14 #define BT_DEBUG 0
15
16 #include "btparse.h"
17 #include "btxs_support.h"
18
19
20 static char *nodetype_names[] =
21 {
22 "entry", "macrodef", "text", "key", "field", "string", "number", "macro"
23 };
24
25
26 /* ----------------------------------------------------------------------
27 * Miscellaneous stuff
28 */
29
30 int
constant(char * name,IV * arg)31 constant (char * name, IV * arg)
32 {
33 int ok = FALSE;
34
35 DBG_ACTION (1, printf ("constant: name=%s\n", name));
36
37 if (! (name[0] == 'B' && name[1] == 'T')) /* should not happen! */
38 croak ("Illegal constant name \"%s\"", name);
39
40 switch (name[2])
41 {
42 case 'E': /* entry metatypes */
43 if (strEQ (name, "BTE_UNKNOWN")) { *arg = BTE_UNKNOWN; ok = TRUE; }
44 if (strEQ (name, "BTE_REGULAR")) { *arg = BTE_REGULAR; ok = TRUE; }
45 if (strEQ (name, "BTE_COMMENT")) { *arg = BTE_COMMENT; ok = TRUE; }
46 if (strEQ (name, "BTE_PREAMBLE")) { *arg = BTE_PREAMBLE; ok = TRUE; }
47 if (strEQ (name, "BTE_MACRODEF")) { *arg = BTE_MACRODEF; ok = TRUE; }
48 break;
49 case 'A': /* AST nodetypes (not all of them) */
50 if (strEQ (name, "BTAST_STRING")) { *arg = BTAST_STRING; ok = TRUE; }
51 if (strEQ (name, "BTAST_NUMBER")) { *arg = BTAST_NUMBER; ok = TRUE; }
52 if (strEQ (name, "BTAST_MACRO")) { *arg = BTAST_MACRO; ok = TRUE; }
53 break;
54 case 'N': /* name parts */
55 if (strEQ (name, "BTN_FIRST")) { *arg = BTN_FIRST; ok = TRUE; }
56 if (strEQ (name, "BTN_VON")) { *arg = BTN_VON; ok = TRUE; }
57 if (strEQ (name, "BTN_LAST")) { *arg = BTN_LAST; ok = TRUE; }
58 if (strEQ (name, "BTN_JR")) { *arg = BTN_JR; ok = TRUE; }
59 if (strEQ (name, "BTN_NONE")) { *arg = BTN_NONE; ok = TRUE; }
60 break;
61 case 'J': /* token join methods */
62 if (strEQ (name, "BTJ_MAYTIE")) { *arg = BTJ_MAYTIE; ok = TRUE; }
63 if (strEQ (name, "BTJ_SPACE")) { *arg = BTJ_SPACE; ok = TRUE; }
64 if (strEQ (name, "BTJ_FORCETIE")) { *arg = BTJ_FORCETIE; ok = TRUE; }
65 if (strEQ (name, "BTJ_NOTHING")) { *arg = BTJ_NOTHING; ok = TRUE; }
66 break;
67 default:
68 break;
69 }
70
71 return ok;
72 }
73
74
75 /* ----------------------------------------------------------------------
76 * Stuff for converting a btparse entry AST to a Perl structure:
77 * convert_value() [private]
78 * convert_assigned_entry() [private]
79 * convert_value_entry() [private]
80 * ast_to_hash()
81 */
82
83 static SV *
convert_value(char * field_name,AST * field,boolean preserve)84 convert_value (char * field_name, AST * field, boolean preserve)
85 {
86 AST * value;
87 bt_nodetype
88 nodetype;
89 char * text;
90 SV * sv_field_value;
91
92 value = bt_next_value (field, NULL, &nodetype, &text);
93 if (preserve)
94 {
95 HV * val_stash; /* stash for Text::BibTeX::Value pkg */
96 HV * sval_stash; /* and for Text::BibTeX::SimpleValue */
97 AV * compound_value; /* list of simple values */
98 SV * sval_contents[2]; /* type and text */
99 AV * simple_value; /* list of (type, text) */
100 SV * simple_value_ref; /* ref to simple_value */
101
102 /*
103 * Get the stashes for the two classes into which we'll be
104 * blessing things.
105 */
106 val_stash = gv_stashpv ("Text::BibTeX::Value", TRUE);
107 sval_stash = gv_stashpv ("Text::BibTeX::SimpleValue", TRUE);
108
109 if (val_stash == NULL || sval_stash == NULL) {
110 croak ("unable to get stash for one or both of "
111 "Text::BibTeX::Value or Text::BibTeX::SimpleValue");
112 }
113
114 /* Start the compound value as an empty list */
115 compound_value = newAV ();
116
117 /* Walk the list of simple values */
118 while (value)
119 {
120 /*
121 * Convert the nodetype and text to SVs and save them in what will
122 * soon become a Text::BibTeX::SimpleValue object.
123 */
124 sval_contents[0] = newSViv ((IV) nodetype);
125 sval_contents[1] = newSVpv (text, 0);
126 simple_value = av_make (2, sval_contents);
127
128 /*
129 * We're done with these two SVs (they're saved in the
130 * simple_value AV), so decrement them out of existence
131 */
132 SvREFCNT_dec (sval_contents[0]);
133 SvREFCNT_dec (sval_contents[1]);
134
135 /* Create the SimpleValue object by blessing a reference */
136 simple_value_ref = newRV_noinc ((SV *) simple_value);
137 sv_bless (simple_value_ref, sval_stash);
138
139 /* Push this SimpleValue object onto the main list */
140 av_push (compound_value, simple_value_ref);
141
142 /* And find the next simple value in this field */
143 value = bt_next_value (field, value, &nodetype, &text);
144 }
145
146 /* Make a Text::BibTeX::Value object from our list of SimpleValues */
147 sv_field_value = newRV_noinc ((SV *) compound_value);
148 sv_bless (sv_field_value, val_stash);
149 }
150 else
151 {
152 if (value &&
153 (nodetype != BTAST_STRING ||
154 bt_next_value (field, value, NULL, NULL) != NULL))
155 {
156 croak ("BibTeX.xs: internal error in entry post-processing--"
157 "value for field %s is not a simple string",
158 field_name);
159 }
160
161 DBG_ACTION (2, printf (" field=%s, value=\"%s\"\n",
162 field_name, text));
163 sv_field_value = text ? newSVpv (text, 0) : &PL_sv_undef;
164 }
165
166 return sv_field_value;
167 } /* convert_value () */
168
169
170 static void
convert_assigned_entry(AST * top,HV * entry,boolean preserve)171 convert_assigned_entry (AST *top, HV *entry, boolean preserve)
172 {
173 AV * flist; /* the field list -- put into entry */
174 HV * values; /* the field values -- put into entry */
175 HV * lines; /* line numbers of entry and its fields */
176 AST * field;
177 char * field_name;
178 AST * item;
179 char * item_text;
180 int prev_line;
181
182 /*
183 * Start the line number hash. It will contain (num_fields)+2 elements;
184 * one for each field (keyed on the field name), and the `start' and
185 * `stop' lines for the entry as a whole. (Currently, the `stop' line
186 * number is the same as the line number of the last field. This isn't
187 * strictly correct, but by the time we get our hands on the AST, that
188 * closing brace or parenthesis is long lost -- so this is the best we
189 * get. I just want to put this redundant line number in in case some
190 * day I get ambitious and keep track of its true value.)
191 */
192
193 lines = newHV ();
194 hv_store (lines, "START", 5, newSViv (top->line), 0);
195
196 /*
197 * Now loop over all fields in the entry. As we loop, we build
198 * three structures: the list of field names, the hash relating
199 * field names to (fully expanded) values, and the list of line
200 * numbers.
201 */
202
203 DBG_ACTION (2, printf (" creating field list, value hash\n"));
204 flist = newAV ();
205 values = newHV ();
206
207 DBG_ACTION (2, printf (" getting fields and values\n"));
208 field = bt_next_field (top, NULL, &field_name);
209 while (field)
210 {
211 SV * sv_field_name;
212 SV * sv_field_value;
213
214 if (!field_name) /* this shouldn't happen -- but if */
215 continue; /* it does, skipping the field seems */
216 /* reasonable to me */
217
218 /* Convert the field name to an SV (for storing in the entry hash) */
219 sv_field_name = newSVpv (field_name, 0);
220
221 /*
222 * Convert the field value to an SV; this might be just a string, or
223 * it might be a reference to a Text::BibTeX::Value object (if
224 * 'preserve' is true).
225 */
226 sv_field_value = convert_value (field_name, field, preserve);
227
228 /*
229 * Push the field name onto the field list, add the field value to
230 * the values hash, and add the line number onto the line number
231 * hash.
232 */
233 av_push (flist, sv_field_name);
234 hv_store (values, field_name, strlen (field_name), sv_field_value, 0);
235 hv_store (lines, field_name, strlen (field_name),
236 newSViv (field->line), 0);
237 prev_line = field->line; /* so we can duplicate last line no. */
238
239 field = bt_next_field (top, field, &field_name);
240 DBG_ACTION (2, printf (" stored field/value; next will be %s\n",
241 field_name));
242 }
243
244
245 /*
246 * Duplicate the last element of `lines' (kludge until we keep track of
247 * the true end-of-entry line number).
248 */
249 hv_store (lines, "STOP", 4, newSViv (prev_line), 0);
250
251
252 /* Put refs to field list, value hash, and line list into the main hash */
253
254 DBG_ACTION (2, printf (" got all fields; storing list/hash refs\n"));
255 hv_store (entry, "fields", 6, newRV ((SV *) flist), 0);
256 hv_store (entry, "values", 6, newRV ((SV *) values), 0);
257 hv_store (entry, "lines", 5, newRV ((SV *) lines), 0);
258
259 } /* convert_assigned_entry () */
260
261
262 static void
convert_value_entry(AST * top,HV * entry,boolean preserve)263 convert_value_entry (AST *top, HV *entry, boolean preserve)
264 {
265 HV * lines; /* line numbers of entry and its fields */
266 AST * item,
267 * prev_item = NULL;
268 int last_line;
269 char * value;
270 SV * sv_value;
271
272 /*
273 * Start the line number hash. For "value" entries, it's a bit simpler --
274 * just a `start' and `stop' line number. Again, the `stop' line is
275 * inaccurate; it's just the line number of the last value in the
276 * entry.
277 */
278 lines = newHV ();
279 hv_store (lines, "START", 5, newSViv (top->line), 0);
280
281 /* Walk the list of values to find the last one (for its line number) */
282 item = NULL;
283 while ((item = bt_next_value (top, item, NULL, NULL)))
284 prev_item = item;
285
286 if (prev_item) {
287 last_line = prev_item->line;
288 hv_store (lines, "STOP", 4, newSViv (last_line), 0);
289
290 /* Store the line number hash in the entry hash */
291 hv_store (entry, "lines", 5, newRV ((SV *) lines), 0);
292 }
293
294 /* And get the value of the entry as a single string (fully processed) */
295
296 if (preserve)
297 {
298 sv_value = convert_value (NULL, top, TRUE);
299 }
300 else
301 {
302 value = bt_get_text (top);
303 sv_value = value ? newSVpv (value, 0) : &PL_sv_undef;
304 }
305 hv_store (entry, "value", 5, sv_value, 0);
306
307 } /* convert_value_entry () */
308
309
310 void
ast_to_hash(SV * entry_ref,AST * top,boolean parse_status,boolean preserve)311 ast_to_hash (SV * entry_ref,
312 AST * top,
313 boolean parse_status,
314 boolean preserve)
315 {
316 char * type;
317 char * key;
318 bt_metatype
319 metatype;
320 btshort options; /* post-processing options */
321 HV * entry; /* the main hash -- build and return */
322
323 DBG_ACTION (1, printf ("ast_to_hash: entry\n"));
324
325 /* printf ("checking that entry_ref is a ref and a hash ref\n"); */
326 if (! (SvROK (entry_ref) && (SvTYPE (SvRV (entry_ref)) == SVt_PVHV)))
327 croak ("entry_ref must be a hash ref");
328 entry = (HV *) SvRV (entry_ref);
329
330 /*
331 * Clear out all hash values that might not be replaced in this
332 * conversion (in case the user parses into an existing
333 * Text::BibTeX::Entry object). (We don't blow the hash away with
334 * hv_clear() in case higher-up code has put interesting stuff into it.)
335 */
336
337 hv_delete (entry, "key", 3, G_DISCARD);
338 hv_delete (entry, "fields", 6, G_DISCARD);
339 hv_delete (entry, "lines", 5, G_DISCARD);
340 hv_delete (entry, "values", 6, G_DISCARD);
341 hv_delete (entry, "value", 5, G_DISCARD);
342
343 /*
344 * Perform entry post-processing. How exactly we post-process depends on
345 * 1) the entry type, and 2) the 'preserve' flag.
346 */
347
348 metatype = bt_entry_metatype (top);
349 if (preserve) /* if true, then entry type */
350 { /* doesn't matter */
351 options = BTO_MINIMAL;
352 }
353 else
354 {
355 if (metatype == BTE_MACRODEF)
356 options = BTO_MACRO;
357 else
358 options = BTO_FULL;
359 }
360
361 /*
362 * Postprocess the entry, with the string-processing options we just
363 * determined plus "no store macros" turned on. (That's because
364 * macros will already have been stored by the postprocessing done
365 * by bt_parse*; we don't want to do it again and generate spurious
366 * warnings!
367 */
368 bt_postprocess_entry (top, options | BTO_NOSTORE);
369
370
371 /*
372 * Start filling in the hash; all entries have a type and metatype,
373 * and we'll do the key here (even though it's not in all entries)
374 * for good measure.
375 */
376
377 type = bt_entry_type (top);
378 key = bt_entry_key (top);
379 DBG_ACTION (2, printf (" inserting type (%s), metatype (%d)\n",
380 type ? type : "*none*", bt_entry_metatype (top)));
381 DBG_ACTION (2, printf (" ... key (%s) status (%d)\n",
382 key ? key : "*none*", parse_status));
383
384 if (!type)
385 croak ("entry has no type");
386 hv_store (entry, "type", 4, newSVpv (type, 0), 0);
387 hv_store (entry, "metatype", 8, newSViv (bt_entry_metatype (top)), 0);
388
389 if (key)
390 hv_store (entry, "key", 3, newSVpv (key, 0), 0);
391
392 hv_store (entry, "status", 6, newSViv ((IV) parse_status), 0);
393
394
395 switch (metatype)
396 {
397 case BTE_MACRODEF:
398 case BTE_REGULAR:
399 convert_assigned_entry (top, entry, preserve);
400 break;
401
402 case BTE_COMMENT:
403 case BTE_PREAMBLE:
404 convert_value_entry (top, entry, preserve);
405 break;
406
407 default: /* this should never happen! */
408 croak ("unknown entry metatype (%d)\n", bt_entry_metatype (top));
409 }
410
411 /*
412 * If 'preserve' was true, then the user is going to need the
413 * Text::BibTeX::Value module!
414 *
415 * XXX this doesn't work! Why?!?!
416 */
417 /*
418 if (preserve)
419 {
420 printf ("requiring Text::BibTeX::Value...\n");
421 perl_require_pv ("Text::BibTeX::Value");
422 }
423 */
424
425 /* And finally, free up the AST */
426
427 bt_free_ast (top);
428
429 /* hv_store (entry, "ast", 3, newSViv ((IV) top), 0); */
430
431 DBG_ACTION (1, printf ("ast_to_hash: exit\n"));
432 } /* ast_to_hash () */
433
434
435 /* ----------------------------------------------------------------------
436 * Stuff for converting a list of C strings to Perl
437 * convert_stringlist() [private]
438 * store_stringlist()
439 */
440
441 static SV *
convert_stringlist(char ** list,int num_strings)442 convert_stringlist (char **list, int num_strings)
443 {
444 int i;
445 AV * perl_list;
446 SV * sv_string;
447
448 perl_list = newAV ();
449 for (i = 0; i < num_strings; i++)
450 {
451 sv_string = newSVpv (list[i], 0);
452 av_push (perl_list, sv_string);
453 }
454
455 return newRV ((SV *) perl_list);
456
457 } /* convert_stringlist() */
458
459
460 void
store_stringlist(HV * hash,char * key,char ** list,int num_strings)461 store_stringlist (HV *hash, char *key, char **list, int num_strings)
462 {
463 SV * listref;
464
465 if (list)
466 {
467 DBG_ACTION (2,
468 {
469 int i;
470
471 printf ("store_stringlist(): hash=%p, key=%s, list=(",
472 hash, key);
473 for (i = 0; i < num_strings; i++)
474 printf ("%s%c", list[i], (i == num_strings-1) ? ')' : ',');
475 printf ("\n");
476 })
477
478 listref = convert_stringlist (list, num_strings);
479 hv_store (hash, key, strlen (key), listref, 0);
480 }
481 else
482 {
483 DBG_ACTION (2, printf ("store_stringlist(): hash=%p, key=%s: deleting\n",
484 hash, key))
485 hv_delete (hash, key, strlen (key), G_DISCARD);
486 }
487
488 } /* store_stringlist() */
489