1 /* XMLPPM: an XML compressor
2 Copyright (C) 2003 James Cheney
3 
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
17 
18 Contacting the author:
19 James Cheney
20 Computer Science Department
21 Cornell University
22 Ithaca, NY 14850
23 
24 jcheney@cs.cornell.edu
25 */
26 
27 #ifndef __XMLMODEL_H__
28 #define __XMLMODEL_H__
29 
30 /* XmlModel.c: maintains compression state of XML compressor */
31 
32 /* XML encoding of SAX events into bytecode,
33    using context to disambiguate. */
34 
35 #include "IFile.h"
36 #include "StringArray.h"
37 #include "Model.h"
38 
39 /* States: START, DTD, MISC, STRING, ELTLIST, ELEMENT, ATTLIST  */
40 
/* STRING: strings (CDATA, eltnames, attnames, attvalues, PIs, comments)
   are encoded as null-terminated strings.  */
#define ENDSTRING 0 /* terminator byte that ends each encoded string */
44 
45 /* ELTLIST: a number of reasonable things might come next, including
46    a PI, comment, entity ref, characters, or element.
47    Elements are encoded using range [0-MAXELTS-1]; ENDELT signals the end
48    of the sequence of immediate children of an element list (ie
49    a matching </elt>).
50    At most 250 distinct element names.
51    ENTITY,PI,CHARS,COMMENT,CDATA are all raw strings.
52 
53    MISC: a sequence of PIs and COMMENTS only, terminated by ENDELT
54 */
/* Byte codes seen in an ELTLIST (MISC uses the same codes). */
#define MAXELTS  250  /* element tags occupy codes 0 .. MAXELTS-1 */
#define CDATA    250  /* raw string follows */
#define ENTITY   251  /* raw string follows */
#define PI       252  /* raw string follows */
#define COMMENT  253  /* raw string follows */
#define CHARS    254  /* raw string follows */
#define ENDELT   255  /* closes the current list of children (matching </elt>) */
62 
63 /* ELEMENT: Elements are stored by first storing elttag, a reference into
64    the element symbol table, followed by the string that goes there if this
65    is the first time we use it, followed by an ELTLIST.
66 */
67 
68 /* ATTLIST: Attributes stored in a list of (atttag,attval) pairs terminated
69    by ENDATT.
70    atttag is a pointer into attribute name table, followed by string if
71    that table entry needs to be filled in.
72    attval is a string.
73    At most 254 distinct attribute names.
74 */
/* NOTE(review): the ATTLIST description says "at most 254 distinct attribute
   names" while MAXATTS is 255 -- confirm which of the two is intended. */
#define MAXATTS  255  /* upper bound on attribute tag codes */
#define ENDATT   255  /* terminates a list of (atttag, attval) pairs */
77 
78 /* START : This state expects
79    XMLDECL
80    then MISC (a list containing only PIs and comments, same code as ELTLIST)
81    then a DTD (= list of DTD stuff, maybe empty )
82         followed by MISC
83    then a ELEMENT
84    then MISC
85 */
86 
87 /* DTD */
/* Codes that are specific to the DTD state.  The per-code notes are read off
   the macro names; confirm the exact meaning against the encoder. */
#define DTDPENTITY       245  /* presumably a parameter-entity reference */
#define DTDSTRING        246
#define DTDELEMENTDECL   247  /* presumably an element declaration */
#define DTDATTLISTDECL   248  /* presumably an attribute-list declaration */
#define DTDENTITYDECL    249  /* presumably an entity declaration */
#define DTDNOTATIONDECL  250  /* same numeric value as CDATA -- NOTE(review):
                                 confirm the two are never valid in one state */

/* Aliases of the ELTLIST codes: these MUST be == ordinary ones. */
#define DTDENTITY        (ENTITY)
#define DTDCHARS         (CHARS)
#define DTDCOMMENT       (COMMENT)
#define DTDPI            (PI)

#define ENDDTD           255  /* end-of-DTD marker */
99 
100 /* NOTATIONS (list of notations in attribute) */
101 
#define MAXNOTS  255  /* NOTE(review): presumably the bound on notation codes -- confirm */
#define ENDNOT   255  /* end-of-list marker for a notation list */
104 
/* ENTITIES (list of entities in attribute) */
#define MAXENTS  255  /* NOTE(review): presumably the bound on entity codes -- confirm */
#define ENDENT   255  /* end-of-list marker for an entity list */
108 
/* One cell of a singly linked list of int element values; `next` points to
   the following cell.  xml_state::elTop holds a pointer to a cell of this
   type. */
struct elStackNode {
  int elem;                  /* value stored in this cell */
  struct elStackNode *next;  /* following cell in the list */
};
typedef struct elStackNode elStackNode;
113 
114 
/* Values for xml_state::char_state.
   NOTE(review): presumably records which kind of character data, if any,
   is currently being processed -- confirm against the users of the field. */
enum char_state {
  cs_none,        /* 0 */
  cs_characters,  /* 1 */
  cs_cdata        /* 2 */
};
116 
117 typedef struct xml_state {
118   xml_state();
119   StringArray* elts;
120   StringArray* atts;
121   StringArray* nots;
122   StringArray* ents;
123   struct elStackNode* elTop;
124   char* lastAttlistElt;
125   int depth;
126   enum char_state char_state;
127   int hasDtd;
128   int hasDecl;
129   FILE* file;
130   int standalone;
131 }xml_state;
132 
133 typedef struct _xml_enc_state : public xml_state {
134   PPM_ENCODER* charPPM;
135   PPM_ENCODER* symPPM;
136   PPM_ENCODER* eltPPM;
137   PPM_ENCODER* attPPM;
138   XML_Parser p;
139 } xml_enc_state;
140 
141 typedef struct _xml_dec_state : public xml_state {
142   PPM_DECODER* charPPM;
143   PPM_DECODER* symPPM;
144   PPM_DECODER* eltPPM;
145   PPM_DECODER* attPPM;
146   IFILE* ifile;
147 } xml_dec_state;
148 
149 
150 void pushElStack(xml_state* state, int);
151 int getTopEl(xml_state* state);
152 void popElStack(xml_state* state);
153 
154 #endif
155