1 /* XMLPPM: an XML compressor 2 Copyright (C) 2003 James Cheney 3 4 This program is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public License 6 as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 18 Contacting the author: 19 James Cheney 20 Computer Science Department 21 Cornell University 22 Ithaca, NY 14850 23 24 jcheney@cs.cornell.edu 25 */ 26 27 #ifndef __XMLMODEL_H__ 28 #define __XMLMODEL_H__ 29 30 /* XmlModel.c: maintains compression state of XML compressor */ 31 32 /* XML encoding of SAX events into bytecode, 33 using context to disambiguate. */ 34 35 #include "IFile.h" 36 #include "StringArray.h" 37 #include "Model.h" 38 39 /* States: START, DTD, MISC, STRING, ELTLIST, ELEMENT, ATTLIST */ 40 41 /* STRING: strings of CDATA, eltnames, attnames, attvalues, PIs, comments) 42 encoded as null terminated strings. */ 43 #define ENDSTRING 0 44 45 /* ELTLIST: a number of reasonable things might come next, including 46 a PI, comment, entity ref, characters, or element. 47 Elements are encoded using range [0-MAXELTS-1]; ENDELT signals the end 48 of the sequence of immediate children of an element list (ie 49 a matching </elt>). 50 At most 250 distinct element names. 51 ENTITY,PI,CHARS,COMMENT,CDATA are all raw strings. 52 53 MISC: a sequence of PIs and COMMENTS only, terminated by ENDELT 54 */ 55 #define MAXELTS 250 56 #define CDATA 250 57 #define ENTITY 251 58 #define PI 252 59 #define COMMENT 253 60 #define CHARS 254 61 #define ENDELT 255 62 63 /* ELEMENT: Elements are stored by first storing elttag, a reference into 64 the element symbol table, followed by the string that goes there if this 65 is the first time we use it, followed by an ELTLIST. 66 */ 67 68 /* ATTLIST: Attributes stored in a list of (atttag,attval) pairs terminated 69 by ENDATT. 70 atttag is a pointer into attribute name table, followed by string if 71 that table entry needs to be filled in. 72 attval is a string. 73 At most 254 distinct attribute names. 74 */ 75 #define MAXATTS 255 76 #define ENDATT 255 77 78 /* START : This state expects 79 XMLDECL 80 then MISC (a list containing only PIs and comments, same code as ELTLIST) 81 then a DTD (= list of DTD stuff, maybe empty ) 82 followed by MISC 83 then a ELEMENT 84 then MISC 85 */ 86 87 /* DTD */ 88 #define DTDPENTITY 245 89 #define DTDSTRING 246 90 #define DTDELEMENTDECL 247 91 #define DTDATTLISTDECL 248 92 #define DTDENTITYDECL 249 93 #define DTDNOTATIONDECL 250 94 #define DTDENTITY (ENTITY) /* these MUST be == ordinary ones */ 95 #define DTDCHARS (CHARS) 96 #define DTDCOMMENT (COMMENT) 97 #define DTDPI (PI) 98 #define ENDDTD 255 99 100 /* NOTATIONS (list of notations in attribute) */ 101 102 #define ENDNOT 255 103 #define MAXNOTS 255 104 105 /* ENTITIES (list of notations in attribute) */ 106 #define ENDENT 255 107 #define MAXENTS 255 108 109 typedef struct elStackNode { 110 int elem; 111 struct elStackNode* next; 112 } elStackNode; 113 114 115 enum char_state {cs_none, cs_characters, cs_cdata}; 116 117 typedef struct xml_state { 118 xml_state(); 119 StringArray* elts; 120 StringArray* atts; 121 StringArray* nots; 122 StringArray* ents; 123 struct elStackNode* elTop; 124 char* lastAttlistElt; 125 int depth; 126 enum char_state char_state; 127 int hasDtd; 128 int hasDecl; 129 FILE* file; 130 int standalone; 131 }xml_state; 132 133 typedef struct _xml_enc_state : public xml_state { 134 PPM_ENCODER* charPPM; 135 PPM_ENCODER* symPPM; 136 PPM_ENCODER* eltPPM; 137 PPM_ENCODER* attPPM; 138 XML_Parser p; 139 } xml_enc_state; 140 141 typedef struct _xml_dec_state : public xml_state { 142 PPM_DECODER* charPPM; 143 PPM_DECODER* symPPM; 144 PPM_DECODER* eltPPM; 145 PPM_DECODER* attPPM; 146 IFILE* ifile; 147 } xml_dec_state; 148 149 150 void pushElStack(xml_state* state, int); 151 int getTopEl(xml_state* state); 152 void popElStack(xml_state* state); 153 154 #endif 155