1 /*****************************************************************************
2  *
3  * Copyright (c) 2008-2010, CoreCodec, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *     * Redistributions of source code must retain the above copyright
9  *       notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above copyright
11  *       notice, this list of conditions and the following disclaimer in the
12  *       documentation and/or other materials provided with the distribution.
13  *     * Neither the name of CoreCodec, Inc. nor the
14  *       names of its contributors may be used to endorse or promote products
15  *       derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY CoreCodec, Inc. ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL CoreCodec, Inc. BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ****************************************************************************/
29 
30 #ifndef __PARSER_H
31 #define __PARSER_H
32 
33 #include "corec/helpers/file/file.h"
34 #include "corec/helpers/charconvert/charconvert.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 
40 #include "dataheap.h"
41 #include "strtab.h"
42 #include "strtypes.h"
43 #include "hotkey.h"
44 #include "buffer.h"
45 #include "nodelookup.h"
46 #include "urlpart.h"
47 
// FourCC code 'LNGS'. NOTE(review): by its name this identifies the
// language-strings entry; where it is consumed is not visible in this header.
#define LANG_STRINGS_ID FOURCC('L','N','G','S')
49 
// Size constants. NOTE(review): the names suggest the maximum token length,
// the normal line-buffer length and the "big line" buffer length; the unit
// (bytes vs. tchar_t characters) is not visible in this header - confirm.
#define MAXTOKEN		32
#define MAXLINE			1024
// Parenthesized so the macro keeps the value 16384 when it is used inside a
// larger expression (e.g. `x / BIGLINE`, `x % BIGLINE`).
#define BIGLINE			(16 * 1024)
53 
// Evaluates Node_Context(p) and casts the result to a parsercontext pointer.
#define Parser_Context(p) ((parsercontext *)Node_Context(p))
55 
56 typedef struct parsercontext
57 {
58     nodecontext Base;
59 	charconv* ToUTF8;
60 	charconv* FromUTF8;
61 	charconv* ToStr;
62 	charconv* FromStr;
63 	charconv* ToWcs;
64 	charconv* FromWcs;
65 	charconv* ToUtf16;
66 	charconv* FromUtf16;
67 	strtab StrTab;
68 	bool_t LowStartMemory;
69 
70 } parsercontext;
71 
72 NODE_DLL void ParserContext_Init(parsercontext* p,const nodemeta* Custom, const cc_memheap* Heap, const cc_memheap* ConstHeap);
73 NODE_DLL void ParserContext_Done(parsercontext* p);
74 
75 NODE_DLL void Node_ToUTF8(anynode*, char* Out,size_t OutLen, const tchar_t*);
76 NODE_DLL void Node_FromUTF8(anynode*, tchar_t* Out,size_t OutLen, const char*);
77 NODE_DLL void Node_ToStr(anynode*, char* Out,size_t OutLen, const tchar_t*);
78 NODE_DLL void Node_FromStr(anynode*, tchar_t* Out,size_t OutLen, const char*);
79 NODE_DLL void Node_ToWcs(anynode*, wchar_t* Out,size_t OutLen, const tchar_t*);
80 NODE_DLL void Node_FromWcs(anynode*, tchar_t* Out,size_t OutLen, const wchar_t*);
81 NODE_DLL void Node_ToUTF16(anynode*, utf16_t* Out,size_t OutLen, const tchar_t*);
82 NODE_DLL void Node_FromUTF16(anynode*, tchar_t* Out,size_t OutLen, const utf16_t*);
83 
84 typedef bool_t (*exprmacro)(void* Cookie, const tchar_t* Name, size_t NameLen, tchar_t* Out, size_t OutLen);
85 
86 typedef struct exprstate
87 {
88     nodecontext* Context;
89     node* Base;
90     cc_point CoordScale;
91     fourcc_t ClassId;
92     const tchar_t* EnumList;
93     exprmacro MacroFunc;
94     void* MacroCookie;
95     array NodeLookup;
96 
97 } exprstate;
98 
99 NODE_DLL void ExprState(exprstate* State, node*, dataid Id, dataflags Flags);
100 
101 typedef err_t (*parserexpradd)(node* Node, const datadef* DataDef, exprstate* State, const tchar_t* Expr, bool_t Save);
102 
103 typedef struct parser
104 {
105     parsercontext *Context;
106 	stream* Stream;
107 	buffer Buffer;
108 	charconv* CC;
109     charconv* EscapeCC;
110     tchar_t *BigLine;
111     boolmem_t Element;
112     boolmem_t ElementEof;
113     boolmem_t URL;
114     boolmem_t OwnCC;
115     boolmem_t Error;
116 
117 } parser;
118 
119 NODE_DLL err_t ParserStream(parser*, stream* Stream, parsercontext* Context);
120 NODE_DLL err_t ParserStreamXML(parser*, stream* Stream, parsercontext* Context, const tchar_t* Root, bool_t NeedRootAttribs);
121 NODE_DLL void ParserCC(parser*, charconv* CC, bool_t OwnCC);
122 NODE_DLL void ParserBOM(parser*);
123 
124 NODE_DLL err_t ParserFill(parser*,size_t AdditionalNeeded); // non-blocking stream could return ERR_NEED_MORE_DATA
125 NODE_DLL err_t ParserFillLine(parser*); // non-blocking stream could return ERR_NEED_MORE_DATA
126 NODE_DLL void ParserDataFeed(parser* p,const void* Ptr,size_t Len);
127 NODE_DLL err_t ParserSkip(parser*, intptr_t* Skip);
128 NODE_DLL err_t ParserRead(parser*, void* Data, size_t Size, size_t* Readed);
129 NODE_DLL err_t ParserReadEx(parser*, void* Data, size_t Size, size_t* Readed, bool_t Fill);
130 NODE_DLL intptr_t ParserReadUntil(parser* p, tchar_t* Out, size_t OutLen, int Delimiter);
131 NODE_DLL void ParserSkipAfter(parser* p, int Delimiter);
132 NODE_DLL bool_t ParserLine(parser*, tchar_t* Out, size_t OutLen);
133 NODE_DLL bool_t ParserBigLine(parser*);
134 NODE_DLL bool_t ParserIsToken(parser*, const tchar_t* Token); // case insensitive, ascii
135 NODE_DLL bool_t ParserIsFormat(parser*, const tchar_t* Name, void *Value);
136 NODE_DLL const uint8_t* ParserPeek(parser*, size_t Len);
137 NODE_DLL const uint8_t* ParserPeekEx(parser*, size_t Len, bool_t Fill, err_t*);
138 
139 NODE_DLL bool_t ParserIsRootElement(parser*, tchar_t* Root, size_t RootLen);
140 NODE_DLL bool_t ParserIsElement(parser*, tchar_t* Name, size_t NameLen);
141 NODE_DLL bool_t ParserIsElementNested(parser*, tchar_t* Name, size_t NameLen);
142 NODE_DLL bool_t ParserElementContent(parser*, tchar_t* Out, size_t OutLen);
143 NODE_DLL void ParserElementSkip(parser* p);
144 NODE_DLL void ParserElementSkipNested(parser* p);
145 
146 NODE_DLL bool_t ParserIsAttrib(parser*, tchar_t* Name, size_t NameLen);
147 NODE_DLL bool_t ParserAttribString(parser*, tchar_t* Out, size_t OutLen);
148 NODE_DLL bool_t ParserAttrib(parser*, void* Data, size_t Size, dataflags Flags, exprstate* State);
149 NODE_DLL bool_t ParserAttribData(parser*, node* Node, const datadef* DataDef, exprstate* State, parserexpradd ExprAdd, bool_t ExprSave);
150 NODE_DLL bool_t ParserValueData(const tchar_t* Value, node* Node, const datadef* DataDef, exprstate* State, parserexpradd ExprAdd, bool_t ExprSave);
151 NODE_DLL bool_t ParserAttribLangStr(parser* p, parsercontext* Context, fourcc_t Class, dataid Id);
152 NODE_DLL void ParserAttribSkip(parser*);
153 
154 
155 NODE_DLL void ParserHTMLChars(parser*,tchar_t*,size_t);
156 NODE_DLL void ParserHTMLToURL(tchar_t*,size_t);
157 
158 NODE_DLL void ParserURLToHTML(tchar_t*,size_t);
159 
160 NODE_DLL void ParserImport(parser* Parser,node* Node);
161 NODE_DLL void ParserImportNested(parser* Parser,node* Node);
162 
163 typedef struct textwriter
164 {
165 	stream* Stream;
166 	charconv* CC;
167 	bool_t SafeFormat; // use <tag></tag> instead of <tag/>
168 
169 	// private
170 	const tchar_t* Element;
171 	int Deep;
172 	bool_t HasChild;
173     bool_t InsideContent;
174 
175 } textwriter;
176 
177 NODE_DLL err_t TextWrite(textwriter*, const tchar_t*);
178 NODE_DLL err_t TextPrintf(textwriter*, const tchar_t* Msg,...)
179 #if defined(__GNUC__)
180     __attribute__ ((format(printf, 2, 3)))
181 #endif
182     ;
183 NODE_DLL void TextElementXML(parsercontext *Context, textwriter* Text, const tchar_t* Root);
184 NODE_DLL void TextElementBegin(textwriter* Out, textwriter* In, const tchar_t* Element);
185 NODE_DLL void TextElementEnd(textwriter*);
186 NODE_DLL void TextElementAppendData(textwriter* Text, const tchar_t *Value);
187 NODE_DLL void TextElementEndData(textwriter* Text, const tchar_t *Value);
188 NODE_DLL void TextAttrib(textwriter*, const tchar_t* Name, const void* Data, dataflags Type);
189 NODE_DLL void TextAttribEx(textwriter*,const tchar_t* Name, const void* Data, size_t Size, dataflags Type);
190 NODE_DLL void TextSerializeNode(textwriter* Text, node* p, uint_fast32_t Mask, uint_fast32_t Filter);
191 
192 NODE_DLL bool_t StringToPin(pin* Data, datadef* DataDef, exprstate* State, const tchar_t** Expr);
193 NODE_DLL bool_t StringToNode(node** Data, exprstate* State, const tchar_t** Expr);
194 NODE_DLL bool_t StringToData(void* Data, size_t Size, dataflags Type, exprstate* State, const tchar_t* Value);
195 NODE_DLL bool_t DataToString(tchar_t* Value, size_t ValueLen, const void* Data, size_t Size, dataflags Type);
196 NODE_DLL bool_t PinToString(tchar_t* Value, size_t ValueLen, const pin* Data, node* Base);
197 NODE_DLL bool_t NodeToString(tchar_t* Value, size_t ValueLen, node* Node, node* Base);
198 
199 NODE_DLL void ExprSkipSpace(const tchar_t** p);
200 NODE_DLL void ExprTrimSpace(tchar_t** p);
201 NODE_DLL bool_t ExprCmd(const tchar_t** Expr, tchar_t* Out, size_t OutLen);
202 NODE_DLL bool_t ExprSkipAfter(const tchar_t** p,int ch); //ch=0 for general space
203 NODE_DLL bool_t ExprIsToken(const tchar_t** p,const tchar_t* Name);
204 NODE_DLL bool_t ExprIsTokenEx(const tchar_t** p,const tchar_t* Name,...)
205 #if defined(__GNUC__)
206     __attribute__ ((format(printf, 2, 3)))
207 #endif
208     ;
209 NODE_DLL bool_t ExprIsSymbol(const tchar_t** p,int ch);
210 NODE_DLL bool_t ExprIsSymbol2(const tchar_t** p,int ch, int ch2);
211 NODE_DLL void ExprParamEnd(const tchar_t** p);
212 NODE_DLL bool_t ExprParamNext(const tchar_t** p);
213 NODE_DLL bool_t ExprIsFrac(const tchar_t** p,cc_fraction*); // no space skipping!
214 NODE_DLL bool_t ExprIsInt(const tchar_t** p,intptr_t*); // no space skipping!
215 NODE_DLL bool_t ExprIsInt64(const tchar_t** p,int64_t*); // no space skipping!
216 NODE_DLL bool_t ExprIsIntEx(const tchar_t** p,int size,intptr_t*); // no space skipping!
217 NODE_DLL bool_t ExprIsInt64Ex(const tchar_t** p,int size,int64_t*); // no space skipping!
218 NODE_DLL bool_t ExprIsHex(const tchar_t** p,intptr_t*); // no space skipping!
219 NODE_DLL bool_t ExprIsRGB(const tchar_t** p,intptr_t*); // no space skipping!
220 NODE_DLL bool_t ExprIsFourCC(const tchar_t** p,intptr_t*); // no space skipping!
221 NODE_DLL bool_t ExprIsPoint(const tchar_t** p,cc_point*);
222 NODE_DLL bool_t ExprIsName(const tchar_t** p,tchar_t* Out,size_t OutLen, const tchar_t* Delimiter);
223 NODE_DLL size_t ExprIsBase64(const tchar_t** p,uint8_t* Out,size_t OutSize);
224 
225 NODE_DLL void SimplifyFrac(cc_fraction*, int64_t Num, int64_t Den);
226 NODE_DLL void DoubleToFrac(cc_fraction*, int64_t v);
227 
228 NODE_DLL bool_t BufferToBase64(tchar_t *Out, size_t OutLen, const uint8_t *Buffer, size_t BufferSize);
229 NODE_DLL uint32_t StringToIP(const tchar_t *Addr);
230 
231 NODE_DLL err_t Stream_Printf(stream*, const tchar_t* Msg,...)
232 #if defined(__GNUC__)
233     __attribute__ ((format(printf, 2, 3)))
234 #endif
235     ;
236 NODE_DLL void NodeDump(nodecontext*,textwriter*);
237 
238 #ifdef __cplusplus
239 }
240 #endif
241 
242 #endif
243