1 /***************************************************************************** 2 * 3 * Copyright (c) 2008-2010, CoreCodec, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are met: 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * * Neither the name of CoreCodec, Inc. nor the 14 * names of its contributors may be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY CoreCodec, Inc. ``AS IS'' AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL CoreCodec, Inc. BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 ****************************************************************************/ 29 30 #ifndef __PARSER_H 31 #define __PARSER_H 32 33 #include "corec/helpers/file/file.h" 34 #include "corec/helpers/charconvert/charconvert.h" 35 36 #ifdef __cplusplus 37 extern "C" { 38 #endif 39 40 #include "dataheap.h" 41 #include "strtab.h" 42 #include "strtypes.h" 43 #include "hotkey.h" 44 #include "buffer.h" 45 #include "nodelookup.h" 46 #include "urlpart.h" 47 48 #define LANG_STRINGS_ID FOURCC('L','N','G','S') 49 50 #define MAXTOKEN 32 51 #define MAXLINE 1024 52 #define BIGLINE 16 * 1024 53 54 #define Parser_Context(p) ((parsercontext*)Node_Context(p)) 55 56 typedef struct parsercontext 57 { 58 nodecontext Base; 59 charconv* ToUTF8; 60 charconv* FromUTF8; 61 charconv* ToStr; 62 charconv* FromStr; 63 charconv* ToWcs; 64 charconv* FromWcs; 65 charconv* ToUtf16; 66 charconv* FromUtf16; 67 strtab StrTab; 68 bool_t LowStartMemory; 69 70 } parsercontext; 71 72 NODE_DLL void ParserContext_Init(parsercontext* p,const nodemeta* Custom, const cc_memheap* Heap, const cc_memheap* ConstHeap); 73 NODE_DLL void ParserContext_Done(parsercontext* p); 74 75 NODE_DLL void Node_ToUTF8(anynode*, char* Out,size_t OutLen, const tchar_t*); 76 NODE_DLL void Node_FromUTF8(anynode*, tchar_t* Out,size_t OutLen, const char*); 77 NODE_DLL void Node_ToStr(anynode*, char* Out,size_t OutLen, const tchar_t*); 78 NODE_DLL void Node_FromStr(anynode*, tchar_t* Out,size_t OutLen, const char*); 79 NODE_DLL void Node_ToWcs(anynode*, wchar_t* Out,size_t OutLen, const tchar_t*); 80 NODE_DLL void Node_FromWcs(anynode*, tchar_t* Out,size_t OutLen, const wchar_t*); 81 NODE_DLL void Node_ToUTF16(anynode*, utf16_t* Out,size_t OutLen, const tchar_t*); 82 NODE_DLL void Node_FromUTF16(anynode*, tchar_t* Out,size_t OutLen, const utf16_t*); 83 84 typedef bool_t (*exprmacro)(void* Cookie, const tchar_t* Name, size_t NameLen, tchar_t* Out, size_t OutLen); 85 86 typedef struct exprstate 87 { 88 nodecontext* Context; 89 node* Base; 90 cc_point CoordScale; 91 fourcc_t ClassId; 92 const tchar_t* EnumList; 93 exprmacro MacroFunc; 94 void* MacroCookie; 95 array NodeLookup; 96 97 } exprstate; 98 99 NODE_DLL void ExprState(exprstate* State, node*, dataid Id, dataflags Flags); 100 101 typedef err_t (*parserexpradd)(node* Node, const datadef* DataDef, exprstate* State, const tchar_t* Expr, bool_t Save); 102 103 typedef struct parser 104 { 105 parsercontext *Context; 106 stream* Stream; 107 buffer Buffer; 108 charconv* CC; 109 charconv* EscapeCC; 110 tchar_t *BigLine; 111 boolmem_t Element; 112 boolmem_t ElementEof; 113 boolmem_t URL; 114 boolmem_t OwnCC; 115 boolmem_t Error; 116 117 } parser; 118 119 NODE_DLL err_t ParserStream(parser*, stream* Stream, parsercontext* Context); 120 NODE_DLL err_t ParserStreamXML(parser*, stream* Stream, parsercontext* Context, const tchar_t* Root, bool_t NeedRootAttribs); 121 NODE_DLL void ParserCC(parser*, charconv* CC, bool_t OwnCC); 122 NODE_DLL void ParserBOM(parser*); 123 124 NODE_DLL err_t ParserFill(parser*,size_t AdditionalNeeded); // non-blocking stream could return ERR_NEED_MORE_DATA 125 NODE_DLL err_t ParserFillLine(parser*); // non-blocking stream could return ERR_NEED_MORE_DATA 126 NODE_DLL void ParserDataFeed(parser* p,const void* Ptr,size_t Len); 127 NODE_DLL err_t ParserSkip(parser*, intptr_t* Skip); 128 NODE_DLL err_t ParserRead(parser*, void* Data, size_t Size, size_t* Readed); 129 NODE_DLL err_t ParserReadEx(parser*, void* Data, size_t Size, size_t* Readed, bool_t Fill); 130 NODE_DLL intptr_t ParserReadUntil(parser* p, tchar_t* Out, size_t OutLen, int Delimiter); 131 NODE_DLL void ParserSkipAfter(parser* p, int Delimiter); 132 NODE_DLL bool_t ParserLine(parser*, tchar_t* Out, size_t OutLen); 133 NODE_DLL bool_t ParserBigLine(parser*); 134 NODE_DLL bool_t ParserIsToken(parser*, const tchar_t* Token); // case insensitive, ascii 135 NODE_DLL bool_t ParserIsFormat(parser*, const tchar_t* Name, void *Value); 136 NODE_DLL const uint8_t* ParserPeek(parser*, size_t Len); 137 NODE_DLL const uint8_t* ParserPeekEx(parser*, size_t Len, bool_t Fill, err_t*); 138 139 NODE_DLL bool_t ParserIsRootElement(parser*, tchar_t* Root, size_t RootLen); 140 NODE_DLL bool_t ParserIsElement(parser*, tchar_t* Name, size_t NameLen); 141 NODE_DLL bool_t ParserIsElementNested(parser*, tchar_t* Name, size_t NameLen); 142 NODE_DLL bool_t ParserElementContent(parser*, tchar_t* Out, size_t OutLen); 143 NODE_DLL void ParserElementSkip(parser* p); 144 NODE_DLL void ParserElementSkipNested(parser* p); 145 146 NODE_DLL bool_t ParserIsAttrib(parser*, tchar_t* Name, size_t NameLen); 147 NODE_DLL bool_t ParserAttribString(parser*, tchar_t* Out, size_t OutLen); 148 NODE_DLL bool_t ParserAttrib(parser*, void* Data, size_t Size, dataflags Flags, exprstate* State); 149 NODE_DLL bool_t ParserAttribData(parser*, node* Node, const datadef* DataDef, exprstate* State, parserexpradd ExprAdd, bool_t ExprSave); 150 NODE_DLL bool_t ParserValueData(const tchar_t* Value, node* Node, const datadef* DataDef, exprstate* State, parserexpradd ExprAdd, bool_t ExprSave); 151 NODE_DLL bool_t ParserAttribLangStr(parser* p, parsercontext* Context, fourcc_t Class, dataid Id); 152 NODE_DLL void ParserAttribSkip(parser*); 153 154 155 NODE_DLL void ParserHTMLChars(parser*,tchar_t*,size_t); 156 NODE_DLL void ParserHTMLToURL(tchar_t*,size_t); 157 158 NODE_DLL void ParserURLToHTML(tchar_t*,size_t); 159 160 NODE_DLL void ParserImport(parser* Parser,node* Node); 161 NODE_DLL void ParserImportNested(parser* Parser,node* Node); 162 163 typedef struct textwriter 164 { 165 stream* Stream; 166 charconv* CC; 167 bool_t SafeFormat; // use <tag></tag> instead of <tag/> 168 169 // private 170 const tchar_t* Element; 171 int Deep; 172 bool_t HasChild; 173 bool_t InsideContent; 174 175 } textwriter; 176 177 NODE_DLL err_t TextWrite(textwriter*, const tchar_t*); 178 NODE_DLL err_t TextPrintf(textwriter*, const tchar_t* Msg,...) 179 #if defined(__GNUC__) 180 __attribute__ ((format(printf, 2, 3))) 181 #endif 182 ; 183 NODE_DLL void TextElementXML(parsercontext *Context, textwriter* Text, const tchar_t* Root); 184 NODE_DLL void TextElementBegin(textwriter* Out, textwriter* In, const tchar_t* Element); 185 NODE_DLL void TextElementEnd(textwriter*); 186 NODE_DLL void TextElementAppendData(textwriter* Text, const tchar_t *Value); 187 NODE_DLL void TextElementEndData(textwriter* Text, const tchar_t *Value); 188 NODE_DLL void TextAttrib(textwriter*, const tchar_t* Name, const void* Data, dataflags Type); 189 NODE_DLL void TextAttribEx(textwriter*,const tchar_t* Name, const void* Data, size_t Size, dataflags Type); 190 NODE_DLL void TextSerializeNode(textwriter* Text, node* p, uint_fast32_t Mask, uint_fast32_t Filter); 191 192 NODE_DLL bool_t StringToPin(pin* Data, datadef* DataDef, exprstate* State, const tchar_t** Expr); 193 NODE_DLL bool_t StringToNode(node** Data, exprstate* State, const tchar_t** Expr); 194 NODE_DLL bool_t StringToData(void* Data, size_t Size, dataflags Type, exprstate* State, const tchar_t* Value); 195 NODE_DLL bool_t DataToString(tchar_t* Value, size_t ValueLen, const void* Data, size_t Size, dataflags Type); 196 NODE_DLL bool_t PinToString(tchar_t* Value, size_t ValueLen, const pin* Data, node* Base); 197 NODE_DLL bool_t NodeToString(tchar_t* Value, size_t ValueLen, node* Node, node* Base); 198 199 NODE_DLL void ExprSkipSpace(const tchar_t** p); 200 NODE_DLL void ExprTrimSpace(tchar_t** p); 201 NODE_DLL bool_t ExprCmd(const tchar_t** Expr, tchar_t* Out, size_t OutLen); 202 NODE_DLL bool_t ExprSkipAfter(const tchar_t** p,int ch); //ch=0 for general space 203 NODE_DLL bool_t ExprIsToken(const tchar_t** p,const tchar_t* Name); 204 NODE_DLL bool_t ExprIsTokenEx(const tchar_t** p,const tchar_t* Name,...) 205 #if defined(__GNUC__) 206 __attribute__ ((format(printf, 2, 3))) 207 #endif 208 ; 209 NODE_DLL bool_t ExprIsSymbol(const tchar_t** p,int ch); 210 NODE_DLL bool_t ExprIsSymbol2(const tchar_t** p,int ch, int ch2); 211 NODE_DLL void ExprParamEnd(const tchar_t** p); 212 NODE_DLL bool_t ExprParamNext(const tchar_t** p); 213 NODE_DLL bool_t ExprIsFrac(const tchar_t** p,cc_fraction*); // no space skipping! 214 NODE_DLL bool_t ExprIsInt(const tchar_t** p,intptr_t*); // no space skipping! 215 NODE_DLL bool_t ExprIsInt64(const tchar_t** p,int64_t*); // no space skipping! 216 NODE_DLL bool_t ExprIsIntEx(const tchar_t** p,int size,intptr_t*); // no space skipping! 217 NODE_DLL bool_t ExprIsInt64Ex(const tchar_t** p,int size,int64_t*); // no space skipping! 218 NODE_DLL bool_t ExprIsHex(const tchar_t** p,intptr_t*); // no space skipping! 219 NODE_DLL bool_t ExprIsRGB(const tchar_t** p,intptr_t*); // no space skipping! 220 NODE_DLL bool_t ExprIsFourCC(const tchar_t** p,intptr_t*); // no space skipping! 221 NODE_DLL bool_t ExprIsPoint(const tchar_t** p,cc_point*); 222 NODE_DLL bool_t ExprIsName(const tchar_t** p,tchar_t* Out,size_t OutLen, const tchar_t* Delimiter); 223 NODE_DLL size_t ExprIsBase64(const tchar_t** p,uint8_t* Out,size_t OutSize); 224 225 NODE_DLL void SimplifyFrac(cc_fraction*, int64_t Num, int64_t Den); 226 NODE_DLL void DoubleToFrac(cc_fraction*, int64_t v); 227 228 NODE_DLL bool_t BufferToBase64(tchar_t *Out, size_t OutLen, const uint8_t *Buffer, size_t BufferSize); 229 NODE_DLL uint32_t StringToIP(const tchar_t *Addr); 230 231 NODE_DLL err_t Stream_Printf(stream*, const tchar_t* Msg,...) 232 #if defined(__GNUC__) 233 __attribute__ ((format(printf, 2, 3))) 234 #endif 235 ; 236 NODE_DLL void NodeDump(nodecontext*,textwriter*); 237 238 #ifdef __cplusplus 239 } 240 #endif 241 242 #endif 243