1/** 2 3 MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more. 4 5 @file xml.c 6 7 @brief Utilities to help parse XML files 8 9 10 @author Fletcher T. Penney 11 @bug 12 13**/ 14 15/* 16 17 Copyright © 2016 - 2019 Fletcher T. Penney. 18 19 20 The `MultiMarkdown 6` project is released under the MIT License.. 21 22 GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project: 23 24 https://github.com/fletcher/MultiMarkdown-4/ 25 26 MMD 4 is released under both the MIT License and GPL. 27 28 29 CuTest is released under the zlib/libpng license. See CuTest.c for the 30 text of the license. 31 32 uthash library: 33 Copyright (c) 2005-2016, Troy D. Hanson 34 35 Licensed under Revised BSD license 36 37 miniz library: 38 Copyright 2013-2014 RAD Game Tools and Valve Software 39 Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC 40 41 Licensed under the MIT license 42 43 argtable3 library: 44 Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann 45 <sheitmann@users.sourceforge.net> 46 All rights reserved. 47 48 Licensed under the Revised BSD License 49 50 51 ## The MIT License ## 52 53 Permission is hereby granted, free of charge, to any person obtaining 54 a copy of this software and associated documentation files (the 55 "Software"), to deal in the Software without restriction, including 56 without limitation the rights to use, copy, modify, merge, publish, 57 distribute, sublicense, and/or sell copies of the Software, and to 58 permit persons to whom the Software is furnished to do so, subject to 59 the following conditions: 60 61 The above copyright notice and this permission notice shall be 62 included in all copies or substantial portions of the Software. 63 64 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 65 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 66 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 67 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 68 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 69 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 70 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 71 72 73 ## Revised BSD License ## 74 75 Redistribution and use in source and binary forms, with or without 76 modification, are permitted provided that the following conditions are 77 met: 78 * Redistributions of source code must retain the above copyright 79 notice, this list of conditions and the following disclaimer. 80 * Redistributions in binary form must reproduce the above 81 copyright notice, this list of conditions and the following 82 disclaimer in the documentation and/or other materials provided 83 with the distribution. 84 * Neither the name of the <organization> nor the 85 names of its contributors may be used to endorse or promote 86 products derived from this software without specific prior 87 written permission. 88 89 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 90 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 91 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 92 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT 93 HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 94 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 95 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE, DATA, OR 96 PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 97 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 98 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 99 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 100 101 102*/ 103 104#include <ctype.h> 105#include <stdlib.h> 106#include <string.h> 107 108#include "d_string.h" 109#include "xml.h" 110 111 112#define print(x) d_string_append(out, x) 113#define print_const(x) d_string_append_c_array(out, x, sizeof(x) - 1) 114#define print_char(x) d_string_append_c(out, x) 115#define printf(...) d_string_append_printf(out, __VA_ARGS__) 116 117 118/// strndup not available on all platforms 119static char * my_strndup(const char * source, size_t n) { 120 if (source == NULL) { 121 return NULL; 122 } 123 124 size_t len = 0; 125 char * result; 126 const char * test = source; 127 128 // strlen is too slow if strlen(source) >> n 129 for (len = 0; len < n; ++len) { 130 if (test == '\0') { 131 break; 132 } 133 134 test++; 135 } 136 137 result = malloc(len + 1); 138 139 if (result) { 140 memcpy(result, source, len); 141 result[len] = '\0'; 142 } 143 144 return result; 145} 146 147 148/*!re2c 149 150 re2c:define:YYCTYPE = "unsigned char"; 151 re2c:define:YYCURSOR = c; 152 re2c:define:YYMARKER = marker; 153 re2c:define:YYCTXMARKER = marker; 154 re2c:yyfill:enable = 0; 155 156 NL = "\r\n" | '\n' | '\r'; 157 WS = [ \t]+; 158 WSNL = (NL | WS)+; 159 160 EQUAL = '='; 161 162 double_quoted = '"' [^"\x00]* '"'; 163 single_quoted = "'" [^'\x00]* "'"; 164 quoted_value = double_quoted | single_quoted; 165 166 attribute_name = [a-zA-Z_:] [a-zA-Z0-9_:.\-]*; 167 regular_attribute = WSNL* attribute_name WSNL* EQUAL WSNL* quoted_value WSNL*; 168 boolean_attribute = WSNL* attribute_name WSNL*; 169 attribute = regular_attribute | boolean_attribute; 170 171 contains_newline = " " | " "; 172 173*/ 174 175 176/// skip through whitespace 177size_t xml_scan_wsnl(const char * c) { 178 const char * start = c; 179 180/*!re2c 181 WSNL* { return (size_t)( c - start ); } 182 * { return 0; } 183*/ 184} 185 186 187/// scan generic attribute_name 188size_t xml_scan_attribute_name(const char * c) {; 189 const char * start = c; 190 191/*!re2c 192 attribute_name { return (size_t)( c - start ); } 193 * { return 0; } 194*/ 195} 196 197 198/// scan until start of value, if present 199size_t xml_scan_until_value(const char * c) { 200 const char * marker = NULL; 201 const char * start = c; 202 203/*!re2c 204 WSNL* EQUAL WSNL* / quoted_value { return (size_t)( c - start ); } 205 * { return 0; } 206*/ 207} 208 209 210/// scan value 211size_t xml_scan_value(const char * c) { 212 const char * marker = NULL; 213 const char * start = c; 214 215/*!re2c 216 quoted_value { return (size_t)( c - start ); } 217 * { return 0; } 218*/ 219} 220 221 222/// Does the string include encoded newline? 223size_t xml_scan_encoded_newline(const char * c, size_t len) { 224 const char * marker = NULL; 225 const char * start = c; 226 227 scan: 228 229 if ((*c == '\0') || ((c - start) > len)) { 230 // Not found 231 return -1; 232 } 233 234/*!re2c 235 contains_newline { return (size_t)(c - start); } 236 * { goto scan; } 237*/ 238} 239 240 241/// Decode XML encoded text and print to DString 242void print_xml_as_text(DString * out, const char * source, size_t start, size_t len) { 243 const char * s_start = &source[start]; 244 const char * s_stop = &source[start + len]; 245 246 char * c = (char *) s_start; 247 248 while (c < s_stop) { 249 switch (*c) { 250 case '&': 251 switch (*++c) { 252 case '#': 253 if (strncmp(c, "#10;", 4) == 0) { 254 print_char('\n'); 255 c += 4; 256 continue; 257 } 258 259 if (strncmp(c, "#9;", 3) == 0) { 260 print_char('\t'); 261 c += 3; 262 continue; 263 } 264 265 if (strncmp(c, "#13;", 4) == 0) { 266 print_char('\r'); 267 c += 4; 268 continue; 269 } 270 271 break; 272 273 case 'a': 274 if (strncmp(c, "amp;", 4) == 0) { 275 print_char('&'); 276 c += 4; 277 continue; 278 } 279 280 if (strncmp(c, "apos;", 5) == 0) { 281 print_char('\''); 282 c += 5; 283 continue; 284 } 285 286 break; 287 288 case 'l': 289 if (strncmp(c, "lt;", 3) == 0) { 290 print_char('<'); 291 c += 3; 292 continue; 293 } 294 295 break; 296 297 case 'g': 298 if (strncmp(c, "gt;", 3) == 0) { 299 print_char('>'); 300 c += 3; 301 continue; 302 } 303 304 break; 305 306 case 'q': 307 if (strncmp(c, "quot;", 5) == 0) { 308 print_char('"'); 309 c += 5; 310 continue; 311 } 312 313 break; 314 315 default: 316 break; 317 } 318 319 print_char('&'); 320 continue; 321 break; 322 323 default: 324 print_char(*c); 325 break; 326 } 327 328 c++; 329 } 330} 331 332 333/// Parse XML text for attribute and value 334size_t xml_extract_attribute(const char * source, size_t start, char ** attr, char ** value) { 335 size_t cursor = start; 336 size_t len = 0; 337 338 if (*attr) { 339 free(*attr); 340 *attr = NULL; 341 } 342 343 if (*value) { 344 free(*value); 345 *value = NULL; 346 } 347 348 // Skip leading whitespace 349 cursor += xml_scan_wsnl(&source[start]); 350 351 len = xml_scan_attribute_name(&source[cursor]); 352 353 if (len) { 354 // Copy attribute name 355 *attr = my_strndup(&source[cursor], len); 356 357 cursor += len; 358 359 // Value? 360 cursor += xml_scan_until_value(&source[cursor]); 361 len = xml_scan_value(&source[cursor]); 362 363 if (len) { 364 *value = my_strndup(&source[cursor + 1], len - 2); 365 } 366 367 cursor += len; 368 } 369 370 371 return cursor - start; 372} 373 374 375/// Extract attribute with specified name 376char * xml_extract_named_attribute(const char * source, size_t start, const char * name) { 377 char * lower_name = my_strndup(name, strlen(name)); 378 char * result = NULL; 379 380 // Use lower case for easy comparison 381 for(int i = 0; lower_name[i]; i++){ 382 lower_name[i] = tolower(lower_name[i]); 383 } 384 385 char * attr = NULL, * value = NULL, * lower_attr = NULL; 386 387 do { 388 start += xml_extract_attribute(source, start, &attr, &value); 389 390 if (attr) { 391 lower_attr = my_strndup(attr, strlen(attr)); 392 393 // Use lower case for easy comparison 394 for(int i = 0; lower_name[i]; i++){ 395 lower_attr[i] = tolower(lower_attr[i]); 396 } 397 398 if (strcmp(lower_name, lower_attr) == 0) { 399 // Match 400 result = value; 401 value = NULL; 402 free(lower_attr); 403 goto finish; 404 } 405 406 free(lower_attr); 407 } 408 } while (attr); 409 410 finish: 411 free(attr); 412 free(value); 413 free(lower_name); 414 415 return result; 416} 417