/**

	MultiMarkdown -- Lightweight markup processor to produce HTML, LaTeX, and more.

	@file itmz-lexer.c

	@brief Tokenize ITMZ (iThoughts) file for parsing


	@author	Fletcher T. Penney
	@bug

**/

/*

	Copyright © 2016 - 2019 Fletcher T. Penney.


	The `MultiMarkdown 6` project is released under the MIT License.

	GLibFacade.c and GLibFacade.h are from the MultiMarkdown v4 project:

		https://github.com/fletcher/MultiMarkdown-4/

	MMD 4 is released under both the MIT License and GPL.


	CuTest is released under the zlib/libpng license. See CuTest.c for the
	text of the license.

	uthash library:
		Copyright (c) 2005-2016, Troy D. Hanson

		Licensed under Revised BSD license

	miniz library:
		Copyright 2013-2014 RAD Game Tools and Valve Software
		Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC

		Licensed under the MIT license

	argtable3 library:
		Copyright (C) 1998-2001,2003-2011,2013 Stewart Heitmann
		<sheitmann@users.sourceforge.net>
		All rights reserved.

		Licensed under the Revised BSD License


	## The MIT License ##

	Permission is hereby granted, free of charge, to any person obtaining
	a copy of this software and associated documentation files (the
	"Software"), to deal in the Software without restriction, including
	without limitation the rights to use, copy, modify, merge, publish,
	distribute, sublicense, and/or sell copies of the Software, and to
	permit persons to whom the Software is furnished to do so, subject to
	the following conditions:

	The above copyright notice and this permission notice shall be
	included in all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
67 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 68 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 69 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 70 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 71 72 73 ## Revised BSD License ## 74 75 Redistribution and use in source and binary forms, with or without 76 modification, are permitted provided that the following conditions are 77 met: 78 * Redistributions of source code must retain the above copyright 79 notice, this list of conditions and the following disclaimer. 80 * Redistributions in binary form must reproduce the above 81 copyright notice, this list of conditions and the following 82 disclaimer in the documentation and/or other materials provided 83 with the distribution. 84 * Neither the name of the <organization> nor the 85 names of its contributors may be used to endorse or promote 86 products derived from this software without specific prior 87 written permission. 88 89 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 90 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 91 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 92 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT 93 HOLDER> BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 94 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 95 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES LOSS OF USE, DATA, OR 96 PROFITS OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 97 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 98 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 99 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
100 101 102*/ 103 104#include <stdlib.h> 105 106#include "itmz-lexer.h" 107#include "itmz-parser.h" 108 109 110// Basic scanner struct 111 112#define YYCTYPE unsigned char 113#define YYCURSOR s->cur 114#define YYMARKER s->ptr 115#define YYCTXMARKER s->ctx 116 117int itmz_scan(Scanner * s, const char * stop) { 118 119 scan: 120 121 if (s->cur >= stop) { 122 return 0; 123 } 124 125 s->start = s->cur; 126 127 /*!re2c 128 re2c:yyfill:enable = 0; 129 130 NL = "\r\n" | '\n' | '\r'; 131 WS = [ \t]+; 132 WSNL = (NL | WS)+; 133 134 EQUAL = '='; 135 136 double_quoted = '"' [^"\x00]* '"'; 137 single_quoted = "'" [^'\x00]* "'"; 138 quoted_value = double_quoted | single_quoted; 139 140 uuid_attribute = WSNL* 'uuid' WSNL* EQUAL WSNL*; 141 text_attribute = WSNL* 'text' WSNL* EQUAL WSNL*; 142 note_attribute = WSNL* 'note' WSNL* EQUAL WSNL*; 143 144 attribute_name = [a-zA-Z_:] [a-zA-Z0-9_:.\-]*; 145 regular_attribute = WSNL* attribute_name WSNL* EQUAL WSNL* quoted_value WSNL*; 146 boolean_attribute = WSNL* attribute_name WSNL*; 147 attribute = regular_attribute | boolean_attribute; 148 149 contains_newline = " " | " "; 150 151 '<iThoughts' [^>\x00]* '>' { return ITMZ_ITHOUGHTS_OPEN; } 152 '</iThoughts>' { return ITMZ_ITHOUGHTS_CLOSE; } 153 154 '<topics>' { return ITMZ_TOPICS_OPEN; } 155 '</topics>' { return ITMZ_TOPICS_CLOSE; } 156 157 '<topic' text_attribute '">>Preamble<<"' attribute* '>' { return ITMZ_TOPIC_PREAMBLE; } 158 '<topic' text_attribute '">>Metadata<<"' attribute* '>' { return ITMZ_TOPIC_METADATA; } 159 160 '<topic' attribute text_attribute '">>Preamble<<"' attribute* '>' { return ITMZ_TOPIC_PREAMBLE; } 161 '<topic' attribute text_attribute '">>Metadata<<"' attribute* '>' { return ITMZ_TOPIC_METADATA; } 162 163 '<topic' attribute attribute text_attribute '">>Preamble<<"' attribute* '>' { return ITMZ_TOPIC_PREAMBLE; } 164 '<topic' attribute attribute text_attribute '">>Metadata<<"' attribute* '>' { return ITMZ_TOPIC_METADATA; } 165 166 '<topic' [^>\x00]* '/>' { return 
ITMZ_TOPIC_SELF_CLOSE; } 167 '<topic' [^>\x00]* '>' { return ITMZ_TOPIC_OPEN; } 168 '</topic>' { return ITMZ_TOPIC_CLOSE; } 169 170 WSNL { return ITMZ_WSNL; } 171 172 '<relationships>' { return ITMZ_RELATIONSHIPS_OPEN; } 173 '</relationships>' { return ITMZ_RELATIONSHIPS_CLOSE; } 174 175 // Skip over anything else - '.' does not include '\n' 176 * { goto scan; } 177 */ 178} 179 180