/*
 markdown_lexem.h     MindForger thinking notebook

 Copyright (C) 2016-2020 Martin Dvorak <martin.dvorak@mindforger.com>

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
19 #ifndef M8R_MARKDOWN_LEXEM_H_
20 #define M8R_MARKDOWN_LEXEM_H_
21 
22 #include <climits>
23 #include <string>
24 
25 namespace m8r {
26 
/**
 * @brief Symbol type.
 *
 * Kinds of lexems (tokens) distinguished when tokenizing a Markdown
 * document. Enumerators carry no explicit values, so their numeric values
 * follow declaration order - append or reorder with care if anything
 * relies on the ordinal.
 */
enum class MarkdownLexemType
{
    BEGIN_DOC,                  // begin of document

    SECTION,                    // "#" .. "##...#"
    SECTION_equals,             // "==" 2 or more equal signs (post declared section)
    SECTION_hyphens,            // "--" 2 or more hyphens (post declared section)
    TEXT,                       // "name"
    LINE,                       // ^"This is sentence."$ (w/o ^ and $ markers)
    WHITESPACES,                // [:whitespace]+ (\n\r TAB)

    META_BEGIN,                 // Metadata (case insensitive)
    META_TEXT,                  // text of metadata HTML comment: '[:whitespace]Metadata: ...' (w/o HTML comment begin/end markers)
    META_PROPERTY_DELIMITER,    // ,

    // Metadata property names. By analogy with META_PROPERTY_type, each
    // lowercase suffix presumably names the keyword it stands for.
    META_PROPERTY_UNKNOWN,      // unknown property name > stored in text field
    META_PROPERTY_type,         // "type" keyword symbol
    META_PROPERTY_created,
    META_PROPERTY_reads,
    META_PROPERTY_read,
    META_PROPERTY_revision,
    META_PROPERTY_modified,
    META_PROPERTY_importance,
    META_PROPERTY_urgency,
    META_PROPERTY_progress,
    META_PROPERTY_tags,         // outline or note tags
    META_PROPERTY_links,
    META_PROPERTY_deadline,
    META_PROPERTY_scope,

    META_NAMEVALUE_DELIMITER,   // :
    META_PROPERTY_VALUE,

    HTML_COMMENT_BEGIN,         // <!--
    HTML_COMMENT_TEXT,
    HTML_COMMENT_END,           // -->

    BR,                         // \n

    END_DOC                     // end of document
};
71 
72 /**
73  * @brief Token created by a Markdown lexical analyzer (tokenizer).
74  */
75 class MarkdownLexem
76 {
77 public:
78     // IMPROVE constexpr
79     static unsigned short int NO_TEXT;
80     static unsigned short int WHOLE_LINE;
81 
82 private:
83     MarkdownLexemType type;
84     /**
85      * @brief Offset - line number where text presents (2^32 lines - I had >64k lines MD books)
86      */
87     unsigned int off;
88     /**
89      * @brief Index - beginning of the text on the line (64k chars line length max).
90      */
91     unsigned short int idx;
92     /**
93      * @brief Length - text length (USHRT_MAX represents whole line).
94      */
95     unsigned short int lng;
96     /**
97      * @brief Depth - if lexem represents section [1,INF> (64k levels deep sections hierarchy).
98      */
99     unsigned short int depth;
100 
101 public:
102     MarkdownLexem() = delete;
103     explicit MarkdownLexem(MarkdownLexemType type);
104     MarkdownLexem(
105             MarkdownLexemType type,
106             unsigned int offset,
107             unsigned short int index,
108             unsigned short int lenght);
109     MarkdownLexem(MarkdownLexemType type, unsigned short int depth);
110     MarkdownLexem(const MarkdownLexem&) = delete;
111     MarkdownLexem(const MarkdownLexem&&) = delete;
112     MarkdownLexem& operator=(const MarkdownLexem&) = delete;
113     MarkdownLexem& operator=(const MarkdownLexem&&) = delete;
114     virtual ~MarkdownLexem();
115 
116     MarkdownLexemType getType() const;
117     void setType(MarkdownLexemType type);
118     unsigned getDepth() const;
119     void setDepth(unsigned depth);
getIdx()120     unsigned short int getIdx() const { return idx; }
121     void setIdx(unsigned short int idx);
getLng()122     unsigned short int getLng() const { return lng; }
123     void setLng(unsigned short int lng);
getOff()124     unsigned int getOff() const { return off; }
125     void setOff(unsigned int off);
126 };
127 
128 } // m8r namespace
129 
130 #endif /* M8R_MARKDOWN_LEXEM_H_ */
131