1 // Scintilla Lexer for X12
2 // @file LexX12.cxx
3 // Written by Iain Clarke, IMCSoft & Inobiz AB.
4 // X12 official documentation is behind a paywall, but there's a description of the syntax here:
5 // http://www.rawlinsecconsulting.com/x12tutorial/x12syn.html
6 // This code is subject to the same license terms as the rest of the scintilla project:
7 // The License.txt file describes the conditions under which this software may be distributed.
8 //
9
10 // Header order must match order in scripts/HeaderOrder.txt
11 #include <cstdlib>
12 #include <cassert>
13 #include <cstring>
14 #include <cctype>
15
16 #include <vector>
17 #include <algorithm>
18
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 #include "LexerModule.h"
23 #include "DefaultLexer.h"
24
25 using namespace Scintilla;
26
27 class LexerX12 : public DefaultLexer
28 {
29 public:
30 LexerX12();
~LexerX12()31 virtual ~LexerX12() {} // virtual destructor, as we inherit from ILexer
32
Factory()33 static ILexer5 *Factory() {
34 return new LexerX12;
35 }
36
Version() const37 int SCI_METHOD Version() const override
38 {
39 return lvRelease5;
40 }
Release()41 void SCI_METHOD Release() override
42 {
43 delete this;
44 }
45
PropertyNames()46 const char * SCI_METHOD PropertyNames() override
47 {
48 return "fold";
49 }
PropertyType(const char *)50 int SCI_METHOD PropertyType(const char *) override
51 {
52 return SC_TYPE_BOOLEAN; // Only one property!
53 }
DescribeProperty(const char * name)54 const char * SCI_METHOD DescribeProperty(const char *name) override
55 {
56 if (!strcmp(name, "fold"))
57 return "Whether to apply folding to document or not";
58 return NULL;
59 }
60
PropertySet(const char * key,const char * val)61 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
62 {
63 if (!strcmp(key, "fold"))
64 {
65 m_bFold = strcmp(val, "0") ? true : false;
66 return 0;
67 }
68 return -1;
69 }
PropertyGet(const char *)70 const char * SCI_METHOD PropertyGet(const char *) override {
71 return "";
72 }
DescribeWordListSets()73 const char * SCI_METHOD DescribeWordListSets() override
74 {
75 return NULL;
76 }
WordListSet(int,const char *)77 Sci_Position SCI_METHOD WordListSet(int, const char *) override
78 {
79 return -1;
80 }
81 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
82 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
PrivateCall(int,void *)83 void * SCI_METHOD PrivateCall(int, void *) override
84 {
85 return NULL;
86 }
87
88 protected:
89 struct Terminator
90 {
91 int Style = SCE_X12_BAD;
92 Sci_PositionU pos = 0;
93 Sci_PositionU length = 0;
94 int FoldChange = 0;
95 };
96 Terminator InitialiseFromISA(IDocument *pAccess);
97 Sci_PositionU FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const;
98 Terminator DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const;
99 Terminator FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator = false) const;
100
101 bool m_bFold;
102 char m_chSubElement;
103 char m_chElement;
104 char m_chSegment[3]; // might be CRLF
105 };
106
107 LexerModule lmX12(SCLEX_X12, LexerX12::Factory, "x12");
108
109 ///////////////////////////////////////////////////////////////////////////////
110
111
112
113 ///////////////////////////////////////////////////////////////////////////////
114
LexerX12()115 LexerX12::LexerX12() : DefaultLexer("x12", SCLEX_X12)
116 {
117 m_bFold = false;
118 m_chSegment[0] = m_chSegment[1] = m_chSegment[2] = m_chElement = m_chSubElement = 0;
119 }
120
Lex(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)121 void LexerX12::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
122 {
123 Sci_PositionU posFinish = startPos + length;
124
125 Terminator T = InitialiseFromISA(pAccess);
126
127 if (T.Style == SCE_X12_BAD)
128 {
129 if (T.pos < startPos)
130 T.pos = startPos; // we may be colouring in batches.
131 pAccess->StartStyling(startPos);
132 pAccess->SetStyleFor(T.pos - startPos, SCE_X12_ENVELOPE);
133 pAccess->SetStyleFor(posFinish - T.pos, SCE_X12_BAD);
134 return;
135 }
136
137 // Look backwards for a segment start or a document beginning
138 Sci_PositionU posCurrent = FindPreviousSegmentStart (pAccess, startPos);
139
140 // Style buffer, so we're not issuing loads of notifications
141 pAccess->StartStyling(posCurrent);
142
143 while (posCurrent < posFinish)
144 {
145 // Look for first element marker, so we can denote segment
146 T = DetectSegmentHeader(pAccess, posCurrent);
147 if (T.Style == SCE_X12_BAD)
148 break;
149
150 pAccess->SetStyleFor(T.pos - posCurrent, T.Style);
151 pAccess->SetStyleFor(T.length, SCE_X12_SEP_ELEMENT);
152 posCurrent = T.pos + T.length;
153
154 while (T.Style != SCE_X12_BAD && T.Style != SCE_X12_SEGMENTEND) // Break on bad or segment ending
155 {
156 T = FindNextTerminator(pAccess, posCurrent);
157 if (T.Style == SCE_X12_BAD)
158 break;
159
160 int Style = T.Style;
161 if (T.Style == SCE_X12_SEGMENTEND && m_chSegment[0] == '\r') // don't style cr/crlf
162 Style = SCE_X12_DEFAULT;
163
164 pAccess->SetStyleFor(T.pos - posCurrent, SCE_X12_DEFAULT);
165 pAccess->SetStyleFor(T.length, Style);
166 posCurrent = T.pos + T.length;
167 }
168 if (T.Style == SCE_X12_BAD)
169 break;
170 }
171
172 pAccess->SetStyleFor(posFinish - posCurrent, SCE_X12_BAD);
173 }
174
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)175 void LexerX12::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
176 {
177 if (!m_bFold)
178 return;
179
180 // Are we even foldable?
181 if (m_chSegment[0] != '\r' && m_chSegment[0] != '\n') // check for cr,lf,cr+lf.
182 return;
183
184 Sci_PositionU posFinish = startPos + length;
185
186 // Look backwards for a segment start or a document beginning
187 startPos = FindPreviousSegmentStart(pAccess, startPos);
188 Terminator T;
189
190 Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
191 int levelCurrentStyle = SC_FOLDLEVELBASE;
192 if (currLine > 0)
193 levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
194 int indentCurrent = levelCurrentStyle & (SC_FOLDLEVELBASE - 1);
195
196 while (startPos < posFinish)
197 {
198 T = DetectSegmentHeader(pAccess, startPos);
199 int indentNext = indentCurrent + T.FoldChange;
200 if (indentNext < 0)
201 indentNext = 0;
202
203 levelCurrentStyle = (T.FoldChange > 0) ? (SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG) : SC_FOLDLEVELBASE;
204
205 currLine = pAccess->LineFromPosition(startPos);
206 pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
207
208 T = FindNextTerminator(pAccess, startPos, true);
209 startPos = T.pos + T.length;
210 indentCurrent = indentNext;
211 }
212 }
213
InitialiseFromISA(IDocument * pAccess)214 LexerX12::Terminator LexerX12::InitialiseFromISA(IDocument *pAccess)
215 {
216 Sci_Position length = pAccess->Length();
217 char c;
218 if (length <= 106)
219 return { SCE_X12_BAD, 0 };
220
221 pAccess->GetCharRange(&m_chElement, 3, 1);
222 pAccess->GetCharRange(&m_chSubElement, 104, 1);
223 pAccess->GetCharRange(m_chSegment, 105, 1);
224 if (m_chSegment[0] == '\r') // are we CRLF?
225 {
226 pAccess->GetCharRange(&c, 106, 1);
227 if (c == '\n')
228 m_chSegment[1] = c;
229 }
230
231 // Validate we have an element separator, and it's not silly!
232 if (m_chElement == '\0' || m_chElement == '\n' || m_chElement == '\r')
233 return { SCE_X12_BAD, 3 };
234
235 // Validate we have an element separator, and it's not silly!
236 if (m_chSubElement == '\0' || m_chSubElement == '\n' || m_chSubElement == '\r')
237 return { SCE_X12_BAD, 103 };
238
239 if (m_chElement == m_chSubElement)
240 return { SCE_X12_BAD, 104 };
241 if (m_chElement == m_chSegment[0])
242 return { SCE_X12_BAD, 105 };
243 if (m_chSubElement == m_chSegment[0])
244 return { SCE_X12_BAD, 104 };
245
246 // Check we have element markers at all the right places! ISA element has fixed entries.
247 std::vector<Sci_PositionU> ElementMarkers = { 3, 6, 17, 20, 31, 34, 50, 53, 69, 76, 81, 83, 89, 99, 101, 103 };
248 for (auto i : ElementMarkers)
249 {
250 pAccess->GetCharRange(&c, i, 1);
251 if (c != m_chElement)
252 return { SCE_X12_BAD, i };
253 }
254 // Check we have no element markers anywhere else!
255 for (Sci_PositionU i = 0; i < 105; i++)
256 {
257 if (std::find(ElementMarkers.begin(), ElementMarkers.end(), i) != ElementMarkers.end())
258 continue;
259
260 pAccess->GetCharRange(&c, i, 1);
261 if (c == m_chElement)
262 return { SCE_X12_BAD, i };
263 }
264
265 return { SCE_X12_ENVELOPE };
266 }
267
FindPreviousSegmentStart(IDocument * pAccess,Sci_Position startPos) const268 Sci_PositionU LexerX12::FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const
269 {
270 char c;
271
272 for ( ; startPos > 0; startPos--)
273 {
274 pAccess->GetCharRange(&c, startPos, 1);
275 if (c != m_chSegment[0])
276 continue;
277 // we've matched one - if this is not crlf we're done.
278 if (!m_chSegment[1])
279 return startPos + 1;
280 pAccess->GetCharRange(&c, startPos+1, 1);
281 if (c == m_chSegment[1])
282 return startPos + 2;
283 }
284 // We didn't find a ', so just go with the beginning
285 return 0;
286 }
287
DetectSegmentHeader(IDocument * pAccess,Sci_PositionU pos) const288 LexerX12::Terminator LexerX12::DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const
289 {
290 Sci_PositionU posStart = pos;
291 Sci_Position Length = pAccess->Length();
292 char Buf[6] = { 0 };
293 while (pos - posStart < 5 && pos < (Sci_PositionU)Length)
294 {
295 pAccess->GetCharRange(Buf + pos - posStart, pos, 1);
296 if (Buf [pos - posStart] != m_chElement) // more?
297 {
298 pos++;
299 continue;
300 }
301 if (strcmp(Buf, "ISA*") == 0)
302 return { SCE_X12_ENVELOPE, pos, 1, +1 };
303 if (strcmp(Buf, "IEA*") == 0)
304 return { SCE_X12_ENVELOPE, pos, 1, -1 };
305 if (strcmp(Buf, "GS*") == 0)
306 return { SCE_X12_FUNCTIONGROUP, pos, 1, +1 };
307 if (strcmp(Buf, "GE*") == 0)
308 return { SCE_X12_FUNCTIONGROUP, pos, 1, -1 };
309 if (strcmp(Buf, "ST*") == 0)
310 return { SCE_X12_TRANSACTIONSET, pos, 1, +1 };
311 if (strcmp(Buf, "SE*") == 0)
312 return { SCE_X12_TRANSACTIONSET, pos, 1, -1 };
313 return { SCE_X12_SEGMENTHEADER, pos, 1, 0 };
314 }
315 return { SCE_X12_BAD, pos, 0, 0 };
316 }
317
FindNextTerminator(IDocument * pAccess,Sci_PositionU pos,bool bJustSegmentTerminator) const318 LexerX12::Terminator LexerX12::FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator) const
319 {
320 char c;
321 Sci_Position Length = pAccess->Length();
322
323 while (pos < (Sci_PositionU)Length)
324 {
325 pAccess->GetCharRange(&c, pos, 1);
326 if (!bJustSegmentTerminator && c == m_chElement)
327 return { SCE_X12_SEP_ELEMENT, pos, 1 };
328 else if (!bJustSegmentTerminator && c == m_chSubElement)
329 return { SCE_X12_SEP_SUBELEMENT, pos, 1 };
330 else if (c == m_chSegment[0])
331 {
332 if (!m_chSegment[1])
333 return { SCE_X12_SEGMENTEND, pos, 1 };
334 pos++;
335 if (pos >= (Sci_PositionU)Length)
336 break;
337 pAccess->GetCharRange(&c, pos, 1);
338 if (c == m_chSegment[1])
339 return { SCE_X12_SEGMENTEND, pos-1, 2 };
340 }
341 pos++;
342 }
343
344 return { SCE_X12_BAD, pos };
345 }
346