1 // Scintilla Lexer for X12
2 // @file LexX12.cxx
3 // Written by Iain Clarke, IMCSoft & Inobiz AB.
4 // X12 official documentation is behind a paywall, but there's a description of the syntax here:
5 // http://www.rawlinsecconsulting.com/x12tutorial/x12syn.html
6 // This code is subject to the same license terms as the rest of the scintilla project:
7 // The License.txt file describes the conditions under which this software may be distributed.
8 //
9 
10 // Header order must match order in scripts/HeaderOrder.txt
11 #include <cstdlib>
12 #include <cassert>
13 #include <cstring>
14 #include <cctype>
15 
16 #include <vector>
17 #include <algorithm>
18 
19 #include "ILexer.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 #include "LexerModule.h"
23 #include "DefaultLexer.h"
24 
25 using namespace Scintilla;
26 
27 class LexerX12 : public DefaultLexer
28 {
29 public:
30 	LexerX12();
~LexerX12()31 	virtual ~LexerX12() {} // virtual destructor, as we inherit from ILexer
32 
Factory()33 	static ILexer5 *Factory() {
34 		return new LexerX12;
35 	}
36 
Version() const37 	int SCI_METHOD Version() const override
38 	{
39 		return lvRelease5;
40 	}
Release()41 	void SCI_METHOD Release() override
42 	{
43 		delete this;
44 	}
45 
PropertyNames()46 	const char * SCI_METHOD PropertyNames() override
47 	{
48 		return "fold";
49 	}
PropertyType(const char *)50 	int SCI_METHOD PropertyType(const char *) override
51 	{
52 		return SC_TYPE_BOOLEAN; // Only one property!
53 	}
DescribeProperty(const char * name)54 	const char * SCI_METHOD DescribeProperty(const char *name) override
55 	{
56 		if (!strcmp(name, "fold"))
57 			return "Whether to apply folding to document or not";
58 		return NULL;
59 	}
60 
PropertySet(const char * key,const char * val)61 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
62 	{
63 		if (!strcmp(key, "fold"))
64 		{
65 			m_bFold = strcmp(val, "0") ? true : false;
66 			return 0;
67 		}
68 		return -1;
69 	}
PropertyGet(const char *)70 	const char * SCI_METHOD PropertyGet(const char *) override {
71 		return "";
72 	}
DescribeWordListSets()73 	const char * SCI_METHOD DescribeWordListSets() override
74 	{
75 		return NULL;
76 	}
WordListSet(int,const char *)77 	Sci_Position SCI_METHOD WordListSet(int, const char *) override
78 	{
79 		return -1;
80 	}
81 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
82 	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
PrivateCall(int,void *)83 	void * SCI_METHOD PrivateCall(int, void *) override
84 	{
85 		return NULL;
86 	}
87 
88 protected:
89 	struct Terminator
90 	{
91 		int Style = SCE_X12_BAD;
92 		Sci_PositionU pos = 0;
93 		Sci_PositionU length = 0;
94 		int FoldChange = 0;
95 	};
96 	Terminator InitialiseFromISA(IDocument *pAccess);
97 	Sci_PositionU FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const;
98 	Terminator DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const;
99 	Terminator FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator = false) const;
100 
101 	bool m_bFold;
102 	char m_chSubElement;
103 	char m_chElement;
104 	char m_chSegment[3]; // might be CRLF
105 };
106 
// Module object tying the SCLEX_X12 identifier and the name "x12" to the
// LexerX12::Factory creation function.
LexerModule lmX12(SCLEX_X12, LexerX12::Factory, "x12");
108 
109 ///////////////////////////////////////////////////////////////////////////////
110 
111 
112 
113 ///////////////////////////////////////////////////////////////////////////////
114 
LexerX12()115 LexerX12::LexerX12() : DefaultLexer("x12", SCLEX_X12)
116 {
117 	m_bFold = false;
118 	m_chSegment[0] = m_chSegment[1] = m_chSegment[2] = m_chElement = m_chSubElement = 0;
119 }
120 
Lex(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)121 void LexerX12::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
122 {
123 	Sci_PositionU posFinish = startPos + length;
124 
125 	Terminator T = InitialiseFromISA(pAccess);
126 
127 	if (T.Style == SCE_X12_BAD)
128 	{
129 		if (T.pos < startPos)
130 			T.pos = startPos; // we may be colouring in batches.
131 		pAccess->StartStyling(startPos);
132 		pAccess->SetStyleFor(T.pos - startPos, SCE_X12_ENVELOPE);
133 		pAccess->SetStyleFor(posFinish - T.pos, SCE_X12_BAD);
134 		return;
135 	}
136 
137 	// Look backwards for a segment start or a document beginning
138 	Sci_PositionU posCurrent = FindPreviousSegmentStart (pAccess, startPos);
139 
140 	// Style buffer, so we're not issuing loads of notifications
141 	pAccess->StartStyling(posCurrent);
142 
143 	while (posCurrent < posFinish)
144 	{
145 		// Look for first element marker, so we can denote segment
146 		T = DetectSegmentHeader(pAccess, posCurrent);
147 		if (T.Style == SCE_X12_BAD)
148 			break;
149 
150 		pAccess->SetStyleFor(T.pos - posCurrent, T.Style);
151 		pAccess->SetStyleFor(T.length, SCE_X12_SEP_ELEMENT);
152 		posCurrent = T.pos + T.length;
153 
154 		while (T.Style != SCE_X12_BAD && T.Style != SCE_X12_SEGMENTEND) // Break on bad or segment ending
155 		{
156 			T = FindNextTerminator(pAccess, posCurrent);
157 			if (T.Style == SCE_X12_BAD)
158 				break;
159 
160 			int Style = T.Style;
161 			if (T.Style == SCE_X12_SEGMENTEND && m_chSegment[0] == '\r') // don't style cr/crlf
162 				Style = SCE_X12_DEFAULT;
163 
164 			pAccess->SetStyleFor(T.pos - posCurrent, SCE_X12_DEFAULT);
165 			pAccess->SetStyleFor(T.length, Style);
166 			posCurrent = T.pos + T.length;
167 		}
168 		if (T.Style == SCE_X12_BAD)
169 			break;
170 	}
171 
172 	pAccess->SetStyleFor(posFinish - posCurrent, SCE_X12_BAD);
173 }
174 
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)175 void LexerX12::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
176 {
177 	if (!m_bFold)
178 		return;
179 
180 	// Are we even foldable?
181 	if (m_chSegment[0] != '\r' && m_chSegment[0] != '\n') // check for cr,lf,cr+lf.
182 		return;
183 
184 	Sci_PositionU posFinish = startPos + length;
185 
186 	// Look backwards for a segment start or a document beginning
187 	startPos = FindPreviousSegmentStart(pAccess, startPos);
188 	Terminator T;
189 
190 	Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
191 	int levelCurrentStyle = SC_FOLDLEVELBASE;
192 	if (currLine > 0)
193 		levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
194 	int indentCurrent = levelCurrentStyle & (SC_FOLDLEVELBASE - 1);
195 
196 	while (startPos < posFinish)
197 	{
198 		T = DetectSegmentHeader(pAccess, startPos);
199 		int indentNext = indentCurrent + T.FoldChange;
200 		if (indentNext < 0)
201 			indentNext = 0;
202 
203 		levelCurrentStyle = (T.FoldChange > 0) ? (SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG) : SC_FOLDLEVELBASE;
204 
205 		currLine = pAccess->LineFromPosition(startPos);
206 		pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
207 
208 		T = FindNextTerminator(pAccess, startPos, true);
209 		startPos = T.pos + T.length;
210 		indentCurrent = indentNext;
211 	}
212 }
213 
InitialiseFromISA(IDocument * pAccess)214 LexerX12::Terminator LexerX12::InitialiseFromISA(IDocument *pAccess)
215 {
216 	Sci_Position length = pAccess->Length();
217 	char c;
218 	if (length <= 106)
219 		return { SCE_X12_BAD, 0 };
220 
221 	pAccess->GetCharRange(&m_chElement, 3, 1);
222 	pAccess->GetCharRange(&m_chSubElement, 104, 1);
223 	pAccess->GetCharRange(m_chSegment, 105, 1);
224 	if (m_chSegment[0] == '\r') // are we CRLF?
225 	{
226 		pAccess->GetCharRange(&c, 106, 1);
227 		if (c == '\n')
228 			m_chSegment[1] = c;
229 	}
230 
231 	// Validate we have an element separator, and it's not silly!
232 	if (m_chElement == '\0' || m_chElement == '\n' || m_chElement == '\r')
233 		return { SCE_X12_BAD, 3 };
234 
235 	// Validate we have an element separator, and it's not silly!
236 	if (m_chSubElement == '\0' || m_chSubElement == '\n' || m_chSubElement == '\r')
237 		return { SCE_X12_BAD, 103 };
238 
239 	if (m_chElement == m_chSubElement)
240 		return { SCE_X12_BAD, 104 };
241 	if (m_chElement == m_chSegment[0])
242 		return { SCE_X12_BAD, 105 };
243 	if (m_chSubElement == m_chSegment[0])
244 		return { SCE_X12_BAD, 104 };
245 
246 	// Check we have element markers at all the right places! ISA element has fixed entries.
247 	std::vector<Sci_PositionU> ElementMarkers = { 3, 6, 17, 20, 31, 34, 50, 53, 69, 76, 81, 83, 89, 99, 101, 103  };
248 	for (auto i : ElementMarkers)
249 	{
250 		pAccess->GetCharRange(&c, i, 1);
251 		if (c != m_chElement)
252 			return { SCE_X12_BAD, i };
253 	}
254 	// Check we have no element markers anywhere else!
255 	for (Sci_PositionU i = 0; i < 105; i++)
256 	{
257 		if (std::find(ElementMarkers.begin(), ElementMarkers.end(), i) != ElementMarkers.end())
258 			continue;
259 
260 		pAccess->GetCharRange(&c, i, 1);
261 		if (c == m_chElement)
262 			return { SCE_X12_BAD, i };
263 	}
264 
265 	return { SCE_X12_ENVELOPE };
266 }
267 
FindPreviousSegmentStart(IDocument * pAccess,Sci_Position startPos) const268 Sci_PositionU LexerX12::FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const
269 {
270 	char c;
271 
272 	for ( ; startPos > 0; startPos--)
273 	{
274 		pAccess->GetCharRange(&c, startPos, 1);
275 		if (c != m_chSegment[0])
276 			continue;
277 		// we've matched one - if this is not crlf we're done.
278 		if (!m_chSegment[1])
279 			return startPos + 1;
280 		pAccess->GetCharRange(&c, startPos+1, 1);
281 		if (c == m_chSegment[1])
282 			return startPos + 2;
283 	}
284 	// We didn't find a ', so just go with the beginning
285 	return 0;
286 }
287 
DetectSegmentHeader(IDocument * pAccess,Sci_PositionU pos) const288 LexerX12::Terminator LexerX12::DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const
289 {
290 	Sci_PositionU posStart = pos;
291 	Sci_Position Length = pAccess->Length();
292 	char Buf[6] = { 0 };
293 	while (pos - posStart < 5 && pos < (Sci_PositionU)Length)
294 	{
295 		pAccess->GetCharRange(Buf + pos - posStart, pos, 1);
296 		if (Buf [pos - posStart] != m_chElement) // more?
297 		{
298 			pos++;
299 			continue;
300 		}
301 		if (strcmp(Buf, "ISA*") == 0)
302 			return { SCE_X12_ENVELOPE, pos, 1, +1 };
303 		if (strcmp(Buf, "IEA*") == 0)
304 			return { SCE_X12_ENVELOPE, pos, 1, -1 };
305 		if (strcmp(Buf, "GS*") == 0)
306 			return { SCE_X12_FUNCTIONGROUP, pos, 1, +1 };
307 		if (strcmp(Buf, "GE*") == 0)
308 			return { SCE_X12_FUNCTIONGROUP, pos, 1, -1 };
309 		if (strcmp(Buf, "ST*") == 0)
310 			return { SCE_X12_TRANSACTIONSET, pos, 1, +1 };
311 		if (strcmp(Buf, "SE*") == 0)
312 			return { SCE_X12_TRANSACTIONSET, pos, 1, -1 };
313 		return { SCE_X12_SEGMENTHEADER, pos, 1, 0 };
314 	}
315 	return { SCE_X12_BAD, pos, 0, 0 };
316 }
317 
FindNextTerminator(IDocument * pAccess,Sci_PositionU pos,bool bJustSegmentTerminator) const318 LexerX12::Terminator LexerX12::FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator) const
319 {
320 	char c;
321 	Sci_Position Length = pAccess->Length();
322 
323 	while (pos < (Sci_PositionU)Length)
324 	{
325 		pAccess->GetCharRange(&c, pos, 1);
326 		if (!bJustSegmentTerminator && c == m_chElement)
327 			return { SCE_X12_SEP_ELEMENT, pos, 1 };
328 		else if (!bJustSegmentTerminator && c == m_chSubElement)
329 			return { SCE_X12_SEP_SUBELEMENT, pos, 1 };
330 		else if (c == m_chSegment[0])
331 		{
332 			if (!m_chSegment[1])
333 				return { SCE_X12_SEGMENTEND, pos, 1 };
334 			pos++;
335 			if (pos >= (Sci_PositionU)Length)
336 				break;
337 			pAccess->GetCharRange(&c, pos, 1);
338 			if (c == m_chSegment[1])
339 				return { SCE_X12_SEGMENTEND, pos-1, 2 };
340 		}
341 		pos++;
342 	}
343 
344 	return { SCE_X12_BAD, pos };
345 }
346