1 // Scintilla source code edit control
2 /** @file TestDocument.cxx
3  ** Lexer testing.
4  **/
5  // Copyright 2019 by Neil Hodgson <neilh@scintilla.org>
6  // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <cassert>
9 
10 #include <string>
11 #include <string_view>
12 #include <vector>
13 #include <algorithm>
14 
15 #include <iostream>
16 
17 #include "ILexer.h"
18 
19 #include "TestDocument.h"
20 
namespace {

	// Number of bytes in a UTF-8 sequence, indexed by the value of its first byte.
	// Entries for 80 - BF, C0, C1 and F5 - FF are 1, so each of those bytes is
	// treated as a character of its own.
	const unsigned char UTF8BytesOfLead[256] = {
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00 - 0F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10 - 1F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20 - 2F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30 - 3F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40 - 4F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50 - 5F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60 - 6F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70 - 7F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80 - 8F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90 - 9F
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0 - AF
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0 - BF
	1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0 - CF
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // D0 - DF
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // E0 - EF
	4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // F0 - FF
	};

	// Decode the UTF-8 sequence starting at us into a code point.
	// Reads as many bytes as UTF8BytesOfLead gives for us[0] (1 to 4) and
	// performs no validation of the trail bytes.
	int UnicodeFromUTF8(const unsigned char *us) noexcept {
		// Payload bits carried by the first byte of a 1, 2, 3 or 4 byte sequence
		constexpr int leadMask[] = { 0xFF, 0x1F, 0xF, 0x7 };
		const int byteCount = UTF8BytesOfLead[us[0]];
		int value = us[0] & leadMask[byteCount - 1];
		// Each following byte contributes its low 6 bits
		for (int i = 1; i < byteCount; i++) {
			value = (value << 6) + (us[i] & 0x3F);
		}
		return value;
	}

	// True when ch has the bit pattern 10xxxxxx, that is 0x80 <= ch < 0xC0.
	inline constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept {
		return (ch & 0xC0) == 0x80;
	}

}
60 
Set(std::string_view sv)61 void TestDocument::Set(std::string_view sv) {
62 	text = sv;
63 	textStyles.resize(text.size());
64 	lineStarts.clear();
65 	endStyled = 0;
66 	lineStarts.push_back(0);
67 	for (size_t pos = 0; pos < text.length(); pos++) {
68 		if (text[pos] == '\n') {
69 			lineStarts.push_back(pos + 1);
70 		}
71 	}
72 	lineStarts.push_back(text.length());
73 	lineStates.resize(lineStarts.size());
74 }
75 
Version() const76 int SCI_METHOD TestDocument::Version() const {
77 	return Scintilla::dvRelease4;
78 }
79 
SetErrorStatus(int)80 void SCI_METHOD TestDocument::SetErrorStatus(int) {
81 }
82 
Length() const83 Sci_Position SCI_METHOD TestDocument::Length() const {
84 	return text.length();
85 }
86 
GetCharRange(char * buffer,Sci_Position position,Sci_Position lengthRetrieve) const87 void SCI_METHOD TestDocument::GetCharRange(char *buffer, Sci_Position position, Sci_Position lengthRetrieve) const {
88 	text.copy(buffer, lengthRetrieve, position);
89 }
90 
StyleAt(Sci_Position position) const91 char SCI_METHOD TestDocument::StyleAt(Sci_Position position) const {
92 	return textStyles.at(position);
93 }
94 
LineFromPosition(Sci_Position position) const95 Sci_Position SCI_METHOD TestDocument::LineFromPosition(Sci_Position position) const {
96 	if (position >= static_cast<Sci_Position>(text.length())) {
97 		return lineStarts.size() - 1 - 1;
98 	}
99 
100 	std::vector<Sci_Position>::const_iterator it = std::lower_bound(lineStarts.begin(), lineStarts.end(), position);
101 	Sci_Position line = it - lineStarts.begin();
102 	if (*it > position)
103 		line--;
104 	return line;
105 }
106 
LineStart(Sci_Position line) const107 Sci_Position SCI_METHOD TestDocument::LineStart(Sci_Position line) const {
108 	if (line >= static_cast<Sci_Position>(lineStarts.size())) {
109 		return text.length();
110 	}
111 	return lineStarts.at(line);
112 }
113 
GetLevel(Sci_Position) const114 int SCI_METHOD TestDocument::GetLevel(Sci_Position) const {
115 	// Only for folding so not implemented yet
116 	return 0;
117 }
118 
SetLevel(Sci_Position,int)119 int SCI_METHOD TestDocument::SetLevel(Sci_Position, int) {
120 	// Only for folding so not implemented yet
121 	return 0;
122 }
123 
GetLineState(Sci_Position line) const124 int SCI_METHOD TestDocument::GetLineState(Sci_Position line) const {
125 	return lineStates.at(line);
126 }
127 
SetLineState(Sci_Position line,int state)128 int SCI_METHOD TestDocument::SetLineState(Sci_Position line, int state) {
129 	return lineStates.at(line) = state;
130 }
131 
StartStyling(Sci_Position position)132 void SCI_METHOD TestDocument::StartStyling(Sci_Position position) {
133 	endStyled = position;
134 }
135 
SetStyleFor(Sci_Position length,char style)136 bool SCI_METHOD TestDocument::SetStyleFor(Sci_Position length, char style) {
137 	for (Sci_Position i = 0; i < length; i++) {
138 		textStyles[endStyled] = style;
139 		endStyled++;
140 	}
141 	return true;
142 }
143 
SetStyles(Sci_Position length,const char * styles)144 bool SCI_METHOD TestDocument::SetStyles(Sci_Position length, const char *styles) {
145 	for (Sci_Position i = 0; i < length; i++) {
146 		textStyles[endStyled] = styles[i];
147 		endStyled++;
148 	}
149 	return true;
150 }
151 
DecorationSetCurrentIndicator(int)152 void SCI_METHOD TestDocument::DecorationSetCurrentIndicator(int) {
153 	// Not implemented as no way to read decorations
154 }
155 
DecorationFillRange(Sci_Position,int,Sci_Position)156 void SCI_METHOD TestDocument::DecorationFillRange(Sci_Position, int, Sci_Position) {
157 	// Not implemented as no way to read decorations
158 }
159 
ChangeLexerState(Sci_Position,Sci_Position)160 void SCI_METHOD TestDocument::ChangeLexerState(Sci_Position, Sci_Position) {
161 	// Not implemented as no watcher to trigger
162 }
163 
CodePage() const164 int SCI_METHOD TestDocument::CodePage() const {
165 	// Always UTF-8 for now
166 	return 65001;
167 }
168 
IsDBCSLeadByte(char) const169 bool SCI_METHOD TestDocument::IsDBCSLeadByte(char) const {
170 	// Always UTF-8 for now
171 	return false;
172 }
173 
BufferPointer()174 const char *SCI_METHOD TestDocument::BufferPointer() {
175 	return text.c_str();
176 }
177 
GetLineIndentation(Sci_Position)178 int SCI_METHOD TestDocument::GetLineIndentation(Sci_Position) {
179 	// Never actually called - lexers use Accessor::IndentAmount
180 	return 0;
181 }
182 
LineEnd(Sci_Position line) const183 Sci_Position SCI_METHOD TestDocument::LineEnd(Sci_Position line) const {
184 	Sci_Position position = LineStart(line + 1);
185 	position--; // Back over CR or LF
186 	// When line terminator is CR+LF, may need to go back one more
187 	if ((position > LineStart(line)) && (text.at(position - 1) == '\r')) {
188 		position--;
189 	}
190 	return position;
191 }
192 
GetRelativePosition(Sci_Position positionStart,Sci_Position characterOffset) const193 Sci_Position SCI_METHOD TestDocument::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
194 	Sci_Position pos = positionStart;
195 	if (characterOffset < 0) {
196 		while (characterOffset < 0) {
197 			if (pos <= 0) {
198 				return 0;
199 			}
200 			unsigned char previousByte = text.at(pos - 1);
201 			if (previousByte < 0x80) {
202 				pos--;
203 				characterOffset++;
204 			} else {
205 				while ((pos > 1) && UTF8IsTrailByte(previousByte)) {
206 					pos--;
207 					previousByte = text.at(pos - 1);
208 				}
209 				pos--;
210 				// text[pos] is now a character start
211 				characterOffset++;
212 			}
213 		}
214 		return pos;
215 	}
216 	assert(characterOffset >= 0);
217 	// TODO: invalid UTF-8
218 	while (characterOffset > 0) {
219 		Sci_Position width = 0;
220 		GetCharacterAndWidth(pos, &width);
221 		pos += width;
222 		characterOffset--;
223 	}
224 	return pos;
225 }
226 
GetCharacterAndWidth(Sci_Position position,Sci_Position * pWidth) const227 int SCI_METHOD TestDocument::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
228 	// TODO: invalid UTF-8
229 	if (position >= static_cast<Sci_Position>(text.length())) {
230 		// Return NULs after document end
231 		if (pWidth) {
232 			*pWidth = 1;
233 		}
234 		return '\0';
235 	}
236 	const unsigned char leadByte = text.at(position);
237 	const int widthCharBytes = UTF8BytesOfLead[leadByte];
238 	unsigned char charBytes[] = { leadByte,0,0,0 };
239 	for (int b = 1; b < widthCharBytes; b++)
240 		charBytes[b] = text[position + b];
241 
242 	if (pWidth) {
243 		*pWidth = widthCharBytes;
244 	}
245 	return UnicodeFromUTF8(charBytes);
246 }
247