1 // Scintilla source code edit control
2 /** @file Document.cxx
3  ** Text document that handles notifications, DBCS, styling, words and end of line.
4  **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <cstddef>
9 #include <cstdlib>
10 #include <cassert>
11 #include <cstring>
12 #include <cstdio>
13 #include <cmath>
14 
15 #include <stdexcept>
16 #include <string>
17 #include <string_view>
18 #include <vector>
19 #include <forward_list>
20 #include <algorithm>
21 #include <memory>
22 #include <chrono>
23 
24 #ifndef NO_CXX11_REGEX
25 #include <regex>
26 #endif
27 
28 #include "Platform.h"
29 
30 #include "ILoader.h"
31 #include "ILexer.h"
32 #include "Scintilla.h"
33 
34 #include "CharacterSet.h"
35 #include "CharacterCategory.h"
36 #include "Position.h"
37 #include "SplitVector.h"
38 #include "Partitioning.h"
39 #include "RunStyles.h"
40 #include "CellBuffer.h"
41 #include "PerLine.h"
42 #include "CharClassify.h"
43 #include "Decoration.h"
44 #include "CaseFolder.h"
45 #include "Document.h"
46 #include "RESearch.h"
47 #include "UniConversion.h"
48 #include "ElapsedPeriod.h"
49 
50 using namespace Scintilla;
51 
Colourise(Sci::Position start,Sci::Position end)52 void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
53 	if (pdoc && instance && !performingStyle) {
54 		// Protect against reentrance, which may occur, for example, when
55 		// fold points are discovered while performing styling and the folding
56 		// code looks for child lines which may trigger styling.
57 		performingStyle = true;
58 
59 		const Sci::Position lengthDoc = pdoc->Length();
60 		if (end == -1)
61 			end = lengthDoc;
62 		const Sci::Position len = end - start;
63 
64 		PLATFORM_ASSERT(len >= 0);
65 		PLATFORM_ASSERT(start + len <= lengthDoc);
66 
67 		int styleStart = 0;
68 		if (start > 0)
69 			styleStart = pdoc->StyleAt(start - 1);
70 
71 		if (len > 0) {
72 			instance->Lex(start, len, styleStart, pdoc);
73 			instance->Fold(start, len, styleStart, pdoc);
74 		}
75 
76 		performingStyle = false;
77 	}
78 }
79 
LineEndTypesSupported()80 int LexInterface::LineEndTypesSupported() {
81 	if (instance) {
82 		return instance->LineEndTypesSupported();
83 	}
84 	return 0;
85 }
86 
ActionDuration(double duration_,double minDuration_,double maxDuration_)87 ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
88 	duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
89 }
90 
AddSample(size_t numberActions,double durationOfActions)91 void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
92 	// Only adjust for multiple actions to avoid instability
93 	if (numberActions < 8)
94 		return;
95 
96 	// Alpha value for exponential smoothing.
97 	// Most recent value contributes 25% to smoothed value.
98 	constexpr double alpha = 0.25;
99 
100 	const double durationOne = durationOfActions / numberActions;
101 	duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration,
102 		minDuration, maxDuration);
103 }
104 
Duration() const105 double ActionDuration::Duration() const noexcept {
106 	return duration;
107 }
108 
Document(int options)109 Document::Document(int options) :
110 	cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0),
111 	durationStyleOneLine(0.00001, 0.000001, 0.0001) {
112 	refCount = 0;
113 #ifdef _WIN32
114 	eolMode = SC_EOL_CRLF;
115 #else
116 	eolMode = SC_EOL_LF;
117 #endif
118 	dbcsCodePage = SC_CP_UTF8;
119 	lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
120 	endStyled = 0;
121 	styleClock = 0;
122 	enteredModification = 0;
123 	enteredStyling = 0;
124 	enteredReadOnlyCount = 0;
125 	insertionSet = false;
126 	tabInChars = 8;
127 	indentInChars = 0;
128 	actualIndentInChars = 8;
129 	useTabs = true;
130 	tabIndents = true;
131 	backspaceUnindents = false;
132 
133 	matchesValid = false;
134 
135 	perLineData[ldMarkers] = std::make_unique<LineMarkers>();
136 	perLineData[ldLevels] = std::make_unique<LineLevels>();
137 	perLineData[ldState] = std::make_unique<LineState>();
138 	perLineData[ldMargin] = std::make_unique<LineAnnotation>();
139 	perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
140 	perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();
141 
142 	decorations = DecorationListCreate(IsLarge());
143 
144 	cb.SetPerLine(this);
145 	cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
146 }
147 
~Document()148 Document::~Document() {
149 	for (const WatcherWithUserData &watcher : watchers) {
150 		watcher.watcher->NotifyDeleted(this, watcher.userData);
151 	}
152 }
153 
154 // Increase reference count and return its previous value.
AddRef()155 int Document::AddRef() {
156 	return refCount++;
157 }
158 
159 // Decrease reference count and return its previous value.
160 // Delete the document if reference count reaches zero.
Release()161 int SCI_METHOD Document::Release() {
162 	const int curRefCount = --refCount;
163 	if (curRefCount == 0)
164 		delete this;
165 	return curRefCount;
166 }
167 
Init()168 void Document::Init() {
169 	for (const std::unique_ptr<PerLine> &pl : perLineData) {
170 		if (pl)
171 			pl->Init();
172 	}
173 }
174 
InsertLine(Sci::Line line)175 void Document::InsertLine(Sci::Line line) {
176 	for (const std::unique_ptr<PerLine> &pl : perLineData) {
177 		if (pl)
178 			pl->InsertLine(line);
179 	}
180 }
181 
InsertLines(Sci::Line line,Sci::Line lines)182 void Document::InsertLines(Sci::Line line, Sci::Line lines) {
183 	for (const auto &pl : perLineData) {
184 		if (pl)
185 			pl->InsertLines(line, lines);
186 	}
187 }
188 
RemoveLine(Sci::Line line)189 void Document::RemoveLine(Sci::Line line) {
190 	for (const std::unique_ptr<PerLine> &pl : perLineData) {
191 		if (pl)
192 			pl->RemoveLine(line);
193 	}
194 }
195 
Markers() const196 LineMarkers *Document::Markers() const noexcept {
197 	return dynamic_cast<LineMarkers *>(perLineData[ldMarkers].get());
198 }
199 
Levels() const200 LineLevels *Document::Levels() const noexcept {
201 	return dynamic_cast<LineLevels *>(perLineData[ldLevels].get());
202 }
203 
States() const204 LineState *Document::States() const noexcept {
205 	return dynamic_cast<LineState *>(perLineData[ldState].get());
206 }
207 
Margins() const208 LineAnnotation *Document::Margins() const noexcept {
209 	return dynamic_cast<LineAnnotation *>(perLineData[ldMargin].get());
210 }
211 
Annotations() const212 LineAnnotation *Document::Annotations() const noexcept {
213 	return dynamic_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
214 }
215 
EOLAnnotations() const216 LineAnnotation *Document::EOLAnnotations() const noexcept {
217 	return dynamic_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get());
218 }
219 
LineEndTypesSupported() const220 int Document::LineEndTypesSupported() const {
221 	if ((SC_CP_UTF8 == dbcsCodePage) && pli)
222 		return pli->LineEndTypesSupported();
223 	else
224 		return 0;
225 }
226 
SetDBCSCodePage(int dbcsCodePage_)227 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
228 	if (dbcsCodePage != dbcsCodePage_) {
229 		dbcsCodePage = dbcsCodePage_;
230 		SetCaseFolder(nullptr);
231 		cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
232 		cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
233 		ModifiedAt(0);	// Need to restyle whole document
234 		return true;
235 	} else {
236 		return false;
237 	}
238 }
239 
SetLineEndTypesAllowed(int lineEndBitSet_)240 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
241 	if (lineEndBitSet != lineEndBitSet_) {
242 		lineEndBitSet = lineEndBitSet_;
243 		const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
244 		if (lineEndBitSetActive != cb.GetLineEndTypes()) {
245 			ModifiedAt(0);
246 			cb.SetLineEndTypes(lineEndBitSetActive);
247 			return true;
248 		} else {
249 			return false;
250 		}
251 	} else {
252 		return false;
253 	}
254 }
255 
SetSavePoint()256 void Document::SetSavePoint() {
257 	cb.SetSavePoint();
258 	NotifySavePoint(true);
259 }
260 
// Undo every step of the current tentative sequence and then commit it
// through cb.TentativeCommit().
// Returns immediately when no tentative sequence is active. No undo is
// performed when a modification is already in progress
// (enteredModification != 0) or when the cell buffer is read-only.
// For each step NotifyModified is called once before the step is performed
// and once afterwards; NotifySavePoint is called when the save point state
// differs between the start and the end of the whole operation.
void Document::TentativeUndo() {
	if (!TentativeActive())
		return;
	CheckReadOnly();
	if (enteredModification == 0) {
		// Raised for the duration of the undo and lowered again at the end
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			// Becomes true when any step changes the number of lines
			bool multiLine = false;
			const int steps = cb.TentativeSteps();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Notification before the change: undoing a removal will insert,
				// container actions carry their position as a token,
				// anything else is reported as about to delete.
				if (action.at == removeAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
				} else if (action.at == containerAction) {
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
				}
				cb.PerformUndoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
				}

				// Build the flags for the notification after the change
				int modFlags = SC_PERFORMED_UNDO;
				// With undo, an insertion action becomes a deletion notification
				if (action.at == removeAction) {
					modFlags |= SC_MOD_INSERTTEXT;
				} else if (action.at == insertAction) {
					modFlags |= SC_MOD_DELETETEXT;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step is flagged, along with whether any step changed the line count
				if (step == steps - 1) {
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
											   linesAdded, action.data.get()));
			}

			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);

			cb.TentativeCommit();
		}
		enteredModification--;
	}
}
321 
GetMark(Sci::Line line) const322 int Document::GetMark(Sci::Line line) const noexcept {
323 	return Markers()->MarkValue(line);
324 }
325 
MarkerNext(Sci::Line lineStart,int mask) const326 Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept {
327 	return Markers()->MarkerNext(lineStart, mask);
328 }
329 
AddMark(Sci::Line line,int markerNum)330 int Document::AddMark(Sci::Line line, int markerNum) {
331 	if (line >= 0 && line <= LinesTotal()) {
332 		const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
333 		const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
334 		NotifyModified(mh);
335 		return prev;
336 	} else {
337 		return -1;
338 	}
339 }
340 
AddMarkSet(Sci::Line line,int valueSet)341 void Document::AddMarkSet(Sci::Line line, int valueSet) {
342 	if (line < 0 || line > LinesTotal()) {
343 		return;
344 	}
345 	unsigned int m = valueSet;
346 	for (int i = 0; m; i++, m >>= 1) {
347 		if (m & 1)
348 			Markers()->AddMark(line, i, LinesTotal());
349 	}
350 	const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
351 	NotifyModified(mh);
352 }
353 
DeleteMark(Sci::Line line,int markerNum)354 void Document::DeleteMark(Sci::Line line, int markerNum) {
355 	Markers()->DeleteMark(line, markerNum, false);
356 	const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
357 	NotifyModified(mh);
358 }
359 
DeleteMarkFromHandle(int markerHandle)360 void Document::DeleteMarkFromHandle(int markerHandle) {
361 	Markers()->DeleteMarkFromHandle(markerHandle);
362 	DocModification mh(SC_MOD_CHANGEMARKER);
363 	mh.line = -1;
364 	NotifyModified(mh);
365 }
366 
DeleteAllMarks(int markerNum)367 void Document::DeleteAllMarks(int markerNum) {
368 	bool someChanges = false;
369 	for (Sci::Line line = 0; line < LinesTotal(); line++) {
370 		if (Markers()->DeleteMark(line, markerNum, true))
371 			someChanges = true;
372 	}
373 	if (someChanges) {
374 		DocModification mh(SC_MOD_CHANGEMARKER);
375 		mh.line = -1;
376 		NotifyModified(mh);
377 	}
378 }
379 
LineFromHandle(int markerHandle) const380 Sci::Line Document::LineFromHandle(int markerHandle) const noexcept {
381 	return Markers()->LineFromHandle(markerHandle);
382 }
383 
MarkerNumberFromLine(Sci::Line line,int which) const384 int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept {
385 	return Markers()->NumberFromLine(line, which);
386 }
387 
MarkerHandleFromLine(Sci::Line line,int which) const388 int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept {
389 	return Markers()->HandleFromLine(line, which);
390 }
391 
LineStart(Sci_Position line) const392 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
393 	return cb.LineStart(line);
394 }
395 
IsLineStartPosition(Sci::Position position) const396 bool Document::IsLineStartPosition(Sci::Position position) const {
397 	return LineStart(LineFromPosition(position)) == position;
398 }
399 
LineEnd(Sci_Position line) const400 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
401 	if (line >= LinesTotal() - 1) {
402 		return LineStart(line + 1);
403 	} else {
404 		Sci::Position position = LineStart(line + 1);
405 		if (SC_LINE_END_TYPE_UNICODE == cb.GetLineEndTypes()) {
406 			const unsigned char bytes[] = {
407 				cb.UCharAt(position-3),
408 				cb.UCharAt(position-2),
409 				cb.UCharAt(position-1),
410 			};
411 			if (UTF8IsSeparator(bytes)) {
412 				return position - UTF8SeparatorLength;
413 			}
414 			if (UTF8IsNEL(bytes+1)) {
415 				return position - UTF8NELLength;
416 			}
417 		}
418 		position--; // Back over CR or LF
419 		// When line terminator is CR+LF, may need to go back one more
420 		if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
421 			position--;
422 		}
423 		return position;
424 	}
425 }
426 
SetErrorStatus(int status)427 void SCI_METHOD Document::SetErrorStatus(int status) {
428 	// Tell the watchers an error has occurred.
429 	for (const WatcherWithUserData &watcher : watchers) {
430 		watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status);
431 	}
432 }
433 
LineFromPosition(Sci_Position pos) const434 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
435 	return cb.LineFromPosition(pos);
436 }
437 
SciLineFromPosition(Sci::Position pos) const438 Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
439 	// Avoids casting in callers for this very common function
440 	return cb.LineFromPosition(pos);
441 }
442 
LineEndPosition(Sci::Position position) const443 Sci::Position Document::LineEndPosition(Sci::Position position) const {
444 	return LineEnd(LineFromPosition(position));
445 }
446 
IsLineEndPosition(Sci::Position position) const447 bool Document::IsLineEndPosition(Sci::Position position) const {
448 	return LineEnd(LineFromPosition(position)) == position;
449 }
450 
IsPositionInLineEnd(Sci::Position position) const451 bool Document::IsPositionInLineEnd(Sci::Position position) const {
452 	return position >= LineEnd(LineFromPosition(position));
453 }
454 
VCHomePosition(Sci::Position position) const455 Sci::Position Document::VCHomePosition(Sci::Position position) const {
456 	const Sci::Line line = SciLineFromPosition(position);
457 	const Sci::Position startPosition = LineStart(line);
458 	const Sci::Position endLine = LineEnd(line);
459 	Sci::Position startText = startPosition;
460 	while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
461 		startText++;
462 	if (position == startText)
463 		return startPosition;
464 	else
465 		return startText;
466 }
467 
IndexLineStart(Sci::Line line,int lineCharacterIndex) const468 Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept {
469 	return cb.IndexLineStart(line, lineCharacterIndex);
470 }
471 
LineFromPositionIndex(Sci::Position pos,int lineCharacterIndex) const472 Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept {
473 	return cb.LineFromPositionIndex(pos, lineCharacterIndex);
474 }
475 
SetLevel(Sci_Position line,int level)476 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
477 	const int prev = Levels()->SetLevel(line, level, LinesTotal());
478 	if (prev != level) {
479 		DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
480 		                   LineStart(line), 0, 0, nullptr, line);
481 		mh.foldLevelNow = level;
482 		mh.foldLevelPrev = prev;
483 		NotifyModified(mh);
484 	}
485 	return prev;
486 }
487 
GetLevel(Sci_Position line) const488 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
489 	return Levels()->GetLevel(line);
490 }
491 
ClearLevels()492 void Document::ClearLevels() {
493 	Levels()->ClearLevels();
494 }
495 
IsSubordinate(int levelStart,int levelTry)496 static bool IsSubordinate(int levelStart, int levelTry) noexcept {
497 	if (levelTry & SC_FOLDLEVELWHITEFLAG)
498 		return true;
499 	else
500 		return LevelNumber(levelStart) < LevelNumber(levelTry);
501 }
502 
GetLastChild(Sci::Line lineParent,int level,Sci::Line lastLine)503 Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) {
504 	if (level == -1)
505 		level = LevelNumber(GetLevel(lineParent));
506 	const Sci::Line maxLine = LinesTotal();
507 	const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
508 	Sci::Line lineMaxSubord = lineParent;
509 	while (lineMaxSubord < maxLine - 1) {
510 		EnsureStyledTo(LineStart(lineMaxSubord + 2));
511 		if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
512 			break;
513 		if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
514 			break;
515 		lineMaxSubord++;
516 	}
517 	if (lineMaxSubord > lineParent) {
518 		if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) {
519 			// Have chewed up some whitespace that belongs to a parent so seek back
520 			if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
521 				lineMaxSubord--;
522 			}
523 		}
524 	}
525 	return lineMaxSubord;
526 }
527 
GetFoldParent(Sci::Line line) const528 Sci::Line Document::GetFoldParent(Sci::Line line) const {
529 	const int level = LevelNumber(GetLevel(line));
530 	Sci::Line lineLook = line - 1;
531 	while ((lineLook > 0) && (
532 	            (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
533 	            (LevelNumber(GetLevel(lineLook)) >= level))
534 	      ) {
535 		lineLook--;
536 	}
537 	if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
538 	        (LevelNumber(GetLevel(lineLook)) < level)) {
539 		return lineLook;
540 	} else {
541 		return -1;
542 	}
543 }
544 
// Compute the fold block around 'line' and store it in highlightDelimiter:
// beginFoldBlock and endFoldBlock delimit the block, and
// firstChangeableLineBefore / firstChangeableLineAfter are the nearest lines
// found by the scans below on each side of 'line'.
// lastLine feeds the limit passed to GetLastChild.
// highlightDelimiter is cleared when no starting fold header can be found.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
	const int level = GetLevel(line);
	const Sci::Line lookLastLine = std::max(line, lastLine) + 1;

	// Step back over white lines, and over headers whose level number is
	// not below that of the line following them.
	Sci::Line lookLine = line;
	int lookLineLevel = level;
	int lookLineLevelNum = LevelNumber(lookLineLevel);
	while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
		((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) {
		lookLineLevel = GetLevel(--lookLine);
		lookLineLevelNum = LevelNumber(lookLineLevel);
	}

	// The block begins at the line reached if it is a header, else at its fold parent
	Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
	if (beginFoldBlock == -1) {
		highlightDelimiter.Clear();
		return;
	}

	Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
	Sci::Line firstChangeableLineBefore = -1;
	if (endFoldBlock < line) {
		// The block found ends before 'line': look further back for a header
		// whose last child is exactly 'line'.
		lookLine = beginFoldBlock - 1;
		lookLineLevel = GetLevel(lookLine);
		lookLineLevelNum = LevelNumber(lookLineLevel);
		while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
			if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
				if (GetLastChild(lookLine, -1, lookLastLine) == line) {
					beginFoldBlock = lookLine;
					endFoldBlock = line;
					firstChangeableLineBefore = line - 1;
				}
			}
			// Stop at a base level line that follows a line with a higher level number
			if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum))
				break;
			lookLineLevel = GetLevel(--lookLine);
			lookLineLevelNum = LevelNumber(lookLineLevel);
		}
	}
	if (firstChangeableLineBefore == -1) {
		// Scan back from the line before 'line' to the block start for a white
		// line or a line with a level number above that of 'line'.
		for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
			lookLine >= beginFoldBlock;
			lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
			if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > LevelNumber(level))) {
				firstChangeableLineBefore = lookLine;
				break;
			}
		}
	}
	// Nothing found: default to the line before the block
	if (firstChangeableLineBefore == -1)
		firstChangeableLineBefore = beginFoldBlock - 1;

	// Scan forward from the line after 'line' to the block end for a header
	// whose level number is below that of the line following it.
	Sci::Line firstChangeableLineAfter = -1;
	for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
		lookLine <= endFoldBlock;
		lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
		if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) {
			firstChangeableLineAfter = lookLine;
			break;
		}
	}
	// Nothing found: default to the line after the block
	if (firstChangeableLineAfter == -1)
		firstChangeableLineAfter = endFoldBlock + 1;

	highlightDelimiter.beginFoldBlock = beginFoldBlock;
	highlightDelimiter.endFoldBlock = endFoldBlock;
	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
614 
ClampPositionIntoDocument(Sci::Position pos) const615 Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept {
616 	return std::clamp<Sci::Position>(pos, 0, LengthNoExcept());
617 }
618 
IsCrLf(Sci::Position pos) const619 bool Document::IsCrLf(Sci::Position pos) const noexcept {
620 	if (pos < 0)
621 		return false;
622 	if (pos >= (LengthNoExcept() - 1))
623 		return false;
624 	return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
625 }
626 
LenChar(Sci::Position pos) const627 int Document::LenChar(Sci::Position pos) const noexcept {
628 	if (pos < 0 || pos >= LengthNoExcept()) {
629 		// Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
630 		return 1;
631 	} else if (IsCrLf(pos)) {
632 		return 2;
633 	}
634 
635 	const unsigned char leadByte = cb.UCharAt(pos);
636 	if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
637 		// Common case: ASCII character
638 		return 1;
639 	}
640 	if (SC_CP_UTF8 == dbcsCodePage) {
641 		const int widthCharBytes = UTF8BytesOfLead[leadByte];
642 		unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
643 		for (int b = 1; b < widthCharBytes; b++) {
644 			charBytes[b] = cb.UCharAt(pos + b);
645 		}
646 		const int utf8status = UTF8Classify(charBytes, widthCharBytes);
647 		if (utf8status & UTF8MaskInvalid) {
648 			// Treat as invalid and use up just one byte
649 			return 1;
650 		} else {
651 			return utf8status & UTF8MaskWidth;
652 		}
653 	} else {
654 		if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) {
655 			return 2;
656 		} else {
657 			return 1;
658 		}
659 	}
660 }
661 
InGoodUTF8(Sci::Position pos,Sci::Position & start,Sci::Position & end) const662 bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
663 	Sci::Position trail = pos;
664 	while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
665 		trail--;
666 	start = (trail > 0) ? trail-1 : trail;
667 
668 	const unsigned char leadByte = cb.UCharAt(start);
669 	const int widthCharBytes = UTF8BytesOfLead[leadByte];
670 	if (widthCharBytes == 1) {
671 		return false;
672 	} else {
673 		const int trailBytes = widthCharBytes - 1;
674 		const Sci::Position len = pos - start;
675 		if (len > trailBytes)
676 			// pos too far from lead
677 			return false;
678 		unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
679 		for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
680 			charBytes[b] = cb.CharAt(start+b);
681 		const int utf8status = UTF8Classify(charBytes, widthCharBytes);
682 		if (utf8status & UTF8MaskInvalid)
683 			return false;
684 		end = start + widthCharBytes;
685 		return true;
686 	}
687 }
688 
// Normalise a position so that it is not halfway through a two byte character.
// This can occur in two situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
// When displaying DBCS text such as Japanese.
// If moving, move the position in the indicated direction.
// pos: position to normalise; values at or below 0 give 0 and values at or
// beyond the document length give the document length.
// moveDir: when greater than 0 an inside position moves to the end of the
// character, otherwise it moves to the start.
// checkLineEnd: when true, a position between \r and \n is moved out of the pair.
Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
	// If out of range, just return minimum/maximum value.
	if (pos <= 0)
		return 0;
	if (pos >= LengthNoExcept())
		return LengthNoExcept();

	// PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
	// pos - 1 starting a \r\n pair means pos is between the \r and the \n
	if (checkLineEnd && IsCrLf(pos - 1)) {
		if (moveDir > 0)
			return pos + 1;
		else
			return pos - 1;
	}

	if (dbcsCodePage) {
		if (SC_CP_UTF8 == dbcsCodePage) {
			const unsigned char ch = cb.UCharAt(pos);
			// If ch is not a trail byte then pos is valid intercharacter position
			if (UTF8IsTrailByte(ch)) {
				Sci::Position startUTF = pos;
				Sci::Position endUTF = pos;
				if (InGoodUTF8(pos, startUTF, endUTF)) {
					// ch is a trail byte within a UTF-8 character
					if (moveDir > 0)
						pos = endUTF;
					else
						pos = startUTF;
				}
				// Else invalid UTF-8 so return position of isolated trail byte
			}
		} else {
			// Anchor DBCS calculations at start of line because start of line can
			// not be a DBCS trail byte.
			const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
			if (pos == posStartLine)
				return pos;

			// Step back until a non-lead-byte is found.
			Sci::Position posCheck = pos;
			while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
				posCheck--;

			// Check from known start of character.
			while (posCheck < pos) {
				const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1;
				if (posCheck + mbsize == pos) {
					// pos is on a character boundary
					return pos;
				} else if (posCheck + mbsize > pos) {
					// pos is inside the character starting at posCheck
					if (moveDir > 0) {
						return posCheck + mbsize;
					} else {
						return posCheck;
					}
				}
				posCheck += mbsize;
			}
		}
	}

	return pos;
}
757 
// NextPosition moves between valid positions - it can not handle a position in the middle of a
// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
// A \r\n pair is treated as two characters.
// pos: a valid position to move from.
// moveDir: values greater than 0 move forward by one character, all other values move backward.
// Returns 0 or the document length when a one byte step would reach or pass either end.
Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
	// If out of range, just return minimum/maximum value.
	const int increment = (moveDir > 0) ? 1 : -1;
	if (pos + increment <= 0)
		return 0;
	if (pos + increment >= cb.Length())
		return cb.Length();

	if (dbcsCodePage) {
		if (SC_CP_UTF8 == dbcsCodePage) {
			if (increment == 1) {
				// Simple forward movement case so can avoid some checks
				const unsigned char leadByte = cb.UCharAt(pos);
				if (UTF8IsAscii(leadByte)) {
					// Single byte character or invalid
					pos++;
				} else {
					// Gather the bytes indicated by the lead byte and classify them
					const int widthCharBytes = UTF8BytesOfLead[leadByte];
					unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
					for (int b=1; b<widthCharBytes; b++)
						charBytes[b] = cb.CharAt(pos+b);
					const int utf8status = UTF8Classify(charBytes, widthCharBytes);
					// Invalid sequences advance one byte, valid ones by their width
					if (utf8status & UTF8MaskInvalid)
						pos++;
					else
						pos += utf8status & UTF8MaskWidth;
				}
			} else {
				// Examine byte before position
				pos--;
				const unsigned char ch = cb.UCharAt(pos);
				// If ch is not a trail byte then pos is valid intercharacter position
				if (UTF8IsTrailByte(ch)) {
					// If ch is a trail byte in a valid UTF-8 character then return start of character
					Sci::Position startUTF = pos;
					Sci::Position endUTF = pos;
					if (InGoodUTF8(pos, startUTF, endUTF)) {
						pos = startUTF;
					}
					// Else invalid UTF-8 so return position of isolated trail byte
				}
			}
		} else {
			if (moveDir > 0) {
				// Forward in DBCS: a lead byte starts a two byte character
				const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
				pos += mbsize;
				if (pos > cb.Length())
					pos = cb.Length();
			} else {
				// Anchor DBCS calculations at start of line because start of line can
				// not be a DBCS trail byte.
				const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
				if ((pos - 1) <= posStartLine) {
					return pos - 1;
				} else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
					// Must actually be trail byte
					return pos - 2;
				} else {
					// Otherwise, step back until a non-lead-byte is found.
					Sci::Position posTemp = pos - 1;
					while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
						;
					// Now posTemp+1 must point to the beginning of a character,
					// so figure out whether we went back an even or an odd
					// number of bytes and go back 1 or 2 bytes, respectively.
					return (pos - 1 - ((pos - posTemp) & 1));
				}
			}
		}
	} else {
		// Single byte encoding: every byte is a character
		pos += increment;
	}

	return pos;
}
838 
NextCharacter(Sci::Position & pos,int moveDir) const839 bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
840 	// Returns true if pos changed
841 	Sci::Position posNext = NextPosition(pos, moveDir);
842 	if (posNext == pos) {
843 		return false;
844 	} else {
845 		pos = posNext;
846 		return true;
847 	}
848 }
849 
CharacterAfter(Sci::Position position) const850 Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
851 	if (position >= LengthNoExcept()) {
852 		return CharacterExtracted(unicodeReplacementChar, 0);
853 	}
854 	const unsigned char leadByte = cb.UCharAt(position);
855 	if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
856 		// Common case: ASCII character
857 		return CharacterExtracted(leadByte, 1);
858 	}
859 	if (SC_CP_UTF8 == dbcsCodePage) {
860 		const int widthCharBytes = UTF8BytesOfLead[leadByte];
861 		unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
862 		for (int b = 1; b<widthCharBytes; b++)
863 			charBytes[b] = cb.UCharAt(position + b);
864 		const int utf8status = UTF8Classify(charBytes, widthCharBytes);
865 		if (utf8status & UTF8MaskInvalid) {
866 			// Treat as invalid and use up just one byte
867 			return CharacterExtracted(unicodeReplacementChar, 1);
868 		} else {
869 			return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
870 		}
871 	} else {
872 		if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < LengthNoExcept())) {
873 			return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1));
874 		} else {
875 			return CharacterExtracted(leadByte, 1);
876 		}
877 	}
878 }
879 
CharacterBefore(Sci::Position position) const880 Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
881 	if (position <= 0) {
882 		return CharacterExtracted(unicodeReplacementChar, 0);
883 	}
884 	const unsigned char previousByte = cb.UCharAt(position - 1);
885 	if (0 == dbcsCodePage) {
886 		return CharacterExtracted(previousByte, 1);
887 	}
888 	if (SC_CP_UTF8 == dbcsCodePage) {
889 		if (UTF8IsAscii(previousByte)) {
890 			return CharacterExtracted(previousByte, 1);
891 		}
892 		position--;
893 		// If previousByte is not a trail byte then its invalid
894 		if (UTF8IsTrailByte(previousByte)) {
895 			// If previousByte is a trail byte in a valid UTF-8 character then find start of character
896 			Sci::Position startUTF = position;
897 			Sci::Position endUTF = position;
898 			if (InGoodUTF8(position, startUTF, endUTF)) {
899 				const Sci::Position widthCharBytes = endUTF - startUTF;
900 				unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
901 				for (Sci::Position b = 0; b<widthCharBytes; b++)
902 					charBytes[b] = cb.UCharAt(startUTF + b);
903 				const int utf8status = UTF8Classify(charBytes, widthCharBytes);
904 				if (utf8status & UTF8MaskInvalid) {
905 					// Treat as invalid and use up just one byte
906 					return CharacterExtracted(unicodeReplacementChar, 1);
907 				} else {
908 					return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
909 				}
910 			}
911 			// Else invalid UTF-8 so return position of isolated trail byte
912 		}
913 		return CharacterExtracted(unicodeReplacementChar, 1);
914 	} else {
915 		// Moving backwards in DBCS is complex so use NextPosition
916 		const Sci::Position posStartCharacter = NextPosition(position, -1);
917 		return CharacterAfter(posStartCharacter);
918 	}
919 }
920 
921 // Return -1  on out-of-bounds
GetRelativePosition(Sci_Position positionStart,Sci_Position characterOffset) const922 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
923 	Sci::Position pos = positionStart;
924 	if (dbcsCodePage) {
925 		const int increment = (characterOffset > 0) ? 1 : -1;
926 		while (characterOffset != 0) {
927 			const Sci::Position posNext = NextPosition(pos, increment);
928 			if (posNext == pos)
929 				return INVALID_POSITION;
930 			pos = posNext;
931 			characterOffset -= increment;
932 		}
933 	} else {
934 		pos = positionStart + characterOffset;
935 		if ((pos < 0) || (pos > Length()))
936 			return INVALID_POSITION;
937 	}
938 	return pos;
939 }
940 
GetRelativePositionUTF16(Sci::Position positionStart,Sci::Position characterOffset) const941 Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept {
942 	Sci::Position pos = positionStart;
943 	if (dbcsCodePage) {
944 		const int increment = (characterOffset > 0) ? 1 : -1;
945 		while (characterOffset != 0) {
946 			const Sci::Position posNext = NextPosition(pos, increment);
947 			if (posNext == pos)
948 				return INVALID_POSITION;
949 			if (std::abs(pos-posNext) > 3)	// 4 byte character = 2*UTF16.
950 				characterOffset -= increment;
951 			pos = posNext;
952 			characterOffset -= increment;
953 		}
954 	} else {
955 		pos = positionStart + characterOffset;
956 		if ((pos < 0) || (pos > LengthNoExcept()))
957 			return INVALID_POSITION;
958 	}
959 	return pos;
960 }
961 
GetCharacterAndWidth(Sci_Position position,Sci_Position * pWidth) const962 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
963 	int character;
964 	int bytesInCharacter = 1;
965 	const unsigned char leadByte = cb.UCharAt(position);
966 	if (dbcsCodePage) {
967 		if (SC_CP_UTF8 == dbcsCodePage) {
968 			if (UTF8IsAscii(leadByte)) {
969 				// Single byte character or invalid
970 				character =  leadByte;
971 			} else {
972 				const int widthCharBytes = UTF8BytesOfLead[leadByte];
973 				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
974 				for (int b=1; b<widthCharBytes; b++)
975 					charBytes[b] = cb.UCharAt(position+b);
976 				const int utf8status = UTF8Classify(charBytes, widthCharBytes);
977 				if (utf8status & UTF8MaskInvalid) {
978 					// Report as singleton surrogate values which are invalid Unicode
979 					character =  0xDC80 + leadByte;
980 				} else {
981 					bytesInCharacter = utf8status & UTF8MaskWidth;
982 					character = UnicodeFromUTF8(charBytes);
983 				}
984 			}
985 		} else {
986 			if (IsDBCSLeadByteNoExcept(leadByte)) {
987 				bytesInCharacter = 2;
988 				character = (leadByte << 8) | cb.UCharAt(position+1);
989 			} else {
990 				character = leadByte;
991 			}
992 		}
993 	} else {
994 		character = leadByte;
995 	}
996 	if (pWidth) {
997 		*pWidth = bytesInCharacter;
998 	}
999 	return character;
1000 }
1001 
// Return the code page value stored in dbcsCodePage.
// Other methods in this file treat 0 as a single byte encoding and SC_CP_UTF8 as UTF-8.
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}
1005 
// Report whether ch is a lead byte in the current code page.
// Forwards to IsDBCSLeadByteNoExcept.
bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
	// Used by lexers so must match IDocument method exactly
	return IsDBCSLeadByteNoExcept(ch);
}
1010 
IsDBCSLeadByteNoExcept(char ch) const1011 bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
1012 	// Used inside core Scintilla
1013 	// Byte ranges found in Wikipedia articles with relevant search strings in each case
1014 	const unsigned char uch = ch;
1015 	switch (dbcsCodePage) {
1016 		case 932:
1017 			// Shift_jis
1018 			return ((uch >= 0x81) && (uch <= 0x9F)) ||
1019 				((uch >= 0xE0) && (uch <= 0xFC));
1020 				// Lead bytes F0 to FC may be a Microsoft addition.
1021 		case 936:
1022 			// GBK
1023 			return (uch >= 0x81) && (uch <= 0xFE);
1024 		case 949:
1025 			// Korean Wansung KS C-5601-1987
1026 			return (uch >= 0x81) && (uch <= 0xFE);
1027 		case 950:
1028 			// Big5
1029 			return (uch >= 0x81) && (uch <= 0xFE);
1030 		case 1361:
1031 			// Korean Johab KS C-5601-1992
1032 			return
1033 				((uch >= 0x84) && (uch <= 0xD3)) ||
1034 				((uch >= 0xD8) && (uch <= 0xDE)) ||
1035 				((uch >= 0xE0) && (uch <= 0xF9));
1036 	}
1037 	return false;
1038 }
1039 
IsDBCSLeadByteInvalid(char ch) const1040 bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {
1041 	const unsigned char lead = ch;
1042 	switch (dbcsCodePage) {
1043 	case 932:
1044 		// Shift_jis
1045 		return
1046 			(lead == 0x85) ||
1047 			(lead == 0x86) ||
1048 			(lead == 0xEB) ||
1049 			(lead == 0xEC) ||
1050 			(lead == 0xEF) ||
1051 			(lead == 0xFA) ||
1052 			(lead == 0xFB) ||
1053 			(lead == 0xFC);
1054 	case 936:
1055 		// GBK
1056 		return (lead == 0x80) || (lead == 0xFF);
1057 	case 949:
1058 		// Korean Wansung KS C-5601-1987
1059 		return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE);
1060 	case 950:
1061 		// Big5
1062 		return
1063 			((lead >= 0x80) && (lead <= 0xA0)) ||
1064 			(lead == 0xC8) ||
1065 			(lead >= 0xFA);
1066 	case 1361:
1067 		// Korean Johab KS C-5601-1992
1068 		return
1069 			((lead >= 0x80) && (lead <= 0x83)) ||
1070 			((lead >= 0xD4) && (lead <= 0xD8)) ||
1071 			(lead == 0xDF) ||
1072 			(lead >= 0xFA);
1073 	}
1074 	return false;
1075 }
1076 
IsDBCSTrailByteInvalid(char ch) const1077 bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept {
1078 	const unsigned char trail = ch;
1079 	switch (dbcsCodePage) {
1080 	case 932:
1081 		// Shift_jis
1082 		return
1083 			(trail <= 0x3F) ||
1084 			(trail == 0x7F) ||
1085 			(trail >= 0xFD);
1086 	case 936:
1087 		// GBK
1088 		return
1089 			(trail <= 0x3F) ||
1090 			(trail == 0x7F) ||
1091 			(trail == 0xFF);
1092 	case 949:
1093 		// Korean Wansung KS C-5601-1987
1094 		return
1095 			(trail <= 0x40) ||
1096 			((trail >= 0x5B) && (trail <= 0x60)) ||
1097 			((trail >= 0x7B) && (trail <= 0x80)) ||
1098 			(trail == 0xFF);
1099 	case 950:
1100 		// Big5
1101 		return
1102 			(trail <= 0x3F) ||
1103 			((trail >= 0x7F) && (trail <= 0xA0)) ||
1104 			(trail == 0xFF);
1105 	case 1361:
1106 		// Korean Johab KS C-5601-1992
1107 		return
1108 			(trail <= 0x30) ||
1109 			(trail == 0x7F) ||
1110 			(trail == 0x80) ||
1111 			(trail == 0xFF);
1112 	}
1113 	return false;
1114 }
1115 
DBCSDrawBytes(std::string_view text) const1116 int Document::DBCSDrawBytes(std::string_view text) const noexcept {
1117 	if (text.length() <= 1) {
1118 		return static_cast<int>(text.length());
1119 	}
1120 	if (IsDBCSLeadByteNoExcept(text[0])) {
1121 		return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2;
1122 	} else {
1123 		return 1;
1124 	}
1125 }
1126 
// True only for the space and horizontal tab characters.
static constexpr bool IsSpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
1130 
1131 // Need to break text into segments near lengthSegment but taking into
1132 // account the encoding to not break inside a UTF-8 or DBCS character
1133 // and also trying to avoid breaking inside a pair of combining characters.
1134 // The segment length must always be long enough (more than 4 bytes)
1135 // so that there will be at least one whole character to make a segment.
1136 // For UTF-8, text must consist only of valid whole characters.
1137 // In preference order from best to worst:
1138 //   1) Break after space
1139 //   2) Break before punctuation
1140 //   3) Break after whole character
1141 
SafeSegment(const char * text,int length,int lengthSegment) const1142 int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept {
1143 	if (length <= lengthSegment)
1144 		return length;
1145 	int lastSpaceBreak = -1;
1146 	int lastPunctuationBreak = -1;
1147 	int lastEncodingAllowedBreak = 0;
1148 	for (int j=0; j < lengthSegment;) {
1149 		const unsigned char ch = text[j];
1150 		if (j > 0) {
1151 			if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
1152 				lastSpaceBreak = j;
1153 			}
1154 			if (ch < 'A') {
1155 				lastPunctuationBreak = j;
1156 			}
1157 		}
1158 		lastEncodingAllowedBreak = j;
1159 
1160 		if (dbcsCodePage == SC_CP_UTF8) {
1161 			j += UTF8BytesOfLead[ch];
1162 		} else if (dbcsCodePage) {
1163 			j += IsDBCSLeadByteNoExcept(ch) ? 2 : 1;
1164 		} else {
1165 			j++;
1166 		}
1167 	}
1168 	if (lastSpaceBreak >= 0) {
1169 		return lastSpaceBreak;
1170 	} else if (lastPunctuationBreak >= 0) {
1171 		return lastPunctuationBreak;
1172 	}
1173 	return lastEncodingAllowedBreak;
1174 }
1175 
CodePageFamily() const1176 EncodingFamily Document::CodePageFamily() const noexcept {
1177 	if (SC_CP_UTF8 == dbcsCodePage)
1178 		return EncodingFamily::unicode;
1179 	else if (dbcsCodePage)
1180 		return EncodingFamily::dbcs;
1181 	else
1182 		return EncodingFamily::eightBit;
1183 }
1184 
ModifiedAt(Sci::Position pos)1185 void Document::ModifiedAt(Sci::Position pos) noexcept {
1186 	if (endStyled > pos)
1187 		endStyled = pos;
1188 }
1189 
CheckReadOnly()1190 void Document::CheckReadOnly() {
1191 	if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1192 		enteredReadOnlyCount++;
1193 		NotifyModifyAttempt();
1194 		enteredReadOnlyCount--;
1195 	}
1196 }
1197 
1198 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1199 // SetStyleAt does not change the persistent state of a document
1200 
DeleteChars(Sci::Position pos,Sci::Position len)1201 bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1202 	if (pos < 0)
1203 		return false;
1204 	if (len <= 0)
1205 		return false;
1206 	if ((pos + len) > LengthNoExcept())
1207 		return false;
1208 	CheckReadOnly();
1209 	if (enteredModification != 0) {
1210 		return false;
1211 	} else {
1212 		enteredModification++;
1213 		if (!cb.IsReadOnly()) {
1214 			NotifyModified(
1215 			    DocModification(
1216 			        SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
1217 			        pos, len,
1218 			        0, 0));
1219 			const Sci::Line prevLinesTotal = LinesTotal();
1220 			const bool startSavePoint = cb.IsSavePoint();
1221 			bool startSequence = false;
1222 			const char *text = cb.DeleteChars(pos, len, startSequence);
1223 			if (startSavePoint && cb.IsCollectingUndo())
1224 				NotifySavePoint(false);
1225 			if ((pos < LengthNoExcept()) || (pos == 0))
1226 				ModifiedAt(pos);
1227 			else
1228 				ModifiedAt(pos-1);
1229 			NotifyModified(
1230 			    DocModification(
1231 			        SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1232 			        pos, len,
1233 			        LinesTotal() - prevLinesTotal, text));
1234 		}
1235 		enteredModification--;
1236 	}
1237 	return !cb.IsReadOnly();
1238 }
1239 
1240 /**
1241  * Insert a string with a length.
1242  */
InsertString(Sci::Position position,const char * s,Sci::Position insertLength)1243 Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
1244 	if (insertLength <= 0) {
1245 		return 0;
1246 	}
1247 	CheckReadOnly();	// Application may change read only state here
1248 	if (cb.IsReadOnly()) {
1249 		return 0;
1250 	}
1251 	if (enteredModification != 0) {
1252 		return 0;
1253 	}
1254 	enteredModification++;
1255 	insertionSet = false;
1256 	insertion.clear();
1257 	NotifyModified(
1258 		DocModification(
1259 			SC_MOD_INSERTCHECK,
1260 			position, insertLength,
1261 			0, s));
1262 	if (insertionSet) {
1263 		s = insertion.c_str();
1264 		insertLength = insertion.length();
1265 	}
1266 	NotifyModified(
1267 		DocModification(
1268 			SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1269 			position, insertLength,
1270 			0, s));
1271 	const Sci::Line prevLinesTotal = LinesTotal();
1272 	const bool startSavePoint = cb.IsSavePoint();
1273 	bool startSequence = false;
1274 	const char *text = cb.InsertString(position, s, insertLength, startSequence);
1275 	if (startSavePoint && cb.IsCollectingUndo())
1276 		NotifySavePoint(false);
1277 	ModifiedAt(position);
1278 	NotifyModified(
1279 		DocModification(
1280 			SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1281 			position, insertLength,
1282 			LinesTotal() - prevLinesTotal, text));
1283 	if (insertionSet) {	// Free memory as could be large
1284 		std::string().swap(insertion);
1285 	}
1286 	enteredModification--;
1287 	return insertLength;
1288 }
1289 
// Record replacement text of the given length and flag it with insertionSet.
// InsertString uses this text in place of its own argument when the flag is
// set after its insert-check notification.
void Document::ChangeInsertion(const char *s, Sci::Position length) {
	insertionSet = true;
	insertion.assign(s, length);
}
1294 
AddData(const char * data,Sci_Position length)1295 int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1296 	try {
1297 		const Sci::Position position = Length();
1298 		InsertString(position, data, length);
1299 	} catch (std::bad_alloc &) {
1300 		return SC_STATUS_BADALLOC;
1301 	} catch (...) {
1302 		return SC_STATUS_FAILURE;
1303 	}
1304 	return 0;
1305 }
1306 
// Return this object as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
	return this;
}
1310 
// Undo the steps reported by cb.StartUndo(), sending a notification before and
// after each step.
// Returns the position produced by the last text step that was undone, or -1 when
// nothing was undone (modification already in progress, undo not being collected,
// buffer read only, or no text steps).
Sci::Position Document::Undo() {
	Sci::Position newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.StartUndo();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			// Track a run of consecutive undone removals that touch each other so
			// that newPos can be placed at the end of the whole re-inserted run.
			Sci::Position coalescedRemovePos = -1;
			Sci::Position coalescedRemoveLen = 0;
			Sci::Position prevRemoveActionPos = -1;
			Sci::Position prevRemoveActionLen = 0;
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Before-notification: undoing a removal inserts text,
				// undoing an insertion deletes text.
				if (action.at == removeAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
				} else if (action.at == containerAction) {
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
					dm.token = action.position;
					NotifyModified(dm);
					// A container action that may not coalesce ends any run of removals
					if (!action.mayCoalesce) {
						coalescedRemovePos = -1;
						coalescedRemoveLen = 0;
						prevRemoveActionPos = -1;
						prevRemoveActionLen = 0;
					}
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
				}
				cb.PerformUndoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_UNDO;
				// With undo, a removal action becomes an insertion notification
				// and an insertion action becomes a deletion notification
				if (action.at == removeAction) {
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
					// Extend the current run when this removal starts at the same position
					// as the previous one or directly after it; otherwise start a new run.
					if ((coalescedRemoveLen > 0) &&
						(action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
						coalescedRemoveLen += action.lenData;
						newPos = coalescedRemovePos + coalescedRemoveLen;
					} else {
						coalescedRemovePos = action.position;
						coalescedRemoveLen = action.lenData;
					}
					prevRemoveActionPos = action.position;
					prevRemoveActionLen = action.lenData;
				} else if (action.at == insertAction) {
					modFlags |= SC_MOD_DELETETEXT;
					coalescedRemovePos = -1;
					coalescedRemoveLen = 0;
					prevRemoveActionPos = -1;
					prevRemoveActionLen = 0;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step also reports whether any step changed the line count
				if (step == steps - 1) {
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
											   linesAdded, action.data.get()));
			}

			// Tell watchers when the save point state changed across the whole undo
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1395 
// Redo the steps reported by cb.StartRedo(), sending a notification before and
// after each step.
// Returns the position produced by the last text step that was redone (the end of
// the text for an insertion, the start for a removal), or -1 when nothing was redone.
Sci::Position Document::Redo() {
	Sci::Position newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			const bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			const int steps = cb.StartRedo();
			for (int step = 0; step < steps; step++) {
				const Sci::Line prevLinesTotal = LinesTotal();
				const Action &action = cb.GetRedoStep();
				// Before-notification matching the kind of action being redone
				if (action.at == insertAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
				} else if (action.at == containerAction) {
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
				}
				cb.PerformRedoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_REDO;
				if (action.at == insertAction) {
					// Place the result position after the re-inserted text
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
				} else if (action.at == removeAction) {
					modFlags |= SC_MOD_DELETETEXT;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step also reports whether any step changed the line count
				if (step == steps - 1) {
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(
					DocModification(modFlags, action.position, action.lenData,
									linesAdded, action.data.get()));
			}

			// Tell watchers when the save point state changed across the whole redo
			const bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1455 
// Delete LenChar(pos) bytes starting at pos.
void Document::DelChar(Sci::Position pos) {
	DeleteChars(pos, LenChar(pos));
}
1459 
DelCharBack(Sci::Position pos)1460 void Document::DelCharBack(Sci::Position pos) {
1461 	if (pos <= 0) {
1462 		return;
1463 	} else if (IsCrLf(pos - 2)) {
1464 		DeleteChars(pos - 2, 2);
1465 	} else if (dbcsCodePage) {
1466 		const Sci::Position startChar = NextPosition(pos, -1);
1467 		DeleteChars(startChar, pos - startChar);
1468 	} else {
1469 		DeleteChars(pos - 1, 1);
1470 	}
1471 }
1472 
NextTab(Sci::Position pos,Sci::Position tabSize)1473 static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1474 	return ((pos / tabSize) + 1) * tabSize;
1475 }
1476 
CreateIndentation(Sci::Position indent,int tabSize,bool insertSpaces)1477 static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1478 	std::string indentation;
1479 	if (!insertSpaces) {
1480 		while (indent >= tabSize) {
1481 			indentation += '\t';
1482 			indent -= tabSize;
1483 		}
1484 	}
1485 	while (indent > 0) {
1486 		indentation += ' ';
1487 		indent--;
1488 	}
1489 	return indentation;
1490 }
1491 
GetLineIndentation(Sci_Position line)1492 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1493 	int indent = 0;
1494 	if ((line >= 0) && (line < LinesTotal())) {
1495 		const Sci::Position lineStart = LineStart(line);
1496 		const Sci::Position length = Length();
1497 		for (Sci::Position i = lineStart; i < length; i++) {
1498 			const char ch = cb.CharAt(i);
1499 			if (ch == ' ')
1500 				indent++;
1501 			else if (ch == '\t')
1502 				indent = static_cast<int>(NextTab(indent, tabInChars));
1503 			else
1504 				return indent;
1505 		}
1506 	}
1507 	return indent;
1508 }
1509 
SetLineIndentation(Sci::Line line,Sci::Position indent)1510 Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1511 	const int indentOfLine = GetLineIndentation(line);
1512 	if (indent < 0)
1513 		indent = 0;
1514 	if (indent != indentOfLine) {
1515 		std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1516 		const Sci::Position thisLineStart = LineStart(line);
1517 		const Sci::Position indentPos = GetLineIndentPosition(line);
1518 		UndoGroup ug(this);
1519 		DeleteChars(thisLineStart, indentPos - thisLineStart);
1520 		return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1521 			linebuf.length());
1522 	} else {
1523 		return GetLineIndentPosition(line);
1524 	}
1525 }
1526 
GetLineIndentPosition(Sci::Line line) const1527 Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1528 	if (line < 0)
1529 		return 0;
1530 	Sci::Position pos = LineStart(line);
1531 	const Sci::Position length = Length();
1532 	while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1533 		pos++;
1534 	}
1535 	return pos;
1536 }
1537 
GetColumn(Sci::Position pos)1538 Sci::Position Document::GetColumn(Sci::Position pos) {
1539 	Sci::Position column = 0;
1540 	const Sci::Line line = SciLineFromPosition(pos);
1541 	if ((line >= 0) && (line < LinesTotal())) {
1542 		for (Sci::Position i = LineStart(line); i < pos;) {
1543 			const char ch = cb.CharAt(i);
1544 			if (ch == '\t') {
1545 				column = NextTab(column, tabInChars);
1546 				i++;
1547 			} else if (ch == '\r') {
1548 				return column;
1549 			} else if (ch == '\n') {
1550 				return column;
1551 			} else if (i >= Length()) {
1552 				return column;
1553 			} else {
1554 				column++;
1555 				i = NextPosition(i, 1);
1556 			}
1557 		}
1558 	}
1559 	return column;
1560 }
1561 
CountCharacters(Sci::Position startPos,Sci::Position endPos) const1562 Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept {
1563 	startPos = MovePositionOutsideChar(startPos, 1, false);
1564 	endPos = MovePositionOutsideChar(endPos, -1, false);
1565 	Sci::Position count = 0;
1566 	Sci::Position i = startPos;
1567 	while (i < endPos) {
1568 		count++;
1569 		i = NextPosition(i, 1);
1570 	}
1571 	return count;
1572 }
1573 
CountUTF16(Sci::Position startPos,Sci::Position endPos) const1574 Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept {
1575 	startPos = MovePositionOutsideChar(startPos, 1, false);
1576 	endPos = MovePositionOutsideChar(endPos, -1, false);
1577 	Sci::Position count = 0;
1578 	Sci::Position i = startPos;
1579 	while (i < endPos) {
1580 		count++;
1581 		const Sci::Position next = NextPosition(i, 1);
1582 		if ((next - i) > 3)
1583 			count++;
1584 		i = next;
1585 	}
1586 	return count;
1587 }
1588 
FindColumn(Sci::Line line,Sci::Position column)1589 Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1590 	Sci::Position position = LineStart(line);
1591 	if ((line >= 0) && (line < LinesTotal())) {
1592 		Sci::Position columnCurrent = 0;
1593 		while ((columnCurrent < column) && (position < Length())) {
1594 			const char ch = cb.CharAt(position);
1595 			if (ch == '\t') {
1596 				columnCurrent = NextTab(columnCurrent, tabInChars);
1597 				if (columnCurrent > column)
1598 					return position;
1599 				position++;
1600 			} else if (ch == '\r') {
1601 				return position;
1602 			} else if (ch == '\n') {
1603 				return position;
1604 			} else {
1605 				columnCurrent++;
1606 				position = NextPosition(position, 1);
1607 			}
1608 		}
1609 	}
1610 	return position;
1611 }
1612 
Indent(bool forwards,Sci::Line lineBottom,Sci::Line lineTop)1613 void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1614 	// Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1615 	for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1616 		const Sci::Position indentOfLine = GetLineIndentation(line);
1617 		if (forwards) {
1618 			if (LineStart(line) < LineEnd(line)) {
1619 				SetLineIndentation(line, indentOfLine + IndentSize());
1620 			}
1621 		} else {
1622 			SetLineIndentation(line, indentOfLine - IndentSize());
1623 		}
1624 	}
1625 }
1626 
1627 // Convert line endings for a piece of text to a particular mode.
1628 // Stop at len or when a NUL is found.
TransformLineEnds(const char * s,size_t len,int eolModeWanted)1629 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1630 	std::string dest;
1631 	for (size_t i = 0; (i < len) && (s[i]); i++) {
1632 		if (s[i] == '\n' || s[i] == '\r') {
1633 			if (eolModeWanted == SC_EOL_CR) {
1634 				dest.push_back('\r');
1635 			} else if (eolModeWanted == SC_EOL_LF) {
1636 				dest.push_back('\n');
1637 			} else { // eolModeWanted == SC_EOL_CRLF
1638 				dest.push_back('\r');
1639 				dest.push_back('\n');
1640 			}
1641 			if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1642 				i++;
1643 			}
1644 		} else {
1645 			dest.push_back(s[i]);
1646 		}
1647 	}
1648 	return dest;
1649 }
1650 
ConvertLineEnds(int eolModeSet)1651 void Document::ConvertLineEnds(int eolModeSet) {
1652 	UndoGroup ug(this);
1653 
1654 	for (Sci::Position pos = 0; pos < Length(); pos++) {
1655 		if (cb.CharAt(pos) == '\r') {
1656 			if (cb.CharAt(pos + 1) == '\n') {
1657 				// CRLF
1658 				if (eolModeSet == SC_EOL_CR) {
1659 					DeleteChars(pos + 1, 1); // Delete the LF
1660 				} else if (eolModeSet == SC_EOL_LF) {
1661 					DeleteChars(pos, 1); // Delete the CR
1662 				} else {
1663 					pos++;
1664 				}
1665 			} else {
1666 				// CR
1667 				if (eolModeSet == SC_EOL_CRLF) {
1668 					pos += InsertString(pos + 1, "\n", 1); // Insert LF
1669 				} else if (eolModeSet == SC_EOL_LF) {
1670 					pos += InsertString(pos, "\n", 1); // Insert LF
1671 					DeleteChars(pos, 1); // Delete CR
1672 					pos--;
1673 				}
1674 			}
1675 		} else if (cb.CharAt(pos) == '\n') {
1676 			// LF
1677 			if (eolModeSet == SC_EOL_CRLF) {
1678 				pos += InsertString(pos, "\r", 1); // Insert CR
1679 			} else if (eolModeSet == SC_EOL_CR) {
1680 				pos += InsertString(pos, "\r", 1); // Insert CR
1681 				DeleteChars(pos, 1); // Delete LF
1682 				pos--;
1683 			}
1684 		}
1685 	}
1686 
1687 }
1688 
Options() const1689 int Document::Options() const noexcept {
1690 	return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE : 0) |
1691 		(cb.HasStyles() ? 0 : SC_DOCUMENTOPTION_STYLES_NONE);
1692 }
1693 
IsWhiteLine(Sci::Line line) const1694 bool Document::IsWhiteLine(Sci::Line line) const {
1695 	Sci::Position currentChar = LineStart(line);
1696 	const Sci::Position endLine = LineEnd(line);
1697 	while (currentChar < endLine) {
1698 		if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1699 			return false;
1700 		}
1701 		++currentChar;
1702 	}
1703 	return true;
1704 }
1705 
ParaUp(Sci::Position pos) const1706 Sci::Position Document::ParaUp(Sci::Position pos) const {
1707 	Sci::Line line = SciLineFromPosition(pos);
1708 	line--;
1709 	while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1710 		line--;
1711 	}
1712 	while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1713 		line--;
1714 	}
1715 	line++;
1716 	return LineStart(line);
1717 }
1718 
ParaDown(Sci::Position pos) const1719 Sci::Position Document::ParaDown(Sci::Position pos) const {
1720 	Sci::Line line = SciLineFromPosition(pos);
1721 	while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1722 		line++;
1723 	}
1724 	while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1725 		line++;
1726 	}
1727 	if (line < LinesTotal())
1728 		return LineStart(line);
1729 	else // end of a document
1730 		return LineEnd(line-1);
1731 }
1732 
WordCharacterClass(unsigned int ch) const1733 CharClassify::cc Document::WordCharacterClass(unsigned int ch) const {
1734 	if (dbcsCodePage && (!UTF8IsAscii(ch))) {
1735 		if (SC_CP_UTF8 == dbcsCodePage) {
1736 			// Use hard coded Unicode class
1737 			const CharacterCategory cc = charMap.CategoryFor(ch);
1738 			switch (cc) {
1739 
1740 				// Separator, Line/Paragraph
1741 			case ccZl:
1742 			case ccZp:
1743 				return CharClassify::ccNewLine;
1744 
1745 				// Separator, Space
1746 			case ccZs:
1747 				// Other
1748 			case ccCc:
1749 			case ccCf:
1750 			case ccCs:
1751 			case ccCo:
1752 			case ccCn:
1753 				return CharClassify::ccSpace;
1754 
1755 				// Letter
1756 			case ccLu:
1757 			case ccLl:
1758 			case ccLt:
1759 			case ccLm:
1760 			case ccLo:
1761 				// Number
1762 			case ccNd:
1763 			case ccNl:
1764 			case ccNo:
1765 				// Mark - includes combining diacritics
1766 			case ccMn:
1767 			case ccMc:
1768 			case ccMe:
1769 				return CharClassify::ccWord;
1770 
1771 				// Punctuation
1772 			case ccPc:
1773 			case ccPd:
1774 			case ccPs:
1775 			case ccPe:
1776 			case ccPi:
1777 			case ccPf:
1778 			case ccPo:
1779 				// Symbol
1780 			case ccSm:
1781 			case ccSc:
1782 			case ccSk:
1783 			case ccSo:
1784 				return CharClassify::ccPunctuation;
1785 
1786 			}
1787 		} else {
1788 			// Asian DBCS
1789 			return CharClassify::ccWord;
1790 		}
1791 	}
1792 	return charClass.GetClass(static_cast<unsigned char>(ch));
1793 }
1794 
1795 /**
1796  * Used by commands that want to select whole words.
1797  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1798  */
ExtendWordSelect(Sci::Position pos,int delta,bool onlyWordCharacters) const1799 Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1800 	CharClassify::cc ccStart = CharClassify::ccWord;
1801 	if (delta < 0) {
1802 		if (!onlyWordCharacters) {
1803 			const CharacterExtracted ce = CharacterBefore(pos);
1804 			ccStart = WordCharacterClass(ce.character);
1805 		}
1806 		while (pos > 0) {
1807 			const CharacterExtracted ce = CharacterBefore(pos);
1808 			if (WordCharacterClass(ce.character) != ccStart)
1809 				break;
1810 			pos -= ce.widthBytes;
1811 		}
1812 	} else {
1813 		if (!onlyWordCharacters && pos < LengthNoExcept()) {
1814 			const CharacterExtracted ce = CharacterAfter(pos);
1815 			ccStart = WordCharacterClass(ce.character);
1816 		}
1817 		while (pos < LengthNoExcept()) {
1818 			const CharacterExtracted ce = CharacterAfter(pos);
1819 			if (WordCharacterClass(ce.character) != ccStart)
1820 				break;
1821 			pos += ce.widthBytes;
1822 		}
1823 	}
1824 	return MovePositionOutsideChar(pos, delta, true);
1825 }
1826 
1827 /**
1828  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1829  * (delta < 0).
1830  * This is looking for a transition between character classes although there is also some
1831  * additional movement to transit white space.
1832  * Used by cursor movement by word commands.
1833  */
NextWordStart(Sci::Position pos,int delta) const1834 Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1835 	if (delta < 0) {
1836 		while (pos > 0) {
1837 			const CharacterExtracted ce = CharacterBefore(pos);
1838 			if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1839 				break;
1840 			pos -= ce.widthBytes;
1841 		}
1842 		if (pos > 0) {
1843 			CharacterExtracted ce = CharacterBefore(pos);
1844 			const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1845 			while (pos > 0) {
1846 				ce = CharacterBefore(pos);
1847 				if (WordCharacterClass(ce.character) != ccStart)
1848 					break;
1849 				pos -= ce.widthBytes;
1850 			}
1851 		}
1852 	} else {
1853 		CharacterExtracted ce = CharacterAfter(pos);
1854 		const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1855 		while (pos < LengthNoExcept()) {
1856 			ce = CharacterAfter(pos);
1857 			if (WordCharacterClass(ce.character) != ccStart)
1858 				break;
1859 			pos += ce.widthBytes;
1860 		}
1861 		while (pos < LengthNoExcept()) {
1862 			ce = CharacterAfter(pos);
1863 			if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1864 				break;
1865 			pos += ce.widthBytes;
1866 		}
1867 	}
1868 	return pos;
1869 }
1870 
1871 /**
1872  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1873  * (delta < 0).
1874  * This is looking for a transition between character classes although there is also some
1875  * additional movement to transit white space.
1876  * Used by cursor movement by word commands.
1877  */
NextWordEnd(Sci::Position pos,int delta) const1878 Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1879 	if (delta < 0) {
1880 		if (pos > 0) {
1881 			CharacterExtracted ce = CharacterBefore(pos);
1882 			const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1883 			if (ccStart != CharClassify::ccSpace) {
1884 				while (pos > 0) {
1885 					ce = CharacterBefore(pos);
1886 					if (WordCharacterClass(ce.character) != ccStart)
1887 						break;
1888 					pos -= ce.widthBytes;
1889 				}
1890 			}
1891 			while (pos > 0) {
1892 				ce = CharacterBefore(pos);
1893 				if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1894 					break;
1895 				pos -= ce.widthBytes;
1896 			}
1897 		}
1898 	} else {
1899 		while (pos < LengthNoExcept()) {
1900 			const CharacterExtracted ce = CharacterAfter(pos);
1901 			if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1902 				break;
1903 			pos += ce.widthBytes;
1904 		}
1905 		if (pos < LengthNoExcept()) {
1906 			CharacterExtracted ce = CharacterAfter(pos);
1907 			const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1908 			while (pos < LengthNoExcept()) {
1909 				ce = CharacterAfter(pos);
1910 				if (WordCharacterClass(ce.character) != ccStart)
1911 					break;
1912 				pos += ce.widthBytes;
1913 			}
1914 		}
1915 	}
1916 	return pos;
1917 }
1918 
1919 /**
1920  * Check that the character at the given position is a word or punctuation character and that
1921  * the previous character is of a different character class.
1922  */
IsWordStartAt(Sci::Position pos) const1923 bool Document::IsWordStartAt(Sci::Position pos) const {
1924 	if (pos >= LengthNoExcept())
1925 		return false;
1926 	if (pos > 0) {
1927 		const CharacterExtracted cePos = CharacterAfter(pos);
1928 		const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1929 		const CharacterExtracted cePrev = CharacterBefore(pos);
1930 		const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1931 		return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1932 			(ccPos != ccPrev);
1933 	}
1934 	return true;
1935 }
1936 
1937 /**
1938  * Check that the character at the given position is a word or punctuation character and that
1939  * the next character is of a different character class.
1940  */
IsWordEndAt(Sci::Position pos) const1941 bool Document::IsWordEndAt(Sci::Position pos) const {
1942 	if (pos <= 0)
1943 		return false;
1944 	if (pos < LengthNoExcept()) {
1945 		const CharacterExtracted cePos = CharacterAfter(pos);
1946 		const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1947 		const CharacterExtracted cePrev = CharacterBefore(pos);
1948 		const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1949 		return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1950 			(ccPrev != ccPos);
1951 	}
1952 	return true;
1953 }
1954 
1955 /**
1956  * Check that the given range is has transitions between character classes at both
1957  * ends and where the characters on the inside are word or punctuation characters.
1958  */
IsWordAt(Sci::Position start,Sci::Position end) const1959 bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
1960 	return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1961 }
1962 
MatchesWordOptions(bool word,bool wordStart,Sci::Position pos,Sci::Position length) const1963 bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
1964 	return (!word && !wordStart) ||
1965 			(word && IsWordAt(pos, pos + length)) ||
1966 			(wordStart && IsWordStartAt(pos));
1967 }
1968 
HasCaseFolder() const1969 bool Document::HasCaseFolder() const noexcept {
1970 	return pcf != nullptr;
1971 }
1972 
SetCaseFolder(CaseFolder * pcf_)1973 void Document::SetCaseFolder(CaseFolder *pcf_) noexcept {
1974 	pcf.reset(pcf_);
1975 }
1976 
ExtractCharacter(Sci::Position position) const1977 Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
1978 	const unsigned char leadByte = cb.UCharAt(position);
1979 	if (UTF8IsAscii(leadByte)) {
1980 		// Common case: ASCII character
1981 		return CharacterExtracted(leadByte, 1);
1982 	}
1983 	const int widthCharBytes = UTF8BytesOfLead[leadByte];
1984 	unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1985 	for (int b=1; b<widthCharBytes; b++)
1986 		charBytes[b] = cb.UCharAt(position + b);
1987 	const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1988 	if (utf8status & UTF8MaskInvalid) {
1989 		// Treat as invalid and use up just one byte
1990 		return CharacterExtracted(unicodeReplacementChar, 1);
1991 	} else {
1992 		return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1993 	}
1994 }
1995 
1996 /**
1997  * Find text in document, supporting both forward and backward
1998  * searches (just pass minPos > maxPos to do a backward search)
1999  * Has not been tested with backwards DBCS searches yet.
2000  */
FindText(Sci::Position minPos,Sci::Position maxPos,const char * search,int flags,Sci::Position * length)2001 Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
2002                         int flags, Sci::Position *length) {
2003 	if (*length <= 0)
2004 		return minPos;
2005 	const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
2006 	const bool word = (flags & SCFIND_WHOLEWORD) != 0;
2007 	const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
2008 	const bool regExp = (flags & SCFIND_REGEXP) != 0;
2009 	if (regExp) {
2010 		if (!regex)
2011 			regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
2012 		return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
2013 	} else {
2014 
2015 		const bool forward = minPos <= maxPos;
2016 		const int increment = forward ? 1 : -1;
2017 
2018 		// Range endpoints should not be inside DBCS characters, but just in case, move them.
2019 		const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
2020 		const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
2021 
2022 		// Compute actual search ranges needed
2023 		const Sci::Position lengthFind = *length;
2024 
2025 		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2026 		const Sci::Position limitPos = std::max(startPos, endPos);
2027 		Sci::Position pos = startPos;
2028 		if (!forward) {
2029 			// Back all of a character
2030 			pos = NextPosition(pos, increment);
2031 		}
2032 		if (caseSensitive) {
2033 			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2034 			const char charStartSearch =  search[0];
2035 			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2036 				if (CharAt(pos) == charStartSearch) {
2037 					bool found = (pos + lengthFind) <= limitPos;
2038 					for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
2039 						found = CharAt(pos + indexSearch) == search[indexSearch];
2040 					}
2041 					if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2042 						return pos;
2043 					}
2044 				}
2045 				if (!NextCharacter(pos, increment))
2046 					break;
2047 			}
2048 		} else if (SC_CP_UTF8 == dbcsCodePage) {
2049 			constexpr size_t maxFoldingExpansion = 4;
2050 			std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
2051 			const size_t lenSearch =
2052 				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2053 			char bytes[UTF8MaxBytes + 1] = "";
2054 			char folded[UTF8MaxBytes * maxFoldingExpansion + 1] = "";
2055 			while (forward ? (pos < endPos) : (pos >= endPos)) {
2056 				int widthFirstCharacter = 0;
2057 				Sci::Position posIndexDocument = pos;
2058 				size_t indexSearch = 0;
2059 				bool characterMatches = true;
2060 				for (;;) {
2061 					const unsigned char leadByte = cb.UCharAt(posIndexDocument);
2062 					bytes[0] = leadByte;
2063 					int widthChar = 1;
2064 					if (!UTF8IsAscii(leadByte)) {
2065 						const int widthCharBytes = UTF8BytesOfLead[leadByte];
2066 						for (int b=1; b<widthCharBytes; b++) {
2067 							bytes[b] = cb.CharAt(posIndexDocument+b);
2068 						}
2069 						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
2070 					}
2071 					if (!widthFirstCharacter)
2072 						widthFirstCharacter = widthChar;
2073 					if ((posIndexDocument + widthChar) > limitPos)
2074 						break;
2075 					const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2076 					// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2077 					assert((indexSearch + lenFlat) <= searchThing.size());
2078 					// Does folded match the buffer
2079 					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2080 					if (!characterMatches)
2081 						break;
2082 					posIndexDocument += widthChar;
2083 					indexSearch += lenFlat;
2084 					if (indexSearch >= lenSearch)
2085 						break;
2086 				}
2087 				if (characterMatches && (indexSearch == lenSearch)) {
2088 					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
2089 						*length = posIndexDocument - pos;
2090 						return pos;
2091 					}
2092 				}
2093 				if (forward) {
2094 					pos += widthFirstCharacter;
2095 				} else {
2096 					if (!NextCharacter(pos, increment))
2097 						break;
2098 				}
2099 			}
2100 		} else if (dbcsCodePage) {
2101 			constexpr size_t maxBytesCharacter = 2;
2102 			constexpr size_t maxFoldingExpansion = 4;
2103 			std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
2104 			const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2105 			while (forward ? (pos < endPos) : (pos >= endPos)) {
2106 				Sci::Position indexDocument = 0;
2107 				size_t indexSearch = 0;
2108 				bool characterMatches = true;
2109 				while (characterMatches &&
2110 					((pos + indexDocument) < limitPos) &&
2111 					(indexSearch < lenSearch)) {
2112 					char bytes[maxBytesCharacter + 1];
2113 					bytes[0] = cb.CharAt(pos + indexDocument);
2114 					const Sci::Position widthChar = IsDBCSLeadByteNoExcept(bytes[0]) ? 2 : 1;
2115 					if (widthChar == 2)
2116 						bytes[1] = cb.CharAt(pos + indexDocument + 1);
2117 					if ((pos + indexDocument + widthChar) > limitPos)
2118 						break;
2119 					char folded[maxBytesCharacter * maxFoldingExpansion + 1];
2120 					const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2121 					// memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2122 					assert((indexSearch + lenFlat) <= searchThing.size());
2123 					// Does folded match the buffer
2124 					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2125 					indexDocument += widthChar;
2126 					indexSearch += lenFlat;
2127 				}
2128 				if (characterMatches && (indexSearch == lenSearch)) {
2129 					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2130 						*length = indexDocument;
2131 						return pos;
2132 					}
2133 				}
2134 				if (!NextCharacter(pos, increment))
2135 					break;
2136 			}
2137 		} else {
2138 			const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2139 			std::vector<char> searchThing(lengthFind + 1);
2140 			pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2141 			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2142 				bool found = (pos + lengthFind) <= limitPos;
2143 				for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
2144 					const char ch = CharAt(pos + indexSearch);
2145 					char folded[2];
2146 					pcf->Fold(folded, sizeof(folded), &ch, 1);
2147 					found = folded[0] == searchThing[indexSearch];
2148 				}
2149 				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2150 					return pos;
2151 				}
2152 				if (!NextCharacter(pos, increment))
2153 					break;
2154 			}
2155 		}
2156 	}
2157 	//Platform::DebugPrintf("Not found\n");
2158 	return -1;
2159 }
2160 
SubstituteByPosition(const char * text,Sci::Position * length)2161 const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2162 	if (regex)
2163 		return regex->SubstituteByPosition(this, text, length);
2164 	else
2165 		return nullptr;
2166 }
2167 
LineCharacterIndex() const2168 int Document::LineCharacterIndex() const noexcept {
2169 	return cb.LineCharacterIndex();
2170 }
2171 
AllocateLineCharacterIndex(int lineCharacterIndex)2172 void Document::AllocateLineCharacterIndex(int lineCharacterIndex) {
2173 	return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2174 }
2175 
ReleaseLineCharacterIndex(int lineCharacterIndex)2176 void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) {
2177 	return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2178 }
2179 
LinesTotal() const2180 Sci::Line Document::LinesTotal() const noexcept {
2181 	return cb.Lines();
2182 }
2183 
SetDefaultCharClasses(bool includeWordClass)2184 void Document::SetDefaultCharClasses(bool includeWordClass) {
2185     charClass.SetDefaultCharClasses(includeWordClass);
2186 }
2187 
SetCharClasses(const unsigned char * chars,CharClassify::cc newCharClass)2188 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
2189     charClass.SetCharClasses(chars, newCharClass);
2190 }
2191 
GetCharsOfClass(CharClassify::cc characterClass,unsigned char * buffer) const2192 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) const {
2193     return charClass.GetCharsOfClass(characterClass, buffer);
2194 }
2195 
SetCharacterCategoryOptimization(int countCharacters)2196 void Document::SetCharacterCategoryOptimization(int countCharacters) {
2197 	charMap.Optimize(countCharacters);
2198 }
2199 
CharacterCategoryOptimization() const2200 int Document::CharacterCategoryOptimization() const noexcept {
2201 	return charMap.Size();
2202 }
2203 
StartStyling(Sci_Position position)2204 void SCI_METHOD Document::StartStyling(Sci_Position position) {
2205 	endStyled = position;
2206 }
2207 
SetStyleFor(Sci_Position length,char style)2208 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2209 	if (enteredStyling != 0) {
2210 		return false;
2211 	} else {
2212 		enteredStyling++;
2213 		const Sci::Position prevEndStyled = endStyled;
2214 		if (cb.SetStyleFor(endStyled, length, style)) {
2215 			const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
2216 			                   prevEndStyled, length);
2217 			NotifyModified(mh);
2218 		}
2219 		endStyled += length;
2220 		enteredStyling--;
2221 		return true;
2222 	}
2223 }
2224 
SetStyles(Sci_Position length,const char * styles)2225 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2226 	if (enteredStyling != 0) {
2227 		return false;
2228 	} else {
2229 		enteredStyling++;
2230 		bool didChange = false;
2231 		Sci::Position startMod = 0;
2232 		Sci::Position endMod = 0;
2233 		for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2234 			PLATFORM_ASSERT(endStyled < Length());
2235 			if (cb.SetStyleAt(endStyled, styles[iPos])) {
2236 				if (!didChange) {
2237 					startMod = endStyled;
2238 				}
2239 				didChange = true;
2240 				endMod = endStyled;
2241 			}
2242 		}
2243 		if (didChange) {
2244 			const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
2245 			                   startMod, endMod - startMod + 1);
2246 			NotifyModified(mh);
2247 		}
2248 		enteredStyling--;
2249 		return true;
2250 	}
2251 }
2252 
EnsureStyledTo(Sci::Position pos)2253 void Document::EnsureStyledTo(Sci::Position pos) {
2254 	if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2255 		IncrementStyleClock();
2256 		if (pli && !pli->UseContainerLexing()) {
2257 			const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled());
2258 			const Sci::Position endStyledTo = LineStart(lineEndStyled);
2259 			pli->Colourise(endStyledTo, pos);
2260 		} else {
2261 			// Ask the watchers to style, and stop as soon as one responds.
2262 			for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2263 				(pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2264 				it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2265 			}
2266 		}
2267 	}
2268 }
2269 
StyleToAdjustingLineDuration(Sci::Position pos)2270 void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2271 	const Sci::Line lineFirst = SciLineFromPosition(GetEndStyled());
2272 	ElapsedPeriod epStyling;
2273 	EnsureStyledTo(pos);
2274 	const Sci::Line lineLast = SciLineFromPosition(GetEndStyled());
2275 	durationStyleOneLine.AddSample(lineLast - lineFirst, epStyling.Duration());
2276 }
2277 
LexerChanged()2278 void Document::LexerChanged() {
2279 	// Tell the watchers the lexer has changed.
2280 	for (const WatcherWithUserData &watcher : watchers) {
2281 		watcher.watcher->NotifyLexerChanged(this, watcher.userData);
2282 	}
2283 }
2284 
GetLexInterface() const2285 LexInterface *Document::GetLexInterface() const noexcept {
2286 	return pli.get();
2287 }
2288 
SetLexInterface(std::unique_ptr<LexInterface> pLexInterface)2289 void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
2290 	pli = std::move(pLexInterface);
2291 }
2292 
SetLineState(Sci_Position line,int state)2293 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2294 	const int statePrevious = States()->SetLineState(line, state);
2295 	if (state != statePrevious) {
2296 		const DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, nullptr,
2297 			static_cast<Sci::Line>(line));
2298 		NotifyModified(mh);
2299 	}
2300 	return statePrevious;
2301 }
2302 
GetLineState(Sci_Position line) const2303 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
2304 	return States()->GetLineState(line);
2305 }
2306 
GetMaxLineState() const2307 Sci::Line Document::GetMaxLineState() const noexcept {
2308 	return States()->GetMaxLineState();
2309 }
2310 
ChangeLexerState(Sci_Position start,Sci_Position end)2311 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2312 	const DocModification mh(SC_MOD_LEXERSTATE, start,
2313 		end-start, 0, 0, 0);
2314 	NotifyModified(mh);
2315 }
2316 
MarginStyledText(Sci::Line line) const2317 StyledText Document::MarginStyledText(Sci::Line line) const noexcept {
2318 	const LineAnnotation *pla = Margins();
2319 	return StyledText(pla->Length(line), pla->Text(line),
2320 		pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2321 }
2322 
MarginSetText(Sci::Line line,const char * text)2323 void Document::MarginSetText(Sci::Line line, const char *text) {
2324 	Margins()->SetText(line, text);
2325 	const DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line),
2326 		0, 0, 0, line);
2327 	NotifyModified(mh);
2328 }
2329 
MarginSetStyle(Sci::Line line,int style)2330 void Document::MarginSetStyle(Sci::Line line, int style) {
2331 	Margins()->SetStyle(line, style);
2332 	NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line),
2333 		0, 0, 0, line));
2334 }
2335 
MarginSetStyles(Sci::Line line,const unsigned char * styles)2336 void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2337 	Margins()->SetStyles(line, styles);
2338 	NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line),
2339 		0, 0, 0, line));
2340 }
2341 
MarginClearAll()2342 void Document::MarginClearAll() {
2343 	const Sci::Line maxEditorLine = LinesTotal();
2344 	for (Sci::Line l=0; l<maxEditorLine; l++)
2345 		MarginSetText(l, nullptr);
2346 	// Free remaining data
2347 	Margins()->ClearAll();
2348 }
2349 
AnnotationStyledText(Sci::Line line) const2350 StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept {
2351 	const LineAnnotation *pla = Annotations();
2352 	return StyledText(pla->Length(line), pla->Text(line),
2353 		pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2354 }
2355 
AnnotationSetText(Sci::Line line,const char * text)2356 void Document::AnnotationSetText(Sci::Line line, const char *text) {
2357 	if (line >= 0 && line < LinesTotal()) {
2358 		const Sci::Line linesBefore = AnnotationLines(line);
2359 		Annotations()->SetText(line, text);
2360 		const int linesAfter = AnnotationLines(line);
2361 		DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line),
2362 			0, 0, 0, line);
2363 		mh.annotationLinesAdded = linesAfter - linesBefore;
2364 		NotifyModified(mh);
2365 	}
2366 }
2367 
AnnotationSetStyle(Sci::Line line,int style)2368 void Document::AnnotationSetStyle(Sci::Line line, int style) {
2369 	if (line >= 0 && line < LinesTotal()) {
2370 		Annotations()->SetStyle(line, style);
2371 		const DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line),
2372 			0, 0, 0, line);
2373 		NotifyModified(mh);
2374 	}
2375 }
2376 
AnnotationSetStyles(Sci::Line line,const unsigned char * styles)2377 void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2378 	if (line >= 0 && line < LinesTotal()) {
2379 		Annotations()->SetStyles(line, styles);
2380 	}
2381 }
2382 
AnnotationLines(Sci::Line line) const2383 int Document::AnnotationLines(Sci::Line line) const noexcept {
2384 	return Annotations()->Lines(line);
2385 }
2386 
AnnotationClearAll()2387 void Document::AnnotationClearAll() {
2388 	const Sci::Line maxEditorLine = LinesTotal();
2389 	for (Sci::Line l=0; l<maxEditorLine; l++)
2390 		AnnotationSetText(l, nullptr);
2391 	// Free remaining data
2392 	Annotations()->ClearAll();
2393 }
2394 
EOLAnnotationStyledText(Sci::Line line) const2395 StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept {
2396 	const LineAnnotation *pla = EOLAnnotations();
2397 	return StyledText(pla->Length(line), pla->Text(line),
2398 		pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2399 }
2400 
EOLAnnotationSetText(Sci::Line line,const char * text)2401 void Document::EOLAnnotationSetText(Sci::Line line, const char *text) {
2402 	if (line >= 0 && line < LinesTotal()) {
2403 		EOLAnnotations()->SetText(line, text);
2404 		const DocModification mh(SC_MOD_CHANGEEOLANNOTATION, LineStart(line),
2405 			0, 0, 0, line);
2406 		NotifyModified(mh);
2407 	}
2408 }
2409 
EOLAnnotationSetStyle(Sci::Line line,int style)2410 void Document::EOLAnnotationSetStyle(Sci::Line line, int style) {
2411 	if (line >= 0 && line < LinesTotal()) {
2412 		EOLAnnotations()->SetStyle(line, style);
2413 		const DocModification mh(SC_MOD_CHANGEEOLANNOTATION, LineStart(line),
2414 			0, 0, 0, line);
2415 		NotifyModified(mh);
2416 	}
2417 }
2418 
EOLAnnotationClearAll()2419 void Document::EOLAnnotationClearAll() {
2420 	const Sci::Line maxEditorLine = LinesTotal();
2421 	for (Sci::Line l=0; l<maxEditorLine; l++)
2422 		EOLAnnotationSetText(l, nullptr);
2423 	// Free remaining data
2424 	EOLAnnotations()->ClearAll();
2425 }
2426 
IncrementStyleClock()2427 void Document::IncrementStyleClock() noexcept {
2428 	styleClock = (styleClock + 1) % 0x100000;
2429 }
2430 
DecorationSetCurrentIndicator(int indicator)2431 void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
2432 	decorations->SetCurrentIndicator(indicator);
2433 }
2434 
DecorationFillRange(Sci_Position position,int value,Sci_Position fillLength)2435 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2436 	const FillResult<Sci::Position> fr = decorations->FillRange(
2437 		position, value, fillLength);
2438 	if (fr.changed) {
2439 		const DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2440 							fr.position, fr.fillLength);
2441 		NotifyModified(mh);
2442 	}
2443 }
2444 
AddWatcher(DocWatcher * watcher,void * userData)2445 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2446 	const WatcherWithUserData wwud(watcher, userData);
2447 	std::vector<WatcherWithUserData>::iterator it =
2448 		std::find(watchers.begin(), watchers.end(), wwud);
2449 	if (it != watchers.end())
2450 		return false;
2451 	watchers.push_back(wwud);
2452 	return true;
2453 }
2454 
RemoveWatcher(DocWatcher * watcher,void * userData)2455 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2456 	std::vector<WatcherWithUserData>::iterator it =
2457 		std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2458 	if (it != watchers.end()) {
2459 		watchers.erase(it);
2460 		return true;
2461 	}
2462 	return false;
2463 }
2464 
NotifyModifyAttempt()2465 void Document::NotifyModifyAttempt() {
2466 	for (const WatcherWithUserData &watcher : watchers) {
2467 		watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2468 	}
2469 }
2470 
NotifySavePoint(bool atSavePoint)2471 void Document::NotifySavePoint(bool atSavePoint) {
2472 	for (const WatcherWithUserData &watcher : watchers) {
2473 		watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2474 	}
2475 }
2476 
NotifyModified(DocModification mh)2477 void Document::NotifyModified(DocModification mh) {
2478 	if (mh.modificationType & SC_MOD_INSERTTEXT) {
2479 		decorations->InsertSpace(mh.position, mh.length);
2480 	} else if (mh.modificationType & SC_MOD_DELETETEXT) {
2481 		decorations->DeleteRange(mh.position, mh.length);
2482 	}
2483 	for (const WatcherWithUserData &watcher : watchers) {
2484 		watcher.watcher->NotifyModified(this, mh, watcher.userData);
2485 	}
2486 }
2487 
// Used for word part navigation.
// True for the 32 ASCII punctuation and symbol characters, which form four contiguous
// ranges: '!' to '/', ':' to '@', '[' to '`' and '{' to '~'.
// The range tests rely on an ASCII compatible execution character set.
static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept {
	const auto within = [ch](char first, char last) noexcept {
		return (ch >= static_cast<unsigned int>(first)) &&
			(ch <= static_cast<unsigned int>(last));
	};
	return within('!', '/') || within(':', '@') || within('[', '`') || within('{', '~');
}
2528 
IsWordPartSeparator(unsigned int ch) const2529 bool Document::IsWordPartSeparator(unsigned int ch) const {
2530 	return (WordCharacterClass(ch) == CharClassify::ccWord) && IsASCIIPunctuationCharacter(ch);
2531 }
2532 
WordPartLeft(Sci::Position pos) const2533 Sci::Position Document::WordPartLeft(Sci::Position pos) const {
2534 	if (pos > 0) {
2535 		pos -= CharacterBefore(pos).widthBytes;
2536 		CharacterExtracted ceStart = CharacterAfter(pos);
2537 		if (IsWordPartSeparator(ceStart.character)) {
2538 			while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
2539 				pos -= CharacterBefore(pos).widthBytes;
2540 			}
2541 		}
2542 		if (pos > 0) {
2543 			ceStart = CharacterAfter(pos);
2544 			pos -= CharacterBefore(pos).widthBytes;
2545 			if (IsLowerCase(ceStart.character)) {
2546 				while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
2547 					pos -= CharacterBefore(pos).widthBytes;
2548 				if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
2549 					pos += CharacterAfter(pos).widthBytes;
2550 			} else if (IsUpperCase(ceStart.character)) {
2551 				while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
2552 					pos -= CharacterBefore(pos).widthBytes;
2553 				if (!IsUpperCase(CharacterAfter(pos).character))
2554 					pos += CharacterAfter(pos).widthBytes;
2555 			} else if (IsADigit(ceStart.character)) {
2556 				while (pos > 0 && IsADigit(CharacterAfter(pos).character))
2557 					pos -= CharacterBefore(pos).widthBytes;
2558 				if (!IsADigit(CharacterAfter(pos).character))
2559 					pos += CharacterAfter(pos).widthBytes;
2560 			} else if (IsASCIIPunctuationCharacter(ceStart.character)) {
2561 				while (pos > 0 && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
2562 					pos -= CharacterBefore(pos).widthBytes;
2563 				if (!IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
2564 					pos += CharacterAfter(pos).widthBytes;
2565 			} else if (isspacechar(ceStart.character)) {
2566 				while (pos > 0 && isspacechar(CharacterAfter(pos).character))
2567 					pos -= CharacterBefore(pos).widthBytes;
2568 				if (!isspacechar(CharacterAfter(pos).character))
2569 					pos += CharacterAfter(pos).widthBytes;
2570 			} else if (!IsASCII(ceStart.character)) {
2571 				while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
2572 					pos -= CharacterBefore(pos).widthBytes;
2573 				if (IsASCII(CharacterAfter(pos).character))
2574 					pos += CharacterAfter(pos).widthBytes;
2575 			} else {
2576 				pos += CharacterAfter(pos).widthBytes;
2577 			}
2578 		}
2579 	}
2580 	return pos;
2581 }
2582 
WordPartRight(Sci::Position pos) const2583 Sci::Position Document::WordPartRight(Sci::Position pos) const {
2584 	CharacterExtracted ceStart = CharacterAfter(pos);
2585 	const Sci::Position length = LengthNoExcept();
2586 	if (IsWordPartSeparator(ceStart.character)) {
2587 		while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
2588 			pos += CharacterAfter(pos).widthBytes;
2589 		ceStart = CharacterAfter(pos);
2590 	}
2591 	if (!IsASCII(ceStart.character)) {
2592 		while (pos < length && !IsASCII(CharacterAfter(pos).character))
2593 			pos += CharacterAfter(pos).widthBytes;
2594 	} else if (IsLowerCase(ceStart.character)) {
2595 		while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2596 			pos += CharacterAfter(pos).widthBytes;
2597 	} else if (IsUpperCase(ceStart.character)) {
2598 		if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
2599 			pos += CharacterAfter(pos).widthBytes;
2600 			while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2601 				pos += CharacterAfter(pos).widthBytes;
2602 		} else {
2603 			while (pos < length && IsUpperCase(CharacterAfter(pos).character))
2604 				pos += CharacterAfter(pos).widthBytes;
2605 		}
2606 		if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
2607 			pos -= CharacterBefore(pos).widthBytes;
2608 	} else if (IsADigit(ceStart.character)) {
2609 		while (pos < length && IsADigit(CharacterAfter(pos).character))
2610 			pos += CharacterAfter(pos).widthBytes;
2611 	} else if (IsASCIIPunctuationCharacter(ceStart.character)) {
2612 		while (pos < length && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
2613 			pos += CharacterAfter(pos).widthBytes;
2614 	} else if (isspacechar(ceStart.character)) {
2615 		while (pos < length && isspacechar(CharacterAfter(pos).character))
2616 			pos += CharacterAfter(pos).widthBytes;
2617 	} else {
2618 		pos += CharacterAfter(pos).widthBytes;
2619 	}
2620 	return pos;
2621 }
2622 
// True when c is a carriage return or a line feed.
static constexpr bool IsLineEndChar(char c) noexcept {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
2626 
ExtendStyleRange(Sci::Position pos,int delta,bool singleLine)2627 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
2628 	const int sStart = cb.StyleAt(pos);
2629 	if (delta < 0) {
2630 		while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2631 			pos--;
2632 		pos++;
2633 	} else {
2634 		while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2635 			pos++;
2636 	}
2637 	return pos;
2638 }
2639 
// Return the brace that pairs with ch for (), [], {} and <>,
// or '\0' when ch is not one of those brace characters.
static char BraceOpposite(char ch) noexcept {
	// Each opening brace is immediately followed by its closing brace.
	constexpr char pairs[] = "()[]{}<>";
	constexpr size_t pairChars = sizeof(pairs) - 1;	// Exclude the terminating NUL
	for (size_t i = 0; i < pairChars; i += 2) {
		if (ch == pairs[i])
			return pairs[i + 1];
		if (ch == pairs[i + 1])
			return pairs[i];
	}
	return '\0';
}
2662 
2663 // TODO: should be able to extend styled region to find matching brace
BraceMatch(Sci::Position position,Sci::Position,Sci::Position startPos,bool useStartPos)2664 Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept {
2665 	const char chBrace = CharAt(position);
2666 	const char chSeek = BraceOpposite(chBrace);
2667 	if (chSeek == '\0')
2668 		return - 1;
2669 	const int styBrace = StyleIndexAt(position);
2670 	int direction = -1;
2671 	if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2672 		direction = 1;
2673 	int depth = 1;
2674 	position = useStartPos ? startPos : NextPosition(position, direction);
2675 	while ((position >= 0) && (position < LengthNoExcept())) {
2676 		const char chAtPos = CharAt(position);
2677 		const int styAtPos = StyleIndexAt(position);
2678 		if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2679 			if (chAtPos == chBrace)
2680 				depth++;
2681 			if (chAtPos == chSeek)
2682 				depth--;
2683 			if (depth == 0)
2684 				return position;
2685 		}
2686 		const Sci::Position positionBeforeMove = position;
2687 		position = NextPosition(position, direction);
2688 		if (position == positionBeforeMove)
2689 			break;
2690 	}
2691 	return - 1;
2692 }
2693 
2694 /**
2695  * Implementation of RegexSearchBase for the default built-in regular expression engine
2696  */
2697 class BuiltinRegex : public RegexSearchBase {
2698 public:
BuiltinRegex(CharClassify * charClassTable)2699 	explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2700 	BuiltinRegex(const BuiltinRegex &) = delete;
2701 	BuiltinRegex(BuiltinRegex &&) = delete;
2702 	BuiltinRegex &operator=(const BuiltinRegex &) = delete;
2703 	BuiltinRegex &operator=(BuiltinRegex &&) = delete;
2704 	~BuiltinRegex() override = default;
2705 
2706 	Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
2707                         bool caseSensitive, bool word, bool wordStart, int flags,
2708                         Sci::Position *length) override;
2709 
2710 	const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;
2711 
2712 private:
2713 	RESearch search;
2714 	std::string substituted;
2715 };
2716 
2717 namespace {
2718 
2719 /**
2720 * RESearchRange keeps track of search range.
2721 */
2722 class RESearchRange {
2723 public:
2724 	const Document *doc;
2725 	int increment;
2726 	Sci::Position startPos;
2727 	Sci::Position endPos;
2728 	Sci::Line lineRangeStart;
2729 	Sci::Line lineRangeEnd;
2730 	Sci::Line lineRangeBreak;
RESearchRange(const Document * doc_,Sci::Position minPos,Sci::Position maxPos)2731 	RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) {
2732 		increment = (minPos <= maxPos) ? 1 : -1;
2733 
2734 		// Range endpoints should not be inside DBCS characters or between a CR and LF,
2735 		// but just in case, move them.
2736 		startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2737 		endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2738 
2739 		lineRangeStart = doc->SciLineFromPosition(startPos);
2740 		lineRangeEnd = doc->SciLineFromPosition(endPos);
2741 		lineRangeBreak = lineRangeEnd + increment;
2742 	}
LineRange(Sci::Line line) const2743 	Range LineRange(Sci::Line line) const {
2744 		Range range(doc->LineStart(line), doc->LineEnd(line));
2745 		if (increment == 1) {
2746 			if (line == lineRangeStart)
2747 				range.start = startPos;
2748 			if (line == lineRangeEnd)
2749 				range.end = endPos;
2750 		} else {
2751 			if (line == lineRangeEnd)
2752 				range.start = endPos;
2753 			if (line == lineRangeStart)
2754 				range.end = startPos;
2755 		}
2756 		return range;
2757 	}
2758 };
2759 
2760 // Define a way for the Regular Expression code to access the document
2761 class DocumentIndexer : public CharacterIndexer {
2762 	Document *pdoc;
2763 	Sci::Position end;
2764 public:
DocumentIndexer(Document * pdoc_,Sci::Position end_)2765 	DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2766 		pdoc(pdoc_), end(end_) {
2767 	}
2768 
2769 	DocumentIndexer(const DocumentIndexer &) = delete;
2770 	DocumentIndexer(DocumentIndexer &&) = delete;
2771 	DocumentIndexer &operator=(const DocumentIndexer &) = delete;
2772 	DocumentIndexer &operator=(DocumentIndexer &&) = delete;
2773 
2774 	~DocumentIndexer() override = default;
2775 
CharAt(Sci::Position index) const2776 	char CharAt(Sci::Position index) const noexcept override {
2777 		if (index < 0 || index >= end)
2778 			return 0;
2779 		else
2780 			return pdoc->CharAt(index);
2781 	}
2782 };
2783 
2784 #ifndef NO_CXX11_REGEX
2785 
2786 class ByteIterator {
2787 public:
2788 	typedef std::bidirectional_iterator_tag iterator_category;
2789 	typedef char value_type;
2790 	typedef ptrdiff_t difference_type;
2791 	typedef char* pointer;
2792 	typedef char& reference;
2793 
2794 	const Document *doc;
2795 	Sci::Position position;
2796 
ByteIterator(const Document * doc_=nullptr,Sci::Position position_=0)2797 	ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2798 		doc(doc_), position(position_) {
2799 	}
ByteIterator(const ByteIterator & other)2800 	ByteIterator(const ByteIterator &other) noexcept {
2801 		doc = other.doc;
2802 		position = other.position;
2803 	}
ByteIterator(ByteIterator && other)2804 	ByteIterator(ByteIterator &&other) noexcept {
2805 		doc = other.doc;
2806 		position = other.position;
2807 	}
operator =(const ByteIterator & other)2808 	ByteIterator &operator=(const ByteIterator &other) noexcept {
2809 		if (this != &other) {
2810 			doc = other.doc;
2811 			position = other.position;
2812 		}
2813 		return *this;
2814 	}
2815 	ByteIterator &operator=(ByteIterator &&) noexcept = default;
2816 	~ByteIterator() = default;
operator *() const2817 	char operator*() const noexcept {
2818 		return doc->CharAt(position);
2819 	}
operator ++()2820 	ByteIterator &operator++() noexcept {
2821 		position++;
2822 		return *this;
2823 	}
operator ++(int)2824 	ByteIterator operator++(int) noexcept {
2825 		ByteIterator retVal(*this);
2826 		position++;
2827 		return retVal;
2828 	}
operator --()2829 	ByteIterator &operator--() noexcept {
2830 		position--;
2831 		return *this;
2832 	}
operator ==(const ByteIterator & other) const2833 	bool operator==(const ByteIterator &other) const noexcept {
2834 		return doc == other.doc && position == other.position;
2835 	}
operator !=(const ByteIterator & other) const2836 	bool operator!=(const ByteIterator &other) const noexcept {
2837 		return doc != other.doc || position != other.position;
2838 	}
Pos() const2839 	Sci::Position Pos() const noexcept {
2840 		return position;
2841 	}
PosRoundUp() const2842 	Sci::Position PosRoundUp() const noexcept {
2843 		return position;
2844 	}
2845 };
2846 
2847 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2848 // Would be better to use sizeof(wchar_t) or similar to differentiate
2849 // but easier for now to hard-code platforms.
2850 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2851 // appear to allow specializing basic_regex over these.
2852 
2853 #ifdef _WIN32
2854 #define WCHAR_T_IS_16 1
2855 #else
2856 #define WCHAR_T_IS_16 0
2857 #endif
2858 
2859 #if WCHAR_T_IS_16
2860 
2861 // On Windows, report non-BMP characters as 2 separate surrogates as that
2862 // matches wregex since it is based on wchar_t.
2863 class UTF8Iterator {
2864 	// These 3 fields determine the iterator position and are used for comparisons
2865 	const Document *doc;
2866 	Sci::Position position;
2867 	size_t characterIndex;
2868 	// Remaining fields are derived from the determining fields so are excluded in comparisons
2869 	unsigned int lenBytes;
2870 	size_t lenCharacters;
2871 	wchar_t buffered[2];
2872 public:
2873 	typedef std::bidirectional_iterator_tag iterator_category;
2874 	typedef wchar_t value_type;
2875 	typedef ptrdiff_t difference_type;
2876 	typedef wchar_t* pointer;
2877 	typedef wchar_t& reference;
2878 
UTF8Iterator(const Document * doc_=nullptr,Sci::Position position_=0)2879 	UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2880 		doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
2881 		buffered[0] = 0;
2882 		buffered[1] = 0;
2883 		if (doc) {
2884 			ReadCharacter();
2885 		}
2886 	}
UTF8Iterator(const UTF8Iterator & other)2887 	UTF8Iterator(const UTF8Iterator &other) noexcept : buffered{} {
2888 		doc = other.doc;
2889 		position = other.position;
2890 		characterIndex = other.characterIndex;
2891 		lenBytes = other.lenBytes;
2892 		lenCharacters = other.lenCharacters;
2893 		buffered[0] = other.buffered[0];
2894 		buffered[1] = other.buffered[1];
2895 	}
2896 	UTF8Iterator(UTF8Iterator &&other) noexcept = default;
operator =(const UTF8Iterator & other)2897 	UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
2898 		if (this != &other) {
2899 			doc = other.doc;
2900 			position = other.position;
2901 			characterIndex = other.characterIndex;
2902 			lenBytes = other.lenBytes;
2903 			lenCharacters = other.lenCharacters;
2904 			buffered[0] = other.buffered[0];
2905 			buffered[1] = other.buffered[1];
2906 		}
2907 		return *this;
2908 	}
2909 	UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
2910 	~UTF8Iterator() = default;
operator *() const2911 	wchar_t operator*() const noexcept {
2912 		assert(lenCharacters != 0);
2913 		return buffered[characterIndex];
2914 	}
operator ++()2915 	UTF8Iterator &operator++() noexcept {
2916 		if ((characterIndex + 1) < (lenCharacters)) {
2917 			characterIndex++;
2918 		} else {
2919 			position += lenBytes;
2920 			ReadCharacter();
2921 			characterIndex = 0;
2922 		}
2923 		return *this;
2924 	}
operator ++(int)2925 	UTF8Iterator operator++(int) noexcept {
2926 		UTF8Iterator retVal(*this);
2927 		if ((characterIndex + 1) < (lenCharacters)) {
2928 			characterIndex++;
2929 		} else {
2930 			position += lenBytes;
2931 			ReadCharacter();
2932 			characterIndex = 0;
2933 		}
2934 		return retVal;
2935 	}
operator --()2936 	UTF8Iterator &operator--() noexcept {
2937 		if (characterIndex) {
2938 			characterIndex--;
2939 		} else {
2940 			position = doc->NextPosition(position, -1);
2941 			ReadCharacter();
2942 			characterIndex = lenCharacters - 1;
2943 		}
2944 		return *this;
2945 	}
operator ==(const UTF8Iterator & other) const2946 	bool operator==(const UTF8Iterator &other) const noexcept {
2947 		// Only test the determining fields, not the character widths and values derived from this
2948 		return doc == other.doc &&
2949 			position == other.position &&
2950 			characterIndex == other.characterIndex;
2951 	}
operator !=(const UTF8Iterator & other) const2952 	bool operator!=(const UTF8Iterator &other) const noexcept {
2953 		// Only test the determining fields, not the character widths and values derived from this
2954 		return doc != other.doc ||
2955 			position != other.position ||
2956 			characterIndex != other.characterIndex;
2957 	}
Pos() const2958 	Sci::Position Pos() const noexcept {
2959 		return position;
2960 	}
PosRoundUp() const2961 	Sci::Position PosRoundUp() const noexcept {
2962 		if (characterIndex)
2963 			return position + lenBytes;	// Force to end of character
2964 		else
2965 			return position;
2966 	}
2967 private:
ReadCharacter()2968 	void ReadCharacter() noexcept {
2969 		const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2970 		lenBytes = charExtracted.widthBytes;
2971 		if (charExtracted.character == unicodeReplacementChar) {
2972 			lenCharacters = 1;
2973 			buffered[0] = static_cast<wchar_t>(charExtracted.character);
2974 		} else {
2975 			lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2976 		}
2977 	}
2978 };
2979 
2980 #else
2981 
2982 // On Unix, report non-BMP characters as single characters
2983 
2984 class UTF8Iterator {
2985 	const Document *doc;
2986 	Sci::Position position;
2987 public:
2988 	typedef std::bidirectional_iterator_tag iterator_category;
2989 	typedef wchar_t value_type;
2990 	typedef ptrdiff_t difference_type;
2991 	typedef wchar_t* pointer;
2992 	typedef wchar_t& reference;
2993 
UTF8Iterator(const Document * doc_=nullptr,Sci::Position position_=0)2994 	UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2995 		doc(doc_), position(position_) {
2996 	}
UTF8Iterator(const UTF8Iterator & other)2997 	UTF8Iterator(const UTF8Iterator &other) noexcept {
2998 		doc = other.doc;
2999 		position = other.position;
3000 	}
3001 	UTF8Iterator(UTF8Iterator &&other) noexcept = default;
operator =(const UTF8Iterator & other)3002 	UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
3003 		if (this != &other) {
3004 			doc = other.doc;
3005 			position = other.position;
3006 		}
3007 		return *this;
3008 	}
3009 	UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
3010 	~UTF8Iterator() = default;
operator *() const3011 	wchar_t operator*() const noexcept {
3012 		const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3013 		return charExtracted.character;
3014 	}
operator ++()3015 	UTF8Iterator &operator++() noexcept {
3016 		position = doc->NextPosition(position, 1);
3017 		return *this;
3018 	}
operator ++(int)3019 	UTF8Iterator operator++(int) noexcept {
3020 		UTF8Iterator retVal(*this);
3021 		position = doc->NextPosition(position, 1);
3022 		return retVal;
3023 	}
operator --()3024 	UTF8Iterator &operator--() noexcept {
3025 		position = doc->NextPosition(position, -1);
3026 		return *this;
3027 	}
operator ==(const UTF8Iterator & other) const3028 	bool operator==(const UTF8Iterator &other) const noexcept {
3029 		return doc == other.doc && position == other.position;
3030 	}
operator !=(const UTF8Iterator & other) const3031 	bool operator!=(const UTF8Iterator &other) const noexcept {
3032 		return doc != other.doc || position != other.position;
3033 	}
Pos() const3034 	Sci::Position Pos() const noexcept {
3035 		return position;
3036 	}
PosRoundUp() const3037 	Sci::Position PosRoundUp() const noexcept {
3038 		return position;
3039 	}
3040 };
3041 
3042 #endif
3043 
MatchFlags(const Document * doc,Sci::Position startPos,Sci::Position endPos)3044 std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) {
3045 	std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
3046 	if (!doc->IsLineStartPosition(startPos))
3047 		flagsMatch |= std::regex_constants::match_not_bol;
3048 	if (!doc->IsLineEndPosition(endPos))
3049 		flagsMatch |= std::regex_constants::match_not_eol;
3050 	return flagsMatch;
3051 }
3052 
/**
 * Run @a regexp over the range described by @a resr and record the result in @a search.
 * Iterator is the document iterator type (it must provide Pos() and PosRoundUp())
 * and Regex is the matching std::basic_regex type.
 * Unless REGEX_MULTILINE is defined, lines are searched one at a time in the
 * direction given by resr.increment, stopping at the first line with a match.
 * For a backwards search the last match on that line is used.
 * On success bopat, eopat and pat of @a search are filled in for every sub-match.
 * @return true when a match was found.
 */
template<typename Iterator, typename Regex>
bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
	std::match_results<Iterator> match;

	// MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
	// CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
	// The std::regex::multiline option was added to C++17 to improve behaviour but
	// has not been implemented by compiler runtimes with MSVC always in multiline
	// mode and libc++ and libstdc++ always in single-line mode.
	// If multiline regex worked well then the line by line iteration could be removed
	// for the forwards case and replaced with the following 4 lines:
#ifdef REGEX_MULTILINE
	Iterator itStart(doc, resr.startPos);
	Iterator itEnd(doc, resr.endPos);
	const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos);
	const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
#else
	// Line by line.
	bool matched = false;
	for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
		// Restrict the search to the part of this line inside the search range
		const Range lineRange = resr.LineRange(line);
		Iterator itStart(doc, lineRange.start);
		Iterator itEnd(doc, lineRange.end);
		std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
		matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
		// Check for the last match on this line.
		if (matched) {
			if (resr.increment == -1) {
				// Searching backwards: keep searching from the end of the
				// current match until no later match is found on this line.
				while (matched) {
					Iterator itNext(doc, match[0].second.PosRoundUp());
					flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
					std::match_results<Iterator> matchNext;
					matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
					if (matched) {
						// Note that this tests the previously found match, not matchNext
						if (match[0].first == match[0].second) {
							// Empty match means failure so exit
							return false;
						}
						match = matchNext;
					}
				}
				// The loop ends with matched false but match still holds the last match found
				matched = true;
			}
			break;
		}
	}
#endif
	if (matched) {
		// Copy the position and text of each sub-match into the RESearch object
		for (size_t co = 0; co < match.size(); co++) {
			search.bopat[co] = match[co].first.Pos();
			search.eopat[co] = match[co].second.PosRoundUp();
			const Sci::Position lenMatch = search.eopat[co] - search.bopat[co];
			search.pat[co].resize(lenMatch);
			for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) {
				search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
			}
		}
	}
	return matched;
}
3113 
Cxx11RegexFindText(const Document * doc,Sci::Position minPos,Sci::Position maxPos,const char * s,bool caseSensitive,Sci::Position * length,RESearch & search)3114 Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3115 	bool caseSensitive, Sci::Position *length, RESearch &search) {
3116 	const RESearchRange resr(doc, minPos, maxPos);
3117 	try {
3118 		//ElapsedPeriod ep;
3119 		std::regex::flag_type flagsRe = std::regex::ECMAScript;
3120 		// Flags that appear to have no effect:
3121 		// | std::regex::collate | std::regex::extended;
3122 		if (!caseSensitive)
3123 			flagsRe = flagsRe | std::regex::icase;
3124 
3125 		// Clear the RESearch so can fill in matches
3126 		search.Clear();
3127 
3128 		bool matched = false;
3129 		if (SC_CP_UTF8 == doc->dbcsCodePage) {
3130 			const std::wstring ws = WStringFromUTF8(s);
3131 			std::wregex regexp;
3132 			regexp.assign(ws, flagsRe);
3133 			matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3134 
3135 		} else {
3136 			std::regex regexp;
3137 			regexp.assign(s, flagsRe);
3138 			matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3139 		}
3140 
3141 		Sci::Position posMatch = -1;
3142 		if (matched) {
3143 			posMatch = search.bopat[0];
3144 			*length = search.eopat[0] - search.bopat[0];
3145 		}
3146 		// Example - search in doc/ScintillaHistory.html for
3147 		// [[:upper:]]eta[[:space:]]
3148 		// On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3149 		//const double durSearch = ep.Duration(true);
3150 		//Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3151 		return posMatch;
3152 	} catch (std::regex_error &) {
3153 		// Failed to create regular expression
3154 		throw RegexError();
3155 	} catch (...) {
3156 		// Failed in some other way
3157 		return -1;
3158 	}
3159 }
3160 
3161 #endif
3162 
3163 }
3164 
FindText(Document * doc,Sci::Position minPos,Sci::Position maxPos,const char * s,bool caseSensitive,bool,bool,int flags,Sci::Position * length)3165 Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3166                         bool caseSensitive, bool, bool, int flags,
3167                         Sci::Position *length) {
3168 
3169 #ifndef NO_CXX11_REGEX
3170 	if (flags & SCFIND_CXX11REGEX) {
3171 			return Cxx11RegexFindText(doc, minPos, maxPos, s,
3172 			caseSensitive, length, search);
3173 	}
3174 #endif
3175 
3176 	const RESearchRange resr(doc, minPos, maxPos);
3177 
3178 	const bool posix = (flags & SCFIND_POSIX) != 0;
3179 
3180 	const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3181 	if (errmsg) {
3182 		return -1;
3183 	}
3184 	// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3185 	// Replace first '.' with '-' in each property file variable reference:
3186 	//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3187 	//     Replace: $(\1-\2)
3188 	Sci::Position pos = -1;
3189 	Sci::Position lenRet = 0;
3190 	const bool searchforLineStart = s[0] == '^';
3191 	const char searchEnd = s[*length - 1];
3192 	const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3193 	const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3194 	for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3195 		Sci::Position startOfLine = doc->LineStart(line);
3196 		Sci::Position endOfLine = doc->LineEnd(line);
3197 		if (resr.increment == 1) {
3198 			if (line == resr.lineRangeStart) {
3199 				if ((resr.startPos != startOfLine) && searchforLineStart)
3200 					continue;	// Can't match start of line if start position after start of line
3201 				startOfLine = resr.startPos;
3202 			}
3203 			if (line == resr.lineRangeEnd) {
3204 				if ((resr.endPos != endOfLine) && searchforLineEnd)
3205 					continue;	// Can't match end of line if end position before end of line
3206 				endOfLine = resr.endPos;
3207 			}
3208 		} else {
3209 			if (line == resr.lineRangeEnd) {
3210 				if ((resr.endPos != startOfLine) && searchforLineStart)
3211 					continue;	// Can't match start of line if end position after start of line
3212 				startOfLine = resr.endPos;
3213 			}
3214 			if (line == resr.lineRangeStart) {
3215 				if ((resr.startPos != endOfLine) && searchforLineEnd)
3216 					continue;	// Can't match end of line if start position before end of line
3217 				endOfLine = resr.startPos;
3218 			}
3219 		}
3220 
3221 		const DocumentIndexer di(doc, endOfLine);
3222 		int success = search.Execute(di, startOfLine, endOfLine);
3223 		if (success) {
3224 			pos = search.bopat[0];
3225 			// Ensure only whole characters selected
3226 			search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
3227 			lenRet = search.eopat[0] - search.bopat[0];
3228 			// There can be only one start of a line, so no need to look for last match in line
3229 			if ((resr.increment == -1) && !searchforLineStart) {
3230 				// Check for the last match on this line.
3231 				int repetitions = 1000;	// Break out of infinite loop
3232 				while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
3233 					success = search.Execute(di, pos+1, endOfLine);
3234 					if (success) {
3235 						if (search.eopat[0] <= minPos) {
3236 							pos = search.bopat[0];
3237 							lenRet = search.eopat[0] - search.bopat[0];
3238 						} else {
3239 							success = 0;
3240 						}
3241 					}
3242 				}
3243 			}
3244 			break;
3245 		}
3246 	}
3247 	*length = lenRet;
3248 	return pos;
3249 }
3250 
SubstituteByPosition(Document * doc,const char * text,Sci::Position * length)3251 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3252 	substituted.clear();
3253 	const DocumentIndexer di(doc, doc->Length());
3254 	search.GrabMatches(di);
3255 	for (Sci::Position j = 0; j < *length; j++) {
3256 		if (text[j] == '\\') {
3257 			if (text[j + 1] >= '0' && text[j + 1] <= '9') {
3258 				const unsigned int patNum = text[j + 1] - '0';
3259 				const Sci::Position len = search.eopat[patNum] - search.bopat[patNum];
3260 				if (!search.pat[patNum].empty())	// Will be null if try for a match that did not occur
3261 					substituted.append(search.pat[patNum].c_str(), len);
3262 				j++;
3263 			} else {
3264 				j++;
3265 				switch (text[j]) {
3266 				case 'a':
3267 					substituted.push_back('\a');
3268 					break;
3269 				case 'b':
3270 					substituted.push_back('\b');
3271 					break;
3272 				case 'f':
3273 					substituted.push_back('\f');
3274 					break;
3275 				case 'n':
3276 					substituted.push_back('\n');
3277 					break;
3278 				case 'r':
3279 					substituted.push_back('\r');
3280 					break;
3281 				case 't':
3282 					substituted.push_back('\t');
3283 					break;
3284 				case 'v':
3285 					substituted.push_back('\v');
3286 					break;
3287 				case '\\':
3288 					substituted.push_back('\\');
3289 					break;
3290 				default:
3291 					substituted.push_back('\\');
3292 					j--;
3293 				}
3294 			}
3295 		} else {
3296 			substituted.push_back(text[j]);
3297 		}
3298 	}
3299 	*length = substituted.length();
3300 	return substituted.c_str();
3301 }
3302 
3303 #ifndef SCI_OWNREGEX
3304 
CreateRegexSearch(CharClassify * charClassTable)3305 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
3306 	return new BuiltinRegex(charClassTable);
3307 }
3308 
3309 #endif
3310