1 // Scintilla source code edit control
2 /** @file Document.cxx
3  ** Text document that handles notifications, DBCS, styling, words and end of line.
4  **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
13 
14 #include <string>
15 #include <vector>
16 #include <algorithm>
17 
18 #include "Platform.h"
19 
20 #include "ILexer.h"
21 #include "Scintilla.h"
22 
23 #include "CharacterSet.h"
24 #include "SplitVector.h"
25 #include "Partitioning.h"
26 #include "RunStyles.h"
27 #include "CellBuffer.h"
28 #include "PerLine.h"
29 #include "CharClassify.h"
30 #include "Decoration.h"
31 #include "CaseFolder.h"
32 #include "Document.h"
33 #include "RESearch.h"
34 #include "UniConversion.h"
35 
36 #ifdef SCI_NAMESPACE
37 using namespace Scintilla;
38 #endif
39 
IsPunctuation(char ch)40 static inline bool IsPunctuation(char ch) {
41 	return IsASCII(ch) && ispunct(ch);
42 }
43 
Colourise(int start,int end)44 void LexInterface::Colourise(int start, int end) {
45 	if (pdoc && instance && !performingStyle) {
46 		// Protect against reentrance, which may occur, for example, when
47 		// fold points are discovered while performing styling and the folding
48 		// code looks for child lines which may trigger styling.
49 		performingStyle = true;
50 
51 		int lengthDoc = pdoc->Length();
52 		if (end == -1)
53 			end = lengthDoc;
54 		int len = end - start;
55 
56 		PLATFORM_ASSERT(len >= 0);
57 		PLATFORM_ASSERT(start + len <= lengthDoc);
58 
59 		int styleStart = 0;
60 		if (start > 0)
61 			styleStart = pdoc->StyleAt(start - 1);
62 
63 		if (len > 0) {
64 			instance->Lex(start, len, styleStart, pdoc);
65 			instance->Fold(start, len, styleStart, pdoc);
66 		}
67 
68 		performingStyle = false;
69 	}
70 }
71 
LineEndTypesSupported()72 int LexInterface::LineEndTypesSupported() {
73 	if (instance) {
74 		int interfaceVersion = instance->Version();
75 		if (interfaceVersion >= lvSubStyles) {
76 			ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
77 			return ssinstance->LineEndTypesSupported();
78 		}
79 	}
80 	return 0;
81 }
82 
Document()83 Document::Document() {
84 	refCount = 0;
85 	pcf = NULL;
86 #ifdef _WIN32
87 	eolMode = SC_EOL_CRLF;
88 #else
89 	eolMode = SC_EOL_LF;
90 #endif
91 	dbcsCodePage = 0;
92 	lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
93 	endStyled = 0;
94 	styleClock = 0;
95 	enteredModification = 0;
96 	enteredStyling = 0;
97 	enteredReadOnlyCount = 0;
98 	insertionSet = false;
99 	tabInChars = 8;
100 	indentInChars = 0;
101 	actualIndentInChars = 8;
102 	useTabs = true;
103 	tabIndents = true;
104 	backspaceUnindents = false;
105 
106 	matchesValid = false;
107 	regex = 0;
108 
109 	UTF8BytesOfLeadInitialise();
110 
111 	perLineData[ldMarkers] = new LineMarkers();
112 	perLineData[ldLevels] = new LineLevels();
113 	perLineData[ldState] = new LineState();
114 	perLineData[ldMargin] = new LineAnnotation();
115 	perLineData[ldAnnotation] = new LineAnnotation();
116 
117 	cb.SetPerLine(this);
118 
119 	pli = 0;
120 }
121 
~Document()122 Document::~Document() {
123 	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
124 		it->watcher->NotifyDeleted(this, it->userData);
125 	}
126 	for (int j=0; j<ldSize; j++) {
127 		delete perLineData[j];
128 		perLineData[j] = 0;
129 	}
130 	delete regex;
131 	regex = 0;
132 	delete pli;
133 	pli = 0;
134 	delete pcf;
135 	pcf = 0;
136 }
137 
Init()138 void Document::Init() {
139 	for (int j=0; j<ldSize; j++) {
140 		if (perLineData[j])
141 			perLineData[j]->Init();
142 	}
143 }
144 
LineEndTypesSupported() const145 int Document::LineEndTypesSupported() const {
146 	if ((SC_CP_UTF8 == dbcsCodePage) && pli)
147 		return pli->LineEndTypesSupported();
148 	else
149 		return 0;
150 }
151 
SetDBCSCodePage(int dbcsCodePage_)152 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
153 	if (dbcsCodePage != dbcsCodePage_) {
154 		dbcsCodePage = dbcsCodePage_;
155 		SetCaseFolder(NULL);
156 		cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
157 		return true;
158 	} else {
159 		return false;
160 	}
161 }
162 
SetLineEndTypesAllowed(int lineEndBitSet_)163 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
164 	if (lineEndBitSet != lineEndBitSet_) {
165 		lineEndBitSet = lineEndBitSet_;
166 		int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
167 		if (lineEndBitSetActive != cb.GetLineEndTypes()) {
168 			ModifiedAt(0);
169 			cb.SetLineEndTypes(lineEndBitSetActive);
170 			return true;
171 		} else {
172 			return false;
173 		}
174 	} else {
175 		return false;
176 	}
177 }
178 
InsertLine(int line)179 void Document::InsertLine(int line) {
180 	for (int j=0; j<ldSize; j++) {
181 		if (perLineData[j])
182 			perLineData[j]->InsertLine(line);
183 	}
184 }
185 
RemoveLine(int line)186 void Document::RemoveLine(int line) {
187 	for (int j=0; j<ldSize; j++) {
188 		if (perLineData[j])
189 			perLineData[j]->RemoveLine(line);
190 	}
191 }
192 
193 // Increase reference count and return its previous value.
AddRef()194 int Document::AddRef() {
195 	return refCount++;
196 }
197 
198 // Decrease reference count and return its previous value.
199 // Delete the document if reference count reaches zero.
Release()200 int SCI_METHOD Document::Release() {
201 	int curRefCount = --refCount;
202 	if (curRefCount == 0)
203 		delete this;
204 	return curRefCount;
205 }
206 
SetSavePoint()207 void Document::SetSavePoint() {
208 	cb.SetSavePoint();
209 	NotifySavePoint(true);
210 }
211 
TentativeUndo()212 void Document::TentativeUndo() {
213 	CheckReadOnly();
214 	if (enteredModification == 0) {
215 		enteredModification++;
216 		if (!cb.IsReadOnly()) {
217 			bool startSavePoint = cb.IsSavePoint();
218 			bool multiLine = false;
219 			int steps = cb.TentativeSteps();
220 			//Platform::DebugPrintf("Steps=%d\n", steps);
221 			for (int step = 0; step < steps; step++) {
222 				const int prevLinesTotal = LinesTotal();
223 				const Action &action = cb.GetUndoStep();
224 				if (action.at == removeAction) {
225 					NotifyModified(DocModification(
226 									SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
227 				} else if (action.at == containerAction) {
228 					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
229 					dm.token = action.position;
230 					NotifyModified(dm);
231 				} else {
232 					NotifyModified(DocModification(
233 									SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
234 				}
235 				cb.PerformUndoStep();
236 				if (action.at != containerAction) {
237 					ModifiedAt(action.position);
238 				}
239 
240 				int modFlags = SC_PERFORMED_UNDO;
241 				// With undo, an insertion action becomes a deletion notification
242 				if (action.at == removeAction) {
243 					modFlags |= SC_MOD_INSERTTEXT;
244 				} else if (action.at == insertAction) {
245 					modFlags |= SC_MOD_DELETETEXT;
246 				}
247 				if (steps > 1)
248 					modFlags |= SC_MULTISTEPUNDOREDO;
249 				const int linesAdded = LinesTotal() - prevLinesTotal;
250 				if (linesAdded != 0)
251 					multiLine = true;
252 				if (step == steps - 1) {
253 					modFlags |= SC_LASTSTEPINUNDOREDO;
254 					if (multiLine)
255 						modFlags |= SC_MULTILINEUNDOREDO;
256 				}
257 				NotifyModified(DocModification(modFlags, action.position, action.lenData,
258 											   linesAdded, action.data));
259 			}
260 
261 			bool endSavePoint = cb.IsSavePoint();
262 			if (startSavePoint != endSavePoint)
263 				NotifySavePoint(endSavePoint);
264 
265 			cb.TentativeCommit();
266 		}
267 		enteredModification--;
268 	}
269 }
270 
GetMark(int line)271 int Document::GetMark(int line) {
272 	return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
273 }
274 
MarkerNext(int lineStart,int mask) const275 int Document::MarkerNext(int lineStart, int mask) const {
276 	return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
277 }
278 
AddMark(int line,int markerNum)279 int Document::AddMark(int line, int markerNum) {
280 	if (line >= 0 && line <= LinesTotal()) {
281 		int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
282 			AddMark(line, markerNum, LinesTotal());
283 		DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
284 		NotifyModified(mh);
285 		return prev;
286 	} else {
287 		return 0;
288 	}
289 }
290 
AddMarkSet(int line,int valueSet)291 void Document::AddMarkSet(int line, int valueSet) {
292 	if (line < 0 || line > LinesTotal()) {
293 		return;
294 	}
295 	unsigned int m = valueSet;
296 	for (int i = 0; m; i++, m >>= 1)
297 		if (m & 1)
298 			static_cast<LineMarkers *>(perLineData[ldMarkers])->
299 				AddMark(line, i, LinesTotal());
300 	DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
301 	NotifyModified(mh);
302 }
303 
DeleteMark(int line,int markerNum)304 void Document::DeleteMark(int line, int markerNum) {
305 	static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
306 	DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
307 	NotifyModified(mh);
308 }
309 
DeleteMarkFromHandle(int markerHandle)310 void Document::DeleteMarkFromHandle(int markerHandle) {
311 	static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
312 	DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
313 	mh.line = -1;
314 	NotifyModified(mh);
315 }
316 
DeleteAllMarks(int markerNum)317 void Document::DeleteAllMarks(int markerNum) {
318 	bool someChanges = false;
319 	for (int line = 0; line < LinesTotal(); line++) {
320 		if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
321 			someChanges = true;
322 	}
323 	if (someChanges) {
324 		DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
325 		mh.line = -1;
326 		NotifyModified(mh);
327 	}
328 }
329 
LineFromHandle(int markerHandle)330 int Document::LineFromHandle(int markerHandle) {
331 	return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
332 }
333 
LineStart(int line) const334 int SCI_METHOD Document::LineStart(int line) const {
335 	return cb.LineStart(line);
336 }
337 
LineEnd(int line) const338 int SCI_METHOD Document::LineEnd(int line) const {
339 	if (line >= LinesTotal() - 1) {
340 		return LineStart(line + 1);
341 	} else {
342 		int position = LineStart(line + 1);
343 		if (SC_CP_UTF8 == dbcsCodePage) {
344 			unsigned char bytes[] = {
345 				static_cast<unsigned char>(cb.CharAt(position-3)),
346 				static_cast<unsigned char>(cb.CharAt(position-2)),
347 				static_cast<unsigned char>(cb.CharAt(position-1)),
348 			};
349 			if (UTF8IsSeparator(bytes)) {
350 				return position - UTF8SeparatorLength;
351 			}
352 			if (UTF8IsNEL(bytes+1)) {
353 				return position - UTF8NELLength;
354 			}
355 		}
356 		position--; // Back over CR or LF
357 		// When line terminator is CR+LF, may need to go back one more
358 		if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
359 			position--;
360 		}
361 		return position;
362 	}
363 }
364 
SetErrorStatus(int status)365 void SCI_METHOD Document::SetErrorStatus(int status) {
366 	// Tell the watchers an error has occurred.
367 	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
368 		it->watcher->NotifyErrorOccurred(this, it->userData, status);
369 	}
370 }
371 
LineFromPosition(int pos) const372 int SCI_METHOD Document::LineFromPosition(int pos) const {
373 	return cb.LineFromPosition(pos);
374 }
375 
LineEndPosition(int position) const376 int Document::LineEndPosition(int position) const {
377 	return LineEnd(LineFromPosition(position));
378 }
379 
IsLineEndPosition(int position) const380 bool Document::IsLineEndPosition(int position) const {
381 	return LineEnd(LineFromPosition(position)) == position;
382 }
383 
IsPositionInLineEnd(int position) const384 bool Document::IsPositionInLineEnd(int position) const {
385 	return position >= LineEnd(LineFromPosition(position));
386 }
387 
VCHomePosition(int position) const388 int Document::VCHomePosition(int position) const {
389 	int line = LineFromPosition(position);
390 	int startPosition = LineStart(line);
391 	int endLine = LineEnd(line);
392 	int startText = startPosition;
393 	while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
394 		startText++;
395 	if (position == startText)
396 		return startPosition;
397 	else
398 		return startText;
399 }
400 
SetLevel(int line,int level)401 int SCI_METHOD Document::SetLevel(int line, int level) {
402 	int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
403 	if (prev != level) {
404 		DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
405 		                   LineStart(line), 0, 0, 0, line);
406 		mh.foldLevelNow = level;
407 		mh.foldLevelPrev = prev;
408 		NotifyModified(mh);
409 	}
410 	return prev;
411 }
412 
GetLevel(int line) const413 int SCI_METHOD Document::GetLevel(int line) const {
414 	return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
415 }
416 
ClearLevels()417 void Document::ClearLevels() {
418 	static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
419 }
420 
IsSubordinate(int levelStart,int levelTry)421 static bool IsSubordinate(int levelStart, int levelTry) {
422 	if (levelTry & SC_FOLDLEVELWHITEFLAG)
423 		return true;
424 	else
425 		return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
426 }
427 
GetLastChild(int lineParent,int level,int lastLine)428 int Document::GetLastChild(int lineParent, int level, int lastLine) {
429 	if (level == -1)
430 		level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
431 	int maxLine = LinesTotal();
432 	int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
433 	int lineMaxSubord = lineParent;
434 	while (lineMaxSubord < maxLine - 1) {
435 		EnsureStyledTo(LineStart(lineMaxSubord + 2));
436 		if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
437 			break;
438 		if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
439 			break;
440 		lineMaxSubord++;
441 	}
442 	if (lineMaxSubord > lineParent) {
443 		if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
444 			// Have chewed up some whitespace that belongs to a parent so seek back
445 			if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
446 				lineMaxSubord--;
447 			}
448 		}
449 	}
450 	return lineMaxSubord;
451 }
452 
GetFoldParent(int line) const453 int Document::GetFoldParent(int line) const {
454 	int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
455 	int lineLook = line - 1;
456 	while ((lineLook > 0) && (
457 	            (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
458 	            ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
459 	      ) {
460 		lineLook--;
461 	}
462 	if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
463 	        ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
464 		return lineLook;
465 	} else {
466 		return -1;
467 	}
468 }
469 
GetHighlightDelimiters(HighlightDelimiter & highlightDelimiter,int line,int lastLine)470 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
471 	int level = GetLevel(line);
472 	int lookLastLine = Platform::Maximum(line, lastLine) + 1;
473 
474 	int lookLine = line;
475 	int lookLineLevel = level;
476 	int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
477 	while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
478 		((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
479 		lookLineLevel = GetLevel(--lookLine);
480 		lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
481 	}
482 
483 	int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
484 	if (beginFoldBlock == -1) {
485 		highlightDelimiter.Clear();
486 		return;
487 	}
488 
489 	int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
490 	int firstChangeableLineBefore = -1;
491 	if (endFoldBlock < line) {
492 		lookLine = beginFoldBlock - 1;
493 		lookLineLevel = GetLevel(lookLine);
494 		lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
495 		while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
496 			if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
497 				if (GetLastChild(lookLine, -1, lookLastLine) == line) {
498 					beginFoldBlock = lookLine;
499 					endFoldBlock = line;
500 					firstChangeableLineBefore = line - 1;
501 				}
502 			}
503 			if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
504 				break;
505 			lookLineLevel = GetLevel(--lookLine);
506 			lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
507 		}
508 	}
509 	if (firstChangeableLineBefore == -1) {
510 		for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
511 			lookLine >= beginFoldBlock;
512 			lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
513 			if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
514 				firstChangeableLineBefore = lookLine;
515 				break;
516 			}
517 		}
518 	}
519 	if (firstChangeableLineBefore == -1)
520 		firstChangeableLineBefore = beginFoldBlock - 1;
521 
522 	int firstChangeableLineAfter = -1;
523 	for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
524 		lookLine <= endFoldBlock;
525 		lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
526 		if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
527 			firstChangeableLineAfter = lookLine;
528 			break;
529 		}
530 	}
531 	if (firstChangeableLineAfter == -1)
532 		firstChangeableLineAfter = endFoldBlock + 1;
533 
534 	highlightDelimiter.beginFoldBlock = beginFoldBlock;
535 	highlightDelimiter.endFoldBlock = endFoldBlock;
536 	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
537 	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
538 }
539 
ClampPositionIntoDocument(int pos) const540 int Document::ClampPositionIntoDocument(int pos) const {
541 	return Platform::Clamp(pos, 0, Length());
542 }
543 
IsCrLf(int pos) const544 bool Document::IsCrLf(int pos) const {
545 	if (pos < 0)
546 		return false;
547 	if (pos >= (Length() - 1))
548 		return false;
549 	return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
550 }
551 
LenChar(int pos)552 int Document::LenChar(int pos) {
553 	if (pos < 0) {
554 		return 1;
555 	} else if (IsCrLf(pos)) {
556 		return 2;
557 	} else if (SC_CP_UTF8 == dbcsCodePage) {
558 		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
559 		const int widthCharBytes = UTF8BytesOfLead[leadByte];
560 		int lengthDoc = Length();
561 		if ((pos + widthCharBytes) > lengthDoc)
562 			return lengthDoc - pos;
563 		else
564 			return widthCharBytes;
565 	} else if (dbcsCodePage) {
566 		return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
567 	} else {
568 		return 1;
569 	}
570 }
571 
InGoodUTF8(int pos,int & start,int & end) const572 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
573 	int trail = pos;
574 	while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
575 		trail--;
576 	start = (trail > 0) ? trail-1 : trail;
577 
578 	const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
579 	const int widthCharBytes = UTF8BytesOfLead[leadByte];
580 	if (widthCharBytes == 1) {
581 		return false;
582 	} else {
583 		int trailBytes = widthCharBytes - 1;
584 		int len = pos - start;
585 		if (len > trailBytes)
586 			// pos too far from lead
587 			return false;
588 		char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
589 		for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
590 			charBytes[b] = cb.CharAt(static_cast<int>(start+b));
591 		int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
592 		if (utf8status & UTF8MaskInvalid)
593 			return false;
594 		end = start + widthCharBytes;
595 		return true;
596 	}
597 }
598 
599 // Normalise a position so that it is not halfway through a two byte character.
600 // This can occur in two situations -
601 // When lines are terminated with \r\n pairs which should be treated as one character.
602 // When displaying DBCS text such as Japanese.
603 // If moving, move the position in the indicated direction.
MovePositionOutsideChar(int pos,int moveDir,bool checkLineEnd)604 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
605 	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
606 	// If out of range, just return minimum/maximum value.
607 	if (pos <= 0)
608 		return 0;
609 	if (pos >= Length())
610 		return Length();
611 
612 	// PLATFORM_ASSERT(pos > 0 && pos < Length());
613 	if (checkLineEnd && IsCrLf(pos - 1)) {
614 		if (moveDir > 0)
615 			return pos + 1;
616 		else
617 			return pos - 1;
618 	}
619 
620 	if (dbcsCodePage) {
621 		if (SC_CP_UTF8 == dbcsCodePage) {
622 			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
623 			// If ch is not a trail byte then pos is valid intercharacter position
624 			if (UTF8IsTrailByte(ch)) {
625 				int startUTF = pos;
626 				int endUTF = pos;
627 				if (InGoodUTF8(pos, startUTF, endUTF)) {
628 					// ch is a trail byte within a UTF-8 character
629 					if (moveDir > 0)
630 						pos = endUTF;
631 					else
632 						pos = startUTF;
633 				}
634 				// Else invalid UTF-8 so return position of isolated trail byte
635 			}
636 		} else {
637 			// Anchor DBCS calculations at start of line because start of line can
638 			// not be a DBCS trail byte.
639 			int posStartLine = LineStart(LineFromPosition(pos));
640 			if (pos == posStartLine)
641 				return pos;
642 
643 			// Step back until a non-lead-byte is found.
644 			int posCheck = pos;
645 			while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
646 				posCheck--;
647 
648 			// Check from known start of character.
649 			while (posCheck < pos) {
650 				int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
651 				if (posCheck + mbsize == pos) {
652 					return pos;
653 				} else if (posCheck + mbsize > pos) {
654 					if (moveDir > 0) {
655 						return posCheck + mbsize;
656 					} else {
657 						return posCheck;
658 					}
659 				}
660 				posCheck += mbsize;
661 			}
662 		}
663 	}
664 
665 	return pos;
666 }
667 
668 // NextPosition moves between valid positions - it can not handle a position in the middle of a
669 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
670 // A \r\n pair is treated as two characters.
NextPosition(int pos,int moveDir) const671 int Document::NextPosition(int pos, int moveDir) const {
672 	// If out of range, just return minimum/maximum value.
673 	int increment = (moveDir > 0) ? 1 : -1;
674 	if (pos + increment <= 0)
675 		return 0;
676 	if (pos + increment >= Length())
677 		return Length();
678 
679 	if (dbcsCodePage) {
680 		if (SC_CP_UTF8 == dbcsCodePage) {
681 			if (increment == 1) {
682 				// Simple forward movement case so can avoid some checks
683 				const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
684 				if (UTF8IsAscii(leadByte)) {
685 					// Single byte character or invalid
686 					pos++;
687 				} else {
688 					const int widthCharBytes = UTF8BytesOfLead[leadByte];
689 					char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
690 					for (int b=1; b<widthCharBytes; b++)
691 						charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
692 					int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
693 					if (utf8status & UTF8MaskInvalid)
694 						pos++;
695 					else
696 						pos += utf8status & UTF8MaskWidth;
697 				}
698 			} else {
699 				// Examine byte before position
700 				pos--;
701 				unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
702 				// If ch is not a trail byte then pos is valid intercharacter position
703 				if (UTF8IsTrailByte(ch)) {
704 					// If ch is a trail byte in a valid UTF-8 character then return start of character
705 					int startUTF = pos;
706 					int endUTF = pos;
707 					if (InGoodUTF8(pos, startUTF, endUTF)) {
708 						pos = startUTF;
709 					}
710 					// Else invalid UTF-8 so return position of isolated trail byte
711 				}
712 			}
713 		} else {
714 			if (moveDir > 0) {
715 				int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
716 				pos += mbsize;
717 				if (pos > Length())
718 					pos = Length();
719 			} else {
720 				// Anchor DBCS calculations at start of line because start of line can
721 				// not be a DBCS trail byte.
722 				int posStartLine = LineStart(LineFromPosition(pos));
723 				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
724 				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
725 				if ((pos - 1) <= posStartLine) {
726 					return pos - 1;
727 				} else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
728 					// Must actually be trail byte
729 					return pos - 2;
730 				} else {
731 					// Otherwise, step back until a non-lead-byte is found.
732 					int posTemp = pos - 1;
733 					while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
734 						;
735 					// Now posTemp+1 must point to the beginning of a character,
736 					// so figure out whether we went back an even or an odd
737 					// number of bytes and go back 1 or 2 bytes, respectively.
738 					return (pos - 1 - ((pos - posTemp) & 1));
739 				}
740 			}
741 		}
742 	} else {
743 		pos += increment;
744 	}
745 
746 	return pos;
747 }
748 
NextCharacter(int & pos,int moveDir) const749 bool Document::NextCharacter(int &pos, int moveDir) const {
750 	// Returns true if pos changed
751 	int posNext = NextPosition(pos, moveDir);
752 	if (posNext == pos) {
753 		return false;
754 	} else {
755 		pos = posNext;
756 		return true;
757 	}
758 }
759 
// Decode the code point of the UTF-8 sequence starting at us.
// The lead byte selects a 2, 3 or 4 byte form; lead bytes below 0xC2 or from
// 0xF5 upwards are returned as they are. Trail bytes are not validated, and
// only as many bytes as the lead byte announces are read.
static inline int UnicodeFromBytes(const unsigned char *us) {
	const unsigned char lead = us[0];
	if ((lead < 0xC2) || (lead >= 0xF5))
		return lead;
	if (lead < 0xE0) {
		// Two bytes: 5 payload bits from the lead, 6 from the trail.
		return ((lead & 0x1F) << 6) | (us[1] & 0x3F);
	}
	if (lead < 0xF0) {
		// Three bytes: 4 + 6 + 6 payload bits.
		return ((lead & 0xF) << 12) | ((us[1] & 0x3F) << 6) | (us[2] & 0x3F);
	}
	// Four bytes: 3 + 6 + 6 + 6 payload bits.
	return ((lead & 0x7) << 18) | ((us[1] & 0x3F) << 12) | ((us[2] & 0x3F) << 6) | (us[3] & 0x3F);
}
772 
773 // Return -1  on out-of-bounds
GetRelativePosition(int positionStart,int characterOffset) const774 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
775 	int pos = positionStart;
776 	if (dbcsCodePage) {
777 		const int increment = (characterOffset > 0) ? 1 : -1;
778 		while (characterOffset != 0) {
779 			const int posNext = NextPosition(pos, increment);
780 			if (posNext == pos)
781 				return INVALID_POSITION;
782 			pos = posNext;
783 			characterOffset -= increment;
784 		}
785 	} else {
786 		pos = positionStart + characterOffset;
787 		if ((pos < 0) || (pos > Length()))
788 			return INVALID_POSITION;
789 	}
790 	return pos;
791 }
792 
GetCharacterAndWidth(int position,int * pWidth) const793 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
794 	int character;
795 	int bytesInCharacter = 1;
796 	if (dbcsCodePage) {
797 		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
798 		if (SC_CP_UTF8 == dbcsCodePage) {
799 			if (UTF8IsAscii(leadByte)) {
800 				// Single byte character or invalid
801 				character =  leadByte;
802 			} else {
803 				const int widthCharBytes = UTF8BytesOfLead[leadByte];
804 				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
805 				for (int b=1; b<widthCharBytes; b++)
806 					charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
807 				int utf8status = UTF8Classify(charBytes, widthCharBytes);
808 				if (utf8status & UTF8MaskInvalid) {
809 					// Report as singleton surrogate values which are invalid Unicode
810 					character =  0xDC80 + leadByte;
811 				} else {
812 					bytesInCharacter = utf8status & UTF8MaskWidth;
813 					character = UnicodeFromBytes(charBytes);
814 				}
815 			}
816 		} else {
817 			if (IsDBCSLeadByte(leadByte)) {
818 				bytesInCharacter = 2;
819 				character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
820 			} else {
821 				character = leadByte;
822 			}
823 		}
824 	} else {
825 		character = cb.CharAt(position);
826 	}
827 	if (pWidth) {
828 		*pWidth = bytesInCharacter;
829 	}
830 	return character;
831 }
832 
CodePage() const833 int SCI_METHOD Document::CodePage() const {
834 	return dbcsCodePage;
835 }
836 
IsDBCSLeadByte(char ch) const837 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
838 	// Byte ranges found in Wikipedia articles with relevant search strings in each case
839 	unsigned char uch = static_cast<unsigned char>(ch);
840 	switch (dbcsCodePage) {
841 		case 932:
842 			// Shift_jis
843 			return ((uch >= 0x81) && (uch <= 0x9F)) ||
844 				((uch >= 0xE0) && (uch <= 0xFC));
845 				// Lead bytes F0 to FC may be a Microsoft addition.
846 		case 936:
847 			// GBK
848 			return (uch >= 0x81) && (uch <= 0xFE);
849 		case 949:
850 			// Korean Wansung KS C-5601-1987
851 			return (uch >= 0x81) && (uch <= 0xFE);
852 		case 950:
853 			// Big5
854 			return (uch >= 0x81) && (uch <= 0xFE);
855 		case 1361:
856 			// Korean Johab KS C-5601-1992
857 			return
858 				((uch >= 0x84) && (uch <= 0xD3)) ||
859 				((uch >= 0xD8) && (uch <= 0xDE)) ||
860 				((uch >= 0xE0) && (uch <= 0xF9));
861 	}
862 	return false;
863 }
864 
// True for the space and horizontal tab characters only.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
		case ' ':
		case '\t':
			return true;
		default:
			return false;
	}
}
868 
869 // Need to break text into segments near lengthSegment but taking into
870 // account the encoding to not break inside a UTF-8 or DBCS character
871 // and also trying to avoid breaking inside a pair of combining characters.
872 // The segment length must always be long enough (more than 4 bytes)
873 // so that there will be at least one whole character to make a segment.
874 // For UTF-8, text must consist only of valid whole characters.
875 // In preference order from best to worst:
876 //   1) Break after space
877 //   2) Break before punctuation
878 //   3) Break after whole character
879 
SafeSegment(const char * text,int length,int lengthSegment) const880 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
881 	if (length <= lengthSegment)
882 		return length;
883 	int lastSpaceBreak = -1;
884 	int lastPunctuationBreak = -1;
885 	int lastEncodingAllowedBreak = 0;
886 	for (int j=0; j < lengthSegment;) {
887 		unsigned char ch = static_cast<unsigned char>(text[j]);
888 		if (j > 0) {
889 			if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
890 				lastSpaceBreak = j;
891 			}
892 			if (ch < 'A') {
893 				lastPunctuationBreak = j;
894 			}
895 		}
896 		lastEncodingAllowedBreak = j;
897 
898 		if (dbcsCodePage == SC_CP_UTF8) {
899 			j += UTF8BytesOfLead[ch];
900 		} else if (dbcsCodePage) {
901 			j += IsDBCSLeadByte(ch) ? 2 : 1;
902 		} else {
903 			j++;
904 		}
905 	}
906 	if (lastSpaceBreak >= 0) {
907 		return lastSpaceBreak;
908 	} else if (lastPunctuationBreak >= 0) {
909 		return lastPunctuationBreak;
910 	}
911 	return lastEncodingAllowedBreak;
912 }
913 
CodePageFamily() const914 EncodingFamily Document::CodePageFamily() const {
915 	if (SC_CP_UTF8 == dbcsCodePage)
916 		return efUnicode;
917 	else if (dbcsCodePage)
918 		return efDBCS;
919 	else
920 		return efEightBit;
921 }
922 
ModifiedAt(int pos)923 void Document::ModifiedAt(int pos) {
924 	if (endStyled > pos)
925 		endStyled = pos;
926 }
927 
CheckReadOnly()928 void Document::CheckReadOnly() {
929 	if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
930 		enteredReadOnlyCount++;
931 		NotifyModifyAttempt();
932 		enteredReadOnlyCount--;
933 	}
934 }
935 
936 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
937 // SetStyleAt does not change the persistent state of a document
938 
DeleteChars(int pos,int len)939 bool Document::DeleteChars(int pos, int len) {
940 	if (len <= 0)
941 		return false;
942 	if ((pos + len) > Length())
943 		return false;
944 	CheckReadOnly();
945 	if (enteredModification != 0) {
946 		return false;
947 	} else {
948 		enteredModification++;
949 		if (!cb.IsReadOnly()) {
950 			NotifyModified(
951 			    DocModification(
952 			        SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
953 			        pos, len,
954 			        0, 0));
955 			int prevLinesTotal = LinesTotal();
956 			bool startSavePoint = cb.IsSavePoint();
957 			bool startSequence = false;
958 			const char *text = cb.DeleteChars(pos, len, startSequence);
959 			if (startSavePoint && cb.IsCollectingUndo())
960 				NotifySavePoint(!startSavePoint);
961 			if ((pos < Length()) || (pos == 0))
962 				ModifiedAt(pos);
963 			else
964 				ModifiedAt(pos-1);
965 			NotifyModified(
966 			    DocModification(
967 			        SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
968 			        pos, len,
969 			        LinesTotal() - prevLinesTotal, text));
970 		}
971 		enteredModification--;
972 	}
973 	return !cb.IsReadOnly();
974 }
975 
976 /**
977  * Insert a string with a length.
978  */
InsertString(int position,const char * s,int insertLength)979 int Document::InsertString(int position, const char *s, int insertLength) {
980 	if (insertLength <= 0) {
981 		return 0;
982 	}
983 	CheckReadOnly();	// Application may change read only state here
984 	if (cb.IsReadOnly()) {
985 		return 0;
986 	}
987 	if (enteredModification != 0) {
988 		return 0;
989 	}
990 	enteredModification++;
991 	insertionSet = false;
992 	insertion.clear();
993 	NotifyModified(
994 		DocModification(
995 			SC_MOD_INSERTCHECK,
996 			position, insertLength,
997 			0, s));
998 	if (insertionSet) {
999 		s = insertion.c_str();
1000 		insertLength = static_cast<int>(insertion.length());
1001 	}
1002 	NotifyModified(
1003 		DocModification(
1004 			SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1005 			position, insertLength,
1006 			0, s));
1007 	int prevLinesTotal = LinesTotal();
1008 	bool startSavePoint = cb.IsSavePoint();
1009 	bool startSequence = false;
1010 	const char *text = cb.InsertString(position, s, insertLength, startSequence);
1011 	if (startSavePoint && cb.IsCollectingUndo())
1012 		NotifySavePoint(!startSavePoint);
1013 	ModifiedAt(position);
1014 	NotifyModified(
1015 		DocModification(
1016 			SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1017 			position, insertLength,
1018 			LinesTotal() - prevLinesTotal, text));
1019 	if (insertionSet) {	// Free memory as could be large
1020 		std::string().swap(insertion);
1021 	}
1022 	enteredModification--;
1023 	return insertLength;
1024 }
1025 
// Record replacement text for an insertion: stores a copy of the length bytes at s
// in insertion and sets insertionSet, which InsertString tests after sending
// its SC_MOD_INSERTCHECK notification.
void Document::ChangeInsertion(const char *s, int length) {
	insertionSet = true;
	insertion.assign(s, length);
}
1030 
AddData(char * data,int length)1031 int SCI_METHOD Document::AddData(char *data, int length) {
1032 	try {
1033 		int position = Length();
1034 		InsertString(position, data, length);
1035 	} catch (std::bad_alloc &) {
1036 		return SC_STATUS_BADALLOC;
1037 	} catch (...) {
1038 		return SC_STATUS_FAILURE;
1039 	}
1040 	return 0;
1041 }
1042 
// Hand out this Document object as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
	return this;
}
1046 
/**
 * Undo the steps reported by cb.StartUndo(), notifying watchers before and
 * after each step and reporting any change of save point state at the end.
 * Returns -1 when no insert or remove step was undone; otherwise the position of
 * the last such step, moved past the text when that step put removed text back.
 */
int Document::Undo() {
	int newPos = -1;
	CheckReadOnly();
	// Do nothing when already inside a modification or when undo is not being collected.
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			int steps = cb.StartUndo();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			// Track a run of re-insertions that touch each other so that the
			// returned position can be placed at the end of the whole run.
			int coalescedRemovePos = -1;
			int coalescedRemoveLen = 0;
			int prevRemoveActionPos = -1;
			int prevRemoveActionLen = 0;
			for (int step = 0; step < steps; step++) {
				const int prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Undoing a removal puts text back, so it is announced as an insertion.
				if (action.at == removeAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
				} else if (action.at == containerAction) {
					// Container actions are passed on with the action's position as the token.
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
					dm.token = action.position;
					NotifyModified(dm);
					// A container action that may not coalesce ends the current run.
					if (!action.mayCoalesce) {
						coalescedRemovePos = -1;
						coalescedRemoveLen = 0;
						prevRemoveActionPos = -1;
						prevRemoveActionLen = 0;
					}
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
				}
				cb.PerformUndoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_UNDO;
				// With undo the notification is the inverse of the recorded action:
				// a remove action is reported as inserted text and an insert action
				// as deleted text.
				if (action.at == removeAction) {
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
					// Extend the run when this re-insertion is at the same position as
					// the previous one or directly after it, else start a new run.
					if ((coalescedRemoveLen > 0) &&
						(action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
						coalescedRemoveLen += action.lenData;
						newPos = coalescedRemovePos + coalescedRemoveLen;
					} else {
						coalescedRemovePos = action.position;
						coalescedRemoveLen = action.lenData;
					}
					prevRemoveActionPos = action.position;
					prevRemoveActionLen = action.lenData;
				} else if (action.at == insertAction) {
					modFlags |= SC_MOD_DELETETEXT;
					// Undoing an insertion ends the current run.
					coalescedRemovePos = -1;
					coalescedRemoveLen = 0;
					prevRemoveActionPos = -1;
					prevRemoveActionLen = 0;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const int linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step is flagged, together with whether any step changed the line count.
				if (step == steps - 1) {
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
											   linesAdded, action.data));
			}

			bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1131 
/**
 * Redo the steps reported by cb.StartRedo(), notifying watchers before and
 * after each step and reporting any change of save point state at the end.
 * Returns -1 when no insert or remove step was redone; otherwise the position of
 * the last such step, moved past the text when that step was an insertion.
 */
int Document::Redo() {
	int newPos = -1;
	CheckReadOnly();
	// Do nothing when already inside a modification or when undo is not being collected.
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			int steps = cb.StartRedo();
			for (int step = 0; step < steps; step++) {
				const int prevLinesTotal = LinesTotal();
				const Action &action = cb.GetRedoStep();
				// Redo repeats the recorded action, so the notification matches its kind.
				if (action.at == insertAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
				} else if (action.at == containerAction) {
					// Container actions are passed on with the action's position as the token.
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
				}
				cb.PerformRedoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_REDO;
				if (action.at == insertAction) {
					// Place the returned position after the text that was inserted again.
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
				} else if (action.at == removeAction) {
					modFlags |= SC_MOD_DELETETEXT;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const int linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				// The final step is flagged, together with whether any step changed the line count.
				if (step == steps - 1) {
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(
					DocModification(modFlags, action.position, action.lenData,
									linesAdded, action.data));
			}

			bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1191 
DelChar(int pos)1192 void Document::DelChar(int pos) {
1193 	DeleteChars(pos, LenChar(pos));
1194 }
1195 
DelCharBack(int pos)1196 void Document::DelCharBack(int pos) {
1197 	if (pos <= 0) {
1198 		return;
1199 	} else if (IsCrLf(pos - 2)) {
1200 		DeleteChars(pos - 2, 2);
1201 	} else if (dbcsCodePage) {
1202 		int startChar = NextPosition(pos, -1);
1203 		DeleteChars(startChar, pos - startChar);
1204 	} else {
1205 		DeleteChars(pos - 1, 1);
1206 	}
1207 }
1208 
// Return the first multiple of tabSize that is strictly greater than pos
// (for non-negative pos), which is the column a tab at pos advances to.
static int NextTab(int pos, int tabSize) {
	const int tabStopsPassed = pos / tabSize;
	return (tabStopsPassed + 1) * tabSize;
}
1212 
// Build the white space for an indentation of indent columns.
// Unless insertSpaces is set, as many tabs as fit are used first, each worth
// tabSize columns; whatever remains is made up with spaces.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string result;
	if (!insertSpaces) {
		for (; indent >= tabSize; indent -= tabSize)
			result.push_back('\t');
	}
	if (indent > 0)
		result.append(static_cast<size_t>(indent), ' ');
	return result;
}
1227 
GetLineIndentation(int line)1228 int SCI_METHOD Document::GetLineIndentation(int line) {
1229 	int indent = 0;
1230 	if ((line >= 0) && (line < LinesTotal())) {
1231 		int lineStart = LineStart(line);
1232 		int length = Length();
1233 		for (int i = lineStart; i < length; i++) {
1234 			char ch = cb.CharAt(i);
1235 			if (ch == ' ')
1236 				indent++;
1237 			else if (ch == '\t')
1238 				indent = NextTab(indent, tabInChars);
1239 			else
1240 				return indent;
1241 		}
1242 	}
1243 	return indent;
1244 }
1245 
SetLineIndentation(int line,int indent)1246 int Document::SetLineIndentation(int line, int indent) {
1247 	int indentOfLine = GetLineIndentation(line);
1248 	if (indent < 0)
1249 		indent = 0;
1250 	if (indent != indentOfLine) {
1251 		std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1252 		int thisLineStart = LineStart(line);
1253 		int indentPos = GetLineIndentPosition(line);
1254 		UndoGroup ug(this);
1255 		DeleteChars(thisLineStart, indentPos - thisLineStart);
1256 		return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1257 			static_cast<int>(linebuf.length()));
1258 	} else {
1259 		return GetLineIndentPosition(line);
1260 	}
1261 }
1262 
GetLineIndentPosition(int line) const1263 int Document::GetLineIndentPosition(int line) const {
1264 	if (line < 0)
1265 		return 0;
1266 	int pos = LineStart(line);
1267 	int length = Length();
1268 	while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1269 		pos++;
1270 	}
1271 	return pos;
1272 }
1273 
GetColumn(int pos)1274 int Document::GetColumn(int pos) {
1275 	int column = 0;
1276 	int line = LineFromPosition(pos);
1277 	if ((line >= 0) && (line < LinesTotal())) {
1278 		for (int i = LineStart(line); i < pos;) {
1279 			char ch = cb.CharAt(i);
1280 			if (ch == '\t') {
1281 				column = NextTab(column, tabInChars);
1282 				i++;
1283 			} else if (ch == '\r') {
1284 				return column;
1285 			} else if (ch == '\n') {
1286 				return column;
1287 			} else if (i >= Length()) {
1288 				return column;
1289 			} else {
1290 				column++;
1291 				i = NextPosition(i, 1);
1292 			}
1293 		}
1294 	}
1295 	return column;
1296 }
1297 
CountCharacters(int startPos,int endPos)1298 int Document::CountCharacters(int startPos, int endPos) {
1299 	startPos = MovePositionOutsideChar(startPos, 1, false);
1300 	endPos = MovePositionOutsideChar(endPos, -1, false);
1301 	int count = 0;
1302 	int i = startPos;
1303 	while (i < endPos) {
1304 		count++;
1305 		if (IsCrLf(i))
1306 			i++;
1307 		i = NextPosition(i, 1);
1308 	}
1309 	return count;
1310 }
1311 
FindColumn(int line,int column)1312 int Document::FindColumn(int line, int column) {
1313 	int position = LineStart(line);
1314 	if ((line >= 0) && (line < LinesTotal())) {
1315 		int columnCurrent = 0;
1316 		while ((columnCurrent < column) && (position < Length())) {
1317 			char ch = cb.CharAt(position);
1318 			if (ch == '\t') {
1319 				columnCurrent = NextTab(columnCurrent, tabInChars);
1320 				if (columnCurrent > column)
1321 					return position;
1322 				position++;
1323 			} else if (ch == '\r') {
1324 				return position;
1325 			} else if (ch == '\n') {
1326 				return position;
1327 			} else {
1328 				columnCurrent++;
1329 				position = NextPosition(position, 1);
1330 			}
1331 		}
1332 	}
1333 	return position;
1334 }
1335 
Indent(bool forwards,int lineBottom,int lineTop)1336 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1337 	// Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1338 	for (int line = lineBottom; line >= lineTop; line--) {
1339 		int indentOfLine = GetLineIndentation(line);
1340 		if (forwards) {
1341 			if (LineStart(line) < LineEnd(line)) {
1342 				SetLineIndentation(line, indentOfLine + IndentSize());
1343 			}
1344 		} else {
1345 			SetLineIndentation(line, indentOfLine - IndentSize());
1346 		}
1347 	}
1348 }
1349 
1350 // Convert line endings for a piece of text to a particular mode.
1351 // Stop at len or when a NUL is found.
TransformLineEnds(const char * s,size_t len,int eolModeWanted)1352 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1353 	std::string dest;
1354 	for (size_t i = 0; (i < len) && (s[i]); i++) {
1355 		if (s[i] == '\n' || s[i] == '\r') {
1356 			if (eolModeWanted == SC_EOL_CR) {
1357 				dest.push_back('\r');
1358 			} else if (eolModeWanted == SC_EOL_LF) {
1359 				dest.push_back('\n');
1360 			} else { // eolModeWanted == SC_EOL_CRLF
1361 				dest.push_back('\r');
1362 				dest.push_back('\n');
1363 			}
1364 			if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1365 				i++;
1366 			}
1367 		} else {
1368 			dest.push_back(s[i]);
1369 		}
1370 	}
1371 	return dest;
1372 }
1373 
ConvertLineEnds(int eolModeSet)1374 void Document::ConvertLineEnds(int eolModeSet) {
1375 	UndoGroup ug(this);
1376 
1377 	for (int pos = 0; pos < Length(); pos++) {
1378 		if (cb.CharAt(pos) == '\r') {
1379 			if (cb.CharAt(pos + 1) == '\n') {
1380 				// CRLF
1381 				if (eolModeSet == SC_EOL_CR) {
1382 					DeleteChars(pos + 1, 1); // Delete the LF
1383 				} else if (eolModeSet == SC_EOL_LF) {
1384 					DeleteChars(pos, 1); // Delete the CR
1385 				} else {
1386 					pos++;
1387 				}
1388 			} else {
1389 				// CR
1390 				if (eolModeSet == SC_EOL_CRLF) {
1391 					pos += InsertString(pos + 1, "\n", 1); // Insert LF
1392 				} else if (eolModeSet == SC_EOL_LF) {
1393 					pos += InsertString(pos, "\n", 1); // Insert LF
1394 					DeleteChars(pos, 1); // Delete CR
1395 					pos--;
1396 				}
1397 			}
1398 		} else if (cb.CharAt(pos) == '\n') {
1399 			// LF
1400 			if (eolModeSet == SC_EOL_CRLF) {
1401 				pos += InsertString(pos, "\r", 1); // Insert CR
1402 			} else if (eolModeSet == SC_EOL_CR) {
1403 				pos += InsertString(pos, "\r", 1); // Insert CR
1404 				DeleteChars(pos, 1); // Delete LF
1405 				pos--;
1406 			}
1407 		}
1408 	}
1409 
1410 }
1411 
IsWhiteLine(int line) const1412 bool Document::IsWhiteLine(int line) const {
1413 	int currentChar = LineStart(line);
1414 	int endLine = LineEnd(line);
1415 	while (currentChar < endLine) {
1416 		if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1417 			return false;
1418 		}
1419 		++currentChar;
1420 	}
1421 	return true;
1422 }
1423 
ParaUp(int pos) const1424 int Document::ParaUp(int pos) const {
1425 	int line = LineFromPosition(pos);
1426 	line--;
1427 	while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1428 		line--;
1429 	}
1430 	while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1431 		line--;
1432 	}
1433 	line++;
1434 	return LineStart(line);
1435 }
1436 
ParaDown(int pos) const1437 int Document::ParaDown(int pos) const {
1438 	int line = LineFromPosition(pos);
1439 	while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1440 		line++;
1441 	}
1442 	while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1443 		line++;
1444 	}
1445 	if (line < LinesTotal())
1446 		return LineStart(line);
1447 	else // end of a document
1448 		return LineEnd(line-1);
1449 }
1450 
WordCharClass(unsigned char ch) const1451 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1452 	if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1453 		return CharClassify::ccWord;
1454 	return charClass.GetClass(ch);
1455 }
1456 
1457 /**
1458  * Used by commmands that want to select whole words.
1459  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1460  */
ExtendWordSelect(int pos,int delta,bool onlyWordCharacters)1461 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1462 	CharClassify::cc ccStart = CharClassify::ccWord;
1463 	if (delta < 0) {
1464 		if (!onlyWordCharacters)
1465 			ccStart = WordCharClass(cb.CharAt(pos-1));
1466 		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1467 			pos--;
1468 	} else {
1469 		if (!onlyWordCharacters && pos < Length())
1470 			ccStart = WordCharClass(cb.CharAt(pos));
1471 		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1472 			pos++;
1473 	}
1474 	return MovePositionOutsideChar(pos, delta, true);
1475 }
1476 
1477 /**
1478  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1479  * (delta < 0).
1480  * This is looking for a transition between character classes although there is also some
1481  * additional movement to transit white space.
1482  * Used by cursor movement by word commands.
1483  */
NextWordStart(int pos,int delta)1484 int Document::NextWordStart(int pos, int delta) {
1485 	if (delta < 0) {
1486 		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1487 			pos--;
1488 		if (pos > 0) {
1489 			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1490 			while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1491 				pos--;
1492 			}
1493 		}
1494 	} else {
1495 		CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1496 		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1497 			pos++;
1498 		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1499 			pos++;
1500 	}
1501 	return pos;
1502 }
1503 
1504 /**
1505  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1506  * (delta < 0).
1507  * This is looking for a transition between character classes although there is also some
1508  * additional movement to transit white space.
1509  * Used by cursor movement by word commands.
1510  */
NextWordEnd(int pos,int delta)1511 int Document::NextWordEnd(int pos, int delta) {
1512 	if (delta < 0) {
1513 		if (pos > 0) {
1514 			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1515 			if (ccStart != CharClassify::ccSpace) {
1516 				while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1517 					pos--;
1518 				}
1519 			}
1520 			while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1521 				pos--;
1522 			}
1523 		}
1524 	} else {
1525 		while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1526 			pos++;
1527 		}
1528 		if (pos < Length()) {
1529 			CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1530 			while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1531 				pos++;
1532 			}
1533 		}
1534 	}
1535 	return pos;
1536 }
1537 
1538 /**
1539  * Check that the character at the given position is a word or punctuation character and that
1540  * the previous character is of a different character class.
1541  */
IsWordStartAt(int pos) const1542 bool Document::IsWordStartAt(int pos) const {
1543 	if (pos > 0) {
1544 		CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1545 		return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1546 			(ccPos != WordCharClass(CharAt(pos - 1)));
1547 	}
1548 	return true;
1549 }
1550 
1551 /**
1552  * Check that the character at the given position is a word or punctuation character and that
1553  * the next character is of a different character class.
1554  */
IsWordEndAt(int pos) const1555 bool Document::IsWordEndAt(int pos) const {
1556 	if (pos < Length()) {
1557 		CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1558 		return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1559 			(ccPrev != WordCharClass(CharAt(pos)));
1560 	}
1561 	return true;
1562 }
1563 
1564 /**
1565  * Check that the given range is has transitions between character classes at both
1566  * ends and where the characters on the inside are word or punctuation characters.
1567  */
IsWordAt(int start,int end) const1568 bool Document::IsWordAt(int start, int end) const {
1569 	return IsWordStartAt(start) && IsWordEndAt(end);
1570 }
1571 
MatchesWordOptions(bool word,bool wordStart,int pos,int length) const1572 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1573 	return (!word && !wordStart) ||
1574 			(word && IsWordAt(pos, pos + length)) ||
1575 			(wordStart && IsWordStartAt(pos));
1576 }
1577 
// True when a case folder is currently held, that is when pcf is not null.
bool Document::HasCaseFolder(void) const {
	return pcf != 0;
}
1581 
// Replace the case folder: the folder held so far is deleted and pcf_ is stored
// in its place, to be deleted in turn by a later call.
// NOTE(review): passing the folder that is already held would delete it and then
// keep the stale pointer - callers presumably always pass a new object; confirm.
void Document::SetCaseFolder(CaseFolder *pcf_) {
	delete pcf;
	pcf = pcf_;
}
1586 
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * On entry *length is the number of bytes in search. When it is not positive
 * minPos is returned at once. Regular expression searches are handed to the
 * regex object, which is created on first use. For the other searches the
 * result is the position of the match or -1 when there is none, and the case
 * insensitive UTF-8 and DBCS searches set *length to the number of document
 * bytes matched, which may differ from the length of the search text.
 * The case insensitive searches use pcf without checking that it is set.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
                        int *length) {
	if (*length <= 0)
		return minPos;
	if (regExp) {
		if (!regex)
			regex = CreateRegexSearch(&charClass);
		return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
	} else {

		const bool forward = minPos <= maxPos;
		const int increment = forward ? 1 : -1;

		// Range endpoints should not be inside DBCS characters, but just in case, move them.
		const int startPos = MovePositionOutsideChar(minPos, increment, false);
		const int endPos = MovePositionOutsideChar(maxPos, increment, false);

		// Compute actual search ranges needed
		const int lengthFind = *length;

		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
		// No match may extend beyond the higher end of the range
		const int limitPos = Platform::Maximum(startPos, endPos);
		int pos = startPos;
		if (!forward) {
			// Back all of a character
			pos = NextPosition(pos, increment);
		}
		if (caseSensitive) {
			// Case sensitive: compare the document bytes directly with the search text.
			// Going forwards there is no point starting where the text can no longer fit.
			const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			const char charStartSearch =  search[0];
			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
				// Only look further when the first byte matches
				if (CharAt(pos) == charStartSearch) {
					bool found = (pos + lengthFind) <= limitPos;
					for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
						found = CharAt(pos + indexSearch) == search[indexSearch];
					}
					if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
						return pos;
					}
				}
				if (!NextCharacter(pos, increment))
					break;
			}
		} else if (SC_CP_UTF8 == dbcsCodePage) {
			// Case insensitive UTF-8: fold the search text once, then fold the document
			// one character at a time and compare the folded bytes.
			const size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
			const int lenSearch = static_cast<int>(
				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
			char bytes[UTF8MaxBytes + 1];
			char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int widthFirstCharacter = 0;
				int posIndexDocument = pos;
				int indexSearch = 0;
				bool characterMatches = true;
				for (;;) {
					// Collect the bytes of the character at posIndexDocument
					const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
					bytes[0] = leadByte;
					int widthChar = 1;
					if (!UTF8IsAscii(leadByte)) {
						const int widthCharBytes = UTF8BytesOfLead[leadByte];
						for (int b=1; b<widthCharBytes; b++) {
							bytes[b] = cb.CharAt(posIndexDocument+b);
						}
						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
					}
					// Remember the width of the first character for the forward step below
					if (!widthFirstCharacter)
						widthFirstCharacter = widthChar;
					if ((posIndexDocument + widthChar) > limitPos)
						break;
					const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
					folded[lenFlat] = 0;
					// Does folded match the buffer
					// NOTE(review): this reads lenFlat bytes of searchThing from indexSearch;
					// confirm the sizing of searchThing always keeps that inside the vector.
					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					if (!characterMatches)
						break;
					posIndexDocument += widthChar;
					indexSearch += lenFlat;
					if (indexSearch >= lenSearch)
						break;
				}
				// A match requires exactly all of the folded search text to have been consumed
				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
						*length = posIndexDocument - pos;
						return pos;
					}
				}
				if (forward) {
					pos += widthFirstCharacter;
				} else {
					if (!NextCharacter(pos, increment))
						break;
				}
			}
		} else if (dbcsCodePage) {
			// Case insensitive DBCS: as for UTF-8 but with characters of one or two bytes
			const size_t maxBytesCharacter = 2;
			const size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
			const int lenSearch = static_cast<int>(
				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int indexDocument = 0;
				int indexSearch = 0;
				bool characterMatches = true;
				while (characterMatches &&
					((pos + indexDocument) < limitPos) &&
					(indexSearch < lenSearch)) {
					char bytes[maxBytesCharacter + 1];
					bytes[0] = cb.CharAt(pos + indexDocument);
					const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
					if (widthChar == 2)
						bytes[1] = cb.CharAt(pos + indexDocument + 1);
					if ((pos + indexDocument + widthChar) > limitPos)
						break;
					char folded[maxBytesCharacter * maxFoldingExpansion + 1];
					const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
					folded[lenFlat] = 0;
					// Does folded match the buffer
					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					indexDocument += widthChar;
					indexSearch += lenFlat;
				}
				// A match requires exactly all of the folded search text to have been consumed
				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
						*length = indexDocument;
						return pos;
					}
				}
				if (!NextCharacter(pos, increment))
					break;
			}
		} else {
			// Case insensitive single byte encoding: fold each document byte and
			// compare it with the folded search text
			const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			std::vector<char> searchThing(lengthFind + 1);
			pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
				bool found = (pos + lengthFind) <= limitPos;
				for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
					char ch = CharAt(pos + indexSearch);
					char folded[2];
					pcf->Fold(folded, sizeof(folded), &ch, 1);
					found = folded[0] == searchThing[indexSearch];
				}
				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
					return pos;
				}
				if (!NextCharacter(pos, increment))
					break;
			}
		}
	}
	//Platform::DebugPrintf("Not found\n");
	return -1;
}
1747 
SubstituteByPosition(const char * text,int * length)1748 const char *Document::SubstituteByPosition(const char *text, int *length) {
1749 	if (regex)
1750 		return regex->SubstituteByPosition(this, text, length);
1751 	else
1752 		return 0;
1753 }
1754 
// Number of lines in the document, as reported by the cell buffer.
int Document::LinesTotal() const {
	return cb.Lines();
}
1758 
// Forward to charClass to restore its default character classes;
// includeWordClass is passed through unchanged.
void Document::SetDefaultCharClasses(bool includeWordClass) {
    charClass.SetDefaultCharClasses(includeWordClass);
}
1762 
// Forward to charClass to give the characters in chars the class newCharClass.
void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
    charClass.SetCharClasses(chars, newCharClass);
}
1766 
// Forward to charClass to retrieve the characters of characterClass into buffer,
// returning whatever charClass.GetCharsOfClass returns.
int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
    return charClass.GetCharsOfClass(characterClass, buffer);
}
1770 
// Set the position at which the next SetStyleFor or SetStyles call starts
// applying styles. The unnamed char parameter is ignored.
void SCI_METHOD Document::StartStyling(int position, char) {
	endStyled = position;
}
1774 
SetStyleFor(int length,char style)1775 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1776 	if (enteredStyling != 0) {
1777 		return false;
1778 	} else {
1779 		enteredStyling++;
1780 		int prevEndStyled = endStyled;
1781 		if (cb.SetStyleFor(endStyled, length, style)) {
1782 			DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1783 			                   prevEndStyled, length);
1784 			NotifyModified(mh);
1785 		}
1786 		endStyled += length;
1787 		enteredStyling--;
1788 		return true;
1789 	}
1790 }
1791 
SetStyles(int length,const char * styles)1792 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1793 	if (enteredStyling != 0) {
1794 		return false;
1795 	} else {
1796 		enteredStyling++;
1797 		bool didChange = false;
1798 		int startMod = 0;
1799 		int endMod = 0;
1800 		for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1801 			PLATFORM_ASSERT(endStyled < Length());
1802 			if (cb.SetStyleAt(endStyled, styles[iPos])) {
1803 				if (!didChange) {
1804 					startMod = endStyled;
1805 				}
1806 				didChange = true;
1807 				endMod = endStyled;
1808 			}
1809 		}
1810 		if (didChange) {
1811 			DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1812 			                   startMod, endMod - startMod + 1);
1813 			NotifyModified(mh);
1814 		}
1815 		enteredStyling--;
1816 		return true;
1817 	}
1818 }
1819 
EnsureStyledTo(int pos)1820 void Document::EnsureStyledTo(int pos) {
1821 	if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1822 		IncrementStyleClock();
1823 		if (pli && !pli->UseContainerLexing()) {
1824 			int lineEndStyled = LineFromPosition(GetEndStyled());
1825 			int endStyledTo = LineStart(lineEndStyled);
1826 			pli->Colourise(endStyledTo, pos);
1827 		} else {
1828 			// Ask the watchers to style, and stop as soon as one responds.
1829 			for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1830 				(pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1831 				it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1832 			}
1833 		}
1834 	}
1835 }
1836 
LexerChanged()1837 void Document::LexerChanged() {
1838 	// Tell the watchers the lexer has changed.
1839 	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1840 		it->watcher->NotifyLexerChanged(this, it->userData);
1841 	}
1842 }
1843 
SetLineState(int line,int state)1844 int SCI_METHOD Document::SetLineState(int line, int state) {
1845 	int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1846 	if (state != statePrevious) {
1847 		DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1848 		NotifyModified(mh);
1849 	}
1850 	return statePrevious;
1851 }
1852 
GetLineState(int line) const1853 int SCI_METHOD Document::GetLineState(int line) const {
1854 	return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1855 }
1856 
GetMaxLineState()1857 int Document::GetMaxLineState() {
1858 	return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1859 }
1860 
ChangeLexerState(int start,int end)1861 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1862 	DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1863 	NotifyModified(mh);
1864 }
1865 
MarginStyledText(int line) const1866 StyledText Document::MarginStyledText(int line) const {
1867 	LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1868 	return StyledText(pla->Length(line), pla->Text(line),
1869 		pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1870 }
1871 
MarginSetText(int line,const char * text)1872 void Document::MarginSetText(int line, const char *text) {
1873 	static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1874 	DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1875 	NotifyModified(mh);
1876 }
1877 
MarginSetStyle(int line,int style)1878 void Document::MarginSetStyle(int line, int style) {
1879 	static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1880 	NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1881 }
1882 
MarginSetStyles(int line,const unsigned char * styles)1883 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1884 	static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1885 	NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1886 }
1887 
MarginClearAll()1888 void Document::MarginClearAll() {
1889 	int maxEditorLine = LinesTotal();
1890 	for (int l=0; l<maxEditorLine; l++)
1891 		MarginSetText(l, 0);
1892 	// Free remaining data
1893 	static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1894 }
1895 
AnnotationStyledText(int line) const1896 StyledText Document::AnnotationStyledText(int line) const {
1897 	LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1898 	return StyledText(pla->Length(line), pla->Text(line),
1899 		pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1900 }
1901 
AnnotationSetText(int line,const char * text)1902 void Document::AnnotationSetText(int line, const char *text) {
1903 	if (line >= 0 && line < LinesTotal()) {
1904 		const int linesBefore = AnnotationLines(line);
1905 		static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1906 		const int linesAfter = AnnotationLines(line);
1907 		DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1908 		mh.annotationLinesAdded = linesAfter - linesBefore;
1909 		NotifyModified(mh);
1910 	}
1911 }
1912 
AnnotationSetStyle(int line,int style)1913 void Document::AnnotationSetStyle(int line, int style) {
1914 	static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1915 	DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1916 	NotifyModified(mh);
1917 }
1918 
AnnotationSetStyles(int line,const unsigned char * styles)1919 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1920 	if (line >= 0 && line < LinesTotal()) {
1921 		static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1922 	}
1923 }
1924 
AnnotationLines(int line) const1925 int Document::AnnotationLines(int line) const {
1926 	return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1927 }
1928 
AnnotationClearAll()1929 void Document::AnnotationClearAll() {
1930 	int maxEditorLine = LinesTotal();
1931 	for (int l=0; l<maxEditorLine; l++)
1932 		AnnotationSetText(l, 0);
1933 	// Free remaining data
1934 	static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1935 }
1936 
IncrementStyleClock()1937 void Document::IncrementStyleClock() {
1938 	styleClock = (styleClock + 1) % 0x100000;
1939 }
1940 
DecorationFillRange(int position,int value,int fillLength)1941 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1942 	if (decorations.FillRange(position, value, fillLength)) {
1943 		DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1944 							position, fillLength);
1945 		NotifyModified(mh);
1946 	}
1947 }
1948 
AddWatcher(DocWatcher * watcher,void * userData)1949 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1950 	WatcherWithUserData wwud(watcher, userData);
1951 	std::vector<WatcherWithUserData>::iterator it =
1952 		std::find(watchers.begin(), watchers.end(), wwud);
1953 	if (it != watchers.end())
1954 		return false;
1955 	watchers.push_back(wwud);
1956 	return true;
1957 }
1958 
RemoveWatcher(DocWatcher * watcher,void * userData)1959 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1960 	std::vector<WatcherWithUserData>::iterator it =
1961 		std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1962 	if (it != watchers.end()) {
1963 		watchers.erase(it);
1964 		return true;
1965 	}
1966 	return false;
1967 }
1968 
NotifyModifyAttempt()1969 void Document::NotifyModifyAttempt() {
1970 	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1971 		it->watcher->NotifyModifyAttempt(this, it->userData);
1972 	}
1973 }
1974 
NotifySavePoint(bool atSavePoint)1975 void Document::NotifySavePoint(bool atSavePoint) {
1976 	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1977 		it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1978 	}
1979 }
1980 
NotifyModified(DocModification mh)1981 void Document::NotifyModified(DocModification mh) {
1982 	if (mh.modificationType & SC_MOD_INSERTTEXT) {
1983 		decorations.InsertSpace(mh.position, mh.length);
1984 	} else if (mh.modificationType & SC_MOD_DELETETEXT) {
1985 		decorations.DeleteRange(mh.position, mh.length);
1986 	}
1987 	for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1988 		it->watcher->NotifyModified(this, mh, it->userData);
1989 	}
1990 }
1991 
IsWordPartSeparator(char ch) const1992 bool Document::IsWordPartSeparator(char ch) const {
1993 	return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1994 }
1995 
WordPartLeft(int pos)1996 int Document::WordPartLeft(int pos) {
1997 	if (pos > 0) {
1998 		--pos;
1999 		char startChar = cb.CharAt(pos);
2000 		if (IsWordPartSeparator(startChar)) {
2001 			while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2002 				--pos;
2003 			}
2004 		}
2005 		if (pos > 0) {
2006 			startChar = cb.CharAt(pos);
2007 			--pos;
2008 			if (IsLowerCase(startChar)) {
2009 				while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2010 					--pos;
2011 				if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2012 					++pos;
2013 			} else if (IsUpperCase(startChar)) {
2014 				while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2015 					--pos;
2016 				if (!IsUpperCase(cb.CharAt(pos)))
2017 					++pos;
2018 			} else if (IsADigit(startChar)) {
2019 				while (pos > 0 && IsADigit(cb.CharAt(pos)))
2020 					--pos;
2021 				if (!IsADigit(cb.CharAt(pos)))
2022 					++pos;
2023 			} else if (IsPunctuation(startChar)) {
2024 				while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2025 					--pos;
2026 				if (!IsPunctuation(cb.CharAt(pos)))
2027 					++pos;
2028 			} else if (isspacechar(startChar)) {
2029 				while (pos > 0 && isspacechar(cb.CharAt(pos)))
2030 					--pos;
2031 				if (!isspacechar(cb.CharAt(pos)))
2032 					++pos;
2033 			} else if (!IsASCII(startChar)) {
2034 				while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2035 					--pos;
2036 				if (IsASCII(cb.CharAt(pos)))
2037 					++pos;
2038 			} else {
2039 				++pos;
2040 			}
2041 		}
2042 	}
2043 	return pos;
2044 }
2045 
WordPartRight(int pos)2046 int Document::WordPartRight(int pos) {
2047 	char startChar = cb.CharAt(pos);
2048 	int length = Length();
2049 	if (IsWordPartSeparator(startChar)) {
2050 		while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2051 			++pos;
2052 		startChar = cb.CharAt(pos);
2053 	}
2054 	if (!IsASCII(startChar)) {
2055 		while (pos < length && !IsASCII(cb.CharAt(pos)))
2056 			++pos;
2057 	} else if (IsLowerCase(startChar)) {
2058 		while (pos < length && IsLowerCase(cb.CharAt(pos)))
2059 			++pos;
2060 	} else if (IsUpperCase(startChar)) {
2061 		if (IsLowerCase(cb.CharAt(pos + 1))) {
2062 			++pos;
2063 			while (pos < length && IsLowerCase(cb.CharAt(pos)))
2064 				++pos;
2065 		} else {
2066 			while (pos < length && IsUpperCase(cb.CharAt(pos)))
2067 				++pos;
2068 		}
2069 		if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2070 			--pos;
2071 	} else if (IsADigit(startChar)) {
2072 		while (pos < length && IsADigit(cb.CharAt(pos)))
2073 			++pos;
2074 	} else if (IsPunctuation(startChar)) {
2075 		while (pos < length && IsPunctuation(cb.CharAt(pos)))
2076 			++pos;
2077 	} else if (isspacechar(startChar)) {
2078 		while (pos < length && isspacechar(cb.CharAt(pos)))
2079 			++pos;
2080 	} else {
2081 		++pos;
2082 	}
2083 	return pos;
2084 }
2085 
/**
 * True when c is a carriage return or a line feed.
 */
bool IsLineEndChar(char c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
2089 
ExtendStyleRange(int pos,int delta,bool singleLine)2090 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2091 	int sStart = cb.StyleAt(pos);
2092 	if (delta < 0) {
2093 		while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2094 			pos--;
2095 		pos++;
2096 	} else {
2097 		while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2098 			pos++;
2099 	}
2100 	return pos;
2101 }
2102 
/**
 * Return the partner of a brace character: round, square, curly and
 * angle brackets are recognised in both directions.
 * Any other character yields '\0'.
 */
static char BraceOpposite(char ch) {
	// Partners are stored side by side, so flipping the low bit of the
	// index of one gives the index of the other.
	static const char bracePairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
	const unsigned int pairEntries = sizeof(bracePairs) / sizeof(bracePairs[0]);
	for (unsigned int i = 0; i < pairEntries; i++) {
		if (bracePairs[i] == ch)
			return bracePairs[i ^ 1u];
	}
	return '\0';
}
2125 
2126 // TODO: should be able to extend styled region to find matching brace
BraceMatch(int position,int)2127 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2128 	char chBrace = CharAt(position);
2129 	char chSeek = BraceOpposite(chBrace);
2130 	if (chSeek == '\0')
2131 		return - 1;
2132 	char styBrace = static_cast<char>(StyleAt(position));
2133 	int direction = -1;
2134 	if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2135 		direction = 1;
2136 	int depth = 1;
2137 	position = NextPosition(position, direction);
2138 	while ((position >= 0) && (position < Length())) {
2139 		char chAtPos = CharAt(position);
2140 		char styAtPos = static_cast<char>(StyleAt(position));
2141 		if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2142 			if (chAtPos == chBrace)
2143 				depth++;
2144 			if (chAtPos == chSeek)
2145 				depth--;
2146 			if (depth == 0)
2147 				return position;
2148 		}
2149 		int positionBeforeMove = position;
2150 		position = NextPosition(position, direction);
2151 		if (position == positionBeforeMove)
2152 			break;
2153 	}
2154 	return - 1;
2155 }
2156 
2157 /**
2158  * Implementation of RegexSearchBase for the default built-in regular expression engine
2159  */
2160 class BuiltinRegex : public RegexSearchBase {
2161 public:
BuiltinRegex(CharClassify * charClassTable)2162 	explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2163 
~BuiltinRegex()2164 	virtual ~BuiltinRegex() {
2165 	}
2166 
2167 	virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2168                         bool caseSensitive, bool word, bool wordStart, int flags,
2169                         int *length);
2170 
2171 	virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2172 
2173 private:
2174 	RESearch search;
2175 	std::string substituted;
2176 };
2177 
2178 // Define a way for the Regular Expression code to access the document
2179 class DocumentIndexer : public CharacterIndexer {
2180 	Document *pdoc;
2181 	int end;
2182 public:
DocumentIndexer(Document * pdoc_,int end_)2183 	DocumentIndexer(Document *pdoc_, int end_) :
2184 		pdoc(pdoc_), end(end_) {
2185 	}
2186 
~DocumentIndexer()2187 	virtual ~DocumentIndexer() {
2188 	}
2189 
CharAt(int index)2190 	virtual char CharAt(int index) {
2191 		if (index < 0 || index >= end)
2192 			return 0;
2193 		else
2194 			return pdoc->CharAt(index);
2195 	}
2196 };
2197 
FindText(Document * doc,int minPos,int maxPos,const char * s,bool caseSensitive,bool,bool,int flags,int * length)2198 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2199                         bool caseSensitive, bool, bool, int flags,
2200                         int *length) {
2201 	bool posix = (flags & SCFIND_POSIX) != 0;
2202 	int increment = (minPos <= maxPos) ? 1 : -1;
2203 
2204 	int startPos = minPos;
2205 	int endPos = maxPos;
2206 
2207 	// Range endpoints should not be inside DBCS characters, but just in case, move them.
2208 	startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2209 	endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2210 
2211 	const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2212 	if (errmsg) {
2213 		return -1;
2214 	}
2215 	// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2216 	// Replace first '.' with '-' in each property file variable reference:
2217 	//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2218 	//     Replace: $(\1-\2)
2219 	int lineRangeStart = doc->LineFromPosition(startPos);
2220 	int lineRangeEnd = doc->LineFromPosition(endPos);
2221 	if ((increment == 1) &&
2222 		(startPos >= doc->LineEnd(lineRangeStart)) &&
2223 		(lineRangeStart < lineRangeEnd)) {
2224 		// the start position is at end of line or between line end characters.
2225 		lineRangeStart++;
2226 		startPos = doc->LineStart(lineRangeStart);
2227 	} else if ((increment == -1) &&
2228 	           (startPos <= doc->LineStart(lineRangeStart)) &&
2229 	           (lineRangeStart > lineRangeEnd)) {
2230 		// the start position is at beginning of line.
2231 		lineRangeStart--;
2232 		startPos = doc->LineEnd(lineRangeStart);
2233 	}
2234 	int pos = -1;
2235 	int lenRet = 0;
2236 	char searchEnd = s[*length - 1];
2237 	char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2238 	int lineRangeBreak = lineRangeEnd + increment;
2239 	for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2240 		int startOfLine = doc->LineStart(line);
2241 		int endOfLine = doc->LineEnd(line);
2242 		if (increment == 1) {
2243 			if (line == lineRangeStart) {
2244 				if ((startPos != startOfLine) && (s[0] == '^'))
2245 					continue;	// Can't match start of line if start position after start of line
2246 				startOfLine = startPos;
2247 			}
2248 			if (line == lineRangeEnd) {
2249 				if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2250 					continue;	// Can't match end of line if end position before end of line
2251 				endOfLine = endPos;
2252 			}
2253 		} else {
2254 			if (line == lineRangeEnd) {
2255 				if ((endPos != startOfLine) && (s[0] == '^'))
2256 					continue;	// Can't match start of line if end position after start of line
2257 				startOfLine = endPos;
2258 			}
2259 			if (line == lineRangeStart) {
2260 				if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2261 					continue;	// Can't match end of line if start position before end of line
2262 				endOfLine = startPos;
2263 			}
2264 		}
2265 
2266 		DocumentIndexer di(doc, endOfLine);
2267 		int success = search.Execute(di, startOfLine, endOfLine);
2268 		if (success) {
2269 			pos = search.bopat[0];
2270 			// Ensure only whole characters selected
2271 			search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2272 			lenRet = search.eopat[0] - search.bopat[0];
2273 			// There can be only one start of a line, so no need to look for last match in line
2274 			if ((increment == -1) && (s[0] != '^')) {
2275 				// Check for the last match on this line.
2276 				int repetitions = 1000;	// Break out of infinite loop
2277 				while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2278 					success = search.Execute(di, pos+1, endOfLine);
2279 					if (success) {
2280 						if (search.eopat[0] <= minPos) {
2281 							pos = search.bopat[0];
2282 							lenRet = search.eopat[0] - search.bopat[0];
2283 						} else {
2284 							success = 0;
2285 						}
2286 					}
2287 				}
2288 			}
2289 			break;
2290 		}
2291 	}
2292 	*length = lenRet;
2293 	return pos;
2294 }
2295 
SubstituteByPosition(Document * doc,const char * text,int * length)2296 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2297 	substituted.clear();
2298 	DocumentIndexer di(doc, doc->Length());
2299 	search.GrabMatches(di);
2300 	for (int j = 0; j < *length; j++) {
2301 		if (text[j] == '\\') {
2302 			if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2303 				unsigned int patNum = text[j + 1] - '0';
2304 				unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2305 				if (!search.pat[patNum].empty())	// Will be null if try for a match that did not occur
2306 					substituted.append(search.pat[patNum].c_str(), len);
2307 				j++;
2308 			} else {
2309 				j++;
2310 				switch (text[j]) {
2311 				case 'a':
2312 					substituted.push_back('\a');
2313 					break;
2314 				case 'b':
2315 					substituted.push_back('\b');
2316 					break;
2317 				case 'f':
2318 					substituted.push_back('\f');
2319 					break;
2320 				case 'n':
2321 					substituted.push_back('\n');
2322 					break;
2323 				case 'r':
2324 					substituted.push_back('\r');
2325 					break;
2326 				case 't':
2327 					substituted.push_back('\t');
2328 					break;
2329 				case 'v':
2330 					substituted.push_back('\v');
2331 					break;
2332 				case '\\':
2333 					substituted.push_back('\\');
2334 					break;
2335 				default:
2336 					substituted.push_back('\\');
2337 					j--;
2338 				}
2339 			}
2340 		} else {
2341 			substituted.push_back(text[j]);
2342 		}
2343 	}
2344 	*length = static_cast<int>(substituted.length());
2345 	return substituted.c_str();
2346 }
2347 
2348 #ifndef SCI_OWNREGEX
2349 
2350 #ifdef SCI_NAMESPACE
2351 
CreateRegexSearch(CharClassify * charClassTable)2352 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2353 	return new BuiltinRegex(charClassTable);
2354 }
2355 
2356 #else
2357 
CreateRegexSearch(CharClassify * charClassTable)2358 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2359 	return new BuiltinRegex(charClassTable);
2360 }
2361 
2362 #endif
2363 
2364 #endif
2365