1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
13
14 #include <string>
15 #include <vector>
16 #include <algorithm>
17
18 #include "Platform.h"
19
20 #include "ILexer.h"
21 #include "Scintilla.h"
22
23 #include "CharacterSet.h"
24 #include "SplitVector.h"
25 #include "Partitioning.h"
26 #include "RunStyles.h"
27 #include "CellBuffer.h"
28 #include "PerLine.h"
29 #include "CharClassify.h"
30 #include "Decoration.h"
31 #include "CaseFolder.h"
32 #include "Document.h"
33 #include "RESearch.h"
34 #include "UniConversion.h"
35
36 #ifdef SCI_NAMESPACE
37 using namespace Scintilla;
38 #endif
39
IsPunctuation(char ch)40 static inline bool IsPunctuation(char ch) {
41 return IsASCII(ch) && ispunct(ch);
42 }
43
Colourise(int start,int end)44 void LexInterface::Colourise(int start, int end) {
45 if (pdoc && instance && !performingStyle) {
46 // Protect against reentrance, which may occur, for example, when
47 // fold points are discovered while performing styling and the folding
48 // code looks for child lines which may trigger styling.
49 performingStyle = true;
50
51 int lengthDoc = pdoc->Length();
52 if (end == -1)
53 end = lengthDoc;
54 int len = end - start;
55
56 PLATFORM_ASSERT(len >= 0);
57 PLATFORM_ASSERT(start + len <= lengthDoc);
58
59 int styleStart = 0;
60 if (start > 0)
61 styleStart = pdoc->StyleAt(start - 1);
62
63 if (len > 0) {
64 instance->Lex(start, len, styleStart, pdoc);
65 instance->Fold(start, len, styleStart, pdoc);
66 }
67
68 performingStyle = false;
69 }
70 }
71
LineEndTypesSupported()72 int LexInterface::LineEndTypesSupported() {
73 if (instance) {
74 int interfaceVersion = instance->Version();
75 if (interfaceVersion >= lvSubStyles) {
76 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
77 return ssinstance->LineEndTypesSupported();
78 }
79 }
80 return 0;
81 }
82
Document()83 Document::Document() {
84 refCount = 0;
85 pcf = NULL;
86 #ifdef _WIN32
87 eolMode = SC_EOL_CRLF;
88 #else
89 eolMode = SC_EOL_LF;
90 #endif
91 dbcsCodePage = 0;
92 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
93 endStyled = 0;
94 styleClock = 0;
95 enteredModification = 0;
96 enteredStyling = 0;
97 enteredReadOnlyCount = 0;
98 insertionSet = false;
99 tabInChars = 8;
100 indentInChars = 0;
101 actualIndentInChars = 8;
102 useTabs = true;
103 tabIndents = true;
104 backspaceUnindents = false;
105
106 matchesValid = false;
107 regex = 0;
108
109 UTF8BytesOfLeadInitialise();
110
111 perLineData[ldMarkers] = new LineMarkers();
112 perLineData[ldLevels] = new LineLevels();
113 perLineData[ldState] = new LineState();
114 perLineData[ldMargin] = new LineAnnotation();
115 perLineData[ldAnnotation] = new LineAnnotation();
116
117 cb.SetPerLine(this);
118
119 pli = 0;
120 }
121
~Document()122 Document::~Document() {
123 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
124 it->watcher->NotifyDeleted(this, it->userData);
125 }
126 for (int j=0; j<ldSize; j++) {
127 delete perLineData[j];
128 perLineData[j] = 0;
129 }
130 delete regex;
131 regex = 0;
132 delete pli;
133 pli = 0;
134 delete pcf;
135 pcf = 0;
136 }
137
Init()138 void Document::Init() {
139 for (int j=0; j<ldSize; j++) {
140 if (perLineData[j])
141 perLineData[j]->Init();
142 }
143 }
144
LineEndTypesSupported() const145 int Document::LineEndTypesSupported() const {
146 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
147 return pli->LineEndTypesSupported();
148 else
149 return 0;
150 }
151
SetDBCSCodePage(int dbcsCodePage_)152 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
153 if (dbcsCodePage != dbcsCodePage_) {
154 dbcsCodePage = dbcsCodePage_;
155 SetCaseFolder(NULL);
156 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
157 return true;
158 } else {
159 return false;
160 }
161 }
162
SetLineEndTypesAllowed(int lineEndBitSet_)163 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
164 if (lineEndBitSet != lineEndBitSet_) {
165 lineEndBitSet = lineEndBitSet_;
166 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
167 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
168 ModifiedAt(0);
169 cb.SetLineEndTypes(lineEndBitSetActive);
170 return true;
171 } else {
172 return false;
173 }
174 } else {
175 return false;
176 }
177 }
178
InsertLine(int line)179 void Document::InsertLine(int line) {
180 for (int j=0; j<ldSize; j++) {
181 if (perLineData[j])
182 perLineData[j]->InsertLine(line);
183 }
184 }
185
RemoveLine(int line)186 void Document::RemoveLine(int line) {
187 for (int j=0; j<ldSize; j++) {
188 if (perLineData[j])
189 perLineData[j]->RemoveLine(line);
190 }
191 }
192
193 // Increase reference count and return its previous value.
AddRef()194 int Document::AddRef() {
195 return refCount++;
196 }
197
198 // Decrease reference count and return its previous value.
199 // Delete the document if reference count reaches zero.
Release()200 int SCI_METHOD Document::Release() {
201 int curRefCount = --refCount;
202 if (curRefCount == 0)
203 delete this;
204 return curRefCount;
205 }
206
SetSavePoint()207 void Document::SetSavePoint() {
208 cb.SetSavePoint();
209 NotifySavePoint(true);
210 }
211
TentativeUndo()212 void Document::TentativeUndo() {
213 CheckReadOnly();
214 if (enteredModification == 0) {
215 enteredModification++;
216 if (!cb.IsReadOnly()) {
217 bool startSavePoint = cb.IsSavePoint();
218 bool multiLine = false;
219 int steps = cb.TentativeSteps();
220 //Platform::DebugPrintf("Steps=%d\n", steps);
221 for (int step = 0; step < steps; step++) {
222 const int prevLinesTotal = LinesTotal();
223 const Action &action = cb.GetUndoStep();
224 if (action.at == removeAction) {
225 NotifyModified(DocModification(
226 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
227 } else if (action.at == containerAction) {
228 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
229 dm.token = action.position;
230 NotifyModified(dm);
231 } else {
232 NotifyModified(DocModification(
233 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
234 }
235 cb.PerformUndoStep();
236 if (action.at != containerAction) {
237 ModifiedAt(action.position);
238 }
239
240 int modFlags = SC_PERFORMED_UNDO;
241 // With undo, an insertion action becomes a deletion notification
242 if (action.at == removeAction) {
243 modFlags |= SC_MOD_INSERTTEXT;
244 } else if (action.at == insertAction) {
245 modFlags |= SC_MOD_DELETETEXT;
246 }
247 if (steps > 1)
248 modFlags |= SC_MULTISTEPUNDOREDO;
249 const int linesAdded = LinesTotal() - prevLinesTotal;
250 if (linesAdded != 0)
251 multiLine = true;
252 if (step == steps - 1) {
253 modFlags |= SC_LASTSTEPINUNDOREDO;
254 if (multiLine)
255 modFlags |= SC_MULTILINEUNDOREDO;
256 }
257 NotifyModified(DocModification(modFlags, action.position, action.lenData,
258 linesAdded, action.data));
259 }
260
261 bool endSavePoint = cb.IsSavePoint();
262 if (startSavePoint != endSavePoint)
263 NotifySavePoint(endSavePoint);
264
265 cb.TentativeCommit();
266 }
267 enteredModification--;
268 }
269 }
270
GetMark(int line)271 int Document::GetMark(int line) {
272 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
273 }
274
MarkerNext(int lineStart,int mask) const275 int Document::MarkerNext(int lineStart, int mask) const {
276 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
277 }
278
AddMark(int line,int markerNum)279 int Document::AddMark(int line, int markerNum) {
280 if (line >= 0 && line <= LinesTotal()) {
281 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
282 AddMark(line, markerNum, LinesTotal());
283 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
284 NotifyModified(mh);
285 return prev;
286 } else {
287 return 0;
288 }
289 }
290
AddMarkSet(int line,int valueSet)291 void Document::AddMarkSet(int line, int valueSet) {
292 if (line < 0 || line > LinesTotal()) {
293 return;
294 }
295 unsigned int m = valueSet;
296 for (int i = 0; m; i++, m >>= 1)
297 if (m & 1)
298 static_cast<LineMarkers *>(perLineData[ldMarkers])->
299 AddMark(line, i, LinesTotal());
300 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
301 NotifyModified(mh);
302 }
303
DeleteMark(int line,int markerNum)304 void Document::DeleteMark(int line, int markerNum) {
305 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
306 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
307 NotifyModified(mh);
308 }
309
DeleteMarkFromHandle(int markerHandle)310 void Document::DeleteMarkFromHandle(int markerHandle) {
311 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
312 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
313 mh.line = -1;
314 NotifyModified(mh);
315 }
316
DeleteAllMarks(int markerNum)317 void Document::DeleteAllMarks(int markerNum) {
318 bool someChanges = false;
319 for (int line = 0; line < LinesTotal(); line++) {
320 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
321 someChanges = true;
322 }
323 if (someChanges) {
324 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
325 mh.line = -1;
326 NotifyModified(mh);
327 }
328 }
329
LineFromHandle(int markerHandle)330 int Document::LineFromHandle(int markerHandle) {
331 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
332 }
333
LineStart(int line) const334 int SCI_METHOD Document::LineStart(int line) const {
335 return cb.LineStart(line);
336 }
337
LineEnd(int line) const338 int SCI_METHOD Document::LineEnd(int line) const {
339 if (line >= LinesTotal() - 1) {
340 return LineStart(line + 1);
341 } else {
342 int position = LineStart(line + 1);
343 if (SC_CP_UTF8 == dbcsCodePage) {
344 unsigned char bytes[] = {
345 static_cast<unsigned char>(cb.CharAt(position-3)),
346 static_cast<unsigned char>(cb.CharAt(position-2)),
347 static_cast<unsigned char>(cb.CharAt(position-1)),
348 };
349 if (UTF8IsSeparator(bytes)) {
350 return position - UTF8SeparatorLength;
351 }
352 if (UTF8IsNEL(bytes+1)) {
353 return position - UTF8NELLength;
354 }
355 }
356 position--; // Back over CR or LF
357 // When line terminator is CR+LF, may need to go back one more
358 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
359 position--;
360 }
361 return position;
362 }
363 }
364
SetErrorStatus(int status)365 void SCI_METHOD Document::SetErrorStatus(int status) {
366 // Tell the watchers an error has occurred.
367 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
368 it->watcher->NotifyErrorOccurred(this, it->userData, status);
369 }
370 }
371
LineFromPosition(int pos) const372 int SCI_METHOD Document::LineFromPosition(int pos) const {
373 return cb.LineFromPosition(pos);
374 }
375
LineEndPosition(int position) const376 int Document::LineEndPosition(int position) const {
377 return LineEnd(LineFromPosition(position));
378 }
379
IsLineEndPosition(int position) const380 bool Document::IsLineEndPosition(int position) const {
381 return LineEnd(LineFromPosition(position)) == position;
382 }
383
IsPositionInLineEnd(int position) const384 bool Document::IsPositionInLineEnd(int position) const {
385 return position >= LineEnd(LineFromPosition(position));
386 }
387
VCHomePosition(int position) const388 int Document::VCHomePosition(int position) const {
389 int line = LineFromPosition(position);
390 int startPosition = LineStart(line);
391 int endLine = LineEnd(line);
392 int startText = startPosition;
393 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
394 startText++;
395 if (position == startText)
396 return startPosition;
397 else
398 return startText;
399 }
400
SetLevel(int line,int level)401 int SCI_METHOD Document::SetLevel(int line, int level) {
402 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
403 if (prev != level) {
404 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
405 LineStart(line), 0, 0, 0, line);
406 mh.foldLevelNow = level;
407 mh.foldLevelPrev = prev;
408 NotifyModified(mh);
409 }
410 return prev;
411 }
412
GetLevel(int line) const413 int SCI_METHOD Document::GetLevel(int line) const {
414 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
415 }
416
ClearLevels()417 void Document::ClearLevels() {
418 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
419 }
420
IsSubordinate(int levelStart,int levelTry)421 static bool IsSubordinate(int levelStart, int levelTry) {
422 if (levelTry & SC_FOLDLEVELWHITEFLAG)
423 return true;
424 else
425 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
426 }
427
GetLastChild(int lineParent,int level,int lastLine)428 int Document::GetLastChild(int lineParent, int level, int lastLine) {
429 if (level == -1)
430 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
431 int maxLine = LinesTotal();
432 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
433 int lineMaxSubord = lineParent;
434 while (lineMaxSubord < maxLine - 1) {
435 EnsureStyledTo(LineStart(lineMaxSubord + 2));
436 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
437 break;
438 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
439 break;
440 lineMaxSubord++;
441 }
442 if (lineMaxSubord > lineParent) {
443 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
444 // Have chewed up some whitespace that belongs to a parent so seek back
445 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
446 lineMaxSubord--;
447 }
448 }
449 }
450 return lineMaxSubord;
451 }
452
GetFoldParent(int line) const453 int Document::GetFoldParent(int line) const {
454 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
455 int lineLook = line - 1;
456 while ((lineLook > 0) && (
457 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
458 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
459 ) {
460 lineLook--;
461 }
462 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
463 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
464 return lineLook;
465 } else {
466 return -1;
467 }
468 }
469
GetHighlightDelimiters(HighlightDelimiter & highlightDelimiter,int line,int lastLine)470 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
471 int level = GetLevel(line);
472 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
473
474 int lookLine = line;
475 int lookLineLevel = level;
476 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
477 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
478 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
479 lookLineLevel = GetLevel(--lookLine);
480 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
481 }
482
483 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
484 if (beginFoldBlock == -1) {
485 highlightDelimiter.Clear();
486 return;
487 }
488
489 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
490 int firstChangeableLineBefore = -1;
491 if (endFoldBlock < line) {
492 lookLine = beginFoldBlock - 1;
493 lookLineLevel = GetLevel(lookLine);
494 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
495 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
496 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
497 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
498 beginFoldBlock = lookLine;
499 endFoldBlock = line;
500 firstChangeableLineBefore = line - 1;
501 }
502 }
503 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
504 break;
505 lookLineLevel = GetLevel(--lookLine);
506 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
507 }
508 }
509 if (firstChangeableLineBefore == -1) {
510 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
511 lookLine >= beginFoldBlock;
512 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
513 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
514 firstChangeableLineBefore = lookLine;
515 break;
516 }
517 }
518 }
519 if (firstChangeableLineBefore == -1)
520 firstChangeableLineBefore = beginFoldBlock - 1;
521
522 int firstChangeableLineAfter = -1;
523 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
524 lookLine <= endFoldBlock;
525 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
526 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
527 firstChangeableLineAfter = lookLine;
528 break;
529 }
530 }
531 if (firstChangeableLineAfter == -1)
532 firstChangeableLineAfter = endFoldBlock + 1;
533
534 highlightDelimiter.beginFoldBlock = beginFoldBlock;
535 highlightDelimiter.endFoldBlock = endFoldBlock;
536 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
537 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
538 }
539
ClampPositionIntoDocument(int pos) const540 int Document::ClampPositionIntoDocument(int pos) const {
541 return Platform::Clamp(pos, 0, Length());
542 }
543
IsCrLf(int pos) const544 bool Document::IsCrLf(int pos) const {
545 if (pos < 0)
546 return false;
547 if (pos >= (Length() - 1))
548 return false;
549 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
550 }
551
LenChar(int pos)552 int Document::LenChar(int pos) {
553 if (pos < 0) {
554 return 1;
555 } else if (IsCrLf(pos)) {
556 return 2;
557 } else if (SC_CP_UTF8 == dbcsCodePage) {
558 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
559 const int widthCharBytes = UTF8BytesOfLead[leadByte];
560 int lengthDoc = Length();
561 if ((pos + widthCharBytes) > lengthDoc)
562 return lengthDoc - pos;
563 else
564 return widthCharBytes;
565 } else if (dbcsCodePage) {
566 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
567 } else {
568 return 1;
569 }
570 }
571
InGoodUTF8(int pos,int & start,int & end) const572 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
573 int trail = pos;
574 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
575 trail--;
576 start = (trail > 0) ? trail-1 : trail;
577
578 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
579 const int widthCharBytes = UTF8BytesOfLead[leadByte];
580 if (widthCharBytes == 1) {
581 return false;
582 } else {
583 int trailBytes = widthCharBytes - 1;
584 int len = pos - start;
585 if (len > trailBytes)
586 // pos too far from lead
587 return false;
588 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
589 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
590 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
591 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
592 if (utf8status & UTF8MaskInvalid)
593 return false;
594 end = start + widthCharBytes;
595 return true;
596 }
597 }
598
599 // Normalise a position so that it is not halfway through a two byte character.
600 // This can occur in two situations -
601 // When lines are terminated with \r\n pairs which should be treated as one character.
602 // When displaying DBCS text such as Japanese.
603 // If moving, move the position in the indicated direction.
MovePositionOutsideChar(int pos,int moveDir,bool checkLineEnd)604 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
605 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
606 // If out of range, just return minimum/maximum value.
607 if (pos <= 0)
608 return 0;
609 if (pos >= Length())
610 return Length();
611
612 // PLATFORM_ASSERT(pos > 0 && pos < Length());
613 if (checkLineEnd && IsCrLf(pos - 1)) {
614 if (moveDir > 0)
615 return pos + 1;
616 else
617 return pos - 1;
618 }
619
620 if (dbcsCodePage) {
621 if (SC_CP_UTF8 == dbcsCodePage) {
622 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
623 // If ch is not a trail byte then pos is valid intercharacter position
624 if (UTF8IsTrailByte(ch)) {
625 int startUTF = pos;
626 int endUTF = pos;
627 if (InGoodUTF8(pos, startUTF, endUTF)) {
628 // ch is a trail byte within a UTF-8 character
629 if (moveDir > 0)
630 pos = endUTF;
631 else
632 pos = startUTF;
633 }
634 // Else invalid UTF-8 so return position of isolated trail byte
635 }
636 } else {
637 // Anchor DBCS calculations at start of line because start of line can
638 // not be a DBCS trail byte.
639 int posStartLine = LineStart(LineFromPosition(pos));
640 if (pos == posStartLine)
641 return pos;
642
643 // Step back until a non-lead-byte is found.
644 int posCheck = pos;
645 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
646 posCheck--;
647
648 // Check from known start of character.
649 while (posCheck < pos) {
650 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
651 if (posCheck + mbsize == pos) {
652 return pos;
653 } else if (posCheck + mbsize > pos) {
654 if (moveDir > 0) {
655 return posCheck + mbsize;
656 } else {
657 return posCheck;
658 }
659 }
660 posCheck += mbsize;
661 }
662 }
663 }
664
665 return pos;
666 }
667
668 // NextPosition moves between valid positions - it can not handle a position in the middle of a
669 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
670 // A \r\n pair is treated as two characters.
NextPosition(int pos,int moveDir) const671 int Document::NextPosition(int pos, int moveDir) const {
672 // If out of range, just return minimum/maximum value.
673 int increment = (moveDir > 0) ? 1 : -1;
674 if (pos + increment <= 0)
675 return 0;
676 if (pos + increment >= Length())
677 return Length();
678
679 if (dbcsCodePage) {
680 if (SC_CP_UTF8 == dbcsCodePage) {
681 if (increment == 1) {
682 // Simple forward movement case so can avoid some checks
683 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
684 if (UTF8IsAscii(leadByte)) {
685 // Single byte character or invalid
686 pos++;
687 } else {
688 const int widthCharBytes = UTF8BytesOfLead[leadByte];
689 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
690 for (int b=1; b<widthCharBytes; b++)
691 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
692 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
693 if (utf8status & UTF8MaskInvalid)
694 pos++;
695 else
696 pos += utf8status & UTF8MaskWidth;
697 }
698 } else {
699 // Examine byte before position
700 pos--;
701 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
702 // If ch is not a trail byte then pos is valid intercharacter position
703 if (UTF8IsTrailByte(ch)) {
704 // If ch is a trail byte in a valid UTF-8 character then return start of character
705 int startUTF = pos;
706 int endUTF = pos;
707 if (InGoodUTF8(pos, startUTF, endUTF)) {
708 pos = startUTF;
709 }
710 // Else invalid UTF-8 so return position of isolated trail byte
711 }
712 }
713 } else {
714 if (moveDir > 0) {
715 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
716 pos += mbsize;
717 if (pos > Length())
718 pos = Length();
719 } else {
720 // Anchor DBCS calculations at start of line because start of line can
721 // not be a DBCS trail byte.
722 int posStartLine = LineStart(LineFromPosition(pos));
723 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
724 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
725 if ((pos - 1) <= posStartLine) {
726 return pos - 1;
727 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
728 // Must actually be trail byte
729 return pos - 2;
730 } else {
731 // Otherwise, step back until a non-lead-byte is found.
732 int posTemp = pos - 1;
733 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
734 ;
735 // Now posTemp+1 must point to the beginning of a character,
736 // so figure out whether we went back an even or an odd
737 // number of bytes and go back 1 or 2 bytes, respectively.
738 return (pos - 1 - ((pos - posTemp) & 1));
739 }
740 }
741 }
742 } else {
743 pos += increment;
744 }
745
746 return pos;
747 }
748
NextCharacter(int & pos,int moveDir) const749 bool Document::NextCharacter(int &pos, int moveDir) const {
750 // Returns true if pos changed
751 int posNext = NextPosition(pos, moveDir);
752 if (posNext == pos) {
753 return false;
754 } else {
755 pos = posNext;
756 return true;
757 }
758 }
759
// Decode the UTF-8 sequence starting at us into a code point.
// The lead byte selects how many further bytes are read: none for a lead
// below 0xC2 or at 0xF5 and above (the lead byte itself is returned),
// one below 0xE0, two below 0xF0 and three below 0xF5.
// No validation of the following bytes is performed.
static inline int UnicodeFromBytes(const unsigned char *us) {
	const unsigned char lead = us[0];
	if ((lead >= 0xC2) && (lead < 0xE0)) {
		// Two byte sequence: 5 bits + 6 bits
		return ((lead & 0x1F) << 6) | (us[1] & 0x3F);
	}
	if ((lead >= 0xE0) && (lead < 0xF0)) {
		// Three byte sequence: 4 bits + 6 bits + 6 bits
		return ((lead & 0xF) << 12) | ((us[1] & 0x3F) << 6) | (us[2] & 0x3F);
	}
	if ((lead >= 0xF0) && (lead < 0xF5)) {
		// Four byte sequence: 3 bits + 6 bits + 6 bits + 6 bits
		return ((lead & 0x7) << 18) | ((us[1] & 0x3F) << 12) | ((us[2] & 0x3F) << 6) | (us[3] & 0x3F);
	}
	return lead;
}
772
773 // Return -1 on out-of-bounds
GetRelativePosition(int positionStart,int characterOffset) const774 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
775 int pos = positionStart;
776 if (dbcsCodePage) {
777 const int increment = (characterOffset > 0) ? 1 : -1;
778 while (characterOffset != 0) {
779 const int posNext = NextPosition(pos, increment);
780 if (posNext == pos)
781 return INVALID_POSITION;
782 pos = posNext;
783 characterOffset -= increment;
784 }
785 } else {
786 pos = positionStart + characterOffset;
787 if ((pos < 0) || (pos > Length()))
788 return INVALID_POSITION;
789 }
790 return pos;
791 }
792
GetCharacterAndWidth(int position,int * pWidth) const793 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
794 int character;
795 int bytesInCharacter = 1;
796 if (dbcsCodePage) {
797 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
798 if (SC_CP_UTF8 == dbcsCodePage) {
799 if (UTF8IsAscii(leadByte)) {
800 // Single byte character or invalid
801 character = leadByte;
802 } else {
803 const int widthCharBytes = UTF8BytesOfLead[leadByte];
804 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
805 for (int b=1; b<widthCharBytes; b++)
806 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
807 int utf8status = UTF8Classify(charBytes, widthCharBytes);
808 if (utf8status & UTF8MaskInvalid) {
809 // Report as singleton surrogate values which are invalid Unicode
810 character = 0xDC80 + leadByte;
811 } else {
812 bytesInCharacter = utf8status & UTF8MaskWidth;
813 character = UnicodeFromBytes(charBytes);
814 }
815 }
816 } else {
817 if (IsDBCSLeadByte(leadByte)) {
818 bytesInCharacter = 2;
819 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
820 } else {
821 character = leadByte;
822 }
823 }
824 } else {
825 character = cb.CharAt(position);
826 }
827 if (pWidth) {
828 *pWidth = bytesInCharacter;
829 }
830 return character;
831 }
832
CodePage() const833 int SCI_METHOD Document::CodePage() const {
834 return dbcsCodePage;
835 }
836
IsDBCSLeadByte(char ch) const837 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
838 // Byte ranges found in Wikipedia articles with relevant search strings in each case
839 unsigned char uch = static_cast<unsigned char>(ch);
840 switch (dbcsCodePage) {
841 case 932:
842 // Shift_jis
843 return ((uch >= 0x81) && (uch <= 0x9F)) ||
844 ((uch >= 0xE0) && (uch <= 0xFC));
845 // Lead bytes F0 to FC may be a Microsoft addition.
846 case 936:
847 // GBK
848 return (uch >= 0x81) && (uch <= 0xFE);
849 case 949:
850 // Korean Wansung KS C-5601-1987
851 return (uch >= 0x81) && (uch <= 0xFE);
852 case 950:
853 // Big5
854 return (uch >= 0x81) && (uch <= 0xFE);
855 case 1361:
856 // Korean Johab KS C-5601-1992
857 return
858 ((uch >= 0x84) && (uch <= 0xD3)) ||
859 ((uch >= 0xD8) && (uch <= 0xDE)) ||
860 ((uch >= 0xE0) && (uch <= 0xF9));
861 }
862 return false;
863 }
864
// True for the space and horizontal tab characters only.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
868
869 // Need to break text into segments near lengthSegment but taking into
870 // account the encoding to not break inside a UTF-8 or DBCS character
871 // and also trying to avoid breaking inside a pair of combining characters.
872 // The segment length must always be long enough (more than 4 bytes)
873 // so that there will be at least one whole character to make a segment.
874 // For UTF-8, text must consist only of valid whole characters.
875 // In preference order from best to worst:
876 // 1) Break after space
877 // 2) Break before punctuation
878 // 3) Break after whole character
879
SafeSegment(const char * text,int length,int lengthSegment) const880 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
881 if (length <= lengthSegment)
882 return length;
883 int lastSpaceBreak = -1;
884 int lastPunctuationBreak = -1;
885 int lastEncodingAllowedBreak = 0;
886 for (int j=0; j < lengthSegment;) {
887 unsigned char ch = static_cast<unsigned char>(text[j]);
888 if (j > 0) {
889 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
890 lastSpaceBreak = j;
891 }
892 if (ch < 'A') {
893 lastPunctuationBreak = j;
894 }
895 }
896 lastEncodingAllowedBreak = j;
897
898 if (dbcsCodePage == SC_CP_UTF8) {
899 j += UTF8BytesOfLead[ch];
900 } else if (dbcsCodePage) {
901 j += IsDBCSLeadByte(ch) ? 2 : 1;
902 } else {
903 j++;
904 }
905 }
906 if (lastSpaceBreak >= 0) {
907 return lastSpaceBreak;
908 } else if (lastPunctuationBreak >= 0) {
909 return lastPunctuationBreak;
910 }
911 return lastEncodingAllowedBreak;
912 }
913
CodePageFamily() const914 EncodingFamily Document::CodePageFamily() const {
915 if (SC_CP_UTF8 == dbcsCodePage)
916 return efUnicode;
917 else if (dbcsCodePage)
918 return efDBCS;
919 else
920 return efEightBit;
921 }
922
ModifiedAt(int pos)923 void Document::ModifiedAt(int pos) {
924 if (endStyled > pos)
925 endStyled = pos;
926 }
927
CheckReadOnly()928 void Document::CheckReadOnly() {
929 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
930 enteredReadOnlyCount++;
931 NotifyModifyAttempt();
932 enteredReadOnlyCount--;
933 }
934 }
935
936 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
937 // SetStyleAt does not change the persistent state of a document
938
DeleteChars(int pos,int len)939 bool Document::DeleteChars(int pos, int len) {
940 if (len <= 0)
941 return false;
942 if ((pos + len) > Length())
943 return false;
944 CheckReadOnly();
945 if (enteredModification != 0) {
946 return false;
947 } else {
948 enteredModification++;
949 if (!cb.IsReadOnly()) {
950 NotifyModified(
951 DocModification(
952 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
953 pos, len,
954 0, 0));
955 int prevLinesTotal = LinesTotal();
956 bool startSavePoint = cb.IsSavePoint();
957 bool startSequence = false;
958 const char *text = cb.DeleteChars(pos, len, startSequence);
959 if (startSavePoint && cb.IsCollectingUndo())
960 NotifySavePoint(!startSavePoint);
961 if ((pos < Length()) || (pos == 0))
962 ModifiedAt(pos);
963 else
964 ModifiedAt(pos-1);
965 NotifyModified(
966 DocModification(
967 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
968 pos, len,
969 LinesTotal() - prevLinesTotal, text));
970 }
971 enteredModification--;
972 }
973 return !cb.IsReadOnly();
974 }
975
976 /**
977 * Insert a string with a length.
978 */
InsertString(int position,const char * s,int insertLength)979 int Document::InsertString(int position, const char *s, int insertLength) {
980 if (insertLength <= 0) {
981 return 0;
982 }
983 CheckReadOnly(); // Application may change read only state here
984 if (cb.IsReadOnly()) {
985 return 0;
986 }
987 if (enteredModification != 0) {
988 return 0;
989 }
990 enteredModification++;
991 insertionSet = false;
992 insertion.clear();
993 NotifyModified(
994 DocModification(
995 SC_MOD_INSERTCHECK,
996 position, insertLength,
997 0, s));
998 if (insertionSet) {
999 s = insertion.c_str();
1000 insertLength = static_cast<int>(insertion.length());
1001 }
1002 NotifyModified(
1003 DocModification(
1004 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1005 position, insertLength,
1006 0, s));
1007 int prevLinesTotal = LinesTotal();
1008 bool startSavePoint = cb.IsSavePoint();
1009 bool startSequence = false;
1010 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1011 if (startSavePoint && cb.IsCollectingUndo())
1012 NotifySavePoint(!startSavePoint);
1013 ModifiedAt(position);
1014 NotifyModified(
1015 DocModification(
1016 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1017 position, insertLength,
1018 LinesTotal() - prevLinesTotal, text));
1019 if (insertionSet) { // Free memory as could be large
1020 std::string().swap(insertion);
1021 }
1022 enteredModification--;
1023 return insertLength;
1024 }
1025
ChangeInsertion(const char * s,int length)1026 void Document::ChangeInsertion(const char *s, int length) {
1027 insertionSet = true;
1028 insertion.assign(s, length);
1029 }
1030
AddData(char * data,int length)1031 int SCI_METHOD Document::AddData(char *data, int length) {
1032 try {
1033 int position = Length();
1034 InsertString(position, data, length);
1035 } catch (std::bad_alloc &) {
1036 return SC_STATUS_BADALLOC;
1037 } catch (...) {
1038 return SC_STATUS_FAILURE;
1039 }
1040 return 0;
1041 }
1042
ConvertToDocument()1043 void * SCI_METHOD Document::ConvertToDocument() {
1044 return this;
1045 }
1046
Undo()1047 int Document::Undo() {
1048 int newPos = -1;
1049 CheckReadOnly();
1050 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1051 enteredModification++;
1052 if (!cb.IsReadOnly()) {
1053 bool startSavePoint = cb.IsSavePoint();
1054 bool multiLine = false;
1055 int steps = cb.StartUndo();
1056 //Platform::DebugPrintf("Steps=%d\n", steps);
1057 int coalescedRemovePos = -1;
1058 int coalescedRemoveLen = 0;
1059 int prevRemoveActionPos = -1;
1060 int prevRemoveActionLen = 0;
1061 for (int step = 0; step < steps; step++) {
1062 const int prevLinesTotal = LinesTotal();
1063 const Action &action = cb.GetUndoStep();
1064 if (action.at == removeAction) {
1065 NotifyModified(DocModification(
1066 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1067 } else if (action.at == containerAction) {
1068 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1069 dm.token = action.position;
1070 NotifyModified(dm);
1071 if (!action.mayCoalesce) {
1072 coalescedRemovePos = -1;
1073 coalescedRemoveLen = 0;
1074 prevRemoveActionPos = -1;
1075 prevRemoveActionLen = 0;
1076 }
1077 } else {
1078 NotifyModified(DocModification(
1079 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1080 }
1081 cb.PerformUndoStep();
1082 if (action.at != containerAction) {
1083 ModifiedAt(action.position);
1084 newPos = action.position;
1085 }
1086
1087 int modFlags = SC_PERFORMED_UNDO;
1088 // With undo, an insertion action becomes a deletion notification
1089 if (action.at == removeAction) {
1090 newPos += action.lenData;
1091 modFlags |= SC_MOD_INSERTTEXT;
1092 if ((coalescedRemoveLen > 0) &&
1093 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1094 coalescedRemoveLen += action.lenData;
1095 newPos = coalescedRemovePos + coalescedRemoveLen;
1096 } else {
1097 coalescedRemovePos = action.position;
1098 coalescedRemoveLen = action.lenData;
1099 }
1100 prevRemoveActionPos = action.position;
1101 prevRemoveActionLen = action.lenData;
1102 } else if (action.at == insertAction) {
1103 modFlags |= SC_MOD_DELETETEXT;
1104 coalescedRemovePos = -1;
1105 coalescedRemoveLen = 0;
1106 prevRemoveActionPos = -1;
1107 prevRemoveActionLen = 0;
1108 }
1109 if (steps > 1)
1110 modFlags |= SC_MULTISTEPUNDOREDO;
1111 const int linesAdded = LinesTotal() - prevLinesTotal;
1112 if (linesAdded != 0)
1113 multiLine = true;
1114 if (step == steps - 1) {
1115 modFlags |= SC_LASTSTEPINUNDOREDO;
1116 if (multiLine)
1117 modFlags |= SC_MULTILINEUNDOREDO;
1118 }
1119 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1120 linesAdded, action.data));
1121 }
1122
1123 bool endSavePoint = cb.IsSavePoint();
1124 if (startSavePoint != endSavePoint)
1125 NotifySavePoint(endSavePoint);
1126 }
1127 enteredModification--;
1128 }
1129 return newPos;
1130 }
1131
Redo()1132 int Document::Redo() {
1133 int newPos = -1;
1134 CheckReadOnly();
1135 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1136 enteredModification++;
1137 if (!cb.IsReadOnly()) {
1138 bool startSavePoint = cb.IsSavePoint();
1139 bool multiLine = false;
1140 int steps = cb.StartRedo();
1141 for (int step = 0; step < steps; step++) {
1142 const int prevLinesTotal = LinesTotal();
1143 const Action &action = cb.GetRedoStep();
1144 if (action.at == insertAction) {
1145 NotifyModified(DocModification(
1146 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1147 } else if (action.at == containerAction) {
1148 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1149 dm.token = action.position;
1150 NotifyModified(dm);
1151 } else {
1152 NotifyModified(DocModification(
1153 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1154 }
1155 cb.PerformRedoStep();
1156 if (action.at != containerAction) {
1157 ModifiedAt(action.position);
1158 newPos = action.position;
1159 }
1160
1161 int modFlags = SC_PERFORMED_REDO;
1162 if (action.at == insertAction) {
1163 newPos += action.lenData;
1164 modFlags |= SC_MOD_INSERTTEXT;
1165 } else if (action.at == removeAction) {
1166 modFlags |= SC_MOD_DELETETEXT;
1167 }
1168 if (steps > 1)
1169 modFlags |= SC_MULTISTEPUNDOREDO;
1170 const int linesAdded = LinesTotal() - prevLinesTotal;
1171 if (linesAdded != 0)
1172 multiLine = true;
1173 if (step == steps - 1) {
1174 modFlags |= SC_LASTSTEPINUNDOREDO;
1175 if (multiLine)
1176 modFlags |= SC_MULTILINEUNDOREDO;
1177 }
1178 NotifyModified(
1179 DocModification(modFlags, action.position, action.lenData,
1180 linesAdded, action.data));
1181 }
1182
1183 bool endSavePoint = cb.IsSavePoint();
1184 if (startSavePoint != endSavePoint)
1185 NotifySavePoint(endSavePoint);
1186 }
1187 enteredModification--;
1188 }
1189 return newPos;
1190 }
1191
DelChar(int pos)1192 void Document::DelChar(int pos) {
1193 DeleteChars(pos, LenChar(pos));
1194 }
1195
DelCharBack(int pos)1196 void Document::DelCharBack(int pos) {
1197 if (pos <= 0) {
1198 return;
1199 } else if (IsCrLf(pos - 2)) {
1200 DeleteChars(pos - 2, 2);
1201 } else if (dbcsCodePage) {
1202 int startChar = NextPosition(pos, -1);
1203 DeleteChars(startChar, pos - startChar);
1204 } else {
1205 DeleteChars(pos - 1, 1);
1206 }
1207 }
1208
/**
 * Return the first multiple of tabSize that is strictly greater than the
 * multiple of tabSize at or before pos (integer division semantics).
 * tabSize must be non-zero.
 */
static int NextTab(int pos, int tabSize) {
	const int intoTab = pos % tabSize;
	return pos - intoTab + tabSize;
}
1212
/**
 * Build the white space needed to indent a line by indent columns.
 * When insertSpaces is false, as many tabs of width tabSize as fit are used
 * and the remainder is filled with spaces; otherwise only spaces are used.
 * A non-positive indent yields an empty string. A non-positive tabSize
 * cannot be honoured with tabs, so spaces are used for the whole indent
 * instead of looping forever.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<std::string::size_type>(indent), ' ');
	return indentation;
}
1227
GetLineIndentation(int line)1228 int SCI_METHOD Document::GetLineIndentation(int line) {
1229 int indent = 0;
1230 if ((line >= 0) && (line < LinesTotal())) {
1231 int lineStart = LineStart(line);
1232 int length = Length();
1233 for (int i = lineStart; i < length; i++) {
1234 char ch = cb.CharAt(i);
1235 if (ch == ' ')
1236 indent++;
1237 else if (ch == '\t')
1238 indent = NextTab(indent, tabInChars);
1239 else
1240 return indent;
1241 }
1242 }
1243 return indent;
1244 }
1245
SetLineIndentation(int line,int indent)1246 int Document::SetLineIndentation(int line, int indent) {
1247 int indentOfLine = GetLineIndentation(line);
1248 if (indent < 0)
1249 indent = 0;
1250 if (indent != indentOfLine) {
1251 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1252 int thisLineStart = LineStart(line);
1253 int indentPos = GetLineIndentPosition(line);
1254 UndoGroup ug(this);
1255 DeleteChars(thisLineStart, indentPos - thisLineStart);
1256 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1257 static_cast<int>(linebuf.length()));
1258 } else {
1259 return GetLineIndentPosition(line);
1260 }
1261 }
1262
GetLineIndentPosition(int line) const1263 int Document::GetLineIndentPosition(int line) const {
1264 if (line < 0)
1265 return 0;
1266 int pos = LineStart(line);
1267 int length = Length();
1268 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1269 pos++;
1270 }
1271 return pos;
1272 }
1273
GetColumn(int pos)1274 int Document::GetColumn(int pos) {
1275 int column = 0;
1276 int line = LineFromPosition(pos);
1277 if ((line >= 0) && (line < LinesTotal())) {
1278 for (int i = LineStart(line); i < pos;) {
1279 char ch = cb.CharAt(i);
1280 if (ch == '\t') {
1281 column = NextTab(column, tabInChars);
1282 i++;
1283 } else if (ch == '\r') {
1284 return column;
1285 } else if (ch == '\n') {
1286 return column;
1287 } else if (i >= Length()) {
1288 return column;
1289 } else {
1290 column++;
1291 i = NextPosition(i, 1);
1292 }
1293 }
1294 }
1295 return column;
1296 }
1297
CountCharacters(int startPos,int endPos)1298 int Document::CountCharacters(int startPos, int endPos) {
1299 startPos = MovePositionOutsideChar(startPos, 1, false);
1300 endPos = MovePositionOutsideChar(endPos, -1, false);
1301 int count = 0;
1302 int i = startPos;
1303 while (i < endPos) {
1304 count++;
1305 if (IsCrLf(i))
1306 i++;
1307 i = NextPosition(i, 1);
1308 }
1309 return count;
1310 }
1311
FindColumn(int line,int column)1312 int Document::FindColumn(int line, int column) {
1313 int position = LineStart(line);
1314 if ((line >= 0) && (line < LinesTotal())) {
1315 int columnCurrent = 0;
1316 while ((columnCurrent < column) && (position < Length())) {
1317 char ch = cb.CharAt(position);
1318 if (ch == '\t') {
1319 columnCurrent = NextTab(columnCurrent, tabInChars);
1320 if (columnCurrent > column)
1321 return position;
1322 position++;
1323 } else if (ch == '\r') {
1324 return position;
1325 } else if (ch == '\n') {
1326 return position;
1327 } else {
1328 columnCurrent++;
1329 position = NextPosition(position, 1);
1330 }
1331 }
1332 }
1333 return position;
1334 }
1335
Indent(bool forwards,int lineBottom,int lineTop)1336 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1337 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1338 for (int line = lineBottom; line >= lineTop; line--) {
1339 int indentOfLine = GetLineIndentation(line);
1340 if (forwards) {
1341 if (LineStart(line) < LineEnd(line)) {
1342 SetLineIndentation(line, indentOfLine + IndentSize());
1343 }
1344 } else {
1345 SetLineIndentation(line, indentOfLine - IndentSize());
1346 }
1347 }
1348 }
1349
1350 // Convert line endings for a piece of text to a particular mode.
1351 // Stop at len or when a NUL is found.
TransformLineEnds(const char * s,size_t len,int eolModeWanted)1352 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1353 std::string dest;
1354 for (size_t i = 0; (i < len) && (s[i]); i++) {
1355 if (s[i] == '\n' || s[i] == '\r') {
1356 if (eolModeWanted == SC_EOL_CR) {
1357 dest.push_back('\r');
1358 } else if (eolModeWanted == SC_EOL_LF) {
1359 dest.push_back('\n');
1360 } else { // eolModeWanted == SC_EOL_CRLF
1361 dest.push_back('\r');
1362 dest.push_back('\n');
1363 }
1364 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1365 i++;
1366 }
1367 } else {
1368 dest.push_back(s[i]);
1369 }
1370 }
1371 return dest;
1372 }
1373
ConvertLineEnds(int eolModeSet)1374 void Document::ConvertLineEnds(int eolModeSet) {
1375 UndoGroup ug(this);
1376
1377 for (int pos = 0; pos < Length(); pos++) {
1378 if (cb.CharAt(pos) == '\r') {
1379 if (cb.CharAt(pos + 1) == '\n') {
1380 // CRLF
1381 if (eolModeSet == SC_EOL_CR) {
1382 DeleteChars(pos + 1, 1); // Delete the LF
1383 } else if (eolModeSet == SC_EOL_LF) {
1384 DeleteChars(pos, 1); // Delete the CR
1385 } else {
1386 pos++;
1387 }
1388 } else {
1389 // CR
1390 if (eolModeSet == SC_EOL_CRLF) {
1391 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1392 } else if (eolModeSet == SC_EOL_LF) {
1393 pos += InsertString(pos, "\n", 1); // Insert LF
1394 DeleteChars(pos, 1); // Delete CR
1395 pos--;
1396 }
1397 }
1398 } else if (cb.CharAt(pos) == '\n') {
1399 // LF
1400 if (eolModeSet == SC_EOL_CRLF) {
1401 pos += InsertString(pos, "\r", 1); // Insert CR
1402 } else if (eolModeSet == SC_EOL_CR) {
1403 pos += InsertString(pos, "\r", 1); // Insert CR
1404 DeleteChars(pos, 1); // Delete LF
1405 pos--;
1406 }
1407 }
1408 }
1409
1410 }
1411
IsWhiteLine(int line) const1412 bool Document::IsWhiteLine(int line) const {
1413 int currentChar = LineStart(line);
1414 int endLine = LineEnd(line);
1415 while (currentChar < endLine) {
1416 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1417 return false;
1418 }
1419 ++currentChar;
1420 }
1421 return true;
1422 }
1423
ParaUp(int pos) const1424 int Document::ParaUp(int pos) const {
1425 int line = LineFromPosition(pos);
1426 line--;
1427 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1428 line--;
1429 }
1430 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1431 line--;
1432 }
1433 line++;
1434 return LineStart(line);
1435 }
1436
ParaDown(int pos) const1437 int Document::ParaDown(int pos) const {
1438 int line = LineFromPosition(pos);
1439 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1440 line++;
1441 }
1442 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1443 line++;
1444 }
1445 if (line < LinesTotal())
1446 return LineStart(line);
1447 else // end of a document
1448 return LineEnd(line-1);
1449 }
1450
WordCharClass(unsigned char ch) const1451 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1452 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1453 return CharClassify::ccWord;
1454 return charClass.GetClass(ch);
1455 }
1456
1457 /**
1458 * Used by commmands that want to select whole words.
1459 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1460 */
ExtendWordSelect(int pos,int delta,bool onlyWordCharacters)1461 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1462 CharClassify::cc ccStart = CharClassify::ccWord;
1463 if (delta < 0) {
1464 if (!onlyWordCharacters)
1465 ccStart = WordCharClass(cb.CharAt(pos-1));
1466 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1467 pos--;
1468 } else {
1469 if (!onlyWordCharacters && pos < Length())
1470 ccStart = WordCharClass(cb.CharAt(pos));
1471 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1472 pos++;
1473 }
1474 return MovePositionOutsideChar(pos, delta, true);
1475 }
1476
1477 /**
1478 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1479 * (delta < 0).
1480 * This is looking for a transition between character classes although there is also some
1481 * additional movement to transit white space.
1482 * Used by cursor movement by word commands.
1483 */
NextWordStart(int pos,int delta)1484 int Document::NextWordStart(int pos, int delta) {
1485 if (delta < 0) {
1486 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1487 pos--;
1488 if (pos > 0) {
1489 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1490 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1491 pos--;
1492 }
1493 }
1494 } else {
1495 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1496 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1497 pos++;
1498 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1499 pos++;
1500 }
1501 return pos;
1502 }
1503
1504 /**
1505 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1506 * (delta < 0).
1507 * This is looking for a transition between character classes although there is also some
1508 * additional movement to transit white space.
1509 * Used by cursor movement by word commands.
1510 */
NextWordEnd(int pos,int delta)1511 int Document::NextWordEnd(int pos, int delta) {
1512 if (delta < 0) {
1513 if (pos > 0) {
1514 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1515 if (ccStart != CharClassify::ccSpace) {
1516 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1517 pos--;
1518 }
1519 }
1520 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1521 pos--;
1522 }
1523 }
1524 } else {
1525 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1526 pos++;
1527 }
1528 if (pos < Length()) {
1529 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1530 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1531 pos++;
1532 }
1533 }
1534 }
1535 return pos;
1536 }
1537
1538 /**
1539 * Check that the character at the given position is a word or punctuation character and that
1540 * the previous character is of a different character class.
1541 */
IsWordStartAt(int pos) const1542 bool Document::IsWordStartAt(int pos) const {
1543 if (pos > 0) {
1544 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1545 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1546 (ccPos != WordCharClass(CharAt(pos - 1)));
1547 }
1548 return true;
1549 }
1550
1551 /**
1552 * Check that the character at the given position is a word or punctuation character and that
1553 * the next character is of a different character class.
1554 */
IsWordEndAt(int pos) const1555 bool Document::IsWordEndAt(int pos) const {
1556 if (pos < Length()) {
1557 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1558 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1559 (ccPrev != WordCharClass(CharAt(pos)));
1560 }
1561 return true;
1562 }
1563
1564 /**
1565 * Check that the given range is has transitions between character classes at both
1566 * ends and where the characters on the inside are word or punctuation characters.
1567 */
IsWordAt(int start,int end) const1568 bool Document::IsWordAt(int start, int end) const {
1569 return IsWordStartAt(start) && IsWordEndAt(end);
1570 }
1571
MatchesWordOptions(bool word,bool wordStart,int pos,int length) const1572 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1573 return (!word && !wordStart) ||
1574 (word && IsWordAt(pos, pos + length)) ||
1575 (wordStart && IsWordStartAt(pos));
1576 }
1577
HasCaseFolder(void) const1578 bool Document::HasCaseFolder(void) const {
1579 return pcf != 0;
1580 }
1581
SetCaseFolder(CaseFolder * pcf_)1582 void Document::SetCaseFolder(CaseFolder *pcf_) {
1583 delete pcf;
1584 pcf = pcf_;
1585 }
1586
1587 /**
1588 * Find text in document, supporting both forward and backward
1589 * searches (just pass minPos > maxPos to do a backward search)
1590 * Has not been tested with backwards DBCS searches yet.
1591 */
FindText(int minPos,int maxPos,const char * search,bool caseSensitive,bool word,bool wordStart,bool regExp,int flags,int * length)1592 long Document::FindText(int minPos, int maxPos, const char *search,
1593 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1594 int *length) {
1595 if (*length <= 0)
1596 return minPos;
1597 if (regExp) {
1598 if (!regex)
1599 regex = CreateRegexSearch(&charClass);
1600 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1601 } else {
1602
1603 const bool forward = minPos <= maxPos;
1604 const int increment = forward ? 1 : -1;
1605
1606 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1607 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1608 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1609
1610 // Compute actual search ranges needed
1611 const int lengthFind = *length;
1612
1613 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1614 const int limitPos = Platform::Maximum(startPos, endPos);
1615 int pos = startPos;
1616 if (!forward) {
1617 // Back all of a character
1618 pos = NextPosition(pos, increment);
1619 }
1620 if (caseSensitive) {
1621 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1622 const char charStartSearch = search[0];
1623 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1624 if (CharAt(pos) == charStartSearch) {
1625 bool found = (pos + lengthFind) <= limitPos;
1626 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1627 found = CharAt(pos + indexSearch) == search[indexSearch];
1628 }
1629 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1630 return pos;
1631 }
1632 }
1633 if (!NextCharacter(pos, increment))
1634 break;
1635 }
1636 } else if (SC_CP_UTF8 == dbcsCodePage) {
1637 const size_t maxFoldingExpansion = 4;
1638 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1639 const int lenSearch = static_cast<int>(
1640 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1641 char bytes[UTF8MaxBytes + 1];
1642 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1643 while (forward ? (pos < endPos) : (pos >= endPos)) {
1644 int widthFirstCharacter = 0;
1645 int posIndexDocument = pos;
1646 int indexSearch = 0;
1647 bool characterMatches = true;
1648 for (;;) {
1649 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1650 bytes[0] = leadByte;
1651 int widthChar = 1;
1652 if (!UTF8IsAscii(leadByte)) {
1653 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1654 for (int b=1; b<widthCharBytes; b++) {
1655 bytes[b] = cb.CharAt(posIndexDocument+b);
1656 }
1657 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1658 }
1659 if (!widthFirstCharacter)
1660 widthFirstCharacter = widthChar;
1661 if ((posIndexDocument + widthChar) > limitPos)
1662 break;
1663 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1664 folded[lenFlat] = 0;
1665 // Does folded match the buffer
1666 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1667 if (!characterMatches)
1668 break;
1669 posIndexDocument += widthChar;
1670 indexSearch += lenFlat;
1671 if (indexSearch >= lenSearch)
1672 break;
1673 }
1674 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1675 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1676 *length = posIndexDocument - pos;
1677 return pos;
1678 }
1679 }
1680 if (forward) {
1681 pos += widthFirstCharacter;
1682 } else {
1683 if (!NextCharacter(pos, increment))
1684 break;
1685 }
1686 }
1687 } else if (dbcsCodePage) {
1688 const size_t maxBytesCharacter = 2;
1689 const size_t maxFoldingExpansion = 4;
1690 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1691 const int lenSearch = static_cast<int>(
1692 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1693 while (forward ? (pos < endPos) : (pos >= endPos)) {
1694 int indexDocument = 0;
1695 int indexSearch = 0;
1696 bool characterMatches = true;
1697 while (characterMatches &&
1698 ((pos + indexDocument) < limitPos) &&
1699 (indexSearch < lenSearch)) {
1700 char bytes[maxBytesCharacter + 1];
1701 bytes[0] = cb.CharAt(pos + indexDocument);
1702 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1703 if (widthChar == 2)
1704 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1705 if ((pos + indexDocument + widthChar) > limitPos)
1706 break;
1707 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1708 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1709 folded[lenFlat] = 0;
1710 // Does folded match the buffer
1711 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1712 indexDocument += widthChar;
1713 indexSearch += lenFlat;
1714 }
1715 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1716 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1717 *length = indexDocument;
1718 return pos;
1719 }
1720 }
1721 if (!NextCharacter(pos, increment))
1722 break;
1723 }
1724 } else {
1725 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1726 std::vector<char> searchThing(lengthFind + 1);
1727 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1728 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1729 bool found = (pos + lengthFind) <= limitPos;
1730 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1731 char ch = CharAt(pos + indexSearch);
1732 char folded[2];
1733 pcf->Fold(folded, sizeof(folded), &ch, 1);
1734 found = folded[0] == searchThing[indexSearch];
1735 }
1736 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1737 return pos;
1738 }
1739 if (!NextCharacter(pos, increment))
1740 break;
1741 }
1742 }
1743 }
1744 //Platform::DebugPrintf("Not found\n");
1745 return -1;
1746 }
1747
SubstituteByPosition(const char * text,int * length)1748 const char *Document::SubstituteByPosition(const char *text, int *length) {
1749 if (regex)
1750 return regex->SubstituteByPosition(this, text, length);
1751 else
1752 return 0;
1753 }
1754
LinesTotal() const1755 int Document::LinesTotal() const {
1756 return cb.Lines();
1757 }
1758
SetDefaultCharClasses(bool includeWordClass)1759 void Document::SetDefaultCharClasses(bool includeWordClass) {
1760 charClass.SetDefaultCharClasses(includeWordClass);
1761 }
1762
SetCharClasses(const unsigned char * chars,CharClassify::cc newCharClass)1763 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1764 charClass.SetCharClasses(chars, newCharClass);
1765 }
1766
GetCharsOfClass(CharClassify::cc characterClass,unsigned char * buffer)1767 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1768 return charClass.GetCharsOfClass(characterClass, buffer);
1769 }
1770
StartStyling(int position,char)1771 void SCI_METHOD Document::StartStyling(int position, char) {
1772 endStyled = position;
1773 }
1774
SetStyleFor(int length,char style)1775 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1776 if (enteredStyling != 0) {
1777 return false;
1778 } else {
1779 enteredStyling++;
1780 int prevEndStyled = endStyled;
1781 if (cb.SetStyleFor(endStyled, length, style)) {
1782 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1783 prevEndStyled, length);
1784 NotifyModified(mh);
1785 }
1786 endStyled += length;
1787 enteredStyling--;
1788 return true;
1789 }
1790 }
1791
SetStyles(int length,const char * styles)1792 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1793 if (enteredStyling != 0) {
1794 return false;
1795 } else {
1796 enteredStyling++;
1797 bool didChange = false;
1798 int startMod = 0;
1799 int endMod = 0;
1800 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1801 PLATFORM_ASSERT(endStyled < Length());
1802 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1803 if (!didChange) {
1804 startMod = endStyled;
1805 }
1806 didChange = true;
1807 endMod = endStyled;
1808 }
1809 }
1810 if (didChange) {
1811 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1812 startMod, endMod - startMod + 1);
1813 NotifyModified(mh);
1814 }
1815 enteredStyling--;
1816 return true;
1817 }
1818 }
1819
EnsureStyledTo(int pos)1820 void Document::EnsureStyledTo(int pos) {
1821 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1822 IncrementStyleClock();
1823 if (pli && !pli->UseContainerLexing()) {
1824 int lineEndStyled = LineFromPosition(GetEndStyled());
1825 int endStyledTo = LineStart(lineEndStyled);
1826 pli->Colourise(endStyledTo, pos);
1827 } else {
1828 // Ask the watchers to style, and stop as soon as one responds.
1829 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1830 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1831 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1832 }
1833 }
1834 }
1835 }
1836
LexerChanged()1837 void Document::LexerChanged() {
1838 // Tell the watchers the lexer has changed.
1839 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1840 it->watcher->NotifyLexerChanged(this, it->userData);
1841 }
1842 }
1843
SetLineState(int line,int state)1844 int SCI_METHOD Document::SetLineState(int line, int state) {
1845 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1846 if (state != statePrevious) {
1847 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1848 NotifyModified(mh);
1849 }
1850 return statePrevious;
1851 }
1852
GetLineState(int line) const1853 int SCI_METHOD Document::GetLineState(int line) const {
1854 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1855 }
1856
GetMaxLineState()1857 int Document::GetMaxLineState() {
1858 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1859 }
1860
ChangeLexerState(int start,int end)1861 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1862 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1863 NotifyModified(mh);
1864 }
1865
MarginStyledText(int line) const1866 StyledText Document::MarginStyledText(int line) const {
1867 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1868 return StyledText(pla->Length(line), pla->Text(line),
1869 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1870 }
1871
MarginSetText(int line,const char * text)1872 void Document::MarginSetText(int line, const char *text) {
1873 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1874 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1875 NotifyModified(mh);
1876 }
1877
MarginSetStyle(int line,int style)1878 void Document::MarginSetStyle(int line, int style) {
1879 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1880 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1881 }
1882
MarginSetStyles(int line,const unsigned char * styles)1883 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1884 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1885 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1886 }
1887
MarginClearAll()1888 void Document::MarginClearAll() {
1889 int maxEditorLine = LinesTotal();
1890 for (int l=0; l<maxEditorLine; l++)
1891 MarginSetText(l, 0);
1892 // Free remaining data
1893 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1894 }
1895
AnnotationStyledText(int line) const1896 StyledText Document::AnnotationStyledText(int line) const {
1897 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1898 return StyledText(pla->Length(line), pla->Text(line),
1899 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1900 }
1901
AnnotationSetText(int line,const char * text)1902 void Document::AnnotationSetText(int line, const char *text) {
1903 if (line >= 0 && line < LinesTotal()) {
1904 const int linesBefore = AnnotationLines(line);
1905 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1906 const int linesAfter = AnnotationLines(line);
1907 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1908 mh.annotationLinesAdded = linesAfter - linesBefore;
1909 NotifyModified(mh);
1910 }
1911 }
1912
AnnotationSetStyle(int line,int style)1913 void Document::AnnotationSetStyle(int line, int style) {
1914 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1915 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1916 NotifyModified(mh);
1917 }
1918
AnnotationSetStyles(int line,const unsigned char * styles)1919 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1920 if (line >= 0 && line < LinesTotal()) {
1921 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1922 }
1923 }
1924
AnnotationLines(int line) const1925 int Document::AnnotationLines(int line) const {
1926 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1927 }
1928
AnnotationClearAll()1929 void Document::AnnotationClearAll() {
1930 int maxEditorLine = LinesTotal();
1931 for (int l=0; l<maxEditorLine; l++)
1932 AnnotationSetText(l, 0);
1933 // Free remaining data
1934 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1935 }
1936
IncrementStyleClock()1937 void Document::IncrementStyleClock() {
1938 styleClock = (styleClock + 1) % 0x100000;
1939 }
1940
DecorationFillRange(int position,int value,int fillLength)1941 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1942 if (decorations.FillRange(position, value, fillLength)) {
1943 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1944 position, fillLength);
1945 NotifyModified(mh);
1946 }
1947 }
1948
AddWatcher(DocWatcher * watcher,void * userData)1949 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1950 WatcherWithUserData wwud(watcher, userData);
1951 std::vector<WatcherWithUserData>::iterator it =
1952 std::find(watchers.begin(), watchers.end(), wwud);
1953 if (it != watchers.end())
1954 return false;
1955 watchers.push_back(wwud);
1956 return true;
1957 }
1958
RemoveWatcher(DocWatcher * watcher,void * userData)1959 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1960 std::vector<WatcherWithUserData>::iterator it =
1961 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1962 if (it != watchers.end()) {
1963 watchers.erase(it);
1964 return true;
1965 }
1966 return false;
1967 }
1968
NotifyModifyAttempt()1969 void Document::NotifyModifyAttempt() {
1970 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1971 it->watcher->NotifyModifyAttempt(this, it->userData);
1972 }
1973 }
1974
NotifySavePoint(bool atSavePoint)1975 void Document::NotifySavePoint(bool atSavePoint) {
1976 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1977 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1978 }
1979 }
1980
NotifyModified(DocModification mh)1981 void Document::NotifyModified(DocModification mh) {
1982 if (mh.modificationType & SC_MOD_INSERTTEXT) {
1983 decorations.InsertSpace(mh.position, mh.length);
1984 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1985 decorations.DeleteRange(mh.position, mh.length);
1986 }
1987 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1988 it->watcher->NotifyModified(this, mh, it->userData);
1989 }
1990 }
1991
IsWordPartSeparator(char ch) const1992 bool Document::IsWordPartSeparator(char ch) const {
1993 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1994 }
1995
WordPartLeft(int pos)1996 int Document::WordPartLeft(int pos) {
1997 if (pos > 0) {
1998 --pos;
1999 char startChar = cb.CharAt(pos);
2000 if (IsWordPartSeparator(startChar)) {
2001 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2002 --pos;
2003 }
2004 }
2005 if (pos > 0) {
2006 startChar = cb.CharAt(pos);
2007 --pos;
2008 if (IsLowerCase(startChar)) {
2009 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2010 --pos;
2011 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2012 ++pos;
2013 } else if (IsUpperCase(startChar)) {
2014 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2015 --pos;
2016 if (!IsUpperCase(cb.CharAt(pos)))
2017 ++pos;
2018 } else if (IsADigit(startChar)) {
2019 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2020 --pos;
2021 if (!IsADigit(cb.CharAt(pos)))
2022 ++pos;
2023 } else if (IsPunctuation(startChar)) {
2024 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2025 --pos;
2026 if (!IsPunctuation(cb.CharAt(pos)))
2027 ++pos;
2028 } else if (isspacechar(startChar)) {
2029 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2030 --pos;
2031 if (!isspacechar(cb.CharAt(pos)))
2032 ++pos;
2033 } else if (!IsASCII(startChar)) {
2034 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2035 --pos;
2036 if (IsASCII(cb.CharAt(pos)))
2037 ++pos;
2038 } else {
2039 ++pos;
2040 }
2041 }
2042 }
2043 return pos;
2044 }
2045
WordPartRight(int pos)2046 int Document::WordPartRight(int pos) {
2047 char startChar = cb.CharAt(pos);
2048 int length = Length();
2049 if (IsWordPartSeparator(startChar)) {
2050 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2051 ++pos;
2052 startChar = cb.CharAt(pos);
2053 }
2054 if (!IsASCII(startChar)) {
2055 while (pos < length && !IsASCII(cb.CharAt(pos)))
2056 ++pos;
2057 } else if (IsLowerCase(startChar)) {
2058 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2059 ++pos;
2060 } else if (IsUpperCase(startChar)) {
2061 if (IsLowerCase(cb.CharAt(pos + 1))) {
2062 ++pos;
2063 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2064 ++pos;
2065 } else {
2066 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2067 ++pos;
2068 }
2069 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2070 --pos;
2071 } else if (IsADigit(startChar)) {
2072 while (pos < length && IsADigit(cb.CharAt(pos)))
2073 ++pos;
2074 } else if (IsPunctuation(startChar)) {
2075 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2076 ++pos;
2077 } else if (isspacechar(startChar)) {
2078 while (pos < length && isspacechar(cb.CharAt(pos)))
2079 ++pos;
2080 } else {
2081 ++pos;
2082 }
2083 return pos;
2084 }
2085
// True for the two line end characters: line feed and carriage return.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2089
ExtendStyleRange(int pos,int delta,bool singleLine)2090 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2091 int sStart = cb.StyleAt(pos);
2092 if (delta < 0) {
2093 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2094 pos--;
2095 pos++;
2096 } else {
2097 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2098 pos++;
2099 }
2100 return pos;
2101 }
2102
// Return the brace that pairs with ch for (), [], {} and <>,
// or '\0' when ch is not one of those eight characters.
static char BraceOpposite(char ch) {
	// Partners sit next to each other, so flipping the lowest bit of the
	// index moves from a brace to its opposite.
	static const char bracePairs[] = "()[]{}<>";
	for (int i = 0; bracePairs[i] != '\0'; i++) {
		if (bracePairs[i] == ch)
			return bracePairs[i ^ 1];
	}
	return '\0';
}
2125
2126 // TODO: should be able to extend styled region to find matching brace
BraceMatch(int position,int)2127 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2128 char chBrace = CharAt(position);
2129 char chSeek = BraceOpposite(chBrace);
2130 if (chSeek == '\0')
2131 return - 1;
2132 char styBrace = static_cast<char>(StyleAt(position));
2133 int direction = -1;
2134 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2135 direction = 1;
2136 int depth = 1;
2137 position = NextPosition(position, direction);
2138 while ((position >= 0) && (position < Length())) {
2139 char chAtPos = CharAt(position);
2140 char styAtPos = static_cast<char>(StyleAt(position));
2141 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2142 if (chAtPos == chBrace)
2143 depth++;
2144 if (chAtPos == chSeek)
2145 depth--;
2146 if (depth == 0)
2147 return position;
2148 }
2149 int positionBeforeMove = position;
2150 position = NextPosition(position, direction);
2151 if (position == positionBeforeMove)
2152 break;
2153 }
2154 return - 1;
2155 }
2156
2157 /**
2158 * Implementation of RegexSearchBase for the default built-in regular expression engine
2159 */
2160 class BuiltinRegex : public RegexSearchBase {
2161 public:
BuiltinRegex(CharClassify * charClassTable)2162 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2163
~BuiltinRegex()2164 virtual ~BuiltinRegex() {
2165 }
2166
2167 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2168 bool caseSensitive, bool word, bool wordStart, int flags,
2169 int *length);
2170
2171 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2172
2173 private:
2174 RESearch search;
2175 std::string substituted;
2176 };
2177
2178 // Define a way for the Regular Expression code to access the document
2179 class DocumentIndexer : public CharacterIndexer {
2180 Document *pdoc;
2181 int end;
2182 public:
DocumentIndexer(Document * pdoc_,int end_)2183 DocumentIndexer(Document *pdoc_, int end_) :
2184 pdoc(pdoc_), end(end_) {
2185 }
2186
~DocumentIndexer()2187 virtual ~DocumentIndexer() {
2188 }
2189
CharAt(int index)2190 virtual char CharAt(int index) {
2191 if (index < 0 || index >= end)
2192 return 0;
2193 else
2194 return pdoc->CharAt(index);
2195 }
2196 };
2197
FindText(Document * doc,int minPos,int maxPos,const char * s,bool caseSensitive,bool,bool,int flags,int * length)2198 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2199 bool caseSensitive, bool, bool, int flags,
2200 int *length) {
2201 bool posix = (flags & SCFIND_POSIX) != 0;
2202 int increment = (minPos <= maxPos) ? 1 : -1;
2203
2204 int startPos = minPos;
2205 int endPos = maxPos;
2206
2207 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2208 startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2209 endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2210
2211 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2212 if (errmsg) {
2213 return -1;
2214 }
2215 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2216 // Replace first '.' with '-' in each property file variable reference:
2217 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2218 // Replace: $(\1-\2)
2219 int lineRangeStart = doc->LineFromPosition(startPos);
2220 int lineRangeEnd = doc->LineFromPosition(endPos);
2221 if ((increment == 1) &&
2222 (startPos >= doc->LineEnd(lineRangeStart)) &&
2223 (lineRangeStart < lineRangeEnd)) {
2224 // the start position is at end of line or between line end characters.
2225 lineRangeStart++;
2226 startPos = doc->LineStart(lineRangeStart);
2227 } else if ((increment == -1) &&
2228 (startPos <= doc->LineStart(lineRangeStart)) &&
2229 (lineRangeStart > lineRangeEnd)) {
2230 // the start position is at beginning of line.
2231 lineRangeStart--;
2232 startPos = doc->LineEnd(lineRangeStart);
2233 }
2234 int pos = -1;
2235 int lenRet = 0;
2236 char searchEnd = s[*length - 1];
2237 char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2238 int lineRangeBreak = lineRangeEnd + increment;
2239 for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2240 int startOfLine = doc->LineStart(line);
2241 int endOfLine = doc->LineEnd(line);
2242 if (increment == 1) {
2243 if (line == lineRangeStart) {
2244 if ((startPos != startOfLine) && (s[0] == '^'))
2245 continue; // Can't match start of line if start position after start of line
2246 startOfLine = startPos;
2247 }
2248 if (line == lineRangeEnd) {
2249 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2250 continue; // Can't match end of line if end position before end of line
2251 endOfLine = endPos;
2252 }
2253 } else {
2254 if (line == lineRangeEnd) {
2255 if ((endPos != startOfLine) && (s[0] == '^'))
2256 continue; // Can't match start of line if end position after start of line
2257 startOfLine = endPos;
2258 }
2259 if (line == lineRangeStart) {
2260 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2261 continue; // Can't match end of line if start position before end of line
2262 endOfLine = startPos;
2263 }
2264 }
2265
2266 DocumentIndexer di(doc, endOfLine);
2267 int success = search.Execute(di, startOfLine, endOfLine);
2268 if (success) {
2269 pos = search.bopat[0];
2270 // Ensure only whole characters selected
2271 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2272 lenRet = search.eopat[0] - search.bopat[0];
2273 // There can be only one start of a line, so no need to look for last match in line
2274 if ((increment == -1) && (s[0] != '^')) {
2275 // Check for the last match on this line.
2276 int repetitions = 1000; // Break out of infinite loop
2277 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2278 success = search.Execute(di, pos+1, endOfLine);
2279 if (success) {
2280 if (search.eopat[0] <= minPos) {
2281 pos = search.bopat[0];
2282 lenRet = search.eopat[0] - search.bopat[0];
2283 } else {
2284 success = 0;
2285 }
2286 }
2287 }
2288 }
2289 break;
2290 }
2291 }
2292 *length = lenRet;
2293 return pos;
2294 }
2295
SubstituteByPosition(Document * doc,const char * text,int * length)2296 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2297 substituted.clear();
2298 DocumentIndexer di(doc, doc->Length());
2299 search.GrabMatches(di);
2300 for (int j = 0; j < *length; j++) {
2301 if (text[j] == '\\') {
2302 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2303 unsigned int patNum = text[j + 1] - '0';
2304 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2305 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2306 substituted.append(search.pat[patNum].c_str(), len);
2307 j++;
2308 } else {
2309 j++;
2310 switch (text[j]) {
2311 case 'a':
2312 substituted.push_back('\a');
2313 break;
2314 case 'b':
2315 substituted.push_back('\b');
2316 break;
2317 case 'f':
2318 substituted.push_back('\f');
2319 break;
2320 case 'n':
2321 substituted.push_back('\n');
2322 break;
2323 case 'r':
2324 substituted.push_back('\r');
2325 break;
2326 case 't':
2327 substituted.push_back('\t');
2328 break;
2329 case 'v':
2330 substituted.push_back('\v');
2331 break;
2332 case '\\':
2333 substituted.push_back('\\');
2334 break;
2335 default:
2336 substituted.push_back('\\');
2337 j--;
2338 }
2339 }
2340 } else {
2341 substituted.push_back(text[j]);
2342 }
2343 }
2344 *length = static_cast<int>(substituted.length());
2345 return substituted.c_str();
2346 }
2347
2348 #ifndef SCI_OWNREGEX
2349
2350 #ifdef SCI_NAMESPACE
2351
CreateRegexSearch(CharClassify * charClassTable)2352 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2353 return new BuiltinRegex(charClassTable);
2354 }
2355
2356 #else
2357
CreateRegexSearch(CharClassify * charClassTable)2358 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2359 return new BuiltinRegex(charClassTable);
2360 }
2361
2362 #endif
2363
2364 #endif
2365