1 /**
2  *  Yudit Unicode Editor Source File
3  *
4  *  GNU Copyright (C) 1997-2006  Gaspar Sinai <gaspar@yudit.org>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License, version 2,
 *  dated June 1991. See file COPYING for details.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, write to the Free Software
17  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18  */
19 
20 #include "stoolkit/syntax/SSyntax.h"
21 #include "stoolkit/syntax/SHunspellPattern.h"
22 #include "stoolkit/syntax/SSyntaxMarker.h"
23 #include "stoolkit/SIO.h"
24 #include "stoolkit/SProperties.h"
25 #include "stoolkit/SUtil.h"
26 
27 
// File-local list of directories searched for syntax files. This is only
// the initial value: setPath() and both guessPath() variants overwrite it.
// NOTE(review): the single string presumably gets split on ',' by the
// SStringVector constructor - confirm against SStringVector.
static SStringVector syntaxSearchPath(
   "/,syntax,../syntax,/etc/syntax,/usr/share/yudit/syntax");
30 
SSyntax(void)31 SSyntax::SSyntax (void) : parser ("")
32 {
33   textData = 0;
34   pattern = 0;
35   listener = 0;
36   syntaxListener = 0;
37   syntaxState = 0;
38 }
39 
~SSyntax()40 SSyntax::~SSyntax ()
41 {
42   if (syntaxState) delete syntaxState;
43   if (pattern) delete pattern;
44   clear ();
45 }
46 
47 bool
isSupported(const SString & syn)48 SSyntax::isSupported (const SString& syn)
49 {
50   SStringVector v(syn, ":", true);
51   if (v.size() != 2) return false;
52   if (v[0] == "test") return true;
53   if (v[0] == "hunspell") return true;
54   return false;
55 /*
56   SString ps_syn = syn;
57   ps_syn.append (".dic");
58   SFile f(ps_syn, syntaxSearchPath);
59   return (f.size() > 0);
60 */
61 }
62 
63 /**
64  * @param ps - supported properties:
65  *  none
66  *  xml
67  *  properties
68  */
69 bool
setSyntax(const SString & ps)70 SSyntax::setSyntax (const SString&  ps)
71 {
72   parser = ps;
73   if (pattern)
74   {
75     delete pattern;
76     pattern = 0;
77   }
78   if (syntaxState)
79   {
80     delete syntaxState;
81     syntaxState = 0;
82     if (syntaxListener) syntaxListener->syntaxChanged (
83        SSyntaxListener::SD_PARSING_DONE);
84   }
85   clear ();
86   if (ps == "")
87   {
88     return true;
89   }
90   SStringVector split (ps, ":", true);
91   if (split.size() != 2)
92   {
93     parser = "";
94     return false;
95   }
96   if (split[0] == "test") // update isSupported()
97   {
98     if (split[1] == "yuko")
99     {
100       pattern = new SPattern ();
101       CHECK_NEW (pattern);
102       lineGlobalChange ();
103       return true;
104     }
105     else if (split[1] == "lines")
106     {
107       pattern = new SPattern ();
108       CHECK_NEW (pattern);
109       lineGlobalChange ();
110       return true;
111     }
112   }
113   else if (split[0] == "hunspell")
114   {
115     pattern = new SHunspellPattern (split[1], syntaxSearchPath);
116     CHECK_NEW (pattern);
117     if (!pattern->isValid ())
118     {
119       delete pattern;
120       pattern = 0;
121       parser = "";
122       return false;
123     }
124     lineGlobalChange ();
125     return true;
126   }
127   parser = "";
128   return false;
129 }
130 
131 const SString&
getParser() const132 SSyntax::getParser () const
133 {
134   return parser;
135 }
136 
clear()137 void SSyntax::clear ()
138 {
139   unsigned int i;
140   for (i=0; i<syntaxLines.size(); i++)
141   {
142      delete syntaxLines[i];
143   }
144   syntaxLines.clear ();
145   for (i=0; i<dataLines.size(); i++)
146   {
147      delete dataLines[i];
148   }
149   dataLines.clear ();
150   clearIterator ();
151   if (syntaxState)
152   {
153     delete syntaxState;
154     syntaxState = 0;
155     if (syntaxListener) syntaxListener->syntaxChanged (
156        SSyntaxListener::SD_PARSING_DONE);
157   }
158 }
159 
160 void
clearIterator()161 SSyntax::clearIterator ()
162 {
163   iteratorDataIndex = STextIndex (0,0);
164   iteratorSyntaxIndex = STextIndex (0,0);
165 }
166 
167 void
setTextData(const STextData * td)168 SSyntax::setTextData (const STextData* td)
169 {
170   textData = td;
171   clear ();
172   lineGlobalChange ();
173 }
174 
175 SSyntax::SS_Tag
getTag(const STextIndex & index)176 SSyntax::getTag (const STextIndex& index)
177 {
178   if (parser.size()==0) return SD_NONE;
179   if (index.line > syntaxLines.size()) return SD_NONE;
180   if (index.index > syntaxLines[index.line]->size()) return SD_NONE;
181   // Strip off control characters
182   return (SS_Tag) (syntaxLines[index.line]->peek (index.index) & 0xff);
183 }
184 
185 SSyntax::SS_Tag
getTagByTDI(const STextIndex & index)186 SSyntax::getTagByTDI (const STextIndex& index)
187 {
188   if (parser.size()==0) return SD_NONE;
189   if (index.line > textData->size()) return SD_NONE;
190   if (index.line > syntaxLines.size()) return SD_NONE;
191   if (index.index > textData->size(index.line)) return SD_NONE;
192   if (parser == "test:lines") return (SS_Tag)(index.line % (int) SD_MAX);
193 
194   // return to previous iterator
195   if (index.line != iteratorDataIndex.line
196      || index.index < iteratorDataIndex.index)
197   {
198      iteratorDataIndex.index = 0;
199      iteratorSyntaxIndex.index = 0;
200   }
201   iteratorDataIndex.line = index.line;
202   iteratorSyntaxIndex.line = index.line;
203 
204   const SGlyph* g0 = (iteratorDataIndex.index > 0)
205    ? textData->peekGlyphAt (STextIndex(iteratorDataIndex.line,
206    iteratorDataIndex.index-1)) : 0;
207   while (iteratorDataIndex.index < index.index)
208   {
209     const SGlyph* g = textData->peekGlyphAt (iteratorDataIndex);
210     SV_UCS4 chars = g->getChars ();
211     if (chars.size() == 0) break; // sanity
212     SV_UCS4 emb = g->getEmbeddingMarks(g0);
213     g0 = g;
214     iteratorSyntaxIndex.index += (chars.size() + emb.size());
215     iteratorDataIndex.index = iteratorDataIndex.index + 1;
216   }
217   // Strip off embedding control characters
218   unsigned int i=0;
219   unsigned int max = dataLines[index.line]->size();
220   for (i=iteratorSyntaxIndex.index; i<max; i++)
221   {
222     SS_UCS4 c = dataLines[index.line]->peek (i);
223     if (c!=SD_CD_LRO && c!=SD_CD_RLO && c!=SD_CD_LRE && c!=SD_CD_RLE
224        && c!=SD_CD_PDF) // most likely we wont have SD_CD_PDF
225     {
226       break;
227     }
228   }
229   if (i==max) return SD_ERROR;
230   if (syntaxLines[index.line]->size()<=i) return SD_ERROR;
231   return (SS_Tag) (syntaxLines[index.line]->peek (i) & 0xff);
232 }
233 
234 void
lineRemoved(void * src,unsigned int index)235 SSyntax::lineRemoved (void* src, unsigned int index)
236 {
237   if (parser.size()==0) return;
238   if (textData->size() == 0)
239   {
240      clear ();
241      return;
242   }
243   if (index >= dataLines.size () || index > textData->size())
244   {
245 #if DEBUG_PARSER
246      fprintf (stderr,
247         "ERROR: lineRemoved index=%u lines.size=%u textData.size=%u\n",
248          index, lines.size(), textData->size());
249 #endif
250      lineGlobalChange();
251      return;
252   }
253   delete dataLines[index];
254   dataLines.remove (index);
255   delete syntaxLines[index];
256   syntaxLines.remove (index);
257   clearIterator ();
258   updateSyntaxState (STextIndex (index, 0));
259 }
260 
261 void
lineInserted(void * src,unsigned int index)262 SSyntax::lineInserted (void* src, unsigned int index)
263 {
264   if (parser.size()==0) return;
265   if (index > dataLines.size () || index >= textData->size())
266   {
267 #if 0
268      fprintf (stderr,
269         "ERROR: lineInserted index=%u lines.size=%u textData.size=%u\n",
270          index, dataLines.size(), textData->size());
271 #endif
272      lineGlobalChange();
273      return;
274   }
275   SV_UCS4* l = new SV_UCS4(textData->getChars (index));
276   CHECK_NEW (l);
277   dataLines.insert (index, l);
278 
279   SSyntaxRow* row = new SSyntaxRow();
280   CHECK_NEW (row);
281   for (unsigned int i=0; i<l->size(); i++)
282   {
283     row->append (0);
284   }
285   syntaxLines.insert (index, row);
286   clearIterator ();
287   updateSyntaxState (STextIndex (index, 0));
288 }
289 
290 void
lineChanged(void * src,unsigned int index)291 SSyntax::lineChanged (void* src, unsigned int index)
292 {
293   if (parser.size()==0) return;
294   if (index > dataLines.size () || index >= textData->size())
295   {
296 #if 0
297      fprintf (stderr,
298         "ERROR: lineChanged index=%u lines.size=%u textData.size=%u\n",
299          index, dataLines.size(), textData->size());
300 #endif
301      lineGlobalChange();
302      return;
303   }
304 
305   SV_UCS4* newdl = new SV_UCS4 (textData->getChars (index));
306   CHECK_NEW (newdl);
307   // check what changed.
308   unsigned int floor = 0;
309   unsigned int ceiling = 0;
310 
311   unsigned int lsize = newdl->size();
312   unsigned int dsize = dataLines[index]->size();
313 
314   while (floor < lsize && floor < dsize)
315   {
316     if (newdl->peek  (floor) != dataLines[index]->peek (floor)) break;
317     floor++;
318   }
319   while (ceiling < lsize && ceiling < dsize
320       && floor + ceiling < dsize && floor + ceiling <  lsize)
321   {
322     if (newdl->peek  (lsize-ceiling-1)
323        != dataLines[index]->peek (dsize-ceiling-1)) break;
324     ceiling++;
325   }
326   SSyntaxRow* newsn = new SSyntaxRow();
327   CHECK_NEW (newsn);
328   // copy old data
329   unsigned int i;
330   // fprintf (stderr, "floor=%u ceiling=%u\n", floor, ceiling);
331   // keep the syntax but remove the control
332   for (i=0; i<floor; i++)
333   {
334     newsn->append (0xff & (unsigned int)syntaxLines[index]->peek (i));
335   }
336   for (i=0; i<ceiling; i++)
337   {
338     newsn->insert (floor, 0xff & (unsigned int)syntaxLines[index]->peek (dsize-i-1));
339   }
340   for (i=floor; i<lsize-ceiling; i++)
341   {
342     newsn->insert (i, 0);
343   }
344   if (newdl->size() != newsn->size())
345   {
346     fprintf (stderr, "Internal error SSyntax::lineChanged.");
347     lineGlobalChange();
348     return;
349   }
350 
351   delete dataLines[index];
352   delete syntaxLines[index];
353   dataLines.replace (index, newdl);
354   syntaxLines.replace (index, newsn);
355   clearIterator ();
356   updateSyntaxState (STextIndex (index, 0));
357 }
358 
359 void
lineGlobalChange()360 SSyntax::lineGlobalChange ()
361 {
362   clear ();
363   if (parser.size()==0) return;
364 
365   for (unsigned i=0; i<textData->size(); i++)
366   {
367     lineInserted (this, i);
368   }
369   updateSyntaxState (STextIndex(0,0));
370 }
371 void
updateSyntaxState(const STextIndex ndx)372 SSyntax::updateSyntaxState (const STextIndex ndx)
373 {
374   if (parser.size()==0)
375   {
376     if (syntaxState)
377     {
378       delete syntaxState;
379       syntaxState = 0;
380       if (syntaxListener) syntaxListener->syntaxChanged (
381           SSyntaxListener::SD_PARSING_DONE);
382     }
383     return;
384   }
385   if (syntaxState)
386   {
387     STextIndex crawlIndex = syntaxState->getCurrentIndex ();
388     // If the crawlIndex is less than our index, dont do anything,
389     // this point is not checked yet.
390     // the linesizes are cached as we crawl, so we should do this.
391     if (crawlIndex.line < ndx.line)
392     {
393        return;
394     }
395     delete syntaxState;
396     syntaxState = 0;
397   }
398   // create a new syntaxState
399   SSyntaxMarker* marker = new SSyntaxMarker (syntaxLines, dataLines, ndx);
400   CHECK_NEW (marker);
401   SMatcher* matcher = new SMatcher (*pattern, *marker);
402   CHECK_NEW (matcher);
403   // effectively this is an idle timer.
404   STimer* timer = STimer::newTimer(0, this);
405   syntaxState = new SSyntaxState ( matcher, marker, timer);
406   CHECK_NEW (syntaxState);
407   if (syntaxListener) syntaxListener->syntaxChanged (
408        SSyntaxListener::SD_PARSING_STARTED);
409 }
410 
411 // do another iteration of syntax checking
412 // return false if finished, and cleanup syntaxState.
413 bool
timeout(const SEventSource * s)414 SSyntax::timeout (const SEventSource* s)
415 {
416   if (syntaxState == 0)
417   {
418     return false;// never
419   }
420   // 100 characters at a time
421   unsigned int count = 0;
422   unsigned int oldN = SD_MATCH_EOD;
423   SS_UCS4 n = 0;
424   while ((n=syntaxState->matcher->find (true)) != SD_MATCH_EOD)
425   {
426     if (n==SD_MATCH_AGAIN)
427     {
428       if (count < SD_UNIT_WORK_COUNT) continue;
429       return true; // call timer again.
430     }
431     count++;
432     if (oldN == n)
433     {
434       fprintf (stderr, "Detected infinite loop in matcher at %u.\n",
435          oldN);
436       n = SD_MATCH_EOD;
437       break;
438     }
439     oldN = n;
440     applyActions ();
441 
442     // set a sync marker to begin here
443     STextIndex idx = syntaxState->marker->position2Index (n);
444     int syn = syntaxState->marker->getSyntaxAt (idx);
445     syn = syn | SGC_BEGIN_MARK;
446     syntaxState->marker->setSyntaxAt (idx, syn);
447   }
448   // end of file reached.
449   applyActions ();
450   if (n != SD_MATCH_EOD)
451   {
452     // set a sync marker to begin here
453     STextIndex idx = syntaxState->marker->position2Index (n);
454     int syn = syntaxState->marker->getSyntaxAt (idx);
455     syn = syn | SGC_BEGIN_MARK;
456     syntaxState->marker->setSyntaxAt (idx, syn);
457   }
458   delete syntaxState;
459   syntaxState = 0;
460   if (syntaxListener) syntaxListener->syntaxChanged (
461        SSyntaxListener::SD_PARSING_DONE);
462 //  fprintf (stderr, "SGC syntax finished.\n");
463   return false;
464 }
465 
466 void
applyActions()467 SSyntax::applyActions ()
468 {
469   syntaxState->marker->beginActionBlock ();
470   syntaxState->matcher->applyActions (*syntaxState->marker);
471   syntaxState->marker->endActionBlock ();
472   STextIndex minModified = syntaxState->marker->minModified;
473   STextIndex maxModified = syntaxState->marker->maxModified;
474   // maxmodified is incluside.
475   if (minModified <= maxModified)
476   {
477     // we are lazy, and set whole line modified instead of converting
478     // our dataLine index to real textData index.
479     maxModified.line++;
480     maxModified.index=0;
481     // reverse index.
482     unsigned int lineCeiling = (maxModified.line >= textData->size())
483        ?  0 : textData->size() -  maxModified.line;
484     STextDataEvent evt (minModified);
485     if (listener)
486     {
487        STextDataEvent evt (minModified);
488        evt.setRemaining (STextIndex (lineCeiling, 0));
489        evt.attribute = true;
490        listener->textChanged (this, evt);
491     }
492   }
493 }
494 
495 // It is a setter only
496 void
addTextDataListener(STextDataListener * _listener)497 SSyntax::addTextDataListener (STextDataListener* _listener)
498 {
499   listener = _listener;
500 }
501 
502 void
addSyntaxListener(SSyntaxListener * _listener)503 SSyntax::addSyntaxListener (SSyntaxListener* _listener)
504 {
505   syntaxListener = _listener;
506 }
507 
508 void
setPath(const SStringVector & l)509 SSyntax::setPath (const SStringVector& l)
510 {
511   syntaxSearchPath = l;
512 }
513 
514 const SStringVector&
getPath()515 SSyntax::getPath ()
516 {
517   return syntaxSearchPath;
518 }
519 
520 /**
521  * search files for property in order and set the path to the
522  * property. Always add YUDIT_DATA/syntax
523  */
524 void
guessPath(const SStringVector & files,const SString & property)525 SSyntax::guessPath (const SStringVector& files, const SString& property)
526 {
527 
528   SStringVector outDataPath;
529   for (unsigned int i=0; i<files.size(); i++)
530   {
531     SProperties p;
532     loadProperties (files[i], &p);
533     if (p.get (property))
534     {
535       SStringVector v(p[property], ",:;");
536       for (unsigned int j=0; j<v.size(); j++)
537       {
538          outDataPath.append (v[j]);
539       }
540     }
541   }
542   SString c1 = getHome();
543   c1.append ("/.yudit/syntax");
544   SString c2 = getPrefix();
545   c2.append ("/syntax");
546   if (outDataPath.size()!=0)
547   {
548     outDataPath.append (c1);
549     outDataPath.append (c2);
550     syntaxSearchPath = outDataPath;
551   }
552   else
553   {
554     outDataPath.append (c1);
555     outDataPath.append (c2);
556     outDataPath.append (syntaxSearchPath);
557     syntaxSearchPath = outDataPath;
558   }
559 //fprintf (stderr, "syntaxpath is %*.*s\n", SSARGS(syntaxSearchPath.join(",")));
560 }
561 
562 void
guessPath()563 SSyntax::guessPath()
564 {
565   SString c1 = getHome();
566   c1.append ("/.yudit/yudit.properties");
567   SString c2 = getPrefix();
568   c2.append ("/config/yudit.properties");
569   SStringVector v;
570   v.append (c1);
571   v.append (c2);
572   guessPath (v, "yudit.syntaxpath");
573 }
574 
575 
576 // Get available syntax highlight categories
577 SStringVector
getCategories()578 SSyntax::getCategories ()
579 {
580   SStringVector ret;
581 //  ret.append ("test");
582   ret.append ("hunspell");
583   return SStringVector(ret);
584 }
585 
586 // Get available syntax within a category. Please note that
587 // syntax itself should be unique across all categories.
588 SStringVector
getAvaliableList(const SString & category)589 SSyntax::getAvaliableList (const SString& category)
590 {
591   SStringVector ret;
592   if (category == "test")
593   {
594     ret.append ("lines");
595     ret.append ("yuko");
596     return SStringVector(ret);
597   }
598   SProperties   prop;
599   unsigned int i;
600   unsigned int j;
601   if (category == "hunspell")
602   {
603     SStringVector p("*.dic");
604     for (i=syntaxSearchPath.size(); i>0; i--)
605     {
606       SDir dir (syntaxSearchPath[i-1]);
607       SStringVector f = dir.list (p);
608       for (unsigned int j=0; j<f.size(); j++)
609       {
610         SString s = f[j];
611         if (s.size() > 4) s.truncate (s.size()-4);
612         prop.put (s, s);
613       }
614     }
615   }
616   for (i=0; i<prop.size(); i++)
617   {
618     for (j=0; j<prop.size(i); j++)
619     {
620        ret.append (*prop.get (i, j));
621     }
622   }
623   ret.sort();
624   return SStringVector(ret);
625 }
626 
627 SString
getFolderFor(const SString & name)628 SSyntax::getFolderFor (const SString& name)
629 {
630   SStringVector v (name, ":", true);
631   if (v.size () != 2) return (SString ("none"));
632   if (v[0] == "simple") return  (SString ("built-in"));
633   if (v[0] == "test") return  (SString ("built-in"));
634   if (v[0] == "hunspell")
635   {
636     return SHunspellPattern::getFolderFor (v[1], syntaxSearchPath);
637   }
638   return SString ("");
639 }
640 
641 SString
getMissingFile(const SString & name)642 SSyntax::getMissingFile (const SString& name)
643 {
644   SStringVector v (name, ":", true);
645   if (v.size () != 2) return (SString (""));
646   if (v[0] == "simple") return  (SString (""));
647   if (v[0] == "test") return  (SString (""));
648   if (v[0] == "hunspell")
649   {
650     return SHunspellPattern::getMissingFile (v[1], syntaxSearchPath);
651   }
652   return SString ("");
653 }
654