1
2 /******************************************************************************
3 * MODULE : python_language.cpp
4 * DESCRIPTION: the python language
5 * COPYRIGHT : (C) 2014 François Poulain
6 *******************************************************************************
7 * This software falls under the GNU general public license and comes WITHOUT
8 * ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
9 * If you don't have this file, write to the Free Software Foundation, Inc.,
10 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
11 ******************************************************************************/
12
13 #include "analyze.hpp"
14 #include "impl_language.hpp"
15 #include "scheme.hpp"
16
17 static void parse_escaped_char (string s, int& pos);
18 static void parse_number (string s, int& pos);
19 static void parse_various_number (string s, int& pos);
20 static void parse_alpha (string s, int& pos);
21 static inline bool belongs_to_identifier (char c);
22
python_language_rep(string name)23 python_language_rep::python_language_rep (string name):
24 language_rep (name), colored ("") {}
25
26 text_property
advance(tree t,int & pos)27 python_language_rep::advance (tree t, int& pos) {
28 string s= t->label;
29 if (pos==N(s))
30 return &tp_normal_rep;
31 char c= s[pos];
32 if (c == ' ') {
33 pos++;
34 return &tp_space_rep;
35 }
36 if (c == '\\') {
37 parse_escaped_char (s, pos);
38 return &tp_normal_rep;
39 }
40 if (pos+2 < N(s) && s[pos] == '0' &&
41 (s[pos+1] == 'x' || s[pos+1] == 'X' ||
42 s[pos+1] == 'o' || s[pos+1] == 'O' ||
43 s[pos+1] == 'b' || s[pos+1] == 'B')) {
44 parse_various_number (s, pos);
45 return &tp_normal_rep;
46 }
47 if ((c >= '0' && c <= '9') ||
48 (c == '.' && pos+1 < N(s) && s[pos+1] >= '0' && s[pos+1] <= '9')) {
49 parse_number (s, pos);
50 return &tp_normal_rep;
51 }
52 if (belongs_to_identifier (c)) {
53 parse_alpha (s, pos);
54 return &tp_normal_rep;
55 }
56 tm_char_forwards (s, pos);
57 return &tp_normal_rep;
58 }
59
60 array<int>
get_hyphens(string s)61 python_language_rep::get_hyphens (string s) {
62 int i;
63 array<int> penalty (N(s)+1);
64 penalty[0]= HYPH_INVALID;
65 for (i=1; i<N(s); i++)
66 if (s[i-1] == '-' && is_alpha (s[i]))
67 penalty[i]= HYPH_STD;
68 else penalty[i]= HYPH_INVALID;
69 penalty[i]= HYPH_INVALID;
70 return penalty;
71 }
72
73 void
hyphenate(string s,int after,string & left,string & right)74 python_language_rep::hyphenate (
75 string s, int after, string& left, string& right)
76 {
77 left = s (0, after);
78 right= s (after, N(s));
79 }
80
81 static void
python_color_setup_operator_openclose(hashmap<string,string> & t)82 python_color_setup_operator_openclose (hashmap<string, string> & t) {
83 string c= "operator_openclose";
84 t ("{")= c;
85 t ("[")= c;
86 t ("(")= c;
87 t (")")= c;
88 t ("]")= c;
89 t ("}")= c;
90 }
91
92 static void
python_color_setup_constants(hashmap<string,string> & t)93 python_color_setup_constants (hashmap<string, string> & t) {
94 string c= "constant";
95 t ("Ellipsis")= c;
96 t ("False")= c;
97 t ("None")= c;
98 t ("NotImplemented")= c;
99 t ("True")= c;
100 t ("__debug__")= c;
101 t ("__import__")= c;
102 t ("abs")= c;
103 t ("all")= c;
104 t ("any")= c;
105 t ("apply")= c;
106 t ("ascii")= c;
107 t ("basestring")= c;
108 t ("bin")= c;
109 t ("bool")= c;
110 t ("buffer")= c;
111 t ("bytearray")= c;
112 t ("bytes")= c;
113 t ("callable")= c;
114 t ("chr")= c;
115 t ("classmethod")= c;
116 t ("cmp")= c;
117 t ("coerce")= c;
118 t ("compile")= c;
119 t ("complex")= c;
120 t ("delattr")= c;
121 t ("dict")= c;
122 t ("dir")= c;
123 t ("divmod")= c;
124 t ("enumerate")= c;
125 t ("eval")= c;
126 t ("execfile")= c;
127 t ("file")= c;
128 t ("filter")= c;
129 t ("float")= c;
130 t ("format")= c;
131 t ("frozenset")= c;
132 t ("getattr")= c;
133 t ("globals")= c;
134 t ("hasattr")= c;
135 t ("hash")= c;
136 t ("help")= c;
137 t ("hex")= c;
138 t ("id")= c;
139 t ("input")= c;
140 t ("int")= c;
141 t ("intern")= c;
142 t ("isinstance")= c;
143 t ("issubclass")= c;
144 t ("iter")= c;
145 t ("len")= c;
146 t ("list")= c;
147 t ("locals")= c;
148 t ("long")= c;
149 t ("map")= c;
150 t ("max")= c;
151 t ("memoryview")= c;
152 t ("min")= c;
153 t ("next")= c;
154 t ("nonlocal")= c;
155 t ("object")= c;
156 t ("oct")= c;
157 t ("open")= c;
158 t ("ord")= c;
159 t ("pow")= c;
160 t ("property")= c;
161 t ("range")= c;
162 t ("raw_input")= c;
163 t ("reduce")= c;
164 t ("reload")= c;
165 t ("repr")= c;
166 t ("reversed")= c;
167 t ("round")= c;
168 t ("set")= c;
169 t ("setattr")= c;
170 t ("slice")= c;
171 t ("sorted")= c;
172 t ("staticmethod")= c;
173 t ("str")= c;
174 t ("sum")= c;
175 t ("super")= c;
176 t ("tuple")= c;
177 t ("type")= c;
178 t ("unichr")= c;
179 t ("unicode")= c;
180 t ("vars")= c;
181 t ("xrange")= c;
182 t ("zip")= c;
183 }
184
185 static void
python_color_setup_constant_exceptions(hashmap<string,string> & t)186 python_color_setup_constant_exceptions (hashmap<string, string> & t) {
187 string c= "constant";
188 t ("BaseException")= c;
189 t ("Exception")= c;
190 t ("ArithmeticError")= c;
191 t ("EnvironmentError")= c;
192 t ("LookupError")= c;
193 t ("StandardError")= c;
194 t ("AssertionError")= c;
195 t ("AttributeError")= c;
196 t ("BufferError")= c;
197 t ("EOFError")= c;
198 t ("FloatingPointError")= c;
199 t ("GeneratorExit")= c;
200 t ("IOError")= c;
201 t ("ImportError")= c;
202 t ("IndentationError")= c;
203 t ("IndexError")= c;
204 t ("KeyError")= c;
205 t ("KeyboardInterrupt")= c;
206 t ("MemoryError")= c;
207 t ("NameError")= c;
208 t ("NotImplementedError")= c;
209 t ("OSError")= c;
210 t ("OverflowError")= c;
211 t ("ReferenceError")= c;
212 t ("RuntimeError")= c;
213 t ("StopIteration")= c;
214 t ("SyntaxError")= c;
215 t ("SystemError")= c;
216 t ("SystemExit")= c;
217 t ("TabError")= c;
218 t ("TypeError")= c;
219 t ("UnboundLocalError")= c;
220 t ("UnicodeError")= c;
221 t ("UnicodeDecodeError")= c;
222 t ("UnicodeEncodeError")= c;
223 t ("UnicodeTranslateError")= c;
224 t ("ValueError")= c;
225 t ("VMSError")= c;
226 t ("WindowsError")= c;
227 t ("ZeroDivisionError")= c;
228 t ("BytesWarning")= c;
229 t ("DeprecationWarning")= c;
230 t ("FutureWarning")= c;
231 t ("ImportWarning")= c;
232 t ("PendingDeprecationWarning")= c;
233 t ("RuntimeWarning")= c;
234 t ("SyntaxWarning")= c;
235 t ("UnicodeWarning")= c;
236 t ("UserWarning")= c;
237 t ("Warning")= c;
238 }
239
240 static void
python_color_setup_declare_class(hashmap<string,string> & t)241 python_color_setup_declare_class (hashmap<string, string> & t) {
242 string c= "declare_type";
243 t ("class")= c;
244 }
245
246 static void
python_color_setup_declare_function(hashmap<string,string> & t)247 python_color_setup_declare_function (hashmap<string, string> & t) {
248 string c= "declare_function";
249 t ("def")= c;
250 t ("lambda")= c;
251 }
252
253 static void
python_color_setup_keywords(hashmap<string,string> & t)254 python_color_setup_keywords (hashmap<string, string> & t) {
255 string c= "keyword";
256 t ("as")= c;
257 t ("del")= c;
258 t ("finally")= c;
259 t ("from")= c;
260 t ("global")= c;
261 t ("import")= c;
262 t ("in")= c;
263 t ("is")= c;
264 t ("with")= c;
265 }
266
267 static void
python_color_setup_keywords_conditional(hashmap<string,string> & t)268 python_color_setup_keywords_conditional (hashmap<string, string> & t) {
269 string c= "keyword_conditional";
270 t ("break")= c;
271 t ("continue")= c;
272 t ("elif")= c;
273 t ("else")= c;
274 t ("for")= c;
275 t ("if")= c;
276 t ("while")= c;
277 }
278
279 static void
python_color_setup_keywords_control(hashmap<string,string> & t)280 python_color_setup_keywords_control (hashmap<string, string> & t) {
281 string c= "keyword_control";
282 t ("assert")= c;
283 t ("except")= c;
284 t ("exec")= c;
285 t ("pass")= c;
286 t ("print")= c;
287 t ("raise")= c;
288 t ("return")= c;
289 t ("try")= c;
290 t ("yield")= c;
291 }
292
293 static void
python_color_setup_operator(hashmap<string,string> & t)294 python_color_setup_operator (hashmap<string, string>& t) {
295 string c= "operator";
296 t ("and")= c;
297 t ("not")= c;
298 t ("or")= c;
299
300 t ("+")= c;
301 t ("-")= c;
302 t ("/")= c;
303 t ("*")= c;
304 t ("**")= c;
305 t ("//")= c;
306 t ("%")= c;
307 t ("|")= c;
308 t ("&")= c;
309 t ("^")= c;
310 t ("<less><less>")= c;
311 t ("<gtr><gtr>")= c;
312
313 t ("==")= c;
314 t ("!=")= c;
315 t ("<less><gtr>")= c;
316 t ("<less>")= c;
317 t ("<gtr>")= c;
318 t ("<less>=")= c;
319 t ("<gtr>=")= c;
320
321 t ("=")= c;
322
323 t ("+=")= c;
324 t ("-=")= c;
325 t ("/=")= c;
326 t ("*=")= c;
327 t ("**=")= c;
328 t ("//=")= c;
329 t ("%=")= c;
330 t ("|=")= c;
331 t ("&=")= c;
332 t ("^=")= c;
333 t ("<less><less>=")= c;
334 t ("<gtr><gtr>=")= c;
335
336 t ("~")= c;
337 }
338
339 static void
python_color_setup_operator_special(hashmap<string,string> & t)340 python_color_setup_operator_special (hashmap<string, string> & t) {
341 string c= "operator_special";
342 t (":")= c;
343 }
344
345 static void
python_color_setup_operator_decoration(hashmap<string,string> & t)346 python_color_setup_operator_decoration (hashmap<string, string> & t) {
347 string c= "operator_decoration";
348 t ("@")= c;
349 }
350
351 static void
python_color_setup_operator_field(hashmap<string,string> & t)352 python_color_setup_operator_field (hashmap<string, string> & t) {
353 t (".")= "operator_field";
354 }
355
// True when `c` may appear inside an identifier: an ASCII letter, an ASCII
// digit or the underscore.
static inline bool
belongs_to_identifier (char c) {
  if (c == '_') return true;
  if (c >= '0' && c <= '9') return true;
  if (c >= 'A' && c <= 'Z') return true;
  return c >= 'a' && c <= 'z';
}
363
// True when `c` is a hexadecimal digit (0-9, A-F or a-f).
static inline bool
is_hex_number (char c) {
  if (c >= '0' && c <= '9') return true;
  if (c >= 'A' && c <= 'F') return true;
  return c >= 'a' && c <= 'f';
}
368
// True when `c` is an ASCII decimal digit.
static inline bool
is_number (char c) {
  return !(c < '0' || c > '9');
}
373
374 static void
parse_identifier(hashmap<string,string> & t,string s,int & pos)375 parse_identifier (hashmap<string, string>& t, string s, int& pos) {
376 int i=pos;
377 if (pos >= N(s)) return;
378 if (is_number (s[i])) return;
379 while (i<N(s) && belongs_to_identifier (s[i])) i++;
380 if (!(t->contains (s (pos, i)))) pos= i;
381 }
382
383 static void
parse_alpha(string s,int & pos)384 parse_alpha (string s, int& pos) {
385 static hashmap<string,string> empty;
386 parse_identifier (empty, s, pos);
387 }
388
389 static void
parse_blanks(string s,int & pos)390 parse_blanks (string s, int& pos) {
391 while (pos<N(s) && (s[pos] == ' ' || s[pos] == '\t')) pos++;
392 }
393
394 static void
parse_escaped_char(string s,int & pos)395 parse_escaped_char (string s, int& pos) {
396 int n= N(s), i= pos++;
397 if (i+2 >= n) return;
398 if (s[i] != '\\')
399 return;
400 i++;
401 if (test (s, i, "newline"))
402 pos+= 7;
403 else if (s[i] == '\\' || s[i] == '\'' || s[i] == '\"' ||
404 s[i] == 'a' || s[i] == 'b' || s[i] == 'f' ||
405 s[i] == 'n' || s[i] == 'r' || s[i] == 't' ||
406 s[i] == 'N' || s[i] == 'v')
407 pos+= 1;
408 else if (s[i] == 'o' || s[i] == 'x')
409 pos+= 3;
410 else if (s[i] == 'u')
411 pos+= 5;
412 else if (s[i] == 'U')
413 pos+= 9;
414 return;
415 }
416
417 static bool
parse_string(string s,int & pos,bool force)418 parse_string (string s, int& pos, bool force) {
419 int n= N(s);
420 static string delim;
421 if (pos >= n) return false;
422 if (test (s, pos, "\"\"\"") || test (s, pos, "\'\'\'")) {
423 delim= s(pos, pos+3);
424 pos+= N(delim);
425 }
426 else if (s[pos] == '\"' || s[pos] == '\'') {
427 delim= s(pos, pos+1);
428 pos+= N(delim);
429 }
430 else if (!force)
431 return false;
432 while (pos<n && !test (s, pos, delim)) {
433 if (s[pos] == '\\') {
434 return true;
435 }
436 else
437 pos++;
438 }
439 if (test (s, pos, delim))
440 pos+= N(delim);
441 return false;
442 }
443
444 static string
parse_keywords(hashmap<string,string> & t,string s,int & pos)445 parse_keywords (hashmap<string,string>& t, string s, int& pos) {
446 int i= pos;
447 if (pos>=N(s)) return "";
448 if (is_number (s[i])) return "";
449 while ((i<N(s)) && belongs_to_identifier (s[i])) i++;
450 string r= s (pos, i);
451 if (t->contains (r)) {
452 string tr= t(r);
453 if (tr == "keyword_conditional" ||
454 tr == "keyword_control" ||
455 tr == "keyword" ||
456 tr == "declare_type" ||
457 tr == "declare_function" ||
458 tr == "constant") {
459 pos=i;
460 return tr;
461 }
462 }
463 return "";
464 }
465
466 static string
parse_operators(hashmap<string,string> & t,string s,int & pos)467 parse_operators (hashmap<string,string>& t, string s, int& pos) {
468 int i;
469 for (i=12; i>=1; i--) {
470 string r=s(pos,pos+i);
471 if (t->contains (r)) {
472 string tr= t(r);
473 if (tr == "operator" ||
474 tr == "operator_field" ||
475 tr == "operator_special" ||
476 tr == "operator_openclose") {
477 pos=pos+i;
478 return tr;
479 }
480 else if (t(r) == "operator_decoration") {
481 pos=pos+i;
482 while ((pos<N(s)) && belongs_to_identifier (s[pos])) pos++;
483 return "operator_special";
484 }
485 }
486 }
487 return "";
488 }
489
490 static void
parse_various_number(string s,int & pos)491 parse_various_number (string s, int& pos) {
492 if (!(pos+2 < N(s) && s[pos] == '0' &&
493 (s[pos+1] == 'x' || s[pos+1] == 'X' ||
494 s[pos+1] == 'o' || s[pos+1] == 'O' ||
495 s[pos+1] == 'b' || s[pos+1] == 'B')))
496 return;
497 pos+= 2;
498 while (pos<N(s) && is_hex_number (s[pos])) pos++;
499 if (pos<N(s) && (s[pos] == 'l' || s[pos] == 'L')) pos++;
500 }
501
502 static void
parse_number(string s,int & pos)503 parse_number (string s, int& pos) {
504 int i= pos;
505 if (pos>=N(s)) return;
506 while (i<N(s) && (is_number (s[i]) || s[i] == '.'))
507 i++;
508 if (i == pos) return;
509 if (i<N(s) && (s[i] == 'e' || s[i] == 'E')) {
510 i++;
511 if (i<N(s) && s[i] == '-') i++;
512 while (i<N(s) && (is_number (s[i]) || s[i] == '.')) i++;
513 if (i<N(s) && (s[i] == 'j')) i++;
514 }
515 else if (i<N(s) && (s[i] == 'l' || s[i] == 'L')) i++;
516 else if (i<N(s) && (s[i] == 'j')) i++;
517 pos= i;
518 }
519
520 static void
parse_comment_single_line(string s,int & pos)521 parse_comment_single_line (string s, int& pos) {
522 if (pos>=N(s)) return;
523 if (s[pos]!='#') return;
524 pos=N(s);
525 }
526
527 string
get_color(tree t,int start,int end)528 python_language_rep::get_color (tree t, int start, int end) {
529 static bool setup_done= false;
530 if (!setup_done) {
531 /*
532 * NOTE: it seems there is no way to take into account multiline
533 * dependencies. Then such weird syntax like
534 *
535 * str= """some string beginning ...
536 * some string end"""
537 *
538 * will not be correctly typeset.
539 *
540 */
541
542 python_color_setup_constants (colored);
543 python_color_setup_constant_exceptions (colored);
544 python_color_setup_declare_class (colored);
545 python_color_setup_declare_function (colored);
546 python_color_setup_keywords (colored);
547 python_color_setup_keywords_conditional (colored);
548 python_color_setup_keywords_control (colored);
549 python_color_setup_operator (colored);
550 python_color_setup_operator_special (colored);
551 python_color_setup_operator_decoration (colored);
552 python_color_setup_operator_openclose (colored);
553 python_color_setup_operator_field (colored);
554 setup_done= true;
555 }
556
557 static string none= "";
558 if (start >= end) return none;
559 string s= t->label;
560 int pos= 0;
561 int opos=0;
562 string type;
563 bool in_str= false;
564 bool in_esc= false;
565 do {
566 type= none;
567 do {
568 opos= pos;
569 if (in_str) {
570 in_esc= parse_string (s, pos, true);
571 in_str= false;
572 if (opos < pos) {
573 type= "constant_string";
574 break;
575 }
576 }
577 else if (in_esc) {
578 parse_escaped_char (s, pos);
579 in_esc= false;
580 in_str= true;
581 if (opos < pos) {
582 type= "constant_char";
583 break;
584 }
585 }
586 else {
587 parse_blanks (s, pos);
588 if (opos < pos){
589 break;
590 }
591 parse_comment_single_line (s, pos);
592 if (opos < pos) {
593 type= "comment";
594 break;
595 }
596 in_esc= parse_string (s, pos, false);
597 if (opos < pos) {
598 type= "constant_string";
599 break;
600 }
601 type= parse_keywords (colored, s, pos);
602 if (opos < pos) {
603 break;
604 }
605 parse_various_number (s, pos);
606 if (opos < pos) {
607 type= "constant_number";
608 break;
609 }
610 parse_number (s, pos);
611 if (opos < pos) {
612 type= "constant_number";
613 break;
614 }
615 type= parse_operators (colored, s, pos);
616 if (opos < pos) {
617 break;
618 }
619 parse_identifier (colored, s, pos);
620 if (opos < pos) {
621 type= none;
622 break;
623 }
624 }
625 pos= opos;
626 pos++;
627 }
628 while (false);
629 }
630 while (pos <= start);
631 if (type == none) return none;
632 return decode_color ("python", encode_color (type));
633 }
634