// A lexer to print out all XML tags in a file.
// Uses lazy quantifiers for compact expressions.
// Limitations: does not check UTF-8 encoding validity, cannot handle DTDs.
//
// Output: every element name on its own line, indented by its nesting depth
// (opening and closing tag of one element share the same indent), followed at
// end of input by a line saying whether the tags were balanced.
//
// The INITIAL state scans document content; the exclusive ATTRIBUTES state
// scans the inside of an opening tag or processing instruction up to its
// terminating ">", "/>" or "?>".

%{
#include <stdio.h>
#include <string.h>

int level = 0;             // current element nesting depth
static int unmatched = 0;  // set once a close had no open element to match

// Leave the current element.  The depth is never taken below zero: a close
// without a matching open is remembered in `unmatched` instead, so that a
// later stray opening tag cannot cancel it out and hide the imbalance.
static void close_element()
{
  if (level > 0)
    --level;
  else
    unmatched = 1;
}
%}

%o dotall main

name                    [A-Za-z_:\x80-\xFF][-.0-9A-Za-z_:\x80-\xFF]*
pi                      <\?{name}
comment                 <!--.*?-->
open                    <{name}
close                   <\/{name}[ \t\r\n]*>
cdata                   <!\[CDATA\[.*?]]>
string                  \".*?\"|'.*?'

%x ATTRIBUTES

%%

{comment}               |
{cdata}                 /* skip comments and CDATA sections */

{pi}                    start(ATTRIBUTES);

{open}                  {
                          /* text() + 1 skips the leading "<" */
                          printf("%*s%s\n", level++, "", text() + 1);
                          start(ATTRIBUTES);
                        }

{close}                 {
                          /* text() + 2 skips the leading "</"; the name ends at
                             the first white space character or at the ">" */
                          const char *tag = text() + 2;
                          close_element();
                          printf("%*s%.*s\n", level, "", (int)strcspn(tag, " \t\r\n>"), tag);
                        }

<<EOF>>                 {
                          printf("Tags are %sbalanced\n", (level || unmatched) ? "im" : "");
                          return 0;
                        }

<ATTRIBUTES>"/>"        {
                          /* empty-element tag: undo the increment done by {open} */
                          close_element();
                          start(INITIAL);
                        }

<ATTRIBUTES>"?>"        |
<ATTRIBUTES>">"         start(INITIAL);

<ATTRIBUTES>{name}      |
<ATTRIBUTES>{string}    /* skip attribute names and strings */

<*>.                    /* skip anything else */

%%