1#!/usr/local/bin/python -u
2#
# This is the API builder: it parses the C sources and builds the
# formal description of the API in XML.
5#
6# See Copyright for the status of this software.
7#
8# daniel@veillard.com
9#
10import os, sys
11import string
12import glob
13
# Global debug switch (its consumers are outside this part of the file).
debug=0
# Name of a single symbol to trace: the identifier and index classes print
# a message whenever a symbol with this name is defined, updated or
# registered.  None disables the tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None
17
18#
19# C parser analysis code
20#
# File names excluded from the API analysis, each mapped to a short
# human-readable reason for the exclusion.
# NOTE(review): the code consulting this table is outside this section;
# presumably entries are matched against the scanned file names - confirm
# (in particular for the "trio" and "/save.h" keys, which are not plain
# file names).
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
  "rngparser.c": "not yet integrated",
  "rngparser.h": "not yet integrated",
  "elfgcchack.h": "not a normal header",
  "testHTML.c": "test tool",
  "testReader.c": "test tool",
  "testSchemas.c": "test tool",
  "testXPath.c": "test tool",
  "testAutomata.c": "test tool",
  "testModule.c": "test tool",
  "testRegexp.c": "test tool",
  "testThreads.c": "test tool",
  "testC14N.c": "test tool",
  "testRelax.c": "test tool",
  "testSAX.c": "test tool",
  "testURI.c": "test tool",
  "testapi.c": "generated regression tests",
  "runtest.c": "regression tests program",
  "runsuite.c": "regression tests program",
  "tst.c": "not part of the library",
  "test.c": "not part of the library",
  "testdso.c": "test for dynamid shared libraries",
  "testrecurse.c": "test for entities recursions",
  "xzlib.h": "Internal API only 2.8.0",
  "buf.h": "Internal API only 2.9.0",
  "enc.h": "Internal API only 2.9.0",
  "/save.h": "Internal API only 2.9.0",
  "timsort.h": "Internal header only for xpath.c 2.9.0",
}
60
# Words (mostly portability / compiler-attribute macros) that the C parser
# should not treat as part of a declaration.  Each entry maps the word to
# a (count, description) tuple.
# NOTE(review): the consumer of this table is outside this section;
# presumably `count` is the number of tokens following the word that must
# be dropped together with it (e.g. the parenthesised argument of
# __declspec) - confirm against the parser code.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "XMLCDECL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "__stdcall": (0, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "ATTRIBUTE_DESTRUCTOR": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
}
86
def escape(raw):
    """Return *raw* with the five XML special characters escaped.

    '&', '<', '>', "'" and '"' are replaced by their predefined XML
    entities so the result can be embedded in element content or in an
    attribute value.
    """
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the following replacements would be escaped a second time.
    raw = raw.replace('&', '&amp;')
    raw = raw.replace('<', '&lt;')
    raw = raw.replace('>', '&gt;')
    raw = raw.replace("'", '&apos;')
    raw = raw.replace('"', '&quot;')
    return raw
94
def uniq(items):
    """Return the distinct elements of *items* as a list.

    Elements must be hashable; each one is kept once, at the position of
    its first occurrence.
    """
    return list(dict.fromkeys(items))
100
class identifier:
    """Record describing one symbol found while parsing the C sources.

    Attributes:
        name: the symbol name.
        header: header associated with the symbol, or None.
        module: module (file) associated with the symbol, or None.
        type: kind of symbol as a string (e.g. "function"), or None.
        info, extra: opaque payloads supplied by the caller.
        lineno: line number supplied by the caller.
        static: 0 by default, changed through set_static().
        conditionals: private copy of the list of conditionals passed in,
            or None when there are none.
    """
    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " +  repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r


    def set_header(self, header):
        self.header = header
    def set_module(self, module):
        self.module = module
    def set_type(self, type):
        self.type = type
    def set_info(self, info):
        self.info = info
    def set_extra(self, extra):
        self.extra = extra
    def set_lineno(self, lineno):
        self.lineno = lineno
    def set_static(self, static):
        self.static = static
    def set_conditionals(self, conditionals):
        """Store a copy of *conditionals*; None or an empty list is
        normalised to None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            # copy, so later changes to the caller's list are not seen here
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name
    def get_header(self):
        # used to return self.module by mistake
        return self.header
    def get_module(self):
        return self.module
    def get_type(self):
        return self.type
    def get_info(self):
        return self.info
    def get_lineno(self):
        return self.lineno
    def get_extra(self):
        return self.extra
    def get_static(self):
        return self.static
    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge new information into this record.

        header and type are only filled in when still unset; module is
        replaced when unset or when it currently equals the header; info,
        extra and conditionals are overwritten by any non-None value.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # used to store `module` in the header slot by mistake
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)
191
class index:
    """Symbol table for the identifiers collected while parsing.

    Every registered symbol is stored in `identifiers`; depending on its
    type it is also stored in one of the per-kind dictionaries
    (functions, variables, includes, structs, enums, typedefs, macros) or,
    for references, in `references`.  All dictionaries are keyed by the
    symbol name.
    """
    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _record(self, name, header, module, static, type, lineno, info,
                extra, conditionals):
        """Create the identifier record for *name* and store it in
        self.identifiers, replacing any record already present."""
        # NOTE: earlier revisions first tried to update an existing record
        # by calling identifier.update() with lineno as an extra argument.
        # update() takes no lineno, so that call always raised a TypeError
        # which a bare "except:" swallowed before building a fresh record.
        # The effective behaviour - a new record replaces the old one - is
        # kept here, without the exception-driven control flow.
        d = identifier(name, header, module, type, lineno, info, extra,
                       conditionals)
        self.identifiers[name] = d
        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register a reference to *name*.

        Names starting with '__' are ignored (None is returned).  The
        record is stored in self.identifiers and, when *type* is not None,
        in self.references.  Returns the identifier record.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Register the definition of *name*.

        Names starting with '__' are ignored (None is returned).  The
        record is stored in self.identifiers and in the dictionary
        matching *type*; an unknown type is reported on stdout.  Returns
        the identifier record.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if type is not None:
            by_type = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            if type in by_type:
                by_type[type][name] = d
            else:
                print("Unable to register type ", type)

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_kind(self, label, own, other, overrides_macro=False):
        """Copy the entries of *other* missing from *own* (and into
        self.identifiers), reporting the names defined on both sides.

        With overrides_macro set, a macro of the same name is dropped
        from self.macros first.
        """
        for id in list(other.keys()):
            if overrides_macro and id in self.macros:
                del self.macros[id]
            if id in own:
                print("%s %s from %s redeclared in %s" % (
                   label, id, own[id].header, other[id].header))
            else:
                own[id] = other[id]
                self.identifiers[id] = other[id]

    def merge(self, idx):
        """Merge the symbols of the index *idx* into this one.

        Existing entries win; duplicates are reported on stdout.
        """
        #
        # macro might be used to override functions or variables
        # definitions
        #
        self._merge_kind("function", self.functions, idx.functions, True)
        self._merge_kind("variable", self.variables, idx.variables, True)
        self._merge_kind("struct", self.structs, idx.structs)
        self._merge_kind("typedef", self.typedefs, idx.typedefs)
        for id in list(idx.macros.keys()):
            # a macro never shadows a variable, function or enum
            if id in self.variables or id in self.functions or \
               id in self.enums:
                continue
            if id in self.macros:
                print("macro %s from %s redeclared in %s" % (
                   id, self.macros[id].header, idx.macros[id].header))
            else:
                self.macros[id] = idx.macros[id]
                self.identifiers[id] = idx.macros[id]
        self._merge_kind("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Complete the functions already known to this index with the
        module, type, info and extra found for them in *idx*.

        Functions of *idx* unknown to this index are ignored.
        """
        for id in list(idx.functions.keys()):
            if id not in self.functions:
                # the function is not declared in the headers
                continue
            # check that function condition agrees with header
            if idx.functions[id].conditionals != \
               self.functions[id].conditionals:
                print("Header condition differs from Function for %s:" \
                   % id)
                print("  H: %s" % self.functions[id].conditionals)
                print("  C: %s" % idx.functions[id].conditionals)
            up = idx.functions[id]
            self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries *dict* holds and how many of them are
        public (static == 0); *type* is the label used in the message."""
        count = len(dict)
        public = sum(1 for id in dict.values() if id.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))


    def analyze(self):
        """Print the statistics for each kind of symbol."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)
370
class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line

       Tokens are (kind, text) tuples where kind is one of 'preproc',
       'string', 'comment', 'name', 'sep' or 'op'."""

    # characters ending a 'name' token
    _NAME_END = " \t(){}:;,+-*/%&!|[]=><"
    # single character separators
    _SEPARATORS = "(){}:;,[]"
    # characters starting an operator / allowed as its second character
    _OP_START = "+-*><=/%&!|."
    _OP_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # last token returned; set here so debug() works before any
        # token has been read
        self.last = None

    def getline(self):
        """Return the next non blank line, stripped of surrounding
        whitespace, with backslash-continued lines joined, or None at
        end of input."""
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put *token* back in front of the queue of pending tokens."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    @staticmethod
    def _is_alnum(c):
        """True for an ASCII letter or digit."""
        return ('a' <= c <= 'z') or ('A' <= c <= 'Z') or ('0' <= c <= '9')

    def _read_string(self, line):
        """Read the string or character literal starting *line*; the text
        following the closing quote is kept in self.line.  Returns the
        'string' token (quotes removed, escapes untouched) or None if the
        input ends before the closing quote."""
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_comment(self, line):
        """Read the /* */ comment starting *line*; the text following the
        end marker is kept in self.line.  The lines of the comment are
        joined with newlines.  Returns the 'comment' token or None if the
        input ends before the end marker."""
        line = line[2:]
        found = 0
        tok = ""
        while found == 0:
            i = line.find('*/')
            if i != -1:
                self.line = line[i+2:]
                # drops the character preceding the end marker (usually
                # a space); when the marker starts the line the slice is
                # line[:-1], which leaves a lone '*' the comment parsers
                # cope with - kept as is on purpose
                line = line[:i-1]
                found = 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _tokenize(self, line):
        """Append to self.tokens the tokens of *line* up to the first
        comment or literal, whose text is left in self.line."""
        l = len(line)
        i = 0
        while i < l:
            if line[i:i+2] == '//' or line[i:i+2] == '/*' or \
               line[i] == '"' or line[i] == "'":
                self.line = line[i:]
                line = line[:i]
                break
            i = i + 1
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if self._is_alnum(c):
                s = i
                while i < l and line[i] not in self._NAME_END:
                    i = i + 1
                self.tokens.append(('name', line[s:i]))
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OP_START:
                if c == '.' and  i + 2 < l and \
                   line[i+1] == '.' and line[i+2] == '.':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue

                j = i + 1
                if j < l and line[j] in self._OP_NEXT:
                    # two characters operator
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # anything else (e.g. '_') starts a name too
            s = i
            while i < l and line[i] not in self._NAME_END:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token, or None at end of input."""
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                # preprocessor line: one token per blank separated word
                self.tokens = [('preproc', x) for x in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(line)

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok
576
577class CParser:
578    """The C module parser"""
579    def __init__(self, filename, idx = None):
580        self.filename = filename
581        if len(filename) > 2 and filename[-2:] == '.h':
582            self.is_header = 1
583        else:
584            self.is_header = 0
585        self.input = open(filename)
586        self.lexer = CLexer(self.input)
587        if idx == None:
588            self.index = index()
589        else:
590            self.index = idx
591        self.top_comment = ""
592        self.last_comment = ""
593        self.comment = None
594        self.collect_ref = 0
595        self.no_error = 0
596        self.conditionals = []
597        self.defines = []
598
    def collect_references(self):
        """Set the collect_ref flag.

        NOTE(review): the flag is not read in this part of the file;
        presumably it makes the parser record symbol references - confirm.
        """
        self.collect_ref = 1
601
    def stop_error(self):
        """Disable reporting: warning() and error() return without doing
        anything while no_error is set."""
        self.no_error = 1
604
    def start_error(self):
        """Re-enable the reporting done by warning() and error()."""
        self.no_error = 0
607
    def lineno(self):
        """Return the line number currently reported by the lexer."""
        return self.lexer.getlineno()
610
611    def index_add(self, name, module, static, type, info=None, extra = None):
612        if self.is_header == 1:
613            self.index.add(name, module, module, static, type, self.lineno(),
614                           info, extra, self.conditionals)
615        else:
616            self.index.add(name, None, module, static, type, self.lineno(),
617                           info, extra, self.conditionals)
618
619    def index_add_ref(self, name, module, static, type, info=None,
620                      extra = None):
621        if self.is_header == 1:
622            self.index.add_ref(name, module, module, static, type,
623                               self.lineno(), info, extra, self.conditionals)
624        else:
625            self.index.add_ref(name, None, module, static, type, self.lineno(),
626                               info, extra, self.conditionals)
627
628    def warning(self, msg):
629        if self.no_error:
630            return
631        print(msg)
632
633    def error(self, msg, token=-1):
634        if self.no_error:
635            return
636
637        print("Parse Error: " + msg)
638        if token != -1:
639            print("Got token ", token)
640        self.lexer.debug()
641        sys.exit(1)
642
643    def debug(self, msg, token=-1):
644        print("Debug: " + msg)
645        if token != -1:
646            print("Got token ", token)
647        self.lexer.debug()
648
649    def parseTopComment(self, comment):
650        res = {}
651        lines = comment.split("\n")
652        item = None
653        for line in lines:
654            while line != "" and (line[0] == ' ' or line[0] == '\t'):
655                line = line[1:]
656            while line != "" and line[0] == '*':
657                line = line[1:]
658            while line != "" and (line[0] == ' ' or line[0] == '\t'):
659                line = line[1:]
660            try:
661                (it, line) = line.split(":", 1)
662                item = it
663                while line != "" and (line[0] == ' ' or line[0] == '\t'):
664                    line = line[1:]
665                if item in res:
666                    res[item] = res[item] + " " + line
667                else:
668                    res[item] = line
669            except:
670                if item != None:
671                    if item in res:
672                        res[item] = res[item] + " " + line
673                    else:
674                        res[item] = line
675        self.index.info = res
676
677    def parseComment(self, token):
678        if self.top_comment == "":
679            self.top_comment = token[1]
680        if self.comment == None or token[1][0] == '*':
681            self.comment = token[1];
682        else:
683            self.comment = self.comment + token[1]
684        token = self.lexer.token()
685
686        if self.comment.find("DOC_DISABLE") != -1:
687            self.stop_error()
688
689        if self.comment.find("DOC_ENABLE") != -1:
690            self.start_error()
691
692        return token
693
    #
    # Parse a comment block associated with a typedef
    #
697    def parseTypeComment(self, name, quiet = 0):
698        if name[0:2] == '__':
699            quiet = 1
700
701        args = []
702        desc = ""
703
704        if self.comment == None:
705            if not quiet:
706                self.warning("Missing comment for type %s" % (name))
707            return((args, desc))
708        if self.comment[0] != '*':
709            if not quiet:
710                self.warning("Missing * in type comment for %s" % (name))
711            return((args, desc))
712        lines = self.comment.split('\n')
713        if lines[0] == '*':
714            del lines[0]
715        if lines[0] != "* %s:" % (name):
716            if not quiet:
717                self.warning("Misformatted type comment for %s" % (name))
718                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
719            return((args, desc))
720        del lines[0]
721        while len(lines) > 0 and lines[0] == '*':
722            del lines[0]
723        desc = ""
724        while len(lines) > 0:
725            l = lines[0]
726            while len(l) > 0 and l[0] == '*':
727                l = l[1:]
728            l = l.strip()
729            desc = desc + " " + l
730            del lines[0]
731
732        desc = desc.strip()
733
734        if quiet == 0:
735            if desc == "":
736                self.warning("Type comment for %s lack description of the macro" % (name))
737
738        return(desc)
    #
    # Parse a comment block associated with a macro
    #
742    def parseMacroComment(self, name, quiet = 0):
743        if name[0:2] == '__':
744            quiet = 1
745
746        args = []
747        desc = ""
748
749        if self.comment == None:
750            if not quiet:
751                self.warning("Missing comment for macro %s" % (name))
752            return((args, desc))
753        if self.comment[0] != '*':
754            if not quiet:
755                self.warning("Missing * in macro comment for %s" % (name))
756            return((args, desc))
757        lines = self.comment.split('\n')
758        if lines[0] == '*':
759            del lines[0]
760        if lines[0] != "* %s:" % (name):
761            if not quiet:
762                self.warning("Misformatted macro comment for %s" % (name))
763                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
764            return((args, desc))
765        del lines[0]
766        while lines[0] == '*':
767            del lines[0]
768        while len(lines) > 0 and lines[0][0:3] == '* @':
769            l = lines[0][3:]
770            try:
771                (arg, desc) = l.split(':', 1)
772                desc=desc.strip()
773                arg=arg.strip()
774            except:
775                if not quiet:
776                    self.warning("Misformatted macro comment for %s" % (name))
777                    self.warning("  problem with '%s'" % (lines[0]))
778                del lines[0]
779                continue
780            del lines[0]
781            l = lines[0].strip()
782            while len(l) > 2 and l[0:3] != '* @':
783                while l[0] == '*':
784                    l = l[1:]
785                desc = desc + ' ' + l.strip()
786                del lines[0]
787                if len(lines) == 0:
788                    break
789                l = lines[0]
790            args.append((arg, desc))
791        while len(lines) > 0 and lines[0] == '*':
792            del lines[0]
793        desc = ""
794        while len(lines) > 0:
795            l = lines[0]
796            while len(l) > 0 and l[0] == '*':
797                l = l[1:]
798            l = l.strip()
799            desc = desc + " " + l
800            del lines[0]
801
802        desc = desc.strip()
803
804        if quiet == 0:
805            if desc == "":
806                self.warning("Macro comment for %s lack description of the macro" % (name))
807
808        return((args, desc))
809
810     #
811     # Parse a comment block and merge the information found in the
812     # parameters descriptions, finally returns a block as complete
813     # as possible
814     #
815    def mergeFunctionComment(self, name, description, quiet = 0):
816        if name == 'main':
817            quiet = 1
818        if name[0:2] == '__':
819            quiet = 1
820
821        (ret, args) = description
822        desc = ""
823        retdesc = ""
824
825        if self.comment == None:
826            if not quiet:
827                self.warning("Missing comment for function %s" % (name))
828            return(((ret[0], retdesc), args, desc))
829        if self.comment[0] != '*':
830            if not quiet:
831                self.warning("Missing * in function comment for %s" % (name))
832            return(((ret[0], retdesc), args, desc))
833        lines = self.comment.split('\n')
834        if lines[0] == '*':
835            del lines[0]
836        if lines[0] != "* %s:" % (name):
837            if not quiet:
838                self.warning("Misformatted function comment for %s" % (name))
839                self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
840            return(((ret[0], retdesc), args, desc))
841        del lines[0]
842        while lines[0] == '*':
843            del lines[0]
844        nbargs = len(args)
845        while len(lines) > 0 and lines[0][0:3] == '* @':
846            l = lines[0][3:]
847            try:
848                (arg, desc) = l.split(':', 1)
849                desc=desc.strip()
850                arg=arg.strip()
851            except:
852                if not quiet:
853                    self.warning("Misformatted function comment for %s" % (name))
854                    self.warning("  problem with '%s'" % (lines[0]))
855                del lines[0]
856                continue
857            del lines[0]
858            l = lines[0].strip()
859            while len(l) > 2 and l[0:3] != '* @':
860                while l[0] == '*':
861                    l = l[1:]
862                desc = desc + ' ' + l.strip()
863                del lines[0]
864                if len(lines) == 0:
865                    break
866                l = lines[0]
867            i = 0
868            while i < nbargs:
869                if args[i][1] == arg:
870                    args[i] = (args[i][0], arg, desc)
871                    break;
872                i = i + 1
873            if i >= nbargs:
874                if not quiet:
875                    self.warning("Unable to find arg %s from function comment for %s" % (
876                       arg, name))
877        while len(lines) > 0 and lines[0] == '*':
878            del lines[0]
879        desc = ""
880        while len(lines) > 0:
881            l = lines[0]
882            while len(l) > 0 and l[0] == '*':
883                l = l[1:]
884            l = l.strip()
885            if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
886                try:
887                    l = l.split(' ', 1)[1]
888                except:
889                    l = ""
890                retdesc = l.strip()
891                del lines[0]
892                while len(lines) > 0:
893                    l = lines[0]
894                    while len(l) > 0 and l[0] == '*':
895                        l = l[1:]
896                    l = l.strip()
897                    retdesc = retdesc + " " + l
898                    del lines[0]
899            else:
900                desc = desc + " " + l
901                del lines[0]
902
903        retdesc = retdesc.strip()
904        desc = desc.strip()
905
906        if quiet == 0:
907             #
908             # report missing comments
909             #
910            i = 0
911            while i < nbargs:
912                if args[i][2] == None and args[i][0] != "void" and \
913                   ((args[i][1] != None) or (args[i][1] == '')):
914                    self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
915                i = i + 1
916            if retdesc == "" and ret[0] != "void":
917                self.warning("Function comment for %s lacks description of return value" % (name))
918            if desc == "":
919                self.warning("Function comment for %s lacks description of the function" % (name))
920
921        return(((ret[0], retdesc), args, desc))
922
923    def parsePreproc(self, token):
924        if debug:
925            print("=> preproc ", token, self.lexer.tokens)
926        name = token[1]
927        if name == "#include":
928            token = self.lexer.token()
929            if token == None:
930                return None
931            if token[0] == 'preproc':
932                self.index_add(token[1], self.filename, not self.is_header,
933                                "include")
934                return self.lexer.token()
935            return token
936        if name == "#define":
937            token = self.lexer.token()
938            if token == None:
939                return None
940            if token[0] == 'preproc':
941                 # TODO macros with arguments
942                name = token[1]
943                lst = []
944                token = self.lexer.token()
945                while token != None and token[0] == 'preproc' and \
946                      token[1][0] != '#':
947                    lst.append(token[1])
948                    token = self.lexer.token()
949                try:
950                    name = name.split('(') [0]
951                except:
952                    pass
953                info = self.parseMacroComment(name, not self.is_header)
954                self.index_add(name, self.filename, not self.is_header,
955                                "macro", info)
956                return token
957
958        #
959        # Processing of conditionals modified by Bill 1/1/05
960        #
961        # We process conditionals (i.e. tokens from #ifdef, #ifndef,
962        # #if, #else and #endif) for headers and mainline code,
963        # store the ones from the header in libxml2-api.xml, and later
964        # (in the routine merge_public) verify that the two (header and
965        # mainline code) agree.
966        #
967        # There is a small problem with processing the headers. Some of
968        # the variables are not concerned with enabling / disabling of
969        # library functions (e.g. '__XML_PARSER_H__'), and we don't want
970        # them to be included in libxml2-api.xml, or involved in
971        # the check between the header and the mainline code.  To
972        # accomplish this, we ignore any conditional which doesn't include
973        # the string 'ENABLED'
974        #
975        if name == "#ifdef":
976            apstr = self.lexer.tokens[0][1]
977            try:
978                self.defines.append(apstr)
979                if apstr.find('ENABLED') != -1:
980                    self.conditionals.append("defined(%s)" % apstr)
981            except:
982                pass
983        elif name == "#ifndef":
984            apstr = self.lexer.tokens[0][1]
985            try:
986                self.defines.append(apstr)
987                if apstr.find('ENABLED') != -1:
988                    self.conditionals.append("!defined(%s)" % apstr)
989            except:
990                pass
991        elif name == "#if":
992            apstr = ""
993            for tok in self.lexer.tokens:
994                if apstr != "":
995                    apstr = apstr + " "
996                apstr = apstr + tok[1]
997            try:
998                self.defines.append(apstr)
999                if apstr.find('ENABLED') != -1:
1000                    self.conditionals.append(apstr)
1001            except:
1002                pass
1003        elif name == "#else":
1004            if self.conditionals != [] and \
1005               self.defines[-1].find('ENABLED') != -1:
1006                self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
1007        elif name == "#endif":
1008            if self.conditionals != [] and \
1009               self.defines[-1].find('ENABLED') != -1:
1010                self.conditionals = self.conditionals[:-1]
1011            self.defines = self.defines[:-1]
1012        token = self.lexer.token()
1013        while token != None and token[0] == 'preproc' and \
1014            token[1][0] != '#':
1015            token = self.lexer.token()
1016        return token
1017
1018     #
1019     # token acquisition on top of the lexer, it handle internally
1020     # preprocessor and comments since they are logically not part of
1021     # the program structure.
1022     #
1023    def token(self):
1024        global ignored_words
1025
1026        token = self.lexer.token()
1027        while token != None:
1028            if token[0] == 'comment':
1029                token = self.parseComment(token)
1030                continue
1031            elif token[0] == 'preproc':
1032                token = self.parsePreproc(token)
1033                continue
1034            elif token[0] == "name" and token[1] == "__const":
1035                token = ("name", "const")
1036                return token
1037            elif token[0] == "name" and token[1] == "__attribute":
1038                token = self.lexer.token()
1039                while token != None and token[1] != ";":
1040                    token = self.lexer.token()
1041                return token
1042            elif token[0] == "name" and token[1] in ignored_words:
1043                (n, info) = ignored_words[token[1]]
1044                i = 0
1045                while i < n:
1046                    token = self.lexer.token()
1047                    i = i + 1
1048                token = self.lexer.token()
1049                continue
1050            else:
1051                if debug:
1052                    print("=> ", token)
1053                return token
1054        return None
1055
1056     #
1057     # Parse a typedef, it records the type and its name.
1058     #
1059    def parseTypedef(self, token):
1060        if token == None:
1061            return None
1062        token = self.parseType(token)
1063        if token == None:
1064            self.error("parsing typedef")
1065            return None
1066        base_type = self.type
1067        type = base_type
1068         #self.debug("end typedef type", token)
1069        while token != None:
1070            if token[0] == "name":
1071                name = token[1]
1072                signature = self.signature
1073                if signature != None:
1074                    type = type.split('(')[0]
1075                    d = self.mergeFunctionComment(name,
1076                            ((type, None), signature), 1)
1077                    self.index_add(name, self.filename, not self.is_header,
1078                                    "functype", d)
1079                else:
1080                    if base_type == "struct":
1081                        self.index_add(name, self.filename, not self.is_header,
1082                                        "struct", type)
1083                        base_type = "struct " + name
1084                    else:
1085                        # TODO report missing or misformatted comments
1086                        info = self.parseTypeComment(name, 1)
1087                        self.index_add(name, self.filename, not self.is_header,
1088                                    "typedef", type, info)
1089                token = self.token()
1090            else:
1091                self.error("parsing typedef: expecting a name")
1092                return token
1093             #self.debug("end typedef", token)
1094            if token != None and token[0] == 'sep' and token[1] == ',':
1095                type = base_type
1096                token = self.token()
1097                while token != None and token[0] == "op":
1098                    type = type + token[1]
1099                    token = self.token()
1100            elif token != None and token[0] == 'sep' and token[1] == ';':
1101                break;
1102            elif token != None and token[0] == 'name':
1103                type = base_type
1104                continue;
1105            else:
1106                self.error("parsing typedef: expecting ';'", token)
1107                return token
1108        token = self.token()
1109        return token
1110
1111     #
1112     # Parse a C code block, used for functions it parse till
1113     # the balancing } included
1114     #
1115    def parseBlock(self, token):
1116        while token != None:
1117            if token[0] == "sep" and token[1] == "{":
1118                token = self.token()
1119                token = self.parseBlock(token)
1120            elif token[0] == "sep" and token[1] == "}":
1121                self.comment = None
1122                token = self.token()
1123                return token
1124            else:
1125                if self.collect_ref == 1:
1126                    oldtok = token
1127                    token = self.token()
1128                    if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1129                        if token[0] == "sep" and token[1] == "(":
1130                            self.index_add_ref(oldtok[1], self.filename,
1131                                                0, "function")
1132                            token = self.token()
1133                        elif token[0] == "name":
1134                            token = self.token()
1135                            if token[0] == "sep" and (token[1] == ";" or
1136                               token[1] == "," or token[1] == "="):
1137                                self.index_add_ref(oldtok[1], self.filename,
1138                                                    0, "type")
1139                    elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1140                        self.index_add_ref(oldtok[1], self.filename,
1141                                            0, "typedef")
1142                    elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1143                        self.index_add_ref(oldtok[1], self.filename,
1144                                            0, "typedef")
1145
1146                else:
1147                    token = self.token()
1148        return token
1149
1150     #
1151     # Parse a C struct definition till the balancing }
1152     #
1153    def parseStruct(self, token):
1154        fields = []
1155         #self.debug("start parseStruct", token)
1156        while token != None:
1157            if token[0] == "sep" and token[1] == "{":
1158                token = self.token()
1159                token = self.parseTypeBlock(token)
1160            elif token[0] == "sep" and token[1] == "}":
1161                self.struct_fields = fields
1162                 #self.debug("end parseStruct", token)
1163                 #print fields
1164                token = self.token()
1165                return token
1166            else:
1167                base_type = self.type
1168                 #self.debug("before parseType", token)
1169                token = self.parseType(token)
1170                 #self.debug("after parseType", token)
1171                if token != None and token[0] == "name":
1172                    fname = token[1]
1173                    token = self.token()
1174                    if token[0] == "sep" and token[1] == ";":
1175                        self.comment = None
1176                        token = self.token()
1177                        fields.append((self.type, fname, self.comment))
1178                        self.comment = None
1179                    else:
1180                        self.error("parseStruct: expecting ;", token)
1181                elif token != None and token[0] == "sep" and token[1] == "{":
1182                    token = self.token()
1183                    token = self.parseTypeBlock(token)
1184                    if token != None and token[0] == "name":
1185                        token = self.token()
1186                    if token != None and token[0] == "sep" and token[1] == ";":
1187                        token = self.token()
1188                    else:
1189                        self.error("parseStruct: expecting ;", token)
1190                else:
1191                    self.error("parseStruct: name", token)
1192                    token = self.token()
1193                self.type = base_type;
1194        self.struct_fields = fields
1195         #self.debug("end parseStruct", token)
1196         #print fields
1197        return token
1198
1199     #
1200     # Parse a C enum block, parse till the balancing }
1201     #
1202    def parseEnumBlock(self, token):
1203        self.enums = []
1204        name = None
1205        self.comment = None
1206        comment = ""
1207        value = "0"
1208        while token != None:
1209            if token[0] == "sep" and token[1] == "{":
1210                token = self.token()
1211                token = self.parseTypeBlock(token)
1212            elif token[0] == "sep" and token[1] == "}":
1213                if name != None:
1214                    if self.comment != None:
1215                        comment = self.comment
1216                        self.comment = None
1217                    self.enums.append((name, value, comment))
1218                token = self.token()
1219                return token
1220            elif token[0] == "name":
1221                    if name != None:
1222                        if self.comment != None:
1223                            comment = self.comment.strip()
1224                            self.comment = None
1225                        self.enums.append((name, value, comment))
1226                    name = token[1]
1227                    comment = ""
1228                    token = self.token()
1229                    if token[0] == "op" and token[1][0] == "=":
1230                        value = ""
1231                        if len(token[1]) > 1:
1232                            value = token[1][1:]
1233                        token = self.token()
1234                        while token[0] != "sep" or (token[1] != ',' and
1235                              token[1] != '}'):
1236                            value = value + token[1]
1237                            token = self.token()
1238                    else:
1239                        try:
1240                            value = "%d" % (int(value) + 1)
1241                        except:
1242                            self.warning("Failed to compute value of enum %s" % (name))
1243                            value=""
1244                    if token[0] == "sep" and token[1] == ",":
1245                        token = self.token()
1246            else:
1247                token = self.token()
1248        return token
1249
1250     #
1251     # Parse a C definition block, used for structs it parse till
1252     # the balancing }
1253     #
1254    def parseTypeBlock(self, token):
1255        while token != None:
1256            if token[0] == "sep" and token[1] == "{":
1257                token = self.token()
1258                token = self.parseTypeBlock(token)
1259            elif token[0] == "sep" and token[1] == "}":
1260                token = self.token()
1261                return token
1262            else:
1263                token = self.token()
1264        return token
1265
1266     #
1267     # Parse a type: the fact that the type name can either occur after
1268     #    the definition or within the definition makes it a little harder
1269     #    if inside, the name token is pushed back before returning
1270     #
1271    def parseType(self, token):
1272        self.type = ""
1273        self.struct_fields = []
1274        self.signature = None
1275        if token == None:
1276            return token
1277
1278        while token[0] == "name" and (
1279              token[1] == "const" or \
1280              token[1] == "unsigned" or \
1281              token[1] == "signed"):
1282            if self.type == "":
1283                self.type = token[1]
1284            else:
1285                self.type = self.type + " " + token[1]
1286            token = self.token()
1287
1288        if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1289            if self.type == "":
1290                self.type = token[1]
1291            else:
1292                self.type = self.type + " " + token[1]
1293            if token[0] == "name" and token[1] == "int":
1294                if self.type == "":
1295                    self.type = tmp[1]
1296                else:
1297                    self.type = self.type + " " + tmp[1]
1298
1299        elif token[0] == "name" and token[1] == "struct":
1300            if self.type == "":
1301                self.type = token[1]
1302            else:
1303                self.type = self.type + " " + token[1]
1304            token = self.token()
1305            nametok = None
1306            if token[0] == "name":
1307                nametok = token
1308                token = self.token()
1309            if token != None and token[0] == "sep" and token[1] == "{":
1310                token = self.token()
1311                token = self.parseStruct(token)
1312            elif token != None and token[0] == "op" and token[1] == "*":
1313                self.type = self.type + " " + nametok[1] + " *"
1314                token = self.token()
1315                while token != None and token[0] == "op" and token[1] == "*":
1316                    self.type = self.type + " *"
1317                    token = self.token()
1318                if token[0] == "name":
1319                    nametok = token
1320                    token = self.token()
1321                else:
1322                    self.error("struct : expecting name", token)
1323                    return token
1324            elif token != None and token[0] == "name" and nametok != None:
1325                self.type = self.type + " " + nametok[1]
1326                return token
1327
1328            if nametok != None:
1329                self.lexer.push(token)
1330                token = nametok
1331            return token
1332
1333        elif token[0] == "name" and token[1] == "enum":
1334            if self.type == "":
1335                self.type = token[1]
1336            else:
1337                self.type = self.type + " " + token[1]
1338            self.enums = []
1339            token = self.token()
1340            if token != None and token[0] == "sep" and token[1] == "{":
1341                token = self.token()
1342                token = self.parseEnumBlock(token)
1343            else:
1344                self.error("parsing enum: expecting '{'", token)
1345            enum_type = None
1346            if token != None and token[0] != "name":
1347                self.lexer.push(token)
1348                token = ("name", "enum")
1349            else:
1350                enum_type = token[1]
1351            for enum in self.enums:
1352                self.index_add(enum[0], self.filename,
1353                               not self.is_header, "enum",
1354                               (enum[1], enum[2], enum_type))
1355            return token
1356
1357        elif token[0] == "name":
1358            if self.type == "":
1359                self.type = token[1]
1360            else:
1361                self.type = self.type + " " + token[1]
1362        else:
1363            self.error("parsing type %s: expecting a name" % (self.type),
1364                       token)
1365            return token
1366        token = self.token()
1367        while token != None and (token[0] == "op" or
1368              token[0] == "name" and token[1] == "const"):
1369            self.type = self.type + " " + token[1]
1370            token = self.token()
1371
1372         #
1373         # if there is a parenthesis here, this means a function type
1374         #
1375        if token != None and token[0] == "sep" and token[1] == '(':
1376            self.type = self.type + token[1]
1377            token = self.token()
1378            while token != None and token[0] == "op" and token[1] == '*':
1379                self.type = self.type + token[1]
1380                token = self.token()
1381            if token == None or token[0] != "name" :
1382                self.error("parsing function type, name expected", token);
1383                return token
1384            self.type = self.type + token[1]
1385            nametok = token
1386            token = self.token()
1387            if token != None and token[0] == "sep" and token[1] == ')':
1388                self.type = self.type + token[1]
1389                token = self.token()
1390                if token != None and token[0] == "sep" and token[1] == '(':
1391                    token = self.token()
1392                    type = self.type;
1393                    token = self.parseSignature(token);
1394                    self.type = type;
1395                else:
1396                    self.error("parsing function type, '(' expected", token);
1397                    return token
1398            else:
1399                self.error("parsing function type, ')' expected", token);
1400                return token
1401            self.lexer.push(token)
1402            token = nametok
1403            return token
1404
1405         #
1406         # do some lookahead for arrays
1407         #
1408        if token != None and token[0] == "name":
1409            nametok = token
1410            token = self.token()
1411            if token != None and token[0] == "sep" and token[1] == '[':
1412                self.type = self.type + nametok[1]
1413                while token != None and token[0] == "sep" and token[1] == '[':
1414                    self.type = self.type + token[1]
1415                    token = self.token()
1416                    while token != None and token[0] != 'sep' and \
1417                          token[1] != ']' and token[1] != ';':
1418                        self.type = self.type + token[1]
1419                        token = self.token()
1420                if token != None and token[0] == 'sep' and token[1] == ']':
1421                    self.type = self.type + token[1]
1422                    token = self.token()
1423                else:
1424                    self.error("parsing array type, ']' expected", token);
1425                    return token
1426            elif token != None and token[0] == "sep" and token[1] == ':':
1427                 # remove :12 in case it's a limited int size
1428                token = self.token()
1429                token = self.token()
1430            self.lexer.push(token)
1431            token = nametok
1432
1433        return token
1434
1435     #
1436     # Parse a signature: '(' has been parsed and we scan the type definition
1437     #    up to the ')' included
1438    def parseSignature(self, token):
1439        signature = []
1440        if token != None and token[0] == "sep" and token[1] == ')':
1441            self.signature = []
1442            token = self.token()
1443            return token
1444        while token != None:
1445            token = self.parseType(token)
1446            if token != None and token[0] == "name":
1447                signature.append((self.type, token[1], None))
1448                token = self.token()
1449            elif token != None and token[0] == "sep" and token[1] == ',':
1450                token = self.token()
1451                continue
1452            elif token != None and token[0] == "sep" and token[1] == ')':
1453                 # only the type was provided
1454                if self.type == "...":
1455                    signature.append((self.type, "...", None))
1456                else:
1457                    signature.append((self.type, None, None))
1458            if token != None and token[0] == "sep":
1459                if token[1] == ',':
1460                    token = self.token()
1461                    continue
1462                elif token[1] == ')':
1463                    token = self.token()
1464                    break
1465        self.signature = signature
1466        return token
1467
1468     #
1469     # Parse a global definition, be it a type, variable or function
1470     # the extern "C" blocks are a bit nasty and require it to recurse.
1471     #
1472    def parseGlobal(self, token):
1473        static = 0
1474        if token[1] == 'extern':
1475            token = self.token()
1476            if token == None:
1477                return token
1478            if token[0] == 'string':
1479                if token[1] == 'C':
1480                    token = self.token()
1481                    if token == None:
1482                        return token
1483                    if token[0] == 'sep' and token[1] == "{":
1484                        token = self.token()
1485#                         print 'Entering extern "C line ', self.lineno()
1486                        while token != None and (token[0] != 'sep' or
1487                              token[1] != "}"):
1488                            if token[0] == 'name':
1489                                token = self.parseGlobal(token)
1490                            else:
1491                                self.error(
1492                                 "token %s %s unexpected at the top level" % (
1493                                        token[0], token[1]))
1494                                token = self.parseGlobal(token)
1495#                         print 'Exiting extern "C" line', self.lineno()
1496                        token = self.token()
1497                        return token
1498                else:
1499                    return token
1500        elif token[1] == 'static':
1501            static = 1
1502            token = self.token()
1503            if token == None or  token[0] != 'name':
1504                return token
1505
1506        if token[1] == 'typedef':
1507            token = self.token()
1508            return self.parseTypedef(token)
1509        else:
1510            token = self.parseType(token)
1511            type_orig = self.type
1512        if token == None or token[0] != "name":
1513            return token
1514        type = type_orig
1515        self.name = token[1]
1516        token = self.token()
1517        while token != None and (token[0] == "sep" or token[0] == "op"):
1518            if token[0] == "sep":
1519                if token[1] == "[":
1520                    type = type + token[1]
1521                    token = self.token()
1522                    while token != None and (token[0] != "sep" or \
1523                          token[1] != ";"):
1524                        type = type + token[1]
1525                        token = self.token()
1526
1527            if token != None and token[0] == "op" and token[1] == "=":
1528                 #
1529                 # Skip the initialization of the variable
1530                 #
1531                token = self.token()
1532                if token[0] == 'sep' and token[1] == '{':
1533                    token = self.token()
1534                    token = self.parseBlock(token)
1535                else:
1536                    self.comment = None
1537                    while token != None and (token[0] != "sep" or \
1538                          (token[1] != ';' and token[1] != ',')):
1539                            token = self.token()
1540                self.comment = None
1541                if token == None or token[0] != "sep" or (token[1] != ';' and
1542                   token[1] != ','):
1543                    self.error("missing ';' or ',' after value")
1544
1545            if token != None and token[0] == "sep":
1546                if token[1] == ";":
1547                    self.comment = None
1548                    token = self.token()
1549                    if type == "struct":
1550                        self.index_add(self.name, self.filename,
1551                             not self.is_header, "struct", self.struct_fields)
1552                    else:
1553                        self.index_add(self.name, self.filename,
1554                             not self.is_header, "variable", type)
1555                    break
1556                elif token[1] == "(":
1557                    token = self.token()
1558                    token = self.parseSignature(token)
1559                    if token == None:
1560                        return None
1561                    if token[0] == "sep" and token[1] == ";":
1562                        d = self.mergeFunctionComment(self.name,
1563                                ((type, None), self.signature), 1)
1564                        self.index_add(self.name, self.filename, static,
1565                                        "function", d)
1566                        token = self.token()
1567                    elif token[0] == "sep" and token[1] == "{":
1568                        d = self.mergeFunctionComment(self.name,
1569                                ((type, None), self.signature), static)
1570                        self.index_add(self.name, self.filename, static,
1571                                        "function", d)
1572                        token = self.token()
1573                        token = self.parseBlock(token);
1574                elif token[1] == ',':
1575                    self.comment = None
1576                    self.index_add(self.name, self.filename, static,
1577                                    "variable", type)
1578                    type = type_orig
1579                    token = self.token()
1580                    while token != None and token[0] == "sep":
1581                        type = type + token[1]
1582                        token = self.token()
1583                    if token != None and token[0] == "name":
1584                        self.name = token[1]
1585                        token = self.token()
1586                else:
1587                    break
1588
1589        return token
1590
1591    def parse(self):
1592        self.warning("Parsing %s" % (self.filename))
1593        token = self.token()
1594        while token != None:
1595            if token[0] == 'name':
1596                token = self.parseGlobal(token)
1597            else:
1598                self.error("token %s %s unexpected at the top level" % (
1599                       token[0], token[1]))
1600                token = self.parseGlobal(token)
1601                return
1602        self.parseTopComment(self.top_comment)
1603        return self.index
1604
1605
1606class docBuilder:
1607    """A documentation builder"""
1608    def __init__(self, name, directories=['.'], excludes=[]):
1609        self.name = name
1610        self.directories = directories
1611        self.excludes = excludes + list(ignored_files.keys())
1612        self.modules = {}
1613        self.headers = {}
1614        self.idx = index()
1615        self.xref = {}
1616        self.index = {}
1617        if name == 'libxml2':
1618            self.basename = 'libxml'
1619        else:
1620            self.basename = name
1621
1622    def indexString(self, id, str):
1623        if str == None:
1624            return
1625        str = str.replace("'", ' ')
1626        str = str.replace('"', ' ')
1627        str = str.replace("/", ' ')
1628        str = str.replace('*', ' ')
1629        str = str.replace("[", ' ')
1630        str = str.replace("]", ' ')
1631        str = str.replace("(", ' ')
1632        str = str.replace(")", ' ')
1633        str = str.replace("<", ' ')
1634        str = str.replace('>', ' ')
1635        str = str.replace("&", ' ')
1636        str = str.replace('#', ' ')
1637        str = str.replace(",", ' ')
1638        str = str.replace('.', ' ')
1639        str = str.replace(';', ' ')
1640        tokens = str.split()
1641        for token in tokens:
1642            try:
1643                c = token[0]
1644                if string.ascii_letters.find(c) < 0:
1645                    pass
1646                elif len(token) < 3:
1647                    pass
1648                else:
1649                    lower = token.lower()
1650                    # TODO: generalize this a bit
1651                    if lower == 'and' or lower == 'the':
1652                        pass
1653                    elif token in self.xref:
1654                        self.xref[token].append(id)
1655                    else:
1656                        self.xref[token] = [id]
1657            except:
1658                pass
1659
1660    def analyze(self):
1661        print("Project %s : %d headers, %d modules" % (self.name, len(list(self.headers.keys())), len(list(self.modules.keys()))))
1662        self.idx.analyze()
1663
1664    def scanHeaders(self):
1665        for header in list(self.headers.keys()):
1666            parser = CParser(header)
1667            idx = parser.parse()
1668            self.headers[header] = idx;
1669            self.idx.merge(idx)
1670
1671    def scanModules(self):
1672        for module in list(self.modules.keys()):
1673            parser = CParser(module)
1674            idx = parser.parse()
1675            # idx.analyze()
1676            self.modules[module] = idx
1677            self.idx.merge_public(idx)
1678
1679    def scan(self):
1680        for directory in self.directories:
1681            files = glob.glob(directory + "/*.c")
1682            for file in files:
1683                skip = 0
1684                for excl in self.excludes:
1685                    if file.find(excl) != -1:
1686                        print("Skipping %s" % file)
1687                        skip = 1
1688                        break
1689                if skip == 0:
1690                    self.modules[file] = None;
1691            files = glob.glob(directory + "/*.h")
1692            for file in files:
1693                skip = 0
1694                for excl in self.excludes:
1695                    if file.find(excl) != -1:
1696                        print("Skipping %s" % file)
1697                        skip = 1
1698                        break
1699                if skip == 0:
1700                    self.headers[file] = None;
1701        self.scanHeaders()
1702        self.scanModules()
1703
1704    def modulename_file(self, file):
1705        module = os.path.basename(file)
1706        if module[-2:] == '.h':
1707            module = module[:-2]
1708        elif module[-2:] == '.c':
1709            module = module[:-2]
1710        return module
1711
1712    def serialize_enum(self, output, name):
1713        id = self.idx.enums[name]
1714        output.write("    <enum name='%s' file='%s'" % (name,
1715                     self.modulename_file(id.header)))
1716        if id.info != None:
1717            info = id.info
1718            if info[0] != None and info[0] != '':
1719                try:
1720                    val = eval(info[0])
1721                except:
1722                    val = info[0]
1723                output.write(" value='%s'" % (val));
1724            if info[2] != None and info[2] != '':
1725                output.write(" type='%s'" % info[2]);
1726            if info[1] != None and info[1] != '':
1727                output.write(" info='%s'" % escape(info[1]));
1728        output.write("/>\n")
1729
1730    def serialize_macro(self, output, name):
1731        id = self.idx.macros[name]
1732        output.write("    <macro name='%s' file='%s'>\n" % (name,
1733                     self.modulename_file(id.header)))
1734        if id.info != None:
1735            try:
1736                (args, desc) = id.info
1737                if desc != None and desc != "":
1738                    output.write("      <info>%s</info>\n" % (escape(desc)))
1739                    self.indexString(name, desc)
1740                for arg in args:
1741                    (name, desc) = arg
1742                    if desc != None and desc != "":
1743                        output.write("      <arg name='%s' info='%s'/>\n" % (
1744                                     name, escape(desc)))
1745                        self.indexString(name, desc)
1746                    else:
1747                        output.write("      <arg name='%s'/>\n" % (name))
1748            except:
1749                pass
1750        output.write("    </macro>\n")
1751
1752    def serialize_typedef(self, output, name):
1753        id = self.idx.typedefs[name]
1754        if id.info[0:7] == 'struct ':
1755            output.write("    <struct name='%s' file='%s' type='%s'" % (
1756                     name, self.modulename_file(id.header), id.info))
1757            name = id.info[7:]
1758            if name in self.idx.structs and ( \
1759               type(self.idx.structs[name].info) == type(()) or
1760                type(self.idx.structs[name].info) == type([])):
1761                output.write(">\n");
1762                try:
1763                    for field in self.idx.structs[name].info:
1764                        desc = field[2]
1765                        self.indexString(name, desc)
1766                        if desc == None:
1767                            desc = ''
1768                        else:
1769                            desc = escape(desc)
1770                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1771                except:
1772                    print("Failed to serialize struct %s" % (name))
1773                output.write("    </struct>\n")
1774            else:
1775                output.write("/>\n");
1776        else :
1777            output.write("    <typedef name='%s' file='%s' type='%s'" % (
1778                         name, self.modulename_file(id.header), id.info))
1779            try:
1780                desc = id.extra
1781                if desc != None and desc != "":
1782                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
1783                    output.write("    </typedef>\n")
1784                else:
1785                    output.write("/>\n")
1786            except:
1787                output.write("/>\n")
1788
1789    def serialize_variable(self, output, name):
1790        id = self.idx.variables[name]
1791        if id.info != None:
1792            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1793                    name, self.modulename_file(id.header), id.info))
1794        else:
1795            output.write("    <variable name='%s' file='%s'/>\n" % (
1796                    name, self.modulename_file(id.header)))
1797
1798    def serialize_function(self, output, name):
1799        id = self.idx.functions[name]
1800        if name == debugsym:
1801            print("=>", id)
1802
1803        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (id.type,
1804                     name, self.modulename_file(id.header),
1805                     self.modulename_file(id.module)))
1806        #
1807        # Processing of conditionals modified by Bill 1/1/05
1808        #
1809        if id.conditionals != None:
1810            apstr = ""
1811            for cond in id.conditionals:
1812                if apstr != "":
1813                    apstr = apstr + " &amp;&amp; "
1814                apstr = apstr + cond
1815            output.write("      <cond>%s</cond>\n"% (apstr));
1816        try:
1817            (ret, params, desc) = id.info
1818            if (desc == None or desc == '') and \
1819               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
1820                print("%s %s from %s has no description" % (id.type, name,
1821                       self.modulename_file(id.module)))
1822
1823            output.write("      <info>%s</info>\n" % (escape(desc)))
1824            self.indexString(name, desc)
1825            if ret[0] != None:
1826                if ret[0] == "void":
1827                    output.write("      <return type='void'/>\n")
1828                else:
1829                    output.write("      <return type='%s' info='%s'/>\n" % (
1830                             ret[0], escape(ret[1])))
1831                    self.indexString(name, ret[1])
1832            for param in params:
1833                if param[0] == 'void':
1834                    continue
1835                if param[2] == None:
1836                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1837                else:
1838                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1839                    self.indexString(name, param[2])
1840        except:
1841            print("Failed to save function %s info: " % name, repr(id.info))
1842        output.write("    </%s>\n" % (id.type))
1843
1844    def serialize_exports(self, output, file):
1845        module = self.modulename_file(file)
1846        output.write("    <file name='%s'>\n" % (module))
1847        dict = self.headers[file]
1848        if dict.info != None:
1849            for data in ('Summary', 'Description', 'Author'):
1850                try:
1851                    output.write("     <%s>%s</%s>\n" % (
1852                                 data.lower(),
1853                                 escape(dict.info[data]),
1854                                 data.lower()))
1855                except:
1856                    print("Header %s lacks a %s description" % (module, data))
1857            if 'Description' in dict.info:
1858                desc = dict.info['Description']
1859                if desc.find("DEPRECATED") != -1:
1860                    output.write("     <deprecated/>\n")
1861
1862        ids = list(dict.macros.keys())
1863        ids.sort()
1864        for id in uniq(ids):
1865            # Macros are sometime used to masquerade other types.
1866            if id in dict.functions:
1867                continue
1868            if id in dict.variables:
1869                continue
1870            if id in dict.typedefs:
1871                continue
1872            if id in dict.structs:
1873                continue
1874            if id in dict.enums:
1875                continue
1876            output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1877        ids = list(dict.enums.keys())
1878        ids.sort()
1879        for id in uniq(ids):
1880            output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1881        ids = list(dict.typedefs.keys())
1882        ids.sort()
1883        for id in uniq(ids):
1884            output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1885        ids = list(dict.structs.keys())
1886        ids.sort()
1887        for id in uniq(ids):
1888            output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1889        ids = list(dict.variables.keys())
1890        ids.sort()
1891        for id in uniq(ids):
1892            output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1893        ids = list(dict.functions.keys())
1894        ids.sort()
1895        for id in uniq(ids):
1896            output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1897        output.write("    </file>\n")
1898
1899    def serialize_xrefs_files(self, output):
1900        headers = list(self.headers.keys())
1901        headers.sort()
1902        for file in headers:
1903            module = self.modulename_file(file)
1904            output.write("    <file name='%s'>\n" % (module))
1905            dict = self.headers[file]
1906            ids = uniq(list(dict.functions.keys()) + list(dict.variables.keys()) + \
1907                  list(dict.macros.keys()) + list(dict.typedefs.keys()) + \
1908                  list(dict.structs.keys()) + list(dict.enums.keys()))
1909            ids.sort()
1910            for id in ids:
1911                output.write("      <ref name='%s'/>\n" % (id))
1912            output.write("    </file>\n")
1913        pass
1914
1915    def serialize_xrefs_functions(self, output):
1916        funcs = {}
1917        for name in list(self.idx.functions.keys()):
1918            id = self.idx.functions[name]
1919            try:
1920                (ret, params, desc) = id.info
1921                for param in params:
1922                    if param[0] == 'void':
1923                        continue
1924                    if param[0] in funcs:
1925                        funcs[param[0]].append(name)
1926                    else:
1927                        funcs[param[0]] = [name]
1928            except:
1929                pass
1930        typ = list(funcs.keys())
1931        typ.sort()
1932        for type in typ:
1933            if type == '' or type == 'void' or type == "int" or \
1934               type == "char *" or type == "const char *" :
1935                continue
1936            output.write("    <type name='%s'>\n" % (type))
1937            ids = funcs[type]
1938            ids.sort()
1939            pid = ''        # not sure why we have dups, but get rid of them!
1940            for id in ids:
1941                if id != pid:
1942                    output.write("      <ref name='%s'/>\n" % (id))
1943                    pid = id
1944            output.write("    </type>\n")
1945
1946    def serialize_xrefs_constructors(self, output):
1947        funcs = {}
1948        for name in list(self.idx.functions.keys()):
1949            id = self.idx.functions[name]
1950            try:
1951                (ret, params, desc) = id.info
1952                if ret[0] == "void":
1953                    continue
1954                if ret[0] in funcs:
1955                    funcs[ret[0]].append(name)
1956                else:
1957                    funcs[ret[0]] = [name]
1958            except:
1959                pass
1960        typ = list(funcs.keys())
1961        typ.sort()
1962        for type in typ:
1963            if type == '' or type == 'void' or type == "int" or \
1964               type == "char *" or type == "const char *" :
1965                continue
1966            output.write("    <type name='%s'>\n" % (type))
1967            ids = funcs[type]
1968            ids.sort()
1969            for id in ids:
1970                output.write("      <ref name='%s'/>\n" % (id))
1971            output.write("    </type>\n")
1972
1973    def serialize_xrefs_alpha(self, output):
1974        letter = None
1975        ids = list(self.idx.identifiers.keys())
1976        ids.sort()
1977        for id in ids:
1978            if id[0] != letter:
1979                if letter != None:
1980                    output.write("    </letter>\n")
1981                letter = id[0]
1982                output.write("    <letter name='%s'>\n" % (letter))
1983            output.write("      <ref name='%s'/>\n" % (id))
1984        if letter != None:
1985            output.write("    </letter>\n")
1986
1987    def serialize_xrefs_references(self, output):
1988        typ = list(self.idx.identifiers.keys())
1989        typ.sort()
1990        for id in typ:
1991            idf = self.idx.identifiers[id]
1992            module = idf.header
1993            output.write("    <reference name='%s' href='%s'/>\n" % (id,
1994                         'html/' + self.basename + '-' +
1995                         self.modulename_file(module) + '.html#' +
1996                         id))
1997
1998    def serialize_xrefs_index(self, output):
1999        index = self.xref
2000        typ = list(index.keys())
2001        typ.sort()
2002        letter = None
2003        count = 0
2004        chunk = 0
2005        chunks = []
2006        for id in typ:
2007            if len(index[id]) > 30:
2008                continue
2009            if id[0] != letter:
2010                if letter == None or count > 200:
2011                    if letter != None:
2012                        output.write("      </letter>\n")
2013                        output.write("    </chunk>\n")
2014                        count = 0
2015                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2016                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
2017                    first_letter = id[0]
2018                    chunk = chunk + 1
2019                elif letter != None:
2020                    output.write("      </letter>\n")
2021                letter = id[0]
2022                output.write("      <letter name='%s'>\n" % (letter))
2023            output.write("        <word name='%s'>\n" % (id))
2024            tokens = index[id];
2025            tokens.sort()
2026            tok = None
2027            for token in tokens:
2028                if tok == token:
2029                    continue
2030                tok = token
2031                output.write("          <ref name='%s'/>\n" % (token))
2032                count = count + 1
2033            output.write("        </word>\n")
2034        if letter != None:
2035            output.write("      </letter>\n")
2036            output.write("    </chunk>\n")
2037            if count != 0:
2038                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
2039            output.write("    <chunks>\n")
2040            for ch in chunks:
2041                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
2042                             ch[0], ch[1], ch[2]))
2043            output.write("    </chunks>\n")
2044
2045    def serialize_xrefs(self, output):
2046        output.write("  <references>\n")
2047        self.serialize_xrefs_references(output)
2048        output.write("  </references>\n")
2049        output.write("  <alpha>\n")
2050        self.serialize_xrefs_alpha(output)
2051        output.write("  </alpha>\n")
2052        output.write("  <constructors>\n")
2053        self.serialize_xrefs_constructors(output)
2054        output.write("  </constructors>\n")
2055        output.write("  <functions>\n")
2056        self.serialize_xrefs_functions(output)
2057        output.write("  </functions>\n")
2058        output.write("  <files>\n")
2059        self.serialize_xrefs_files(output)
2060        output.write("  </files>\n")
2061        output.write("  <index>\n")
2062        self.serialize_xrefs_index(output)
2063        output.write("  </index>\n")
2064
2065    def serialize(self):
2066        filename = "%s-api.xml" % self.name
2067        print("Saving XML description %s" % (filename))
2068        output = open(filename, "w")
2069        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2070        output.write("<api name='%s'>\n" % self.name)
2071        output.write("  <files>\n")
2072        headers = list(self.headers.keys())
2073        headers.sort()
2074        for file in headers:
2075            self.serialize_exports(output, file)
2076        output.write("  </files>\n")
2077        output.write("  <symbols>\n")
2078        macros = list(self.idx.macros.keys())
2079        macros.sort()
2080        for macro in macros:
2081            self.serialize_macro(output, macro)
2082        enums = list(self.idx.enums.keys())
2083        enums.sort()
2084        for enum in enums:
2085            self.serialize_enum(output, enum)
2086        typedefs = list(self.idx.typedefs.keys())
2087        typedefs.sort()
2088        for typedef in typedefs:
2089            self.serialize_typedef(output, typedef)
2090        variables = list(self.idx.variables.keys())
2091        variables.sort()
2092        for variable in variables:
2093            self.serialize_variable(output, variable)
2094        functions = list(self.idx.functions.keys())
2095        functions.sort()
2096        for function in functions:
2097            self.serialize_function(output, function)
2098        output.write("  </symbols>\n")
2099        output.write("</api>\n")
2100        output.close()
2101
2102        filename = "%s-refs.xml" % self.name
2103        print("Saving XML Cross References %s" % (filename))
2104        output = open(filename, "w")
2105        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
2106        output.write("<apirefs name='%s'>\n" % self.name)
2107        self.serialize_xrefs(output)
2108        output.write("</apirefs>\n")
2109        output.close()
2110
2111
def rebuild():
    """Guess the project from the working directory and rebuild its API files.

    The presence of 'parser.c', '../parser.c' or '../libxslt/transform.c'
    (checked in that order) selects the project name, the directories to
    scan and the extra files to exclude.  The selected project is
    scanned, analyzed and serialized; when '../libexslt/exslt.c' exists
    the same is then done for libexslt.

    Returns the builder of the selected project, or None (after printing
    a message) when none of the marker files is found.
    """
    layouts = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."], ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, banner, project, directories, excludes in layouts:
        if glob.glob(marker):
            print(banner)
            builder = docBuilder(project, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize()
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder
2138
2139#
2140# for debugging the parser
2141#
def parse(filename):
    """Run CParser on `filename` and return the result of its parse()."""
    return CParser(filename).parse()
2146
if __name__ == "__main__":
    # With a file argument: set the module-level debug flag and parse only
    # that file.  Without one: rebuild the whole API description.
    cli_args = sys.argv[1:]
    if cli_args:
        debug = 1
        parse(cli_args[0])
    else:
        rebuild()
2153