1#!/usr/bin/env python
2
3#    Copyright (C) 2002-2004 Artifex Software, Inc.
4#    All Rights Reserved.
5#
6# This software is provided AS-IS with no warranty, either express or
7# implied.
8#
9# This software is distributed under license and may not be copied, modified
10# or distributed except as expressly authorized under the terms of that
11# license.  Refer to licensing information at http://www.artifex.com/
12# or contact Artifex Software, Inc.,  7 Mt. Lassen Drive - Suite A-134,
13# San Rafael, CA  94903, U.S.A., +1(415)492-9861, for further information.
14
15# $Id: check_docrefs.py 8687 2008-05-02 02:19:57Z giles $
16
17# Check that the hrefs in an HTML document mention all of a set of files.
18# The requirement is that the union of all the docfiles must somewhere
19# reference all the files.  Usage:
20#    check_docrefs (+src | +lib | [+-]from <docfile>* | [+-]to (<directory> | <file>)*)*
21
# +from or +to adds files; -from or -to removes them;
# +src or +lib expands to SRC_LIST or LIB_LIST below, respectively.
24
25# Define the Ghostscript-specific parameter lists.
26SRC_LIST = [
27    '+from', 'doc/Develop.htm',
28    '+to', 'lib', 'src',
29    '-to', '*/CVS', 'src/rinkj/CVS',
30    '-to', 'src/*.mak.tcl',
31    '-to', 'lib/*.upp',
32    '-to', 'lib/*.ps',
33    '+to', 'lib/gs_*.ps', 'lib/pdf_*.ps'
34]
# Arguments substituted for the +lib switch.
LIB_LIST = [
    # Document whose hrefs are collected.
    '+from', 'doc/Psfiles.htm',
    # Candidate files: the PostScript files in examples and lib.
    '+to', 'examples/*.ps', 'lib/*.ps'
]
39
40import glob, os.path, re, sys
41from gstestutils import GSTestCase, gsRunTestsMain
42
43# Utilities
44
45# List all the files referenced from a document.
46# Note that we only count files referenced as a whole, i.e., without #.
47
def hrefs(doc):
    """Return the files referenced by href attributes in an HTML document.

    doc is the path of the document to scan.  Each href value is joined to
    the directory containing doc and normalized with os.path.normpath.
    Only double-quoted href values that contain no '#' are matched (the
    attribute name is matched case-insensitively), so a link to a fragment
    within a file is not counted.

    Returns a list of paths in document order; duplicates are kept.
    Errors from opening or reading doc propagate to the caller.
    """
    prefix = os.path.dirname(doc)
    fp = open(doc, 'r')
    try:
        # try/finally so the file is closed even if read() fails.
        contents = fp.read()
    finally:
        fp.close()
    pattern = re.compile(r'href="([^"#]*)"', re.IGNORECASE)
    # A list comprehension (rather than map) yields a real list on every
    # Python version.
    return [os.path.normpath(os.path.join(prefix, target))
            for target in pattern.findall(contents)]
57
58# Define a class for storing to/from information.
59
class DocRefs:
    """Accumulate the candidate ('to') files and the referenced ('from') files.

    Every table maps a normalized path to the most recent 'adding' flag
    recorded for it (1 to add, 0 to remove).  Call cleanup() after all
    arguments have been processed to drop the removed entries from the
    'to' and 'From' tables.
    """

    def __init__(self):
        # Candidate file -> adding flag.
        self.to = {}
        # Directory passed to (or reached by) doTo -> adding flag.
        self.toDir = {}
        # Referenced file -> adding flag.
        self.From = {}

    def doTo(self, fname, adding):
        """Record fname, or every file below it, as a candidate file.

        A directory is recorded in toDir and its entries (as matched by
        glob '*') are processed recursively.  Because isdir is tested
        first, a symbolic link to a directory is descended into; any other
        symbolic link is skipped.  Regular files are recorded in to.
        Names that do not exist are ignored.
        """
        if os.path.isdir(fname):
            # Normalize the key so that it matches os.path.dirname() of the
            # normalized file names stored in to and From.
            self.toDir[os.path.normpath(fname)] = adding
            for f in glob.glob(os.path.join(fname, '*')):
                self.doTo(f, adding)
        elif os.path.islink(fname):
            pass
        elif os.path.isfile(fname):
            self.to[os.path.normpath(fname)] = adding

    def doFrom(self, fname, adding):
        """Record fname as a referenced file with the given adding flag."""
        self.From[os.path.normpath(fname)] = adding

    def doFromDoc(self, docname, adding):
        """Record every file that hrefs() reports for docname."""
        for f in hrefs(docname):
            self.doFrom(f, adding)

    def cleanup(self):
        """Delete the entries of to and From whose adding flag is 0.

        toDir is left untouched.
        """
        for table in (self.to, self.From):
            # Iterate over a copy of the keys: deleting from a dictionary
            # while iterating over it directly is an error.
            for k in list(table.keys()):
                if table[k] == 0:
                    del table[k]
89
90# Process command line arguments and switches.
91
def mainArgs(arglist, root, fromProc, toProc):
    """Process a list of switches and file arguments.

    arglist  -- sequence of switches and file names or patterns.
    root     -- directory that every file name or pattern is joined to.
    fromProc -- called as fromProc(path, adding) for arguments that follow
                +from (adding is 1) or -from (adding is 0).
    toProc   -- called as toProc(path, adding) for arguments that follow
                +to (adding is 1) or -to (adding is 0).

    +src and +lib process SRC_LIST and LIB_LIST recursively; the switch in
    effect in this call is not changed by the nested list.  An argument
    containing '*' is expanded with glob and the procedure is called once
    per match; any other argument is passed through unexpanded.

    An unknown switch, or a file argument that appears before any
    +from/-from/+to/-to switch, is reported on stderr and terminates the
    program with exit status 1 (SystemExit).
    """
    do = None
    adding = None       # None until a from/to switch has been seen
    for arg in arglist:
        if arg == '+src':
            mainArgs(SRC_LIST, root, fromProc, toProc)
        elif arg == '+lib':
            mainArgs(LIB_LIST, root, fromProc, toProc)
        elif arg == '+from':
            do, adding = fromProc, 1
        elif arg == '-from':
            do, adding = fromProc, 0
        elif arg == '+to':
            do, adding = toProc, 1
        elif arg == '-to':
            do, adding = toProc, 0
        elif arg[:1] in ('+', '-'):
            sys.stderr.write('Unknown switch: ' + arg + '\n')
            sys.exit(1)
        elif adding is None:
            # Without a preceding switch there is no procedure to call.
            sys.stderr.write('No +from, -from, +to or -to before: '
                             + arg + '\n')
            sys.exit(1)
        elif arg.find('*') >= 0:
            for f in glob.glob(os.path.join(root, arg)):
                do(f, adding)
        else:
            do(os.path.join(root, arg), adding)
113
class GSCheckDocRefs(GSTestCase):
    """Test that a set of documents references all of a set of files.

    root is the directory that the file arguments are relative to and
    arglist is a list of switches and files as accepted by mainArgs.
    """

    def __init__(self, root, arglist):
        self.root = root
        self.arglist = arglist
        GSTestCase.__init__(self)

    def _fromDocs(self):
        """Return the sorted list of documents named by the +from arguments.

        Only the document names are collected here: the documents are not
        read and the +to/-to arguments are ignored.
        """
        refs = DocRefs()
        mainArgs(self.arglist, self.root,
                 lambda f, b, refs=refs: refs.doFrom(f, b),
                 lambda f, b: None)
        refs.cleanup()
        docs = list(refs.From.keys())
        docs.sort()
        return docs

    def shortDescription(self):
        """Return a one-line description naming the documents checked."""
        # join, unlike reduce without an initial value, accepts an empty list.
        docs = ' or '.join(self._fromDocs())
        return docs + ' must reference all relevant files.'

    def runTest(self):
        """Compare the files referenced by the documents with the files found.

        Two lists are reported through failIfMessages (inherited from
        GSTestCase; presumably it fails the test when the list is not
        empty -- confirm in gstestutils):
          - candidate files that no document references;
          - referenced files that are not candidate files although they
            lie in one of the directories that were scanned.
        """
        refs = DocRefs()
        mainArgs(self.arglist, self.root,
                 lambda f, b, refs=refs: refs.doFromDoc(f, b),
                 lambda f, b, refs=refs: refs.doTo(f, b))
        refs.cleanup()
        docs = self._fromDocs()
        if len(docs) == 1:
            # Escape '%' so that the document name cannot be taken for a
            # format directive when the count is substituted below.
            doc = docs[0].replace('%', '%%')
            fromFormat = doc + ' fails to reference these %d files:'
            toFormat = doc + ' references these %d files that do not exist:'
        else:
            fromFormat = 'These %d files are not referenced:'
            toFormat = 'These %d files are referenced but do not exist:'
        messages = []

        # Candidate files that are never referenced.
        noFrom = [f for f in refs.to.keys() if f not in refs.From]
        if noFrom:
            messages.append(fromFormat % len(noFrom))
            noFrom.sort()
            messages += noFrom

        # Referenced files that are not candidates.  This must walk the
        # references (From), not the candidates, or nothing can ever be
        # reported.  References into directories that were never scanned
        # are ignored.  The directory names are normalized here because
        # the referenced names are normalized too.
        scannedDirs = {}
        for d in refs.toDir.keys():
            scannedDirs[os.path.normpath(d)] = 1
        noTo = [f for f in refs.From.keys()
                if f not in refs.to and os.path.dirname(f) in scannedDirs]
        if noTo:
            messages.append(toFormat % len(noTo))
            noTo.sort()
            messages += noTo
        self.failIfMessages(messages)
164
165# Add the tests defined in this file to a suite.
166
def addTests(suite, gsroot, **args):
    """Add one GSCheckDocRefs test for +src and one for +lib to suite.

    gsroot is passed to each test as its root directory.  Any extra
    keyword arguments are accepted and ignored.
    """
    for switch in ('+src', '+lib'):
        suite.addTest(GSCheckDocRefs(gsroot, [switch]))
170
# When executed as a script, hand this module's addTests to the shared
# test driver imported from gstestutils.
if __name__ == "__main__":
    gsRunTestsMain(addTests)
173