#!/usr/bin/env python

# Copyright (C) 2002-2004 Artifex Software, Inc.
# All Rights Reserved.
#
# This software is provided AS-IS with no warranty, either express or
# implied.
#
# This software is distributed under license and may not be copied, modified
# or distributed except as expressly authorized under the terms of that
# license. Refer to licensing information at http://www.artifex.com/
# or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
# San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.

# $Id: check_docrefs.py 8687 2008-05-02 02:19:57Z giles $

# Check that the hrefs in an HTML document mention all of a set of files.
# The requirement is that the union of all the docfiles must somewhere
# reference all the files. Usage:
#     check_docrefs (+src | +lib | [+-]from <docfile>* | [+-]to (<directory> | <file>)*)*

# +from or +to adds files; -from or -to removes them;
# +src or +lib executes SRC_LIST or LIB_LIST below, respectively.

import glob
import os.path
import re
import sys

from gstestutils import GSTestCase, gsRunTestsMain

# Define the Ghostscript-specific parameter lists.
SRC_LIST = [
    '+from', 'doc/Develop.htm',
    '+to', 'lib', 'src',
    '-to', '*/CVS', 'src/rinkj/CVS',
    '-to', 'src/*.mak.tcl',
    '-to', 'lib/*.upp',
    '-to', 'lib/*.ps',
    '+to', 'lib/gs_*.ps', 'lib/pdf_*.ps'
]
LIB_LIST = [
    '+from', 'doc/Psfiles.htm',
    '+to', 'examples/*.ps', 'lib/*.ps'
]

# Utilities

# Matches href="..." attributes whose target contains no '#', so only
# files referenced as a whole are counted.  Compiled once at import time.
_HREF_PATTERN = re.compile('href="([^"#]*)"', re.IGNORECASE)


# List all the files referenced from a document.
# Note that we only count files referenced as a whole, i.e., without #.

def hrefs(doc):
    """Return the list of file paths referenced by href attributes in doc.

    Each href target is joined onto the directory containing doc and then
    normalized with os.path.normpath.  Targets containing '#' are not
    matched at all.
    """
    prefix = os.path.dirname(doc)
    # 'with' guarantees the file is closed even if read() raises.
    with open(doc, 'r') as fp:
        contents = fp.read()
    return [os.path.normpath(os.path.join(prefix, target))
            for target in _HREF_PATTERN.findall(contents)]


# Define a class for storing to/from information.

class DocRefs:
    """Bookkeeping for one documentation-reference check.

    Each table maps a path to the 'adding' flag (1 or 0) most recently
    recorded for it:
      to    -- normalized paths of the files that must be referenced
      toDir -- directories (as given, not normalized) passed to doTo
      From  -- normalized paths of the files referenced by the documents
    """

    def __init__(self):
        self.to = {}
        self.toDir = {}
        self.From = {}

    def doTo(self, fname, adding):
        """Record fname as a 'to' target with the given adding flag.

        A directory is recorded in toDir and every entry matched by
        '<fname>/*' is processed recursively.  Symbolic links that are not
        directories are ignored, as is anything that is not a regular file.
        """
        if os.path.isdir(fname):
            self.toDir[fname] = adding
            for f in glob.glob(os.path.join(fname, '*')):
                self.doTo(f, adding)
        elif os.path.islink(fname):
            pass
        elif os.path.isfile(fname):
            self.to[os.path.normpath(fname)] = adding

    def doFrom(self, fname, adding):
        """Record the normalized fname as referenced, with the adding flag."""
        self.From[os.path.normpath(fname)] = adding

    def doFromDoc(self, docname, adding):
        """Record every file that the document docname references."""
        for f in hrefs(docname):
            self.doFrom(f, adding)

    def cleanup(self):
        """Drop every entry of 'to' and 'From' whose adding flag is 0."""
        for table in (self.to, self.From):
            # Iterate over a snapshot of the keys: deleting from a dict
            # while iterating over its live key view is an error.
            for k in list(table.keys()):
                if table[k] == 0:
                    del table[k]


# Process command line arguments and switches.

def mainArgs(arglist, root, fromProc, toProc):
    """Interpret arglist, calling fromProc or toProc for each file named.

    arglist  -- switches ('+src', '+lib', '+from', '-from', '+to', '-to')
                interleaved with file names or glob patterns
    root     -- directory that file names and patterns are relative to
    fromProc -- called as fromProc(path, adding) for names after [+-]from
    toProc   -- called as toProc(path, adding) for names after [+-]to

    'adding' is 1 after a '+' switch and 0 after a '-' switch.  '+src' and
    '+lib' recursively process SRC_LIST and LIB_LIST; they do not change the
    switch in effect for the arguments that follow them.

    Writes a message to stderr and exits with status 1 on an unknown switch
    or on a file name that appears before any [+-]from / [+-]to switch.
    """
    switches = {
        '+from': (fromProc, 1),
        '-from': (fromProc, 0),
        '+to': (toProc, 1),
        '-to': (toProc, 0),
    }
    do = adding = None
    for arg in arglist:
        if arg == '+src':
            mainArgs(SRC_LIST, root, fromProc, toProc)
        elif arg == '+lib':
            mainArgs(LIB_LIST, root, fromProc, toProc)
        elif arg in switches:
            do, adding = switches[arg]
        elif arg[:1] in ('+', '-'):
            sys.stderr.write('Unknown switch: ' + arg + '\n')
            sys.exit(1)
        elif do is None:
            # Without a preceding switch there is nothing to tell us
            # whether this name is a 'from' document or a 'to' target.
            sys.stderr.write('No +from/-from/+to/-to switch before: '
                             + arg + '\n')
            sys.exit(1)
        elif '*' in arg:
            for f in glob.glob(os.path.join(root, arg)):
                do(f, adding)
        else:
            do(os.path.join(root, arg), adding)


class GSCheckDocRefs(GSTestCase):
    """Test case checking that a set of documents references a set of files.

    root is the directory the argument list is relative to; arglist is an
    argument list in the form accepted by mainArgs.
    """

    def __init__(self, root, arglist):
        self.root = root
        self.arglist = arglist
        GSTestCase.__init__(self)

    def _fromDocs(self):
        """Return the sorted list of 'from' documents named by arglist.

        Only the document names themselves are collected here (the documents
        are not opened); 'to' arguments are ignored.
        """
        refs = DocRefs()
        mainArgs(self.arglist, self.root, refs.doFrom, lambda f, b: None)
        refs.cleanup()
        # Sorted so that messages built from the list are deterministic.
        return sorted(refs.From.keys())

    def shortDescription(self):
        """Return a one-line description naming the documents checked."""
        return (' or '.join(self._fromDocs())
                + ' must reference all relevant files.')

    def runTest(self):
        """Collect the to/from tables and report every mismatch found.

        Two kinds of problem are reported: 'to' files that no document
        references, and referenced files that are absent from the 'to' set
        although they live in a directory that was scanned.  The collected
        messages are handed to failIfMessages (inherited from GSTestCase;
        presumably it fails the test when the list is non-empty).
        """
        refs = DocRefs()
        mainArgs(self.arglist, self.root, refs.doFromDoc, refs.doTo)
        refs.cleanup()
        docs = self._fromDocs()
        if len(docs) == 1:
            fromFormat = docs[0] + ' fails to reference these %d files:'
            toFormat = docs[0] + ' references these %d files that do not exist:'
        else:
            fromFormat = 'These %d files are not referenced:'
            toFormat = 'These %d files are referenced but do not exist:'
        messages = []

        noFrom = sorted(f for f in refs.to if f not in refs.From)
        if noFrom:
            messages.append(fromFormat % len(noFrom))
            messages += noFrom

        # The referenced paths are normalized, so normalize the scanned
        # directory names before comparing against their dirnames.
        scannedDirs = set(os.path.normpath(d) for d in refs.toDir)
        # Walk the *referenced* files here: a file that is referenced, lies
        # in a scanned directory, yet is not among the 'to' files is missing.
        noTo = sorted(f for f in refs.From
                      if f not in refs.to
                      and os.path.dirname(f) in scannedDirs)
        if noTo:
            messages.append(toFormat % len(noTo))
            messages += noTo

        self.failIfMessages(messages)


# Add the tests defined in this file to a suite.

def addTests(suite, gsroot, **args):
    """Add the '+src' and '+lib' reference checks, rooted at gsroot."""
    suite.addTest(GSCheckDocRefs(gsroot, ['+src']))
    suite.addTest(GSCheckDocRefs(gsroot, ['+lib']))


if __name__ == "__main__":
    gsRunTestsMain(addTests)