1#! /usr/bin/env python
2
3"""GUI interface to webchecker.
4
5This works as a Grail applet too!  E.g.
6
7  <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
8
9Checkpoints are not (yet???  ever???) supported.
10
11User interface:
12
13Enter a root to check in the text entry box.  To enter more than one root,
14enter them one at a time and press <Return> for each one.
15
Command buttons Run, Stop and "Check one" govern the checking process in
the obvious way.  Run and "Check one" also enter the root from the text
entry box if one is present.  "Start over" resets the checker and clears
the list panels.  There's also a check box (enabled by default) to decide
whether to actually follow external links (since this can slow the
checking down considerably).  Finally there's a Quit button (except when
running as a Grail applet).
21
A series of checkbuttons determines whether the corresponding output panel
is shown.  List panels are also automatically shown or hidden when their
status changes between empty and non-empty.  There are six panels:
25
26Log        -- raw output from the checker (-v, -q affect this)
27To check   -- links discovered but not yet checked
28Checked    -- links that have been checked
29Bad links  -- links that failed upon checking
30Errors     -- pages containing at least one bad link
31Details    -- details about one URL; double click on a URL in any of
32              the above list panels (not in Log) will show details
33              for that URL
34
35Use your window manager's Close command to quit.
36
37Command line options:
38
39-m bytes  -- skip HTML pages larger than this size (default %(MAXPAGE)d)
40-q        -- quiet operation (also suppresses external links report)
41-v        -- verbose operation; repeating -v will increase verbosity
42-t root   -- specify root dir which should be treated as internal (can repeat)
43-a        -- don't check name anchors
44
45Command line arguments:
46
47rooturl   -- URL to start checking
48             (default %(DEFROOT)s)
49
50XXX The command line options (-m, -q, -v) should be GUI accessible.
51
52XXX The roots should be visible as a list (?).
53
54XXX The multipanel user interface is clumsy.
55
56"""
57
58# ' Emacs bait
59
60
61import sys
62import getopt
63from Tkinter import *
64import tktools
65import webchecker
66
def main():
    """Parse the command line, build the checker window and run the GUI.

    Options:
      -m bytes  sets webchecker.maxpage (must be an integer)
      -q        sets webchecker.verbose to 0
      -v        increments webchecker.verbose (repeatable)
      -t root   extra root, added without queueing it for checking
      -a        toggles webchecker.nonames

    Every positional argument except the last is added as a root right
    away; the last one is only placed in the entry box as a suggestion.

    On an unknown option, a non-integer -m value or an empty -t value the
    error message and the usage text are written to stderr and the
    process exits with status 2.
    """

    def fail(msg):
        # Report a command line problem together with the usage text.
        sys.stderr.write("%s\n" % (msg,))
        sys.stderr.write(__doc__ % vars(webchecker) + "\n")
        sys.exit(2)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 't:m:qva')
    except getopt.error as msg:
        fail(msg)

    # Start from the checker's defaults; options below adjust them.
    webchecker.verbose = webchecker.VERBOSE
    webchecker.nonames = webchecker.NONAMES
    webchecker.maxpage = webchecker.MAXPAGE
    extra_roots = []
    for o, a in opts:
        if o == '-m':
            try:
                webchecker.maxpage = int(a)
            except ValueError:
                fail("-m requires an integer number of bytes, not %r" % (a,))
        elif o == '-q':
            webchecker.verbose = 0
        elif o == '-v':
            webchecker.verbose = webchecker.verbose + 1
        elif o == '-t':
            if not a:
                fail("-t requires a non-empty root")
            extra_roots.append(a)
        elif o == '-a':
            webchecker.nonames = not webchecker.nonames

    root = Tk(className='Webchecker')
    # Closing the window through the window manager ends the main loop.
    root.protocol("WM_DELETE_WINDOW", root.quit)
    c = CheckerWindow(root)
    c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage,
               nonames=webchecker.nonames)
    if args:
        for arg in args[:-1]:
            c.addroot(arg)
        c.suggestroot(args[-1])
    # Usually conditioned on whether external links
    # will be checked, but since that's not a command
    # line option, just toss them in.
    for url_root in extra_roots:
        # Make sure it's terminated by a slash,
        # so that addroot doesn't discard the last
        # directory component.
        if not url_root.endswith("/"):
            url_root = url_root + "/"
        c.addroot(url_root, add_to_do = 0)
    root.mainloop()
110
111
class CheckerWindow(webchecker.Checker):
    """Tk front end for webchecker.Checker.

    The window holds a root-URL entry, the Run / Stop / "Check one" /
    "Start over" buttons, two status labels and a MultiPanel with the
    Log, "To check", "Checked", "Bad links", "Pages w/ bad links" and
    Details panels.  The Checker hooks overridden here (setbad, setgood,
    newlink, markdone, seterror) first call the base implementation and
    then mirror the change in the matching list panel.
    """

    def __init__(self, parent, root=webchecker.DEFROOT):
        """Create all widgets inside *parent* and initialise the checker.

        parent -- Tk widget hosting the controls and panels
        root   -- URL offered in the entry box (suggested, not yet added)
        """
        self.__parent = parent

        self.__topcontrols = Frame(parent)
        self.__topcontrols.pack(side=TOP, fill=X)
        self.__label = Label(self.__topcontrols, text="Root URL:")
        self.__label.pack(side=LEFT)
        self.__rootentry = Entry(self.__topcontrols, width=60)
        self.__rootentry.pack(side=LEFT)
        self.__rootentry.bind('<Return>', self.enterroot)
        self.__rootentry.focus_set()

        self.__controls = Frame(parent)
        self.__controls.pack(side=TOP, fill=X)
        self.__running = 0
        self.__start = Button(self.__controls, text="Run", command=self.start)
        self.__start.pack(side=LEFT)
        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
                             state=DISABLED)
        self.__stop.pack(side=LEFT)
        self.__step = Button(self.__controls, text="Check one",
                             command=self.step)
        self.__step.pack(side=LEFT)
        self.__cv = BooleanVar(parent)
        self.__cv.set(self.checkext)
        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
                                      command=self.update_checkext,
                                      text="Check nonlocal links",)
        self.__checkext.pack(side=LEFT)
        self.__reset = Button(self.__controls, text="Start over", command=self.reset)
        self.__reset.pack(side=LEFT)
        if __name__ == '__main__': # No Quit button under Grail!
            self.__quit = Button(self.__controls, text="Quit",
                                 command=self.__parent.quit)
            self.__quit.pack(side=RIGHT)

        self.__status = Label(parent, text="Status: initial", anchor=W)
        self.__status.pack(side=TOP, fill=X)
        self.__checking = Label(parent, text="Idle", anchor=W)
        self.__checking.pack(side=TOP, fill=X)
        self.__mp = mp = MultiPanel(parent)
        # From here on everything written to sys.stdout lands in the Log panel.
        sys.stdout = self.__log = LogPanel(mp, "Log")
        self.__todo = ListPanel(mp, "To check", self, self.showinfo)
        self.__done = ListPanel(mp, "Checked", self, self.showinfo)
        self.__bad = ListPanel(mp, "Bad links", self, self.showinfo)
        self.__errors = ListPanel(mp, "Pages w/ bad links", self, self.showinfo)
        self.__details = LogPanel(mp, "Details")
        # Last root put into the entry box; reset() offers it again.
        self.root_seed = None
        webchecker.Checker.__init__(self)
        if root:
            root = str(root).strip()
            if root:
                self.suggestroot(root)
        self.newstatus()

    def reset(self):
        """Reset the checker, empty the list panels, re-suggest the last root."""
        webchecker.Checker.reset(self)
        for p in self.__todo, self.__done, self.__bad, self.__errors:
            p.clear()
        if self.root_seed:
            self.suggestroot(self.root_seed)

    def suggestroot(self, root):
        """Put *root* into the entry box, selected, and remember it."""
        self.__rootentry.delete(0, END)
        self.__rootentry.insert(END, root)
        self.__rootentry.select_range(0, END)
        self.root_seed = root

    def enterroot(self, event=None):
        """Add the URL typed in the entry box as a root and clear the box.

        If the new root shows up in the "To check" list it is selected
        and scrolled into view.  *event* is accepted so the method can be
        bound to <Return>; it is not used.
        """
        root = self.__rootentry.get()
        root = root.strip()
        if root:
            self.__checking.config(text="Adding root "+root)
            self.__checking.update_idletasks()
            self.addroot(root)
            self.__checking.config(text="Idle")
            # The list panels hold (url, fragment) pairs (seterror inserts
            # (url, '') and showinfo indexes url[0] / url[1]), so looking
            # up the bare string with items.index(root) could never match.
            # Compare against the URL part of each pair instead.
            for i, item in enumerate(self.__todo.items):
                if item == root or item[:1] == (root,):
                    self.__todo.list.select_clear(0, END)
                    self.__todo.list.select_set(i)
                    self.__todo.list.yview(i)
                    break
        self.__rootentry.delete(0, END)

    def start(self):
        """Run button: enter any pending root and check until stopped."""
        self.__start.config(state=DISABLED, relief=SUNKEN)
        self.__stop.config(state=NORMAL)
        self.__step.config(state=DISABLED)
        self.enterroot()
        self.__running = 1
        self.go()

    def stop(self):
        """Stop button: clear the running flag; go() restores the buttons."""
        self.__stop.config(state=DISABLED, relief=SUNKEN)
        self.__running = 0

    def step(self):
        """"Check one" button: enter any pending root and check one URL."""
        self.__start.config(state=DISABLED)
        self.__step.config(state=DISABLED, relief=SUNKEN)
        self.enterroot()
        self.__running = 0
        self.dosomething()

    def go(self):
        """Schedule the next check when running, else return to idle state."""
        if self.__running:
            self.__parent.after_idle(self.dosomething)
        else:
            self.__checking.config(text="Idle")
            self.__start.config(state=NORMAL, relief=RAISED)
            self.__stop.config(state=DISABLED, relief=RAISED)
            self.__step.config(state=NORMAL, relief=RAISED)

    # Guard flag: dosomething() calls update(), which can dispatch further
    # callbacks and so re-enter dosomething() while a check is in progress.
    __busy = 0

    def dosomething(self):
        """Check one URL from the "To check" list (the selected one, if any).

        Stops the run when nothing is left to check.  Calls that arrive
        while a check is already in progress return immediately.
        """
        if self.__busy:
            return
        self.__busy = 1
        try:
            if self.todo:
                l = self.__todo.selectedindices()
                if l:
                    i = l[0]
                else:
                    i = 0
                    self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
                url = self.__todo.items[i]
                self.__checking.config(text="Checking "+self.format_url(url))
                self.__parent.update()
                self.dopage(url)
            else:
                self.stop()
        except BaseException:
            # Do not reschedule after an unexpected failure; the exception
            # still propagates to the caller.
            self.__running = 0
            raise
        finally:
            # Always release the guard and refresh the buttons; otherwise
            # one failing page would leave the window permanently busy.
            self.__busy = 0
            self.go()

    def showinfo(self, url):
        """Fill the Details panel with what is known about *url*.

        *url* is an item of one of the list panels; it is indexed as a
        pair whose first element is the key into self.errors.
        """
        d = self.__details
        d.clear()
        d.put("URL:    %s\n" % self.format_url(url))
        if url in self.bad:
            d.put("Error:  %s\n" % str(self.bad[url]))
        if url in self.roots:
            d.put("Note:   This is a root URL\n")
        if url in self.done:
            d.put("Status: checked\n")
            o = self.done[url]
        elif url in self.todo:
            d.put("Status: to check\n")
            o = self.todo[url]
        else:
            d.put("Status: unknown (!)\n")
            o = []
        if (not url[1]) and url[0] in self.errors:
            d.put("Bad links from this page:\n")
            for triple in self.errors[url[0]]:
                link, rawlink, msg = triple
                d.put("  HREF  %s" % self.format_url(link))
                if self.format_url(link) != rawlink: d.put(" (%s)" %rawlink)
                d.put("\n")
                d.put("  error %s\n" % str(msg))
        self.__mp.showpanel("Details")
        for source, rawlink in o:
            d.put("Origin: %s" % source)
            if rawlink != self.format_url(url):
                d.put(" (%s)" % rawlink)
            d.put("\n")
        d.text.yview("1.0")

    def setbad(self, url, msg):
        """Checker hook: also list *url* in the "Bad links" panel."""
        webchecker.Checker.setbad(self, url, msg)
        self.__bad.insert(url)
        self.newstatus()

    def setgood(self, url):
        """Checker hook: also drop *url* from the "Bad links" panel."""
        webchecker.Checker.setgood(self, url)
        self.__bad.remove(url)
        self.newstatus()

    def newlink(self, url, origin):
        """Checker hook: show *url* in the panel matching its new state."""
        webchecker.Checker.newlink(self, url, origin)
        if url in self.done:
            self.__done.insert(url)
        elif url in self.todo:
            self.__todo.insert(url)
        self.newstatus()

    def markdone(self, url):
        """Checker hook: move *url* from "To check" to "Checked"."""
        webchecker.Checker.markdone(self, url)
        self.__done.insert(url)
        self.__todo.remove(url)
        self.newstatus()

    def seterror(self, url, triple):
        """Checker hook: list the page *url* in "Pages w/ bad links"."""
        webchecker.Checker.seterror(self, url, triple)
        self.__errors.insert((url, ''))
        self.newstatus()

    def newstatus(self):
        """Refresh the status label and let Tk process pending events."""
        self.__status.config(text="Status: "+self.status())
        self.__parent.update()

    def update_checkext(self):
        """Copy the "Check nonlocal links" checkbox state into self.checkext."""
        self.checkext = self.__cv.get()
318
319
class ListPanel:
    """One MultiPanel pane showing a listbox of URLs.

    self.items holds the raw URL objects in the same order as the rows of
    the listbox, which display checker.format_url(url).  The pane is shown
    when the first item arrives and hidden when the last one leaves.
    """

    def __init__(self, mp, name, checker, showinfo=None):
        """Create the pane *name* in MultiPanel *mp*.

        checker  -- object providing format_url(url) for the row text
        showinfo -- optional callback invoked with the item that was
                    double-clicked
        """
        self.mp, self.name = mp, name
        self.checker = checker
        self.showinfo = showinfo
        self.panel = mp.addpanel(name)
        self.list, self.frame = tktools.make_list_box(self.panel,
                                                      width=60, height=5)
        self.list.config(exportselection=0)
        if showinfo:
            self.list.bind('<Double-Button-1>', self.doubleclick)
        self.items = []

    def clear(self):
        """Drop every item and hide the pane."""
        self.items = []
        self.list.delete(0, END)
        self.mp.hidepanel(self.name)

    def doubleclick(self, event):
        """Pass the first selected item, if any, to the showinfo callback."""
        picked = self.selectedindices()
        if not picked:
            return
        self.showinfo(self.items[picked[0]])

    def selectedindices(self):
        """Return the selected row numbers as a list of ints."""
        raw = self.list.curselection()
        if raw:
            return [int(ix) for ix in raw]
        return []

    def insert(self, url):
        """Append *url* unless it is already listed."""
        if url in self.items:
            return
        if not self.items:
            # First entry: make the pane visible.
            self.mp.showpanel(self.name)
        # (I tried sorting alphabetically, but the display is too jumpy)
        pos = len(self.items)
        self.list.insert(pos, self.checker.format_url(url))
        self.list.yview(pos)
        self.items.append(url)

    def remove(self, url):
        """Remove *url* if listed, keeping a selection on a nearby row."""
        try:
            pos = self.items.index(url)
        except (ValueError, IndexError):
            return
        had_selection = pos in self.selectedindices()
        self.list.delete(pos)
        del self.items[pos]
        if not self.items:
            self.mp.hidepanel(self.name)
            return
        if had_selection:
            # Re-select the row now at this position, or the new last row.
            self.list.select_set(min(pos, len(self.items) - 1))
375
376
class LogPanel:
    """One MultiPanel pane wrapping a non-wrapping text box.

    Offers put() for direct use and write() so an instance can stand in
    for a file object such as sys.stdout.
    """

    def __init__(self, mp, name):
        """Create the pane *name* in MultiPanel *mp* with a 10-line text box."""
        self.mp, self.name = mp, name
        self.panel = mp.addpanel(name)
        self.text, self.frame = tktools.make_text_box(self.panel, height=10)
        self.text.config(wrap=NONE)

    def clear(self):
        """Delete all text and scroll back to the top."""
        box = self.text
        box.delete("1.0", END)
        box.yview("1.0")

    def put(self, s):
        """Append *s*; scroll to the end when it contains a newline."""
        self.text.insert(END, s)
        if '\n' not in s:
            return
        self.text.yview(END)

    def write(self, s):
        """Like put(), and also refresh the display after a newline."""
        self.put(s)
        if '\n' in s:
            self.panel.update()
400
401
class MultiPanel:
    """A stack of named panes, each toggled by a checkbutton in a top bar.

    self.panels maps a pane name to (variable, checkbutton, frame);
    self.panelnames keeps the names in creation order.
    """

    def __init__(self, parent):
        """Create the checkbutton bar and the pane area inside *parent*."""
        self.parent = parent
        outer = Frame(self.parent)
        outer.pack(expand=1, fill=BOTH)
        self.frame = outer
        bar = Frame(outer, borderwidth=2, relief=RAISED)
        bar.pack(fill=X)
        self.topframe = bar
        body = Frame(outer)
        body.pack(expand=1, fill=BOTH)
        self.botframe = body
        self.panelnames = []
        self.panels = {}

    def addpanel(self, name, on=0):
        """Add a pane titled *name* and return its frame.

        The pane is displayed immediately only when *on* is true.
        """
        var = StringVar(self.parent)
        initial = ""
        if on:
            initial = name
        var.set(initial)
        # The variable holds the pane name while checked, "" otherwise.
        toggle = Checkbutton(self.topframe, text=name,
                             offvalue="", onvalue=name, variable=var,
                             command=self.checkpanel)
        toggle.pack(side=LEFT)
        pane = Frame(self.botframe)
        title = Label(pane, text=name, borderwidth=2, relief=RAISED, anchor=W)
        title.pack(side=TOP, fill=X)
        self.panelnames.append(name)
        self.panels[name] = (var, toggle, pane)
        if on:
            pane.pack(expand=1, fill=BOTH)
        return pane

    def showpanel(self, name):
        """Check the box for pane *name* and display the pane."""
        var, _toggle, pane = self.panels[name]
        var.set(name)
        pane.pack(expand=1, fill=BOTH)

    def hidepanel(self, name):
        """Uncheck the box for pane *name* and remove the pane from view."""
        var, _toggle, pane = self.panels[name]
        var.set("")
        pane.pack_forget()

    def checkpanel(self):
        """Checkbutton callback: re-pack the checked panes in creation order."""
        entries = [self.panels[name] for name in self.panelnames]
        for _var, _toggle, pane in entries:
            pane.pack_forget()
        for var, _toggle, pane in entries:
            if var.get():
                pane.pack(expand=1, fill=BOTH)
453
454
# Start the GUI only when run as a script; on import (for instance as the
# Grail applet mentioned in the module docstring) nothing is launched.
if __name__ == '__main__':
    main()
457