1#!/usr/bin/env tclsh
2## -*- tcl -*-
3# Extract and report oscon schedule
4
5package require struct
6package require csv
7package require report
8package require htmlparse
9package require textutil
10package require log
11
12# Restrict logging to levels 'info' and higher.
13log::lvSuppressLE debug
14
15# 1. CSV structure filled by the parser = main data table
16#    ----------------------------------------------------
17#    Day Time/Start Time/End Track Tower Room Speaker Title
18#
19#    Matrices: "dmain" and "dmainr"
20#
21#    Difference: dmainr contains gratituous newlines in the
22#    speaker column which make for a better TXT report (less
23#    wide).
24#
25#    This is also report 'main'.
26#
27# 2. Schedule report to see conflicts, CSV structure
28#    ----------------------------------------------
29#    Day Time                Location-Columns, one per Room
30#        (15min granularity) (Content: Speaker + Topic)
31#
32#    Matrices: "sched" and "schedr". Difference as for dmain(r)
33#	and the location columns
34#
35#    This will be report 'sched'.
36
37proc main {} {
38    global pfx argv
39
40    set pfx   [lindex $argv 0]
41    set files [lrange $argv 1 end]
42
43    if {($pfx == {}) || ([llength $files] == 0)} {
44	usage
45	exit -1
46    }
47
48    initialize
49    foreach f $files {
50	log::log info "Scanning \"$f\" ..."
51	parse $f
52    }
53    gen_schedule
54    dump_main
55    dump_schedule
56    postscript
57    return
58}
59
60proc usage {} {
61    global argv0
62    puts "usage: $argv0 prefix file..."
63}
64
65
66proc initialize {} {
67    global rooms tracks
68    ::struct::matrix::matrix dmain  ; # data 1
69    ::struct::matrix::matrix dmainr ; # data 1r
70    ::struct::matrix::matrix sched  ; # data 2
71    ::struct::matrix::matrix schedr ; # data 2r
72    array set rooms  {}
73    array set tracks {}
74    dmain  add columns 8
75    dmain  add row {Day Start End Track Tower Room Speaker Title}
76    dmainr add columns 8
77    dmainr add row {Day Start End Track Tower Room Speaker Title}
78    return
79}
80
81proc parse {htmlfile} {
82    global rooms tracks
83
84    ::struct::tree::tree t
85
86    log::log info "Reading \"$htmlfile\" ..."
87    set html [read [set fh [open $htmlfile]]]
88    close $fh
89
90    log::log info "Parsing \"$htmlfile\" ..."
91    htmlparse::2tree $html t
92    htmlparse::removeVisualFluff t
93    htmlparse::removeFormDefs t
94
95    log::log info "Extracting information"
96
97    #puts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
98    # Navigate and extract the information
99    #t walk root -command {print %t %n}
100    #exit
101
102    set base [walk {1 1 0 1 1 0 1 0 1 0}]
103    set day  [walkf $base {0 0}]
104    set day  [escape [t get $day -key data]]
105    log::log debug "Day = $day"
106    set day [string range $day 0 2]
107
108    # Walk through the sessions of that day.
109
110    set sess [t next $base]
111    while {$sess != {}} {
112	set start [cvtdate [escape [t get [walkf $sess {0 0}] -key data]]]
113	set track [string trim [escape [t get [walkf $sess {1 0}] -key data]]]
114	set loc   [escape [t get [walkf $sess {1 1 0}] -key data]]
115	set loc   [string trimright $loc "\n\r\t:"]
116
117	log::log debug "    $start - $track - $loc"
118
119	# Separate Room/Tower information ...
120	regexp {(.*) in the (.*) Tower} $loc -> room tower
121	set room  [string trim $room]
122	set tower [string trim $tower]
123	set rooms($tower/$room) .
124	set tracks($track) .
125
126	set talk [walkf $sess {1 1 3}]
127	while {$talk != {}} {
128	    set time    [escape [t get $talk -key data]]
129	    set talk    [t next $talk]
130	    set title   [escape [t get [walkf $talk {0 0 0}] -key data]]
131	    set speaker [escape [t get [walkf $talk {0 2}]   -key data]]
132
133	    # Now we have everything to fill the main table ...
134	    # (After a bit of munging of the strings we got)
135
136	    foreach {start end} [split $time -] break
137	    set start [cvtdate $start]
138	    set end   [cvtdate $end]
139
140	    regsub -all \r  $speaker \n speaker
141	    regsub -all \n+ $speaker \n speaker
142	    regsub -all " *\n *" $speaker "\n" speaker
143	    set speakerc [split $speaker "\n"]
144	    set speakerc [join $speakerc ", "]
145	    log::log debug "        $start - $end - $speakerc - $title"
146
147	    #puts >>$speakerc<<
148	    #puts >>$speaker<<
149
150	    #                Day Time/Start Time/End Tower Room Speaker Title
151	    dmainr add row [list $day $start $end $track $tower $room $speaker  $title]
152	    dmain  add row [list $day $start $end $track $tower $room $speakerc $title]
153
154	    # Forward to next talk
155	    catch {set talk [t next $talk]}
156	    catch {set talk [t next $talk]}
157	}
158
159	set sess [t next $sess]
160    }
161
162    t destroy
163    return
164}
165
166proc print {t n} {
167    set  tp  [$t get $n -key type]
168    set  d   [$t depth $n]
169    set idx ""
170    catch {set  idx [$t index $n]}
171    incr d  $d
172    incr d  $d
173
174    switch -exact -- $tp {
175        a {
176            log::log debug "[textutil::strRepeat " " $d]$idx $tp ([$t get $n -key data]...)"
177        }
178        PCDATA {
179            log::log debug "[textutil::strRepeat " " $d]$idx $tp ([string range [$t get $n -key data] 0 20]...)"
180        }
181        default {
182            log::log debug "[textutil::strRepeat " " $d]$idx $tp"
183        }
184    }
185}
186
187proc walkf {n p} {
188    #log::log info "$n + $p ="
189    foreach idx $p {
190        if {$n == ""} {break}
191        set n [lindex [t children $n] $idx]
192        #log::log info "$idx :- $n"
193    }
194    return $n
195}
196
197proc walk {p} {
198    return [walkf root $p]
199}
200
201proc cvtdate {date} {
202    clock format [clock scan $date] -format "%H:%M"
203}
204
205proc escape {text} {
206    # Special escape for nbsp, convert into space and not the
207    # character specified by the standard.
208
209    regsub -all {&nbsp;} $text { } text
210    htmlparse::mapEscapes $text
211}
212
213
214proc gen_schedule {} {
215    global rooms tracks
216
217    dmain  set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmain  get rect 0 1 end end]]]
218    dmainr set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmainr get rect 0 1 end end]]]
219
220    sched  add columns 2
221    schedr add columns 2
222    #sched  add columns [array size rooms]
223    #schedr add columns [array size rooms]
224    sched  add columns [array size tracks]
225    schedr add columns [array size tracks]
226
227    #log::log info Tracks=[array size tracks]
228    #log::log info Rooms.=[array size rooms]
229
230    set res [list Day Time]
231    set c 2
232    foreach k [lsort [array names tracks]] {
233	lappend res $k
234	set tracks($k) $c
235	incr c
236    }
237
238    sched  add row $res
239    schedr add row $res
240
241    # Data in dmain is already sorted by day. By starting time only
242    # partially, there are back references.
243    # Just move them to the correct rooms and rows!
244
245    #-- Day Time Location-Columns, one per Room --
246
247    set n [dmain rows]
248    set p 0
249
250    array set rmap {}
251
252    for {set r 1} {$r < $n} {incr r} {
253	foreach {day start end track tower room speaker title} [dmain get row $r] break
254	#[list $day $start $end $tower $room $speakerc $title]
255
256	set key $day,$start
257	if {![info exists rmap($key)]} {
258	    log::log info "Track schedule $day $start"
259	    sched  add row
260	    schedr add row
261	    incr p
262
263	    set rmap($key) $p
264	    sched  set cell 0 $p $day
265	    sched  set cell 1 $p $start
266	    schedr set cell 0 $p $day
267	    schedr set cell 1 $p $start
268	}
269
270	sched  set cell $tracks($track) $rmap($key) "$tower; $room; $speaker; $title"
271	schedr set cell $tracks($track) $rmap($key) "$tower $room\n$speaker\n$title"
272    }
273
274    # Squeeze the columns 2+ in the report matrix
275
276    set cols [schedr columns]
277    for {set c 2} {$c < $cols} {incr c} {
278
279	if {[schedr columnwidth $c] > 21} {
280	    log::log debug "Squeezing $c"
281	    set col [schedr get column $c]
282	    set res [list]
283	    foreach item $col {
284		lappend res [wrap $item 21]
285	    }
286	    schedr set column $c $res
287	}
288    }
289
290    # Now sort by day (primary key) and starting time (secondary key).
291    # (Meaning we have to sort by time first, and then the day)
292
293    # sched  setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [sched  getrect 0 0 end end]]]
294    # schedr setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [schedr getrect 0 0 end end]]]
295
296    return
297}
298
299proc dump_main {} {
300    global pfx
301    log::log info "Writing talk information /CSV"
302
303    set f [open ${pfx}.main.csv w]
304    csv::writematrix dmain $f
305    close $f
306
307    log::log info "Writing talk information /TXT"
308
309    # Compute width of report and squeeze the title column to fit
310    # below 80 char/line
311
312    # Day Time/Start Time/End Track Tower Room Speaker Title
313
314    set total 0
315    incr total [dmain columnwidth 0]
316    incr total [dmain columnwidth 1]
317    incr total [dmain columnwidth 2]
318    incr total [dmain columnwidth 3]
319    incr total [dmain columnwidth 4]
320    incr total [dmain columnwidth 5]
321    incr total [dmain columnwidth 6]
322
323    #log::log info Total=$total
324
325    if {$total < 80} {
326	set total [expr {80 - $total}]
327	set titles [dmain getcolumn 7]
328	set res [list]
329	foreach t $titles {
330	    lappend res [textutil::adjust $t -length $total]
331	}
332	dmain setcolumn 7 $res
333    }
334
335    ::report::report r [dmainr columns] style captionedtable 1
336    set f [open ${pfx}.main.txt w]
337    r printmatrix2channel dmainr $f
338    close $f
339    r destroy
340
341    # Now the HTML report, use 'dmain' as base, actually formatting
342    # into lines is done by the browser.
343
344    log::log info "Writing talk information /HTML"
345
346    ::report::report r [dmain columns] style html
347
348    set f [open ${pfx}.main.html w]
349    puts $f "<html><head><title>Talk information and schedule</title></head><body>"
350    puts $f "<h1>Talk information and schedule</h1>"
351    puts $f "<p><table border=1>"
352    r printmatrix2channel dmain $f
353    puts $f "</table></p></body></html>"
354    close $f
355    r destroy
356}
357
358proc dump_schedule {} {
359    global pfx
360    log::log info "Writing track schedule /CSV"
361
362    set f [open ${pfx}.sched.csv w]
363    csv::writematrix sched $f
364    close $f
365
366    log::log info "Writing track schedule /TXT"
367
368    ::report::report r [schedr columns] style captionedtable 1
369    r datasep set [r top get]
370    r datasep enable
371
372    set f [open ${pfx}.sched.txt w]
373    r printmatrix2channel schedr $f
374    close $f
375    r destroy
376
377    # Now the HTML report, use 'sched' as base, actually formatting
378    # into lines is done by the browser.
379
380    log::log info "Writing track schedule /HTML"
381
382    ::report::report r [sched columns] style html
383
384    set f [open ${pfx}.sched.html w]
385    puts $f "<html><head><title>Track schedules</title></head><body>"
386    puts $f "<h1>Track schedules</h1>"
387    puts $f "<p><table border=1>"
388    r printmatrix2channel sched $f
389    puts $f "</table></p></body></html>"
390    close $f
391    r destroy
392}
393
394proc postscript {} {
395    global pfx
396    # Transforms texts into printable postscript, using a2ps (if available)
397
398    catch {exec a2ps -o ${pfx}.main.ps  -1 -B -r -f7 ${pfx}.main.txt}
399    catch {exec a2ps -o ${pfx}.sched.ps -1 -B -r -f4 ${pfx}.sched.txt}
400    return
401}
402
403proc wrap {text len} {
404    # @author Jeffrey Hobbs <jeff at hobbs org>
405    #
406    # @c Wraps the given <a text> into multiple lines not
407    # @c exceeding <a len> characters each. Lines shorter
408    # @c than <a len> characters might get filled up.
409    #
410    # @a text: The string to operate on.
411    # @a len: The maximum allowed length of a single line.
412    #
413    # @r Basically <a text>, but with changed newlines to
414    # @r restrict the length of individual lines to at most
415    # @r <a len> characters.
416
417    # @n This procedure is not checked by the testsuite.
418
419    # @i wrap, word wrap
420
421    # Convert all newlines into spaces and initialize the result
422    # see ::pool::string::oneLine too.
423
424    regsub -all "\n" $text { } text
425    incr len -1
426
427    set out {}
428
429    # As long as the string is longer than the intended length of
430    # lines in the result:
431
432    while {[string len $text] > $len} {
433	# - Find position of last space in the part of the text
434	#   which could a line in the result.
435
436	# - We jump out of the loop if there is none and the whole
437	#   text does not contain spaces anymore. In the latter case
438	#   the rest of the text is one word longer than an intended
439	#   line, we cannot avoid the longer line.
440
441	set i [string last { } [string range $text 0 $len]]
442
443	if {$i == -1 && [set i [string first { } $text]] == -1} {
444	    break
445	}
446
447	# Get the just fitting part of the text, remove any heading
448	# and trailing spaces, then append it to the result string,
449	# don't close it with a newline!
450
451	append out [string trim [string range $text 0 [incr i -1]]]\n
452
453	# Shorten the text by the length of the processed part and
454	# the space used to split it, then iterate.
455
456	set text [string range $text [incr i 2] end]
457    }
458
459    return $out$text
460}
461
462# -------------------------------------------
463# Define the required reports styles
464
465::report::defstyle simpletable {} {
466    data   set [split "[string repeat "| "   [columns]]|"]
467    top    set [split "[string repeat "+ - " [columns]]+"]
468    bottom set [top get]
469    top	   enable
470    bottom enable
471}
472::report::defstyle captionedtable {{n 1}} {
473    simpletable
474    topdata   set [data get]
475    topcapsep set [top  get]
476    topcapsep enable
477    tcaption $n
478}
479::report::defstyle html {} {
480    set c  [columns]
481    set cl $c ; incr cl -1
482    data set "<tr> [split [string repeat " " $cl] ""] </tr>"
483    for {set col 0} {$col < $c} {incr col} {
484	pad $col left  "<td>"
485	pad $col right "</td>"
486    }
487    return
488}
489
490# -------------------------------------------
491
492main
493exit
494