1#!/usr/bin/env tclsh 2## -*- tcl -*- 3# Extract and report oscon schedule 4 5package require struct 6package require csv 7package require report 8package require htmlparse 9package require textutil 10package require log 11 12# Restrict logging to levels 'info' and higher. 13log::lvSuppressLE debug 14 15# 1. CSV structure filled by the parser = main data table 16# ---------------------------------------------------- 17# Day Time/Start Time/End Track Tower Room Speaker Title 18# 19# Matrices: "dmain" and "dmainr" 20# 21# Difference: dmainr contains gratituous newlines in the 22# speaker column which make for a better TXT report (less 23# wide). 24# 25# This is also report 'main'. 26# 27# 2. Schedule report to see conflicts, CSV structure 28# ---------------------------------------------- 29# Day Time Location-Columns, one per Room 30# (15min granularity) (Content: Speaker + Topic) 31# 32# Matrices: "sched" and "schedr". Difference as for dmain(r) 33# and the location columns 34# 35# This will be report 'sched'. 36 37proc main {} { 38 global pfx argv 39 40 set pfx [lindex $argv 0] 41 set files [lrange $argv 1 end] 42 43 if {($pfx == {}) || ([llength $files] == 0)} { 44 usage 45 exit -1 46 } 47 48 initialize 49 foreach f $files { 50 log::log info "Scanning \"$f\" ..." 51 parse $f 52 } 53 gen_schedule 54 dump_main 55 dump_schedule 56 postscript 57 return 58} 59 60proc usage {} { 61 global argv0 62 puts "usage: $argv0 prefix file..." 63} 64 65 66proc initialize {} { 67 global rooms tracks 68 ::struct::matrix::matrix dmain ; # data 1 69 ::struct::matrix::matrix dmainr ; # data 1r 70 ::struct::matrix::matrix sched ; # data 2 71 ::struct::matrix::matrix schedr ; # data 2r 72 array set rooms {} 73 array set tracks {} 74 dmain add columns 8 75 dmain add row {Day Start End Track Tower Room Speaker Title} 76 dmainr add columns 8 77 dmainr add row {Day Start End Track Tower Room Speaker Title} 78 return 79} 80 81proc parse {htmlfile} { 82 global rooms tracks 83 84 ::struct::tree::tree t 85 86 log::log info "Reading \"$htmlfile\" ..." 87 set html [read [set fh [open $htmlfile]]] 88 close $fh 89 90 log::log info "Parsing \"$htmlfile\" ..." 91 htmlparse::2tree $html t 92 htmlparse::removeVisualFluff t 93 htmlparse::removeFormDefs t 94 95 log::log info "Extracting information" 96 97 #puts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 98 # Navigate and extract the information 99 #t walk root -command {print %t %n} 100 #exit 101 102 set base [walk {1 1 0 1 1 0 1 0 1 0}] 103 set day [walkf $base {0 0}] 104 set day [escape [t get $day -key data]] 105 log::log debug "Day = $day" 106 set day [string range $day 0 2] 107 108 # Walk through the sessions of that day. 109 110 set sess [t next $base] 111 while {$sess != {}} { 112 set start [cvtdate [escape [t get [walkf $sess {0 0}] -key data]]] 113 set track [string trim [escape [t get [walkf $sess {1 0}] -key data]]] 114 set loc [escape [t get [walkf $sess {1 1 0}] -key data]] 115 set loc [string trimright $loc "\n\r\t:"] 116 117 log::log debug " $start - $track - $loc" 118 119 # Separate Room/Tower information ... 120 regexp {(.*) in the (.*) Tower} $loc -> room tower 121 set room [string trim $room] 122 set tower [string trim $tower] 123 set rooms($tower/$room) . 124 set tracks($track) . 125 126 set talk [walkf $sess {1 1 3}] 127 while {$talk != {}} { 128 set time [escape [t get $talk -key data]] 129 set talk [t next $talk] 130 set title [escape [t get [walkf $talk {0 0 0}] -key data]] 131 set speaker [escape [t get [walkf $talk {0 2}] -key data]] 132 133 # Now we have everything to fill the main table ... 134 # (After a bit of munging of the strings we got) 135 136 foreach {start end} [split $time -] break 137 set start [cvtdate $start] 138 set end [cvtdate $end] 139 140 regsub -all \r $speaker \n speaker 141 regsub -all \n+ $speaker \n speaker 142 regsub -all " *\n *" $speaker "\n" speaker 143 set speakerc [split $speaker "\n"] 144 set speakerc [join $speakerc ", "] 145 log::log debug " $start - $end - $speakerc - $title" 146 147 #puts >>$speakerc<< 148 #puts >>$speaker<< 149 150 # Day Time/Start Time/End Tower Room Speaker Title 151 dmainr add row [list $day $start $end $track $tower $room $speaker $title] 152 dmain add row [list $day $start $end $track $tower $room $speakerc $title] 153 154 # Forward to next talk 155 catch {set talk [t next $talk]} 156 catch {set talk [t next $talk]} 157 } 158 159 set sess [t next $sess] 160 } 161 162 t destroy 163 return 164} 165 166proc print {t n} { 167 set tp [$t get $n -key type] 168 set d [$t depth $n] 169 set idx "" 170 catch {set idx [$t index $n]} 171 incr d $d 172 incr d $d 173 174 switch -exact -- $tp { 175 a { 176 log::log debug "[textutil::strRepeat " " $d]$idx $tp ([$t get $n -key data]...)" 177 } 178 PCDATA { 179 log::log debug "[textutil::strRepeat " " $d]$idx $tp ([string range [$t get $n -key data] 0 20]...)" 180 } 181 default { 182 log::log debug "[textutil::strRepeat " " $d]$idx $tp" 183 } 184 } 185} 186 187proc walkf {n p} { 188 #log::log info "$n + $p =" 189 foreach idx $p { 190 if {$n == ""} {break} 191 set n [lindex [t children $n] $idx] 192 #log::log info "$idx :- $n" 193 } 194 return $n 195} 196 197proc walk {p} { 198 return [walkf root $p] 199} 200 201proc cvtdate {date} { 202 clock format [clock scan $date] -format "%H:%M" 203} 204 205proc escape {text} { 206 # Special escape for nbsp, convert into space and not the 207 # character specified by the standard. 208 209 regsub -all { } $text { } text 210 htmlparse::mapEscapes $text 211} 212 213 214proc gen_schedule {} { 215 global rooms tracks 216 217 dmain set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmain get rect 0 1 end end]]] 218 dmainr set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmainr get rect 0 1 end end]]] 219 220 sched add columns 2 221 schedr add columns 2 222 #sched add columns [array size rooms] 223 #schedr add columns [array size rooms] 224 sched add columns [array size tracks] 225 schedr add columns [array size tracks] 226 227 #log::log info Tracks=[array size tracks] 228 #log::log info Rooms.=[array size rooms] 229 230 set res [list Day Time] 231 set c 2 232 foreach k [lsort [array names tracks]] { 233 lappend res $k 234 set tracks($k) $c 235 incr c 236 } 237 238 sched add row $res 239 schedr add row $res 240 241 # Data in dmain is already sorted by day. By starting time only 242 # partially, there are back references. 243 # Just move them to the correct rooms and rows! 244 245 #-- Day Time Location-Columns, one per Room -- 246 247 set n [dmain rows] 248 set p 0 249 250 array set rmap {} 251 252 for {set r 1} {$r < $n} {incr r} { 253 foreach {day start end track tower room speaker title} [dmain get row $r] break 254 #[list $day $start $end $tower $room $speakerc $title] 255 256 set key $day,$start 257 if {![info exists rmap($key)]} { 258 log::log info "Track schedule $day $start" 259 sched add row 260 schedr add row 261 incr p 262 263 set rmap($key) $p 264 sched set cell 0 $p $day 265 sched set cell 1 $p $start 266 schedr set cell 0 $p $day 267 schedr set cell 1 $p $start 268 } 269 270 sched set cell $tracks($track) $rmap($key) "$tower; $room; $speaker; $title" 271 schedr set cell $tracks($track) $rmap($key) "$tower $room\n$speaker\n$title" 272 } 273 274 # Squeeze the columns 2+ in the report matrix 275 276 set cols [schedr columns] 277 for {set c 2} {$c < $cols} {incr c} { 278 279 if {[schedr columnwidth $c] > 21} { 280 log::log debug "Squeezing $c" 281 set col [schedr get column $c] 282 set res [list] 283 foreach item $col { 284 lappend res [wrap $item 21] 285 } 286 schedr set column $c $res 287 } 288 } 289 290 # Now sort by day (primary key) and starting time (secondary key). 291 # (Meaning we have to sort by time first, and then the day) 292 293 # sched setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [sched getrect 0 0 end end]]] 294 # schedr setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [schedr getrect 0 0 end end]]] 295 296 return 297} 298 299proc dump_main {} { 300 global pfx 301 log::log info "Writing talk information /CSV" 302 303 set f [open ${pfx}.main.csv w] 304 csv::writematrix dmain $f 305 close $f 306 307 log::log info "Writing talk information /TXT" 308 309 # Compute width of report and squeeze the title column to fit 310 # below 80 char/line 311 312 # Day Time/Start Time/End Track Tower Room Speaker Title 313 314 set total 0 315 incr total [dmain columnwidth 0] 316 incr total [dmain columnwidth 1] 317 incr total [dmain columnwidth 2] 318 incr total [dmain columnwidth 3] 319 incr total [dmain columnwidth 4] 320 incr total [dmain columnwidth 5] 321 incr total [dmain columnwidth 6] 322 323 #log::log info Total=$total 324 325 if {$total < 80} { 326 set total [expr {80 - $total}] 327 set titles [dmain getcolumn 7] 328 set res [list] 329 foreach t $titles { 330 lappend res [textutil::adjust $t -length $total] 331 } 332 dmain setcolumn 7 $res 333 } 334 335 ::report::report r [dmainr columns] style captionedtable 1 336 set f [open ${pfx}.main.txt w] 337 r printmatrix2channel dmainr $f 338 close $f 339 r destroy 340 341 # Now the HTML report, use 'dmain' as base, actually formatting 342 # into lines is done by the browser. 343 344 log::log info "Writing talk information /HTML" 345 346 ::report::report r [dmain columns] style html 347 348 set f [open ${pfx}.main.html w] 349 puts $f "<html><head><title>Talk information and schedule</title></head><body>" 350 puts $f "<h1>Talk information and schedule</h1>" 351 puts $f "<p><table border=1>" 352 r printmatrix2channel dmain $f 353 puts $f "</table></p></body></html>" 354 close $f 355 r destroy 356} 357 358proc dump_schedule {} { 359 global pfx 360 log::log info "Writing track schedule /CSV" 361 362 set f [open ${pfx}.sched.csv w] 363 csv::writematrix sched $f 364 close $f 365 366 log::log info "Writing track schedule /TXT" 367 368 ::report::report r [schedr columns] style captionedtable 1 369 r datasep set [r top get] 370 r datasep enable 371 372 set f [open ${pfx}.sched.txt w] 373 r printmatrix2channel schedr $f 374 close $f 375 r destroy 376 377 # Now the HTML report, use 'sched' as base, actually formatting 378 # into lines is done by the browser. 379 380 log::log info "Writing track schedule /HTML" 381 382 ::report::report r [sched columns] style html 383 384 set f [open ${pfx}.sched.html w] 385 puts $f "<html><head><title>Track schedules</title></head><body>" 386 puts $f "<h1>Track schedules</h1>" 387 puts $f "<p><table border=1>" 388 r printmatrix2channel sched $f 389 puts $f "</table></p></body></html>" 390 close $f 391 r destroy 392} 393 394proc postscript {} { 395 global pfx 396 # Transforms texts into printable postscript, using a2ps (if available) 397 398 catch {exec a2ps -o ${pfx}.main.ps -1 -B -r -f7 ${pfx}.main.txt} 399 catch {exec a2ps -o ${pfx}.sched.ps -1 -B -r -f4 ${pfx}.sched.txt} 400 return 401} 402 403proc wrap {text len} { 404 # @author Jeffrey Hobbs <jeff at hobbs org> 405 # 406 # @c Wraps the given <a text> into multiple lines not 407 # @c exceeding <a len> characters each. Lines shorter 408 # @c than <a len> characters might get filled up. 409 # 410 # @a text: The string to operate on. 411 # @a len: The maximum allowed length of a single line. 412 # 413 # @r Basically <a text>, but with changed newlines to 414 # @r restrict the length of individual lines to at most 415 # @r <a len> characters. 416 417 # @n This procedure is not checked by the testsuite. 418 419 # @i wrap, word wrap 420 421 # Convert all newlines into spaces and initialize the result 422 # see ::pool::string::oneLine too. 423 424 regsub -all "\n" $text { } text 425 incr len -1 426 427 set out {} 428 429 # As long as the string is longer than the intended length of 430 # lines in the result: 431 432 while {[string len $text] > $len} { 433 # - Find position of last space in the part of the text 434 # which could a line in the result. 435 436 # - We jump out of the loop if there is none and the whole 437 # text does not contain spaces anymore. In the latter case 438 # the rest of the text is one word longer than an intended 439 # line, we cannot avoid the longer line. 440 441 set i [string last { } [string range $text 0 $len]] 442 443 if {$i == -1 && [set i [string first { } $text]] == -1} { 444 break 445 } 446 447 # Get the just fitting part of the text, remove any heading 448 # and trailing spaces, then append it to the result string, 449 # don't close it with a newline! 450 451 append out [string trim [string range $text 0 [incr i -1]]]\n 452 453 # Shorten the text by the length of the processed part and 454 # the space used to split it, then iterate. 455 456 set text [string range $text [incr i 2] end] 457 } 458 459 return $out$text 460} 461 462# ------------------------------------------- 463# Define the required reports styles 464 465::report::defstyle simpletable {} { 466 data set [split "[string repeat "| " [columns]]|"] 467 top set [split "[string repeat "+ - " [columns]]+"] 468 bottom set [top get] 469 top enable 470 bottom enable 471} 472::report::defstyle captionedtable {{n 1}} { 473 simpletable 474 topdata set [data get] 475 topcapsep set [top get] 476 topcapsep enable 477 tcaption $n 478} 479::report::defstyle html {} { 480 set c [columns] 481 set cl $c ; incr cl -1 482 data set "<tr> [split [string repeat " " $cl] ""] </tr>" 483 for {set col 0} {$col < $c} {incr col} { 484 pad $col left "<td>" 485 pad $col right "</td>" 486 } 487 return 488} 489 490# ------------------------------------------- 491 492main 493exit 494