1#!/usr/local/bin/ruby
2
3#  HTML reference generator
4#  by A.Ito 1999/3/30
5
6require 'kconv'
7
8###########################################################################
# Small URL value object.
#
# Splits a URL string into scheme, host, port, file (path) and label
# (fragment), and can resolve a relative reference against a base URL.
# Components that are absent are stored as empty strings; a missing scheme
# is stored as the string 'unknown'.
class URL
  # Schemes whose remainder is an opaque string rather than //host/path.
  OPAQUE_SCHEMES = %w[news mailto].freeze

  attr_reader :scheme, :host, :port, :file, :label

  # Parses +str+ into its components. +nil+ is treated as an empty string.
  def initialize(str)
    rest = str.to_s

    # The scheme must be at the very start; an unanchored match would pick
    # up a colon that merely occurs somewhere inside a relative path.
    if (m = /\A([a-zA-Z+\-]+):(.*)/.match(rest))
      @scheme = m[1]
      rest = m[2]
    else
      @scheme = 'unknown'
    end

    # "//authority" followed by either nothing or a path starting with "/".
    authority = ''
    if (m = %r{\A//([^/]*)(.*)}.match(rest))
      authority = m[1]
      rest = m[2]
    end

    if (m = /\A(.*):(\d+)\z/.match(authority))
      @host = m[1]
      @port = m[2]
    else
      @host = authority
      @port = ''
    end

    # Greedy match: the fragment starts at the last '#'.
    if (m = /(.*)#(.*)/.match(rest))
      @file = m[1]
      @label = m[2]
    else
      @file = rest
      @label = ''
    end
  end

  # Reassembles the URL string. For news: and mailto: only "scheme:file" is
  # produced; every other scheme is written as scheme://host[:port]file[#label].
  def to_s
    return "#{@scheme}:#{@file}" if OPAQUE_SCHEMES.include?(@scheme)

    s = "#{@scheme}://#{@host}"
    s += ":#{@port}" unless @port.empty?
    s += @file
    s += '#' + @label unless @label.empty?
    s
  end

  # Fills in the components this URL lacks from +current+ (the URL of the
  # document the reference appeared in). Modifies and returns self.
  #
  # A URL that already carries a scheme is absolute and is left untouched.
  def complete(current)
    return self unless @scheme == 'unknown'

    @scheme = current.scheme
    if @host == ''
      @port = current.port if @port == ''
      @host = current.host
      # The path is only merged when the host was inherited; a reference
      # that names its own host already has a complete (or empty) path.
      @file = resolve_path(current.file)
    end
    self
  end

  private

  # Resolves @file against the path +base_file+ of the base document.
  def resolve_path(base_file)
    return @file if @file.start_with?('/')
    # An empty path (e.g. "#label") refers to the base document itself.
    return base_file if @file.empty?

    # Directory part of the base path, up to and including its last '/'.
    # Falling back to '/' keeps File.expand_path from consulting the
    # process working directory when the base has no usable path.
    dir = base_file[%r{\A/.*/}] || '/'
    path = File.expand_path(dir + @file)
    # File.expand_path drops a trailing slash; put it back.
    path += '/' if @file.end_with?('/') && !path.end_with?('/')
    path
  end
end
70
# One HTML tag: a lower-cased tag name plus its attributes.
#
# Attribute names are lower-cased. An attribute written without a value is
# stored with the value +true+; otherwise the value is stored as a string
# with its surrounding quotes removed.
class Tag
  # name, optionally followed by = and a "double-quoted", 'single-quoted'
  # or bare (whitespace-delimited) value.
  ATTRIBUTE = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?/

  # Parses +str+, the tag text with or without the enclosing "<" and ">".
  def initialize(str)
    str = $1 if str =~ /<(.+)>/m
    name, rest = str.split(' ', 2)
    @tagname = name.to_s.downcase
    @vals = {}
    # Scanning (rather than splitting on whitespace) keeps quoted values
    # that contain spaces in one piece and tolerates spaces around "=".
    rest.to_s.scan(ATTRIBUTE) do |key, dquoted, squoted, bare|
      @vals[key.downcase] = dquoted || squoted || unquote(bare)
    end
  end

  # The lower-cased tag name (e.g. "a", "/a", "!--").
  def tagname
    @tagname
  end

  # Yields each attribute as a (name, value) pair.
  def each
    @vals.each do |k, v|
      yield k, v
    end
  end

  # Returns the value of attribute +k+ (lower-case name): a String, +true+
  # for a value-less attribute, or +nil+ when the attribute is absent.
  def switch(k)
    @vals[k]
  end

  # Renders the tag back to HTML. Tags whose name contains "!--" (comments)
  # render as an empty string.
  def to_s
    return '' if tagname =~ /!--/

    attrs = @vals.map do |name, value|
      if value == true
        " #{name}"
      else
        # Fall back to single quotes when the value itself holds a '"'.
        quote = value.include?('"') ? "'" : '"'
        " #{name}=#{quote}#{value}#{quote}"
      end
    end
    "<#{tagname}#{attrs.join}>"
  end

  private

  # Value for an attribute whose value was not properly quoted: +true+ when
  # there was no "=value" part at all, otherwise the text with a stray
  # leading or trailing double quote removed.
  def unquote(bare)
    return true if bare.nil?

    bare.sub(/\A"/, '').sub(/"\z/, '')
  end
end
119
# Splits an HTML input stream into tokens: Tag objects, entity references
# such as "&amp;", and runs of plain text. Input is read one line at a time
# and each line is passed through Kconv.toeuc before being tokenized.
class TokenStream
  TAG_START = '<'
  TAG_END = '>'
  AMP_START = '&'
  AMP_END = ';'

  # A well-formed character/entity reference at the start of a string.
  ENTITY = /\A&#?[0-9A-Za-z]+;/

  # +file+ is either an object that provides a public #gets (an IO, a
  # StringIO, ...) or the name of a file to open.
  def initialize(file)
    @f = file.respond_to?(:gets) ? file : File.new(file)
    @buf = nil
    @bpos = 0
  end

  # Consumes input from the current position up to and including the next
  # occurrence of +endsym+, reading further lines as needed, and returns it
  # with every CR and LF replaced by a space. If the input ends first,
  # whatever was collected is returned.
  def read_until(endsym)
    endsym = endsym.chr if endsym.is_a?(Integer)
    pieces = []
    while fill_buffer
      stop = @buf.index(endsym, @bpos)
      pieces << @buf[@bpos...(stop || @buf.size)].tr("\r\n", ' ')
      if stop
        pieces << endsym
        @bpos = stop + endsym.size
        break
      end
      @buf = nil
    end
    pieces.join
  end

  # Returns the next token: a Tag, an entity reference string, or a text
  # string. Text consisting only of whitespace is skipped. Returns nil once
  # the input is exhausted.
  def get
    while fill_buffer
      case @buf[@bpos]
      when TAG_START
        return Tag.new(read_until(TAG_END))
      when AMP_START
        return read_entity
      else
        stop = [@buf.index(TAG_START, @bpos),
                @buf.index(AMP_START, @bpos),
                @buf.size].compact.min
        text = @buf[@bpos...stop]
        @bpos = stop
        next if text =~ /\A\s+\z/
        return text
      end
    end
    nil
  end

  # True when neither the line buffer nor the underlying stream has any
  # input left. May read ahead one line to find out.
  def eof?
    !fill_buffer
  end

  private

  # Makes sure @buf holds unread characters, reading and converting the
  # next line when the current one is used up. Returns false at end of
  # input, true otherwise.
  def fill_buffer
    while @buf.nil? || @bpos >= @buf.size
      line = @f.gets
      if line.nil?
        @buf = nil
        return false
      end
      @buf = Kconv.toeuc(line)
      @bpos = 0
    end
    true
  end

  # Consumes the entity reference at the current position. An "&" that does
  # not start a well-formed reference is returned on its own, so that a
  # stray ampersand cannot swallow the markup that follows it.
  def read_entity
    token = @buf[@bpos..-1][ENTITY] || AMP_START
    @bpos += token.size
    token
  end
end
199
200################################ MAIN ####################################
201
# Command-line driver.
#
# Copies the HTML document (first non-option argument, or standard input)
# to standard output. Before the closing tag of every <a href=...> element
# it inserts a marker: the reference number in brackets, or with -u the
# link target itself. Unless -u is given, a numbered "References" list of
# all link targets is appended at the end of the body.
#
# Options:
#   -url URL   resolve link targets against URL before printing them
#   -u         print each target next to its link instead of a final list
refs = []
open_ref = nil # 1-based number of the link currently open, nil outside <a href>
current_url = nil
immediate_ref = false

while ARGV[0] =~ /^-/
  case ARGV.shift
  when '-url'
    current_url = URL.new(ARGV.shift)
  when '-u'
    immediate_ref = true
  end
end

f = TokenStream.new(ARGV.size > 0 ? ARGV[0] : STDIN)

# Link target as it should be printed: resolved against -url when given.
resolve = lambda do |href|
  current_url.nil? ? href : URL.new(href).complete(current_url).to_s
end

until f.eof?
  tok = f.get
  if tok.kind_of?(Tag)
    case tok.tagname
    when 'a'
      href = tok.switch('href')
      # Only a real string target counts; a bare "href" attribute or an
      # anchor without href (e.g. <a name=...>) is not a reference.
      if href.kind_of?(String)
        refs << href
        open_ref = refs.size
      else
        open_ref = nil
      end
    when '/a'
      if open_ref
        marker = immediate_ref ? resolve.call(refs[open_ref - 1]) : open_ref
        print "[#{marker}]"
        open_ref = nil
      end
    when '/body', '/html'
      # Stop before echoing the closing tag so the reference list can be
      # placed inside the body; the closing tags are written below.
      break
    end
    print tok.to_s
  elsif !tok.nil?
    print tok
  end
end

if !immediate_ref && !refs.empty?
  print "<hr><h2>References</h2>\n"
  refs.each_with_index do |href, i|
    print "[#{i + 1}] #{resolve.call(href)}<br>\n"
  end
end

# Neither closing tag has been echoed at this point, whichever way the loop
# ended, so both are always written.
print "</body>\n"
print "</html>\n"
267