# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each HTML table row (TR),
# print the <TD> elements separated by colons.
#
# Method: input lines are appended to the pattern space until it holds a
# complete table row.  Every </TR> is rewritten to a "$" end-of-row marker
# so the join loops below can tell when the row is finished.  The finished
# row is then reduced to its cell text.
#
# Limitations:
#   - This could break on HTML comments.
#   - A literal "$" in the text of a row is mistaken for the end-of-row
#     marker by the join loop.
#
:top
#				Strip carriage returns.  "\r" is understood by
#				GNU sed; for a sed without that escape, put a
#				literal carriage return between the slashes.
s/\r//g
#				Join all lines with unterminated HTML tags
#				(a "<" with no ">" after it before end of line).
/<[^>]*$/{
	N
	b top
}
#				Replace all </TR> with the "$" end-of-row marker
s;</[Tt][Rr]>;$;g
#				Join lines that end with a <TR> tag.
/<[Tt][Rr][^>]*>$/{
	N
	s/\n//g
	b top
}
#				Also, keep joining while a <TR> has no
#				end-of-row marker after it yet.
/<[Tt][Rr][^>]*>[^$]*$/{
	N
	s/\n//g
	b top
}
#				Remove the trailing end-of-row marker
s/\$$//
#				Delete lines that contain no <TR>
/<[Tt][Rr][^>]*>/!d
#				Replace each <TD> (any letter case), with the
#				blanks around it, by a colon
s/[ 	]*<[Tt][Dd][^>]*> */:/g
#				Strip all remaining HTML tags
s/<[^>]*>//g
#				Handle HTML characters
s/&nbsp;/ /g
#				Compress runs of spaces/tabs to one space
s/[ 	][ 	]*/ /g
#				Strip the leading colon left by the first <TD>
s/^://
#				Strip one leading/trailing space
s/^ //
s/ $//