# $NetBSD: nanpa.sed,v 1.2 2006/12/25 18:39:48 wiz Exp $
#
# Parse HTML tables output by
#   http://docs.nanpa.com/cgi-bin/npa_reports/nanpa
# Specifically, for each HTML table row (TR),
# print the <TD> elements separated by colons.
#
# How it works: the lines that make up one table row are accumulated in
# the pattern space until the closing </TR> has been seen.  </TR> is
# temporarily rewritten to a literal "$" so that "row is complete" can be
# tested with a simple bracket expression ([^$]).  Consequently a literal
# "$" inside a cell will confuse the row detection.
#
# This could break on HTML comments.
#
:top
# Strip carriage returns (^M).  NOTE: \r is not part of POSIX sed; on an
# implementation that does not understand it, replace \r with a literal
# carriage-return byte.
s/\r//g
# Join all lines with unterminated HTML tags (a "<" with no ">" after it).
# The embedded newline is kept; it ends up inside the tag and is removed
# together with the tag further down.
/<[^>]*$/{
	N
	b top
}
# Replace every </TR> with the end-of-row marker "$".
s;</[Tt][Rr]>;$;g
# Join lines that end in a bare <TR ...> with the line that follows.
/<[Tt][Rr][^>]*>$/{
	N
	s/\n//g
	b top
}
# Also keep joining while a <TR ...> has been opened but no end-of-row
# marker has been seen after it.  Both letters are matched explicitly so
# that only <TR> triggers the join, not <TD>, <TABLE>, <TITLE>, ...
/<[Tt][Rr][^>]*>[^$]*$/{
	N
	s/\n//g
	b top
}
# Remove the end-of-row marker at the end of the line.
s/\$$//
# Discard lines that do not contain a <TR> tag.
/<[Tt][Rr][^>]*>/!d
# Replace every <TD ...> (either case) and the blanks around it with a colon.
s/[[:blank:]]*<[Tt][Dd][^>]*>[[:blank:]]*/:/g
# Strip all remaining HTML tags.
s/<[^>]*>//g
# Handle HTML character entities: non-breaking space becomes a space.
s/&nbsp;/ /g
# Compress runs of spaces/tabs into a single space.
s/[[:blank:]][[:blank:]]*/ /g
# Strip the leading colon produced by the first <TD>.
s/^://
# Strip leading/trailing whitespace (at most one space remains after the
# compression above).
s/^ //
s/ $//