# Generic for extracting the items of HTML lists.
#
# Arguments (as used by the methods defined for this generic in this file):
#   doc    the object to read from; methods exist for "character",
#          "HTMLInternalDocument" and "XMLInternalNode".
#   trim   logical; when TRUE, character items are passed through trim().
#   elFun  function applied to each list item to obtain its value.
#   which  optional index selecting which lists of a document to return;
#          the empty default selects all of them.
#   ...    additional arguments for methods.
#
# The body must stay a bare standardGeneric() call so that this remains a
# standard generic.
setGeneric(
  "readHTMLList",
  function(doc, trim = TRUE, elFun = xmlValue, which = integer(), ...)
    standardGeneric("readHTMLList")
)
6
7
# Method for character input: parse `doc` with htmlParse() and dispatch again
# on the parsed document.
#
#   doc       character value handed to htmlParse() as-is.
#   encoding  passed to htmlParse() as its `encoding` argument.
#   trim, elFun, which, ...
#             forwarded unchanged to the method for the parsed document.
setMethod(
  "readHTMLList", "character",
  function(doc, trim = TRUE, elFun = xmlValue,
           which = integer(), encoding = character(), ...) {
    parsed = htmlParse(doc, encoding = encoding)
    readHTMLList(parsed, trim = trim, elFun = elFun, which = which, ...)
  }
)
15
16
# Method for a parsed HTML document: read every <ol>, <ul> and <dl> node found
# by the XPath query below.
#
#   doc    the parsed document.
#   trim   forwarded to the per-node readHTMLList() call.
#   elFun  forwarded to the per-node readHTMLList() call.
#   which  optional index applied to the node set with `[`; the empty default
#          keeps every list.
#   ...    accepted for signature compatibility; not used by this method.
#
# Returns a list with one element per selected list node. When `which` has
# length one AND selects exactly one node, that node's result is returned
# directly instead of being wrapped in a list.
setMethod("readHTMLList",
          "HTMLInternalDocument",
          function(doc,
                   trim = TRUE, elFun = xmlValue,
                   which = integer(), ...) {
            lists = getNodeSet(doc, "//ol | //ul | //dl")
            if (length(which) > 0)
              lists = lists[which]

            ans = lapply(lists, readHTMLList, trim = trim, elFun = elFun)

            # Unwrap only when a single index picked a single list. A lone
            # negative index (e.g. -1) or 0 can select several lists or none;
            # taking ans[[1]] there would drop results or fail with
            # "subscript out of bounds".
            if (length(which) == 1 && length(ans) == 1)
              ans[[1]]
            else
              ans
          })
31
# Method for a single list node.
#
# A <dl> node is delegated to readHTMLDefinitionList(). For any other node,
# elFun is applied to every child that is not an XMLInternalTextNode and the
# results are returned without names. When `trim` is TRUE, each character
# result is additionally passed through trim().
#
# `which` and `...` are part of the signature but are not used here.
setMethod(
  "readHTMLList", "XMLInternalNode",
  function(doc, trim = TRUE, elFun = xmlValue, which = integer(), ...) {

    if (xmlName(doc) == "dl")
      return(readHTMLDefinitionList(doc, trim, elFun))

    # Skip the text nodes sitting between the item elements.
    isText = xmlSApply(doc, is, "XMLInternalTextNode")
    items = xmlChildren(doc)[!isText]
    ans = unname(sapply(items, elFun))

    if (!trim)
      return(ans)

    # The argument `trim` is a logical; the call trim(el) still resolves to
    # the function of that name because R skips non-function bindings when
    # looking up a call target.
    unname(sapply(ans, function(el) if (is.character(el)) trim(el) else el))
  }
)
49
# Convert a definition-list node into a named vector: values come from its
# "dd" children, names from its "dt" children, both obtained with elFun.
#
#   node   the <dl> node; its children are obtained with xmlChildren() and
#          their element names with names(node).
#   trim   logical; when TRUE, character values and names are passed through
#          trim(), matching what the XMLInternalNode method of readHTMLList
#          does for <ol>/<ul> items. (Previously this argument was ignored.)
#   elFun  function applied to each <dt>/<dd> child.
#
# Terms and values are paired purely by position, so the node is expected to
# contain as many <dt> as <dd> children.
readHTMLDefinitionList =
function(node, trim = TRUE, elFun = xmlValue)
{
  kids = xmlChildren(node)
  tags = names(node)

  values = sapply(kids[tags == "dd"], elFun)
  terms = sapply(kids[tags == "dt"], elFun)

  if (trim) {
    # `trim` the argument is a logical; trim(x) below resolves to the helper
    # function of the same name (defined outside this section) because R
    # skips non-function bindings when looking up a call target.
    strip = function(x) if (is.character(x)) trim(x) else x
    values = sapply(values, strip, USE.NAMES = FALSE)
    terms = sapply(terms, strip, USE.NAMES = FALSE)
  }

  structure(values, names = terms)
}
57