# readHTMLList: extract the contents of HTML lists (<ol>, <ul>, <dl>).
#
# Arguments shared by the generic and its methods:
#   doc    what to read from; the method is chosen by its class.
#   trim   logical; if TRUE, character results are passed through the
#          helper function trim() (defined elsewhere, not in this file --
#          presumably strips leading/trailing white space).
#   elFun  function applied to each list item node to produce its value.
#   which  optional index selecting which of the lists found in a
#          document to process; empty means all of them.
#   ...    accepted by every method; only the "character" method forwards it.
setGeneric("readHTMLList",
           function(doc,
                    trim = TRUE, elFun = xmlValue,
                    which = integer(), ...)
             standardGeneric("readHTMLList"))


# Character input: handed to htmlParse() (together with `encoding`) and the
# resulting document is dispatched on again.
setMethod("readHTMLList",
          "character",
          function(doc,
                   trim = TRUE, elFun = xmlValue,
                   which = integer(), encoding = character(), ...) {
            readHTMLList(htmlParse(doc, encoding = encoding), trim, elFun, which, ...)
          })


# Parsed document: locate every <ol>, <ul> and <dl> node, optionally subset
# them with `which`, and read each one.
# Returns a list with one element per selected list node, except that when
# `which` has length 1 the single result is returned directly (unwrapped).
setMethod("readHTMLList",
          "HTMLInternalDocument",
          function(doc,
                   trim = TRUE, elFun = xmlValue,
                   which = integer(), ...) {
            lists = getNodeSet(doc, "//ol | //ul | //dl")
            if(length(which))
              lists = lists[which]
            ans = lapply(lists, readHTMLList, trim = trim, elFun = elFun)
            if(length(which) == 1)
              ans[[1]]
            else
              ans
          })

# Single list node.
# A <dl> node is delegated to readHTMLDefinitionList(). For any other node,
# elFun is applied to each child that is not a text node and the (unnamed)
# results are returned, with character results trimmed when `trim` is TRUE.
setMethod("readHTMLList",
          "XMLInternalNode",
          function(doc,
                   trim = TRUE, elFun = xmlValue,
                   which = integer(), ...) {

            if(xmlName(doc) == "dl")
              return(readHTMLDefinitionList(doc, trim, elFun))

            # Drop the text-node children (e.g. white space between items).
            ans = unname(sapply(xmlChildren(doc)[!xmlSApply(doc, is, "XMLInternalTextNode")], elFun))

            # `trim` is the logical argument here, yet trim(x) below still
            # reaches the helper *function* of the same name: when resolving
            # a call, R skips bindings that are not functions.
            if(trim)
              ans = unname(sapply(ans, function(x) if(is.character(x)) trim(x) else x))

            ans
          })

# Read a definition list (<dl>) node.
#
# Applies elFun to each <dd> child to obtain the values and to each <dt>
# child to obtain the names, and returns the values named by the terms.
# When `trim` is TRUE, character values and character names are passed
# through trim(), matching what readHTMLList() does for <ol>/<ul> nodes.
# (Previously `trim` was accepted but silently ignored.)
readHTMLDefinitionList =
function(node, trim = TRUE, elFun = xmlValue)
{
  kids = xmlChildren(node)
  childNames = names(node)

  values = sapply(kids[childNames == "dd"], elFun)
  terms = sapply(kids[childNames == "dt"], elFun)

  if(trim) {
    # trim(x) resolves to the helper function, not to the logical argument
    # `trim`: R ignores non-function bindings when looking up a call.
    trimIfCharacter = function(x) if(is.character(x)) trim(x) else x

    # sapply() yields a list when elFun's results cannot be simplified;
    # in that case trim element by element, as the node method does.
    values = if(is.list(values)) lapply(values, trimIfCharacter) else trimIfCharacter(values)
    terms = trimIfCharacter(terms)
  }

  structure(values, names = terms)
}