1\name{getHTMLLinks}
2\alias{getHTMLLinks}
3\alias{getHTMLExternalFiles}
4\title{Get links or names of external files in HTML document}
5\description{
6  These functions allow us to retrieve either the links
7  within an HTML document, or the collection of names of
8  external files referenced in an HTML document.
9  The external files include images, JavaScript and CSS documents.
10}
11\usage{
12getHTMLLinks(doc, externalOnly = TRUE, xpQuery = "//a/@href",
13               baseURL = docName(doc), relative = FALSE)
14getHTMLExternalFiles(doc, xpQuery = c("//img/@src", "//link/@href",
15                                      "//script/@href", "//embed/@src"),
16                     baseURL = docName(doc), relative = FALSE,
17                     asNodes = FALSE, recursive = FALSE)
18}
19%- maybe also 'usage' for other objects documented here.
20\arguments{
21  \item{doc}{the HTML document as a URL, local file name, parsed
22    document or an XML/HTML node}
23  \item{externalOnly}{a logical value that indicates whether we should
24    only return links to external documents and not references to
25    internal anchors/nodes within this document, i.e. those of the
26    form \code{#foo}.}
27  \item{xpQuery}{a character vector of XPath expressions that match the nodes or attributes of interest}
28\item{baseURL}{the URL of the container document. This is used
29    to resolve relative references/links.
30}
31\item{relative}{a logical value indicating whether to leave the
32  references as relative to the base URL or to expand them to their full paths.
33}
34\item{asNodes}{a logical value that indicates whether we want the actual
35  HTML/XML nodes in the document that reference external documents
36  or just the names of the external documents.}
37\item{recursive}{a logical value that controls whether we recursively
38  process the external documents we find in the top-level document,
39  examining them for their external files.}
40}
41
42\value{
43  \code{getHTMLLinks} returns a character vector of the links.
44
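45  \code{getHTMLExternalFiles} returns a character vector of the names of the external files or, if \code{asNodes} is \code{TRUE}, the HTML/XML nodes that reference them.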
46}
47\author{
48Duncan Temple Lang
49}
50\seealso{
51  \code{\link{getXIncludes}}
52}
53\examples{\donttest{ # site is flaky
54  try(getHTMLLinks("http://www.omegahat.net"))
55
56  try(getHTMLLinks("http://www.omegahat.net/RSXML"))
57
58  try(unique(getHTMLExternalFiles("http://www.omegahat.net")))
59}}
60\keyword{IO}
61\keyword{programming}
62
63