Lines Matching defs:Collector

55 type Collector struct {  struct
57 UserAgent string
60 MaxDepth int
63 AllowedDomains []string
65 DisallowedDomains []string
71 DisallowedURLFilters []*regexp.Regexp
78 URLFilters []*regexp.Regexp
81 AllowURLRevisit bool
85 MaxBodySize int
88 CacheDir string
92 IgnoreRobotsTxt bool
95 Async bool
99 ParseHTTPErrorResponse bool
101 ID uint32
104 DetectCharset bool
107 redirectHandler func(req *http.Request, via []*http.Request) error
109 CheckHead bool
112 TraceHTTP bool
113 store storage.Storage
114 debugger debug.Debugger
115 robotsMap map[string]*robotstxt.RobotsData
116 htmlCallbacks []*htmlCallbackContainer
117 xmlCallbacks []*xmlCallbackContainer
118 requestCallbacks []RequestCallback
119 responseCallbacks []ResponseCallback
120 responseHeadersCallbacks []ResponseHeadersCallback
121 errorCallbacks []ErrorCallback
122 scrapedCallbacks []ScrapedCallback
123 requestCount uint32
124 responseCount uint32
125 backend *httpBackend
126 wg *sync.WaitGroup
127 lock *sync.RWMutex
399 func (c *Collector) Init() {
428 func (c *Collector) Appengine(ctx context.Context) {
440 func (c *Collector) Visit(URL string) error {
450 func (c *Collector) HasVisited(URL string) (bool, error) {
456 func (c *Collector) HasPosted(URL string, requestData map[string]string) (bool, error) {
461 func (c *Collector) Head(URL string) error {
467 func (c *Collector) Post(URL string, requestData map[string]string) error {
473 func (c *Collector) PostRaw(URL string, requestData []byte) error {
479 func (c *Collector) PostMultipart(URL string, requestData map[string][]byte) error {
498 …ollector) Request(method, URL string, requestData io.Reader, ctx *Context, hdr http.Header) error {
503 func (c *Collector) SetDebugger(d debug.Debugger) {
509 func (c *Collector) UnmarshalRequest(r []byte) (*Request, error) {
538 …string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, checkRevisit bool) error {
612 …string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, req *http.Request) error {
691 …ng, parsedURL *url.URL, method string, requestData io.Reader, depth int, checkRevisit bool) error {
742 func (c *Collector) isDomainAllowed(domain string) bool {
759 func (c *Collector) checkRobots(u *url.URL) error {
798 func (c *Collector) String() string {
811 func (c *Collector) Wait() {
817 func (c *Collector) OnRequest(f RequestCallback) {
837 func (c *Collector) OnResponseHeaders(f ResponseHeadersCallback) {
844 func (c *Collector) OnResponse(f ResponseCallback) {
856 func (c *Collector) OnHTML(goquerySelector string, f HTMLCallback) {
871 func (c *Collector) OnXML(xpathQuery string, f XMLCallback) {
884 func (c *Collector) OnHTMLDetach(goquerySelector string) {
900 func (c *Collector) OnXMLDetach(xpathQuery string) {
917 func (c *Collector) OnError(f ErrorCallback) {
928 func (c *Collector) OnScraped(f ScrapedCallback) {
938 func (c *Collector) SetClient(client *http.Client) {
943 func (c *Collector) WithTransport(transport http.RoundTripper) {
948 func (c *Collector) DisableCookies() {
953 func (c *Collector) SetCookieJar(j http.CookieJar) {
958 func (c *Collector) SetRequestTimeout(timeout time.Duration) {
964 func (c *Collector) SetStorage(s storage.Storage) error {
978 func (c *Collector) SetProxy(proxyURL string) error {
996 func (c *Collector) SetProxyFunc(p ProxyFunc) {
1016 func (c *Collector) handleOnRequest(r *Request) {
1027 func (c *Collector) handleOnResponse(r *Response) {
1039 func (c *Collector) handleOnResponseHeaders(r *Response) {
1051 func (c *Collector) handleOnHTML(resp *Response) error {
1081 func (c *Collector) handleOnXML(resp *Response) error {
1139 …(c *Collector) handleOnError(response *Response, err error, request *Request, ctx *Context) error {
1170 func (c *Collector) handleOnScraped(r *Response) {
1182 func (c *Collector) Limit(rule *LimitRule) error {
1187 func (c *Collector) Limits(rules []*LimitRule) error {
1192 func (c *Collector) SetRedirectHandler(f func(req *http.Request, via []*http.Request) error) {
1198 func (c *Collector) SetCookies(URL string, cookies []*http.Cookie) error {
1211 func (c *Collector) Cookies(URL string) []*http.Cookie {
1225 func (c *Collector) Clone() *Collector {
1259 func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Request) error {
1285 func (c *Collector) parseSettingsFromEnv() {
1299 func (c *Collector) checkHasVisited(URL string, requestData map[string]string) (bool, error) {