Lines Matching defs:Collector

54 type Collector struct {  struct
56 UserAgent string
59 MaxDepth int
62 AllowedDomains []string
64 DisallowedDomains []string
70 DisallowedURLFilters []*regexp.Regexp
77 URLFilters []*regexp.Regexp
80 AllowURLRevisit bool
84 MaxBodySize int
87 CacheDir string
91 IgnoreRobotsTxt bool
94 Async bool
98 ParseHTTPErrorResponse bool
100 ID uint32
103 DetectCharset bool
105 RedirectHandler func(req *http.Request, via []*http.Request) error
107 CheckHead bool
108 store storage.Storage
109 debugger debug.Debugger
110 robotsMap map[string]*robotstxt.RobotsData
111 htmlCallbacks []*htmlCallbackContainer
112 xmlCallbacks []*xmlCallbackContainer
113 requestCallbacks []RequestCallback
114 responseCallbacks []ResponseCallback
115 errorCallbacks []ErrorCallback
116 scrapedCallbacks []ScrapedCallback
117 requestCount uint32
118 responseCount uint32
119 backend *httpBackend
120 wg *sync.WaitGroup
121 lock *sync.RWMutex
241 func NewCollector(options ...func(*Collector)) *Collector {
366 func (c *Collector) Init() {
394 func (c *Collector) Appengine(ctx context.Context) {
406 func (c *Collector) Visit(URL string) error {
416 func (c *Collector) Head(URL string) error {
422 func (c *Collector) Post(URL string, requestData map[string]string) error {
428 func (c *Collector) PostRaw(URL string, requestData []byte) error {
434 func (c *Collector) PostMultipart(URL string, requestData map[string][]byte) error {
453 …ollector) Request(method, URL string, requestData io.Reader, ctx *Context, hdr http.Header) error {
458 func (c *Collector) SetDebugger(d debug.Debugger) {
464 func (c *Collector) UnmarshalRequest(r []byte) (*Request, error) {
492 …string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, checkRevisit bool) error {
576 …string, depth int, requestData io.Reader, ctx *Context, hdr http.Header, req *http.Request) error {
644 func (c *Collector) requestCheck(u, method string, depth int, checkRevisit bool) error {
677 func (c *Collector) isDomainAllowed(domain string) bool {
694 func (c *Collector) checkRobots(u *url.URL) error {
727 func (c *Collector) String() string {
740 func (c *Collector) Wait() {
746 func (c *Collector) OnRequest(f RequestCallback) {
756 func (c *Collector) OnResponse(f ResponseCallback) {
768 func (c *Collector) OnHTML(goquerySelector string, f HTMLCallback) {
783 func (c *Collector) OnXML(xpathQuery string, f XMLCallback) {
796 func (c *Collector) OnHTMLDetach(goquerySelector string) {
812 func (c *Collector) OnXMLDetach(xpathQuery string) {
829 func (c *Collector) OnError(f ErrorCallback) {
840 func (c *Collector) OnScraped(f ScrapedCallback) {
850 func (c *Collector) WithTransport(transport http.RoundTripper) {
855 func (c *Collector) DisableCookies() {
860 func (c *Collector) SetCookieJar(j *cookiejar.Jar) {
865 func (c *Collector) SetRequestTimeout(timeout time.Duration) {
871 func (c *Collector) SetStorage(s storage.Storage) error {
885 func (c *Collector) SetProxy(proxyURL string) error {
903 func (c *Collector) SetProxyFunc(p ProxyFunc) {
923 func (c *Collector) handleOnRequest(r *Request) {
934 func (c *Collector) handleOnResponse(r *Response) {
946 func (c *Collector) handleOnHTML(resp *Response) error {
976 func (c *Collector) handleOnXML(resp *Response) error {
1033 …(c *Collector) handleOnError(response *Response, err error, request *Request, ctx *Context) error {
1064 func (c *Collector) handleOnScraped(r *Response) {
1076 func (c *Collector) Limit(rule *LimitRule) error {
1081 func (c *Collector) Limits(rules []*LimitRule) error {
1086 func (c *Collector) SetCookies(URL string, cookies []*http.Cookie) error {
1099 func (c *Collector) Cookies(URL string) []*http.Cookie {
1113 func (c *Collector) Clone() *Collector {
1145 func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Request) error {
1178 func (c *Collector) parseSettingsFromEnv() {