###############
# Device Detector - The Universal Device Detection library for parsing User Agents
#
# @link https://matomo.org
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
###############
7
8- regex: '360Spider'
9  name: '360Spider'
10  category: 'Search bot'
11  url: 'https://www.so.com/help/help_3_2.html'
12  producer:
13    name: 'Online Media Group, Inc.'
14    url: ''
15
16- regex: 'Aboundex'
17  name: 'Aboundexbot'
18  category: 'Search bot'
19  url: 'http://www.aboundex.com/crawler/'
20  producer:
21    name: 'Aboundex.com'
22    url: 'http://www.aboundex.com'
23
24- regex: 'AcoonBot'
25  name: 'Acoon'
26  category: 'Search bot'
27  url: 'http://www.acoon.de/robot.asp'
28  producer:
29    name: 'Acoon GmbH'
30    url: 'http://www.acoon.de'
31
32- regex: 'AddThis\.com'
33  name: 'AddThis.com'
34  category: 'Social Media Agent'
35  url: ''
36  producer:
37    name: 'Clearspring Technologies, Inc.'
38    url: 'http://www.clearspring.com'
39
40- regex: 'AhrefsBot'
41  name: 'aHrefs Bot'
42  category: 'Crawler'
43  url: 'https://ahrefs.com/robot'
44  producer:
45    name: 'Ahrefs Pte Ltd'
46    url: 'https://ahrefs.com/robot'
47
48- regex: 'ia_archiver|alexabot|verifybot'
49  name: 'Alexa Crawler'
50  category: 'Search bot'
51  url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52  producer:
53    name: 'Alexa Internet'
54    url: 'https://www.alexa.com'
55
56- regex: 'alexa site audit'
57  name: 'Alexa Site Audit'
58  category: 'Site Monitor'
59  url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60  producer:
61    name: 'Alexa Internet'
62    url: 'https://www.alexa.com'
63
64- regex: 'Amazonbot'
65  name: 'Amazon Bot'
66  category: 'Crawler'
67  url: 'https://developer.amazon.com/support/amazonbot'
68  producer:
69    name: 'Amazon.com, Inc.'
70    url: 'https://www.amazon.com/'
71
72- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
73  name: 'Amazon Route53 Health Check'
74  category: 'Service Agent'
75  producer:
76    name: 'Amazon Web Services'
77    url: 'https://aws.amazon.com/'
78
79- regex: 'AmorankSpider'
80  name: 'Amorank Spider'
81  category: 'Crawler'
82  url: 'http://amorank.com/webcrawler.html'
83  producer:
84    name: 'Amorank'
85    url: 'http://www.amorank.com'
86
87- regex: 'ApacheBench'
88  name: 'ApacheBench'
89  category: 'Benchmark'
90  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
91  producer:
92    name: 'The Apache Software Foundation'
93    url: 'https://www.apache.org/foundation/'
94
95- regex: 'Applebot'
96  name: 'Applebot'
97  category: 'Crawler'
98  url: 'https://support.apple.com/en-us/HT204683'
99  producer:
100    name: 'Apple Inc'
101    url: 'https://www.apple.com'
102
# AppSignal uptime-monitoring agent.
# Single-quoted scalars: no escape processing is needed for these values.
- regex: 'AppSignalBot'
  name: 'AppSignalBot'
  category: 'Site Monitor'
  url: 'https://docs.appsignal.com/uptime-monitoring/'
  producer:
    name: 'AppSignal'
    url: 'https://appsignal.com/'
110
111- regex: 'Arachni'
112  name: 'Arachni'
113  category: 'Security Checker'
114  url: 'https://www.arachni-scanner.com/'
115  producer:
116    name: 'Sarosys LLC'
117    url: 'https://www.sarosys.com/'
118
119- regex: 'AspiegelBot'
120  name: 'AspiegelBot'
121  category: 'Crawler'
122  url: 'https://aspiegel.com/'
123  producer:
124    name: 'Huawei'
125    url: 'https://www.huawei.com/'
126
127- regex: 'Castro 2, Episode Duration Lookup'
128  name: 'Castro 2'
129  category: 'Service Agent'
130  url: 'http://supertop.co/castro/'
131  producer:
132    name: 'Supertop'
133    url: 'http://supertop.co'
134
135- regex: 'Curious George'
136  name: 'Analytics SEO Crawler'
137  category: 'Crawler'
138  url: 'http://www.analyticsseo.com/crawler'
139  producer:
140    name: 'Analytics SEO'
141    url: 'http://www.analyticsseo.com'
142
143- regex: 'archive\.org_bot|special_archiver'
144  name: 'archive.org bot'
145  category: 'Crawler'
146  url: 'https://archive.org/details/archive.org_bot'
147  producer:
148    name: 'The Internet Archive'
149    url: 'https://archive.org'
150
151- regex: 'Ask Jeeves/Teoma'
152  name: 'Ask Jeeves'
153  category: 'Search bot'
154  url: ''
155  producer:
156    name: 'Ask Jeeves Inc.'
157    url: 'http://www.ask.com'
158
159- regex: 'Backlink-Check\.de'
160  name: 'Backlink-Check.de'
161  category: 'Crawler'
162  url: 'http://www.backlink-check.de/bot.html'
163  producer:
164    name: 'Mediagreen Medienservice'
165    url: 'http://www.backlink-check.de'
166
167- regex: 'BacklinkCrawler'
168  name: 'BacklinkCrawler'
169  category: 'Crawler'
170  url: 'http://www.backlinktest.com/crawler.html'
171  producer:
172    name: '2.0Promotion GbR'
173    url: 'http://www.backlinktest.com'
174
175- regex: 'Baidu.*spider|baidu Transcoder'
176  name: 'Baidu Spider'
177  category: 'Search bot'
178  url: 'http://www.baidu.com/search/spider.htm'
179  producer:
180    name: 'Baidu'
181    url: 'http://www.baidu.com'
182
# BazQux Reader feed fetcher; no producer information is recorded.
- regex: 'BazQux'
  name: 'BazQux Reader'
  category: 'Feed Fetcher'
  url: 'https://bazqux.com/fetcher'
  producer:
    name: ''
    url: ''
190
191- regex: 'Better Uptime Bot'
192  name: 'Better Uptime Bot'
193  category: 'Site Monitor'
194  url: 'https://betteruptime.com/faq'
195  producer:
196    name: 'Better Uptime'
197    url: 'https://betteruptime.com/'
198
# Microsoft search crawlers: MSNBot, msrbot, bingbot, BingPreview, the
# msnbot-UDiscovery / msnbot-NewsBlogs variants and adidxbot.
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  # Fixed file extension typo ('.htmn' -> '.htm').
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'
206
207- regex: 'Blekkobot'
208  name: 'Blekkobot'
209  category: 'Search bot'
210  url: 'http://blekko.com/about/blekkobot'
211  producer:
212    name: 'Blekko'
213    url: 'http://blekko.com'
214
215- regex: 'BLEXBot'
216  name: 'BLEXBot Crawler'
217  category: 'Crawler'
218  url: 'http://webmeup-crawler.com'
219  producer:
220    name: 'WebMeUp'
221    url: 'http://webmeup.com'
222
# Bloglovin feed fetcher; no producer information is recorded.
- regex: 'Bloglovin'
  name: 'Bloglovin'
  category: 'Feed Fetcher'
  url: 'http://www.bloglovin.com'
  producer:
    name: ''
    url: ''
230
# Blogtrottr feed fetcher; the bot info URL is intentionally left empty.
- regex: 'Blogtrottr'
  name: 'Blogtrottr'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Blogtrottr Ltd'
    url: 'https://blogtrottr.com/'
238
239- regex: 'BoardReader Blog Indexer'
240  name: 'BoardReader Blog Indexer'
241  category: 'Crawler'
242  producer:
243    name: 'BoardReader'
244    url: 'https://boardreader.com/'
245
246- regex: 'BountiiBot'
247  name: 'Bountii Bot'
248  category: 'Search bot'
249  url: 'http://bountii.com/contact.php'
250  producer:
251    name: 'Bountii Inc.'
252    url: 'http://bountii.com'
253
254- regex: 'Browsershots'
255  name: 'Browsershots'
256  category: 'Service Agent'
257  url: 'http://browsershots.org/faq'
258  producer:
259    name: 'Browsershots.org'
260    url: 'http://browsershots.org'
261
# BUbiNG crawler from the Laboratory for Web Algorithmics.
- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    # NOTE(review): fragment corrected from '#buging' to '#bubing' to match
    # the project name - confirm the anchor on the live page.
    url: 'http://law.di.unimi.it/software.php#bubing'
269
270- regex: '(?<!HTC)[ _]Butterfly/'
271  name: 'Butterfly Robot'
272  category: 'Search bot'
273  url: 'http://labs.topsy.com/butterfly'
274  producer:
275    name: 'Topsy Labs'
276    url: 'http://labs.topsy.com'
277
278- regex: 'CareerBot'
279  name: 'CareerBot'
280  category: 'Crawler'
281  url: 'http://www.career-x.de/bot.html'
282  producer:
283    name: 'career-x GmbH'
284    url: 'http://www.career-x.de'
285
# CCBot; its bot info URL points at the Common Crawl FAQ.
- regex: 'CCBot'
  name: 'ccBot crawler'
  category: 'Crawler'
  url: 'http://commoncrawl.org/faq/'
  producer:
    # NOTE(review): previously listed as 'reddit inc.', which contradicts the
    # commoncrawl.org bot URL above - confirm the exact producer name.
    name: 'Common Crawl'
    url: 'http://commoncrawl.org'
293
294- regex: 'Cliqzbot'
295  name: 'Cliqzbot'
296  category: 'Crawler'
297  url: 'http://cliqz.com/company/cliqzbot'
298  producer:
299    name: '10betterpages GmbH'
300    url: 'http://cliqz.com'
301
302- regex: 'Cloudflare-AMP'
303  name: 'CloudFlare AMP Fetcher'
304  category: 'Crawler'
305  url: 'https://amp.cloudflare.com/doc/fetcher.html'
306  producer:
307    name: 'CloudFlare'
308    url: 'http://www.cloudflare.com'
309
310- regex: 'CloudflareDiagnostics'
311  name: 'Cloudflare Diagnostics'
312  category: 'Site Monitor'
313  url: 'https://www.cloudflare.com/'
314  producer:
315    name: 'Cloudflare'
316    url: 'https://www.cloudflare.com'
317
318- regex: 'CloudFlare-AlwaysOnline'
319  name: 'CloudFlare Always Online'
320  category: 'Site Monitor'
321  url: 'http://www.cloudflare.com/always-online'
322  producer:
323    name: 'CloudFlare'
324    url: 'http://www.cloudflare.com'
325
# Cốc Cốc search crawler, matched on the 'coccoc.com' host in the User-Agent.
# The dot is escaped so it only matches a literal '.', not any character.
- regex: 'coccoc\.com'
  name: 'Cốc Cốc Bot'
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
  category: 'Search bot'
  producer:
    name: 'Cốc Cốc'
    url: 'https://coccoc.com/'
333
334- regex: 'collectd'
335  name: 'Collectd'
336  url: 'https://collectd.org/'
337  category: 'Site Monitor'
338  producer:
339    name: 'Collectd'
340    url: 'https://collectd.org/'
341
342- regex: 'CommaFeed'
343  name: 'CommaFeed'
344  url: 'http://www.commafeed.com'
345  category: 'Feed Fetcher'
346  producer:
347    name: ''
348    url: ''
349
350- regex: 'CSS Certificate Spider'
351  name: 'CSS Certificate Spider'
352  category: 'Crawler'
353  url: 'http://www.css-security.com/certificatespider/'
354  producer:
355    name: 'Certified Security Solutions'
356    url: 'https://www.css-security.com/company/about-us/'
357
358- regex: 'Datadog Agent'
359  name: 'Datadog Agent'
360  url: 'https://github.com/DataDog/dd-agent'
361  category: 'Site Monitor'
362  producer:
363    name: 'Datadog'
364    url: 'https://www.datadoghq.com/'
365
366- regex: 'Datanyze'
367  name: 'Datanyze'
368  url: ''
369  category: 'Crawler'
370  producer:
371    name: 'Datanyze'
372    url: 'https://www.datanyze.com'
373
374- regex: 'Dataprovider'
375  name: 'Dataprovider'
376  category: 'Crawler'
377  url: ''
378  producer:
379    name: 'Dataprovider B.V.'
380    url: 'https://www.dataprovider.com/'
381
382- regex: 'Daum(oa)?[ /][0-9]'
383  name: 'Daum'
384  category: 'Search bot'
385  url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
386  producer:
387    name: 'Daum Communications Corp.'
388    url: 'http://www.kakaocorp.com/main'
389
390- regex: 'Dazoobot'
391  name: 'Dazoobot'
392  category: 'Search bot'
393  url: ''
394  producer:
395    name: 'DAZOO.FR'
396    url: 'http://dazoo.fr'
397
398- regex: 'discobot'
399  name: 'Discobot'
400  category: 'Search bot'
401  url: 'http://discoveryengine.com/discobot.html'
402  producer:
403    name: 'Discovery Engine'
404    url: 'http://discoveryengine.com'
405
# Domain Re-Animator crawler, matched either by its bot name or by the
# support e-mail address. The dot in the address is escaped so it only
# matches a literal '.'.
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
  name: 'Domain Re-Animator Bot'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Domain Re-Animator, LLC'
    url: 'http://domainreanimator.com'
413
414- regex: 'DotBot'
415  name: 'DotBot'
416  category: 'Crawler'
417  url: 'http://www.opensiteexplorer.org/dotbot'
418  producer:
419    name: 'SEOmoz, Inc.'
420    url: 'http://moz.com/'
421
422- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
423  name: 'DuckDuckGo Bot'
424  category: 'Search bot'
425  url: 'https://duckduckgo.com/duckduckbot'
426  producer:
427    name: 'DuckDuckGo'
428    url: 'https://duckduckgo.com/'
429
430- regex: 'EasouSpider'
431  name: 'Easou Spider'
432  category: 'Search bot'
433  url: 'http://www.easou.com/search/spider.html'
434  producer:
435    name: 'easou ICP'
436    url: 'http://www.easou.com'
437
438- regex: 'eCairn-Grabber'
439  name: 'eCairn-Grabber'
440  category: 'Crawler'
441  producer:
442    name: 'eCairn'
443    url: 'https://ecairn.com'
444
445- regex: 'EMail Exractor'
446  name: 'EMail Exractor'
447  category: 'Crawler'
448  url: ''
449  producer:
450    name: ''
451    url: ''
452
453- regex: 'evc-batch'
454  name: 'evc-batch'
455  category: 'Crawler'
456  url: ''
457  producer:
458    name: 'eVenture Capital Partners II, LLC'
459    url: 'http://www.eventures.vc/'
460
461- regex: 'Exabot|ExaleadCloudview'
462  name: 'ExaBot'
463  category: 'Crawler'
464  url: 'http://www.exabot.com/go/robot'
465  producer:
466    name: 'Dassault Systèmes'
467    url: 'http://www.3ds.com'
468
469- regex: 'ExactSeek Crawler'
470  name: 'ExactSeek Crawler'
471  category: 'Search bot'
472  url: 'http://www.exactseek.com'
473  producer:
474    name: 'Jayde Online, Inc.'
475    url: 'http://www.jaydeonlineinc.com'
476
477- regex: 'Ezooms'
478  name: 'Ezooms'
479  category: 'Crawler'
480  url: ''
481  producer:
482    name: 'SEOmoz, Inc.'
483    url: 'http://moz.com/'
484
485- regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
486  name: 'Facebook External Hit'
487  category: 'Social Media Agent'
488  url: 'https://www.facebook.com/externalhit_uatext.php'
489  producer:
490    name: 'Facebook'
491    url: 'http://www.facebook.com'
492
493- regex: 'Feedbin'
494  name: 'Feedbin'
495  url: 'http://feedbin.com/'
496  category: 'Feed Fetcher'
497  producer:
498    name: ''
499    url: ''
500
501- regex: 'FeedBurner'
502  name: 'FeedBurner'
503  url: 'http://www.feedburner.com'
504  category: 'Feed Fetcher'
505  producer:
506    name: ''
507    url: ''
508
509- regex: 'Feed Wrangler'
510  name: 'Feed Wrangler'
511  url: 'https://feedwrangler.net/'
512  category: 'Feed Fetcher'
513  producer:
514    name: 'David Smith & Developing Perspective, LLC'
515    url: 'https://david-smith.org'
516
517- regex: 'Feedly'
518  name: 'Feedly'
519  url: 'http://www.feedly.com'
520  category: 'Feed Fetcher'
521  producer:
522    name: ''
523    url: ''
524
525- regex: 'Feedspot'
526  name: 'Feedspot'
527  url: 'http://www.feedspot.com'
528  category: 'Feed Fetcher'
529  producer:
530    name: ''
531    url: ''
532
533- regex: 'Fever/[0-9]'
534  name: 'Fever'
535  url: 'http://feedafever.com/'
536  category: 'Feed Fetcher'
537  producer:
538    name: ''
539    url: ''
540
541- regex: 'FlipboardProxy|FlipboardRSS'
542  name: 'Flipboard'
543  url: 'http://flipboard.com/browserproxy'
544  category: 'Feed Fetcher'
545  producer:
546    name: 'Flipboard'
547    url: 'http://flipboard.com/'
548
549- regex: 'Findxbot'
550  name: 'Findxbot'
551  category: 'Crawler'
552  url: 'http://www.findxbot.com'
553
554- regex: 'FreshRSS'
555  name: 'FreshRSS'
556  category: 'Feed Fetcher'
557  url: 'https://freshrss.org/'
558
559- regex: 'Genieo'
560  name: 'Genieo Web filter'
561  category: ''
562  url: 'http://www.genieo.com/webfilter.html'
563  producer:
564    name: 'Genieo'
565    url: 'http://www.genieo.com'
566
567- regex: 'GigablastOpenSource'
568  name: 'Gigablast'
569  category: 'Search bot'
570  url: 'https://github.com/gigablast/open-source-search-engine'
571  producer:
572    name: 'Matt Wells'
573    url: 'http://www.gigablast.com/faq.html'
574
575- regex: 'Gluten Free Crawler'
576  name: 'Gluten Free Crawler'
577  category: 'Crawler'
578  url: 'http://glutenfreepleasure.com/'
579  producer:
580    name: ''
581    url: ''
582
583- regex: 'gobuster'
584  name: 'Gobuster'
585  url: 'https://github.com/OJ/gobuster'
586
587- regex: 'ichiro/mobile goo'
588  name: 'Goo'
589  category: 'Search bot'
590  url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
591  producer:
592    name: 'NTT Resonant'
593    url: 'http://goo.ne.jp'
594
595- regex: 'Storebot-Google'
596  name: 'Google StoreBot'
597  category: 'Crawler'
598
599- regex: 'Google Favicon'
600  name: 'Google Favicon'
601  category: 'Crawler'
602
603- regex: 'Google Search Console'
604  name: 'Google Search Console'
605  category: 'Crawler'
606  url: 'https://search.google.com/search-console/about'
607  producer:
608    name: 'Google Inc.'
609    url: 'http://www.google.com'
610
611- regex: 'Google Page Speed Insights'
612  name: 'Google PageSpeed Insights'
613  category: 'Site Monitor'
614  url: 'http://developers.google.com/speed/pagespeed/insights/'
615  producer:
616    name: 'Google Inc.'
617    url: 'http://www.google.com'
618
619- regex: 'google_partner_monitoring'
620  name: 'Google Partner Monitoring'
621  category: 'Site Monitor'
622  url: ''
623  producer:
624    name: 'Google Inc.'
625    url: 'http://www.google.com'
626
627- regex: 'Google-Cloud-Scheduler'
628  name: 'Google Cloud Scheduler'
629  category: 'Crawler'
630  url: 'https://cloud.google.com/scheduler'
631  producer:
632    name: 'Google Inc.'
633    url: 'https://www.google.com'
634
635- regex: 'Google-Structured-Data-Testing-Tool'
636  name: 'Google Structured Data Testing Tool'
637  category: 'Validator'
638  url: 'https://search.google.com/structured-data/testing-tool'
639  producer:
640    name: 'Google Inc.'
641    url: 'http://www.google.com'
642
643- regex: 'GoogleStackdriverMonitoring'
644  name: 'Google Stackdriver Monitoring'
645  category: 'Site Monitor'
646  url: 'https://cloud.google.com/monitoring'
647  producer:
648    name: 'Google Inc.'
649    url: 'https://www.google.com'
650
651- regex: 'via ggpht\.com GoogleImageProxy'
652  name: 'Gmail Image Proxy'
653  category: 'Crawler'
654  url: ''
655  producer:
656    name: 'Google Inc.'
657    url: 'http://www.google.com'
658
659- regex: 'SeznamEmailProxy'
660  name: 'Seznam Email Proxy'
661  category: 'Crawler'
662  url: ''
663  producer:
664    name: 'Seznam.cz, a.s.'
665    url: 'http://www.seznam.cz/'
666
667- regex: 'Seznam-Zbozi-robot'
668  name: 'Seznam Zbozi.cz'
669  category: 'Crawler'
670  url: ''
671  producer:
672    name: 'Seznam.cz, a.s.'
673    url: 'https://www.zbozi.cz/'
674
675- regex: 'Heurekabot-Feed'
676  name: 'Heureka Feed'
677  category: 'Crawler'
678  url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
679  producer:
680    name: 'Heureka.cz, a.s.'
681    url: 'https://www.heureka.cz/'
682
683- regex: 'ShopAlike'
684  name: 'ShopAlike'
685  category: 'Crawler'
686  url: ''
687  producer:
688    name: 'Visual Meta'
689    url: 'https://www.shopalike.cz/'
690
691- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
692  name: 'Googlebot'
693  category: 'Search bot'
694  url: 'http://www.google.com/bot.html'
695  producer:
696    name: 'Google Inc.'
697    url: 'http://www.google.com'
698
699- regex: 'heritrix'
700  name: 'Heritrix'
701  category: 'Crawler'
702  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
703  producer:
704    name: 'The Internet Archive'
705    url: 'https://archive.org'
706
707- regex: 'HubSpot '
708  name: 'HubSpot'
709  category: 'Crawler'
710  producer:
711    name: 'HubSpot Inc.'
712    url: 'https://www.hubspot.com'
713
714- regex: 'HTTPMon'
715  name: 'HTTPMon'
716  category: 'Site Monitor'
717  url: 'http://www.httpmon.com'
718  producer:
719    name: 'towards GmbH'
720    url: 'http://www.towards.ch/'
721
722- regex: 'ICC-Crawler'
723  name: 'ICC-Crawler'
724  category: 'Crawler'
725  url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html'
726  producer:
727    name: ''
728    url: ''
729
# Inoreader, matched on the 'inoreader.com' host in the User-Agent.
# The dot is escaped so it only matches a literal '.'.
# NOTE(review): category is 'Feed Reader' while the other feed services in
# this file use 'Feed Fetcher' - left unchanged, confirm whether intended.
- regex: 'inoreader\.com'
  name: 'inoreader'
  category: 'Feed Reader'
  url: 'https://www.inoreader.com'
734
735- regex: 'iisbot'
736  name: 'IIS Site Analysis'
737  category: 'Crawler'
738  url: 'http://www.iis.net/iisbot.html'
739  producer:
740    name: 'Microsoft Corporation'
741    url: 'http://www.microsoft.com'
742
743- regex: 'ips-agent'
744  name: 'IPS Agent'
745  category: 'Crawler'
746  producer:
747    name: 'VeriSign, Inc'
748    url: 'http://www.verisign.com/'
749
750- regex: 'IP-Guide\.com'
751  name: 'IP-Guide Crawler'
752  category: 'Crawler'
753  url: ''
754  producer:
755    name: ''
756    url: 'https://ip-guide.com'
757
758- regex: 'k6/[0-9\.]+'
759  name: 'K6'
760  url: 'https://k6.io/'
761
762- regex: 'kouio'
763  name: 'Kouio'
764  url: 'http://kouio.com/'
765  category: 'Feed Fetcher'
766  producer:
767    name: ''
768    url: ''
769
770- regex: 'larbin'
771  name: 'Larbin web crawler'
772  category: 'Crawler'
773  url: 'http://larbin.sourceforge.net'
774  producer:
775    name: ''
776    url: ''
777
# Lighthouse audits: an optional alphanumeric prefix followed by
# '-Lighthouse'.
# The class is written as A-Za-z0-9 because the range 'A-z' also covers the
# ASCII characters between 'Z' and 'a' ( [ \ ] ^ _ ` ).
- regex: '([A-Za-z0-9]*)-Lighthouse'
  name: 'Lighthouse'
  category: 'Site Monitor'
  url: 'https://developers.google.com/web/tools/lighthouse'
  producer:
    name: 'Lighthouse'
    url: 'https://developers.google.com/web/tools/lighthouse'
785
# Linkdex crawler, matched by bot name or by the linkdex.com host.
- regex: 'linkdexbot|linkdex\.com'
  name: 'Linkdex Bot'
  category: 'Search bot'
  url: 'http://www.linkdex.com/bots'
  producer:
    # NOTE(review): previously 'Mojeek Ltd.' / mojeek.com, identical to the
    # MojeekBot record and inconsistent with the linkdex.com bot URL above -
    # confirm the exact legal name of the producer.
    name: 'Linkdex'
    url: 'http://www.linkdex.com'
793
794- regex: 'LinkedInBot'
795  name: 'LinkedIn Bot'
796  category: 'Social Media Agent'
797  url: 'http://www.linkedin.com'
798  producer:
799    name: 'LinkedIn'
800    url: 'http://www.linkedin.com'
801
802- regex: 'ltx71'
803  name: 'LTX71'
804  url: 'http://ltx71.com/'
805  producer:
806    name: ''
807    url: ''
808
809- regex: 'Mail\.RU'
810  name: 'Mail.Ru Bot'
811  category: 'Search bot'
812  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
813  producer:
814    name: 'Mail.Ru Group'
815    url: 'http://corp.mail.ru'
816
817- regex: 'magpie-crawler'
818  name: 'Magpie-Crawler'
819  category: 'Social Media Agent'
820  url: 'http://www.brandwatch.com/magpie-crawler/'
821  producer:
822    name: 'Brandwatch'
823    url: 'http://www.brandwatch.com'
824
825- regex: 'MagpieRSS'
826  name: 'MagpieRSS'
827  url: 'http://magpierss.sourceforge.net/'
828  category: 'Feed Parser'
829  producer:
830    name: ''
831    url: ''
832
833- regex: 'masscan'
834  name: 'masscan'
835  url: 'https://github.com/robertdavidgraham/masscan'
836  category: 'Crawler'
837  producer:
838    name: 'Robert Graham'
839    url: 'https://github.com/robertdavidgraham'
840
841- regex: 'Mastodon/'
842  name: 'Mastodon Bot'
843  category: 'Social Media Agent'
844
845- regex: 'meanpathbot'
846  name: 'Meanpath Bot'
847  category: 'Search bot'
848  url: 'http://www.meanpath.com/meanpathbot.html'
849  producer:
850    name: 'Meanpath'
851    url: 'http://www.meanpath.com'
852
853- regex: 'MetaJobBot'
854  name: 'MetaJobBot'
855  category: 'Crawler'
856  url: 'http://www.metajob.at/the/crawler'
857  producer:
858    name: 'MetaJob'
859    url: 'http://www.metajob.at'
860
861- regex: 'MetaInspector'
862  name: 'MetaInspector'
863  category: 'Crawler'
864  url: 'https://github.com/jaimeiniesta/metainspector'
865
866- regex: 'MixrankBot'
867  name: 'Mixrank Bot'
868  category: 'Crawler'
869  url: 'http://mixrank.com'
870  producer:
871    name: 'Online Media Group, Inc.'
872    url: ''
873
874- regex: 'MJ12bot'
875  name: 'MJ12 Bot'
876  category: 'Search bot'
877  url: 'http://majestic12.co.uk/bot.php'
878  producer:
879    name: 'Majestic-12'
880    url: 'http://majestic12.co.uk'
881
# mnoGoSearch search engine crawler; no producer URL is recorded.
- regex: 'Mnogosearch'
  name: 'Mnogosearch'
  category: 'Search bot'
  url: 'http://www.mnogosearch.org/'
  producer:
    name: 'Lavtech.Com Corp.'
    url: ''

# Mojeek search engine crawler.
- regex: 'MojeekBot'
  name: 'MojeekBot'
  category: 'Search bot'
  url: 'http://www.mojeek.com/bot.html'
  producer:
    name: 'Mojeek Ltd.'
    url: 'http://www.mojeek.com'
896
897- regex: 'munin'
898  name: 'Munin'
899  category: 'Site Monitor'
900  url: 'http://munin-monitoring.org/'
901  producer:
902    name: 'Munin'
903    url: 'http://munin-monitoring.org/'
904
905- regex: 'NalezenCzBot'
906  name: 'NalezenCzBot'
907  category: 'Crawler'
908  url: 'http://www.nalezen.cz/about-crawler'
909  producer:
910    name: 'Jaroslav Kuboš'
911    url: ''
912
913- regex: 'check_http/v'
914  name: 'Nagios check_http'
915  category: 'Site Monitor'
916  url: 'https://nagios.org'
917  producer:
918    name: 'Nagios Plugins Development Team'
919    url: 'https://nagios.org'
920
# Crawler identified only by the contact address embedded in its User-Agent
# ('nbertaupete95(at)gmail.com'). Parentheses and the dot are escaped so they
# match literally.
- regex: 'nbertaupete95\(at\)gmail\.com'
  name: 'nbertaupete95'
  category: 'Crawler'
924
925- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
926  name: 'Netcraft Survey Bot'
927  category: 'Search bot'
928  url: ''
929  producer:
930    name: 'Netcraft'
931    url: 'http://www.netcraft.com'
932
933- regex: 'netEstate NE Crawler'
934  name: 'netEstate'
935  category: 'Crawler'
936  url: 'http://www.website-datenbank.de/Impressum'
937  producer:
938    name: 'netEstate GmbH'
939    url: 'https://www.netestate.de/en/'
940
941- regex: 'Netvibes'
942  name: 'Netvibes'
943  url: 'http://www.netvibes.com/'
944  category: 'Feed Fetcher'
945  producer:
946    name: ''
947    url: ''
948
949- regex: 'NewsBlur .*(Fetcher|Finder)'
950  name: 'NewsBlur'
951  url: 'http://www.newsblur.com'
952  category: 'Feed Fetcher'
953  producer:
954    name: ''
955    url: ''
956
957- regex: 'NewsGatorOnline'
958  name: 'NewsGator'
959  url: 'http://www.newsgator.com'
960  category: 'Feed Fetcher'
961  producer:
962    name: ''
963    url: ''
964
965- regex: 'nlcrawler'
966  name: 'NLCrawler'
967  category: 'Crawler'
968  url: ''
969  producer:
970    name: 'Northern Light'
971    url: 'http://northernlight.com'
972
973- regex: 'Nmap Scripting Engine'
974  name: 'Nmap'
975  category: 'Security Checker'
976  url: 'https://nmap.org/book/nse.html'
977  producer:
978    name: 'Nmap'
979    url: 'https://nmap.org/'
980
981- regex: 'Nuzzel'
982  name: 'Nuzzel'
983  category: 'Crawler'
984  producer:
985    name: 'Nuzzel'
986    url: 'https://www.nuzzel.com/'
987
988- regex: 'Octopus [0-9]'
989  name: 'Octopus'
990
991- regex: 'omgili'
992  name: 'Omgili bot'
993  category: 'Search bot'
994  url: 'http://www.omgili.com/Crawler.html'
995  producer:
996    name: 'Omgili'
997    url: 'http://www.omgili.com'
998
999- regex: 'OpenindexSpider'
1000  name: 'Openindex Spider'
1001  category: 'Search bot'
1002  url: 'http://www.openindex.io/en/webmasters/spider.html'
1003  producer:
1004    name: 'Openindex B.V.'
1005    url: 'http://www.openindex.io'
1006
1007- regex: 'spbot'
1008  name: 'OpenLinkProfiler'
1009  category: 'Crawler'
1010  url: 'http://openlinkprofiler.org/bot'
1011  producer:
1012    name: 'Axandra GmbH'
1013    url: 'http://www.axandra.com'
1014
1015- regex: 'OpenWebSpider'
1016  name: 'OpenWebSpider'
1017  category: 'Crawler'
1018  url: 'http://www.openwebspider.org'
1019  producer:
1020    name: 'OpenWebSpider Lab'
1021    url: 'http://lab.openwebspider.org'
1022
1023- regex: 'OrangeBot|VoilaBot'
1024  name: 'Orange Bot'
1025  category: 'Search bot'
1026  url: 'http://lemoteur.orange.fr'
1027  producer:
1028    name: 'Orange'
1029    url: 'http://www.orange.fr'
1030
1031- regex: 'PaperLiBot'
1032  name: 'PaperLiBot'
1033  category: 'Search bot'
1034  url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
1035  producer:
1036    name: 'Smallrivers SA'
1037    url: 'http://www.paper.li'
1038
1039- regex: 'phantomas/'
1040  name: 'Phantomas'
1041  category: 'Site Monitor'
1042  url: 'https://github.com/macbre/phantomas'
1043
1044- regex: 'phpservermon'
1045  name: 'PHP Server Monitor'
1046  category: 'Site Monitor'
1047  url: 'https://github.com/phpservermon/phpservermon'
1048  producer:
1049    name: 'PHP Server Monitor'
1050    url: 'http://www.phpservermonitor.org/'
1051
1052- regex: 'PocketParser'
1053  name: 'PocketParser'
1054  category: 'Read-it-later Service'
1055  url: 'https://getpocket.com/pocketparser_ua'
1056  producer:
1057    name: 'Pocket'
1058    url: 'https://getpocket.com/'
1059
1060- regex: 'PritTorrent'
1061  name: 'PritTorrent'
1062  category: 'Crawler'
1063  url: 'https://github.com/astro/prittorrent'
1064  producer:
1065    name: 'Bitlove'
1066    url: 'http://bitlove.org/'
1067
1068- regex: 'PRTG Network Monitor'
1069  name: 'PRTG Network Monitor'
1070  category: 'Network Monitor'
1071  url: 'https://www.paessler.com/prtg'
1072  producer:
1073    name: 'Paessler AG'
1074    url: 'https://www.paessler.com'
1075
1076- regex: 'psbot'
1077  name: 'Picsearch bot'
1078  category: 'Search bot'
1079  url: 'http://www.picsearch.com/bot.html'
1080  producer:
1081    name: 'Picsearch'
1082    url: 'http://www.picsearch.com'
1083
1084- regex: 'Pingdom(?:\.com|TMS)'
1085  name: 'Pingdom Bot'
1086  category: 'Site Monitor'
1087  url: ''
1088  producer:
1089    name: 'Pingdom AB'
1090    url: 'https://www.pingdom.com'
1091
1092- regex: 'Quora Link Preview'
1093  name: 'Quora Link Preview'
1094  category: 'Crawler'
1095  url: ''
1096  producer:
1097    name: 'Quora'
1098    url: 'http://www.quora.com'
1099
1100- regex: 'Quora-Bot'
1101  name: 'Quora Bot'
1102  category: 'Crawler'
1103  url: ''
1104  producer:
1105    name: 'Quora'
1106    url: 'https://www.quora.com/'
1107
1108- regex: 'RamblerMail'
1109  name: 'RamblerMail Image Proxy'
1110  category: 'Crawler'
1111  url: ''
1112  producer:
1113    name: 'Rambler&Co'
1114    url: 'https://rambler-co.ru/'
1115
1116- regex: 'QuerySeekerSpider'
1117  name: 'QuerySeekerSpider'
1118  category: 'Crawler'
1119  url: 'http://queryseeker.com/bot.html'
1120  producer:
1121    name: 'QueryEye Inc.'
1122    url: 'http://queryeye.com'
1123
1124- regex: 'Qwantify'
1125  name: 'Qwantify'
1126  category: 'Crawler'
1127  url: 'https://www.qwant.com/'
1128  producer:
1129    name: 'Qwant Corporation'
1130    url: 'https://www.qwant.com/'
1131
1132- regex: 'Rainmeter'
1133  name: 'Rainmeter'
1134  category: 'Crawler'
1135  url: 'https://www.rainmeter.net'
1136
1137- regex: 'redditbot'
1138  name: 'Reddit Bot'
1139  category: 'Social Media Agent'
1140  url: 'http://www.reddit.com/feedback'
1141  producer:
1142    name: 'reddit inc.'
1143    url: 'http://www.reddit.com'
1144
1145- regex: 'Riddler'
1146  name: 'Riddler'
1147  category: 'Security search bot'
1148  url: 'https://riddler.io/about'
1149  producer:
1150    name: 'F-Secure'
1151    url: 'https://www.f-secure.com'
1152
1153- regex: 'rogerbot'
1154  name: 'Rogerbot'
1155  category: 'Crawler'
1156  url: 'http://moz.com/help/pro/what-is-rogerbot-'
1157  producer:
1158    name: 'SEOmoz, Inc.'
1159    url: 'http://moz.com/'
1160
1161- regex: 'ROI Hunter'
1162  name: 'ROI Hunter'
1163  category: 'Crawler'
1164  url: ''
1165  producer:
1166    name: 'Roihunter a.s.'
1167    url: 'http://roihunter.com/'
1168
1169- regex: 'SafeDNSBot'
1170  name: 'SafeDNSBot'
1171  category: 'Crawler'
1172  url: 'https://www.safedns.com/searchbot'
1173  producer:
1174    name: 'SafeDNS, Inc.'
1175    url: 'https://www.safedns.com/'
1176
1177- regex: 'Scrapy'
1178  name: 'Scrapy'
1179  category: 'Crawler'
1180  url: 'http://scrapy.org'
1181
1182- regex: 'Screaming Frog SEO Spider'
1183  name: 'Screaming Frog SEO Spider'
1184  category: 'Crawler'
1185  url: 'http://www.screamingfrog.co.uk/seo-spider'
1186  producer:
1187    name: 'Screaming Frog Ltd'
1188    url: 'http://www.screamingfrog.co.uk'
1189
1190- regex: 'ScreenerBot'
1191  name: 'ScreenerBot'
1192  category: 'Crawler'
1193  url: 'http://www.screenerbot.com'
1194  producer:
1195    name: ''
1196    url: ''
1197
1198- regex: 'SemrushBot'
1199  name: 'Semrush Bot'
1200  category: 'Crawler'
1201  url: 'http://www.semrush.com/bot.html'
1202  producer:
1203    name: 'SEMrush'
1204    url: 'http://www.semrush.com'
1205
1206- regex: 'SensikaBot'
1207  name: 'Sensika Bot'
1208  category: ''
1209  url: ''
1210  producer:
1211    name: 'Sensika'
1212    url: 'http://sensika.com'
1213
1214- regex: 'SEOENG(World)?Bot'
1215  name: 'SEOENGBot'
1216  category: 'Crawler'
1217  url: 'http://www.seoengine.com/seoengbot.htm'
1218  producer:
1219    name: 'SEO Engine'
1220    url: 'http://www.seoengine.com'
1221
1222- regex: 'SEOkicks-Robot'
1223  name: 'SEOkicks-Robot'
1224  category: 'Crawler'
1225  url: 'http://www.seokicks.de/robot.html'
1226  producer:
1227    name: 'SEOkicks'
1228    url: 'https://www.seokicks.de/'
1229
1230- regex: 'seoscanners\.net'
1231  name: 'Seoscanners.net'
1232  category: 'Crawler'
1233  url: ''
1234
1235- regex: 'SkypeUriPreview'
1236  name: 'Skype URI Preview'
1237  category: 'Service Agent'
1238  url: ''
1239  producer:
1240    name: 'Skype Communications S.à.r.l.'
1241    url: 'https://www.skype.com'
1242
1243- regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
1244  name: 'Seznam Bot'
1245  category: 'Search bot'
1246  url: 'http://www.mapy.cz/cz/seznambot.html'
1247  producer:
1248    name: 'Seznam.cz, a.s.'
1249    url: 'http://www.seznam.cz/'
1250
1251- regex: 'shopify-partner-homepage-scraper'
1252  name: 'Shopify Partner'
1253  category: 'Crawler'
1254  url: 'https://www.shopify.com/partners'
1255  producer:
1256    name: 'Shopify'
1257    url: 'https://www.shopify.com/'
1258
1259- regex: 'ShopWiki'
1260  name: 'ShopWiki'
1261  category: 'Search tools'
1262  url: 'http://www.shopwiki.com/wiki/Help:Bot'
1263  producer:
1264    name: 'ShopWiki Corp.'
1265    url: 'http://www.shopwiki.com'
1266
# SilverReader feed fetcher. Keys follow the name/category/url/producer order
# used by most records in this file; producer fields are empty strings.
- regex: 'SilverReader'
  name: 'SilverReader'
  category: 'Feed Fetcher'
  url: 'http://silverreader.com'
  producer:
    name: ''
    url: ''
1274
# SimplePie feed parser. Keys follow the name/category/url/producer order
# used by most records in this file; producer fields are empty strings.
- regex: 'SimplePie'
  name: 'SimplePie'
  category: 'Feed Parser'
  url: 'http://www.simplepie.org'
  producer:
    name: ''
    url: ''
1282
1283- regex: 'SISTRIX Crawler'
1284  name: 'SISTRIX Crawler'
1285  category: 'Crawler'
1286  url: 'http://crawler.sistrix.net'
1287  producer:
1288    name: 'SISTRIX GmbH'
1289    url: 'http://www.sistrix.de'
1290
1291- regex: 'compatible; (?:SISTRIX )?Optimizer'
1292  name: 'SISTRIX Optimizer'
1293  category: 'Crawler'
1294  url: 'https://optimizer.sistrix.com'
1295  producer:
1296    name: 'SISTRIX GmbH'
1297    url: 'http://www.sistrix.de'
1298
1299- regex: 'SiteSucker'
1300  name: 'SiteSucker'
1301  category: 'Crawler'
1302  url: 'http://ricks-apps.com/osx/sitesucker/'
1303
# Sixy.ch site monitor. The dot is escaped so the pattern matches the literal
# domain "sixy.ch" rather than "sixy" + any character + "ch".
- regex: 'sixy\.ch'
  name: 'Sixy.ch'
  category: 'Site Monitor'
  url: 'http://sixy.ch'
  producer:
    name: 'Manuel Kasper'
    url: 'https://neon1.net/'
1311
1312- regex: 'Slackbot|Slack-ImgProxy'
1313  name: 'Slackbot'
1314  category: 'Crawler'
1315  url: 'https://api.slack.com/robots'
1316  producer:
1317    name: 'Slack Technologies'
1318    url: 'http://slack.com'
1319
1320- regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
1321  name: 'Sogou Spider'
1322  category: 'Search bot'
1323  url: 'http://www.sogou.com/docs/help/webmasters.htm'
1324  producer:
1325    name: 'Sohu, Inc.'
1326    url: 'http://www.sogou.com'
1327
1328- regex: 'Sosospider|Sosoimagespider'
1329  name: 'Soso Spider'
1330  category: 'Search bot'
1331  url: 'http://help.soso.com/webspider.htm'
1332  producer:
1333    name: 'Tencent Holdings'
1334    url: 'http://www.soso.com'
1335
1336- regex: 'Sprinklr'
1337  name: 'Sprinklr'
1338  category: 'Crawler'
1339  url: ''
1340  producer:
1341    name: 'Sprinklr, Inc.'
1342    url: 'https://www.sprinklr.com/'
1343
1344- regex: 'sqlmap/'
1345  name: 'sqlmap'
1346  category: 'Security Checker'
1347  url: 'http://sqlmap.org/'
1348  producer:
1349    name: 'sqlmap'
1350    url: 'http://sqlmap.org/'
1351
1352- regex: 'SSL Labs'
1353  name: 'SSL Labs'
1354  category: 'Validator'
1355  url: 'https://www.ssllabs.com/about/assessment.html'
1356  producer:
1357    name: 'SSL Labs'
1358    url: 'https://www.ssllabs.com/about/assessment.html'
1359
1360- regex: 'StatusCake'
1361  name: 'StatusCake'
1362  category: 'Site Monitor'
1363  url: 'https://www.statuscake.com'
1364  producer:
1365    name: 'StatusCake'
1366    url: 'https://www.statuscake.com'
1367
1368- regex: 'Superfeedr bot'
1369  name: 'Superfeedr Bot'
1370  category: 'Feed Fetcher'
1371  url: ''
1372  producer:
1373    name: 'Superfeedr'
1374    url: 'https://superfeedr.com/'
1375
1376- regex: 'Sparkler/[0-9]'
1377  name: 'Sparkler'
1378  category: 'Crawler'
1379  url: 'https://github.com/USCDataScience/sparkler'
1380
1381- regex: 'Spinn3r'
1382  name: 'Spinn3r'
1383  category: 'Crawler'
1384  url: 'http://spinn3r.com/robot'
1385  producer:
1386    name: 'Tailrank Inc'
1387    url: 'http://spinn3r.com'
1388
1389- regex: 'SputnikBot'
1390  name: 'Sputnik Bot'
1391  category: 'Crawler'
1392  url: ''
1393
1394- regex: 'SputnikFaviconBot'
1395  name: 'Sputnik Favicon Bot'
1396  category: 'Crawler'
1397  url: ''
1398
1399- regex: 'SputnikImageBot'
1400  name: 'Sputnik Image Bot'
1401  category: 'Crawler'
1402  url: ''
1403
1404- regex: 'SurveyBot'
1405  name: 'Survey Bot'
1406  category: 'Search bot'
1407  url: 'http://www.domaintools.com/webmasters/surveybot.php'
1408  producer:
1409    name: 'Domain Tools'
1410    url: 'http://www.domaintools.com'
1411
# Tarmot Gezgin search bot; no producer block is recorded for it.
- regex: 'TarmotGezgin'
  name: 'Tarmot Gezgin'
  category: 'Search bot'
  url: 'http://www.tarmot.com/gezgin/'
1416
1417- regex: 'TelegramBot'
1418  name: 'TelegramBot'
1419  url: 'https://telegram.org/blog/bot-revolution'
1420
# TLSProbe scanner operated by Venafi TrustNet. Keys follow the
# name/category/url/producer order used by most records in this file.
- regex: 'TLSProbe'
  name: 'TLSProbe'
  category: 'Security search bot'
  url: 'https://scan.trustnet.venafi.com/'
  producer:
    name: 'Venafi TrustNet'
    url: 'https://www.venafi.com'
1428
1429- regex: 'TinEye-bot'
1430  name: 'TinEye Crawler'
1431  category: 'Search bot'
1432  url: 'http://www.tineye.com/crawler.html'
1433  producer:
1434    name: 'Idée Inc.'
1435    url: 'http://ideeinc.com'
1436
# Tiny Tiny RSS feed fetcher. Keys follow the name/category/url/producer
# order used by most records in this file; producer fields are empty strings.
- regex: 'Tiny Tiny RSS'
  name: 'Tiny Tiny RSS'
  category: 'Feed Fetcher'
  url: 'http://tt-rss.org'
  producer:
    name: ''
    url: ''
1444
# The Old Reader feed reader. The dot is escaped so only the literal domain
# "theoldreader.com" matches.
- regex: 'theoldreader\.com'
  name: 'theoldreader'
  category: 'Feed Reader'
  url: 'https://theoldreader.com'
1449
1450- regex: 'trendictionbot'
1451  name: 'Trendiction Bot'
1452  category: 'Crawler'
1453  url: 'http://www.trendiction.de/bot'
1454  producer:
1455    name: 'Talkwalker Inc.'
1456    url: 'http://www.talkwalker.com'
1457
1458- regex: 'TurnitinBot'
1459  name: 'TurnitinBot'
1460  category: 'Crawler'
1461  url: 'http://www.turnitin.com/robot/crawlerinfo.html'
1462  producer:
1463    name: 'iParadigms, LLC.'
1464    url: 'http://www.turnitin.com'
1465
1466- regex: 'TweetedTimes Bot'
1467  name: 'TweetedTimes Bot'
1468  category: 'Crawler'
1469  url: 'http://tweetedtimes.com'
1470  producer:
1471    name: 'TweetedTimes'
1472    url: 'http://tweetedtimes.com/'
1473
1474- regex: 'TweetmemeBot'
1475  name: 'Tweetmeme Bot'
1476  category: 'Crawler'
1477  url: 'http://tweetmeme.com/'
1478  producer:
1479    name: 'Mediasift'
1480    url: ''
1481
1482- regex: 'Twingly Recon'
1483  name: 'Twingly Recon'
1484  category: 'Crawler'
1485  producer:
1486    name: 'Twingly'
1487    url: 'https://www.twingly.com'
1488
1489- regex: 'Twitterbot'
1490  name: 'Twitterbot'
1491  category: 'Social Media Agent'
1492  url: 'https://dev.twitter.com/docs/cards/getting-started'
1493  producer:
1494    name: 'Twitter'
1495    url: 'http://www.twitter.com'
1496
1497- regex: 'UniversalFeedParser'
1498  name: 'UniversalFeedParser'
1499  category: 'Feed Fetcher'
1500  url: 'https://github.com/kurtmckee/feedparser'
1501  producer:
1502    name: 'Kurt McKee'
1503    url: 'https://github.com/kurtmckee'
1504
1505- regex: 'via secureurl\.fwdcdn\.com'
1506  name: 'UkrNet Mail Proxy'
1507  category: 'Crawler'
1508  url: ''
1509  producer:
1510    name: 'UkrNet Ltd'
1511    url: 'https://www.ukr.net/'
1512
1513- regex: 'Uptimebot'
1514  name: 'Uptimebot'
1515  category: 'Site Monitor'
1516  url: 'https://uptime.com/uptimebot'
1517  producer:
1518    name: 'Uptime'
1519    url: 'https://uptime.com'
1520
1521- regex: 'UptimeRobot'
1522  name: 'Uptime Robot'
1523  category: 'Site Monitor'
1524  url: ''
1525  producer:
1526    name: 'Uptime Robot'
1527    url: 'http://uptimerobot.com'
1528
1529- regex: 'URLAppendBot'
1530  name: 'URLAppendBot'
1531  category: 'Crawler'
1532  url: 'http://www.profound.net/urlappendbot.html'
1533  producer:
1534    name: 'Profound Networks'
1535    url: 'http://www.profound.net'
1536
1537- regex: 'Vagabondo'
1538  name: 'Vagabondo'
1539  category: 'Crawler'
1540  url: ''
1541  producer:
1542    name: 'WiseGuys'
1543    url: 'http://www.wise-guys.nl/'
1544
1545- regex: 'vkShare; '
1546  name: 'VK Share Button'
1547  category: 'Crawler'
1548  url: 'http://vk.com/dev/Share'
1549  producer:
1550    name: 'VK'
1551    url: 'http://vk.com/'
1552
1553- regex: 'VSMCrawler'
1554  name: 'Visual Site Mapper Crawler'
1555  category: 'Crawler'
1556  url: 'http://www.visualsitemapper.com/crawler'
1557  producer:
1558    name: 'Alentum Software Ltd.'
1559    url: 'http://www.alentum.com'
1560
1561- regex: 'Jigsaw'
1562  name: 'W3C CSS Validator'
1563  category: 'Validator'
1564  url: 'http://jigsaw.w3.org/css-validator'
1565  producer:
1566    name: 'W3C'
1567    url: 'http://www.w3.org'
1568
1569- regex: 'W3C_I18n-Checker'
1570  name: 'W3C I18N Checker'
1571  category: 'Validator'
1572  url: 'http://validator.w3.org/i18n-checker'
1573  producer:
1574    name: 'W3C'
1575    url: 'http://www.w3.org'
1576
1577- regex: 'W3C-checklink'
1578  name: 'W3C Link Checker'
1579  category: 'Validator'
1580  url: 'http://validator.w3.org/checklink'
1581  producer:
1582    name: 'W3C'
1583    url: 'http://www.w3.org'
1584
# W3C markup validator, identified either by the "W3C_Validator" token or by
# the literal "Validator.nu" token (dot escaped so it is not a wildcard).
- regex: 'W3C_Validator|Validator\.nu'
  name: 'W3C Markup Validation Service'
  category: 'Validator'
  url: 'http://validator.w3.org/services'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'
1592
1593- regex: 'W3C-mobileOK'
1594  name: 'W3C MobileOK Checker'
1595  category: 'Validator'
1596  url: 'http://validator.w3.org/mobile'
1597  producer:
1598    name: 'W3C'
1599    url: 'http://www.w3.org'
1600
1601- regex: 'W3C_Unicorn'
1602  name: 'W3C Unified Validator'
1603  category: 'Validator'
1604  url: 'http://validator.w3.org/unicorn'
1605  producer:
1606    name: 'W3C'
1607    url: 'http://www.w3.org'
1608
1609- regex: 'Wappalyzer'
1610  name: 'Wappalyzer'
1611  url: 'https://github.com/AliasIO/Wappalyzer'
1612  producer:
1613    name: 'AliasIO'
1614    url: 'https://github.com/AliasIO'
1615
1616- regex: 'PTST/'
1617  name: 'WebPageTest'
1618  category: 'Site Monitor'
1619  url: 'https://www.webpagetest.org'
1620
1621- regex: 'WeSEE'
1622  name: 'WeSEE:Search'
1623  category: 'Search bot'
1624  url: 'http://www.wesee.com/bot'
1625  producer:
1626    name: 'WeSEE Ltd'
1627    url: 'http://www.wesee.com'
1628
1629- regex: 'WebbCrawler'
1630  name: 'WebbCrawler'
1631  category: 'Crawler'
1632  url: 'http://badcheese.com/crawler.html'
1633  producer:
1634    name: 'Steve Webb'
1635    url: 'http://badcheese.com'
1636
# WebSitePulse monitoring checker; "[+ ]" accepts either a plus sign or a
# space between the two words. The url points at the WebSitePulse site (it
# previously duplicated the WebbCrawler record's badcheese.com address).
- regex: 'websitepulse[+ ]checker'
  name: 'WebSitePulse'
  category: 'Site Monitor'
  url: 'http://www.websitepulse.com/'
  producer:
    name: 'WebSitePulse'
    url: 'http://www.websitepulse.com/'
1644
1645- regex: 'WordPress'
1646  name: 'WordPress'
1647  category: 'Service Agent'
1648  url: 'https://wordpress.org/'
1649  producer:
1650    name: 'Wordpress.org'
1651    url: 'https://wordpress.org/'
1652
1653- regex: 'Wotbox'
1654  name: 'Wotbox'
1655  category: 'Search bot'
1656  url: 'http://www.wotbox.com/bot/'
1657  producer:
1658    name: 'Wotbox'
1659    url: 'http://www.wotbox.com'
1660
1661- regex: 'XenForo'
1662  name: 'XenForo'
1663  category: 'Service Agent'
1664  url: 'https://xenforo.com/'
1665  producer:
1666    name: 'XenForo Ltd.'
1667    url: 'https://xenforo.com/'
1668
1669- regex: 'yacybot'
1670  name: 'YaCy'
1671  category: 'Search bot'
1672  url: 'http://yacy.net/bot.html'
1673  producer:
1674    name: 'YaCy'
1675    url: 'http://yacy.net'
1676
1677- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
1678  name: 'Yahoo! Slurp'
1679  category: 'Search bot'
1680  url: 'http://help.yahoo.com/ysearch/slurp'
1681  producer:
1682    name: 'Yahoo! Inc.'
1683    url: 'http://www.yahoo.com'
1684
1685- regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone'
1686  name: 'Yahoo! Link Preview'
1687  category: 'Crawler'
1688  url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html'
1689  producer:
1690    name: 'Yahoo! Inc.'
1691    url: 'http://www.yahoo.com'
1692
1693- regex: 'YahooMailProxy'
1694  name: 'Yahoo! Mail Proxy'
1695  category: 'Service Agent'
1696  url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697  producer:
1698    name: 'Yahoo! Inc.'
1699    url: 'http://www.yahoo.com'
1700
1701- regex: 'YahooCacheSystem'
1702  name: 'Yahoo! Cache System'
1703  category: 'Crawler'
1704  url: ''
1705  producer:
1706    name: 'Yahoo! Inc.'
1707    url: 'http://www.yahoo.com'
1708
1709- regex: 'Y!J-BRW'
1710  name: 'Yahoo! Japan BRW'
1711  category: 'Crawler'
1712  url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1713  producer:
1714    name: 'Yahoo! Japan Corp.'
1715    url: 'https://www.yahoo.co.jp/'
1716
1717- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1718  name: 'Yandex Bot'
1719  category: 'Search bot'
1720  url: 'http://www.yandex.com/bots'
1721  producer:
1722    name: 'Yandex LLC'
1723    url: 'http://company.yandex.com'
1724
1725- regex: 'Yeti|NaverJapan'
1726  name: 'Yeti/Naverbot'
1727  category: 'Search bot'
1728  url: 'http://help.naver.com/robots/'
1729  producer:
1730    name: 'Naver'
1731    url: 'http://www.naver.com'
1732
1733- regex: 'YoudaoBot'
1734  name: 'Youdao Bot'
1735  category: 'Search bot'
1736  url: 'http://www.youdao.com/help/webmaster/spider'
1737  producer:
1738    name: 'NetEase, Inc.'
1739    url: 'http://corp.163.com'
1740
1741- regex: 'YOURLS v[0-9]'
1742  name: 'Yourls'
1743  category: 'Crawler'
1744  url: 'http://yourls.org'
1745
1746- regex: 'YRSpider|YYSpider'
1747  name: 'Yunyun Bot'
1748  category: 'Search bot'
1749  url: 'http://www.yunyun.com/SiteInfo.php?r=about'
1750  producer:
1751    name: 'YunYun'
1752    url: 'http://www.yunyun.com'
1753
1754- regex: 'zgrab'
1755  name: 'zgrab'
1756  category: 'Security Checker'
1757  url: 'https://github.com/zmap/zgrab'
1758
1759- regex: 'Zookabot'
1760  name: 'Zookabot'
1761  category: 'Crawler'
1762  url: 'http://zookabot.com'
1763  producer:
1764    name: 'Hwacha ApS'
1765    url: 'http://hwacha.dk'
1766
1767- regex: 'ZumBot'
1768  name: 'ZumBot'
1769  category: 'Search bot'
1770  url: 'http://help.zum.com/inquiry'
1771  producer:
1772    name: 'ZUM internet'
1773    url: 'http://www.zuminternet.com/'
1774
1775- regex: 'YottaaMonitor'
1776  name: 'Yottaa Site Monitor'
1777  category: 'Site Monitor'
1778  url: 'http://www.yottaa.com/products/site-monitor'
1779  producer:
1780    name: 'Yottaa'
1781    url: 'http://www.yottaa.com/'
1782
# Yahoo ad-monitoring crawler: requires the "Yahoo Ad monitoring" token
# followed later by the help-article slug. No trailing ".*" is needed because
# the pattern is not anchored at the end.
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
  name: 'Yahoo Gemini'
  category: 'Crawler'
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'
1790
1791- regex: '.*Java.*outbrain'
1792  name: 'Outbrain'
1793  category: 'Crawler'
1794  url: ''
1795  producer:
1796    name: 'Outbrain'
1797    url: 'http://www.outbrain.com/'
1798
1799- regex: 'HubPages.*crawlingpolicy'
1800  name: 'HubPages'
1801  category: 'Crawler'
1802  url: 'https://hubpages.com/help/crawlingpolicy'
1803  producer:
1804    name: 'HubPages, Inc.'
1805    url: 'https://discover.hubpages.com/'
1806
# Pinterest crawler: "Pinterest/" or "Pinterestbot/" with a d.d version,
# followed later by the www.pinterest.com reference. No trailing ".*" is
# needed because the pattern is not anchored at the end.
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com'
  name: 'Pinterest'
  category: 'Crawler'
  url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
  producer:
    name: 'Pinterest'
    url: 'https://www.pinterest.com/'
1814
1815- regex: 'Site24x7'
1816  name: 'Site24x7 Website Monitoring'
1817  category: 'Site Monitor'
1818  url: 'https://www.site24x7.com/site24x7-faq.html'
1819  producer:
1820    name: 'Site24x7'
1821    url: 'https://www.site24x7.com'
1822
1823- regex: 's~snapchat-proxy'
1824  name: 'Snapchat Proxy'
1825  category: 'Crawler'
1826  url: 'https://www.snapchat.com'
1827  producer:
1828    name: 'Snapchat Inc.'
1829    url: 'https://www.snapchat.com'
1830
1831- regex: "Let's Encrypt validation server"
1832  name: "Let's Encrypt Validation"
1833  category: 'Service Agent'
1834  url: 'https://letsencrypt.org/how-it-works/'
1835  producer:
1836    name: "Let's Encrypt"
1837    url: 'https://letsencrypt.org'
1838
1839- regex: 'GrapeshotCrawler'
1840  name: 'Grapeshot'
1841  category: 'Crawler'
1842  url: 'https://www.grapeshot.com/crawler'
1843  producer:
1844    name: 'Grapeshot'
1845    url: 'https://www.grapeshot.com'
1846
1847- regex: 'www\.monitor\.us'
1848  name: 'Monitor.Us'
1849  category: 'Site Monitor'
1850  url: 'http://www.monitor.us'
1851  producer:
1852    name: 'Monitor.Us'
1853    url: 'http://www.monitor.us'
1854
1855- regex: 'Catchpoint'
1856  name: 'Catchpoint'
1857  category: 'Site Monitor'
1858  url: 'https://www.catchpoint.com/'
1859  producer:
1860    name: 'Catchpoint Systems'
1861    url: 'https://www.catchpoint.com/'
1862
1863- regex: 'bitlybot'
1864  name: 'BitlyBot'
1865  category: 'Crawler'
1866  url: 'https://bitly.com'
1867  producer:
1868    name: 'Bitly, Inc.'
1869    url: 'https://bitly.com'
1870
1871- regex: 'Zao/'
1872  name: 'Zao'
1873  category: 'Crawler'
1874
1875- regex: 'lycos'
1876  name: 'Lycos'
1877
# Bare "Slurp" token; this record carries only a regex and a name.
# NOTE(review): the earlier 'Yahoo! Slurp' record in this file also contains
# this token. Presumably the consumer takes the first matching record, making
# this a fallback for non-Yahoo Slurp agents - confirm the matching order.
- regex: 'Slurp'
  name: 'Inktomi Slurp'
1880
1881- regex: 'Speedy Spider'
1882  name: 'Speedy'
1883
1884- regex: 'ScoutJet'
1885  name: 'ScoutJet'
1886
1887- regex: 'nrsbot|netresearch'
1888  name: 'NetResearchServer'
1889
1890- regex: 'scooter'
1891  name: 'Scooter'
1892
1893- regex: 'gigabot'
1894  name: 'Gigabot'
1895
1896- regex: 'charlotte'
1897  name: 'Charlotte'
1898
1899- regex: 'Pompos'
1900  name: 'Pompos'
1901
1902- regex: 'ichiro'
1903  name: 'ichiro'
1904
1905- regex: 'PagePeeker'
1906  name: 'PagePeeker'
1907
1908- regex: 'WebThumbnail'
1909  name: 'WebThumbnail'
1910
1911- regex: 'Willow Internet Crawler'
1912  name: 'Willow Internet Crawler'
1913
1914- regex: 'EmailWolf'
1915  name: 'EmailWolf'
1916
1917- regex: 'NetLyzer FastProbe'
1918  name: 'NetLyzer FastProbe'
1919
1920- regex: 'AdMantX.*admantx\.com'
1921  name: 'ADMantX'
1922
# Server Density monitoring agent; this record carries only a regex and a
# name. No trailing ".*" is needed because the pattern is not anchored.
- regex: 'Server Density Service Monitoring'
  name: 'Server Density'
1925
1926- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1927  name: 'RSSRadio Bot'
1928
# Catch-all alternation of assorted bot tokens that have no dedicated record.
# Dots inside domain-like tokens are escaped so they match literally;
# "vortex(?! Build)" excludes agents where "vortex" is followed by " Build".
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|tweetedtimes\.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent)'
  name: 'Generic Bot'
1931
1932- regex: '^sentry'
1933  name: 'Sentry Bot'
1934  producer:
1935    name: 'Sentry'
1936    url: 'https://sentry.io'
1937
1938- regex: '^Spotify'
1939  name: 'Spotify'
1940  producer:
1941    name: 'Spotify'
1942    url: 'https://www.spotify.com'
1943
1944- regex: 'The Knowledge AI'
1945  name: 'The Knowledge AI'
1946  category: 'Crawler'
1947
1948- regex: 'Embedly'
1949  name: 'Embedly'
1950  category: 'Crawler'
1951  url: 'https://support.embed.ly/hc/en-us'
1952  producer:
1953    name: 'A Medium, Corp.'
1954    url: 'https://medium.com/'
1955
1956- regex: 'BrandVerity'
1957  name: 'BrandVerity'
1958  category: 'Crawler'
1959  url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1960  producer:
1961    name: 'BrandVerity, Inc.'
1962    url: 'https://www.brandverity.com/'
1963
1964- regex: 'Kaspersky Lab CFR link resolver'
1965  name: 'Kaspersky'
1966  category: 'Security Checker'
1967  url: 'https://www.kaspersky.com/'
1968  producer:
1969    name: 'AO Kaspersky Lab'
1970    url: 'https://www.kaspersky.com/'
1971
1972- regex: 'eZ Publish Link Validator'
1973  name: 'eZ Publish Link Validator'
1974  category: 'Crawler'
1975  url: 'https://ez.no/'
1976  producer:
1977    name: 'eZ Systems AS'
1978    url: 'https://ez.no/'
1979
1980- regex: 'woorankreview'
1981  name: 'WooRank'
1982  category: 'Search bot'
1983  url: 'https://www.woorank.com/'
1984  producer:
1985    name: 'WooRank sprl'
1986    url: 'https://www.woorank.com/'
1987
# Siteimprove agents: "Match by Siteimprove.com" or
# "LinkCheck by Siteimprove.com" (dot escaped so it matches literally).
- regex: '(Match|LinkCheck) by Siteimprove\.com'
  name: 'Siteimprove'
  category: 'Search bot'
  url: 'https://siteimprove.com/'
  producer:
    name: 'Siteimprove GmbH'
    url: 'https://siteimprove.com/'
1995
1996- regex: 'CATExplorador'
1997  name: 'CATExplorador'
1998  category: 'Search bot'
1999  url: 'https://fundacio.cat/ca/domini/'
2000  producer:
2001    name: 'Fundació puntCAT'
2002    url: 'https://fundacio.cat/ca/domini/'
2003
2004- regex: 'Buck'
2005  name: 'Buck'
2006  category: 'Search bot'
2007  url: 'https://hypefactors.com/'
2008  producer:
2009    name: 'Hypefactors A/S'
2010    url: 'https://hypefactors.com/'
2011
2012- regex: 'tracemyfile'
2013  name: 'TraceMyFile'
2014  category: 'Search bot'
2015  url: 'https://www.tracemyfile.com/'
2016  producer:
2017    name: 'Idee Inc.'
2018    url: 'http://ideeinc.com/'
2019
# Ze List feed parser. The dot in the domain is escaped so it matches
# literally; keys follow the name/category/url/producer order used by most
# records in this file.
- regex: 'zelist\.ro feed parser'
  name: 'Ze List'
  category: 'Feed Fetcher'
  url: 'https://www.zelist.ro/'
  producer:
    name: 'Treeworks SRL'
    url: 'https://www.tree.ro/'
2027
2028- regex: 'weborama-fetcher'
2029  name: 'Weborama'
2030  category: 'Search bot'
2031  url: 'https://weborama.com/'
2032  producer:
2033    name: 'Weborama SA'
2034    url: 'https://weborama.com/'
2035
2036- regex: 'BoardReader Favicon Fetcher'
2037  name: 'BoardReader'
2038  category: 'Search bot'
2039  url: 'https://boardreader.com/'
2040  producer:
2041    name: 'Effyis Inc'
2042    url: 'https://boardreader.com/'
2043
2044- regex: 'IDG/IT'
2045  name: 'IDG/IT'
2046  category: 'Search bot'
2047  url: 'https://spaziodati.eu/'
2048  producer:
2049    name: 'SpazioDati S.r.l.'
2050    url: 'https://spaziodati.eu/'
2051
2052- regex: 'Bytespider'
2053  name: 'Bytespider'
2054  category: 'Search bot'
2055  url: 'https://bytedance.com/'
2056  producer:
2057    name: 'ByteDance Ltd.'
2058    url: 'https://bytedance.com/'
2059
2060- regex: 'WikiDo'
2061  name: 'WikiDo'
2062  category: 'Search bot'
2063  url: 'https://www.wikido.com/'
2064  producer:
2065    name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2066    url: 'https://www.wikido.com/'
2067
2068- regex: 'AwarioSmartBot'
2069  name: 'Awario'
2070  category: 'Search bot'
2071  url: 'https://awario.com/bots.html'
2072  producer:
2073    name: 'Awario'
2074    url: 'https://awario.com/'
2075
2076- regex: 'AwarioRssBot'
2077  name: 'Awario'
2078  category: 'Feed Fetcher'
2079  url: 'https://awario.com/bots.html'
2080  producer:
2081    name: 'Awario'
2082    url: 'https://awario.com/'
2083
2084- regex: 'oBot'
2085  name: 'oBot'
2086  category: 'Search bot'
2087  url: 'https://www.xforce-security.com/crawler/'
2088  producer:
2089    name: 'IBM Germany Research & Development GmbH'
2090    url: 'https://exchange.xforce.ibmcloud.com/'
2091
2092- regex: 'SMTBot'
2093  name: 'SMTBot'
2094  category: 'Search bot'
2095  url: 'https://www.similartech.com/smtbot'
2096  producer:
2097    name: 'SimilarTech Ltd.'
2098    url: 'https://www.similartech.com/'
2099
2100- regex: 'LCC'
2101  name: 'LCC'
2102  category: 'Search bot'
2103  url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2104  producer:
2105    name: 'Universität Leipzig'
2106    url: 'https://www.uni-leipzig.de/'
2107
2108- regex: 'Startpagina-Linkchecker'
2109  name: 'Startpagina Linkchecker'
2110  category: 'Search bot'
2111  url: 'https://www.startpagina.nl/linkchecker'
2112  producer:
2113    name: 'Startpagina B.V.'
2114    url: 'https://www.startpagina.nl/'
2115
2116- regex: 'GTmetrix'
2117  name: 'GTmetrix'
2118  category: 'Crawler'
2119  url: 'https://gtmetrix.com/'
2120  producer:
2121    name: 'Carbon60 Operating Co. Ltd.'
2122    url: 'https://www.carbon60.com/'
2123
2124- regex: 'Nutch'
2125  name: 'Nutch-based Bot'
2126  category: 'Crawler'
2127  url: 'https://nutch.apache.org'
2128  producer:
2129    name: 'The Apache Software Foundation'
2130    url: 'https://www.apache.org/foundation/'
2131
2132- regex: 'Seobility'
2133  name: 'Seobility'
2134  category: 'Crawler'
2135  url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2136
2137- regex: 'Vercelbot'
2138  name: 'Vercel Bot'
2139  category: 'Service bot'
2140  url: 'https://vercel.com'
2141
2142- regex: 'Grammarly'
2143  name: 'Grammarly'
2144  category: 'Service bot'
2145  url: 'https://www.grammarly.com'
2146
2147- regex: 'Robozilla'
2148  name: 'Robozilla'
2149  category: 'Crawler'
2150
2151- regex: 'Domains Project'
2152  name: 'Domains Project'
2153  category: 'Crawler'
2154  url: 'https://domainsproject.org'
2155
2156- regex: 'PetalBot'
2157  name: 'Petal Bot'
2158  category: 'Crawler'
2159  url: 'https://aspiegel.com/petalbot'
2160
2161- regex: 'SerendeputyBot'
2162  name: 'Serendeputy Bot'
2163  category: 'Crawler'
2164  url: 'https://serendeputy.com/about/serendeputy-bot'
2165
2166- regex: 'ias-va.*admantx.*service-fetcher'
2167  name: 'ADmantX Service Fetcher'
2168  category: 'Service bot'
2169  url: 'https://www.admantx.com/service-fetcher.html'
2170
2171- regex: 'SemanticScholarBot'
2172  name: 'Semantic Scholar Bot'
2173  category: 'Crawler'
2174  url: 'https://www.semanticscholar.org/crawler'
2175
2176- regex: 'VelenPublicWebCrawler'
2177  name: 'Velen Public Web Crawler'
2178  category: 'Crawler'
2179  url: 'https://hunter.io/robot'
2180
2181- regex: 'Barkrowler'
2182  name: 'Barkrowler'
2183  category: 'Crawler'
2184  url: 'http://www.exensa.com/crawl'
2185
2186- regex: 'BDCbot'
2187  name: 'BDCbot'
2188  category: 'Crawler'
2189  url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190  producer:
2191    name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192    url: 'https://bigdatacorp.com.br/'
2193
2194- regex: 'adbeat'
2195  name: 'Adbeat'
2196  category: 'Crawler'
2197  url: 'https://www.adbeat.com/operation_policy'
2198  producer:
2199    name: 'PPC Labs LLC'
2200    url: 'https://www.adbeat.com/'
2201
2202- regex: 'BW/(?:(\d+[\.\d]+))'
2203  name: 'BuiltWith'
2204  category: 'Crawler'
2205  url: 'https://builtwith.com/biup'
2206  producer:
2207    name: 'BuiltWith Pty Ltd'
2208    url: 'https://builtwith.com/'
2209
# ContentKing monitor, identified by its info URL appearing in the user
# agent. Dots are escaped so only the literal host name matches.
- regex: 'https://whatis\.contentkingapp\.com'
  name: 'ContentKing'
  category: 'Site Monitor'
  url: 'https://whatis.contentkingapp.com/'
  producer:
    name: 'ContentKing BV'
    url: 'https://www.contentkingapp.com/'
2217
2218- regex: 'MicroAdBot'
2219  name: 'MicroAdBot'
2220  category: 'Crawler'
2221  url: 'https://www.microad.co.jp/'
2222  producer:
2223    name: 'MicroAd, Inc.'
2224    url: 'https://www.microad.co.jp/'
2225
# PingAdmin.Ru site monitor; the dot is escaped so it matches literally.
- regex: 'PingAdmin\.Ru'
  name: 'PingAdmin.Ru'
  category: 'Site Monitor'
  url: 'https://ping-admin.ru/'
2230
2231- regex: 'notifyninja.+monitoring'
2232  name: 'Notify Ninja'
2233  category: 'Site Monitor'
2234  url: 'http://notifyninja.com'
2235
2236- regex: 'WebDataStats'
2237  name: 'WebDataStats'
2238  category: 'Crawler'
2239  url: 'https://webdatastats.com/policy.html'
2240  producer:
2241    name: 'WebTehRazrabotka LLC'
2242    url: 'https://webdatastats.com/'
2243
# Parse.ly scraper; the dot is escaped so only the literal "parse.ly" token
# matches.
- regex: 'parse\.ly scraper'
  name: 'parse.ly'
  category: 'Crawler'
  url: 'https://www.parse.ly/help/integration/crawler'
  producer:
    name: 'Parsely, Inc.'
    url: 'https://www.parse.ly/'
2251
2252- regex: 'Nimbostratus-Bot'
2253  name: 'Nimbostratus Bot'
2254  category: 'Site Monitor'
2255  url: 'http://cloudsystemnetworks.com'
2256
2257- regex: 'HeartRails_Capture/\d'
2258  name: 'Heart Rails Capture'
2259  category: 'Service Agent'
2260  url: 'http://capture.heartrails.com'
2261
2262- regex: 'Project-Resonance'
2263  name: 'Project Resonance'
2264  category: 'Crawler'
2265  url: 'http://project-resonance.com'
2266
2267- regex: 'DataXu/\d'
2268  name: 'DataXu'
2269  category: 'Service Agent'
2270  url: 'https://advertising.roku.com/dataxu'
2271  producer:
2272    name: 'Roku, Inc.'
2273    url: 'https://roku.com'
2274
2275- regex: 'Cocolyzebot'
2276  name: 'Cocolyzebot'
2277  category: 'Crawler'
2278  url: 'https://cocolyze.com/en/cocolyzebot'
2279  producer:
2280    name: 'VSI INNOVATION SAS'
2281    url: 'https://vsi-innovation.com/'
2282
2283- regex: 'veryhip'
2284  name: 'VeryHip'
2285  category: 'Crawler'
2286  url: 'https://veryhip.com/'
2287  producer:
2288    name: 'VeryHip'
2289    url: 'https://veryhip.com/'
2290
2291- regex: 'LinkpadBot'
2292  name: 'LinkpadBot'
2293  category: 'Crawler'
2294  url: 'https://www.linkpad.org/'
2295  producer:
2296    name: 'Solomono LLC'
2297    url: 'https://www.linkpad.org/'
2298
2299- regex: 'MuscatFerret'
2300  name: 'MuscatFerret'
2301  category: 'Crawler'
2302  url: 'http://www.webtop.com/'
2303
# PageThing crawler, identified by its domain name; the dot is escaped so it
# matches literally.
- regex: 'PageThing\.com'
  name: 'PageThing'
  category: 'Crawler'
  url: 'https://www.pagething.com/'
  producer:
    name: 'SPECIALNOISE LTD'
    url: 'https://www.specialnoise.com/'
2311
# ArchiveBox crawler. Keys follow the name/category/url/producer order used
# by most records in this file; producer fields are empty strings.
- regex: 'ArchiveBox'
  name: 'ArchiveBox'
  category: 'Crawler'
  url: 'https://archivebox.io/'
  producer:
    name: ''
    url: ''
2319
# Choosito crawler. Keys follow the name/category/url/producer order used by
# most records in this file.
- regex: 'Choosito'
  name: 'Choosito'
  category: 'Crawler'
  url: 'https://www.choosito.com/'
  producer:
    name: 'Choosito! Inc.'
    url: 'https://www.choosito.com/'
2327
# Datagnion crawler. Keys follow the name/category/url/producer order used
# by most records in this file.
- regex: 'datagnionbot'
  name: 'datagnionbot'
  category: 'Crawler'
  url: 'https://www.datagnion.com/bot.html'
  producer:
    name: 'DATAGNION GMBH'
    url: 'https://www.datagnion.com/'
2335
# WhatCMS crawler. Keys follow the name/category/url/producer order used by
# most records in this file.
- regex: 'WhatCMS'
  name: 'WhatCMS'
  category: 'Crawler'
  url: 'https://whatcms.org/'
  producer:
    name: 'Nineteen Ten LLC'
    url: 'https://whatcms.org/'
2343
# projectdiscovery httpx probe. Keys follow the name/category/url/producer
# order used by most records in this file; producer fields are empty strings.
- regex: 'httpx'
  name: 'httpx'
  category: 'Crawler'
  url: 'https://github.com/projectdiscovery/httpx'
  producer:
    name: ''
    url: ''
2351
# Expanse security scanner, identified by the contact address in its user
# agent; the dot is escaped so it matches literally.
- regex: 'scaninfo@expanseinc\.com'
  name: 'Expanse'
  category: 'Security Checker'
  url: 'https://expanse.co/'
  producer:
    name: 'Expanse Inc.'
    url: 'https://expanse.co/'
2359
2360- regex: 'HuaweiWebCatBot'
2361  name: 'HuaweiWebCatBot'
2362  category: 'Crawler'
2363  url: 'https://isecurity.huawei.com'
2364  producer:
2365    name: 'Huawei Technologies Co., Ltd.'
2366    url: 'https://huawei.com'
2367
2368- regex: 'Hatena-Favicon'
2369  name: 'Hatena Favicon'
2370  category: 'Crawler'
2371  url: 'https://www.hatena.ne.jp/faq/'
2372  producer:
2373    name: 'Hatena Co., Ltd.'
2374    url: 'https://www.hatena.ne.jp'
2375
2376- regex: 'RyowlEngine/(\d+)'
2377  name: 'Ryowl'
2378  category: 'Crawler'
2379  url: 'https://ryowl.org'
2380
2381- regex: 'OdklBot/(\d+)'
2382  name: 'Odnoklassniki Bot'
2383  category: 'Crawler'
2384  url: 'https://odnoklassniki.ru'
2385
2386- regex: 'Mediatoolkitbot'
2387  name: 'Mediatoolkit Bot'
2388  category: 'Crawler'
2389  url: 'https://mediatoolkit.com'
2390
2391- regex: 'ZoominfoBot'
2392  name: 'ZoominfoBot'
2393  category: 'Crawler'
2394  url: 'https://www.zoominfo.com'
2395
# WeViKaBot followed by a version string. The "+" quantifier sits outside
# the character class so the group matches one or more digits/dots (inside
# the class it was a literal plus sign).
- regex: 'WeViKaBot/([\d\.]+)'
  name: 'WeViKaBot'
  category: 'Crawler'
  url: 'http://www.wevika.de'
2400
# Bare "SEOkicks" token.
# NOTE(review): an earlier record in this file matches 'SEOkicks-Robot' under
# the name 'SEOkicks-Robot'. Presumably the consumer takes the first matching
# record, so this one only catches agents without the "-Robot" suffix -
# confirm the matching order.
- regex: 'SEOkicks'
  name: 'SEOkicks'
  category: 'Crawler'
  url: 'https://www.seokicks.de/robot.html'
2405
# Plukkie: crawler entry matched on 'Plukkie/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'Plukkie/([\d\.]+)'
  name: 'Plukkie'
  category: 'Crawler'
  url: 'http://www.botje.com/plukkie.htm'
2410
# Comscore: crawler entry matched on the token 'proximic;' (the trailing
# semicolon is part of the pattern). No producer is recorded.
- regex: 'proximic;'
  name: 'Comscore'
  category: 'Crawler'
  url: 'https://www.comscore.com/Web-Crawler'
2415
# SurdotlyBot: crawler entry matched on 'SurdotlyBot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'SurdotlyBot/([\d\.]+)'
  name: 'SurdotlyBot'
  category: 'Crawler'
  url: 'http://sur.ly/bot.html'
2420
# Gowikibot: crawler entry matched on 'Gowikibot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots).
# The url uses a well-formed 'http://' scheme separator.
- regex: 'Gowikibot/([\d\.]+)'
  name: 'Gowikibot'
  category: 'Crawler'
  url: 'http://www.gowikibot.com'
2425
# SabsimBot: crawler entry matched on 'SabsimBot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'SabsimBot/([\d\.]+)'
  name: 'SabsimBot'
  category: 'Crawler'
  url: 'https://sabsim.com'
2430
# LumtelBot: crawler entry matched on 'LumtelBot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
# NOTE(review): the url host 'umtel.com' does not match the 'Lumtel' name;
# confirm whether 'lumtel.com' was intended.
- regex: 'LumtelBot/([\d\.]+)'
  name: 'LumtelBot'
  category: 'Crawler'
  url: 'https://umtel.com'
2435
# PiplBot: crawler entry matched on the literal token 'PiplBot'.
# No producer is recorded.
- regex: 'PiplBot'
  name: 'PiplBot'
  category: 'Crawler'
  url: 'http://www.pipl.com/bot'
2440
# WooRank: crawler entry matched on 'woobot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'woobot/([\d\.]+)'
  name: 'WooRank'
  category: 'Crawler'
  url: 'https://www.woorank.com/bot'
2445
# Cookiebot: crawler entry matched on 'Cookiebot/' plus a version; producer
# is recorded as Cybot A/S.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'Cookiebot/([\d\.]+)'
  name: 'Cookiebot'
  category: 'Crawler'
  url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
  producer:
    name: 'Cybot A/S'
    url: 'https://www.cybot.com/'
2453
# NetSystemsResearch: security-checker entry matched on the literal token
# 'NetSystemsResearch'.
- regex: 'NetSystemsResearch'
  name: 'NetSystemsResearch'
  category: 'Security Checker'
  url: 'https://www.netsystemsresearch.com/'
  producer:
    name: 'NET SYSTEMS RESEARCH LLC'
    url: 'https://www.netsystemsresearch.com/'
2461
# CensysInspect: security-checker entry matched on 'CensysInspect/' plus a
# version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'CensysInspect/([\d\.]+)'
  name: 'CensysInspect'
  category: 'Security Checker'
  url: 'https://about.censys.io/'
  producer:
    name: 'Censys, Inc.'
    url: 'https://censys.io/'
2469
# GDNP: crawler entry matched on the domain token 'gdnplus.com'.
# The dot is escaped so that it matches only a literal '.' rather than any
# character.
- regex: 'gdnplus\.com'
  name: 'GDNP'
  category: 'Crawler'
  url: 'https://gdnplus.com/'
  producer:
    name: 'Global Digital Network Plus, LLC'
    url: 'https://gdnplus.com/'
2477
# WellKnownBot: crawler entry matched on 'WellKnownBot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'WellKnownBot/([\d\.]+)'
  name: 'WellKnownBot'
  category: 'Crawler'
  url: 'https://well-known.dev'
2482
# Adsbot: crawler entry matched on 'Adsbot/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'Adsbot/([\d\.]+)'
  name: 'Adsbot'
  category: 'Crawler'
  url: 'https://seostar.co/robot/'
2487
# MTRobot: crawler entry matched on 'MTRobot/' plus a version; producer is
# recorded as Metrics Tools.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'MTRobot/([\d\.]+)'
  name: 'MTRobot'
  category: 'Crawler'
  url: 'https://metrics-tools.de/robot.html'
  producer:
    name: 'Metrics Tools'
    url: 'https://metrics-tools.de/'
2495
# serpstatbot: crawler entry matched on 'serpstatbot/' plus a version;
# producer is recorded as Netpeak Ltd.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'serpstatbot/([\d\.]+)'
  name: 'serpstatbot'
  category: 'Crawler'
  url: 'http://serpstatbot.com/'
  producer:
    name: 'Netpeak Ltd'
    url: 'https://netpeak.net/'
2503
# colly: crawler entry matched on the short literal token 'colly'.
# No producer is recorded.
- regex: 'colly'
  name: 'colly'
  category: 'Crawler'
  url: 'https://github.com/gocolly/colly/'
2508
# l9tcpid: security-checker entry matched on 'l9tcpid/v' plus a version.
# Group 1 captures the whole dotted version after the 'v' (one or more
# digits/dots), not just its first character.
- regex: 'l9tcpid/v([\d\.]+)'
  name: 'l9tcpid'
  category: 'Security Checker'
  url: 'https://github.com/LeakIX/l9tcpid'
2513
# MegaIndex: crawler entry matched on 'MegaIndex.ru/' plus a version.
# The dot in the domain is escaped so it matches only a literal '.', and
# group 1 captures the whole dotted version (one or more digits/dots).
- regex: 'MegaIndex\.ru/([\d\.]+)'
  name: 'MegaIndex'
  category: 'Crawler'
  url: 'https://megaindex.com/crawler'
2518
# Seekport: crawler entry matched on the literal token 'Seekport'; producer
# is recorded as SISTRIX GmbH.
- regex: 'Seekport'
  name: 'Seekport'
  category: 'Crawler'
  url: 'http://www.seekport.com/'
  producer:
    name: 'SISTRIX GmbH'
    url: 'https://www.sistrix.de/'
2526
# seolyt: crawler entry matched on 'seolyt/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'seolyt/([\d\.]+)'
  name: 'seolyt'
  category: 'Crawler'
  url: 'https://seolyt.com/'
2531
# YaK: crawler entry matched on 'YaK/' plus a version; producer is recorded
# as Linkfluence SAS.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'YaK/([\d\.]+)'
  name: 'YaK'
  category: 'Crawler'
  url: 'https://www.linkfluence.com/'
  producer:
    name: 'Linkfluence SAS'
    url: 'https://www.linkfluence.com/'
2539
# KomodiaBot: crawler entry matched on 'KomodiaBot/' plus a version; producer
# is recorded as Komodia Inc.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'KomodiaBot/([\d\.]+)'
  name: 'KomodiaBot'
  category: 'Crawler'
  url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
  producer:
    name: 'Komodia Inc.'
    url: 'https://www.komodia.com/'
2547
# Neevabot: search-bot entry matched on 'Neevabot/' plus a version; producer
# is recorded as Neeva Inc.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'Neevabot/([\d\.]+)'
  name: 'Neevabot'
  category: 'Search bot'
  url: 'https://neeva.com/neevabot'
  producer:
    name: 'Neeva Inc.'
    url: 'https://neeva.com/'
2555
# LinkPreview: service-agent entry matched on 'LinkPreview/' plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'LinkPreview/([\d\.]+)'
  name: 'LinkPreview'
  category: 'Service Agent'
  url: 'https://www.linkpreview.net/'
2560
# JungleKeyThumbnail: crawler entry matched on 'JungleKeyThumbnail/' plus a
# version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'JungleKeyThumbnail/([\d\.]+)'
  name: 'JungleKeyThumbnail'
  category: 'Crawler'
  url: 'https://junglekey.com/'
2565
# RocketMonitorBot: site-monitor entry; producer is recorded as
# Radio Mast, Inc. Two spellings are accepted: 'rocketmonitor <version>'
# (space separated) and 'rocketmonitorbot/<version>'.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'rocketmonitor(?: |bot/)([\d\.]+)'
  name: 'RocketMonitorBot'
  category: 'Site Monitor'
  url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
  producer:
    name: 'Radio Mast, Inc.'
    url: 'https://www.radiomast.io/'
2573
# SitemapParser-VIPnytt: crawler entry matched on 'SitemapParser-VIPnytt/'
# plus a version.
# Group 1 captures the whole dotted version (one or more digits/dots), not
# just its first character.
- regex: 'SitemapParser-VIPnytt/([\d\.]+)'
  name: 'SitemapParser-VIPnytt'
  category: 'Crawler'
  url: 'https://github.com/VIPnytt/SitemapParser/'
2578
2579
# Turnitin: crawler entry. The pattern is anchored with '^', so 'Turnitin'
# must appear at the very start of the matched string.
- regex: '^Turnitin'
  name: 'Turnitin'
  category: 'Crawler'
  url: 'https://turnitin.com/robot/crawlerinfo.html'
2584
# Dotcom Monitor: site-monitor entry. Matches either 'DMBrowser/' followed by
# digits, or 'DMBrowser-UV' / 'DMBrowser-BV'.
- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
  name: 'Dotcom Monitor'
  category: 'Site Monitor'
  url: 'https://www.dotcom-monitor.com'
2589
# ThinkChaos: crawler entry matched on 'ThinkChaos/' (slash included).
# NOTE(review): unlike the neighbouring entries this one has no 'url' key;
# confirm whether that is intentional before adding one.
- regex: 'ThinkChaos/'
  name: 'ThinkChaos'
  category: 'Crawler'
2593
# DataForSeoBot: crawler entry matched on the literal token 'DataForSeoBot'.
# No producer is recorded.
- regex: 'DataForSeoBot'
  name: 'DataForSeoBot'
  category: 'Crawler'
  url: 'https://dataforseo.com/dataforseo-bot'
2598
# Generic detections
#
# Catch-all entry; it carries only a regex and a name (no category, url or
# producer). The pattern reads as:
#   - an optional run of characters from [a-z0-9-_];
#   - one of the keywords bot, crawler, crawl, checker, archiver, transcoder
#     or spider;
#   - then either a character outside a-z or the end of the string.
# The keyword 'bot' is rejected when it is directly preceded by 'cu',
# 'power ' / 'power_' or 'm ' / 'm_', and when it is directly followed by
# ' TAB' / '_TAB' or by an optional space/underscore, the digit 5 and one
# more digit.
# Character ranges are described as written; case handling depends on how
# the consuming library compiles the pattern.
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
  name: 'Generic Bot'
2602