###############
# Device Detector - The Universal Device Detection library for parsing User Agents
#
# @link https://matomo.org
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
###############

# Each list entry describes one bot. Keys, in the order used below:
#   regex    - pattern looked for in the User Agent string
#   name     - display name of the bot
#   category - kind of bot (optional, may be '')
#   url      - page describing the bot (optional, may be '')
#   producer - name/url of whoever operates the bot (optional)
# Values are single-quoted, so a backslash in a regex is taken literally by YAML.

- regex: '360Spider'
  name: '360Spider'
  category: 'Search bot'
  url: 'https://www.so.com/help/help_3_2.html'
  producer:
    name: 'Online Media Group, Inc.'
    url: ''

- regex: 'Aboundex'
  name: 'Aboundexbot'
  category: 'Search bot'
  url: 'http://www.aboundex.com/crawler/'
  producer:
    name: 'Aboundex.com'
    url: 'http://www.aboundex.com'

- regex: 'AcoonBot'
  name: 'Acoon'
  category: 'Search bot'
  url: 'http://www.acoon.de/robot.asp'
  producer:
    name: 'Acoon GmbH'
    url: 'http://www.acoon.de'

- regex: 'AddThis\.com'
  name: 'AddThis.com'
  category: 'Social Media Agent'
  url: ''
  producer:
    name: 'Clearspring Technologies, Inc.'
    url: 'http://www.clearspring.com'

- regex: 'AhrefsBot'
  name: 'aHrefs Bot'
  category: 'Crawler'
  url: 'https://ahrefs.com/robot'
  producer:
    name: 'Ahrefs Pte Ltd'
    url: 'https://ahrefs.com/robot'

- regex: 'ia_archiver|alexabot|verifybot'
  name: 'Alexa Crawler'
  category: 'Search bot'
  url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
  producer:
    name: 'Alexa Internet'
    url: 'https://www.alexa.com'

- regex: 'alexa site audit'
  name: 'Alexa Site Audit'
  category: 'Site Monitor'
  url: 'https://support.alexa.com/hc/en-us/articles/200450194'
  producer:
    name: 'Alexa Internet'
    url: 'https://www.alexa.com'

- regex: 'Amazonbot'
  name: 'Amazon Bot'
  category: 'Crawler'
  url: 'https://developer.amazon.com/support/amazonbot'
  producer:
    name: 'Amazon.com, Inc.'
    url: 'https://www.amazon.com/'

- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
  name: 'Amazon Route53 Health Check'
  category: 'Service Agent'
  producer:
    name: 'Amazon Web Services'
    url: 'https://aws.amazon.com/'

- regex: 'AmorankSpider'
  name: 'Amorank Spider'
  category: 'Crawler'
  url: 'http://amorank.com/webcrawler.html'
  producer:
    name: 'Amorank'
    url: 'http://www.amorank.com'

- regex: 'ApacheBench'
  name: 'ApacheBench'
  category: 'Benchmark'
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
  producer:
    name: 'The Apache Software Foundation'
    url: 'https://www.apache.org/foundation/'

- regex: 'Applebot'
  name: 'Applebot'
  category: 'Crawler'
  url: 'https://support.apple.com/en-us/HT204683'
  producer:
    name: 'Apple Inc'
    url: 'https://www.apple.com'

- regex: 'AppSignalBot'
  name: 'AppSignalBot'
  category: 'Site Monitor'
  url: 'https://docs.appsignal.com/uptime-monitoring/'
  producer:
    name: 'AppSignal'
    url: 'https://appsignal.com/'

- regex: 'Arachni'
  name: 'Arachni'
  category: 'Security Checker'
  url: 'https://www.arachni-scanner.com/'
  producer:
    name: 'Sarosys LLC'
    url: 'https://www.sarosys.com/'

- regex: 'AspiegelBot'
  name: 'AspiegelBot'
  category: 'Crawler'
  url: 'https://aspiegel.com/'
  producer:
    name: 'Huawei'
    url: 'https://www.huawei.com/'

- regex: 'Castro 2, Episode Duration Lookup'
  name: 'Castro 2'
  category: 'Service Agent'
  url: 'http://supertop.co/castro/'
  producer:
    name: 'Supertop'
    url: 'http://supertop.co'

- regex: 'Curious George'
  name: 'Analytics SEO Crawler'
  category: 'Crawler'
  url: 'http://www.analyticsseo.com/crawler'
  producer:
    name: 'Analytics SEO'
    url: 'http://www.analyticsseo.com'

- regex: 'archive\.org_bot|special_archiver'
  name: 'archive.org bot'
  category: 'Crawler'
  url: 'https://archive.org/details/archive.org_bot'
  producer:
    name: 'The Internet Archive'
    url: 'https://archive.org'

- regex: 'Ask Jeeves/Teoma'
  name: 'Ask Jeeves'
  category: 'Search bot'
  url: ''
  producer:
    name: 'Ask Jeeves Inc.'
    url: 'http://www.ask.com'

- regex: 'Backlink-Check\.de'
  name: 'Backlink-Check.de'
  category: 'Crawler'
  url: 'http://www.backlink-check.de/bot.html'
  producer:
    name: 'Mediagreen Medienservice'
    url: 'http://www.backlink-check.de'

- regex: 'BacklinkCrawler'
  name: 'BacklinkCrawler'
  category: 'Crawler'
  url: 'http://www.backlinktest.com/crawler.html'
  producer:
    name: '2.0Promotion GbR'
    url: 'http://www.backlinktest.com'

- regex: 'Baidu.*spider|baidu Transcoder'
  name: 'Baidu Spider'
  category: 'Search bot'
  url: 'http://www.baidu.com/search/spider.htm'
  producer:
    name: 'Baidu'
    url: 'http://www.baidu.com'

- regex: 'BazQux'
  name: 'BazQux Reader'
  category: 'Feed Fetcher'
  url: 'https://bazqux.com/fetcher'
  producer:
    name: ''
    url: ''

- regex: 'Better Uptime Bot'
  name: 'Better Uptime Bot'
  category: 'Site Monitor'
  url: 'https://betteruptime.com/faq'
  producer:
    name: 'Better Uptime'
    url: 'https://betteruptime.com/'

- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
  name: 'BingBot'
  category: 'Search bot'
  url: 'http://search.msn.com/msnbot.htm'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'

- regex: 'Blekkobot'
  name: 'Blekkobot'
  category: 'Search bot'
  url: 'http://blekko.com/about/blekkobot'
  producer:
    name: 'Blekko'
    url: 'http://blekko.com'

- regex: 'BLEXBot'
  name: 'BLEXBot Crawler'
  category: 'Crawler'
  url: 'http://webmeup-crawler.com'
  producer:
    name: 'WebMeUp'
    url: 'http://webmeup.com'

- regex: 'Bloglovin'
  name: 'Bloglovin'
  category: 'Feed Fetcher'
  url: 'http://www.bloglovin.com'
  producer:
    name: ''
    url: ''

- regex: 'Blogtrottr'
  name: 'Blogtrottr'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Blogtrottr Ltd'
    url: 'https://blogtrottr.com/'

- regex: 'BoardReader Blog Indexer'
  name: 'BoardReader Blog Indexer'
  category: 'Crawler'
  producer:
    name: 'BoardReader'
    url: 'https://boardreader.com/'

- regex: 'BountiiBot'
  name: 'Bountii Bot'
  category: 'Search bot'
  url: 'http://bountii.com/contact.php'
  producer:
    name: 'Bountii Inc.'
    url: 'http://bountii.com'

- regex: 'Browsershots'
  name: 'Browsershots'
  category: 'Service Agent'
  url: 'http://browsershots.org/faq'
  producer:
    name: 'Browsershots.org'
    url: 'http://browsershots.org'

- regex: 'BUbiNG'
  name: 'BUbiNG'
  category: 'Crawler'
  url: 'http://law.di.unimi.it/BUbiNG.html'
  producer:
    name: 'The Laboratory for Web Algorithmics (LAW)'
    url: 'http://law.di.unimi.it/software.php#buging'

# The negative lookbehind keeps "HTC Butterfly/" / "HTC_Butterfly/" from matching.
- regex: '(?<!HTC)[ _]Butterfly/'
  name: 'Butterfly Robot'
  category: 'Search bot'
  url: 'http://labs.topsy.com/butterfly'
  producer:
    name: 'Topsy Labs'
    url: 'http://labs.topsy.com'

- regex: 'CareerBot'
  name: 'CareerBot'
  category: 'Crawler'
  url: 'http://www.career-x.de/bot.html'
  producer:
    name: 'career-x GmbH'
    url: 'http://www.career-x.de'

# NOTE(review): producer is identical to the 'redditbot' entry while the bot url
# points at commoncrawl.org - looks copy-pasted, confirm before relying on it.
- regex: 'CCBot'
  name: 'ccBot crawler'
  category: 'Crawler'
  url: 'http://commoncrawl.org/faq/'
  producer:
    name: 'reddit inc.'
    url: 'http://www.reddit.com'

- regex: 'Cliqzbot'
  name: 'Cliqzbot'
  category: 'Crawler'
  url: 'http://cliqz.com/company/cliqzbot'
  producer:
    name: '10betterpages GmbH'
    url: 'http://cliqz.com'

- regex: 'Cloudflare-AMP'
  name: 'CloudFlare AMP Fetcher'
  category: 'Crawler'
  url: 'https://amp.cloudflare.com/doc/fetcher.html'
  producer:
    name: 'CloudFlare'
    url: 'http://www.cloudflare.com'

- regex: 'CloudflareDiagnostics'
  name: 'Cloudflare Diagnostics'
  category: 'Site Monitor'
  url: 'https://www.cloudflare.com/'
  producer:
    name: 'Cloudflare'
    url: 'https://www.cloudflare.com'

- regex: 'CloudFlare-AlwaysOnline'
  name: 'CloudFlare Always Online'
  category: 'Site Monitor'
  url: 'http://www.cloudflare.com/always-online'
  producer:
    name: 'CloudFlare'
    url: 'http://www.cloudflare.com'

# '.' is escaped so that only the literal domain matches.
- regex: 'coccoc\.com'
  name: 'Cốc Cốc Bot'
  category: 'Search bot'
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
  producer:
    name: 'Cốc Cốc'
    url: 'https://coccoc.com/'

- regex: 'collectd'
  name: 'Collectd'
  category: 'Site Monitor'
  url: 'https://collectd.org/'
  producer:
    name: 'Collectd'
    url: 'https://collectd.org/'

- regex: 'CommaFeed'
  name: 'CommaFeed'
  category: 'Feed Fetcher'
  url: 'http://www.commafeed.com'
  producer:
    name: ''
    url: ''

- regex: 'CSS Certificate Spider'
  name: 'CSS Certificate Spider'
  category: 'Crawler'
  url: 'http://www.css-security.com/certificatespider/'
  producer:
    name: 'Certified Security Solutions'
    url: 'https://www.css-security.com/company/about-us/'

- regex: 'Datadog Agent'
  name: 'Datadog Agent'
  category: 'Site Monitor'
  url: 'https://github.com/DataDog/dd-agent'
  producer:
    name: 'Datadog'
    url: 'https://www.datadoghq.com/'

- regex: 'Datanyze'
  name: 'Datanyze'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Datanyze'
    url: 'https://www.datanyze.com'

- regex: 'Dataprovider'
  name: 'Dataprovider'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Dataprovider B.V.'
    url: 'https://www.dataprovider.com/'

- regex: 'Daum(oa)?[ /][0-9]'
  name: 'Daum'
  category: 'Search bot'
  url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
  producer:
    name: 'Daum Communications Corp.'
    url: 'http://www.kakaocorp.com/main'

- regex: 'Dazoobot'
  name: 'Dazoobot'
  category: 'Search bot'
  url: ''
  producer:
    name: 'DAZOO.FR'
    url: 'http://dazoo.fr'

- regex: 'discobot'
  name: 'Discobot'
  category: 'Search bot'
  url: 'http://discoveryengine.com/discobot.html'
  producer:
    name: 'Discovery Engine'
    url: 'http://discoveryengine.com'

# '.' is escaped so that only the literal e-mail address matches.
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
  name: 'Domain Re-Animator Bot'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Domain Re-Animator, LLC'
    url: 'http://domainreanimator.com'

- regex: 'DotBot'
  name: 'DotBot'
  category: 'Crawler'
  url: 'http://www.opensiteexplorer.org/dotbot'
  producer:
    name: 'SEOmoz, Inc.'
    url: 'http://moz.com/'

- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
  name: 'DuckDuckGo Bot'
  category: 'Search bot'
  url: 'https://duckduckgo.com/duckduckbot'
  producer:
    name: 'DuckDuckGo'
    url: 'https://duckduckgo.com/'

- regex: 'EasouSpider'
  name: 'Easou Spider'
  category: 'Search bot'
  url: 'http://www.easou.com/search/spider.html'
  producer:
    name: 'easou ICP'
    url: 'http://www.easou.com'

- regex: 'eCairn-Grabber'
  name: 'eCairn-Grabber'
  category: 'Crawler'
  producer:
    name: 'eCairn'
    url: 'https://ecairn.com'

- regex: 'EMail Exractor'
  name: 'EMail Exractor'
  category: 'Crawler'
  url: ''
  producer:
    name: ''
    url: ''

- regex: 'evc-batch'
  name: 'evc-batch'
  category: 'Crawler'
  url: ''
  producer:
    name: 'eVenture Capital Partners II, LLC'
    url: 'http://www.eventures.vc/'

- regex: 'Exabot|ExaleadCloudview'
  name: 'ExaBot'
  category: 'Crawler'
  url: 'http://www.exabot.com/go/robot'
  producer:
    name: 'Dassault Systèmes'
    url: 'http://www.3ds.com'

- regex: 'ExactSeek Crawler'
  name: 'ExactSeek Crawler'
  category: 'Search bot'
  url: 'http://www.exactseek.com'
  producer:
    name: 'Jayde Online, Inc.'
    url: 'http://www.jaydeonlineinc.com'

- regex: 'Ezooms'
  name: 'Ezooms'
  category: 'Crawler'
  url: ''
  producer:
    name: 'SEOmoz, Inc.'
    url: 'http://moz.com/'

- regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
  name: 'Facebook External Hit'
  category: 'Social Media Agent'
  url: 'https://www.facebook.com/externalhit_uatext.php'
  producer:
    name: 'Facebook'
    url: 'http://www.facebook.com'

- regex: 'Feedbin'
  name: 'Feedbin'
  category: 'Feed Fetcher'
  url: 'http://feedbin.com/'
  producer:
    name: ''
    url: ''

- regex: 'FeedBurner'
  name: 'FeedBurner'
  category: 'Feed Fetcher'
  url: 'http://www.feedburner.com'
  producer:
    name: ''
    url: ''

- regex: 'Feed Wrangler'
  name: 'Feed Wrangler'
  category: 'Feed Fetcher'
  url: 'https://feedwrangler.net/'
  producer:
    name: 'David Smith & Developing Perspective, LLC'
    url: 'https://david-smith.org'

- regex: 'Feedly'
  name: 'Feedly'
  category: 'Feed Fetcher'
  url: 'http://www.feedly.com'
  producer:
    name: ''
    url: ''

- regex: 'Feedspot'
  name: 'Feedspot'
  category: 'Feed Fetcher'
  url: 'http://www.feedspot.com'
  producer:
    name: ''
    url: ''

- regex: 'Fever/[0-9]'
  name: 'Fever'
  category: 'Feed Fetcher'
  url: 'http://feedafever.com/'
  producer:
    name: ''
    url: ''

- regex: 'FlipboardProxy|FlipboardRSS'
  name: 'Flipboard'
  category: 'Feed Fetcher'
  url: 'http://flipboard.com/browserproxy'
  producer:
    name: 'Flipboard'
    url: 'http://flipboard.com/'

- regex: 'Findxbot'
  name: 'Findxbot'
  category: 'Crawler'
  url: 'http://www.findxbot.com'

- regex: 'FreshRSS'
  name: 'FreshRSS'
  category: 'Feed Fetcher'
  url: 'https://freshrss.org/'

- regex: 'Genieo'
  name: 'Genieo Web filter'
  category: ''
  url: 'http://www.genieo.com/webfilter.html'
  producer:
    name: 'Genieo'
    url: 'http://www.genieo.com'

- regex: 'GigablastOpenSource'
  name: 'Gigablast'
  category: 'Search bot'
  url: 'https://github.com/gigablast/open-source-search-engine'
  producer:
    name: 'Matt Wells'
    url: 'http://www.gigablast.com/faq.html'

- regex: 'Gluten Free Crawler'
  name: 'Gluten Free Crawler'
  category: 'Crawler'
  url: 'http://glutenfreepleasure.com/'
  producer:
    name: ''
    url: ''

- regex: 'gobuster'
  name: 'Gobuster'
  url: 'https://github.com/OJ/gobuster'

- regex: 'ichiro/mobile goo'
  name: 'Goo'
  category: 'Search bot'
  url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
  producer:
    name: 'NTT Resonant'
    url: 'http://goo.ne.jp'

- regex: 'Storebot-Google'
  name: 'Google StoreBot'
  category: 'Crawler'

- regex: 'Google Favicon'
  name: 'Google Favicon'
  category: 'Crawler'

- regex: 'Google Search Console'
  name: 'Google Search Console'
  category: 'Crawler'
  url: 'https://search.google.com/search-console/about'
  producer:
    name: 'Google Inc.'
    url: 'http://www.google.com'

- regex: 'Google Page Speed Insights'
  name: 'Google PageSpeed Insights'
  category: 'Site Monitor'
  url: 'http://developers.google.com/speed/pagespeed/insights/'
  producer:
    name: 'Google Inc.'
    url: 'http://www.google.com'

- regex: 'google_partner_monitoring'
  name: 'Google Partner Monitoring'
  category: 'Site Monitor'
  url: ''
  producer:
    name: 'Google Inc.'
    url: 'http://www.google.com'

- regex: 'Google-Cloud-Scheduler'
  name: 'Google Cloud Scheduler'
  category: 'Crawler'
  url: 'https://cloud.google.com/scheduler'
  producer:
    name: 'Google Inc.'
    url: 'https://www.google.com'

- regex: 'Google-Structured-Data-Testing-Tool'
  name: 'Google Structured Data Testing Tool'
  category: 'Validator'
  url: 'https://search.google.com/structured-data/testing-tool'
  producer:
    name: 'Google Inc.'
    url: 'http://www.google.com'

- regex: 'GoogleStackdriverMonitoring'
  name: 'Google Stackdriver Monitoring'
  category: 'Site Monitor'
  url: 'https://cloud.google.com/monitoring'
  producer:
    name: 'Google Inc.'
    url: 'https://www.google.com'

- regex: 'via ggpht\.com GoogleImageProxy'
  name: 'Gmail Image Proxy'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Google Inc.'
    url: 'http://www.google.com'

- regex: 'SeznamEmailProxy'
  name: 'Seznam Email Proxy'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Seznam.cz, a.s.'
    url: 'http://www.seznam.cz/'

- regex: 'Seznam-Zbozi-robot'
  name: 'Seznam Zbozi.cz'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Seznam.cz, a.s.'
    url: 'https://www.zbozi.cz/'

- regex: 'Heurekabot-Feed'
  name: 'Heureka Feed'
  category: 'Crawler'
  url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
  producer:
    name: 'Heureka.cz, a.s.'
    url: 'https://www.heureka.cz/'

- regex: 'ShopAlike'
  name: 'ShopAlike'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Visual Meta'
    url: 'https://www.shopalike.cz/'

# Generic Googlebot entry; it is listed after the more specific Google entries above.
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
  name: 'Googlebot'
  category: 'Search bot'
  url: 'http://www.google.com/bot.html'
  producer:
    name: 'Google Inc.'
    url: 'http://www.google.com'

- regex: 'heritrix'
  name: 'Heritrix'
  category: 'Crawler'
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
  producer:
    name: 'The Internet Archive'
    url: 'https://archive.org'

# The trailing space is part of the pattern.
- regex: 'HubSpot '
  name: 'HubSpot'
  category: 'Crawler'
  producer:
    name: 'HubSpot Inc.'
    url: 'https://www.hubspot.com'

- regex: 'HTTPMon'
  name: 'HTTPMon'
  category: 'Site Monitor'
  url: 'http://www.httpmon.com'
  producer:
    name: 'towards GmbH'
    url: 'http://www.towards.ch/'

- regex: 'ICC-Crawler'
  name: 'ICC-Crawler'
  category: 'Crawler'
  url: 'http://www.nict.go.jp/en/univ-com/plan/crawl.html'
  producer:
    name: ''
    url: ''

# '.' is escaped so that only the literal domain matches.
- regex: 'inoreader\.com'
  name: 'inoreader'
  category: 'Feed Reader'
  url: 'https://www.inoreader.com'

- regex: 'iisbot'
  name: 'IIS Site Analysis'
  category: 'Crawler'
  url: 'http://www.iis.net/iisbot.html'
  producer:
    name: 'Microsoft Corporation'
    url: 'http://www.microsoft.com'

- regex: 'ips-agent'
  name: 'IPS Agent'
  category: 'Crawler'
  producer:
    name: 'VeriSign, Inc'
    url: 'http://www.verisign.com/'

- regex: 'IP-Guide\.com'
  name: 'IP-Guide Crawler'
  category: 'Crawler'
  url: ''
  producer:
    name: ''
    url: 'https://ip-guide.com'

- regex: 'k6/[0-9\.]+'
  name: 'K6'
  url: 'https://k6.io/'

- regex: 'kouio'
  name: 'Kouio'
  category: 'Feed Fetcher'
  url: 'http://kouio.com/'
  producer:
    name: ''
    url: ''

- regex: 'larbin'
  name: 'Larbin web crawler'
  category: 'Crawler'
  url: 'http://larbin.sourceforge.net'
  producer:
    name: ''
    url: ''

# Character class spelled out as A-Za-z: the range A-z would also cover the
# punctuation characters [ \ ] ^ _ and ` that sit between 'Z' and 'a' in ASCII.
- regex: '([A-Za-z0-9]*)-Lighthouse'
  name: 'Lighthouse'
  category: 'Site Monitor'
  url: 'https://developers.google.com/web/tools/lighthouse'
  producer:
    name: 'Lighthouse'
    url: 'https://developers.google.com/web/tools/lighthouse'

- regex: 'linkdexbot|linkdex\.com'
  name: 'Linkdex Bot'
  category: 'Search bot'
  url: 'http://www.linkdex.com/bots'
  producer:
    name: 'Mojeek Ltd.'
    url: 'http://www.mojeek.com'

- regex: 'LinkedInBot'
  name: 'LinkedIn Bot'
  category: 'Social Media Agent'
  url: 'http://www.linkedin.com'
  producer:
    name: 'LinkedIn'
    url: 'http://www.linkedin.com'

- regex: 'ltx71'
  name: 'LTX71'
  url: 'http://ltx71.com/'
  producer:
    name: ''
    url: ''

- regex: 'Mail\.RU'
  name: 'Mail.Ru Bot'
  category: 'Search bot'
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
  producer:
    name: 'Mail.Ru Group'
    url: 'http://corp.mail.ru'

- regex: 'magpie-crawler'
  name: 'Magpie-Crawler'
  category: 'Social Media Agent'
  url: 'http://www.brandwatch.com/magpie-crawler/'
  producer:
    name: 'Brandwatch'
    url: 'http://www.brandwatch.com'

- regex: 'MagpieRSS'
  name: 'MagpieRSS'
  category: 'Feed Parser'
  url: 'http://magpierss.sourceforge.net/'
  producer:
    name: ''
    url: ''

- regex: 'masscan'
  name: 'masscan'
  category: 'Crawler'
  url: 'https://github.com/robertdavidgraham/masscan'
  producer:
    name: 'Robert Graham'
    url: 'https://github.com/robertdavidgraham'

- regex: 'Mastodon/'
  name: 'Mastodon Bot'
  category: 'Social Media Agent'

- regex: 'meanpathbot'
  name: 'Meanpath Bot'
  category: 'Search bot'
  url: 'http://www.meanpath.com/meanpathbot.html'
  producer:
    name: 'Meanpath'
    url: 'http://www.meanpath.com'

- regex: 'MetaJobBot'
  name: 'MetaJobBot'
  category: 'Crawler'
  url: 'http://www.metajob.at/the/crawler'
  producer:
    name: 'MetaJob'
    url: 'http://www.metajob.at'

- regex: 'MetaInspector'
  name: 'MetaInspector'
  category: 'Crawler'
  url: 'https://github.com/jaimeiniesta/metainspector'

- regex: 'MixrankBot'
  name: 'Mixrank Bot'
  category: 'Crawler'
  url: 'http://mixrank.com'
  producer:
    name: 'Online Media Group, Inc.'
    url: ''

- regex: 'MJ12bot'
  name: 'MJ12 Bot'
  category: 'Search bot'
  url: 'http://majestic12.co.uk/bot.php'
  producer:
    name: 'Majestic-12'
    url: 'http://majestic12.co.uk'

- regex: 'Mnogosearch'
  name: 'Mnogosearch'
  category: 'Search bot'
  url: 'http://www.mnogosearch.org/'
  producer:
    name: 'Lavtech.Com Corp.'
    url: ''

- regex: 'MojeekBot'
  name: 'MojeekBot'
  category: 'Search bot'
  url: 'http://www.mojeek.com/bot.html'
  producer:
    name: 'Mojeek Ltd.'
    url: 'http://www.mojeek.com'

- regex: 'munin'
  name: 'Munin'
  category: 'Site Monitor'
  url: 'http://munin-monitoring.org/'
  producer:
    name: 'Munin'
    url: 'http://munin-monitoring.org/'

- regex: 'NalezenCzBot'
  name: 'NalezenCzBot'
  category: 'Crawler'
  url: 'http://www.nalezen.cz/about-crawler'
  producer:
    name: 'Jaroslav Kuboš'
    url: ''

- regex: 'check_http/v'
  name: 'Nagios check_http'
  category: 'Site Monitor'
  url: 'https://nagios.org'
  producer:
    name: 'Nagios Plugins Development Team'
    url: 'https://nagios.org'

# '.' is escaped (like the parentheses) so that only the literal address matches.
- regex: 'nbertaupete95\(at\)gmail\.com'
  name: 'nbertaupete95'
  category: 'Crawler'

- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
  name: 'Netcraft Survey Bot'
  category: 'Search bot'
  url: ''
  producer:
    name: 'Netcraft'
    url: 'http://www.netcraft.com'

- regex: 'netEstate NE Crawler'
  name: 'netEstate'
  category: 'Crawler'
  url: 'http://www.website-datenbank.de/Impressum'
  producer:
    name: 'netEstate GmbH'
    url: 'https://www.netestate.de/en/'

- regex: 'Netvibes'
  name: 'Netvibes'
  category: 'Feed Fetcher'
  url: 'http://www.netvibes.com/'
  producer:
    name: ''
    url: ''

- regex: 'NewsBlur .*(Fetcher|Finder)'
  name: 'NewsBlur'
  category: 'Feed Fetcher'
  url: 'http://www.newsblur.com'
  producer:
    name: ''
    url: ''

- regex: 'NewsGatorOnline'
  name: 'NewsGator'
  category: 'Feed Fetcher'
  url: 'http://www.newsgator.com'
  producer:
    name: ''
    url: ''

- regex: 'nlcrawler'
  name: 'NLCrawler'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Northern Light'
    url: 'http://northernlight.com'

- regex: 'Nmap Scripting Engine'
  name: 'Nmap'
  category: 'Security Checker'
  url: 'https://nmap.org/book/nse.html'
  producer:
    name: 'Nmap'
    url: 'https://nmap.org/'

- regex: 'Nuzzel'
  name: 'Nuzzel'
  category: 'Crawler'
  producer:
    name: 'Nuzzel'
    url: 'https://www.nuzzel.com/'

- regex: 'Octopus [0-9]'
  name: 'Octopus'

- regex: 'omgili'
  name: 'Omgili bot'
  category: 'Search bot'
  url: 'http://www.omgili.com/Crawler.html'
  producer:
    name: 'Omgili'
    url: 'http://www.omgili.com'

- regex: 'OpenindexSpider'
  name: 'Openindex Spider'
  category: 'Search bot'
  url: 'http://www.openindex.io/en/webmasters/spider.html'
  producer:
    name: 'Openindex B.V.'
    url: 'http://www.openindex.io'

- regex: 'spbot'
  name: 'OpenLinkProfiler'
  category: 'Crawler'
  url: 'http://openlinkprofiler.org/bot'
  producer:
    name: 'Axandra GmbH'
    url: 'http://www.axandra.com'

- regex: 'OpenWebSpider'
  name: 'OpenWebSpider'
  category: 'Crawler'
  url: 'http://www.openwebspider.org'
  producer:
    name: 'OpenWebSpider Lab'
    url: 'http://lab.openwebspider.org'

- regex: 'OrangeBot|VoilaBot'
  name: 'Orange Bot'
  category: 'Search bot'
  url: 'http://lemoteur.orange.fr'
  producer:
    name: 'Orange'
    url: 'http://www.orange.fr'

- regex: 'PaperLiBot'
  name: 'PaperLiBot'
  category: 'Search bot'
  url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
  producer:
    name: 'Smallrivers SA'
    url: 'http://www.paper.li'

- regex: 'phantomas/'
  name: 'Phantomas'
  category: 'Site Monitor'
  url: 'https://github.com/macbre/phantomas'

- regex: 'phpservermon'
  name: 'PHP Server Monitor'
  category: 'Site Monitor'
  url: 'https://github.com/phpservermon/phpservermon'
  producer:
    name: 'PHP Server Monitor'
    url: 'http://www.phpservermonitor.org/'

- regex: 'PocketParser'
  name: 'PocketParser'
  category: 'Read-it-later Service'
  url: 'https://getpocket.com/pocketparser_ua'
  producer:
    name: 'Pocket'
    url: 'https://getpocket.com/'

- regex: 'PritTorrent'
  name: 'PritTorrent'
  category: 'Crawler'
  url: 'https://github.com/astro/prittorrent'
  producer:
    name: 'Bitlove'
    url: 'http://bitlove.org/'

- regex: 'PRTG Network Monitor'
  name: 'PRTG Network Monitor'
  category: 'Network Monitor'
  url: 'https://www.paessler.com/prtg'
  producer:
    name: 'Paessler AG'
    url: 'https://www.paessler.com'

- regex: 'psbot'
  name: 'Picsearch bot'
  category: 'Search bot'
  url: 'http://www.picsearch.com/bot.html'
  producer:
    name: 'Picsearch'
    url: 'http://www.picsearch.com'

- regex: 'Pingdom(?:\.com|TMS)'
  name: 'Pingdom Bot'
  category: 'Site Monitor'
  url: ''
  producer:
    name: 'Pingdom AB'
    url: 'https://www.pingdom.com'

- regex: 'Quora Link Preview'
  name: 'Quora Link Preview'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Quora'
    url: 'http://www.quora.com'

- regex: 'Quora-Bot'
  name: 'Quora Bot'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Quora'
    url: 'https://www.quora.com/'

- regex: 'RamblerMail'
  name: 'RamblerMail Image Proxy'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Rambler&Co'
    url: 'https://rambler-co.ru/'

- regex: 'QuerySeekerSpider'
  name: 'QuerySeekerSpider'
  category: 'Crawler'
  url: 'http://queryseeker.com/bot.html'
  producer:
    name: 'QueryEye Inc.'
    url: 'http://queryeye.com'

- regex: 'Qwantify'
  name: 'Qwantify'
  category: 'Crawler'
  url: 'https://www.qwant.com/'
  producer:
    name: 'Qwant Corporation'
    url: 'https://www.qwant.com/'

- regex: 'Rainmeter'
  name: 'Rainmeter'
  category: 'Crawler'
  url: 'https://www.rainmeter.net'

- regex: 'redditbot'
  name: 'Reddit Bot'
  category: 'Social Media Agent'
  url: 'http://www.reddit.com/feedback'
  producer:
    name: 'reddit inc.'
    url: 'http://www.reddit.com'

- regex: 'Riddler'
  name: 'Riddler'
  category: 'Security search bot'
  url: 'https://riddler.io/about'
  producer:
    name: 'F-Secure'
    url: 'https://www.f-secure.com'

- regex: 'rogerbot'
  name: 'Rogerbot'
  category: 'Crawler'
  url: 'http://moz.com/help/pro/what-is-rogerbot-'
  producer:
    name: 'SEOmoz, Inc.'
    url: 'http://moz.com/'

- regex: 'ROI Hunter'
  name: 'ROI Hunter'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Roihunter a.s.'
    url: 'http://roihunter.com/'

- regex: 'SafeDNSBot'
  name: 'SafeDNSBot'
  category: 'Crawler'
  url: 'https://www.safedns.com/searchbot'
  producer:
    name: 'SafeDNS, Inc.'
    url: 'https://www.safedns.com/'

- regex: 'Scrapy'
  name: 'Scrapy'
  category: 'Crawler'
  url: 'http://scrapy.org'

- regex: 'Screaming Frog SEO Spider'
  name: 'Screaming Frog SEO Spider'
  category: 'Crawler'
  url: 'http://www.screamingfrog.co.uk/seo-spider'
  producer:
    name: 'Screaming Frog Ltd'
    url: 'http://www.screamingfrog.co.uk'

- regex: 'ScreenerBot'
  name: 'ScreenerBot'
  category: 'Crawler'
  url: 'http://www.screenerbot.com'
  producer:
    name: ''
    url: ''

- regex: 'SemrushBot'
  name: 'Semrush Bot'
  category: 'Crawler'
  url: 'http://www.semrush.com/bot.html'
  producer:
    name: 'SEMrush'
    url: 'http://www.semrush.com'

- regex: 'SensikaBot'
  name: 'Sensika Bot'
  category: ''
  url: ''
  producer:
    name: 'Sensika'
    url: 'http://sensika.com'

- regex: 'SEOENG(World)?Bot'
  name: 'SEOENGBot'
  category: 'Crawler'
  url: 'http://www.seoengine.com/seoengbot.htm'
  producer:
    name: 'SEO Engine'
    url: 'http://www.seoengine.com'

- regex: 'SEOkicks-Robot'
  name: 'SEOkicks-Robot'
  category: 'Crawler'
  url: 'http://www.seokicks.de/robot.html'
  producer:
    name: 'SEOkicks'
    url: 'https://www.seokicks.de/'

- regex: 'seoscanners\.net'
  name: 'Seoscanners.net'
  category: 'Crawler'
  url: ''

- regex: 'SkypeUriPreview'
  name: 'Skype URI Preview'
  category: 'Service Agent'
  url: ''
  producer:
    name: 'Skype Communications S.à.r.l.'
    url: 'https://www.skype.com'

- regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
  name: 'Seznam Bot'
  category: 'Search bot'
  url: 'http://www.mapy.cz/cz/seznambot.html'
  producer:
    name: 'Seznam.cz, a.s.'
    url: 'http://www.seznam.cz/'

- regex: 'shopify-partner-homepage-scraper'
  name: 'Shopify Partner'
  category: 'Crawler'
  url: 'https://www.shopify.com/partners'
  producer:
    name: 'Shopify'
    url: 'https://www.shopify.com/'

- regex: 'ShopWiki'
  name: 'ShopWiki'
  category: 'Search tools'
  url: 'http://www.shopwiki.com/wiki/Help:Bot'
  producer:
    name: 'ShopWiki Corp.'
    url: 'http://www.shopwiki.com'

- regex: 'SilverReader'
  name: 'SilverReader'
  category: 'Feed Fetcher'
  url: 'http://silverreader.com'
  producer:
    name: ''
    url: ''

- regex: 'SimplePie'
  name: 'SimplePie'
  category: 'Feed Parser'
  url: 'http://www.simplepie.org'
  producer:
    name: ''
    url: ''

- regex: 'SISTRIX Crawler'
  name: 'SISTRIX Crawler'
  category: 'Crawler'
  url: 'http://crawler.sistrix.net'
  producer:
    name: 'SISTRIX GmbH'
    url: 'http://www.sistrix.de'

- regex: 'compatible; (?:SISTRIX )?Optimizer'
  name: 'SISTRIX Optimizer'
  category: 'Crawler'
  url: 'https://optimizer.sistrix.com'
  producer:
    name: 'SISTRIX GmbH'
    url: 'http://www.sistrix.de'

- regex: 'SiteSucker'
  name: 'SiteSucker'
  category: 'Crawler'
  url: 'http://ricks-apps.com/osx/sitesucker/'

# '.' is escaped so that only the literal domain matches.
- regex: 'sixy\.ch'
  name: 'Sixy.ch'
  category: 'Site Monitor'
  url: 'http://sixy.ch'
  producer:
    name: 'Manuel Kasper'
    url: 'https://neon1.net/'

- regex: 'Slackbot|Slack-ImgProxy'
  name: 'Slackbot'
  category: 'Crawler'
  url: 'https://api.slack.com/robots'
  producer:
    name: 'Slack Technologies'
    url: 'http://slack.com'

# Entry layout used below:
#   regex    - pattern identifying the bot (single-quoted, so backslashes
#              are literal; escape '.' when a literal dot is meant)
#   name     - display name of the bot
#   category - bot category (optional)
#   url      - page describing the bot ('' when none is recorded)
#   producer - optional mapping with the operator's name and url

- regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
  name: 'Sogou Spider'
  category: 'Search bot'
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
  producer:
    name: 'Sohu, Inc.'
    url: 'http://www.sogou.com'

- regex: 'Sosospider|Sosoimagespider'
  name: 'Soso Spider'
  category: 'Search bot'
  url: 'http://help.soso.com/webspider.htm'
  producer:
    name: 'Tencent Holdings'
    url: 'http://www.soso.com'

- regex: 'Sprinklr'
  name: 'Sprinklr'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Sprinklr, Inc.'
    url: 'https://www.sprinklr.com/'

- regex: 'sqlmap/'
  name: 'sqlmap'
  category: 'Security Checker'
  url: 'http://sqlmap.org/'
  producer:
    name: 'sqlmap'
    url: 'http://sqlmap.org/'

- regex: 'SSL Labs'
  name: 'SSL Labs'
  category: 'Validator'
  url: 'https://www.ssllabs.com/about/assessment.html'
  producer:
    name: 'SSL Labs'
    url: 'https://www.ssllabs.com/about/assessment.html'

- regex: 'StatusCake'
  name: 'StatusCake'
  category: 'Site Monitor'
  url: 'https://www.statuscake.com'
  producer:
    name: 'StatusCake'
    url: 'https://www.statuscake.com'

- regex: 'Superfeedr bot'
  name: 'Superfeedr Bot'
  category: 'Feed Fetcher'
  url: ''
  producer:
    name: 'Superfeedr'
    url: 'https://superfeedr.com/'

- regex: 'Sparkler/[0-9]'
  name: 'Sparkler'
  category: 'Crawler'
  url: 'https://github.com/USCDataScience/sparkler'

- regex: 'Spinn3r'
  name: 'Spinn3r'
  category: 'Crawler'
  url: 'http://spinn3r.com/robot'
  producer:
    name: 'Tailrank Inc'
    url: 'http://spinn3r.com'

- regex: 'SputnikBot'
  name: 'Sputnik Bot'
  category: 'Crawler'
  url: ''

- regex: 'SputnikFaviconBot'
  name: 'Sputnik Favicon Bot'
  category: 'Crawler'
  url: ''

- regex: 'SputnikImageBot'
  name: 'Sputnik Image Bot'
  category: 'Crawler'
  url: ''

- regex: 'SurveyBot'
  name: 'Survey Bot'
  category: 'Search bot'
  url: 'http://www.domaintools.com/webmasters/surveybot.php'
  producer:
    name: 'Domain Tools'
    url: 'http://www.domaintools.com'

- regex: 'TarmotGezgin'
  name: 'Tarmot Gezgin'
  category: 'Search bot'
  url: 'http://www.tarmot.com/gezgin/'

- regex: 'TelegramBot'
  name: 'TelegramBot'
  url: 'https://telegram.org/blog/bot-revolution'

- regex: 'TLSProbe'
  name: 'TLSProbe'
  category: 'Security search bot'
  url: 'https://scan.trustnet.venafi.com/'
  producer:
    name: 'Venafi TrustNet'
    url: 'https://www.venafi.com'

- regex: 'TinEye-bot'
  name: 'TinEye Crawler'
  category: 'Search bot'
  url: 'http://www.tineye.com/crawler.html'
  producer:
    name: 'Idée Inc.'
    url: 'http://ideeinc.com'

- regex: 'Tiny Tiny RSS'
  name: 'Tiny Tiny RSS'
  category: 'Feed Fetcher'
  url: 'http://tt-rss.org'
  producer:
    name: ''
    url: ''

- regex: 'theoldreader\.com'
  name: 'theoldreader'
  category: 'Feed Reader'
  url: 'https://theoldreader.com'

- regex: 'trendictionbot'
  name: 'Trendiction Bot'
  category: 'Crawler'
  url: 'http://www.trendiction.de/bot'
  producer:
    name: 'Talkwalker Inc.'
    url: 'http://www.talkwalker.com'

- regex: 'TurnitinBot'
  name: 'TurnitinBot'
  category: 'Crawler'
  url: 'http://www.turnitin.com/robot/crawlerinfo.html'
  producer:
    name: 'iParadigms, LLC.'
    url: 'http://www.turnitin.com'

- regex: 'TweetedTimes Bot'
  name: 'TweetedTimes Bot'
  category: 'Crawler'
  url: 'http://tweetedtimes.com'
  producer:
    name: 'TweetedTimes'
    url: 'http://tweetedtimes.com/'

- regex: 'TweetmemeBot'
  name: 'Tweetmeme Bot'
  category: 'Crawler'
  url: 'http://tweetmeme.com/'
  producer:
    name: 'Mediasift'
    url: ''

- regex: 'Twingly Recon'
  name: 'Twingly Recon'
  category: 'Crawler'
  producer:
    name: 'Twingly'
    url: 'https://www.twingly.com'

- regex: 'Twitterbot'
  name: 'Twitterbot'
  category: 'Social Media Agent'
  url: 'https://dev.twitter.com/docs/cards/getting-started'
  producer:
    name: 'Twitter'
    url: 'http://www.twitter.com'

- regex: 'UniversalFeedParser'
  name: 'UniversalFeedParser'
  category: 'Feed Fetcher'
  url: 'https://github.com/kurtmckee/feedparser'
  producer:
    name: 'Kurt McKee'
    url: 'https://github.com/kurtmckee'

- regex: 'via secureurl\.fwdcdn\.com'
  name: 'UkrNet Mail Proxy'
  category: 'Crawler'
  url: ''
  producer:
    name: 'UkrNet Ltd'
    url: 'https://www.ukr.net/'

- regex: 'Uptimebot'
  name: 'Uptimebot'
  category: 'Site Monitor'
  url: 'https://uptime.com/uptimebot'
  producer:
    name: 'Uptime'
    url: 'https://uptime.com'

- regex: 'UptimeRobot'
  name: 'Uptime Robot'
  category: 'Site Monitor'
  url: ''
  producer:
    name: 'Uptime Robot'
    url: 'http://uptimerobot.com'

- regex: 'URLAppendBot'
  name: 'URLAppendBot'
  category: 'Crawler'
  url: 'http://www.profound.net/urlappendbot.html'
  producer:
    name: 'Profound Networks'
    url: 'http://www.profound.net'

- regex: 'Vagabondo'
  name: 'Vagabondo'
  category: 'Crawler'
  url: ''
  producer:
    name: 'WiseGuys'
    url: 'http://www.wise-guys.nl/'

- regex: 'vkShare; '
  name: 'VK Share Button'
  category: 'Crawler'
  url: 'http://vk.com/dev/Share'
  producer:
    name: 'VK'
    url: 'http://vk.com/'

- regex: 'VSMCrawler'
  name: 'Visual Site Mapper Crawler'
  category: 'Crawler'
  url: 'http://www.visualsitemapper.com/crawler'
  producer:
    name: 'Alentum Software Ltd.'
    url: 'http://www.alentum.com'

- regex: 'Jigsaw'
  name: 'W3C CSS Validator'
  category: 'Validator'
  url: 'http://jigsaw.w3.org/css-validator'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'

- regex: 'W3C_I18n-Checker'
  name: 'W3C I18N Checker'
  category: 'Validator'
  url: 'http://validator.w3.org/i18n-checker'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'

- regex: 'W3C-checklink'
  name: 'W3C Link Checker'
  category: 'Validator'
  url: 'http://validator.w3.org/checklink'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'

- regex: 'W3C_Validator|Validator\.nu'
  name: 'W3C Markup Validation Service'
  category: 'Validator'
  url: 'http://validator.w3.org/services'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'

- regex: 'W3C-mobileOK'
  name: 'W3C MobileOK Checker'
  category: 'Validator'
  url: 'http://validator.w3.org/mobile'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'

- regex: 'W3C_Unicorn'
  name: 'W3C Unified Validator'
  category: 'Validator'
  url: 'http://validator.w3.org/unicorn'
  producer:
    name: 'W3C'
    url: 'http://www.w3.org'

- regex: 'Wappalyzer'
  name: 'Wappalyzer'
  url: 'https://github.com/AliasIO/Wappalyzer'
  producer:
    name: 'AliasIO'
    url: 'https://github.com/AliasIO'

- regex: 'PTST/'
  name: 'WebPageTest'
  category: 'Site Monitor'
  url: 'https://www.webpagetest.org'

- regex: 'WeSEE'
  name: 'WeSEE:Search'
  category: 'Search bot'
  url: 'http://www.wesee.com/bot'
  producer:
    name: 'WeSEE Ltd'
    url: 'http://www.wesee.com'

- regex: 'WebbCrawler'
  name: 'WebbCrawler'
  category: 'Crawler'
  url: 'http://badcheese.com/crawler.html'
  producer:
    name: 'Steve Webb'
    url: 'http://badcheese.com'

- regex: 'websitepulse[+ ]checker'
  name: 'WebSitePulse'
  category: 'Site Monitor'
  url: 'http://www.websitepulse.com/'
  producer:
    name: 'WebSitePulse'
    url: 'http://www.websitepulse.com/'

- regex: 'WordPress'
  name: 'WordPress'
  category: 'Service Agent'
  url: 'https://wordpress.org/'
  producer:
    name: 'Wordpress.org'
    url: 'https://wordpress.org/'

- regex: 'Wotbox'
  name: 'Wotbox'
  category: 'Search bot'
  url: 'http://www.wotbox.com/bot/'
  producer:
    name: 'Wotbox'
    url: 'http://www.wotbox.com'

- regex: 'XenForo'
  name: 'XenForo'
  category: 'Service Agent'
  url: 'https://xenforo.com/'
  producer:
    name: 'XenForo Ltd.'
    url: 'https://xenforo.com/'

- regex: 'yacybot'
  name: 'YaCy'
  category: 'Search bot'
  url: 'http://yacy.net/bot.html'
  producer:
    name: 'YaCy'
    url: 'http://yacy.net'

- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
  name: 'Yahoo! Slurp'
  category: 'Search bot'
  url: 'http://help.yahoo.com/ysearch/slurp'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'

- regex: 'Yahoo Link Preview|Yahoo:LinkExpander:Slingstone'
  name: 'Yahoo! Link Preview'
  category: 'Crawler'
  url: 'https://help.yahoo.com/kb/mail/yahoo-link-preview-SLN23615.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'

- regex: 'YahooMailProxy'
  name: 'Yahoo! Mail Proxy'
  category: 'Service Agent'
  url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'

- regex: 'YahooCacheSystem'
  name: 'Yahoo! Cache System'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'

- regex: 'Y!J-BRW'
  name: 'Yahoo! Japan BRW'
  category: 'Crawler'
  url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
  producer:
    name: 'Yahoo! Japan Corp.'
    url: 'https://www.yahoo.co.jp/'

- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
  name: 'Yandex Bot'
  category: 'Search bot'
  url: 'http://www.yandex.com/bots'
  producer:
    name: 'Yandex LLC'
    url: 'http://company.yandex.com'

- regex: 'Yeti|NaverJapan'
  name: 'Yeti/Naverbot'
  category: 'Search bot'
  url: 'http://help.naver.com/robots/'
  producer:
    name: 'Naver'
    url: 'http://www.naver.com'

- regex: 'YoudaoBot'
  name: 'Youdao Bot'
  category: 'Search bot'
  url: 'http://www.youdao.com/help/webmaster/spider'
  producer:
    name: 'NetEase, Inc.'
    url: 'http://corp.163.com'

- regex: 'YOURLS v[0-9]'
  name: 'Yourls'
  category: 'Crawler'
  url: 'http://yourls.org'

- regex: 'YRSpider|YYSpider'
  name: 'Yunyun Bot'
  category: 'Search bot'
  url: 'http://www.yunyun.com/SiteInfo.php?r=about'
  producer:
    name: 'YunYun'
    url: 'http://www.yunyun.com'

- regex: 'zgrab'
  name: 'zgrab'
  category: 'Security Checker'
  url: 'https://github.com/zmap/zgrab'

- regex: 'Zookabot'
  name: 'Zookabot'
  category: 'Crawler'
  url: 'http://zookabot.com'
  producer:
    name: 'Hwacha ApS'
    url: 'http://hwacha.dk'

- regex: 'ZumBot'
  name: 'ZumBot'
  category: 'Search bot'
  url: 'http://help.zum.com/inquiry'
  producer:
    name: 'ZUM internet'
    url: 'http://www.zuminternet.com/'

- regex: 'YottaaMonitor'
  name: 'Yottaa Site Monitor'
  category: 'Site Monitor'
  url: 'http://www.yottaa.com/products/site-monitor'
  producer:
    name: 'Yottaa'
    url: 'http://www.yottaa.com/'

- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857.*'
  name: 'Yahoo Gemini'
  category: 'Crawler'
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
  producer:
    name: 'Yahoo! Inc.'
    url: 'http://www.yahoo.com'

- regex: '.*Java.*outbrain'
  name: 'Outbrain'
  category: 'Crawler'
  url: ''
  producer:
    name: 'Outbrain'
    url: 'http://www.outbrain.com/'

- regex: 'HubPages.*crawlingpolicy'
  name: 'HubPages'
  category: 'Crawler'
  url: 'https://hubpages.com/help/crawlingpolicy'
  producer:
    name: 'HubPages, Inc.'
    url: 'https://discover.hubpages.com/'

- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
  name: 'Pinterest'
  category: 'Crawler'
  url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
  producer:
    name: 'Pinterest'
    url: 'https://www.pinterest.com/'

- regex: 'Site24x7'
  name: 'Site24x7 Website Monitoring'
  category: 'Site Monitor'
  url: 'https://www.site24x7.com/site24x7-faq.html'
  producer:
    name: 'Site24x7'
    url: 'https://www.site24x7.com'

- regex: 's~snapchat-proxy'
  name: 'Snapchat Proxy'
  category: 'Crawler'
  url: 'https://www.snapchat.com'
  producer:
    name: 'Snapchat Inc.'
    url: 'https://www.snapchat.com'

# Double quotes are used here only because the values contain an apostrophe.
- regex: "Let's Encrypt validation server"
  name: "Let's Encrypt Validation"
  category: 'Service Agent'
  url: 'https://letsencrypt.org/how-it-works/'
  producer:
    name: "Let's Encrypt"
    url: 'https://letsencrypt.org'

- regex: 'GrapeshotCrawler'
  name: 'Grapeshot'
  category: 'Crawler'
  url: 'https://www.grapeshot.com/crawler'
  producer:
    name: 'Grapeshot'
    url: 'https://www.grapeshot.com'

- regex: 'www\.monitor\.us'
  name: 'Monitor.Us'
  category: 'Site Monitor'
  url: 'http://www.monitor.us'
  producer:
    name: 'Monitor.Us'
    url: 'http://www.monitor.us'

- regex: 'Catchpoint'
  name: 'Catchpoint'
  category: 'Site Monitor'
  url: 'https://www.catchpoint.com/'
  producer:
    name: 'Catchpoint Systems'
    url: 'https://www.catchpoint.com/'

- regex: 'bitlybot'
  name: 'BitlyBot'
  category: 'Crawler'
  url: 'https://bitly.com'
  producer:
    name: 'Bitly, Inc.'
    url: 'https://bitly.com'

- regex: 'Zao/'
  name: 'Zao'
  category: 'Crawler'

# The following entries record only a pattern and a name.
- regex: 'lycos'
  name: 'Lycos'

- regex: 'Slurp'
  name: 'Inktomi Slurp'

- regex: 'Speedy Spider'
  name: 'Speedy'

- regex: 'ScoutJet'
  name: 'ScoutJet'

- regex: 'nrsbot|netresearch'
  name: 'NetResearchServer'

- regex: 'scooter'
  name: 'Scooter'

- regex: 'gigabot'
  name: 'Gigabot'

- regex: 'charlotte'
  name: 'Charlotte'

- regex: 'Pompos'
  name: 'Pompos'

- regex: 'ichiro'
  name: 'ichiro'

- regex: 'PagePeeker'
  name: 'PagePeeker'

- regex: 'WebThumbnail'
  name: 'WebThumbnail'

- regex: 'Willow Internet Crawler'
  name: 'Willow Internet Crawler'

- regex: 'EmailWolf'
  name: 'EmailWolf'

- regex: 'NetLyzer FastProbe'
  name: 'NetLyzer FastProbe'

- regex: 'AdMantX.*admantx\.com'
  name: 'ADMantX'

- regex: 'Server Density Service Monitoring.*'
  name: 'Server Density'

- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
  name: 'RSSRadio Bot'

# Assorted bot tokens that all map to the shared 'Generic Bot' name.
# 'vortex' is excluded when followed by ' Build'.
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|tweetedtimes\.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent)'
  name: 'Generic Bot'

# Anchored with '^': these only match at the start of the tested string.
- regex: '^sentry'
  name: 'Sentry Bot'
  producer:
    name: 'Sentry'
    url: 'https://sentry.io'

- regex: '^Spotify'
  name: 'Spotify'
  producer:
    name: 'Spotify'
    url: 'https://www.spotify.com'

- regex: 'The Knowledge AI'
  name: 'The Knowledge AI'
  category: 'Crawler'

- regex: 'Embedly'
  name: 'Embedly'
  category: 'Crawler'
  url: 'https://support.embed.ly/hc/en-us'
  producer:
    name: 'A Medium, Corp.'
    url: 'https://medium.com/'

- regex: 'BrandVerity'
  name: 'BrandVerity'
  category: 'Crawler'
  url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
  producer:
    name: 'BrandVerity, Inc.'
    url: 'https://www.brandverity.com/'

- regex: 'Kaspersky Lab CFR link resolver'
  name: 'Kaspersky'
  category: 'Security Checker'
  url: 'https://www.kaspersky.com/'
  producer:
    name: 'AO Kaspersky Lab'
    url: 'https://www.kaspersky.com/'

- regex: 'eZ Publish Link Validator'
  name: 'eZ Publish Link Validator'
  category: 'Crawler'
  url: 'https://ez.no/'
  producer:
    name: 'eZ Systems AS'
    url: 'https://ez.no/'

- regex: 'woorankreview'
  name: 'WooRank'
  category: 'Search bot'
  url: 'https://www.woorank.com/'
  producer:
    name: 'WooRank sprl'
    url: 'https://www.woorank.com/'

- regex: '(Match|LinkCheck) by Siteimprove\.com'
  name: 'Siteimprove'
  category: 'Search bot'
  url: 'https://siteimprove.com/'
  producer:
    name: 'Siteimprove GmbH'
    url: 'https://siteimprove.com/'

- regex: 'CATExplorador'
  name: 'CATExplorador'
  category: 'Search bot'
  url: 'https://fundacio.cat/ca/domini/'
  producer:
    name: 'Fundació puntCAT'
    url: 'https://fundacio.cat/ca/domini/'

- regex: 'Buck'
  name: 'Buck'
  category: 'Search bot'
  url: 'https://hypefactors.com/'
  producer:
    name: 'Hypefactors A/S'
    url: 'https://hypefactors.com/'

- regex: 'tracemyfile'
  name: 'TraceMyFile'
  category: 'Search bot'
  url: 'https://www.tracemyfile.com/'
  producer:
    name: 'Idee Inc.'
    url: 'http://ideeinc.com/'

- regex: 'zelist\.ro feed parser'
  name: 'Ze List'
  category: 'Feed Fetcher'
  url: 'https://www.zelist.ro/'
  producer:
    name: 'Treeworks SRL'
    url: 'https://www.tree.ro/'

- regex: 'weborama-fetcher'
  name: 'Weborama'
  category: 'Search bot'
  url: 'https://weborama.com/'
  producer:
    name: 'Weborama SA'
    url: 'https://weborama.com/'

- regex: 'BoardReader Favicon Fetcher'
  name: 'BoardReader'
  category: 'Search bot'
  url: 'https://boardreader.com/'
  producer:
    name: 'Effyis Inc'
    url: 'https://boardreader.com/'

- regex: 'IDG/IT'
  name: 'IDG/IT'
  category: 'Search bot'
  url: 'https://spaziodati.eu/'
  producer:
    name: 'SpazioDati S.r.l.'
    url: 'https://spaziodati.eu/'

- regex: 'Bytespider'
  name: 'Bytespider'
  category: 'Search bot'
  url: 'https://bytedance.com/'
  producer:
    name: 'ByteDance Ltd.'
    url: 'https://bytedance.com/'

- regex: 'WikiDo'
  name: 'WikiDo'
  category: 'Search bot'
  url: 'https://www.wikido.com/'
  producer:
    name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
    url: 'https://www.wikido.com/'

- regex: 'AwarioSmartBot'
  name: 'Awario'
  category: 'Search bot'
  url: 'https://awario.com/bots.html'
  producer:
    name: 'Awario'
    url: 'https://awario.com/'

- regex: 'AwarioRssBot'
  name: 'Awario'
  category: 'Feed Fetcher'
  url: 'https://awario.com/bots.html'
  producer:
    name: 'Awario'
    url: 'https://awario.com/'

- regex: 'oBot'
  name: 'oBot'
  category: 'Search bot'
  url: 'https://www.xforce-security.com/crawler/'
  producer:
    name: 'IBM Germany Research & Development GmbH'
    url: 'https://exchange.xforce.ibmcloud.com/'

- regex: 'SMTBot'
  name: 'SMTBot'
  category: 'Search bot'
  url: 'https://www.similartech.com/smtbot'
  producer:
    name: 'SimilarTech Ltd.'
    url: 'https://www.similartech.com/'

- regex: 'LCC'
  name: 'LCC'
  category: 'Search bot'
  url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
  producer:
    name: 'Universität Leipzig'
    url: 'https://www.uni-leipzig.de/'

- regex: 'Startpagina-Linkchecker'
  name: 'Startpagina Linkchecker'
  category: 'Search bot'
  url: 'https://www.startpagina.nl/linkchecker'
  producer:
    name: 'Startpagina B.V.'
    url: 'https://www.startpagina.nl/'

- regex: 'GTmetrix'
  name: 'GTmetrix'
  category: 'Crawler'
  url: 'https://gtmetrix.com/'
  producer:
    name: 'Carbon60 Operating Co. Ltd.'
    url: 'https://www.carbon60.com/'

- regex: 'Nutch'
  name: 'Nutch-based Bot'
  category: 'Crawler'
  url: 'https://nutch.apache.org'
  producer:
    name: 'The Apache Software Foundation'
    url: 'https://www.apache.org/foundation/'

- regex: 'Seobility'
  name: 'Seobility'
  category: 'Crawler'
  url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'

- regex: 'Vercelbot'
  name: 'Vercel Bot'
  category: 'Service bot'
  url: 'https://vercel.com'

- regex: 'Grammarly'
  name: 'Grammarly'
  category: 'Service bot'
  url: 'https://www.grammarly.com'

- regex: 'Robozilla'
  name: 'Robozilla'
  category: 'Crawler'

- regex: 'Domains Project'
  name: 'Domains Project'
  category: 'Crawler'
  url: 'https://domainsproject.org'

- regex: 'PetalBot'
  name: 'Petal Bot'
  category: 'Crawler'
  url: 'https://aspiegel.com/petalbot'

- regex: 'SerendeputyBot'
  name: 'Serendeputy Bot'
  category: 'Crawler'
  url: 'https://serendeputy.com/about/serendeputy-bot'

- regex: 'ias-va.*admantx.*service-fetcher'
  name: 'ADmantX Service Fetcher'
  category: 'Service bot'
  url: 'https://www.admantx.com/service-fetcher.html'

- regex: 'SemanticScholarBot'
  name: 'Semantic Scholar Bot'
  category: 'Crawler'
  url: 'https://www.semanticscholar.org/crawler'

- regex: 'VelenPublicWebCrawler'
  name: 'Velen Public Web Crawler'
  category: 'Crawler'
  url: 'https://hunter.io/robot'

- regex: 'Barkrowler'
  name: 'Barkrowler'
  category: 'Crawler'
  url: 'http://www.exensa.com/crawl'

- regex: 'BDCbot'
  name: 'BDCbot'
  category: 'Crawler'
  url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
  producer:
    name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
    url: 'https://bigdatacorp.com.br/'

- regex: 'adbeat'
  name: 'Adbeat'
  category: 'Crawler'
  url: 'https://www.adbeat.com/operation_policy'
  producer:
    name: 'PPC Labs LLC'
    url: 'https://www.adbeat.com/'

- regex: 'BW/(?:(\d+[\.\d]+))'
  name: 'BuiltWith'
  category: 'Crawler'
  url: 'https://builtwith.com/biup'
  producer:
    name: 'BuiltWith Pty Ltd'
    url: 'https://builtwith.com/'

- regex: 'https://whatis\.contentkingapp\.com'
  name: 'ContentKing'
  category: 'Site Monitor'
  url: 'https://whatis.contentkingapp.com/'
  producer:
    name: 'ContentKing BV'
    url: 'https://www.contentkingapp.com/'

- regex: 'MicroAdBot'
  name: 'MicroAdBot'
  category: 'Crawler'
  url: 'https://www.microad.co.jp/'
  producer:
    name: 'MicroAd, Inc.'
    url: 'https://www.microad.co.jp/'

- regex: 'PingAdmin\.Ru'
  name: 'PingAdmin.Ru'
  category: 'Site Monitor'
  url: 'https://ping-admin.ru/'

- regex: 'notifyninja.+monitoring'
  name: 'Notify Ninja'
  category: 'Site Monitor'
  url: 'http://notifyninja.com'

- regex: 'WebDataStats'
  name: 'WebDataStats'
  category: 'Crawler'
  url: 'https://webdatastats.com/policy.html'
  producer:
    name: 'WebTehRazrabotka LLC'
    url: 'https://webdatastats.com/'

- regex: 'parse\.ly scraper'
  name: 'parse.ly'
  category: 'Crawler'
  url: 'https://www.parse.ly/help/integration/crawler'
  producer:
    name: 'Parsely, Inc.'
    url: 'https://www.parse.ly/'

- regex: 'Nimbostratus-Bot'
  name: 'Nimbostratus Bot'
  category: 'Site Monitor'
  url: 'http://cloudsystemnetworks.com'

- regex: 'HeartRails_Capture/\d'
  name: 'Heart Rails Capture'
  category: 'Service Agent'
  url: 'http://capture.heartrails.com'

- regex: 'Project-Resonance'
  name: 'Project Resonance'
  category: 'Crawler'
  url: 'http://project-resonance.com'

- regex: 'DataXu/\d'
  name: 'DataXu'
  category: 'Service Agent'
  url: 'https://advertising.roku.com/dataxu'
  producer:
    name: 'Roku, Inc.'
    url: 'https://roku.com'

- regex: 'Cocolyzebot'
  name: 'Cocolyzebot'
  category: 'Crawler'
  url: 'https://cocolyze.com/en/cocolyzebot'
  producer:
    name: 'VSI INNOVATION SAS'
    url: 'https://vsi-innovation.com/'

- regex: 'veryhip'
  name: 'VeryHip'
  category: 'Crawler'
  url: 'https://veryhip.com/'
  producer:
    name: 'VeryHip'
    url: 'https://veryhip.com/'

- regex: 'LinkpadBot'
  name: 'LinkpadBot'
  category: 'Crawler'
  url: 'https://www.linkpad.org/'
  producer:
    name: 'Solomono LLC'
    url: 'https://www.linkpad.org/'

- regex: 'MuscatFerret'
  name: 'MuscatFerret'
  category: 'Crawler'
  url: 'http://www.webtop.com/'

- regex: 'PageThing\.com'
  name: 'PageThing'
  category: 'Crawler'
  url: 'https://www.pagething.com/'
  producer:
    name: 'SPECIALNOISE LTD'
    url: 'https://www.specialnoise.com/'

- regex: 'ArchiveBox'
  name: 'ArchiveBox'
  category: 'Crawler'
  url: 'https://archivebox.io/'
  producer:
    name: ''
    url: ''

- regex: 'Choosito'
  name: 'Choosito'
  category: 'Crawler'
  url: 'https://www.choosito.com/'
  producer:
    name: 'Choosito! Inc.'
    url: 'https://www.choosito.com/'

- regex: 'datagnionbot'
  name: 'datagnionbot'
  category: 'Crawler'
  url: 'https://www.datagnion.com/bot.html'
  producer:
    name: 'DATAGNION GMBH'
    url: 'https://www.datagnion.com/'

- regex: 'WhatCMS'
  name: 'WhatCMS'
  category: 'Crawler'
  url: 'https://whatcms.org/'
  producer:
    name: 'Nineteen Ten LLC'
    url: 'https://whatcms.org/'

- regex: 'httpx'
  name: 'httpx'
  category: 'Crawler'
  url: 'https://github.com/projectdiscovery/httpx'
  producer:
    name: ''
    url: ''

- regex: 'scaninfo@expanseinc\.com'
  name: 'Expanse'
  category: 'Security Checker'
  url: 'https://expanse.co/'
  producer:
    name: 'Expanse Inc.'
    url: 'https://expanse.co/'

- regex: 'HuaweiWebCatBot'
  name: 'HuaweiWebCatBot'
  category: 'Crawler'
  url: 'https://isecurity.huawei.com'
  producer:
    name: 'Huawei Technologies Co., Ltd.'
    url: 'https://huawei.com'

- regex: 'Hatena-Favicon'
  name: 'Hatena Favicon'
  category: 'Crawler'
  url: 'https://www.hatena.ne.jp/faq/'
  producer:
    name: 'Hatena Co., Ltd.'
    url: 'https://www.hatena.ne.jp'

- regex: 'RyowlEngine/(\d+)'
  name: 'Ryowl'
  category: 'Crawler'
  url: 'https://ryowl.org'

- regex: 'OdklBot/(\d+)'
  name: 'Odnoklassniki Bot'
  category: 'Crawler'
  url: 'https://odnoklassniki.ru'

- regex: 'Mediatoolkitbot'
  name: 'Mediatoolkit Bot'
  category: 'Crawler'
  url: 'https://mediatoolkit.com'

- regex: 'ZoominfoBot'
  name: 'ZoominfoBot'
  category: 'Crawler'
  url: 'https://www.zoominfo.com'

# In the entries below, '([\d.]+)' captures a dotted version number that
# follows the product token.
- regex: 'WeViKaBot/([\d.]+)'
  name: 'WeViKaBot'
  category: 'Crawler'
  url: 'http://www.wevika.de'

- regex: 'SEOkicks'
  name: 'SEOkicks'
  category: 'Crawler'
  url: 'https://www.seokicks.de/robot.html'

- regex: 'Plukkie/([\d.]+)'
  name: 'Plukkie'
  category: 'Crawler'
  url: 'http://www.botje.com/plukkie.htm'

- regex: 'proximic;'
  name: 'Comscore'
  category: 'Crawler'
  url: 'https://www.comscore.com/Web-Crawler'

- regex: 'SurdotlyBot/([\d.]+)'
  name: 'SurdotlyBot'
  category: 'Crawler'
  url: 'http://sur.ly/bot.html'

- regex: 'Gowikibot/([\d.]+)'
  name: 'Gowikibot'
  category: 'Crawler'
  url: 'http://www.gowikibot.com'

- regex: 'SabsimBot/([\d.]+)'
  name: 'SabsimBot'
  category: 'Crawler'
  url: 'https://sabsim.com'

# NOTE(review): url host 'umtel.com' does not match the bot name 'Lumtel' -
# possibly a typo; verify before changing.
- regex: 'LumtelBot/([\d.]+)'
  name: 'LumtelBot'
  category: 'Crawler'
  url: 'https://umtel.com'

- regex: 'PiplBot'
  name: 'PiplBot'
  category: 'Crawler'
  url: 'http://www.pipl.com/bot'

- regex: 'woobot/([\d.]+)'
  name: 'WooRank'
  category: 'Crawler'
  url: 'https://www.woorank.com/bot'

- regex: 'Cookiebot/([\d.]+)'
  name: 'Cookiebot'
  category: 'Crawler'
  url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
  producer:
    name: 'Cybot A/S'
    url: 'https://www.cybot.com/'

# Entry layout used below:
#   regex    - pattern identifying the bot (single-quoted, so backslashes
#              are literal; escape '.' when a literal dot is meant)
#   name     - display name of the bot
#   category - bot category (optional)
#   url      - page describing the bot (optional)
#   producer - optional mapping with the operator's name and url
# '([\d.]+)' captures a dotted version number that follows the product token.

- regex: 'NetSystemsResearch'
  name: 'NetSystemsResearch'
  category: 'Security Checker'
  url: 'https://www.netsystemsresearch.com/'
  producer:
    name: 'NET SYSTEMS RESEARCH LLC'
    url: 'https://www.netsystemsresearch.com/'

- regex: 'CensysInspect/([\d.]+)'
  name: 'CensysInspect'
  category: 'Security Checker'
  url: 'https://about.censys.io/'
  producer:
    name: 'Censys, Inc.'
    url: 'https://censys.io/'

- regex: 'gdnplus\.com'
  name: 'GDNP'
  category: 'Crawler'
  url: 'https://gdnplus.com/'
  producer:
    name: 'Global Digital Network Plus, LLC'
    url: 'https://gdnplus.com/'

- regex: 'WellKnownBot/([\d.]+)'
  name: 'WellKnownBot'
  category: 'Crawler'
  url: 'https://well-known.dev'

- regex: 'Adsbot/([\d.]+)'
  name: 'Adsbot'
  category: 'Crawler'
  url: 'https://seostar.co/robot/'

- regex: 'MTRobot/([\d.]+)'
  name: 'MTRobot'
  category: 'Crawler'
  url: 'https://metrics-tools.de/robot.html'
  producer:
    name: 'Metrics Tools'
    url: 'https://metrics-tools.de/'

- regex: 'serpstatbot/([\d.]+)'
  name: 'serpstatbot'
  category: 'Crawler'
  url: 'http://serpstatbot.com/'
  producer:
    name: 'Netpeak Ltd'
    url: 'https://netpeak.net/'

- regex: 'colly'
  name: 'colly'
  category: 'Crawler'
  url: 'https://github.com/gocolly/colly/'

- regex: 'l9tcpid/v([\d.]+)'
  name: 'l9tcpid'
  category: 'Security Checker'
  url: 'https://github.com/LeakIX/l9tcpid'

- regex: 'MegaIndex\.ru/([\d.]+)'
  name: 'MegaIndex'
  category: 'Crawler'
  url: 'https://megaindex.com/crawler'

- regex: 'Seekport'
  name: 'Seekport'
  category: 'Crawler'
  url: 'http://www.seekport.com/'
  producer:
    name: 'SISTRIX GmbH'
    url: 'https://www.sistrix.de/'

- regex: 'seolyt/([\d.]+)'
  name: 'seolyt'
  category: 'Crawler'
  url: 'https://seolyt.com/'

- regex: 'YaK/([\d.]+)'
  name: 'YaK'
  category: 'Crawler'
  url: 'https://www.linkfluence.com/'
  producer:
    name: 'Linkfluence SAS'
    url: 'https://www.linkfluence.com/'

- regex: 'KomodiaBot/([\d.]+)'
  name: 'KomodiaBot'
  category: 'Crawler'
  url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
  producer:
    name: 'Komodia Inc.'
    url: 'https://www.komodia.com/'

- regex: 'Neevabot/([\d.]+)'
  name: 'Neevabot'
  category: 'Search bot'
  url: 'https://neeva.com/neevabot'
  producer:
    name: 'Neeva Inc.'
    url: 'https://neeva.com/'

- regex: 'LinkPreview/([\d.]+)'
  name: 'LinkPreview'
  category: 'Service Agent'
  url: 'https://www.linkpreview.net/'

- regex: 'JungleKeyThumbnail/([\d.]+)'
  name: 'JungleKeyThumbnail'
  category: 'Crawler'
  url: 'https://junglekey.com/'

- regex: 'rocketmonitor(?: |bot/)([\d.]+)'
  name: 'RocketMonitorBot'
  category: 'Site Monitor'
  url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
  producer:
    name: 'Radio Mast, Inc.'
    url: 'https://www.radiomast.io/'

- regex: 'SitemapParser-VIPnytt/([\d.]+)'
  name: 'SitemapParser-VIPnytt'
  category: 'Crawler'
  url: 'https://github.com/VIPnytt/SitemapParser/'

# Anchored with '^': only matches at the start of the tested string.
- regex: '^Turnitin'
  name: 'Turnitin'
  category: 'Crawler'
  url: 'https://turnitin.com/robot/crawlerinfo.html'

- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
  name: 'Dotcom Monitor'
  category: 'Site Monitor'
  url: 'https://www.dotcom-monitor.com'

- regex: 'ThinkChaos/'
  name: 'ThinkChaos'
  category: 'Crawler'

- regex: 'DataForSeoBot'
  name: 'DataForSeoBot'
  category: 'Crawler'
  url: 'https://dataforseo.com/dataforseo-bot'

# Generic detections
# Catch-all for tokens ending in bot, crawler, crawl, checker, archiver,
# transcoder or spider, followed by a non-letter or the end of the string.
# 'bot' is skipped when preceded by 'cu', 'power ' / 'power_' or 'm ' / 'm_',
# and when followed by ' TAB' / '_TAB' or by an optional space/underscore and
# a number in the 50-59 range.
# NOTE(review): this entry is last in the list, presumably so the specific
# entries above take precedence - confirm with the consuming parser.
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
  name: 'Generic Bot'