1#!/usr/bin/env python 2# -*- coding: utf-8 -*- 3""" Simple class to interact with VirusTotal's Public and Private API as well as VirusTotal Intelligence. 4 5:copyright: (c) 2014 by Josh "blacktop" Maine. 6:license: MIT, see LICENSE for more details. 7 8The APIs are documented at: 9https://www.virustotal.com/en/documentation/public-api/ 10https://www.virustotal.com/en/documentation/private-api/ 11https://www.virustotal.com/intelligence/help/automation/ 12 13EXAMPLE USAGE::: 14 15from virus_total_apis import PublicApi as vtPubAPI 16 17vt = vtPubAPI(<INSERT_API_KEY_HERE>) 18response = vt.get_file_report('44cda81782dc2a346abd7b2285530c5f') 19 20print json.dumps(response, sort_keys=False, indent=4) 21""" 22 23import os 24from datetime import datetime, timedelta 25 26try: 27 import requests 28except ImportError: 29 pass 30 31 32class PublicApi(): 33 """ VirusTotal's Public API lets you upload and scan files, submit and scan URLs, access finished scan reports 34 and make automatic comments on URLs and samples without the need of using the HTML website interface. In other 35 words, it allows you to build simple scripts to access the information generated by VirusTotal. 36 37 The chosen format for the API is HTTP POST requests with JSON object responses and it is limited to at most 4 38 requests of any nature in any given 1 minute time frame. If you run a honeyclient, honeypot or any other 39 automation that is going to provide resources to VirusTotal and not only retrieve reports you are entitled to 40 a higher request rate quota, ask for it at contact@virustotal.com and you will receive special privileges when 41 performing the calls to the API. Note that you will only have a higher request rate quota when asking for files 42 or URLs that you previously sent to VirusTotal. 43 44 In this second version we have improved the response format so as to ease the task of retrieving results, we 45 have also introduced batch requests, you may now ask for several items with a sole API call (as long as you 46 cohere with the request rate limit). 47 48 The public API is a free service, available for any website or application that is free to consumers. The API 49 must not be used in commercial products or services, it can not be used as a substitute for antivirus products 50 and it can not be integrated in any project that may harm the antivirus industry directly or indirectly. 51 Noncompliance of these terms will result in inmediate permanent ban of the infractor individual or organization. 52 """ 53 54 def __init__(self, api_key=None, proxies=None): 55 self.api_key = api_key 56 self.proxies = proxies 57 self.base = 'https://www.virustotal.com/vtapi/v2/' 58 self.version = 2 59 if api_key is None: 60 raise ApiError("You must supply a valid VirusTotal API key.") 61 62 def scan_file(self, this_file, from_disk=True, filename=None, timeout=None): 63 """ Submit a file to be scanned by VirusTotal. 64 65 The VirusTotal API allows you to send files. Before performing your submissions we encourage you to retrieve 66 the latest report on the files, if it is recent enough you might want to save time and bandwidth by making use 67 of it. File size limit is 32MB. If you have a need to scan larger files, please contact us, and tell us your 68 use case. 69 70 :param this_file: The file to be uploaded. (32MB file size limit) 71 :param from_disk: If True we read the file contents from disk using this_file as filepath. If False this_file 72 is the actual file object. 73 :param filename: Specify the filename, this overwrites the filename if we read a file from disk. 74 :param timeout: The amount of time in seconds the request should wait before timing out. 75 76 :return: JSON response that contains scan_id and permalink. 77 """ 78 params = {'apikey': self.api_key} 79 if from_disk: 80 if not filename: 81 filename = os.path.basename(this_file) 82 files = {'file': (filename, open(this_file, 'rb').read())} 83 else: 84 if filename: 85 files = {'file': (filename, this_file)} 86 else: 87 files = {'file': this_file} 88 89 try: 90 response = requests.post(self.base + 'file/scan', 91 files=files, 92 params=params, 93 proxies=self.proxies, 94 timeout=timeout) 95 except requests.RequestException as e: 96 return dict(error=e.message) 97 98 return _return_response_and_status_code(response) 99 100 def rescan_file(self, this_hash, timeout=None): 101 """ Rescan a previously submitted filed or schedule an scan to be performed in the future. 102 103 :param this_hash: a md5/sha1/sha256 hash. You can also specify a CSV list made up of a combination of any of 104 the three allowed hashes (up to 25 items), this allows you to perform a batch request with 105 one single call. Note that the file must already be present in our file store. 106 :param timeout: The amount of time in seconds the request should wait before timing out. 107 108 :return: JSON response that contains scan_id and permalink. 109 """ 110 params = {'apikey': self.api_key, 'resource': this_hash} 111 112 try: 113 response = requests.post(self.base + 'file/rescan', params=params, proxies=self.proxies, timeout=timeout) 114 except requests.RequestException as e: 115 return dict(error=e.message) 116 117 return _return_response_and_status_code(response) 118 119 def get_file_report(self, this_hash, timeout=None): 120 """ Get the scan results for a file. 121 122 You can also specify a CSV list made up of a combination of hashes and scan_ids 123 (up to 4 items with the standard request rate), this allows you to perform a batch 124 request with one single call. 125 i.e. {'resource': '99017f6eebbac24f351415dd410d522d, 88817f6eebbac24f351415dd410d522d'}. 126 127 :param this_hash: The md5/sha1/sha256/scan_ids hash of the file whose dynamic behavioural report you want to 128 retrieve or scan_ids from a previous call to scan_file. 129 :param timeout: The amount of time in seconds the request should wait before timing out. 130 131 :return: 132 """ 133 params = {'apikey': self.api_key, 'resource': this_hash} 134 135 try: 136 response = requests.get(self.base + 'file/report', params=params, proxies=self.proxies, timeout=timeout) 137 except requests.RequestException as e: 138 return dict(error=e.message) 139 140 return _return_response_and_status_code(response) 141 142 def scan_url(self, this_url, timeout=None): 143 """ Submit a URL to be scanned by VirusTotal. 144 145 :param this_url: The URL that should be scanned. This parameter accepts a list of URLs (up to 4 with the 146 standard request rate) so as to perform a batch scanning request with one single call. The 147 URLs must be separated by a new line character. 148 :param timeout: The amount of time in seconds the request should wait before timing out. 149 150 :return: JSON response that contains scan_id and permalink. 151 """ 152 params = {'apikey': self.api_key, 'url': this_url} 153 154 try: 155 response = requests.post(self.base + 'url/scan', params=params, proxies=self.proxies, timeout=None) 156 except requests.RequestException as e: 157 return dict(error=e.message) 158 159 return _return_response_and_status_code(response) 160 161 def get_url_report(self, this_url, scan='0', timeout=None): 162 """ Get the scan results for a URL. (can do batch searches like get_file_report) 163 164 :param this_url: a URL will retrieve the most recent report on the given URL. You may also specify a scan_id 165 (sha256-timestamp as returned by the URL submission API) to access a specific report. At the 166 same time, you can specify a CSV list made up of a combination of hashes and scan_ids so as 167 to perform a batch request with one single call (up to 4 resources per call with the standard 168 request rate). When sending multiples, the scan_ids or URLs must be separated by a new line 169 character. 170 :param scan: (optional): this is an optional parameter that when set to "1" will automatically submit the URL 171 for analysis if no report is found for it in VirusTotal's database. In this case the result will 172 contain a scan_id field that can be used to query the analysis report later on. 173 :param timeout: The amount of time in seconds the request should wait before timing out. 174 175 :return: JSON response 176 """ 177 params = {'apikey': self.api_key, 'resource': this_url, 'scan': scan} 178 179 try: 180 response = requests.get(self.base + 'url/report', params=params, proxies=self.proxies, timeout=timeout) 181 except requests.RequestException as e: 182 return dict(error=e.message) 183 184 return _return_response_and_status_code(response) 185 186 def put_comments(self, resource, comment, timeout=None): 187 """ Post a comment on a file or URL. 188 189 The initial idea of VirusTotal Community was that users should be able to make comments on files and URLs, 190 the comments may be malware analyses, false positive flags, disinfection instructions, etc. 191 192 Imagine you have some automatic setup that can produce interesting results related to a given sample or URL 193 that you submit to VirusTotal for antivirus characterization, you might want to give visibility to your setup 194 by automatically reviewing samples and URLs with the output of your automation. 195 196 :param resource: either a md5/sha1/sha256 hash of the file you want to review or the URL itself that you want 197 to comment on. 198 :param comment: the actual review, you can tag it using the "#" twitter-like syntax (e.g. #disinfection #zbot) 199 and reference users using the "@" syntax (e.g. @VirusTotalTeam). 200 :param timeout: The amount of time in seconds the request should wait before timing out. 201 202 :return: If the comment was successfully posted the response code will be 1, 0 otherwise. 203 """ 204 params = {'apikey': self.api_key, 'resource': resource, 'comment': comment} 205 206 try: 207 response = requests.post(self.base + 'comments/put', params=params, proxies=self.proxies, timeout=timeout) 208 except requests.RequestException as e: 209 return dict(error=e.message) 210 211 return _return_response_and_status_code(response) 212 213 def get_ip_report(self, this_ip, timeout=None): 214 """ Get IP address reports. 215 216 :param this_ip: a valid IPv4 address in dotted quad notation, for the time being only IPv4 addresses are 217 supported. 218 :param timeout: The amount of time in seconds the request should wait before timing out. 219 220 :return: JSON response 221 """ 222 params = {'apikey': self.api_key, 'ip': this_ip} 223 224 try: 225 response = requests.get(self.base + 'ip-address/report', 226 params=params, 227 proxies=self.proxies, 228 timeout=timeout) 229 except requests.RequestException as e: 230 return dict(error=e.message) 231 232 return _return_response_and_status_code(response) 233 234 def get_domain_report(self, this_domain, timeout=None): 235 """ Get information about a given domain. 236 237 :param this_domain: a domain name. 238 :param timeout: The amount of time in seconds the request should wait before timing out. 239 240 :return: JSON response 241 """ 242 params = {'apikey': self.api_key, 'domain': this_domain} 243 244 try: 245 response = requests.get(self.base + 'domain/report', params=params, proxies=self.proxies, timeout=timeout) 246 except requests.RequestException as e: 247 return dict(error=e.message) 248 249 return _return_response_and_status_code(response) 250 251 252class PrivateApi(PublicApi): 253 254 def scan_file(self, 255 this_file, 256 notify_url=None, 257 notify_changes_only=None, 258 from_disk=True, 259 filename=None, 260 timeout=None): 261 """ Submit a file to be scanned by VirusTotal. 262 263 Allows you to send a file for scanning with VirusTotal. Before performing your submissions we encourage you to 264 retrieve the latest report on the files, if it is recent enough you might want to save time and bandwidth by 265 making use of it. File size limit is 32MB, in order to submmit files up to 200MB in size you must request a 266 special upload URL. 267 268 :param this_file: The file to be uploaded. 269 :param notify_url: A URL to which a POST notification should be sent when the scan finishes. 270 :param notify_changes_only: Used in conjunction with notify_url. Indicates if POST notifications should be 271 sent only if the scan results differ from the previous analysis. 272 :param from_disk: If True we read the file contents from disk using this_file as filepath. If False this_file 273 is the actual file object. 274 :param filename: Specify the filename, this overwrites the filename if we read a file from disk. 275 :param timeout: The amount of time in seconds the request should wait before timing out. 276 277 :return: JSON response that contains scan_id and permalink. 278 """ 279 params = {'apikey': self.api_key} 280 if from_disk: 281 if not filename: 282 filename = os.path.basename(this_file) 283 files = {'file': (filename, open(this_file, 'rb').read())} 284 else: 285 if filename: 286 files = {'file': (filename, this_file)} 287 else: 288 files = {'file': this_file} 289 290 try: 291 response = requests.post(self.base + 'file/scan', 292 files=files, 293 params=params, 294 proxies=self.proxies, 295 timeout=timeout) 296 except requests.RequestException as e: 297 return dict(error=e.message) 298 299 return _return_response_and_status_code(response) 300 301 @property 302 def get_upload_url(self, timeout=None): 303 """ Get a special URL for submitted files bigger than 32MB. 304 305 In order to submit files bigger than 32MB you need to obtain a special upload URL to which you 306 can POST files up to 200MB in size. This API generates such a URL. 307 308 :param timeout: The amount of time in seconds the request should wait before timing out. 309 310 :return: JSON special upload URL to which you can POST files up to 200MB in size. 311 """ 312 params = {'apikey': self.api_key} 313 314 try: 315 response = requests.get(self.base + 'file/scan/upload_url', 316 params=params, 317 proxies=self.proxies, 318 timeout=timeout) 319 except requests.RequestException as e: 320 return dict(error=e.message) 321 322 if response.status_code == requests.codes.ok: 323 return response.json()['upload_url'] 324 else: 325 return dict(response_code=response.status_code) 326 327 def rescan_file(self, resource, date='', period='', repeat='', notify_url='', notify_changes_only='', timeout=None): 328 """ Rescan a previously submitted filed or schedule an scan to be performed in the future. 329 330 This API allows you to rescan files present in VirusTotal's file store without having to 331 resubmit them, thus saving bandwidth. You only need to know one of the hashes of the file 332 to rescan. 333 334 :param resource: An md5/sha1/sha256 hash. You can also specify a CSV list made up of a 335 combination of any of the three allowed hashes (up to 25 items), this allows you to perform 336 a batch request with just one single call. Note that the file must already be present in our 337 file store. 338 :param date: (optional) Date in %Y%m%d%H%M%S format (example: 20120725170000) in which the rescan should 339 be performed. If not specified the rescan will be performed immediately. 340 :param period: (optional) Periodicity (in days) with which the file should be rescanned. If this argument 341 is provided the file will be rescanned periodically every period days, if not, the rescan is 342 performed once and not repated again. 343 :param repeat: (optional) Used in conjunction with period to specify the number of times the file should be 344 rescanned. If this argument is provided the file will be rescanned the given amount of times in coherence 345 with the chosen periodicity, if not, the file will be rescanned indefinitely. 346 :param notify_url: (optional) A URL to which a POST notification should be sent when the rescan finishes. 347 :param notify_changes_only: (optional) Used in conjunction with notify_url. Indicates if POST notifications 348 should only be sent if the scan results differ from the previous one. 349 :param timeout: The amount of time in seconds the request should wait before timing out. 350 351 :return: JSON response that contains scan_id and permalink. 352 """ 353 params = {'apikey': self.api_key, 'resource': resource} 354 355 try: 356 response = requests.post(self.base + 'file/rescan', params=params, proxies=self.proxies, timeout=timeout) 357 except requests.RequestException as e: 358 return dict(error=e.message) 359 360 return _return_response_and_status_code(response) 361 362 def cancel_rescan_file(self, resource, timeout=None): 363 """ Delete a previously scheduled scan. 364 365 Deletes a scheduled file rescan task. The file rescan api allows you to schedule periodic scans of a file, 366 this API call tells VirusTotal to stop rescanning a file that you have previously enqueued for recurrent 367 scanning. 368 369 :param resource: The md5/sha1/sha256 hash of the file whose dynamic behavioural report you want to retrieve. 370 :param timeout: The amount of time in seconds the request should wait before timing out. 371 372 :return: JSON acknowledgement. In the event that the scheduled scan deletion fails for whatever reason, the 373 response code will be -1. 374 """ 375 params = {'apikey': self.api_key, 'resource': resource} 376 377 try: 378 response = requests.post(self.base + 'rescan/delete', params=params, proxies=self.proxies, timeout=timeout) 379 except requests.RequestException as e: 380 return dict(error=e.message) 381 382 return _return_response_and_status_code(response) 383 384 def get_file_report(self, resource, allinfo=1, timeout=None): 385 """ Get the scan results for a file. 386 387 Retrieves a concluded file scan report for a given file. Unlike the public API, this call allows you to also 388 access all the information we have on a particular file (VirusTotal metadata, signature information, structural 389 information, etc.) by using the allinfo parameter described later on. 390 391 :param resource: An md5/sha1/sha256 hash of a file for which you want to retrieve the most recent antivirus 392 report. You may also specify a scan_id (sha256-timestamp as returned by the scan API) to access a specific 393 report. You can also specify a CSV list made up of a combination of hashes and scan_ids (up to 25 items), 394 this allows you to perform a batch request with just one single call. 395 :param allinfo: (optional) If specified and set to one, the call will return additional info, other than the 396 antivirus results, on the file being queried. This additional info includes the output of several tools acting 397 on the file (PDFiD, ExifTool, sigcheck, TrID, etc.), metadata regarding VirusTotal submissions (number of 398 unique sources that have sent the file in the past, first seen date, last seen date, etc.), the output of 399 in-house technologies such as a behavioural sandbox, etc. 400 :param timeout: The amount of time in seconds the request should wait before timing out. 401 402 :return: JSON response 403 """ 404 params = {'apikey': self.api_key, 'resource': resource, 'allinfo': allinfo} 405 406 try: 407 response = requests.get(self.base + 'file/report', params=params, proxies=self.proxies, timeout=timeout) 408 except requests.RequestException as e: 409 return dict(error=e.message) 410 411 return _return_response_and_status_code(response) 412 413 def get_file_behaviour(self, this_hash, timeout=None): 414 """ Get a report about the behaviour of the file in sand boxed environment. 415 416 VirusTotal runs a distributed setup of Cuckoo sandbox machines that execute the files we receive. Execution is 417 attempted only once, upon first submission to VirusTotal, and only Portable Executables under 10MB in size are 418 ran. The execution of files is a best effort process, hence, there are no guarantees about a report being 419 generated for a given file in our dataset. 420 421 If a file did indeed produce a behavioural report, a summary of it can be obtained by using the file scan 422 lookup call providing the additional HTTP POST parameter allinfo=1. The summary will appear under the 423 behaviour-v1 property of the additional_info field in the JSON report. 424 425 :param this_hash: The md5/sha1/sha256 hash of the file whose dynamic behavioural report you want to retrieve. 426 :param timeout: The amount of time in seconds the request should wait before timing out. 427 428 :return: full JSON report of the file's execution as returned by the Cuckoo JSON report encoder. 429 """ 430 params = {'apikey': self.api_key, 'hash': this_hash} 431 432 try: 433 response = requests.get(self.base + 'file/behaviour', params=params, proxies=self.proxies, timeout=timeout) 434 except requests.RequestException as e: 435 return dict(error=e.message) 436 437 return _return_response_and_status_code(response) 438 439 def get_network_traffic(self, this_hash, timeout=None): 440 """ Get a dump of the network traffic generated by the file. 441 442 VirusTotal runs a distributed setup of Cuckoo sandbox machines that execute the files we receive. 443 Execution is attempted only once, upon first submission to VirusTotal, and only Portable Executables 444 under 10MB in size are ran. The execution of files is a best effort process, hence, there are no 445 guarantees about a report being generated for a given file in our dataset. 446 447 Files that are successfully executed may communicate with certain network resources, all this 448 communication is recorded in a network traffic dump (pcap file). This API allows you to retrieve 449 the network traffic dump generated during the file's execution. 450 451 :param this_hash: The md5/sha1/sha256 hash of the file whose network traffic dump you want to retrieve. 452 :return: Pcap 453 """ 454 params = {'apikey': self.api_key, 'hash': this_hash} 455 456 try: 457 response = requests.get(self.base + 'file/network-traffic', 458 params=params, 459 proxies=self.proxies, 460 timeout=timeout) 461 except requests.RequestException as e: 462 return dict(error=e.message) 463 464 try: 465 return _return_response_and_status_code(response) 466 except ValueError: 467 return response.content 468 469 def file_search(self, query, offset=None, timeout=None): 470 """ Search for samples. 471 472 In addition to retrieving all information on a particular file, VirusTotal allows you to perform what we 473 call "advanced reverse searches". Reverse searches take you from a file property to a list of files that 474 match that property. For example, this functionality enables you to retrieve all those files marked by at 475 least one antivirus vendor as Zbot, or all those files that have a size under 90KB and are detected by at 476 least 10 antivirus solutions, or all those PDF files that have an invalid XREF section, etc. 477 478 This API is equivalent to VirusTotal Intelligence advanced searches. A very wide variety of search modifiers 479 are available, including: file size, file type, first submission date to VirusTotal, last submission date to 480 VirusTotal, number of positives, dynamic behavioural properties, binary content, submission file name, and a 481 very long etcetera. The full list of search modifiers allowed for file search queries is documented at: 482 https://www.virustotal.com/intelligence/help/file-search/#search-modifiers 483 484 NOTE: 485 Daily limited! No matter what API step you have licensed, this API call is limited to 50K requests per day. 486 If you need any more, chances are you are approaching your engineering problem erroneously and you can 487 probably solve it using the file distribution call. Do not hesitate to contact us with your particular 488 use case. 489 490 EXAMPLE: 491 search_options = 'type:peexe size:90kb+ positives:5+ behaviour:"taskkill"' 492 493 :param query: A search modifier compliant file search query. 494 :param offset: (optional) The offset value returned by a previously issued identical query, allows you to 495 paginate over the results. If not specified the first 300 matching files sorted according to last submission 496 date to VirusTotal in a descending fashion will be returned. 497 :param timeout: The amount of time in seconds the request should wait before timing out. 498 499 :return: JSON response - By default the list returned contains at most 300 hashes, ordered according to 500 last submission date to VirusTotal in a descending fashion. 501 """ 502 params = dict(apikey=self.api_key, query=query, offset=offset) 503 504 try: 505 response = requests.get(self.base + 'file/search', params=params, proxies=self.proxies, timeout=timeout) 506 except requests.RequestException as e: 507 return dict(error=e.message) 508 509 return _return_response_and_status_code(response) 510 511 def get_file_clusters(self, this_date, timeout=None): 512 """ File similarity clusters for a given time frame. 513 514 VirusTotal has built its own in-house file similarity clustering functionality. At present, this clustering 515 works only on PE, PDF, DOC and RTF files and is based on a very simple structural feature hash. This hash 516 can very often be confused by certain compression and packing strategies, in other words, this clustering 517 logic is no holly grail, yet it has proven itself very useful in the past. 518 519 This API offers a programmatic access to the clustering section of VirusTotal Intelligence: 520 https://www.virustotal.com/intelligence/clustering/ 521 522 NOTE: 523 Please note that you must be logged in with a valid VirusTotal Community user account with access to 524 VirusTotal Intelligence in order to be able to view the clustering listing. 525 526 :param this_date: A specific day for which we want to access the clustering details, example: 2013-09-10. 527 :param timeout: The amount of time in seconds the request should wait before timing out. 528 529 :return: JSON object contains several properties 530 num_candidates - Total number of files submitted during the given time frame for which a feature hash could 531 be calculated. 532 num_clusters - Total number of clusters generated for the given time period under consideration, a cluster 533 can be as small as an individual file, meaning that no other feature-wise similar file was 534 found. 535 size_top200 - The sum of the number of files in the 200 largest clusters identified. 536 clusters - List of JSON objects that contain details about the 200 largest clusters identified. These 537 objects contain 4 properties: id, label, size and avg_positives.. The id field can be used 538 to then query the search API call for files contained in the given cluster. The label 539 property is a verbose human-intelligible name for the cluster. The size field is the number 540 of files that make up the cluster. Finally, avg_positives represents the average number of 541 antivirus detections that the files in the cluster exhibit. 542 """ 543 params = {'apikey': self.api_key, 'date': this_date} 544 545 try: 546 response = requests.get(self.base + 'file/clusters', params=params, proxies=self.proxies, timeout=timeout) 547 except requests.RequestException as e: 548 return dict(error=e.message) 549 550 return _return_response_and_status_code(response) 551 552 def get_file_distribution(self, before='', after='', reports='false', limit='1000', timeout=None): 553 """ Get a live feed with the latest files submitted to VirusTotal. 554 555 Allows you to retrieve a live feed of absolutely all uploaded files to VirusTotal, and download them for 556 further scrutiny. This API requires you to stay synced with the live submissions as only a backlog of 6 557 hours is provided at any given point in time. 558 559 :param before: (optional) Retrieve files received before the given timestamp, in timestamp descending order. 560 :param after: (optional) Retrieve files received after the given timestamp, in timestamp ascending order. 561 :param reports: (optional) Include the files' antivirus results in the response. Possible values are 'true' or 562 'false' (default value is 'false'). 563 :param limit: (optional) Retrieve limit file items at most (default: 1000). 564 :param timeout: The amount of time in seconds the request should wait before timing out. 565 566 :return: JSON response: please see https://www.virustotal.com/en/documentation/private-api/#file-distribution 567 """ 568 params = {'apikey': self.api_key, 'before': before, 'after': after, 'reports': reports, 'limit': limit} 569 570 try: 571 response = requests.get(self.base + 'file/distribution', 572 params=params, 573 proxies=self.proxies, 574 timeout=timeout) 575 except requests.RequestException as e: 576 return dict(error=e.message) 577 578 return _return_response_and_status_code(response) 579 580 def get_file_feed(self, package=None, timeout=None): 581 """ Get a live file feed with the latest files submitted to VirusTotal. 582 583 Allows you to retrieve a live feed of absolutely all uploaded files to VirusTotal, and download them for 584 further scrutiny, along with their full reports. This API requires you to stay relatively synced with the live 585 submissions as only a backlog of 24 hours is provided at any given point in time. 586 587 This API returns a bzip2 compressed tarball. For per-minute packages the compressed package contains a unique 588 file, the file contains a json per line, this json is a full report on a given file processed by VirusTotal 589 during the given time window. The file report follows the exact same format as the response of the file report 590 API if the allinfo=1 parameter is provided. For hourly packages, the tarball contains 60 files, one per each 591 minute of the window. 592 593 :param package: Indicates a time window to pull reports on all items received during such window. 594 Only per-minute and hourly windows are allowed, the format is %Y%m%dT%H%M (e.g. 20160304T0900) 595 or %Y%m%dT%H (e.g. 20160304T09). Time is expressed in UTC. 596 :param timeout: The amount of time in seconds the request should wait before timing out. 597 598 :return: BZIP2 response: please see https://www.virustotal.com/en/documentation/private-api/#file-feed 599 """ 600 if package is None: 601 now = datetime.utcnow() 602 five_minutes_ago = now - timedelta(minutes=now.minute % 5 + 5, 603 seconds=now.second, 604 microseconds=now.microsecond) 605 package = five_minutes_ago.strftime('%Y%m%dT%H%M') 606 607 params = {'apikey': self.api_key, 'package': package} 608 609 try: 610 response = requests.get(self.base + 'file/feed', params=params, proxies=self.proxies, timeout=timeout) 611 except requests.RequestException as e: 612 return dict(error=e.message) 613 614 if response.ok: 615 return response.content 616 elif response.status_code == 400: 617 return dict(error='package sent is either malformed or not within the past 24 hours.', 618 response_code=response.status_code) 619 elif response.status_code == 403: 620 return dict(error='You tried to perform calls to functions for which you require a Private API key.', 621 response_code=response.status_code) 622 elif response.status_code == 404: 623 return dict(error='File not found.', response_code=response.status_code) 624 else: 625 return dict(response_code=response.status_code) 626 627 def get_file(self, this_hash, timeout=None): 628 """ Download a file by its hash. 629 630 Downloads a file from VirusTotal's store given one of its hashes. This call can be used in conjuction with 631 the file searching call in order to download samples that match a given set of criteria. 632 633 :param this_hash: The md5/sha1/sha256 hash of the file you want to download. 634 :param timeout: The amount of time in seconds the request should wait before timing out. 635 636 :return: Downloaded file in response.content 637 """ 638 params = {'apikey': self.api_key, 'hash': this_hash} 639 640 try: 641 response = requests.get(self.base + 'file/download', params=params, proxies=self.proxies, timeout=timeout) 642 except requests.RequestException as e: 643 return dict(error=e.message) 644 645 if response.status_code == requests.codes.ok: 646 return response.content 647 elif response.status_code == 403: 648 return dict(error='You tried to perform calls to functions for which you require a Private API key.', 649 response_code=response.status_code) 650 elif response.status_code == 404: 651 return dict(error='File not found.', response_code=response.status_code) 652 else: 653 return dict(response_code=response.status_code) 654 655 def scan_url(self, this_url, timeout=None): 656 """ Submit a URL to be scanned by VirusTotal. 657 658 Allows you to submit URLs to be scanned by VirusTotal. Before performing your submission we encourage you to 659 retrieve the latest report on the URL, if it is recent enough you might want to save time and bandwidth by 660 making use of it. 661 662 :param this_url: The URL that should be scanned. This parameter accepts a list of URLs so as to perform a batch 663 scanning request with just one single call (up to 25 URLs per call). The URLs must be separated by a new line 664 character. 665 :param timeout: The amount of time in seconds the request should wait before timing out. 666 667 :return: JSON response that contains scan_id and permalink. 668 """ 669 params = {'apikey': self.api_key, 'url': this_url} 670 671 try: 672 response = requests.post(self.base + 'url/scan', params=params, proxies=self.proxies, timeout=timeout) 673 except requests.RequestException as e: 674 return dict(error=e.message) 675 676 return _return_response_and_status_code(response) 677 678 def get_url_report(self, this_url, scan='0', allinfo=1, timeout=None): 679 """ Get the scan results for a URL. 680 681 :param this_url: A URL for which you want to retrieve the most recent report. You may also specify a scan_id 682 (sha256-timestamp as returned by the URL submission API) to access a specific report. At the same time, you 683 can specify a CSV list made up of a combination of urls and scan_ids (up to 25 items) so as to perform a batch 684 request with one single call. The CSV list must be separated by new line characters. 685 :param scan: (optional) This is an optional parameter that when set to "1" will automatically submit the URL 686 for analysis if no report is found for it in VirusTotal's database. In this case the result will contain a 687 scan_id field that can be used to query the analysis report later on. 688 :param allinfo: (optional) If this parameter is specified and set to "1" additional info regarding the URL 689 (other than the URL scanning engine results) will also be returned. This additional info includes VirusTotal 690 related metadata (first seen date, last seen date, files downloaded from the given URL, etc.) and the output 691 of other tools and datasets when fed with the URL. 692 :param timeout: The amount of time in seconds the request should wait before timing out. 693 694 :return: JSON response 695 """ 696 697 params = {'apikey': self.api_key, 'resource': this_url, 'scan': scan, 'allinfo': allinfo} 698 699 try: 700 response = requests.get(self.base + 'url/report', params=params, proxies=self.proxies, timeout=timeout) 701 except requests.RequestException as e: 702 return dict(error=e.message) 703 704 return _return_response_and_status_code(response) 705 706 def get_url_distribution(self, after=None, reports='true', limit=1000, timeout=None): 707 """ Get a live feed with the lastest URLs submitted to VirusTotal. 708 709 Allows you to retrieve a live feed of URLs submitted to VirusTotal, along with their scan reports. This 710 call enables you to stay synced with VirusTotal URL submissions and replicate our dataset. 711 712 :param after: (optional) Retrieve URLs received after the given timestamp, in timestamp ascending order. 713 :param reports: (optional) When set to "true" each item retrieved will include the results for each particular 714 URL scan (in exactly the same format as the URL scan retrieving API). If the parameter is not specified, each 715 item returned will only contain the scanned URL and its detection ratio. 716 :param limit: (optional) Retrieve limit file items at most (default: 1000). 717 :param timeout: The amount of time in seconds the request should wait before timing out. 718 719 :return: JSON response 720 """ 721 722 params = {'apikey': self.api_key, 'after': after, 'reports': reports, 'limit': limit} 723 724 try: 725 response = requests.get(self.base + 'url/distribution', 726 params=params, 727 proxies=self.proxies, 728 timeout=timeout) 729 except requests.RequestException as e: 730 return dict(error=e.message) 731 732 return _return_response_and_status_code(response) 733 734 def get_url_feed(self, package=None, timeout=None): 735 """ Get a live file feed with the latest files submitted to VirusTotal. 736 737 Allows you to retrieve a live feed of reports on absolutely all URLs scanned by VirusTotal. This API requires 738 you to stay relatively synced with the live submissions as only a backlog of 24 hours is provided at any given 739 point in time. 740 741 This API returns a bzip2 compressed tarball. For per-minute packages the compressed package contains a unique 742 file, the file contains a json per line, this json is a full report on a given URL processed by VirusTotal 743 during the given time window. The URL report follows the exact same format as the response of the URL report 744 API if the allinfo=1 parameter is provided. For hourly packages, the tarball contains 60 files, one per each 745 minute of the window. 746 747 :param package: Indicates a time window to pull reports on all items received during such window. 748 Only per-minute and hourly windows are allowed, the format is %Y%m%dT%H%M (e.g. 20160304T0900) 749 or %Y%m%dT%H (e.g. 20160304T09). Time is expressed in UTC. 750 :param timeout: The amount of time in seconds the request should wait before timing out. 751 752 :return: BZIP2 response: please see https://www.virustotal.com/en/documentation/private-api/#file-feed 753 """ 754 if package is None: 755 now = datetime.utcnow() 756 five_minutes_ago = now - timedelta(minutes=now.minute % 5 + 5, 757 seconds=now.second, 758 microseconds=now.microsecond) 759 package = five_minutes_ago.strftime('%Y%m%dT%H%M') 760 761 params = {'apikey': self.api_key, 'package': package} 762 763 try: 764 response = requests.get(self.base + 'url/feed', params=params, proxies=self.proxies, timeout=timeout) 765 except requests.RequestException as e: 766 return dict(error=e.message) 767 768 if response.ok: 769 return response.content 770 elif response.status_code == 400: 771 return dict(error='package sent is either malformed or not within the past 24 hours.', 772 response_code=response.status_code) 773 elif response.status_code == 403: 774 return dict(error='You tried to perform calls to functions for which you require a Private API key.', 775 response_code=response.status_code) 776 elif response.status_code == 404: 777 return dict(error='File not found.', response_code=response.status_code) 778 else: 779 return dict(response_code=response.status_code) 780 781 def get_ip_report(self, this_ip, timeout=None): 782 """ Get information about a given IP address. 783 784 Retrieves a report on a given IP address (including the information recorded by VirusTotal's Passive DNS 785 infrastructure). 786 787 :param this_ip: A valid IPv4 address in dotted quad notation, for the time being only IPv4 addresses are 788 supported. 789 :param timeout: The amount of time in seconds the request should wait before timing out. 790 791 :return: JSON response 792 """ 793 params = {'apikey': self.api_key, 'ip': this_ip} 794 795 try: 796 response = requests.get(self.base + 'ip-address/report', 797 params=params, 798 proxies=self.proxies, 799 timeout=timeout) 800 except requests.RequestException as e: 801 return dict(error=e.message) 802 803 return _return_response_and_status_code(response) 804 805 def get_domain_report(self, this_domain, timeout=None): 806 """ Get information about a given domain. 807 808 Retrieves a report on a given domain (including the information recorded by VirusTotal's passive DNS 809 infrastructure). 810 811 :param this_domain: A domain name. 812 :param timeout: The amount of time in seconds the request should wait before timing out. 813 814 :return: JSON response 815 """ 816 params = {'apikey': self.api_key, 'domain': this_domain} 817 818 try: 819 response = requests.get(self.base + 'domain/report', params=params, proxies=self.proxies, timeout=timeout) 820 except requests.RequestException as e: 821 return dict(error=e.message) 822 823 return _return_response_and_status_code(response) 824 825 def put_comments(self, resource, comment, timeout=None): 826 """ Post a comment on a file or URL. 827 828 Allows you to place comments on URLs and files, these comments will be publicly visible in VirusTotal 829 Community, under the corresponding tab in the reports for each particular item. 830 831 Comments can range from URLs and locations where a given file was found in the wild to full reverse 832 engineering reports on a given malware specimen, anything that may help other analysts in extending their 833 knowledge about a particular file or URL. 834 835 :param resource: Either an md5/sha1/sha256 hash of the file you want to review or the URL itself that you want 836 to comment on. 837 :param comment: The actual review, you can tag it using the "#" twitter-like syntax (e.g. #disinfection #zbot) 838 and reference users using the "@" syntax (e.g. @VirusTotalTeam). 839 :param timeout: The amount of time in seconds the request should wait before timing out. 840 841 :return: JSON response 842 """ 843 params = {'apikey': self.api_key, 'resource': resource, 'comment': comment} 844 845 try: 846 response = requests.post(self.base + 'comments/put', params=params, proxies=self.proxies, timeout=timeout) 847 except requests.RequestException as e: 848 return dict(error=e.message) 849 850 return _return_response_and_status_code(response) 851 852 def get_comments(self, resource, before=None, timeout=None): 853 """ Get comments for a file or URL. 854 855 Retrieve a list of VirusTotal Community comments for a given file or URL. VirusTotal Community comments are 856 user submitted reviews on a given item, these comments may contain anything from the in-the-wild locations of 857 files up to fully-featured reverse engineering reports on a given sample. 858 859 :param resource: Either an md5/sha1/sha256 hash of the file or the URL itself you want to retrieve. 860 :param before: (optional) A datetime token that allows you to iterate over all comments on a specific item 861 whenever it has been commented on more than 25 times. 862 :param timeout: The amount of time in seconds the request should wait before timing out. 863 864 :return: JSON response - The application answers with the comments sorted in descending order according to 865 their date. 866 """ 867 params = dict(apikey=self.api_key, resource=resource, before=before) 868 869 try: 870 response = requests.get(self.base + 'comments/get', params=params, proxies=self.proxies, timeout=timeout) 871 except requests.RequestException as e: 872 return dict(error=e.message) 873 874 return _return_response_and_status_code(response) 875 876 877class IntelApi(): 878 """ To make the best use of your VirusTotal Intelligence account and so, we have exposed some 879 VirusTotal Intelligence functionality for programmatic interaction even if you do not have a 880 Private Mass API key. 881 """ 882 883 def __init__(self, api_key, proxies=None): 884 self.api_key = api_key 885 self.proxies = proxies 886 self.base = 'https://www.virustotal.com/intelligence/' 887 888 def get_hashes_from_search(self, query, page=None, timeout=None): 889 """ Get the scan results for a file. 890 891 Even if you do not have a Private Mass API key that you can use, you can still automate VirusTotal Intelligence 892 searches pretty much in the same way that the searching for files api call works. 893 894 :param query: a VirusTotal Intelligence search string in accordance with the file search documentation . 895 <https://www.virustotal.com/intelligence/help/file-search/> 896 :param page: the next_page property of the results of a previously issued query to this API. This parameter 897 should not be provided if it is the very first query to the API, i.e. if we are retrieving the 898 first page of results. 899 :param timeout: The amount of time in seconds the request should wait before timing out. 900 901 apikey: the API key associated to a VirusTotal Community account with VirusTotal Intelligence privileges. 902 """ 903 params = {'query': query, 'apikey': self.api_key, 'page': page} 904 905 try: 906 response = requests.get(self.base + 'search/programmatic/', 907 params=params, 908 proxies=self.proxies, 909 timeout=timeout) 910 except requests.RequestException as e: 911 return dict(error=e.message) 912 913 return response.json()['next_page'], response 914 915 def get_file(self, file_hash, save_file_at, timeout=None): 916 """ Get the scan results for a file. 917 918 Even if you do not have a Private Mass API key that you can use, you can still download files from the 919 VirusTotal storage making use of your VirusTotal Intelligence quota, i.e. programmatic downloads will 920 also deduct quota. 921 922 :param file_hash: You may use either the md5, sha1 or sha256 hash of the file in order to download it. 923 :param save_file_at: Path of where to save the file. 924 :param timeout: The amount of time in seconds the request should wait before timing out. 925 926 """ 927 params = {'hash': file_hash, 'apikey': self.api_key} 928 929 try: 930 response = requests.get(self.base + 'download/', 931 params=params, 932 proxies=self.proxies, 933 stream=True, 934 timeout=timeout) 935 except requests.RequestException as e: 936 return dict(error=e.message) 937 938 if response.status_code == requests.codes.ok: 939 self.save_downloaded_file(file_hash, save_file_at, response.content) 940 return response.content 941 elif response.status_code == 403: 942 return dict(error='You tried to perform calls to functions for which you require a Private API key.', 943 response_code=response.status_code) 944 elif response.status_code == 404: 945 return dict(error='File not found.', response_code=response.status_code) 946 else: 947 return dict(response_code=response.status_code) 948 949 def get_all_file_report_pages(self, query): 950 """ Get File Report (All Pages). 951 952 :param query: a VirusTotal Intelligence search string in accordance with the file search documentation. 953 :return: All JSON responses appended together. 954 """ 955 responses = [] 956 next_page, response = self.get_hashes_from_search(self, query) 957 responses.append(_return_response_and_status_code(response)) 958 while next_page: 959 next_page, response = self.get_hashes_from_search(query, next_page) 960 responses.append(_return_response_and_status_code(response)) 961 return dict(results=responses) 962 963 @staticmethod 964 def save_downloaded_file(filename, save_file_at, file_stream): 965 """ Save Downloaded File to Disk Helper Function 966 967 :param save_file_at: Path of where to save the file. 968 :param file_stream: File stream 969 :param filename: Name to save the file. 970 """ 971 filename = os.path.join(save_file_at, filename) 972 with open(filename, 'wb') as f: 973 f.write(file_stream) 974 f.flush() 975 976 977class ApiError(Exception): 978 pass 979 980 981def _return_response_and_status_code(response): 982 """ Output the requests response JSON and status code 983 984 :rtype : dict 985 :param response: requests response object 986 :return: dict containing the JSON response and/or the status code with error string. 987 """ 988 if response.status_code == requests.codes.ok: 989 return dict(results=response.json(), response_code=response.status_code) 990 elif response.status_code == 204: 991 return dict(error='You exceeded the public API request rate limit (4 requests of any nature per minute)', 992 response_code=response.status_code) 993 elif response.status_code == 403: 994 return dict(error='You tried to perform calls to functions for which you require a Private API key.', 995 response_code=response.status_code) 996 else: 997 return dict(response_code=response.status_code) 998