1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3""" Simple class to interact with VirusTotal's Public and Private API as well as VirusTotal Intelligence.
4
5:copyright: (c) 2014 by Josh "blacktop" Maine.
6:license: MIT, see LICENSE for more details.
7
8The APIs are documented at:
9https://www.virustotal.com/en/documentation/public-api/
10https://www.virustotal.com/en/documentation/private-api/
11https://www.virustotal.com/intelligence/help/automation/
12
EXAMPLE USAGE::

    import json

    from virus_total_apis import PublicApi as vtPubAPI

    vt = vtPubAPI(<INSERT_API_KEY_HERE>)
    response = vt.get_file_report('44cda81782dc2a346abd7b2285530c5f')

    print(json.dumps(response, sort_keys=False, indent=4))
21"""
22
23import os
24from datetime import datetime, timedelta
25
26try:
27    import requests
28except ImportError:
29    pass
30
31
class PublicApi():
    """ VirusTotal's Public API lets you upload and scan files, submit and scan URLs, access finished scan reports
    and make automatic comments on URLs and samples without the need of using the HTML website interface. In other
    words, it allows you to build simple scripts to access the information generated by VirusTotal.

    The chosen format for the API is HTTP POST requests with JSON object responses and it is limited to at most 4
    requests of any nature in any given 1 minute time frame. If you run a honeyclient, honeypot or any other
    automation that is going to provide resources to VirusTotal and not only retrieve reports you are entitled to
    a higher request rate quota, ask for it at contact@virustotal.com and you will receive special privileges when
    performing the calls to the API. Note that you will only have a higher request rate quota when asking for files
    or URLs that you previously sent to VirusTotal.

    In this second version we have improved the response format so as to ease the task of retrieving results, we
    have also introduced batch requests, you may now ask for several items with a sole API call (as long as you
    cohere with the request rate limit).

    The public API is a free service, available for any website or application that is free to consumers. The API
    must not be used in commercial products or services, it can not be used as a substitute for antivirus products
    and it can not be integrated in any project that may harm the antivirus industry directly or indirectly.
    Noncompliance of these terms will result in immediate permanent ban of the infractor individual or organization.

    Every request method returns ``dict(error=<message>)`` instead of raising when the underlying HTTP call fails
    with a ``requests.RequestException``; otherwise the result is whatever ``_return_response_and_status_code``
    builds from the HTTP response.
    """

    def __init__(self, api_key=None, proxies=None):
        """ Store the credentials and connection settings used by every request.

        :param api_key: The VirusTotal API key sent as the ``apikey`` parameter of each call.
        :param proxies: Optional proxies mapping handed unchanged to ``requests``.

        :raises ApiError: If no API key is supplied.
        """
        if api_key is None:
            raise ApiError("You must supply a valid VirusTotal API key.")
        self.api_key = api_key
        self.proxies = proxies
        self.base = 'https://www.virustotal.com/vtapi/v2/'
        self.version = 2

    def scan_file(self, this_file, from_disk=True, filename=None, timeout=None):
        """ Submit a file to be scanned by VirusTotal.

        The VirusTotal API allows you to send files. Before performing your submissions we encourage you to retrieve
        the latest report on the files, if it is recent enough you might want to save time and bandwidth by making use
        of it. File size limit is 32MB. If you have a need to scan larger files, please contact us, and tell us your
        use case.

        :param this_file: The file to be uploaded. (32MB file size limit)
        :param from_disk: If True we read the file contents from disk using this_file as filepath. If False this_file
                          is the actual file object.
        :param filename: Specify the filename, this overwrites the filename if we read a file from disk.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response that contains scan_id and permalink, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key}
        if from_disk:
            if not filename:
                filename = os.path.basename(this_file)
            # Read through a context manager so the file handle is closed as soon as the bytes are in memory.
            with open(this_file, 'rb') as handle:
                files = {'file': (filename, handle.read())}
        elif filename:
            files = {'file': (filename, this_file)}
        else:
            files = {'file': this_file}

        try:
            response = requests.post(self.base + 'file/scan',
                                     files=files,
                                     params=params,
                                     proxies=self.proxies,
                                     timeout=timeout)
        except requests.RequestException as e:
            # str(e) works on both Python 2 and 3; exceptions have no ``message`` attribute on Python 3.
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def rescan_file(self, this_hash, timeout=None):
        """ Rescan a previously submitted file or schedule a scan to be performed in the future.

        :param this_hash: a md5/sha1/sha256 hash. You can also specify a CSV list made up of a combination of any of
                          the three allowed hashes (up to 25 items), this allows you to perform a batch request with
                          one single call. Note that the file must already be present in our file store.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response that contains scan_id and permalink, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key, 'resource': this_hash}

        try:
            response = requests.post(self.base + 'file/rescan', params=params, proxies=self.proxies, timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def get_file_report(self, this_hash, timeout=None):
        """ Get the scan results for a file.

        You can also specify a CSV list made up of a combination of hashes and scan_ids
        (up to 4 items with the standard request rate), this allows you to perform a batch
        request with one single call.
        i.e. {'resource': '99017f6eebbac24f351415dd410d522d, 88817f6eebbac24f351415dd410d522d'}.

        :param this_hash: The md5/sha1/sha256 hash of the file whose report you want to retrieve, or a scan_id
                          from a previous call to scan_file.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key, 'resource': this_hash}

        try:
            response = requests.get(self.base + 'file/report', params=params, proxies=self.proxies, timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def scan_url(self, this_url, timeout=None):
        """ Submit a URL to be scanned by VirusTotal.

        :param this_url: The URL that should be scanned. This parameter accepts a list of URLs (up to 4 with the
                         standard request rate) so as to perform a batch scanning request with one single call. The
                         URLs must be separated by a new line character.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response that contains scan_id and permalink, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key, 'url': this_url}

        try:
            # Forward the caller's timeout; it used to be hard-coded to None, so the argument was ignored.
            response = requests.post(self.base + 'url/scan', params=params, proxies=self.proxies, timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def get_url_report(self, this_url, scan='0', timeout=None):
        """ Get the scan results for a URL. (can do batch searches like get_file_report)

        :param this_url: a URL will retrieve the most recent report on the given URL. You may also specify a scan_id
                         (sha256-timestamp as returned by the URL submission API) to access a specific report. At the
                         same time, you can specify a CSV list made up of a combination of hashes and scan_ids so as
                         to perform a batch request with one single call (up to 4 resources per call with the standard
                         request rate). When sending multiples, the scan_ids or URLs must be separated by a new line
                         character.
        :param scan: (optional): this is an optional parameter that when set to "1" will automatically submit the URL
                      for analysis if no report is found for it in VirusTotal's database. In this case the result will
                      contain a scan_id field that can be used to query the analysis report later on.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key, 'resource': this_url, 'scan': scan}

        try:
            response = requests.get(self.base + 'url/report', params=params, proxies=self.proxies, timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def put_comments(self, resource, comment, timeout=None):
        """ Post a comment on a file or URL.

        The initial idea of VirusTotal Community was that users should be able to make comments on files and URLs,
        the comments may be malware analyses, false positive flags, disinfection instructions, etc.

        Imagine you have some automatic setup that can produce interesting results related to a given sample or URL
        that you submit to VirusTotal for antivirus characterization, you might want to give visibility to your setup
        by automatically reviewing samples and URLs with the output of your automation.

        :param resource: either a md5/sha1/sha256 hash of the file you want to review or the URL itself that you want
                         to comment on.
        :param comment: the actual review, you can tag it using the "#" twitter-like syntax (e.g. #disinfection #zbot)
                        and reference users using the "@" syntax (e.g. @VirusTotalTeam).
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: If the comment was successfully posted the response code will be 1, 0 otherwise. If the request
                 itself failed, ``dict(error=...)`` is returned instead.
        """
        params = {'apikey': self.api_key, 'resource': resource, 'comment': comment}

        try:
            response = requests.post(self.base + 'comments/put', params=params, proxies=self.proxies, timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def get_ip_report(self, this_ip, timeout=None):
        """ Get IP address reports.

        :param this_ip: a valid IPv4 address in dotted quad notation, for the time being only IPv4 addresses are
                        supported.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key, 'ip': this_ip}

        try:
            response = requests.get(self.base + 'ip-address/report',
                                    params=params,
                                    proxies=self.proxies,
                                    timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)

    def get_domain_report(self, this_domain, timeout=None):
        """ Get information about a given domain.

        :param this_domain: a domain name.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: JSON response, or ``dict(error=...)`` if the request failed.
        """
        params = {'apikey': self.api_key, 'domain': this_domain}

        try:
            response = requests.get(self.base + 'domain/report', params=params, proxies=self.proxies, timeout=timeout)
        except requests.RequestException as e:
            return dict(error=str(e))

        return _return_response_and_status_code(response)
251
252class PrivateApi(PublicApi):
253
254    def scan_file(self,
255                  this_file,
256                  notify_url=None,
257                  notify_changes_only=None,
258                  from_disk=True,
259                  filename=None,
260                  timeout=None):
261        """ Submit a file to be scanned by VirusTotal.
262
263        Allows you to send a file for scanning with VirusTotal. Before performing your submissions we encourage you to
264        retrieve the latest report on the files, if it is recent enough you might want to save time and bandwidth by
265        making use of it. File size limit is 32MB, in order to submmit files up to 200MB in size you must request a
266        special upload URL.
267
268        :param this_file: The file to be uploaded.
269        :param notify_url: A URL to which a POST notification should be sent when the scan finishes.
270        :param notify_changes_only: Used in conjunction with notify_url. Indicates if POST notifications should be
271                                    sent only if the scan results differ from the previous analysis.
272        :param from_disk: If True we read the file contents from disk using this_file as filepath. If False this_file
273                          is the actual file object.
274        :param filename: Specify the filename, this overwrites the filename if we read a file from disk.
275        :param timeout: The amount of time in seconds the request should wait before timing out.
276
277        :return: JSON response that contains scan_id and permalink.
278        """
279        params = {'apikey': self.api_key}
280        if from_disk:
281            if not filename:
282                filename = os.path.basename(this_file)
283            files = {'file': (filename, open(this_file, 'rb').read())}
284        else:
285            if filename:
286                files = {'file': (filename, this_file)}
287            else:
288                files = {'file': this_file}
289
290        try:
291            response = requests.post(self.base + 'file/scan',
292                                     files=files,
293                                     params=params,
294                                     proxies=self.proxies,
295                                     timeout=timeout)
296        except requests.RequestException as e:
297            return dict(error=e.message)
298
299        return _return_response_and_status_code(response)
300
301    @property
302    def get_upload_url(self, timeout=None):
303        """ Get a special URL for submitted files bigger than 32MB.
304
305        In order to submit files bigger than 32MB you need to obtain a special upload URL to which you
306        can POST files up to 200MB in size. This API generates such a URL.
307
308        :param timeout: The amount of time in seconds the request should wait before timing out.
309
310        :return: JSON special upload URL to which you can POST files up to 200MB in size.
311        """
312        params = {'apikey': self.api_key}
313
314        try:
315            response = requests.get(self.base + 'file/scan/upload_url',
316                                    params=params,
317                                    proxies=self.proxies,
318                                    timeout=timeout)
319        except requests.RequestException as e:
320            return dict(error=e.message)
321
322        if response.status_code == requests.codes.ok:
323            return response.json()['upload_url']
324        else:
325            return dict(response_code=response.status_code)
326
327    def rescan_file(self, resource, date='', period='', repeat='', notify_url='', notify_changes_only='', timeout=None):
328        """ Rescan a previously submitted filed or schedule an scan to be performed in the future.
329
330        This API allows you to rescan files present in VirusTotal's file store without having to
331        resubmit them, thus saving bandwidth. You only need to know one of the hashes of the file
332        to rescan.
333
334        :param resource: An md5/sha1/sha256 hash. You can also specify a CSV list made up of a
335        combination of any of the three allowed hashes (up to 25 items), this allows you to perform
336        a batch request with just one single call. Note that the file must already be present in our
337        file store.
338        :param date: (optional) Date in %Y%m%d%H%M%S format (example: 20120725170000) in which the rescan should
339        be performed. If not specified the rescan will be performed immediately.
340        :param period: (optional) Periodicity (in days) with which the file should be rescanned. If this argument
341        is provided the file will be rescanned periodically every period days, if not, the rescan is
342        performed once and not repated again.
343        :param repeat: (optional) Used in conjunction with period to specify the number of times the file should be
344        rescanned. If this argument is provided the file will be rescanned the given amount of times in coherence
345        with the chosen periodicity, if not, the file will be rescanned indefinitely.
346        :param notify_url: (optional) A URL to which a POST notification should be sent when the rescan finishes.
347        :param notify_changes_only: (optional) Used in conjunction with notify_url. Indicates if POST notifications
348        should only be sent if the scan results differ from the previous one.
349        :param timeout: The amount of time in seconds the request should wait before timing out.
350
351        :return: JSON response that contains scan_id and permalink.
352        """
353        params = {'apikey': self.api_key, 'resource': resource}
354
355        try:
356            response = requests.post(self.base + 'file/rescan', params=params, proxies=self.proxies, timeout=timeout)
357        except requests.RequestException as e:
358            return dict(error=e.message)
359
360        return _return_response_and_status_code(response)
361
362    def cancel_rescan_file(self, resource, timeout=None):
363        """ Delete a previously scheduled scan.
364
365        Deletes a scheduled file rescan task. The file rescan api allows you to schedule periodic scans of a file,
366        this API call tells VirusTotal to stop rescanning a file that you have previously enqueued for recurrent
367        scanning.
368
369        :param resource: The md5/sha1/sha256 hash of the file whose dynamic behavioural report you want to retrieve.
370        :param timeout: The amount of time in seconds the request should wait before timing out.
371
372        :return: JSON acknowledgement. In the event that the scheduled scan deletion fails for whatever reason, the
373        response code will be -1.
374        """
375        params = {'apikey': self.api_key, 'resource': resource}
376
377        try:
378            response = requests.post(self.base + 'rescan/delete', params=params, proxies=self.proxies, timeout=timeout)
379        except requests.RequestException as e:
380            return dict(error=e.message)
381
382        return _return_response_and_status_code(response)
383
384    def get_file_report(self, resource, allinfo=1, timeout=None):
385        """ Get the scan results for a file.
386
387        Retrieves a concluded file scan report for a given file. Unlike the public API, this call allows you to also
388        access all the information we have on a particular file (VirusTotal metadata, signature information, structural
389        information, etc.) by using the allinfo parameter described later on.
390
391        :param resource: An md5/sha1/sha256 hash of a file for which you want to retrieve the most recent antivirus
392        report. You may also specify a scan_id (sha256-timestamp as returned by the scan API) to access a specific
393        report. You can also specify a CSV list made up of a combination of hashes and scan_ids (up to 25 items),
394        this allows you to perform a batch request with just one single call.
395        :param allinfo: (optional) If specified and set to one, the call will return additional info, other than the
396        antivirus results, on the file being queried. This additional info includes the output of several tools acting
397        on the file (PDFiD, ExifTool, sigcheck, TrID, etc.), metadata regarding VirusTotal submissions (number of
398        unique sources that have sent the file in the past, first seen date, last seen date, etc.), the output of
399        in-house technologies such as a behavioural sandbox, etc.
400        :param timeout: The amount of time in seconds the request should wait before timing out.
401
402        :return: JSON response
403        """
404        params = {'apikey': self.api_key, 'resource': resource, 'allinfo': allinfo}
405
406        try:
407            response = requests.get(self.base + 'file/report', params=params, proxies=self.proxies, timeout=timeout)
408        except requests.RequestException as e:
409            return dict(error=e.message)
410
411        return _return_response_and_status_code(response)
412
413    def get_file_behaviour(self, this_hash, timeout=None):
414        """ Get a report about the behaviour of the file in sand boxed environment.
415
416        VirusTotal runs a distributed setup of Cuckoo sandbox machines that execute the files we receive. Execution is
417        attempted only once, upon first submission to VirusTotal, and only Portable Executables under 10MB in size are
418        ran. The execution of files is a best effort process, hence, there are no guarantees about a report being
419        generated for a given file in our dataset.
420
421        If a file did indeed produce a behavioural report, a summary of it can be obtained by using the file scan
422        lookup call providing the additional HTTP POST parameter allinfo=1. The summary will appear under the
423        behaviour-v1 property of the additional_info field in the JSON report.
424
425        :param this_hash: The md5/sha1/sha256 hash of the file whose dynamic behavioural report you want to retrieve.
426        :param timeout: The amount of time in seconds the request should wait before timing out.
427
428        :return: full JSON report of the file's execution as returned by the Cuckoo JSON report encoder.
429        """
430        params = {'apikey': self.api_key, 'hash': this_hash}
431
432        try:
433            response = requests.get(self.base + 'file/behaviour', params=params, proxies=self.proxies, timeout=timeout)
434        except requests.RequestException as e:
435            return dict(error=e.message)
436
437        return _return_response_and_status_code(response)
438
439    def get_network_traffic(self, this_hash, timeout=None):
440        """ Get a dump of the network traffic generated by the file.
441
442        VirusTotal runs a distributed setup of Cuckoo sandbox machines that execute the files we receive.
443        Execution is attempted only once, upon first submission to VirusTotal, and only Portable Executables
444        under 10MB in size are ran. The execution of files is a best effort process, hence, there are no
445        guarantees about a report being generated for a given file in our dataset.
446
447        Files that are successfully executed may communicate with certain network resources, all this
448        communication is recorded in a network traffic dump (pcap file). This API allows you to retrieve
449        the network traffic dump generated during the file's execution.
450
451        :param this_hash: The md5/sha1/sha256 hash of the file whose network traffic dump you want to retrieve.
452        :return: Pcap
453        """
454        params = {'apikey': self.api_key, 'hash': this_hash}
455
456        try:
457            response = requests.get(self.base + 'file/network-traffic',
458                                    params=params,
459                                    proxies=self.proxies,
460                                    timeout=timeout)
461        except requests.RequestException as e:
462            return dict(error=e.message)
463
464        try:
465            return _return_response_and_status_code(response)
466        except ValueError:
467            return response.content
468
469    def file_search(self, query, offset=None, timeout=None):
470        """ Search for samples.
471
472        In addition to retrieving all information on a particular file, VirusTotal allows you to perform what we
473        call "advanced reverse searches". Reverse searches take you from a file property to a list of files that
474        match that property. For example, this functionality enables you to retrieve all those files marked by at
475        least one antivirus vendor as Zbot, or all those files that have a size under 90KB and are detected by at
476        least 10 antivirus solutions, or all those PDF files that have an invalid XREF section, etc.
477
478        This API is equivalent to VirusTotal Intelligence advanced searches. A very wide variety of search modifiers
479        are available, including: file size, file type, first submission date to VirusTotal, last submission date to
480        VirusTotal, number of positives, dynamic behavioural properties, binary content, submission file name, and a
481        very long etcetera. The full list of search modifiers allowed for file search queries is documented at:
482        https://www.virustotal.com/intelligence/help/file-search/#search-modifiers
483
484        NOTE:
485        Daily limited! No matter what API step you have licensed, this API call is limited to 50K requests per day.
486        If you need any more, chances are you are approaching your engineering problem erroneously and you can
487        probably solve it using the file distribution call. Do not hesitate to contact us with your particular
488        use case.
489
490        EXAMPLE:
491        search_options = 'type:peexe size:90kb+ positives:5+ behaviour:"taskkill"'
492
493        :param query: A search modifier compliant file search query.
494        :param offset: (optional) The offset value returned by a previously issued identical query, allows you to
495        paginate over the results. If not specified the first 300 matching files sorted according to last submission
496        date to VirusTotal in a descending fashion will be returned.
497        :param timeout: The amount of time in seconds the request should wait before timing out.
498
499        :return: JSON response -  By default the list returned contains at most 300 hashes, ordered according to
500        last submission date to VirusTotal in a descending fashion.
501        """
502        params = dict(apikey=self.api_key, query=query, offset=offset)
503
504        try:
505            response = requests.get(self.base + 'file/search', params=params, proxies=self.proxies, timeout=timeout)
506        except requests.RequestException as e:
507            return dict(error=e.message)
508
509        return _return_response_and_status_code(response)
510
511    def get_file_clusters(self, this_date, timeout=None):
512        """ File similarity clusters for a given time frame.
513
514        VirusTotal has built its own in-house file similarity clustering functionality. At present, this clustering
515        works only on PE, PDF, DOC and RTF files and is based on a very simple structural feature hash. This hash
516        can very often be confused by certain compression and packing strategies, in other words, this clustering
517        logic is no holly grail, yet it has proven itself very useful in the past.
518
519        This API offers a programmatic access to the clustering section of VirusTotal Intelligence:
520        https://www.virustotal.com/intelligence/clustering/
521
522        NOTE:
523        Please note that you must be logged in with a valid VirusTotal Community user account with access to
524        VirusTotal Intelligence in order to be able to view the clustering listing.
525
526        :param this_date: A specific day for which we want to access the clustering details, example: 2013-09-10.
527        :param timeout: The amount of time in seconds the request should wait before timing out.
528
529        :return: JSON object contains several properties
530        num_candidates - Total number of files submitted during the given time frame for which a feature hash could
531                         be calculated.
532        num_clusters   - Total number of clusters generated for the given time period under consideration, a cluster
533                         can be as small as an individual file, meaning that no other feature-wise similar file was
534                         found.
535        size_top200	   - The sum of the number of files in the 200 largest clusters identified.
536        clusters       - List of JSON objects that contain details about the 200 largest clusters identified. These
537                         objects contain 4 properties: id, label, size and avg_positives.. The id field can be used
538                         to then query the search API call for files contained in the given cluster. The label
539                         property is a verbose human-intelligible name for the cluster. The size field is the number
540                         of files that make up the cluster. Finally, avg_positives represents the average number of
541                         antivirus detections that the files in the cluster exhibit.
542        """
543        params = {'apikey': self.api_key, 'date': this_date}
544
545        try:
546            response = requests.get(self.base + 'file/clusters', params=params, proxies=self.proxies, timeout=timeout)
547        except requests.RequestException as e:
548            return dict(error=e.message)
549
550        return _return_response_and_status_code(response)
551
552    def get_file_distribution(self, before='', after='', reports='false', limit='1000', timeout=None):
553        """ Get a live feed with the latest files submitted to VirusTotal.
554
555        Allows you to retrieve a live feed of absolutely all uploaded files to VirusTotal, and download them for
556        further scrutiny. This API requires you to stay synced with the live submissions as only a backlog of 6
557        hours is provided at any given point in time.
558
559        :param before: (optional) Retrieve files received before the given timestamp, in timestamp descending order.
560        :param after: (optional) Retrieve files received after the given timestamp, in timestamp ascending order.
561        :param reports: (optional) Include the files' antivirus results in the response. Possible values are 'true' or
562        'false' (default value is 'false').
563        :param limit: (optional) Retrieve limit file items at most (default: 1000).
564        :param timeout: The amount of time in seconds the request should wait before timing out.
565
566        :return: JSON response: please see https://www.virustotal.com/en/documentation/private-api/#file-distribution
567        """
568        params = {'apikey': self.api_key, 'before': before, 'after': after, 'reports': reports, 'limit': limit}
569
570        try:
571            response = requests.get(self.base + 'file/distribution',
572                                    params=params,
573                                    proxies=self.proxies,
574                                    timeout=timeout)
575        except requests.RequestException as e:
576            return dict(error=e.message)
577
578        return _return_response_and_status_code(response)
579
580    def get_file_feed(self, package=None, timeout=None):
581        """ Get a live file feed with the latest files submitted to VirusTotal.
582
583        Allows you to retrieve a live feed of absolutely all uploaded files to VirusTotal, and download them for
584        further scrutiny, along with their full reports. This API requires you to stay relatively synced with the live
585        submissions as only a backlog of 24 hours is provided at any given point in time.
586
587        This API returns a bzip2 compressed tarball. For per-minute packages the compressed package contains a unique
588        file, the file contains a json per line, this json is a full report on a given file processed by VirusTotal
589        during the given time window. The file report follows the exact same format as the response of the file report
590        API if the allinfo=1 parameter is provided. For hourly packages, the tarball contains 60 files, one per each
591        minute of the window.
592
593        :param package: Indicates a time window to pull reports on all items received during such window.
594                        Only per-minute and hourly windows are allowed, the format is %Y%m%dT%H%M (e.g. 20160304T0900)
595                        or %Y%m%dT%H (e.g. 20160304T09). Time is expressed in UTC.
596        :param timeout: The amount of time in seconds the request should wait before timing out.
597
598        :return: BZIP2 response: please see https://www.virustotal.com/en/documentation/private-api/#file-feed
599        """
600        if package is None:
601            now = datetime.utcnow()
602            five_minutes_ago = now - timedelta(minutes=now.minute % 5 + 5,
603                                               seconds=now.second,
604                                               microseconds=now.microsecond)
605            package = five_minutes_ago.strftime('%Y%m%dT%H%M')
606
607        params = {'apikey': self.api_key, 'package': package}
608
609        try:
610            response = requests.get(self.base + 'file/feed', params=params, proxies=self.proxies, timeout=timeout)
611        except requests.RequestException as e:
612            return dict(error=e.message)
613
614        if response.ok:
615            return response.content
616        elif response.status_code == 400:
617            return dict(error='package sent is either malformed or not within the past 24 hours.',
618                        response_code=response.status_code)
619        elif response.status_code == 403:
620            return dict(error='You tried to perform calls to functions for which you require a Private API key.',
621                        response_code=response.status_code)
622        elif response.status_code == 404:
623            return dict(error='File not found.', response_code=response.status_code)
624        else:
625            return dict(response_code=response.status_code)
626
627    def get_file(self, this_hash, timeout=None):
628        """ Download a file by its hash.
629
630        Downloads a file from VirusTotal's store given one of its hashes. This call can be used in conjuction with
631        the file searching call in order to download samples that match a given set of criteria.
632
633        :param this_hash: The md5/sha1/sha256 hash of the file you want to download.
634        :param timeout: The amount of time in seconds the request should wait before timing out.
635
636        :return: Downloaded file in response.content
637        """
638        params = {'apikey': self.api_key, 'hash': this_hash}
639
640        try:
641            response = requests.get(self.base + 'file/download', params=params, proxies=self.proxies, timeout=timeout)
642        except requests.RequestException as e:
643            return dict(error=e.message)
644
645        if response.status_code == requests.codes.ok:
646            return response.content
647        elif response.status_code == 403:
648            return dict(error='You tried to perform calls to functions for which you require a Private API key.',
649                        response_code=response.status_code)
650        elif response.status_code == 404:
651            return dict(error='File not found.', response_code=response.status_code)
652        else:
653            return dict(response_code=response.status_code)
654
655    def scan_url(self, this_url, timeout=None):
656        """ Submit a URL to be scanned by VirusTotal.
657
658        Allows you to submit URLs to be scanned by VirusTotal. Before performing your submission we encourage you to
659        retrieve the latest report on the URL, if it is recent enough you might want to save time and bandwidth by
660        making use of it.
661
662        :param this_url: The URL that should be scanned. This parameter accepts a list of URLs so as to perform a batch
663        scanning request with just one single call (up to 25 URLs per call). The URLs must be separated by a new line
664        character.
665        :param timeout: The amount of time in seconds the request should wait before timing out.
666
667        :return: JSON response that contains scan_id and permalink.
668        """
669        params = {'apikey': self.api_key, 'url': this_url}
670
671        try:
672            response = requests.post(self.base + 'url/scan', params=params, proxies=self.proxies, timeout=timeout)
673        except requests.RequestException as e:
674            return dict(error=e.message)
675
676        return _return_response_and_status_code(response)
677
678    def get_url_report(self, this_url, scan='0', allinfo=1, timeout=None):
679        """ Get the scan results for a URL.
680
681        :param this_url: A URL for which you want to retrieve the most recent report. You may also specify a scan_id
682        (sha256-timestamp as returned by the URL submission API) to access a specific report. At the same time, you
683        can specify a CSV list made up of a combination of urls and scan_ids (up to 25 items) so as to perform a batch
684        request with one single call. The CSV list must be separated by new line characters.
685        :param scan: (optional) This is an optional parameter that when set to "1" will automatically submit the URL
686        for analysis if no report is found for it in VirusTotal's database. In this case the result will contain a
687        scan_id field that can be used to query the analysis report later on.
688        :param allinfo: (optional) If this parameter is specified and set to "1" additional info regarding the URL
689        (other than the URL scanning engine results) will also be returned. This additional info includes VirusTotal
690        related metadata (first seen date, last seen date, files downloaded from the given URL, etc.) and the output
691        of other tools and datasets when fed with the URL.
692        :param timeout: The amount of time in seconds the request should wait before timing out.
693
694        :return: JSON response
695        """
696
697        params = {'apikey': self.api_key, 'resource': this_url, 'scan': scan, 'allinfo': allinfo}
698
699        try:
700            response = requests.get(self.base + 'url/report', params=params, proxies=self.proxies, timeout=timeout)
701        except requests.RequestException as e:
702            return dict(error=e.message)
703
704        return _return_response_and_status_code(response)
705
706    def get_url_distribution(self, after=None, reports='true', limit=1000, timeout=None):
707        """ Get a live feed with the lastest URLs submitted to VirusTotal.
708
709        Allows you to retrieve a live feed of URLs submitted to VirusTotal, along with their scan reports. This
710        call enables you to stay synced with VirusTotal URL submissions and replicate our dataset.
711
712        :param after: (optional) Retrieve URLs received after the given timestamp, in timestamp ascending order.
713        :param reports:  (optional) When set to "true" each item retrieved will include the results for each particular
714        URL scan (in exactly the same format as the URL scan retrieving API). If the parameter is not specified, each
715        item returned will only contain the scanned URL and its detection ratio.
716        :param limit: (optional) Retrieve limit file items at most (default: 1000).
717        :param timeout: The amount of time in seconds the request should wait before timing out.
718
719        :return: JSON response
720        """
721
722        params = {'apikey': self.api_key, 'after': after, 'reports': reports, 'limit': limit}
723
724        try:
725            response = requests.get(self.base + 'url/distribution',
726                                    params=params,
727                                    proxies=self.proxies,
728                                    timeout=timeout)
729        except requests.RequestException as e:
730            return dict(error=e.message)
731
732        return _return_response_and_status_code(response)
733
734    def get_url_feed(self, package=None, timeout=None):
735        """ Get a live file feed with the latest files submitted to VirusTotal.
736
737        Allows you to retrieve a live feed of reports on absolutely all URLs scanned by VirusTotal. This API requires
738        you to stay relatively synced with the live submissions as only a backlog of 24 hours is provided at any given
739        point in time.
740
741        This API returns a bzip2 compressed tarball. For per-minute packages the compressed package contains a unique
742        file, the file contains a json per line, this json is a full report on a given URL processed by VirusTotal
743        during the given time window. The URL report follows the exact same format as the response of the URL report
744        API if the allinfo=1 parameter is provided. For hourly packages, the tarball contains 60 files, one per each
745        minute of the window.
746
747        :param package: Indicates a time window to pull reports on all items received during such window.
748                        Only per-minute and hourly windows are allowed, the format is %Y%m%dT%H%M (e.g. 20160304T0900)
749                        or %Y%m%dT%H (e.g. 20160304T09). Time is expressed in UTC.
750        :param timeout: The amount of time in seconds the request should wait before timing out.
751
752        :return: BZIP2 response: please see https://www.virustotal.com/en/documentation/private-api/#file-feed
753        """
754        if package is None:
755            now = datetime.utcnow()
756            five_minutes_ago = now - timedelta(minutes=now.minute % 5 + 5,
757                                               seconds=now.second,
758                                               microseconds=now.microsecond)
759            package = five_minutes_ago.strftime('%Y%m%dT%H%M')
760
761        params = {'apikey': self.api_key, 'package': package}
762
763        try:
764            response = requests.get(self.base + 'url/feed', params=params, proxies=self.proxies, timeout=timeout)
765        except requests.RequestException as e:
766            return dict(error=e.message)
767
768        if response.ok:
769            return response.content
770        elif response.status_code == 400:
771            return dict(error='package sent is either malformed or not within the past 24 hours.',
772                        response_code=response.status_code)
773        elif response.status_code == 403:
774            return dict(error='You tried to perform calls to functions for which you require a Private API key.',
775                        response_code=response.status_code)
776        elif response.status_code == 404:
777            return dict(error='File not found.', response_code=response.status_code)
778        else:
779            return dict(response_code=response.status_code)
780
781    def get_ip_report(self, this_ip, timeout=None):
782        """ Get information about a given IP address.
783
784        Retrieves a report on a given IP address (including the information recorded by VirusTotal's Passive DNS
785        infrastructure).
786
787        :param this_ip: A valid IPv4 address in dotted quad notation, for the time being only IPv4 addresses are
788        supported.
789        :param timeout: The amount of time in seconds the request should wait before timing out.
790
791        :return: JSON response
792        """
793        params = {'apikey': self.api_key, 'ip': this_ip}
794
795        try:
796            response = requests.get(self.base + 'ip-address/report',
797                                    params=params,
798                                    proxies=self.proxies,
799                                    timeout=timeout)
800        except requests.RequestException as e:
801            return dict(error=e.message)
802
803        return _return_response_and_status_code(response)
804
805    def get_domain_report(self, this_domain, timeout=None):
806        """ Get information about a given domain.
807
808        Retrieves a report on a given domain (including the information recorded by VirusTotal's passive DNS
809        infrastructure).
810
811        :param this_domain: A domain name.
812        :param timeout: The amount of time in seconds the request should wait before timing out.
813
814        :return: JSON response
815        """
816        params = {'apikey': self.api_key, 'domain': this_domain}
817
818        try:
819            response = requests.get(self.base + 'domain/report', params=params, proxies=self.proxies, timeout=timeout)
820        except requests.RequestException as e:
821            return dict(error=e.message)
822
823        return _return_response_and_status_code(response)
824
825    def put_comments(self, resource, comment, timeout=None):
826        """ Post a comment on a file or URL.
827
828        Allows you to place comments on URLs and files, these comments will be publicly visible in VirusTotal
829        Community, under the corresponding tab in the reports for each particular item.
830
831        Comments can range from URLs and locations where a given file was found in the wild to full reverse
832        engineering reports on a given malware specimen, anything that may help other analysts in extending their
833        knowledge about a particular file or URL.
834
835        :param resource: Either an md5/sha1/sha256 hash of the file you want to review or the URL itself that you want
836        to comment on.
837        :param comment: The actual review, you can tag it using the "#" twitter-like syntax (e.g. #disinfection #zbot)
838        and reference users using the "@" syntax (e.g. @VirusTotalTeam).
839        :param timeout: The amount of time in seconds the request should wait before timing out.
840
841        :return: JSON response
842        """
843        params = {'apikey': self.api_key, 'resource': resource, 'comment': comment}
844
845        try:
846            response = requests.post(self.base + 'comments/put', params=params, proxies=self.proxies, timeout=timeout)
847        except requests.RequestException as e:
848            return dict(error=e.message)
849
850        return _return_response_and_status_code(response)
851
852    def get_comments(self, resource, before=None, timeout=None):
853        """ Get comments for a file or URL.
854
855        Retrieve a list of VirusTotal Community comments for a given file or URL. VirusTotal Community comments are
856        user submitted reviews on a given item, these comments may contain anything from the in-the-wild locations of
857        files up to fully-featured reverse engineering reports on a given sample.
858
859        :param resource: Either an md5/sha1/sha256 hash of the file or the URL itself you want to retrieve.
860        :param before: (optional) A datetime token that allows you to iterate over all comments on a specific item
861        whenever it has been commented on more than 25 times.
862        :param timeout: The amount of time in seconds the request should wait before timing out.
863
864        :return: JSON response - The application answers with the comments sorted in descending order according to
865        their date.
866        """
867        params = dict(apikey=self.api_key, resource=resource, before=before)
868
869        try:
870            response = requests.get(self.base + 'comments/get', params=params, proxies=self.proxies, timeout=timeout)
871        except requests.RequestException as e:
872            return dict(error=e.message)
873
874        return _return_response_and_status_code(response)
875
876
class IntelApi():
    """ To make the best use of your VirusTotal Intelligence account and so, we have exposed some
    VirusTotal Intelligence functionality for programmatic interaction even if you do not have a
    Private Mass API key.
    """

    def __init__(self, api_key, proxies=None):
        """ Store the credentials and connection settings used by every request.

        :param api_key: the API key associated to a VirusTotal Community account with VirusTotal Intelligence
            privileges.
        :param proxies: (optional) value handed unchanged to the 'proxies' argument of each requests call.
        """
        self.api_key = api_key
        self.proxies = proxies
        self.base = 'https://www.virustotal.com/intelligence/'

    def get_hashes_from_search(self, query, page=None, timeout=None):
        """ Run a VirusTotal Intelligence search and fetch one page of results.

        Even if you do not have a Private Mass API key that you can use, you can still automate VirusTotal Intelligence
        searches pretty much in the same way that the searching for files api call works.

        :param query: a VirusTotal Intelligence search string in accordance with the file search documentation.
            <https://www.virustotal.com/intelligence/help/file-search/>
        :param page: the next_page property of the results of a previously issued query to this API. This parameter
            should not be provided if it is the very first query to the API, i.e. if we are retrieving the
            first page of results.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: A tuple (next_page, response) where response is the requests response object and next_page is the
            'next_page' value of its JSON body, or None when the body is not JSON or has no such key. If the
            request itself fails, a dict holding only an 'error' string is returned instead of a tuple.
        """
        params = {'query': query, 'apikey': self.api_key, 'page': page}

        try:
            response = requests.get(self.base + 'search/programmatic/',
                                    params=params,
                                    proxies=self.proxies,
                                    timeout=timeout)
        except requests.RequestException as e:
            # Exceptions have no 'message' attribute on Python 3; str(e) works on both Python 2 and 3.
            return dict(error=str(e))

        try:
            next_page = response.json().get('next_page')
        except ValueError:
            # Body is not JSON (e.g. an error page); the caller still receives the response to inspect.
            next_page = None

        return next_page, response

    def get_file(self, file_hash, save_file_at, timeout=None):
        """ Download a file by its hash and save it to disk.

        Even if you do not have a Private Mass API key that you can use, you can still download files from the
        VirusTotal storage making use of your VirusTotal Intelligence quota, i.e. programmatic downloads will
        also deduct quota.

        :param file_hash: You may use either the md5, sha1 or sha256 hash of the file in order to download it.
        :param save_file_at: Path of where to save the file.
        :param timeout: The amount of time in seconds the request should wait before timing out.

        :return: The downloaded file (response.content) on HTTP 200, after it has been written to
            save_file_at/file_hash. On any other status, a dict with 'response_code' and, for 403/404, an 'error'
            message. If the request itself fails, a dict holding only an 'error' string.
        """
        params = {'hash': file_hash, 'apikey': self.api_key}

        try:
            # No stream=True: the whole body is read into memory below anyway, and a streamed response whose
            # body is never consumed (the error paths) would keep its connection out of the pool.
            response = requests.get(self.base + 'download/',
                                    params=params,
                                    proxies=self.proxies,
                                    timeout=timeout)
        except requests.RequestException as e:
            # Exceptions have no 'message' attribute on Python 3; str(e) works on both Python 2 and 3.
            return dict(error=str(e))

        if response.status_code == requests.codes.ok:
            self.save_downloaded_file(file_hash, save_file_at, response.content)
            return response.content
        elif response.status_code == 403:
            return dict(error='You tried to perform calls to functions for which you require a Private API key.',
                        response_code=response.status_code)
        elif response.status_code == 404:
            return dict(error='File not found.', response_code=response.status_code)
        else:
            return dict(response_code=response.status_code)

    def get_all_file_report_pages(self, query):
        """ Get File Report (All Pages).

        Calls get_hashes_from_search repeatedly, following next_page until it is empty.

        :param query: a VirusTotal Intelligence search string in accordance with the file search documentation.
        :return: dict whose 'results' entry is the list of per-page results, in the order they were fetched. If a
            request fails, the error dict for that request is appended and paging stops.
        """
        responses = []
        next_page = None

        while True:
            outcome = self.get_hashes_from_search(query, next_page)
            if isinstance(outcome, dict):
                # The request itself failed: there is no (next_page, response) pair to unpack.
                responses.append(outcome)
                break

            next_page, response = outcome
            responses.append(_return_response_and_status_code(response))
            if not next_page:
                break

        return dict(results=responses)

    @staticmethod
    def save_downloaded_file(filename, save_file_at, file_stream):
        """ Save Downloaded File to Disk Helper Function

        :param save_file_at: Path of where to save the file.
        :param file_stream: File content (bytes) to write.
        :param filename: Name to save the file.
        """
        filename = os.path.join(save_file_at, filename)
        # Leaving the with-block flushes and closes the file.
        with open(filename, 'wb') as f:
            f.write(file_stream)
974            f.flush()
975
976
class ApiError(Exception):
    """ Module-specific exception type; adds nothing to Exception beyond a distinct name. """
979
980
def _return_response_and_status_code(response):
    """ Turn a requests response into the dict returned by the API wrappers.

    :rtype : dict
    :param response: requests response object
    :return: on HTTP 200 a dict with the decoded JSON body under 'results' plus 'response_code'; for 204 and 403
        a dict with an explanatory 'error' string plus 'response_code'; for any other status a dict holding only
        'response_code'.
    """
    status = response.status_code

    # Success is the only case that carries a payload.
    if status == requests.codes.ok:
        return dict(results=response.json(), response_code=status)

    # Status codes that have a dedicated explanation.
    explanations = {
        204: 'You exceeded the public API request rate limit (4 requests of any nature per minute)',
        403: 'You tried to perform calls to functions for which you require a Private API key.',
    }

    outcome = dict(response_code=status)
    if status in explanations:
        outcome['error'] = explanations[status]
    return outcome
998