1# Copyright (C) 2015-2021 Regents of the University of California
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14import datetime
15import json
16import logging
17import os
18import textwrap
19
20import requests
21
22from typing import Dict, List, Union, Tuple, Any
23
logger = logging.getLogger(__name__)
dirname = os.path.dirname(__file__)


# Maps each EC2 region code to the location name that is compared against the
# 'location' attribute of products in the AWS billing JSON.
EC2Regions = {
    # North America
    'us-west-1': 'US West (N. California)',
    'us-west-2': 'US West (Oregon)',
    'us-east-1': 'US East (N. Virginia)',
    'us-east-2': 'US East (Ohio)',
    'us-gov-west-1': 'AWS GovCloud (US)',
    'ca-central-1': 'Canada (Central)',
    # Asia Pacific
    'ap-northeast-1': 'Asia Pacific (Tokyo)',
    'ap-northeast-2': 'Asia Pacific (Seoul)',
    'ap-northeast-3': 'Asia Pacific (Osaka-Local)',
    'ap-southeast-1': 'Asia Pacific (Singapore)',
    'ap-southeast-2': 'Asia Pacific (Sydney)',
    'ap-south-1': 'Asia Pacific (Mumbai)',
    # Europe
    'eu-west-1': 'EU (Ireland)',
    'eu-west-2': 'EU (London)',
    'eu-west-3': 'EU (Paris)',
    'eu-central-1': 'EU (Frankfurt)',
    # South America
    'sa-east-1': 'South America (Sao Paulo)',
}
45
46
class InstanceType(object):
    """
    Value object describing the hardware of one EC2 instance type.

    Two instances compare equal when all five fields are equal, and equal
    instances hash equally, so they can be de-duplicated with sets or used as
    dict keys.  Do not mutate an instance while it is stored in a hashed
    collection.
    """
    __slots__ = ('name', 'cores', 'memory', 'disks', 'disk_capacity')

    def __init__(self, name: str, cores: int, memory: float, disks: float, disk_capacity: float):
        """
        :param name: The API name of the instance type.
        :param cores: The number of cores.
        :param memory: RAM in GiB.
        :param disks: The number of ephemeral (aka 'instance store') volumes.
        :param disk_capacity: The capacity of each ephemeral volume in GiB.
        """
        self.name = name  # the API name of the instance type
        self.cores = cores  # the number of cores
        self.memory = memory  # RAM in GiB
        self.disks = disks  # the number of ephemeral (aka 'instance store') volumes
        self.disk_capacity = disk_capacity  # the capacity of each ephemeral volume in GiB

    def __str__(self) -> str:
        """Return a multi-line, human-readable summary of the instance type."""
        return ("Type: {}\n"
                "Cores: {}\n"
                "Disks: {}\n"
                "Memory: {}\n"
                "Disk Capacity: {}\n"
                "".format(
                self.name,
                self.cores,
                self.disks,
                self.memory,
                self.disk_capacity))

    def __repr__(self) -> str:
        """Return an unambiguous, constructor-style representation for debugging."""
        return ("{}(name={!r}, cores={!r}, memory={!r}, disks={!r}, disk_capacity={!r})"
                "".format(type(self).__name__,
                          self.name,
                          self.cores,
                          self.memory,
                          self.disks,
                          self.disk_capacity))

    def __eq__(self, other: object) -> bool:
        """Field-by-field equality; defers to the other operand for foreign types."""
        if not isinstance(other, InstanceType):
            return NotImplemented
        return all(getattr(self, field) == getattr(other, field)
                   for field in InstanceType.__slots__)

    def __hash__(self) -> int:
        # Defining __eq__ alone would set __hash__ to None and make instances
        # unhashable; hash the same fields that __eq__ compares.
        return hash(tuple(getattr(self, field) for field in InstanceType.__slots__))
80
81
def isNumber(s: str) -> bool:
    """
    Reports whether a unicode string (commas are ignored) represents a number.

    The string counts as a number if ``float`` can parse it, or failing that,
    if ``unicodedata.numeric`` assigns it a numeric value.

    :param s: Any unicode string.
    :return: True if s represents a number, False otherwise.
    """
    withoutCommas = s.replace(',', '')

    # First attempt: ordinary float syntax.
    try:
        float(withoutCommas)
    except ValueError:
        pass
    else:
        return True

    # Second attempt: a character that carries a unicode numeric value.
    import unicodedata
    try:
        unicodedata.numeric(withoutCommas)
    except (TypeError, ValueError):
        return False
    return True
102
103
def _parseStorageNumber(token: str) -> Union[float, None]:
    """Return token (commas ignored) as a float, or None if float() cannot parse it."""
    try:
        return float(token.replace(',', ''))
    except ValueError:
        return None


def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float], float]]:
    """
    Parses EC2 JSON storage param string into a number.

    Examples:
        "2 x 160 SSD"
        "3 x 2000 HDD"
        "EBS only"
        "1 x 410"
        "8 x 1.9 NVMe SSD"
        "900 GB NVMe SSD"

    :param str storageData: EC2 JSON storage param string.
    :return: Two numbers representing: (# of disks), and (disk_capacity in GiB of each disk).
             "EBS only" yields [0, 0]; "<n> x <size> ..." yields (float(n), float(size));
             "<size> GB NVMe SSD" yields (1, float(size)).
    :raises RuntimeError: If the string matches none of the known formats, including
             strings that are empty or too short to be inspected.
    """
    text = storageData.strip()
    if text == "EBS only":
        return [0, 0]

    specs = text.split()
    # Check the token count before indexing, so that malformed input produces the
    # RuntimeError below instead of an IndexError.
    if len(specs) >= 3 and specs[1] == 'x':
        count = _parseStorageNumber(specs[0])
        capacity = _parseStorageNumber(specs[2])
        if count is not None and capacity is not None:
            return count, capacity
    elif len(specs) >= 4 and specs[1:4] == ['GB', 'NVMe', 'SSD']:
        capacity = _parseStorageNumber(specs[0])
        if capacity is not None:
            return 1, capacity

    raise RuntimeError('EC2 JSON format has likely changed.  Error parsing disk specs.')
130
def parseMemory(memAttribute: str) -> float:
    """
    Returns EC2 'memory' string as a float.

    Format should always be '#' GiB (example: '244 GiB' or '1,952 GiB').
    Amazon loves to put commas in their numbers, so we have to accommodate that.
    If the syntax ever changes, this will raise.

    :param memAttribute: EC2 JSON memory param string.
    :return: A float representing memory in GiB.
    :raises RuntimeError: If the string is not '<number> GiB' (this includes empty
             strings, single-token strings, other units and non-numeric amounts).
    """
    mem = memAttribute.replace(',', '').split()
    # Check the token count before indexing, so that malformed input produces the
    # RuntimeError below instead of an IndexError.
    if len(mem) >= 2 and mem[1] == 'GiB':
        try:
            return float(mem[0])
        except ValueError as err:
            raise RuntimeError('EC2 JSON format has likely changed.  Error parsing memory.') from err
    raise RuntimeError('EC2 JSON format has likely changed.  Error parsing memory.')
147
148
def fetchEC2Index(filename: str) -> None:
    """Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.

    See: https://aws.amazon.com/blogs/aws/new-aws-price-list-api/

    The payload is parsed before anything is written, and the file is written to
    a temporary sibling path and then moved into place.  A failed download, a
    malformed payload or an interrupted write therefore never leaves a partial
    file at ``filename``.

    :param filename: Path the re-indented (indent=4) JSON is written to.
    :return: Nothing.
    :raises RuntimeError: If the HTTP response is not OK.
    """
    print('Downloading ~1Gb AWS billing file to parse for information.\n')

    response = requests.get('https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json')
    if not response.ok:
        raise RuntimeError('Error: ' + str(response) + ' :: ' + str(response.text))

    # Parse first: if the body is not valid JSON we fail before touching the filesystem.
    index = json.loads(response.text)

    partialFile = filename + '.partial'
    try:
        with open(partialFile, 'w') as f:
            json.dump(index, f, indent=4)
        # Move the finished file into place in a single step.
        os.replace(partialFile, filename)
    finally:
        # Only present if writing or moving failed; do not leave it behind.
        if os.path.exists(partialFile):
            os.remove(partialFile)
    print('Download completed successfully!\n')
167
168
def fetchEC2InstanceDict(awsBillingJson: Dict[str, Any], region: str) -> Dict[str, InstanceType]:
    """
    Builds InstanceType objects for the EC2 instance types listed for one region.

    :param awsBillingJson: The parsed AWS billing JSON; every entry of its
           'products' mapping is examined.
    :param region: The location name a product's 'location' attribute must equal.
    :return: A dict mapping each instance type name to its InstanceType object.
    :raises RuntimeError: If an identical InstanceType is produced more than once.
    """
    found = []
    for product in awsBillingJson['products'].values():
        attrs = product['attributes']

        # Tenancy comes in three flavours:
        #   'Host'      - always $0.00; just a template?
        #   'Dedicated' - toil does not support; these are pricier
        #   'Shared'    - AWS default and what toil uses
        if attrs.get('location') != region:
            continue
        if attrs.get('tenancy') != 'Shared':
            continue
        if attrs.get('operatingSystem') != 'Linux':
            continue
        # One instance may be listed under several "operation" values:
        # "RunInstances" is the normal one, while "RunInstances:<code>" is
        # e.g. Linux with MS SQL Server installed.
        if attrs.get('operation') != 'RunInstances':
            continue

        # Keep only normal usage (not reserved or unused).
        if not attrs.get('usagetype').endswith('BoxUsage:' + attrs['instanceType']):
            continue

        disks, disk_capacity = parseStorage(attrs["storage"])
        candidate = InstanceType(name=attrs["instanceType"],
                                 cores=attrs["vcpu"],
                                 memory=parseMemory(attrs["memory"]),
                                 disks=disks,
                                 disk_capacity=disk_capacity)
        if candidate in found:
            raise RuntimeError('EC2 JSON format has likely changed.  '
                               'Duplicate instance {} found.'.format(candidate))
        found.append(candidate)

    print('Finished for ' + str(region) + '.  ' + str(len(found)) + ' added.')
    return {entry.name: entry for entry in found}
207
208
def _renderEC2Lists(instanceTypes: "List[InstanceType]", instancesByRegion: Dict[str, List[str]]) -> str:
    """Renders the E2Instances, regionDict and ec2InstancesByRegion source text of the generated module."""
    parts = ["# {num} Instance Types.  Generated {date}.\n".format(
        num=str(len(instanceTypes)), date=str(datetime.datetime.now()))]

    # the dict of all instance types, sorted by name
    parts.append("E2Instances = {\n")
    for i in sorted(instanceTypes, key=lambda x: x.name):
        parts.append(
            "    '{name}': InstanceType(name='{name}', cores={cores}, memory={memory}, disks={disks}, disk_capacity={disk_capacity}),"
            "\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity))
    parts.append('}\n\n')

    # the dict of instance type names available in each region
    regionLines = []
    for regionName, instanceList in instancesByRegion.items():
        names = ', '.join("'{instance}'".format(instance=instance) for instance in sorted(instanceList))
        regionLines.append("              '{regionName}': [".format(regionName=regionName) + names + ']')
    parts.append('regionDict = {\n')
    parts.append(',\n'.join(regionLines))
    parts.append('}\n')

    # key for fetching at the end
    parts.append('\nec2InstancesByRegion = dict((region, [E2Instances[i] for i in instances]) for region, instances in regionDict.items())\n')
    return ''.join(parts)


def updateStaticEC2Instances() -> None:
    """
    Generates a new python file of fetchable EC2 Instances by region with current prices and specs.

    Takes a few (~3+) minutes to run (you'll need decent internet).

    The new content is written to 'generatedEC2Lists_tmp.py' and only moved
    over 'generatedEC2Lists.py' once it is complete.  The downloaded
    'index.json' is deleted after a successful run; if one already exists when
    the run starts, it is reused instead of being downloaded again.

    :return: Nothing.  Writes a new 'generatedEC2Lists.py' file.
    :raises RuntimeError: If 'generatedEC2Lists.py' does not already exist next to this module.
    """
    print("Updating Toil's EC2 lists to the most current version from AWS's bulk API.\n"
          "This may take a while, depending on your internet connection (~1Gb file).\n")

    origFile = os.path.join(dirname, 'generatedEC2Lists.py')  # original
    # Raise explicitly rather than assert, so the check still runs under `python -O`.
    if not os.path.exists(origFile):
        raise RuntimeError('Expected an existing generated file to replace at: ' + origFile)
    # use a temporary file until all info is fetched
    genFile = os.path.join(dirname, 'generatedEC2Lists_tmp.py')  # temp
    if os.path.exists(genFile):
        os.remove(genFile)

    # filepath to store the aws json request (will be cleaned up)
    # this is done because AWS changes their json format from time to time
    # and debugging is faster with the file stored locally
    awsJsonIndex = os.path.join(dirname, 'index.json')

    if not os.path.exists(awsJsonIndex):
        fetchEC2Index(filename=awsJsonIndex)
    else:
        print('Reusing previously downloaded json @: ' + awsJsonIndex)

    with open(awsJsonIndex, 'r') as f:
        # parse straight from the file instead of first reading it all into one string
        awsProductDict = json.load(f)

    currentEC2List = []
    instancesByRegion: Dict[str, List[str]] = {}
    for regionNickname in EC2Regions:
        currentEC2Dict = fetchEC2InstanceDict(awsProductDict, region=EC2Regions[regionNickname])
        for instanceName, instanceTypeObj in currentEC2Dict.items():
            # the same instance type is usually offered in several regions; list it once
            if instanceTypeObj not in currentEC2List:
                currentEC2List.append(instanceTypeObj)
            instancesByRegion.setdefault(regionNickname, []).append(instanceName)

    # provenance note, copyright and imports ([1:] drops the newline that follows the opening quotes)
    header = textwrap.dedent('''
        # !!! AUTOGENERATED FILE !!!
        # Update with: src/toil/utils/toilUpdateEC2Instances.py
        #
        # Copyright (C) 2015-{year} UCSC Computational Genomics Lab
        #
        # Licensed under the Apache License, Version 2.0 (the "License");
        # you may not use this file except in compliance with the License.
        # You may obtain a copy of the License at
        #
        #     http://www.apache.org/licenses/LICENSE-2.0
        #
        # Unless required by applicable law or agreed to in writing, software
        # distributed under the License is distributed on an "AS IS" BASIS,
        # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
        # See the License for the specific language governing permissions and
        # limitations under the License.
        from toil.lib.ec2nodes import InstanceType\n\n\n''').format(year=datetime.date.today().strftime("%Y"))[1:]

    # write the whole temporary file in one go
    with open(genFile, 'w') as f:
        f.write(header)
        f.write(_renderEC2Lists(currentEC2List, instancesByRegion))

    # replace the instance list with a current list; os.replace overwrites the
    # original in one step, so there is no window in which neither file exists
    os.replace(genFile, origFile)
    # delete the aws billing json file
    if os.path.exists(awsJsonIndex):
        os.remove(awsJsonIndex)
310