1# Copyright (C) 2015-2021 Regents of the University of California
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
7#     http://www.apache.org/licenses/LICENSE-2.0
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14import datetime
15import json
16import logging
17import os
18import textwrap
20import requests
22from typing import Dict, List, Union, Tuple, Any
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Directory that contains this source file; the generated EC2 list and the
# cached billing JSON are read from / written to this directory.
dirname = os.path.dirname(__file__)

# Maps each EC2 region code to the location name that is compared against the
# 'location' attribute of products in the AWS billing JSON.  Iteration order
# of this dict determines the order in which regions are processed.
EC2Regions = {
    'us-west-1': 'US West (N. California)',
    'us-west-2': 'US West (Oregon)',
    'us-east-1': 'US East (N. Virginia)',
    'us-east-2': 'US East (Ohio)',
    'us-gov-west-1': 'AWS GovCloud (US)',
    'ca-central-1': 'Canada (Central)',
    'ap-northeast-1': 'Asia Pacific (Tokyo)',
    'ap-northeast-2': 'Asia Pacific (Seoul)',
    'ap-northeast-3': 'Asia Pacific (Osaka-Local)',
    'ap-southeast-1': 'Asia Pacific (Singapore)',
    'ap-southeast-2': 'Asia Pacific (Sydney)',
    'ap-south-1': 'Asia Pacific (Mumbai)',
    'eu-west-1': 'EU (Ireland)',
    'eu-west-2': 'EU (London)',
    'eu-west-3': 'EU (Paris)',
    'eu-central-1': 'EU (Frankfurt)',
    'sa-east-1': 'South America (Sao Paulo)',
}
class InstanceType(object):
    """
    Plain record describing one EC2 instance type.

    Two instances compare equal when all five fields are equal.  Because
    ``__eq__`` is defined without ``__hash__``, instances are not hashable.
    """
    __slots__ = ('name', 'cores', 'memory', 'disks', 'disk_capacity')

    def __init__(self, name: str, cores: int, memory: float, disks: float, disk_capacity: float):
        """
        :param name: The API name of the instance type.
        :param cores: The number of cores.
        :param memory: RAM in GiB.
        :param disks: The number of ephemeral (aka 'instance store') volumes.
        :param disk_capacity: The capacity of each ephemeral volume in GiB.
        """
        self.name = name
        self.cores = cores
        self.memory = memory
        self.disks = disks
        self.disk_capacity = disk_capacity

    def __str__(self) -> str:
        """Return a multi-line, human-readable summary (one field per line)."""
        return (f"Type: {self.name}\n"
                f"Cores: {self.cores}\n"
                f"Disks: {self.disks}\n"
                f"Memory: {self.memory}\n"
                f"Disk Capacity: {self.disk_capacity}\n")

    def __eq__(self, other: object) -> bool:
        """Field-by-field equality; defers to the other operand for foreign types."""
        if not isinstance(other, InstanceType):
            return NotImplemented
        # Compare in slot order (name, cores, memory, disks, disk_capacity),
        # stopping at the first field that differs.
        return all(getattr(self, field) == getattr(other, field)
                   for field in InstanceType.__slots__)
def isNumber(s: str) -> bool:
    """
    Report whether a unicode string (commas allowed) represents a number.

    Commas are removed first.  The remainder counts as a number if ``float()``
    accepts it or, failing that, if ``unicodedata.numeric()`` accepts it
    (for example the single character '½').

    :param s: Any unicode string.
    :return: True if s represents a number, False otherwise.
    """
    stripped = s.replace(',', '')
    try:
        float(stripped)
    except ValueError:
        # Not a float literal; fall back to the Unicode numeric lookup.
        import unicodedata
        try:
            unicodedata.numeric(stripped)
        except (TypeError, ValueError):
            return False
    return True
def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float], float]]:
    """
    Parses EC2 JSON storage param string into a disk count and per-disk capacity.

    Examples:
        "2 x 160 SSD"
        "3 x 2000 HDD"
        "EBS only"
        "1 x 410"
        "8 x 1.9 NVMe SSD"
        "900 GB NVMe SSD"

    Recognized layouts are "<count> x <capacity> ..." and "<capacity> GB NVMe SSD"
    (a single disk).  Commas inside numbers are ignored.

    :param str storageData: EC2 JSON storage param string.
    :return: Two numbers representing: (# of disks), and (disk_capacity in GiB of each disk).
             For "EBS only" this is the list [0, 0]; otherwise it is a tuple.
    :raises RuntimeError: if the string matches none of the recognized layouts,
             including strings with too few tokens or non-numeric amounts.
    """
    if storageData == "EBS only":
        # Kept as a list (not a tuple) so the returned value is unchanged for
        # existing callers.
        return [0, 0]

    def toFloat(token: str) -> float:
        # Amazon writes thousands separators into its numbers (e.g. "1,900").
        return float(token.replace(',', ''))

    specs = storageData.strip().split()
    try:
        # Length is checked before indexing so short strings fall through to
        # the descriptive RuntimeError instead of raising IndexError.
        if len(specs) >= 3 and specs[1] == 'x':
            return toFloat(specs[0]), toFloat(specs[2])
        # The slice compares safely even when fewer than four tokens exist.
        if specs[1:4] == ['GB', 'NVMe', 'SSD']:
            return 1, toFloat(specs[0])
    except ValueError as err:
        # A token in a numeric position was not parseable by float().
        raise RuntimeError('EC2 JSON format has likely changed.  Error parsing disk specs.') from err
    raise RuntimeError('EC2 JSON format has likely changed.  Error parsing disk specs.')
def parseMemory(memAttribute: str) -> float:
    """
    Returns EC2 'memory' string as a float.

    Format should always be '#' GiB (example: '244 GiB' or '1,952 GiB').
    Amazon loves to put commas in their numbers, so we have to accommodate that.
    If the syntax ever changes, this will raise.

    :param memAttribute: EC2 JSON memory param string.
    :return: A float representing memory in GiB.
    :raises RuntimeError: if the string is not a number followed by 'GiB'
             (including empty or single-token values such as 'NA').
    """
    mem = memAttribute.replace(',', '').split()
    # Check the token count before indexing so short values reach the
    # descriptive error below rather than raising IndexError.
    if len(mem) >= 2 and mem[1] == 'GiB':
        try:
            return float(mem[0])
        except ValueError as err:
            # The unit matched but the amount was not numeric.
            raise RuntimeError('EC2 JSON format has likely changed.  Error parsing memory.') from err
    raise RuntimeError('EC2 JSON format has likely changed.  Error parsing memory.')
def fetchEC2Index(filename: str) -> None:
    """
    Downloads the AWS Billing JSON using the AWS pricing API and writes it to a file.

    See: https://aws.amazon.com/blogs/aws/new-aws-price-list-api/

    :param filename: Path of the file to write; the JSON is re-serialized with
             a 4-space indent.  An existing file at this path is overwritten.
    :return: Nothing.
    :raises RuntimeError: if the HTTP response does not report success.
    """
    print('Downloading ~1Gb AWS billing file to parse for information.\n')

    # NOTE(review): no timeout is passed to requests.get, so a stalled
    # connection will block indefinitely.
    response = requests.get('https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json')
    if not response.ok:
        raise RuntimeError('Error: ' + str(response) + ' :: ' + str(response.text))

    # Parse and re-serialize BEFORE opening the output file: opening with 'w'
    # truncates it, so a parse failure after opening would leave an empty file
    # behind at `filename`.
    prettyJson = json.dumps(json.loads(response.text), indent=4)
    with open(filename, 'w') as f:
        f.write(prettyJson)
    print('Download completed successfully!\n')
def fetchEC2InstanceDict(awsBillingJson: Dict[str, Any], region: str) -> Dict[str, InstanceType]:
    """
    Builds InstanceType objects for one region from the parsed AWS billing JSON.

    Only products whose attributes describe shared-tenancy Linux instances
    launched with the plain 'RunInstances' operation, and whose usage type
    ends in 'BoxUsage:<instanceType>', are included.

    :param awsBillingJson: Parsed billing JSON; its 'products' mapping is scanned.
    :param region: Location name compared against each product's 'location' attribute.
    :return: A dict mapping instance type name (from the 'instanceType'
             attribute) to its InstanceType object.
    :raises RuntimeError: if an InstanceType equal to an already collected one
             is encountered.
    """
    collected = []
    for product in awsBillingJson['products'].values():
        attrs = product['attributes']
        # NOTES:
        #
        # 3 tenant types: 'Host' (always $0.00; just a template?)
        #                 'Dedicated' (toil does not support; these are pricier)
        #                 'Shared' (AWS default and what toil uses)
        #
        # The same instance can appear with multiple "operation" values;
        # "RunInstances" is normal
        # "RunInstances:<code>" is e.g. Linux with MS SQL Server installed.
        wanted = (attrs.get('location') == region and
                  attrs.get('tenancy') == 'Shared' and
                  attrs.get('operatingSystem') == 'Linux' and
                  attrs.get('operation') == 'RunInstances')
        if not wanted:
            continue

        # Skip reserved or unused capacity entries.
        if not attrs.get('usagetype').endswith('BoxUsage:' + attrs['instanceType']):
            continue

        disks, disk_capacity = parseStorage(attrs["storage"])
        candidate = InstanceType(name=attrs["instanceType"],
                                 cores=attrs["vcpu"],
                                 memory=parseMemory(attrs["memory"]),
                                 disks=disks,
                                 disk_capacity=disk_capacity)
        if candidate in collected:
            raise RuntimeError('EC2 JSON format has likely changed.  '
                               'Duplicate instance {} found.'.format(candidate))
        collected.append(candidate)

    print('Finished for ' + str(region) + '.  ' + str(len(collected)) + ' added.')
    return {entry.name: entry for entry in collected}
def updateStaticEC2Instances() -> None:
    """
    Generates a new python file of fetchable EC2 Instances by region with current prices and specs.

    Takes a few (~3+) minutes to run (you'll need decent internet).

    The new content is written to 'generatedEC2Lists_tmp.py' first and then
    moved over 'generatedEC2Lists.py' in a single replace step, so the
    original file is never deleted before its replacement is in place.  The
    downloaded 'index.json' is removed afterwards.

    :return: Nothing.  Writes a new 'generatedEC2Lists.py' file.
    """
    print("Updating Toil's EC2 lists to the most current version from AWS's bulk API.\n"
          "This may take a while, depending on your internet connection (~1Gb file).\n")

    origFile = os.path.join(dirname, 'generatedEC2Lists.py')  # original
    assert os.path.exists(origFile)
    # use a temporary file until all info is fetched
    genFile = os.path.join(dirname, 'generatedEC2Lists_tmp.py')  # temp
    if os.path.exists(genFile):
        os.remove(genFile)

    # filepath to store the aws json request (will be cleaned up)
    # this is done because AWS changes their json format from time to time
    # and debugging is faster with the file stored locally
    awsJsonIndex = os.path.join(dirname, 'index.json')

    if not os.path.exists(awsJsonIndex):
        fetchEC2Index(filename=awsJsonIndex)
    else:
        print('Reusing previously downloaded json @: ' + awsJsonIndex)

    with open(awsJsonIndex, 'r') as f:
        awsProductDict = json.loads(f.read())

    # Collect the distinct instance types across all regions, and the names
    # of the instance types available in each region.
    currentEC2List = []
    instancesByRegion: Dict[str, List[str]] = {}
    for regionNickname, regionLocation in EC2Regions.items():
        currentEC2Dict = fetchEC2InstanceDict(awsProductDict, region=regionLocation)
        for instanceName, instanceTypeObj in currentEC2Dict.items():
            if instanceTypeObj not in currentEC2List:
                currentEC2List.append(instanceTypeObj)
            instancesByRegion.setdefault(regionNickname, []).append(instanceName)

    # provenance note, copyright and imports
    # ([1:] drops the newline that directly follows the opening quotes)
    header = textwrap.dedent('''
        # !!! AUTOGENERATED FILE !!!
        # Update with: src/toil/utils/toilUpdateEC2Instances.py
        #
        # Copyright (C) 2015-{year} UCSC Computational Genomics Lab
        #
        # Licensed under the Apache License, Version 2.0 (the "License");
        # you may not use this file except in compliance with the License.
        # You may obtain a copy of the License at
        #
        #     http://www.apache.org/licenses/LICENSE-2.0
        #
        # Unless required by applicable law or agreed to in writing, software
        # distributed under the License is distributed on an "AS IS" BASIS,
        # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
        # See the License for the specific language governing permissions and
        # limitations under the License.
        from toil.lib.ec2nodes import InstanceType\n\n\n''').format(year=datetime.date.today().strftime("%Y"))[1:]

    # header of total EC2 instance type list
    summaryLine = "# {num} Instance Types.  Generated {date}.\n".format(
        num=str(len(currentEC2List)), date=str(datetime.datetime.now()))

    # one dict entry per instance type, sorted by name
    instanceLines = [
        "    '{name}': InstanceType(name='{name}', cores={cores}, memory={memory}, disks={disks}, disk_capacity={disk_capacity}),"
        "\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity)
        for i in sorted(currentEC2List, key=lambda x: x.name)
    ]

    # one dict entry per region listing its instance type names, sorted;
    # entries are joined with ',\n' so the last one carries no trailing comma
    regionLines = [
        "              '{regionName}': [{instances}]".format(
            regionName=regionName,
            instances=', '.join("'{instance}'".format(instance=instance)
                                for instance in sorted(instanceList)))
        for regionName, instanceList in instancesByRegion.items()
    ]

    # key for fetching, appended at the end
    regionKey = '\nec2InstancesByRegion = dict((region, [E2Instances[i] for i in instances]) for region, instances in regionDict.items())\n'

    generated = ''.join([
        header,
        summaryLine,
        "E2Instances = {\n",
        ''.join(instanceLines),
        '}\n\n',
        'regionDict = {\n',
        ',\n'.join(regionLines),
        '}\n',
        regionKey,
    ])
    with open(genFile, 'w') as f:
        f.write(generated)

    # Replace the instance list with the current list.  os.replace overwrites
    # the destination in one step, so there is no window in which the original
    # has been removed but the new file is not yet in place.
    os.replace(genFile, origFile)
    # delete the aws billing json file
    if os.path.exists(awsJsonIndex):
        os.remove(awsJsonIndex)