# Copyright (C) 2015-2021 Regents of the University of California
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parse the AWS EC2 price list and regenerate the static instance-type tables."""
import datetime
import json
import logging
import os
import textwrap
import unicodedata

from typing import Dict, List, Union, Tuple, Any

logger = logging.getLogger(__name__)
dirname = os.path.dirname(__file__)


# Maps the EC2 region code to the human-readable 'location' string that the
# AWS price list uses to tag each product.
EC2Regions = {'us-west-1': 'US West (N. California)',
              'us-west-2': 'US West (Oregon)',
              'us-east-1': 'US East (N. Virginia)',
              'us-east-2': 'US East (Ohio)',
              'us-gov-west-1': 'AWS GovCloud (US)',
              'ca-central-1': 'Canada (Central)',
              'ap-northeast-1': 'Asia Pacific (Tokyo)',
              'ap-northeast-2': 'Asia Pacific (Seoul)',
              'ap-northeast-3': 'Asia Pacific (Osaka-Local)',
              'ap-southeast-1': 'Asia Pacific (Singapore)',
              'ap-southeast-2': 'Asia Pacific (Sydney)',
              'ap-south-1': 'Asia Pacific (Mumbai)',
              'eu-west-1': 'EU (Ireland)',
              'eu-west-2': 'EU (London)',
              'eu-west-3': 'EU (Paris)',
              'eu-central-1': 'EU (Frankfurt)',
              'sa-east-1': 'South America (Sao Paulo)'}


class InstanceType(object):
    """The hardware description of a single EC2 instance type."""

    __slots__ = ('name', 'cores', 'memory', 'disks', 'disk_capacity')

    def __init__(self, name: str, cores: int, memory: float, disks: float, disk_capacity: float):
        self.name = name  # the API name of the instance type
        self.cores = cores  # the number of cores
        self.memory = memory  # RAM in GiB
        self.disks = disks  # the number of ephemeral (aka 'instance store') volumes
        self.disk_capacity = disk_capacity  # the capacity of each ephemeral volume in GiB

    def __str__(self) -> str:
        return ("Type: {}\n"
                "Cores: {}\n"
                "Disks: {}\n"
                "Memory: {}\n"
                "Disk Capacity: {}\n"
                "".format(
                    self.name,
                    self.cores,
                    self.disks,
                    self.memory,
                    self.disk_capacity))

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, InstanceType):
            return NotImplemented
        # Two instance types are equal when every slot compares equal.
        return all(getattr(self, field) == getattr(other, field) for field in self.__slots__)


def isNumber(s: str) -> bool:
    """
    Determines if a unicode string (that may include commas) is a number.

    :param s: Any unicode string.
    :return: True if s represents a number, False otherwise.
    """
    s = s.replace(',', '')
    try:
        float(s)
        return True
    except ValueError:
        pass
    try:
        # Handles single numeric characters that float() rejects, e.g. vulgar fractions.
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        return False


def parseStorage(storageData: str) -> Union[List[int], Tuple[Union[int, float], float]]:
    """
    Parses EC2 JSON storage param string into a number.

    Examples:
        "2 x 160 SSD"
        "3 x 2000  HDD"
        "EBS only"
        "1 x 410"
        "8 x 1.9 NVMe SSD"
        "900 GB NVMe SSD"

    :param str storageData: EC2 JSON storage param string.
    :return: Two numbers representing: (# of disks), and (disk_capacity in GiB of each disk).
             "EBS only" yields the list [0, 0]; every other form yields a tuple.
    :raises RuntimeError: If the string matches none of the known formats.
    """
    if storageData == "EBS only":
        return [0, 0]

    parseError = 'EC2 JSON format has likely changed.  Error parsing disk specs.'
    specs = storageData.strip().split()
    try:
        # "<count> x <capacity> [...]"; the length check keeps short input from
        # escaping as an IndexError.
        if len(specs) >= 3 and isNumber(specs[0]) and specs[1] == 'x' and isNumber(specs[2]):
            return float(specs[0].replace(',', '')), float(specs[2].replace(',', ''))
        # "<capacity> GB NVMe SSD" describes a single disk.
        if len(specs) >= 4 and isNumber(specs[0]) and specs[1:4] == ['GB', 'NVMe', 'SSD']:
            return 1, float(specs[0].replace(',', ''))
    except ValueError as err:
        # isNumber() accepts some tokens that float() cannot convert.
        raise RuntimeError(parseError) from err
    raise RuntimeError(parseError)


def parseMemory(memAttribute: str) -> float:
    """
    Returns EC2 'memory' string as a float.

    Format should always be '#' GiB (example: '244 GiB' or '1,952 GiB').
    Amazon loves to put commas in their numbers, so we have to accommodate that.
    If the syntax ever changes, this will raise.

    :param memAttribute: EC2 JSON memory param string.
    :return: A float representing memory in GiB.
    :raises RuntimeError: If the string is not of the form '<number> GiB'.
    """
    parseError = 'EC2 JSON format has likely changed.  Error parsing memory.'
    mem = memAttribute.replace(',', '').split()
    if len(mem) >= 2 and mem[1] == 'GiB':
        try:
            return float(mem[0])
        except ValueError as err:
            raise RuntimeError(parseError) from err
    raise RuntimeError(parseError)


def fetchEC2Index(filename: str) -> None:
    """Downloads and writes the AWS Billing JSON to a file using the AWS pricing API.

    See: https://aws.amazon.com/blogs/aws/new-aws-price-list-api/

    :param filename: Path the pretty-printed (indent=4) JSON index is written to.
    :return: Nothing. The index is written to `filename`.
    :raises RuntimeError: If the HTTP response is not OK.
    """
    # Imported here rather than at module level: only this download step needs
    # requests, so the rest of the module stays importable without it.
    import requests

    print('Downloading ~1Gb AWS billing file to parse for information.\n')

    response = requests.get('https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/index.json')
    if not response.ok:
        raise RuntimeError('Error: ' + str(response) + ' :: ' + str(response.text))

    # Parse before creating any file, and only move the finished file into
    # place, so a failure never leaves a truncated index at `filename`.
    index = json.loads(response.text)
    partialFile = filename + '.part'
    with open(partialFile, 'w') as f:
        json.dump(index, f, indent=4)
    os.replace(partialFile, filename)
    print('Download completed successfully!\n')


def fetchEC2InstanceDict(awsBillingJson: Dict[str, Any], region: str) -> Dict[str, InstanceType]:
    """
    Takes the AWS billing JSON and returns the InstanceType objects available in a region.

    :param awsBillingJson: The parsed price list; its 'products' mapping is scanned.
    :param region: The 'location' string of the region (a value of EC2Regions).
    :return: A dict mapping the instance name (example: 't2.micro') to its InstanceType.
    :raises RuntimeError: If the same instance is found twice for the region.
    """
    ec2InstanceList = []
    for v in awsBillingJson['products'].values():
        i = v['attributes']
        # NOTES:
        #
        # 3 tenant types: 'Host' (always $0.00; just a template?)
        #                 'Dedicated' (toil does not support; these are pricier)
        #                 'Shared' (AWS default and what toil uses)
        #
        # The same instance can appear with multiple "operation" values;
        # "RunInstances" is normal
        # "RunInstances:<code>" is e.g. Linux with MS SQL Server installed.
        if not (i.get('location') == region and
                i.get('tenancy') == 'Shared' and
                i.get('operatingSystem') == 'Linux' and
                i.get('operation') == 'RunInstances'):
            continue

        # not reserved or unused; an entry without a usagetype is skipped
        # rather than crashing on None.
        usagetype = i.get('usagetype') or ''
        if not usagetype.endswith('BoxUsage:' + i['instanceType']):
            continue

        disks, disk_capacity = parseStorage(i["storage"])
        instance = InstanceType(name=i["instanceType"],
                                cores=int(i["vcpu"]),  # the JSON stores the count as a string
                                memory=parseMemory(i["memory"]),
                                disks=disks,
                                disk_capacity=disk_capacity)
        if instance in ec2InstanceList:
            raise RuntimeError('EC2 JSON format has likely changed.  '
                               'Duplicate instance {} found.'.format(instance))
        ec2InstanceList.append(instance)
    print('Finished for ' + str(region) + '. ' + str(len(ec2InstanceList)) + ' added.')
    return {instance.name: instance for instance in ec2InstanceList}


def _renderGeneratedModule(instances: List[InstanceType],
                           instancesByRegion: Dict[str, List[str]],
                           now: datetime.datetime) -> str:
    """Builds the full source text of the generated instance-list module."""
    # provenance note, copyright and imports
    header = textwrap.dedent('''
        # !!! AUTOGENERATED FILE !!!
        # Update with: src/toil/utils/toilUpdateEC2Instances.py
        #
        # Copyright (C) 2015-{year} UCSC Computational Genomics Lab
        #
        # Licensed under the Apache License, Version 2.0 (the "License");
        # you may not use this file except in compliance with the License.
        # You may obtain a copy of the License at
        #
        #     http://www.apache.org/licenses/LICENSE-2.0
        #
        # Unless required by applicable law or agreed to in writing, software
        # distributed under the License is distributed on an "AS IS" BASIS,
        # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
        # See the License for the specific language governing permissions and
        # limitations under the License.
        from toil.lib.ec2nodes import InstanceType\n\n\n''').format(year=now.strftime("%Y"))[1:]

    # one entry per instance type, sorted by name
    instanceEntries = ''.join(
        "    '{name}': InstanceType(name='{name}', cores={cores}, memory={memory}, disks={disks}, disk_capacity={disk_capacity}),"
        "\n".format(name=i.name, cores=i.cores, memory=i.memory, disks=i.disks, disk_capacity=i.disk_capacity)
        for i in sorted(instances, key=lambda x: x.name))

    # one entry per region, listing its sorted instance names
    regionEntries = ',\n'.join(
        "    '{regionName}': [{names}]".format(
            regionName=regionName,
            names=', '.join("'{instance}'".format(instance=instance) for instance in sorted(instanceList)))
        for regionName, instanceList in instancesByRegion.items())

    # key for fetching, appended at the end
    regionKey = '\nec2InstancesByRegion = dict((region, [E2Instances[i] for i in instances]) for region, instances in regionDict.items())\n'

    return ''.join([
        header,
        "# {num} Instance Types. Generated {date}.\n".format(num=str(len(instances)), date=str(now)),
        "E2Instances = {\n",
        instanceEntries,
        '}\n\n',
        'regionDict = {\n',
        regionEntries,
        '}\n',
        regionKey,
    ])


def updateStaticEC2Instances() -> None:
    """
    Generates a new python file of fetchable EC2 Instances by region with current prices and specs.

    Takes a few (~3+) minutes to run (you'll need decent internet).

    :return: Nothing.  Writes a new 'generatedEC2Lists.py' file.
    :raises RuntimeError: If there is no existing 'generatedEC2Lists.py' to replace.
    """
    print("Updating Toil's EC2 lists to the most current version from AWS's bulk API.\n"
          "This may take a while, depending on your internet connection (~1Gb file).\n")

    origFile = os.path.join(dirname, 'generatedEC2Lists.py')  # original
    if not os.path.exists(origFile):
        raise RuntimeError('Expected an existing file to update: ' + origFile)
    # use a temporary file until all info is fetched
    genFile = os.path.join(dirname, 'generatedEC2Lists_tmp.py')  # temp

    # filepath to store the aws json request (will be cleaned up)
    # this is done because AWS changes their json format from time to time
    # and debugging is faster with the file stored locally
    awsJsonIndex = os.path.join(dirname, 'index.json')

    if not os.path.exists(awsJsonIndex):
        fetchEC2Index(filename=awsJsonIndex)
    else:
        print('Reusing previously downloaded json @: ' + awsJsonIndex)

    with open(awsJsonIndex, 'r') as f:
        awsProductDict = json.load(f)

    currentEC2List: List[InstanceType] = []
    instancesByRegion: Dict[str, List[str]] = {}
    for regionNickname, regionLocation in EC2Regions.items():
        currentEC2Dict = fetchEC2InstanceDict(awsProductDict, region=regionLocation)
        for instanceName, instanceTypeObj in currentEC2Dict.items():
            if instanceTypeObj not in currentEC2List:
                currentEC2List.append(instanceTypeObj)
            instancesByRegion.setdefault(regionNickname, []).append(instanceName)

    # opening with 'w' truncates any temp file left over from an earlier run
    with open(genFile, 'w') as f:
        f.write(_renderGeneratedModule(currentEC2List, instancesByRegion, datetime.datetime.now()))

    # replace the instance list with a current list, overwriting the
    # original in a single step
    os.replace(genFile, origFile)
    # delete the aws billing json file
    if os.path.exists(awsJsonIndex):
        os.remove(awsJsonIndex)