# ============================================================
# This file attempts to obtain a global lock (for a given
# region / account combination).
#
# This makes one attempt to get the lock and will set the
# won_lock variable to True or False to indicate whether
# or not we got the lock.
#
# It's expected that this will be executed in a retry loop
# so that if we don't get the lock we delay then try again.
#
# This should only be used in a block with cleanup-lock.yaml
# included in the always clause to ensure the lock is released.
#
# There are several variables that control the locking behaviour:
# * lock_timeout_seconds
#   How old a lock must be before it's assumed to be an expired
#   lock that was not cleaned up by the owner. Any locks older
#   than this will not prevent a lock being obtained and will
#   be deleted when a new process obtains the lock.
# * lock_log_group_prefix
#   The log_group prefix that represents the lock being obtained.
#   This must be the same across all processes trying to obtain
#   the lock.
# * lock_process_id
#   A unique identifier of this process. Each process that might
#   attempt to obtain the lock must have a different identifier.
#   This defaults to the resource_prefix which is generally
#   appropriate.
# * max_obtain_lock_attempts
#   How many attempts to make to get the lock before giving up
#   NB: This is actually done in main.yaml
# * obtain_lock_delay_seconds
#   How long to delay after failing to get the lock before
#   trying again.
#   NB: This is actually done in obtain-lock-wrapper.yaml
#
# The locking here is based around creating cloudwatch log groups.
# This resource was chosen because:
#  A) it's free
#  B) we have a built in grouping concept because of the hierarchy
#     that allows us to easily group attempts for the same lock
#  C) the creation time is tracked and returned which gives us
#     a mechanism for deterministically picking a winner
#
# Each lock is represented by a log group prefix. Each attempt
# to obtain the lock is a log group of the lock_process_id below
# that prefix.
#
# The winning lock is the one with the earliest creation time.
#
# To prevent a hanging lock from permanently hanging the build
# lock attempts older than the lock timeout are ignored and
# cleaned up by the next process to win the lock.
# ============================================================

# Defines the &aws_connection_info anchor that the cloudwatchlogs tasks
# in this file merge into their module arguments with "<<:".
# no_log keeps the access key, secret key and token out of the task output.
- name: set up aws connection info
  set_fact:
    aws_connection_info: &aws_connection_info
      aws_access_key: "{{ aws_access_key }}"
      aws_secret_key: "{{ aws_secret_key }}"
      security_token: "{{ security_token }}"
      region: "{{ aws_region }}"
  no_log: true

# The attempt's log group lives directly below the shared lock prefix and
# is named after this process (lock_process_id, else resource_prefix).
- name: Set lock_attempt_log_group_name
  set_fact:
    lock_attempt_log_group_name: "{{ lock_log_group_prefix }}/{{ lock_process_id | default(resource_prefix) }}"

# Note the overwrite below to ensure that the creation time
# is updated. This is important as we calculate expiry relative
# to the attempt creation.
#
# Because of this it's important that we delete the attempt
# if we don't get the lock. Otherwise we can get a deadlock
# where the stale attempt from one process wins, but then
# because that process updates the creation date it doesn't
# consider itself to have won.
# Register this process's attempt. The registered result's creation_time
# is used below as the reference point for expiry.
- name: Create Lock Attempt Log Group
  cloudwatchlogs_log_group:
    log_group_name: "{{ lock_attempt_log_group_name }}"
    state: present
    overwrite: true
    <<: *aws_connection_info
  register: lock_attempt_log_group_result

# List every attempt (log group) that sits below the shared lock prefix.
- name: Get Lock Attempt Lock Groups
  cloudwatchlogs_log_group_info:
    log_group_name: "{{ lock_log_group_prefix }}/"
    <<: *aws_connection_info
  register: lock_attempt_log_groups

# Attempts created before this timestamp are treated as expired.
# creation_time is compared in milliseconds, hence the "* 1000".
# lock_timeout_seconds is coerced with "| int" so a value supplied as a
# string (e.g. via extra vars) is multiplied numerically rather than
# being repeated as text.
- name: Calculate Expired Lock Attempt Timestamp
  set_fact:
    expired_lock_timestamp: >-
      {{ lock_attempt_log_group_result.creation_time
         - ((lock_timeout_seconds | int) * 1000) }}

# Split the listed attempts into those older than the cut-off (expired)
# and the rest (active, i.e. still competing for the lock).
- name: Get Expired and Active Lock Attempts
  set_fact:
    expired_lock_attempts: >-
      {{ lock_attempt_log_groups.log_groups
         | selectattr('creation_time', 'lt', expired_lock_timestamp | int)
         | list }}
    active_lock_attempts: >-
      {{ lock_attempt_log_groups.log_groups
         | selectattr('creation_time', 'ge', expired_lock_timestamp | int)
         | list }}

# The earliest active attempt wins. If no active attempt was listed
# (NOTE(review): presumably only possible when the listing does not yet
# include the group created above - confirm), fall back to an empty
# mapping so this run is treated as "lock not won" instead of failing
# on an undefined value.
- name: Pick Winning Lock Attempt
  set_fact:
    winning_lock_attempt: >-
      {{ active_lock_attempts
         | sort(attribute='creation_time')
         | first
         | default({}) }}

# won_lock is the result callers read: true only when the winning attempt
# is the log group this process created.
- name: Determine if Won Lock
  set_fact:
    won_lock: >-
      {{ (winning_lock_attempt.log_group_name | default(''))
         == lock_attempt_log_group_name }}

# Remove the lock attempt if we didn't get the lock. This prevents
# our stale lock attempt blocking another process from getting the lock.
# See more detailed comment above Create Lock Attempt Log Group
- name: Remove Failed Lock Attempt Log Group
  cloudwatchlogs_log_group:
    log_group_name: "{{ lock_attempt_log_group_name }}"
    state: absent
    <<: *aws_connection_info
  when: "not won_lock|bool"

# Only the winner cleans up expired attempts left behind by other processes.
- name: Delete Expired Lock Attempts
  cloudwatchlogs_log_group:
    log_group_name: "{{ item.log_group_name }}"
    state: absent
    <<: *aws_connection_info
  when: "won_lock|bool"
  loop: "{{ expired_lock_attempts }}"