1# -*- coding: utf-8 -*- 2# © Copyright EnterpriseDB UK Limited 2011-2021 3# 4# This file is part of Barman. 5# 6# Barman is free software: you can redistribute it and/or modify 7# it under the terms of the GNU General Public License as published by 8# the Free Software Foundation, either version 3 of the License, or 9# (at your option) any later version. 10# 11# Barman is distributed in the hope that it will be useful, 12# but WITHOUT ANY WARRANTY; without even the implied warranty of 13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14# GNU General Public License for more details. 15# 16# You should have received a copy of the GNU General Public License 17# along with Barman. If not, see <http://www.gnu.org/licenses/>. 18 19""" 20This module contains functions to retrieve information about xlog 21files 22""" 23 24import collections 25import os 26import re 27from functools import partial 28from tempfile import NamedTemporaryFile 29 30from barman.exceptions import ( 31 BadHistoryFileContents, 32 BadXlogSegmentName, 33 CommandException, 34 WalArchiveContentError, 35) 36 37# xlog file segment name parser (regular expression) 38_xlog_re = re.compile( 39 r""" 40 ^ 41 ([\dA-Fa-f]{8}) # everything has a timeline 42 (?: 43 ([\dA-Fa-f]{8})([\dA-Fa-f]{8}) # segment name, if a wal file 44 (?: # and optional 45 \.[\dA-Fa-f]{8}\.backup # offset, if a backup label 46 | 47 \.partial # partial, if a partial file 48 )? 49 | 50 \.history # or only .history, if a history file 51 ) 52 $ 53 """, 54 re.VERBOSE, 55) 56 57# xlog location parser for concurrent backup (regular expression) 58_location_re = re.compile(r"^([\dA-F]+)/([\dA-F]+)$") 59 60# Taken from xlog_internal.h from PostgreSQL sources 61 62#: XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2 63#: and larger than XLOG_BLCKSZ (preferably, a great deal larger than 64#: XLOG_BLCKSZ). 
DEFAULT_XLOG_SEG_SIZE = 1 << 24

#: This namedtuple is a container for the information
#: contained inside history files
HistoryFileData = collections.namedtuple(
    "HistoryFileData", "tli parent_tli switchpoint reason"
)


def _matched_xlog_name(path):
    """
    Return the part of the base name of ``path`` recognised as an xlog
    file name, or None if the base name is not a valid xlog file name.

    :param str path: a full file path or a simple file name
    :rtype: str|None
    """
    match = _xlog_re.match(os.path.basename(path))
    return match.group(0) if match else None


def is_any_xlog_file(path):
    """
    Return True if the xlog is either a WAL segment, a .backup file,
    a .partial file or a .history file, False otherwise.

    It supports either a full file path or a simple file name.

    :param str path: the file name to test
    :rtype: bool
    """
    return _matched_xlog_name(path) is not None


def is_history_file(path):
    """
    Return True if the xlog is a .history file, False otherwise

    It supports either a full file path or a simple file name.

    :param str path: the file name to test
    :rtype: bool
    """
    name = _matched_xlog_name(path)
    return name is not None and name.endswith(".history")


def is_backup_file(path):
    """
    Return True if the xlog is a .backup file, False otherwise

    It supports either a full file path or a simple file name.

    :param str path: the file name to test
    :rtype: bool
    """
    name = _matched_xlog_name(path)
    return name is not None and name.endswith(".backup")


def is_partial_file(path):
    """
    Return True if the xlog is a .partial file, False otherwise

    It supports either a full file path or a simple file name.

    :param str path: the file name to test
    :rtype: bool
    """
    name = _matched_xlog_name(path)
    return name is not None and name.endswith(".partial")


def is_wal_file(path):
    """
    Return True if the xlog is a regular xlog file, False otherwise

    A regular xlog file is a valid xlog name that is neither a .backup,
    a .history nor a .partial file.

    It supports either a full file path or a simple file name.

    :param str path: the file name to test
    :rtype: bool
    """
    name = _matched_xlog_name(path)
    if name is None:
        return False
    return not name.endswith((".backup", ".history", ".partial"))


def decode_segment_name(path):
    """
    Retrieve the timeline, log ID and segment ID
    from the name of a xlog segment

    It can handle either a full file path or a simple file name.

    For a .history file only the timeline is available: the log ID and
    the segment ID are returned as None.

    :param str path: the file name to decode
    :rtype: list[int|None]
    :raise BadXlogSegmentName: if the name is not a valid xlog file name
    """
    name = os.path.basename(path)
    match = _xlog_re.match(name)
    if not match:
        raise BadXlogSegmentName(name)
    # Groups that did not take part in the match are None (or empty)
    return [int(x, 16) if x else None for x in match.groups()]


def encode_segment_name(tli, log, seg):
    """
    Build the xlog segment name based on timeline, log ID and segment ID

    :param int tli: timeline number
    :param int log: log number
    :param int seg: segment number
    :return str: segment file name
    """
    return "%08X%08X%08X" % (tli, log, seg)


def encode_history_file_name(tli):
    """
    Build the history file name based on timeline

    :param int tli: timeline number
    :return str: history file name
    """
    return "%08X.history" % (tli,)


def xlog_segments_per_file(xlog_segment_size):
    """
    Given that WAL files are named using the following pattern:

        <timeline_number><xlog_file_number><xlog_segment_number>

    this is the number of XLOG segments in an XLOG file. By XLOG file
    we don't mean an actual file on the filesystem, but the definition
    used in the PostgreSQL sources: meaning a set of files containing the
    same file number.

    :param int xlog_segment_size: The XLOG segment size in bytes
    :return int: The number of segments in an XLOG file
    """
    return 0xFFFFFFFF // xlog_segment_size


def xlog_segment_mask(xlog_segment_size):
    """
    Given that WAL files are named using the following pattern:

        <timeline_number><xlog_file_number><xlog_segment_number>

    this is the bitmask of segment part of an XLOG file.
    See the documentation of `xlog_segments_per_file` for a
    commentary on the definition of `XLOG` file.

    :param int xlog_segment_size: The XLOG segment size in bytes
    :return int: The size of an XLOG file
    """
    return xlog_segment_size * xlog_segments_per_file(xlog_segment_size)


def generate_segment_names(begin, end=None, version=None, xlog_segment_size=None):
    """
    Generate a sequence of XLOG segments starting from ``begin``
    If an ``end`` segment is provided the sequence will terminate after
    returning it, otherwise the sequence will never terminate.

    If the XLOG segment size is known, this generator is precise,
    switching to the next file when required.

    If the XLOG segment size is unknown, this generator will generate
    all the possible XLOG file names.
    The size of an XLOG segment can be every power of 2 between
    the XLOG block size (8 KiB) and the size of a log segment (4 GiB)

    :param str begin: begin segment name
    :param str|None end: optional end segment name
    :param int|None version: optional postgres version as an integer
        (e.g. 90301 for 9.3.1)
    :param int xlog_segment_size: the size of a XLOG segment
    :rtype: collections.Iterable[str]
    :raise: BadXlogSegmentName
    """
    begin_tli, begin_log, begin_seg = decode_segment_name(begin)
    end_tli, end_log, end_seg = None, None, None
    if end:
        end_tli, end_log, end_seg = decode_segment_name(end)

        # this method doesn't support timeline changes
        assert begin_tli == end_tli, (
            "Begin segment (%s) and end segment (%s) "
            "must have the same timeline part" % (begin, end)
        )

    # If version is less than 9.3 the last segment must be skipped
    skip_last_segment = version is not None and version < 90300

    # This is the number of XLOG segments in an XLOG file. By XLOG file
    # we don't mean an actual file on the filesystem, but the definition
    # used in the PostgreSQL sources: a set of files containing the
    # same file number.
    if xlog_segment_size:
        # Precise mode: the segment size is known, so we know exactly
        # when a switch to the next file is required
        xlog_seg_per_file = xlog_segments_per_file(xlog_segment_size)
    else:
        # Exhaustive mode: the segment size is unknown, so generate every
        # possible XLOG file name. 0x7FFFF is the number of segments per
        # file for the smallest segment size (0xFFFFFFFF // 8 KiB).
        xlog_seg_per_file = 0x7FFFF

    # Start from the first xlog and generate the segments sequentially
    # If ``end`` has been provided, the while condition ensures the
    # termination, otherwise this generator will never stop
    cur_log, cur_seg = begin_log, begin_seg
    while (
        end is None or cur_log < end_log or (cur_log == end_log and cur_seg <= end_seg)
    ):
        yield encode_segment_name(begin_tli, cur_log, cur_seg)
        cur_seg += 1
        # Roll over to the next log file once the segment counter is
        # exhausted (one segment earlier when the last one is skipped)
        if cur_seg > xlog_seg_per_file or (
            skip_last_segment and cur_seg == xlog_seg_per_file
        ):
            cur_seg = 0
            cur_log += 1


def hash_dir(path):
    """
    Get the directory where the xlog segment will be stored

    It can handle either a full file path or a simple file name.

    :param str|unicode path: xlog file name
    :return str: directory name, or an empty string when the name has
        no log ID part (i.e. a history file)
    :raise BadXlogSegmentName: if the name is not a valid xlog file name
    """
    tli, log, _ = decode_segment_name(path)
    # tli is always not None
    if log is not None:
        return "%08X%08X" % (tli, log)
    else:
        return ""


def parse_lsn(lsn_string):
    """
    Transform a string XLOG location, formatted as %X/%X, in the corresponding
    numeric representation

    :param str lsn_string: the string XLOG location, i.e. '2/82000168'
    :rtype: int
    :raise ValueError: if the string does not contain exactly one '/'
        or one of the two parts is not a hexadecimal number
    """
    lsn_list = lsn_string.split("/")
    if len(lsn_list) != 2:
        # Interpolate the message: passing lsn_string as a second argument
        # to ValueError would leave the '%s' placeholder unexpanded
        raise ValueError("Invalid LSN: %s" % (lsn_string,))

    return (int(lsn_list[0], 16) << 32) + int(lsn_list[1], 16)


def diff_lsn(lsn_string1, lsn_string2):
    """
    Calculate the difference in bytes between two string XLOG location,
    formatted as %X/%X

    This function is a Python implementation of
    the ``pg_xlog_location_diff(str, str)`` PostgreSQL function.

    :param str lsn_string1: the string XLOG location, i.e. '2/82000168'
    :param str lsn_string2: the string XLOG location, i.e. '2/82000168'
    :rtype: int|None
    """

    # If one of the inputs is None returns None
    if lsn_string1 is None or lsn_string2 is None:
        return None
    return parse_lsn(lsn_string1) - parse_lsn(lsn_string2)


def format_lsn(lsn):
    """
    Transform a numeric XLOG location, in the corresponding %X/%X string
    representation

    :param int lsn: numeric XLOG location
    :rtype: str
    """
    return "%X/%X" % (lsn >> 32, lsn & 0xFFFFFFFF)


def location_to_xlogfile_name_offset(location, timeline, xlog_segment_size):
    """
    Convert transaction log location string to file_name and file_offset

    This is a reimplementation of pg_xlogfile_name_offset PostgreSQL function

    This method returns a dictionary containing the following data:

         * file_name
         * file_offset

    :param str location: XLOG location
    :param int timeline: timeline
    :param int xlog_segment_size: the size of a XLOG segment
    :rtype: dict
    """
    lsn = parse_lsn(location)
    # The high 32 bits are the log number
    log = lsn >> 32
    # The masked part divided by the segment size is the segment number
    seg = (lsn & xlog_segment_mask(xlog_segment_size)) // xlog_segment_size
    # The remaining low bits are the offset inside the segment
    offset = lsn & (xlog_segment_size - 1)
    return {
        "file_name": encode_segment_name(timeline, log, seg),
        "file_offset": offset,
    }


def location_from_xlogfile_name_offset(file_name, file_offset, xlog_segment_size):
    """
    Convert file_name and file_offset to a transaction log location.

    This is the inverted function of PostgreSQL's pg_xlogfile_name_offset
    function.

    :param str file_name: a WAL file name
    :param int file_offset: a numeric offset
    :param int xlog_segment_size: the size of a XLOG segment
    :rtype: str
    :raise BadXlogSegmentName: if file_name is not a valid xlog file name
    """
    _, log, seg = decode_segment_name(file_name)
    location = log << 32
    location += seg * xlog_segment_size
    location += file_offset
    return format_lsn(location)


def decode_history_file(wal_info, comp_manager):
    """
    Read an history file and parse its contents.

    Each line in the file represents a timeline switch, each field is
    separated by tab, empty lines are ignored and lines starting with '#'
    are comments.

    Each line is composed by three fields: parentTLI, switchpoint and reason.
    "parentTLI" is the ID of the parent timeline.
    "switchpoint" is the WAL position where the switch happened
    "reason" is an human-readable explanation of why the timeline was changed

    The method requires a CompressionManager object to handle the eventual
    compression of the history file.

    :param barman.infofile.WalFileInfo wal_info: history file obj
    :param comp_manager: compression manager used in case
        of history file compression
    :return List[HistoryFileData]: information from the history file
    :raise BadHistoryFileContents: if the file is empty or a line does
        not contain exactly three tab separated fields
    """

    path = wal_info.orig_filename
    uncompressed_file = None
    try:
        # Decompress the file if needed
        if wal_info.compression:
            # Use a NamedTemporaryFile so the uncompressed copy is removed
            # as soon as it is closed
            uncompressed_file = NamedTemporaryFile(
                dir=os.path.dirname(path),
                prefix=".%s." % wal_info.name,
                suffix=".uncompressed",
            )
            path = uncompressed_file.name
            comp_manager.get_compressor(wal_info.compression).decompress(
                wal_info.orig_filename, path
            )

        # Extract the timeline from history file name
        tli, _, _ = decode_segment_name(wal_info.name)

        lines = []
        with open(path) as fp:
            for line in fp:
                line = line.strip()
                # Skip comments and empty lines
                if not line or line.startswith("#"):
                    continue
                # Use tab as separator
                contents = line.split("\t")
                if len(contents) != 3:
                    # Invalid content of the line
                    raise BadHistoryFileContents(path)

                lines.append(
                    HistoryFileData(
                        tli=tli,
                        parent_tli=int(contents[0]),
                        switchpoint=parse_lsn(contents[1]),
                        reason=contents[2],
                    )
                )
    finally:
        # Release the temporary file deterministically instead of relying
        # on garbage collection, also when parsing fails
        if uncompressed_file is not None:
            uncompressed_file.close()

    # Empty history file or containing invalid content
    if not lines:
        raise BadHistoryFileContents(path)
    return lines


def _validate_timeline(timeline):
    """
    Check that timeline is a valid timeline value.

    :param timeline: the value to check
    :return bool: True if the timeline is an integer greater than zero
    :raise CommandException: if the timeline is anything else
    """
    # Explicitly check the type because python 2 will allow < to be used
    # between strings and ints
    if type(timeline) is not int or timeline < 1:
        # Wrap the value in a tuple so that a tuple argument is rendered
        # instead of being unpacked by the % operator
        raise CommandException(
            "Cannot check WAL archive with malformed timeline %s" % (timeline,)
        )
    return True


def _wal_archive_filter_fun(timeline, wal):
    """
    Tell if a file found in the WAL archive belongs to ``timeline`` or to
    a more recent one.

    :param int timeline: the timeline to compare against
    :param str wal: the name of a file found in the WAL archive
    :return bool: True if the timeline of ``wal`` is equal to or greater
        than ``timeline``
    :raise WalArchiveContentError: if ``wal`` is not a valid xlog file name
    """
    try:
        is_xlog = is_any_xlog_file(wal)
    except Exception:
        # Anything which cannot even be tested is an unexpected file
        is_xlog = False
    if not is_xlog:
        raise WalArchiveContentError(
            "Unexpected file %s found in WAL archive" % (wal,)
        )
    wal_timeline, _, _ = decode_segment_name(wal)
    return timeline <= wal_timeline


def check_archive_usable(existing_wals, timeline=None):
    """
    Carry out pre-flight checks on the existing content of a WAL archive to
    determine if it is safe to archive WALs from the supplied timeline.

    When no timeline is supplied the archive is expected to be empty,
    otherwise it must not contain any file belonging to the supplied
    timeline or to a more recent one.

    :param existing_wals: the names of the files found in the WAL archive
    :param int|None timeline: the timeline of the WALs to be archived
    :raise WalArchiveContentError: if the content of the archive is not
        compatible with the supplied timeline
    :raise CommandException: if the supplied timeline is malformed
    """
    if timeline is None:
        if len(existing_wals) > 0:
            raise WalArchiveContentError("Expected empty archive")
    else:
        _validate_timeline(timeline)
        filter_fun = partial(_wal_archive_filter_fun, timeline)
        unexpected_wals = [wal for wal in existing_wals if filter_fun(wal)]
        num_unexpected_wals = len(unexpected_wals)
        if num_unexpected_wals > 0:
            raise WalArchiveContentError(
                "Found %s file%s in WAL archive equal to or newer than "
                "timeline %s"
                % (
                    num_unexpected_wals,
                    "s" if num_unexpected_wals > 1 else "",
                    timeline,
                )
            )