1# -*- coding: utf-8 -*-
2# © Copyright EnterpriseDB UK Limited 2011-2021
3#
4# This file is part of Barman.
5#
6# Barman is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10#
11# Barman is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with Barman.  If not, see <http://www.gnu.org/licenses/>.
18
19"""
20This module contains functions to retrieve information about xlog
21files
22"""
23
24import collections
25import os
26import re
27from functools import partial
28from tempfile import NamedTemporaryFile
29
30from barman.exceptions import (
31    BadHistoryFileContents,
32    BadXlogSegmentName,
33    CommandException,
34    WalArchiveContentError,
35)
36
# xlog file segment name parser (regular expression)
#
# A matching name starts with an 8-hex-digit timeline, followed by either:
#   * a 16-hex-digit segment name, optionally suffixed with
#     ".<8 hex digits>.backup" or ".partial", or
#   * the ".history" suffix alone.
# Capture groups: (1) the timeline and (2, 3) the two 8-digit halves of the
# segment name. Groups 2 and 3 do not participate (and are therefore None)
# when the name is a history file.
_xlog_re = re.compile(
    r"""
    ^
    ([\dA-Fa-f]{8})                    # everything has a timeline
    (?:
        ([\dA-Fa-f]{8})([\dA-Fa-f]{8}) # segment name, if a wal file
        (?:                            # and optional
            \.[\dA-Fa-f]{8}\.backup    # offset, if a backup label
        |
            \.partial                  # partial, if a partial file
        )?
    |
        \.history                      # or only .history, if a history file
    )
    $
    """,
    re.VERBOSE,
)
56
# xlog location parser for concurrent backup (regular expression)
# Matches "<hex>/<hex>", accepting digits and upper-case A-F only, and
# captures the part before and the part after the slash as two groups.
_location_re = re.compile(r"^([\dA-F]+)/([\dA-F]+)$")
59
# Taken from xlog_internal.h from PostgreSQL sources

#: XLOG_SEG_SIZE is the size of a single WAL file.  This must be a power of 2
#: and larger than XLOG_BLCKSZ (preferably, a great deal larger than
#: XLOG_BLCKSZ).
#: The default defined here is ``1 << 24`` bytes, i.e. 16 MiB.
DEFAULT_XLOG_SEG_SIZE = 1 << 24
66
#: This namedtuple is a container for the information
#: contained inside history files.
#: Fields, in order: ``tli`` (timeline the entry belongs to),
#: ``parent_tli`` (parent timeline), ``switchpoint`` (WAL position of the
#: timeline switch) and ``reason`` (free-text explanation of the switch).
HistoryFileData = collections.namedtuple(
    "HistoryFileData", "tli parent_tli switchpoint reason"
)
72
73
def is_any_xlog_file(path):
    """
    Tell whether a file name is recognised by the xlog name parser.

    Recognised names are plain WAL segments, ``.backup`` labels,
    ``.partial`` segments and ``.history`` files.

    Only the last path component is examined, so either a full file path
    or a simple file name can be given.

    :param str path: the file name to test
    :rtype: bool
    """
    file_name = os.path.basename(path)
    return _xlog_re.match(file_name) is not None
88
89
def is_history_file(path):
    """
    Tell whether a file name denotes a ``.history`` file.

    Only the last path component is examined, so either a full file path
    or a simple file name can be given.

    :param str path: the file name to test
    :rtype: bool
    """
    parsed = _xlog_re.search(os.path.basename(path))
    if parsed is None:
        # Not an xlog file name at all
        return False
    return parsed.group(0).endswith(".history")
103
104
def is_backup_file(path):
    """
    Tell whether a file name denotes a ``.backup`` label file.

    Only the last path component is examined, so either a full file path
    or a simple file name can be given.

    :param str path: the file name to test
    :rtype: bool
    """
    parsed = _xlog_re.search(os.path.basename(path))
    if parsed is None:
        # Not an xlog file name at all
        return False
    return parsed.group(0).endswith(".backup")
118
119
def is_partial_file(path):
    """
    Tell whether a file name denotes a ``.partial`` WAL file.

    Only the last path component is examined, so either a full file path
    or a simple file name can be given.

    :param str path: the file name to test
    :rtype: bool
    """
    parsed = _xlog_re.search(os.path.basename(path))
    if parsed is None:
        # Not an xlog file name at all
        return False
    return parsed.group(0).endswith(".partial")
133
134
def is_wal_file(path):
    """
    Tell whether a file name denotes a regular xlog (WAL segment) file.

    A name qualifies when it is accepted by the xlog name parser and it is
    not a ``.backup``, ``.history`` or ``.partial`` file.

    Only the last path component is examined, so either a full file path
    or a simple file name can be given.

    :param str path: the file name to test
    :rtype: bool
    """
    parsed = _xlog_re.search(os.path.basename(path))
    if parsed is None:
        return False

    # Any of these suffixes marks a file that is not a plain WAL segment
    special_suffixes = (".backup", ".history", ".partial")
    return not parsed.group(0).endswith(special_suffixes)
163
164
def decode_segment_name(path):
    """
    Split the name of a xlog file into timeline, log ID and segment ID.

    Only the last path component is examined, so either a full file path
    or a simple file name can be given.

    Each component is parsed as a hexadecimal number. Components that are
    missing from the name (log ID and segment ID of a history file) are
    returned as None.

    :param str path: the file name to decode
    :rtype: list[int]
    :raise BadXlogSegmentName: if the name is not a valid xlog file name
    """
    file_name = os.path.basename(path)
    parsed = _xlog_re.match(file_name)
    if parsed is None:
        raise BadXlogSegmentName(file_name)

    decoded = []
    for part in parsed.groups():
        decoded.append(int(part, 16) if part else None)
    return decoded
180
181
def encode_segment_name(tli, log, seg):
    """
    Compose a xlog segment name from timeline, log ID and segment ID.

    Each component is rendered as an upper-case hexadecimal number,
    zero-padded to eight digits.

    :param int tli: timeline number
    :param int log: log number
    :param int seg: segment number
    :return str: segment file name
    """
    components = (tli, log, seg)
    return "%08X%08X%08X" % components
192
193
def encode_history_file_name(tli):
    """
    Compose the name of the history file of a timeline.

    The timeline is rendered as an upper-case hexadecimal number,
    zero-padded to eight digits, followed by ``.history``.

    :param int tli: timeline number
    :return str: history file name
    """
    name_template = "%08X.history"
    return name_template % (tli,)
201
202
def xlog_segments_per_file(xlog_segment_size):
    """
    Return how many whole segments of the given size fit in 0xFFFFFFFF bytes.

    WAL files are named using the following pattern:

        <timeline_number><xlog_file_number><xlog_segment_number>

    Here "XLOG file" does not mean an actual file on the filesystem, but
    the definition used in the PostgreSQL sources: the set of files sharing
    the same file number.

    For a 16 MiB segment size (``1 << 24``) the result is 255 (0xFF).

    :param int xlog_segment_size: The XLOG segment size in bytes
    :return int: the integer quotient of 0xFFFFFFFF by the segment size
    """
    highest_offset = 0xFFFFFFFF
    return highest_offset // xlog_segment_size
218
219
def xlog_segment_mask(xlog_segment_size):
    """
    Return the bitmask selecting the segment part of an XLOG location.

    WAL files are named using the following pattern:

        <timeline_number><xlog_file_number><xlog_segment_number>

    The mask is the segment size multiplied by the value returned by
    `xlog_segments_per_file` for that size; see that function for a
    commentary on the definition of `XLOG` file.

    :param int xlog_segment_size: The XLOG segment size in bytes
    :return int: the bitmask of the segment part
    """
    segments = xlog_segments_per_file(xlog_segment_size)
    return segments * xlog_segment_size
234
235
def generate_segment_names(begin, end=None, version=None, xlog_segment_size=None):
    """
    Yield consecutive XLOG segment names, starting from ``begin``.

    When ``end`` is given, it is the last name produced; without it the
    generator is endless.

    The segment number is incremented by one at every step. Once it goes
    past a limit, it restarts from zero and the log number is incremented:

    * if ``xlog_segment_size`` is known, the limit is the value returned by
      ``xlog_segments_per_file`` for that size, so the switch to the next
      log happens exactly where required;
    * otherwise the limit is 0x7FFFF, so that every possible XLOG file name
      is generated (0x7FFFF equals ``0xFFFFFFFF // 8192``, 8 KiB being the
      XLOG block size, the smallest size considered for a segment).

    When ``version`` is lower than 90300, the segment whose number equals
    the limit is skipped.

    Timeline changes are not supported: ``begin`` and ``end`` must share
    the same timeline (enforced with an ``assert``).

    :param str begin: begin segment name
    :param str|None end: optional end segment name
    :param int|None version: optional postgres version as an integer
        (e.g. 90301 for 9.3.1)
    :param int xlog_segment_size: the size of a XLOG segment
    :rtype: collections.Iterable[str]
    :raise: BadXlogSegmentName
    """
    tli, cur_log, cur_seg = decode_segment_name(begin)
    end_log = end_seg = None
    if end:
        end_tli, end_log, end_seg = decode_segment_name(end)

        # this method doesn't support timeline changes
        assert tli == end_tli, (
            "Begin segment (%s) and end segment (%s) "
            "must have the same timeline part" % (begin, end)
        )

    # Before 9.3 the last segment of every log must be skipped
    skip_last_segment = version is not None and version < 90300

    if xlog_segment_size:
        # Known segment size: the rollover point is exact
        last_seg = xlog_segments_per_file(xlog_segment_size)
    else:
        # Unknown segment size: use the widest range of segment numbers,
        # so that every possible XLOG file name is generated
        last_seg = 0x7FFFF

    while True:
        if end is not None:
            # Stop as soon as the current position is past the end segment
            within_range = cur_log < end_log or (
                cur_log == end_log and cur_seg <= end_seg
            )
            if not within_range:
                return

        yield encode_segment_name(tli, cur_log, cur_seg)

        cur_seg += 1
        rollover = cur_seg > last_seg or (skip_last_segment and cur_seg == last_seg)
        if rollover:
            cur_seg = 0
            cur_log += 1
299
300
def hash_dir(path):
    """
    Return the name of the directory where a xlog file is stored.

    The directory name is made of the timeline and the log ID of the file,
    each rendered as eight upper-case hexadecimal digits. Files without a
    log ID (history files) get an empty directory name.

    Either a full file path or a simple file name can be given.

    :param str|unicode path: xlog file name
    :return str: directory name
    """
    # The timeline is always present in a valid name, the log ID is not
    tli, log, _ = decode_segment_name(path)
    return "" if log is None else "%08X%08X" % (tli, log)
316
317
def parse_lsn(lsn_string):
    """
    Transform a string XLOG location, formatted as %X/%X, in the corresponding
    numeric representation

    The part before the slash provides the high 32 bits of the result, the
    part after the slash is added to it.

    :param str lsn_string: the string XLOG location, i.e. '2/82000168'
    :rtype: int
    :raise ValueError: if the string is not made of exactly two
        slash-separated parts, or if a part is not a hexadecimal number
    """
    lsn_list = lsn_string.split("/")
    if len(lsn_list) != 2:
        # Interpolate the offending value into the message, instead of
        # passing it as a second (never formatted) exception argument
        raise ValueError("Invalid LSN: %s" % (lsn_string,))

    return (int(lsn_list[0], 16) << 32) + int(lsn_list[1], 16)
331
332
def diff_lsn(lsn_string1, lsn_string2):
    """
    Compute the distance in bytes between two string XLOG locations,
    formatted as %X/%X

    This function is a Python implementation of
    the ``pg_xlog_location_diff(str, str)`` PostgreSQL function.

    :param str lsn_string1: the string XLOG location, i.e. '2/82000168'
    :param str lsn_string2: the string XLOG location, i.e. '2/82000168'
    :return: the first location minus the second one, or None if either
        of the locations is None
    :rtype: int|None
    """
    # No difference can be computed when a location is missing
    for lsn_string in (lsn_string1, lsn_string2):
        if lsn_string is None:
            return None

    first = parse_lsn(lsn_string1)
    second = parse_lsn(lsn_string2)
    return first - second
350
351
def format_lsn(lsn):
    """
    Render a numeric XLOG location as a %X/%X string.

    The part before the slash holds the bits above the lowest 32,
    the part after the slash holds the lowest 32 bits.

    :param int lsn: numeric XLOG location
    :rtype: str
    """
    high_part = lsn >> 32
    low_part = lsn & 0xFFFFFFFF
    return "%X/%X" % (high_part, low_part)
361
362
def location_to_xlogfile_name_offset(location, timeline, xlog_segment_size):
    """
    Translate a transaction log location string into the name of the file
    containing it and the offset within that file.

    This is a reimplementation of pg_xlogfile_name_offset PostgreSQL function

    The result is a dictionary with the following keys:

         * file_name
         * file_offset

    :param str location: XLOG location
    :param int timeline: timeline
    :param int xlog_segment_size: the size of a XLOG segment
    :rtype: dict
    """
    lsn = parse_lsn(location)

    # The log ID is stored in the bits above the lowest 32
    log_id = lsn >> 32
    # Isolate the segment part of the location and scale it down
    # to a segment number
    segment_bits = lsn & xlog_segment_mask(xlog_segment_size)
    seg_id = segment_bits // xlog_segment_size
    # What is left below the segment size is the position inside the file
    file_offset = lsn & (xlog_segment_size - 1)

    file_name = encode_segment_name(timeline, log_id, seg_id)
    return {"file_name": file_name, "file_offset": file_offset}
387
388
def location_from_xlogfile_name_offset(file_name, file_offset, xlog_segment_size):
    """
    Translate a WAL file name and an offset within that file into
    a transaction log location.

    This is the inverted function of PostgreSQL's pg_xlogfile_name_offset
    function.

    The timeline part of the file name does not contribute to the result.

    :param str file_name: a WAL file name
    :param int file_offset: a numeric offset
    :param int xlog_segment_size: the size of a XLOG segment
    :rtype: str
    """
    _, log_id, seg_id = decode_segment_name(file_name)
    # Log ID in the high bits, then the start of the segment, then the
    # position within the segment
    numeric_location = (log_id << 32) + seg_id * xlog_segment_size + file_offset
    return format_lsn(numeric_location)
406
407
def decode_history_file(wal_info, comp_manager):
    """
    Read an history file and parse its contents.

    Each line in the file represents a timeline switch, each field is
    separated by tab, empty lines are ignored and lines starting with '#'
    are comments.

    Each line is composed by three fields: parentTLI, switchpoint and reason.
    "parentTLI" is the ID of the parent timeline.
    "switchpoint" is the WAL position where the switch happened
    "reason" is an human-readable explanation of why the timeline was changed

    The method requires a CompressionManager object to handle the eventual
    compression of the history file. A compressed file is decompressed into
    a temporary file created in the same directory as the original one; the
    temporary file is closed (and thus removed) before this function
    returns or raises.

    :param barman.infofile.WalFileInfo wal_info: history file obj
    :param comp_manager: compression manager used in case
        of history file compression
    :return List[HistoryFileData]: information from the history file
    :raise BadHistoryFileContents: if a line does not contain exactly three
        fields, or if the file contains no timeline switch at all
    :raise BadXlogSegmentName: if the name of the history file is invalid
    """

    path = wal_info.orig_filename
    uncompressed_file = None
    try:
        # Decompress the file if needed
        if wal_info.compression:
            uncompressed_file = NamedTemporaryFile(
                dir=os.path.dirname(path),
                prefix=".%s." % wal_info.name,
                suffix=".uncompressed",
            )
            path = uncompressed_file.name
            comp_manager.get_compressor(wal_info.compression).decompress(
                wal_info.orig_filename, path
            )

        # Extract the timeline from history file name
        tli, _, _ = decode_segment_name(wal_info.name)

        lines = []
        with open(path) as fp:
            for line in fp:
                line = line.strip()
                # Skip comments and empty lines
                if not line or line.startswith("#"):
                    continue
                # Use tab as separator
                contents = line.split("\t")
                if len(contents) != 3:
                    # Invalid content of the line
                    raise BadHistoryFileContents(path)

                history = HistoryFileData(
                    tli=tli,
                    parent_tli=int(contents[0]),
                    switchpoint=parse_lsn(contents[1]),
                    reason=contents[2],
                )
                lines.append(history)
    finally:
        # Release the temporary file deterministically, instead of relying
        # on the garbage collector, also when an error is raised
        if uncompressed_file is not None:
            uncompressed_file.close()

    # Empty history file or containing invalid content
    if not lines:
        raise BadHistoryFileContents(path)
    return lines
476
477
def _validate_timeline(timeline):
    """
    Check that timeline is a valid timeline value.

    A valid timeline is an ``int`` greater than or equal to 1.

    :param timeline: the value to check
    :return bool: True if the timeline is valid
    :raise CommandException: if the timeline is not valid
    """
    # Explicitly check the type because python 2 will allow < to be used
    # between strings and ints
    if type(timeline) is not int or timeline < 1:
        # Wrap the value in a tuple, so that a malformed timeline which
        # happens to be a tuple is reported instead of breaking the
        # formatting of the message
        raise CommandException(
            "Cannot check WAL archive with malformed timeline %s" % (timeline,)
        )
    return True
490
491
def _wal_archive_filter_fun(timeline, wal):
    """
    Tell whether a file found in the WAL archive belongs to the given
    timeline or to a newer one.

    :param int timeline: the timeline to compare against
    :param str wal: name of a file found in the WAL archive
    :return bool: True if the timeline of the file is greater than or equal
        to ``timeline``
    :raise WalArchiveContentError: if the file is not recognised as a xlog
        file, or if the check itself fails
    """
    try:
        recognised = is_any_xlog_file(wal)
    except Exception:
        # A name which cannot even be inspected is unexpected as well
        recognised = False
    if not recognised:
        raise WalArchiveContentError("Unexpected file %s found in WAL archive" % wal)

    wal_timeline = decode_segment_name(wal)[0]
    return timeline <= wal_timeline
500
501
def check_archive_usable(existing_wals, timeline=None):
    """
    Carry out pre-flight checks on the existing content of a WAL archive to
    determine if it is safe to archive WALs from the supplied timeline.

    Without a timeline the archive is required to be empty. With a timeline,
    the archive must not contain any file belonging to that timeline or to
    a newer one.

    :param existing_wals: names of the files currently in the WAL archive
    :param int|None timeline: the timeline WALs are going to be archived from
    :raise WalArchiveContentError: if the content of the archive does not
        pass the checks
    :raise CommandException: if the timeline is malformed
    """
    if timeline is None:
        if len(existing_wals) > 0:
            raise WalArchiveContentError("Expected empty archive")
        return

    _validate_timeline(timeline)

    # Count the files which are on the supplied timeline or on a newer one
    num_unexpected_wals = 0
    for wal in existing_wals:
        if _wal_archive_filter_fun(timeline, wal):
            num_unexpected_wals += 1

    if num_unexpected_wals > 0:
        plural_suffix = "s" if num_unexpected_wals > 1 else ""
        raise WalArchiveContentError(
            "Found %s file%s in WAL archive equal to or newer than "
            "timeline %s" % (num_unexpected_wals, plural_suffix, timeline)
        )
525