1# -*- coding: utf-8 -*-
2# © Copyright EnterpriseDB UK Limited 2013-2021
3#
4# This file is part of Barman.
5#
6# Barman is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10#
11# Barman is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with Barman.  If not, see <http://www.gnu.org/licenses/>.
18
19import multiprocessing.dummy
20import os
21from datetime import datetime
22
23import dateutil.tz
24import mock
25import pytest
26from mock import patch
27
28from barman.copy_controller import (
29    BUCKET_SIZE,
30    RsyncCopyController,
31    _FileItem,
32    _RsyncCopyItem,
33)
34from barman.exceptions import CommandFailedException, RsyncListFilesFailure
35from testing_helpers import (
36    build_backup_manager,
37    build_real_server,
38    build_test_backup_info,
39)
40
41
42# noinspection PyMethodMayBeStatic
43class TestRsyncCopyController(object):
44    """
45    This class tests the methods of the RsyncCopyController object
46    """
47
48    def test_rsync_backup_executor_init(self):
49        """
50        Test the construction of a RsyncCopyController
51        """
52
53        # Build the prerequisites
54        backup_manager = build_backup_manager()
55        server = backup_manager.server
56        config = server.config
57        executor = server.executor
58
59        # Test
60        assert RsyncCopyController(
61            path=server.path,
62            ssh_command=executor.ssh_command,
63            ssh_options=executor.ssh_options,
64            network_compression=config.network_compression,
65            reuse_backup=None,
66            safe_horizon=None,
67        )
68
69    def test_reuse_args(self):
70        """
71        Simple test for the _reuse_args method
72
73        The method is necessary for the execution of incremental backups,
74        we need to test that the method build correctly the rsync option that
75        enables the incremental backup
76        """
77        # Build the prerequisites
78        backup_manager = build_backup_manager()
79        server = backup_manager.server
80        config = server.config
81        executor = server.executor
82
83        rcc = RsyncCopyController(
84            path=server.path,
85            ssh_command=executor.ssh_command,
86            ssh_options=executor.ssh_options,
87            network_compression=config.network_compression,
88            reuse_backup=None,
89            safe_horizon=None,
90        )
91
92        reuse_dir = "some/dir"
93
94        # Test for disabled incremental
95        assert rcc._reuse_args(reuse_dir) == []
96
97        # Test for link incremental
98        rcc.reuse_backup = "link"
99        assert rcc._reuse_args(reuse_dir) == ["--link-dest=some/dir"]
100
101        # Test for copy incremental
102        rcc.reuse_backup = "copy"
103        assert rcc._reuse_args(reuse_dir) == ["--copy-dest=some/dir"]
104
105    @patch("barman.copy_controller.Pool", new=multiprocessing.dummy.Pool)
106    @patch("barman.copy_controller.RsyncPgData")
107    @patch("barman.copy_controller.RsyncCopyController._analyze_directory")
108    @patch("barman.copy_controller.RsyncCopyController._create_dir_and_purge")
109    @patch("barman.copy_controller.RsyncCopyController._copy")
110    @patch("tempfile.mkdtemp")
111    @patch("signal.signal")
112    def test_full_copy(
113        self,
114        signal_mock,
115        tempfile_mock,
116        copy_mock,
117        create_and_purge_mock,
118        analyse_mock,
119        rsync_mock,
120        tmpdir,
121    ):
122        """
123        Test the execution of a full copy
124        """
125
126        # Build the prerequisites
127        tempdir = tmpdir.mkdir("tmp")
128        tempfile_mock.return_value = tempdir.strpath
129        server = build_real_server(
130            global_conf={"barman_home": tmpdir.mkdir("home").strpath}
131        )
132        config = server.config
133        executor = server.backup_manager.executor
134
135        rcc = RsyncCopyController(
136            path=server.path,
137            ssh_command=executor.ssh_command,
138            ssh_options=executor.ssh_options,
139            network_compression=config.network_compression,
140            reuse_backup=None,
141            safe_horizon=None,
142        )
143
144        backup_info = build_test_backup_info(
145            server=server,
146            pgdata="/pg/data",
147            config_file="/etc/postgresql.conf",
148            hba_file="/pg/data/pg_hba.conf",
149            ident_file="/pg/data/pg_ident.conf",
150            begin_xlog="0/2000028",
151            begin_wal="000000010000000000000002",
152            begin_offset=28,
153        )
154        backup_info.save()
155        # This is to check that all the preparation is done correctly
156        assert os.path.exists(backup_info.filename)
157
158        # Silence the access to result properties
159        rsync_mock.return_value.out = ""
160        rsync_mock.return_value.err = ""
161        rsync_mock.return_value.ret = 0
162
163        # Mock analyze directory
164        def analyse_func(item):
165            label = item.label
166            item.dir_file = label + "_dir_file"
167            item.exclude_and_protect_file = label + "_exclude_and_protect_file"
168            item.safe_list = [_FileItem("mode", 1, "date", "path")]
169            item.check_list = [_FileItem("mode", 1, "date", "path")]
170
171        analyse_mock.side_effect = analyse_func
172
173        rcc.add_directory(
174            label="tbs1",
175            src=":/fake/location/",
176            dst=backup_info.get_data_directory(16387),
177            reuse=None,
178            bwlimit=None,
179            item_class=rcc.TABLESPACE_CLASS,
180        )
181        rcc.add_directory(
182            label="tbs2",
183            src=":/another/location/",
184            dst=backup_info.get_data_directory(16405),
185            reuse=None,
186            bwlimit=None,
187            item_class=rcc.TABLESPACE_CLASS,
188        )
189        rcc.add_directory(
190            label="pgdata",
191            src=":/pg/data/",
192            dst=backup_info.get_data_directory(),
193            reuse=None,
194            bwlimit=None,
195            item_class=rcc.PGDATA_CLASS,
196            exclude=[
197                "/pg_xlog/*",
198                "/pg_log/*",
199                "/log/*",
200                "/recovery.conf",
201                "/postmaster.pid",
202            ],
203            exclude_and_protect=["pg_tblspc/16387", "pg_tblspc/16405"],
204        )
205        rcc.add_file(
206            label="pg_control",
207            src=":/pg/data/global/pg_control",
208            dst="%s/global/pg_control" % backup_info.get_data_directory(),
209            item_class=rcc.PGCONTROL_CLASS,
210        )
211        rcc.add_file(
212            label="config_file",
213            src=":/etc/postgresql.conf",
214            dst=backup_info.get_data_directory(),
215            item_class=rcc.CONFIG_CLASS,
216            optional=False,
217        )
218        rcc.copy()
219
220        # Check the order of calls to the Rsync mock
221        assert rsync_mock.mock_calls == [
222            mock.call(
223                network_compression=False,
224                args=[
225                    "--ignore-missing-args",
226                    "--itemize-changes",
227                    "--itemize-changes",
228                ],
229                bwlimit=None,
230                ssh="ssh",
231                path=None,
232                ssh_options=[
233                    "-c",
234                    '"arcfour"',
235                    "-p",
236                    "22",
237                    "postgres@pg01.nowhere",
238                    "-o",
239                    "BatchMode=yes",
240                    "-o",
241                    "StrictHostKeyChecking=no",
242                ],
243                exclude=None,
244                exclude_and_protect=None,
245                include=None,
246                retry_sleep=0,
247                retry_times=0,
248                retry_handler=mock.ANY,
249            ),
250            mock.call(
251                network_compression=False,
252                args=[
253                    "--ignore-missing-args",
254                    "--itemize-changes",
255                    "--itemize-changes",
256                ],
257                bwlimit=None,
258                ssh="ssh",
259                path=None,
260                ssh_options=[
261                    "-c",
262                    '"arcfour"',
263                    "-p",
264                    "22",
265                    "postgres@pg01.nowhere",
266                    "-o",
267                    "BatchMode=yes",
268                    "-o",
269                    "StrictHostKeyChecking=no",
270                ],
271                exclude=None,
272                exclude_and_protect=None,
273                include=None,
274                retry_sleep=0,
275                retry_times=0,
276                retry_handler=mock.ANY,
277            ),
278            mock.call(
279                network_compression=False,
280                args=[
281                    "--ignore-missing-args",
282                    "--itemize-changes",
283                    "--itemize-changes",
284                ],
285                bwlimit=None,
286                ssh="ssh",
287                path=None,
288                ssh_options=[
289                    "-c",
290                    '"arcfour"',
291                    "-p",
292                    "22",
293                    "postgres@pg01.nowhere",
294                    "-o",
295                    "BatchMode=yes",
296                    "-o",
297                    "StrictHostKeyChecking=no",
298                ],
299                exclude=[
300                    "/pg_xlog/*",
301                    "/pg_log/*",
302                    "/log/*",
303                    "/recovery.conf",
304                    "/postmaster.pid",
305                ],
306                exclude_and_protect=["pg_tblspc/16387", "pg_tblspc/16405"],
307                include=None,
308                retry_sleep=0,
309                retry_times=0,
310                retry_handler=mock.ANY,
311            ),
312            mock.call(
313                network_compression=False,
314                args=[
315                    "--ignore-missing-args",
316                    "--itemize-changes",
317                    "--itemize-changes",
318                ],
319                bwlimit=None,
320                ssh="ssh",
321                path=None,
322                ssh_options=[
323                    "-c",
324                    '"arcfour"',
325                    "-p",
326                    "22",
327                    "postgres@pg01.nowhere",
328                    "-o",
329                    "BatchMode=yes",
330                    "-o",
331                    "StrictHostKeyChecking=no",
332                ],
333                exclude=None,
334                exclude_and_protect=None,
335                include=None,
336                retry_sleep=0,
337                retry_times=0,
338                retry_handler=mock.ANY,
339            ),
340            mock.call()(
341                ":/etc/postgresql.conf",
342                backup_info.get_data_directory(),
343                allowed_retval=(0, 23, 24),
344            ),
345            mock.call(
346                network_compression=False,
347                args=[
348                    "--ignore-missing-args",
349                    "--itemize-changes",
350                    "--itemize-changes",
351                ],
352                bwlimit=None,
353                ssh="ssh",
354                path=None,
355                ssh_options=[
356                    "-c",
357                    '"arcfour"',
358                    "-p",
359                    "22",
360                    "postgres@pg01.nowhere",
361                    "-o",
362                    "BatchMode=yes",
363                    "-o",
364                    "StrictHostKeyChecking=no",
365                ],
366                exclude=None,
367                exclude_and_protect=None,
368                include=None,
369                retry_sleep=0,
370                retry_times=0,
371                retry_handler=mock.ANY,
372            ),
373            mock.call()(
374                ":/pg/data/global/pg_control",
375                "%s/global/pg_control" % backup_info.get_data_directory(),
376                allowed_retval=(0, 23, 24),
377            ),
378        ]
379
380        # Check calls to _analyse_directory method
381        assert analyse_mock.mock_calls == [
382            mock.call(item) for item in rcc.item_list if item.is_directory
383        ]
384
385        # Check calls to _create_dir_and_purge method
386        assert create_and_purge_mock.mock_calls == [
387            mock.call(item) for item in rcc.item_list if item.is_directory
388        ]
389
390        # Utility function to build the file_list name
391        def file_list_name(label, kind):
392            return "%s/%s_%s_%s.list" % (tempdir.strpath, label, kind, os.getpid())
393
394        # Check the order of calls to the copy method
395        # All the file_list arguments are None because the analyze part
396        # has not really been executed
397        assert copy_mock.mock_calls == [
398            mock.call(
399                mock.ANY,
400                ":/fake/location/",
401                backup_info.get_data_directory(16387),
402                checksum=False,
403                file_list=file_list_name("tbs1", "safe"),
404            ),
405            mock.call(
406                mock.ANY,
407                ":/fake/location/",
408                backup_info.get_data_directory(16387),
409                checksum=True,
410                file_list=file_list_name("tbs1", "check"),
411            ),
412            mock.call(
413                mock.ANY,
414                ":/another/location/",
415                backup_info.get_data_directory(16405),
416                checksum=False,
417                file_list=file_list_name("tbs2", "safe"),
418            ),
419            mock.call(
420                mock.ANY,
421                ":/another/location/",
422                backup_info.get_data_directory(16405),
423                checksum=True,
424                file_list=file_list_name("tbs2", "check"),
425            ),
426            mock.call(
427                mock.ANY,
428                ":/pg/data/",
429                backup_info.get_data_directory(),
430                checksum=False,
431                file_list=file_list_name("pgdata", "safe"),
432            ),
433            mock.call(
434                mock.ANY,
435                ":/pg/data/",
436                backup_info.get_data_directory(),
437                checksum=True,
438                file_list=file_list_name("pgdata", "check"),
439            ),
440        ]
441
442    @patch("barman.copy_controller.RsyncCopyController._rsync_factory")
443    def test_list_files(self, rsync_factory_mock):
444        """
445        Unit test for RsyncCopyController._list_file's code
446        """
447        # Mock rsync invocation
448        rsync_mock = mock.Mock(name="Rsync()")
449        rsync_mock.ret = 0
450        rsync_mock.out = (
451            "drwxrwxrwt       69632 2015/02/09 15:01:00 tmp\n"
452            "drwxrwxrwt       69612 Thu Feb 19 15:01:22 2015 tmp2"
453        )
454        rsync_mock.err = "err"
455
456        # Mock _rsync_factory() invocation
457        rsync_factory_mock.return_value = rsync_mock
458
459        # Create an item to inspect
460        item = _RsyncCopyItem(
461            label="pgdata",
462            src=":/pg/data/",
463            dst="/some/dir",
464            is_directory=True,
465            item_class=RsyncCopyController.PGDATA_CLASS,
466            optional=False,
467        )
468
469        # Test the _list_files internal method
470        rcc = RsyncCopyController()
471        return_values = list(rcc._list_files(item, "some/path"))
472
473        # Returned list must contain two elements
474        assert len(return_values) == 2
475
476        # Verify that _rsync_factory has been called correctly
477        assert rsync_factory_mock.mock_calls == [
478            mock.call(item),
479        ]
480
481        # Check rsync.get_output has called correctly
482        rsync_mock.get_output.assert_called_with(
483            "--no-human-readable", "--list-only", "-r", "some/path", check=True
484        )
485
486        # Check the result
487        assert return_values[0] == _FileItem(
488            "drwxrwxrwt",
489            69632,
490            datetime(
491                year=2015,
492                month=2,
493                day=9,
494                hour=15,
495                minute=1,
496                second=0,
497                tzinfo=dateutil.tz.tzlocal(),
498            ),
499            "tmp",
500        )
501        assert return_values[1] == _FileItem(
502            "drwxrwxrwt",
503            69612,
504            datetime(
505                year=2015,
506                month=2,
507                day=19,
508                hour=15,
509                minute=1,
510                second=22,
511                tzinfo=dateutil.tz.tzlocal(),
512            ),
513            "tmp2",
514        )
515
516        # Test the _list_files internal method with a wrong output (added TZ)
517        rsync_mock.out = "drwxrwxrwt       69612 Thu Feb 19 15:01:22 CET 2015 tmp2\n"
518
519        rcc = RsyncCopyController()
520        with pytest.raises(RsyncListFilesFailure):
521            # The list() call is needed to consume the generator
522            list(rcc._list_files(rsync_mock, "some/path"))
523
524        # Check rsync.get_output has called correctly
525        rsync_mock.get_output.assert_called_with(
526            "--no-human-readable", "--list-only", "-r", "some/path", check=True
527        )
528
529    def test_fill_buckets(self):
530        """
531        Unit test for RsyncCopyController._fill_buckets's code
532        """
533
534        # Create a fake file list af about 525 GB of files
535        filedate = datetime(
536            year=2015,
537            month=2,
538            day=19,
539            hour=15,
540            minute=1,
541            second=22,
542            tzinfo=dateutil.tz.tzlocal(),
543        )
544        file_list = []
545        total_size = 0
546        for i in range(1001):
547            # We are using a prime number to get a non-correlable distribution
548            # of file sizes in the buckets
549            size = 1048583 * i
550            file_list.append(_FileItem("drwxrwxrwt", size, filedate, "tmp%08d" % i))
551            total_size += size
552
553        # Test the _fill_buckets internal method with only one worker:
554        # the result must be a bucket with the same list passed as argument
555        rcc = RsyncCopyController(workers=1)
556        buckets = list(rcc._fill_buckets(file_list))
557        assert len(buckets) == 1
558        assert buckets[0] == file_list
559
560        # Test the _fill_buckets internal method with multiple workers
561        # the result must be a bucket with the same list passed as argument
562        for workers in range(2, 17):
563            rcc = RsyncCopyController(workers=workers)
564            buckets = list(rcc._fill_buckets(file_list))
565            # There is enough buckets to contains all the files
566            assert len(buckets) >= int(total_size / BUCKET_SIZE)
567            for i, bucket in enumerate(buckets):
568                size = sum([f.size for f in bucket])
569                # The bucket is not bigger than BUCKET_SIZE
570                assert size < BUCKET_SIZE, "Bucket %s (%s) size %s too big" % (
571                    i,
572                    workers,
573                    size,
574                )
575                # The bucket cannot be empty
576                assert len(bucket), "Bucket %s (%s) is empty" % (i, workers)
577
578    def _run_analyze_directory(self, list_files_mock, tmpdir, ref_list, src_list):
579        # Apply it to _list_files calls
580        list_files_mock.side_effect = [ref_list, src_list]
581
582        # Build the prerequisites
583        server = build_real_server(
584            global_conf={"barman_home": tmpdir.mkdir("home").strpath}
585        )
586        config = server.config
587        executor = server.backup_manager.executor
588
589        # Create the RsyncCopyController putting the safe_horizon between
590        # the tmp/safe and tmp2/check timestamps
591        rcc = RsyncCopyController(
592            path=server.path,
593            ssh_command=executor.ssh_command,
594            ssh_options=executor.ssh_options,
595            network_compression=config.network_compression,
596            reuse_backup=None,
597            safe_horizon=datetime(
598                year=2015,
599                month=2,
600                day=20,
601                hour=19,
602                minute=0,
603                second=0,
604                tzinfo=dateutil.tz.tzlocal(),
605            ),
606        )
607
608        backup_info = build_test_backup_info(
609            server=server,
610            pgdata="/pg/data",
611            config_file="/etc/postgresql.conf",
612            hba_file="/pg/data/pg_hba.conf",
613            ident_file="/pg/data/pg_ident.conf",
614            begin_xlog="0/2000028",
615            begin_wal="000000010000000000000002",
616            begin_offset=28,
617        )
618        backup_info.save()
619        # This is to check that all the preparation is done correctly
620        assert os.path.exists(backup_info.filename)
621
622        # Add a temp dir (usually created by copy method
623        rcc.temp_dir = tmpdir.mkdir("tmp").strpath
624
625        # Create an item to inspect
626        item = _RsyncCopyItem(
627            label="pgdata",
628            src=":/pg/data/",
629            dst=backup_info.get_data_directory(),
630            is_directory=True,
631            item_class=rcc.PGDATA_CLASS,
632            optional=False,
633        )
634
635        # Then run the _analyze_directory method
636        rcc._analyze_directory(item)
637
638        return item, backup_info
639
640    @patch("barman.copy_controller.RsyncCopyController._list_files")
641    def test_analyze_directory(self, list_files_mock, tmpdir):
642        """
643        Unit test for RsyncCopyController._analyze_directory's code
644        """
645
646        # Build file list for ref
647        ref_list = [
648            _FileItem(
649                "drwxrwxrwt",
650                69632,
651                datetime(
652                    year=2015,
653                    month=2,
654                    day=9,
655                    hour=15,
656                    minute=1,
657                    second=0,
658                    tzinfo=dateutil.tz.tzlocal(),
659                ),
660                ".",
661            ),
662            _FileItem(
663                "drwxrwxrwt",
664                69612,
665                datetime(
666                    year=2015,
667                    month=2,
668                    day=19,
669                    hour=15,
670                    minute=1,
671                    second=22,
672                    tzinfo=dateutil.tz.tzlocal(),
673                ),
674                "tmp",
675            ),
676            _FileItem(
677                "-rw-r--r--",
678                69632,
679                datetime(
680                    year=2015,
681                    month=2,
682                    day=20,
683                    hour=18,
684                    minute=15,
685                    second=33,
686                    tzinfo=dateutil.tz.tzlocal(),
687                ),
688                "tmp/safe",
689            ),
690            _FileItem(
691                "-rw-r--r--",
692                69612,
693                datetime(
694                    year=2015,
695                    month=2,
696                    day=20,
697                    hour=19,
698                    minute=15,
699                    second=33,
700                    tzinfo=dateutil.tz.tzlocal(),
701                ),
702                "tmp/check",
703            ),
704            _FileItem(
705                "-rw-r--r--",
706                69612,
707                datetime(
708                    year=2015,
709                    month=2,
710                    day=20,
711                    hour=19,
712                    minute=15,
713                    second=33,
714                    tzinfo=dateutil.tz.tzlocal(),
715                ),
716                "tmp/diff_time",
717            ),
718            _FileItem(
719                "-rw-r--r--",
720                69612,
721                datetime(
722                    year=2015,
723                    month=2,
724                    day=20,
725                    hour=19,
726                    minute=15,
727                    second=33,
728                    tzinfo=dateutil.tz.tzlocal(),
729                ),
730                "tmp/diff_size",
731            ),
732        ]
733
734        # Build the list for source adding a new file, ...
735        src_list = ref_list + [
736            _FileItem(
737                "-rw-r--r--",
738                69612,
739                datetime(
740                    year=2015,
741                    month=2,
742                    day=20,
743                    hour=22,
744                    minute=15,
745                    second=33,
746                    tzinfo=dateutil.tz.tzlocal(),
747                ),
748                "tmp/new",
749            ),
750        ]
751        # ... changing the timestamp one old file ...
752        src_list[4] = _FileItem(
753            "-rw-r--r--",
754            69612,
755            datetime(
756                year=2015,
757                month=2,
758                day=20,
759                hour=20,
760                minute=15,
761                second=33,
762                tzinfo=dateutil.tz.tzlocal(),
763            ),
764            "tmp/diff_time",
765        )
766        # ... and changing the size of another
767        src_list[5] = _FileItem(
768            "-rw-r--r--",
769            77777,
770            datetime(
771                year=2015,
772                month=2,
773                day=20,
774                hour=19,
775                minute=15,
776                second=33,
777                tzinfo=dateutil.tz.tzlocal(),
778            ),
779            "tmp/diff_size",
780        )
781
782        item, backup_info = self._run_analyze_directory(
783            list_files_mock, tmpdir, ref_list, src_list
784        )
785
786        # Verify that _list_files has been called correctly
787        assert list_files_mock.mock_calls == [
788            mock.call(item, backup_info.get_data_directory() + "/"),
789            mock.call(item, ":/pg/data/"),
790        ]
791
792        # Check the result
793        # 1) The list of directories should be there and should contain all
794        # the directories
795        assert item.dir_file
796        assert open(item.dir_file).read() == (".\ntmp\n")
797        # The exclude_and_protect file should be populated correctly with all
798        # the files in the source
799        assert item.exclude_and_protect_file
800        assert open(item.exclude_and_protect_file).read() == (
801            "P /tmp/safe\n"
802            "- /tmp/safe\n"
803            "P /tmp/check\n"
804            "- /tmp/check\n"
805            "P /tmp/diff_time\n"
806            "- /tmp/diff_time\n"
807            "P /tmp/diff_size\n"
808            "- /tmp/diff_size\n"
809            "P /tmp/new\n"
810            "- /tmp/new\n"
811        )
812        # The check list must contain identical files after the safe_horizon
813        assert len(item.check_list) == 1
814        assert item.check_list[0].path == "tmp/check"
815        # The safe list must contain every file that is not in check and is
816        # present in the source
817        assert len(item.safe_list) == 4
818        assert item.safe_list[0].path == "tmp/safe"
819        assert item.safe_list[1].path == "tmp/diff_time"
820        assert item.safe_list[2].path == "tmp/diff_size"
821        assert item.safe_list[3].path == "tmp/new"
822
823    @patch("barman.copy_controller.RsyncCopyController._list_files")
824    def test_analyze_directory_empty_dst(self, list_files_mock, tmpdir):
825        """
826        Verify that RsyncCopyController._analyze_directory produces an empty
827        exclude_and_protect_file when the destination directory is empty.
828        """
829
830        # Only the current directory is in file list
831        ref_list = [
832            _FileItem(
833                "drwxrwxrwt",
834                69632,
835                datetime(
836                    year=2015,
837                    month=2,
838                    day=9,
839                    hour=15,
840                    minute=1,
841                    second=0,
842                    tzinfo=dateutil.tz.tzlocal(),
843                ),
844                ".",
845            ),
846        ]
847
848        # Minimal src_list so that there is something to copy
849        src_list = ref_list + [
850            _FileItem(
851                "drwxrwxrwt",
852                69612,
853                datetime(
854                    year=2015,
855                    month=2,
856                    day=19,
857                    hour=15,
858                    minute=1,
859                    second=22,
860                    tzinfo=dateutil.tz.tzlocal(),
861                ),
862                "tmp",
863            ),
864        ]
865
866        # Set up prerequisites and run the analyze directory function
867        item, backup_info = self._run_analyze_directory(
868            list_files_mock, tmpdir, ref_list, src_list
869        )
870
871        # Verify that _list_files has been called correctly
872        assert list_files_mock.mock_calls == [
873            mock.call(item, backup_info.get_data_directory() + "/"),
874            mock.call(item, ":/pg/data/"),
875        ]
876
877        # Check the result
878        # 1) The list of directories should be there and should contain all
879        # the directories
880        assert item.dir_file
881        assert open(item.dir_file).read() == (".\ntmp\n")
882        # The exclude_and_protect file should contain only wildcards to include
883        # all directories and exclude all files
884        assert item.exclude_and_protect_file
885        assert open(item.exclude_and_protect_file).read() == "+ */\n- *\n"
886
887    @patch("barman.copy_controller.RsyncCopyController._rsync_factory")
888    @patch("barman.copy_controller.RsyncCopyController._rsync_ignore_vanished_files")
889    def test_create_dir_and_purge(self, rsync_ignore_mock, rsync_factory_mock, tmpdir):
890        """
891        Unit test for RsyncCopyController._create_dir_and_purge's code
892        """
893        # Build the prerequisites
894        server = build_real_server(
895            global_conf={"barman_home": tmpdir.mkdir("home").strpath}
896        )
897        config = server.config
898        executor = server.backup_manager.executor
899
900        # Create the RsyncCopyController putting the safe_horizon between
901        # the tmp/safe and tmp2/check timestamps
902        rcc = RsyncCopyController(
903            path=server.path,
904            ssh_command=executor.ssh_command,
905            ssh_options=executor.ssh_options,
906            network_compression=config.network_compression,
907            reuse_backup=None,
908            safe_horizon=datetime(
909                year=2015,
910                month=2,
911                day=20,
912                hour=19,
913                minute=0,
914                second=0,
915                tzinfo=dateutil.tz.tzlocal(),
916            ),
917        )
918
919        backup_info = build_test_backup_info(
920            server=server,
921            pgdata="/pg/data",
922            config_file="/etc/postgresql.conf",
923            hba_file="/pg/data/pg_hba.conf",
924            ident_file="/pg/data/pg_ident.conf",
925            begin_xlog="0/2000028",
926            begin_wal="000000010000000000000002",
927            begin_offset=28,
928        )
929        backup_info.save()
930        # This is to check that all the preparation is done correctly
931        assert os.path.exists(backup_info.filename)
932
933        # Create an item to inspect
934        item = _RsyncCopyItem(
935            label="pgdata",
936            src=":/pg/data/",
937            dst=backup_info.get_data_directory(),
938            is_directory=True,
939            item_class=rcc.PGDATA_CLASS,
940            optional=False,
941        )
942
943        # Then run the _create_dir_and_purge method
944        rcc._create_dir_and_purge(item)
945
946        # Verify that _rsync_factory has been called correctly
947        assert rsync_factory_mock.mock_calls == [
948            mock.call(item),
949        ]
950
951        # Verify that _rsync_ignore_vanished_files has been called correctly
952        assert rsync_ignore_mock.mock_calls == [
953            mock.call(
954                rsync_factory_mock.return_value,
955                "--recursive",
956                "--delete",
957                "--files-from=None",
958                "--filter",
959                "merge None",
960                ":/pg/data/",
961                backup_info.get_data_directory(),
962                check=True,
963            ),
964        ]
965
966    @patch("barman.copy_controller.RsyncCopyController._rsync_ignore_vanished_files")
967    def test_copy(self, rsync_ignore_mock, tmpdir):
968        """
969        Unit test for RsyncCopyController._copy's code
970        """
971        # Build the prerequisites
972        server = build_real_server(
973            global_conf={"barman_home": tmpdir.mkdir("home").strpath}
974        )
975        config = server.config
976        executor = server.backup_manager.executor
977
978        # Create the RsyncCopyController putting the safe_horizon between
979        # the tmp/safe and tmp2/check timestamps
980        rcc = RsyncCopyController(
981            path=server.path,
982            ssh_command=executor.ssh_command,
983            ssh_options=executor.ssh_options,
984            network_compression=config.network_compression,
985            reuse_backup=None,
986            safe_horizon=datetime(
987                year=2015,
988                month=2,
989                day=20,
990                hour=19,
991                minute=0,
992                second=0,
993                tzinfo=dateutil.tz.tzlocal(),
994            ),
995        )
996
997        backup_info = build_test_backup_info(
998            server=server,
999            pgdata="/pg/data",
1000            config_file="/etc/postgresql.conf",
1001            hba_file="/pg/data/pg_hba.conf",
1002            ident_file="/pg/data/pg_ident.conf",
1003            begin_xlog="0/2000028",
1004            begin_wal="000000010000000000000002",
1005            begin_offset=28,
1006        )
1007        backup_info.save()
1008        # This is to check that all the preparation is done correctly
1009        assert os.path.exists(backup_info.filename)
1010
1011        # Create an rsync mock
1012        rsync_mock = mock.Mock(name="Rsync()")
1013
1014        # Then run the _copy method
1015        rcc._copy(
1016            rsync_mock,
1017            ":/pg/data/",
1018            backup_info.get_data_directory(),
1019            "/path/to/file.list",
1020            checksum=True,
1021        )
1022
1023        # Verify that _rsync_ignore_vanished_files has been called correctly
1024        assert rsync_ignore_mock.mock_calls == [
1025            mock.call(
1026                rsync_mock,
1027                ":/pg/data/",
1028                backup_info.get_data_directory(),
1029                "--files-from=/path/to/file.list",
1030                "--checksum",
1031                check=True,
1032            ),
1033        ]
1034
1035        # Try again without checksum
1036        rsync_ignore_mock.reset_mock()
1037        rcc._copy(
1038            rsync_mock,
1039            ":/pg/data/",
1040            backup_info.get_data_directory(),
1041            "/path/to/file.list",
1042            checksum=False,
1043        )
1044
1045        # Verify that _rsync_ignore_vanished_files has been called correctly
1046        assert rsync_ignore_mock.mock_calls == [
1047            mock.call(
1048                rsync_mock,
1049                ":/pg/data/",
1050                backup_info.get_data_directory(),
1051                "--files-from=/path/to/file.list",
1052                check=True,
1053            ),
1054        ]
1055
1056    def test_rsync_ignore_vanished_files(self):
1057        """
1058        Unit test for RsyncCopyController._rsync_ignore_vanished_files's code
1059        """
1060        # Create the RsyncCopyController
1061        rcc = RsyncCopyController()
1062
1063        # Create an rsync mock
1064        rsync_mock = mock.Mock(name="Rsync()")
1065        rsync_mock.out = "out"
1066        rsync_mock.err = "err"
1067        rsync_mock.ret = 0
1068
1069        # Then run the _copy method
1070        out, err = rcc._rsync_ignore_vanished_files(rsync_mock, 1, 2, a=3, b=4)
1071
1072        # Verify that rsync has been called correctly
1073        assert rsync_mock.mock_calls == [
1074            mock.call.get_output(1, 2, a=3, b=4, allowed_retval=(0, 23, 24))
1075        ]
1076
1077        # Verify the result
1078        assert out == rsync_mock.out
1079        assert err == rsync_mock.err
1080
1081        # Check with return code != 0
1082        # 24 - Partial transfer due to vanished source files
1083        rsync_mock.reset_mock()
1084        rsync_mock.ret = 24
1085        rcc._rsync_ignore_vanished_files(rsync_mock, 1, 2, a=3, b=4)
1086
1087        # Check with return code != 0
1088        # 23 - Partial transfer due to error
1089        # This should raise because the error contains an invalid response
1090        rsync_mock.reset_mock()
1091        rsync_mock.ret = 23
1092        with pytest.raises(CommandFailedException):
1093            rcc._rsync_ignore_vanished_files(rsync_mock, 1, 2, a=3, b=4)
1094
1095        # Check with return code != 0
1096        # 23 - Partial transfer due to error
1097        # This should not raise
1098        rsync_mock.reset_mock()
1099        rsync_mock.ret = 23
1100        rsync_mock.err = (
1101            # a file has vanished before rsync start
1102            'rsync: link_stat "a/file" failed: No such file or directory (2)\n'
1103            # files which vanished after rsync start
1104            'file has vanished: "some/other/file"\n'
1105            # files which have been truncated during transfer
1106            'rsync: read errors mapping "/truncated": No data available (61)\n'
1107            # final summary
1108            "rsync error: some files/attrs were not transferred "
1109            "(see previous errors) (code 23) at main.c(1249) "
1110            "[generator=3.0.6]\n"
1111        )
1112        rcc._rsync_ignore_vanished_files(rsync_mock, 1, 2, a=3, b=4)
1113
1114        # Check with return code != 0
1115        # 23 - Partial transfer due to error
1116        # Version with 'receiver' as error source
1117        # This should not raise
1118        rsync_mock.reset_mock()
1119        rsync_mock.ret = 23
1120        rsync_mock.err = (
1121            # a file has vanished before rsync start
1122            'rsync: link_stat "a/file" failed: No such file or directory (2)\n'
1123            # files which vanished after rsync start
1124            'file has vanished: "some/other/file"\n'
1125            # files which have been truncated during transfer
1126            'rsync: read errors mapping "/truncated": No data available (61)\n'
1127            # final summary
1128            "rsync error: some files/attrs were not transferred "
1129            "(see previous errors) (code 23) at main.c(1249) "
1130            "[Receiver=3.1.2]\n"
1131        )
1132        rcc._rsync_ignore_vanished_files(rsync_mock, 1, 2, a=3, b=4)
1133
1134        # Check with return code != 0
1135        # 23 - Partial transfer due to error
1136        # Version with 'sender' as error source
1137        # This should not raise
1138        rsync_mock.reset_mock()
1139        rsync_mock.ret = 23
1140        rsync_mock.err = (
1141            # a file has vanished before rsync start
1142            'rsync: link_stat "a/file" failed: No such file or directory (2)\n'
1143            # files which vanished after rsync start
1144            'file has vanished: "some/other/file"\n'
1145            # files which have been truncated during transfer
1146            'rsync: read errors mapping "/truncated": No data available (61)\n'
1147            # final summary
1148            "rsync error: some files/attrs were not transferred "
1149            "(see previous errors) (code 23) at main.c(1249) "
1150            "[Sender=3.1.2]\n"
1151        )
1152        rcc._rsync_ignore_vanished_files(rsync_mock, 1, 2, a=3, b=4)
1153
1154    # This test runs for 1, 4 and 16 workers
1155    @pytest.mark.parametrize("workers", [1, 4, 16])
1156    @patch("barman.copy_controller.Pool", new=multiprocessing.dummy.Pool)
1157    @patch("barman.copy_controller.RsyncPgData")
1158    @patch("barman.copy_controller.RsyncCopyController._analyze_directory")
1159    @patch("barman.copy_controller.RsyncCopyController._create_dir_and_purge")
1160    @patch("barman.copy_controller.RsyncCopyController._copy")
1161    @patch("tempfile.mkdtemp")
1162    @patch("signal.signal")
1163    def test_statistics(
1164        self,
1165        signal_mock,
1166        tempfile_mock,
1167        copy_mock,
1168        create_and_purge_mock,
1169        analyse_mock,
1170        rsync_mock,
1171        tmpdir,
1172        workers,
1173    ):
1174        """
1175        Unit test for RsyncCopyController.statistics's code
1176        """
1177
1178        # Do a fake copy run to populate the start/stop timestamps.
1179        # The steps are the same of the full run test
1180        tempdir = tmpdir.mkdir("tmp")
1181        tempfile_mock.return_value = tempdir.strpath
1182        server = build_real_server(
1183            global_conf={"barman_home": tmpdir.mkdir("home").strpath}
1184        )
1185        config = server.config
1186        executor = server.backup_manager.executor
1187
1188        rcc = RsyncCopyController(
1189            path=server.path,
1190            ssh_command=executor.ssh_command,
1191            ssh_options=executor.ssh_options,
1192            network_compression=config.network_compression,
1193            reuse_backup=None,
1194            safe_horizon=None,
1195            workers=workers,
1196        )
1197
1198        backup_info = build_test_backup_info(
1199            server=server,
1200            pgdata="/pg/data",
1201            config_file="/etc/postgresql.conf",
1202            hba_file="/pg/data/pg_hba.conf",
1203            ident_file="/pg/data/pg_ident.conf",
1204            begin_xlog="0/2000028",
1205            begin_wal="000000010000000000000002",
1206            begin_offset=28,
1207        )
1208        backup_info.save()
1209        # This is to check that all the preparation is done correctly
1210        assert os.path.exists(backup_info.filename)
1211
1212        # Silence the access to result properties
1213        rsync_mock.return_value.out = ""
1214        rsync_mock.return_value.err = ""
1215        rsync_mock.return_value.ret = 0
1216
1217        # Mock analyze directory
1218        def analyse_func(item):
1219            label = item.label
1220            item.dir_file = label + "_dir_file"
1221            item.exclude_and_protect_file = label + "_exclude_and_protect_file"
1222            item.safe_list = [_FileItem("mode", 1, "date", "path")]
1223            item.check_list = [_FileItem("mode", 1, "date", "path")]
1224
1225        analyse_mock.side_effect = analyse_func
1226
1227        rcc.add_directory(
1228            label="tbs1",
1229            src=":/fake/location/",
1230            dst=backup_info.get_data_directory(16387),
1231            reuse=None,
1232            bwlimit=None,
1233            item_class=rcc.TABLESPACE_CLASS,
1234        )
1235        rcc.add_directory(
1236            label="tbs2",
1237            src=":/another/location/",
1238            dst=backup_info.get_data_directory(16405),
1239            reuse=None,
1240            bwlimit=None,
1241            item_class=rcc.TABLESPACE_CLASS,
1242        )
1243        rcc.add_directory(
1244            label="pgdata",
1245            src=":/pg/data/",
1246            dst=backup_info.get_data_directory(),
1247            reuse=None,
1248            bwlimit=None,
1249            item_class=rcc.PGDATA_CLASS,
1250            exclude=[
1251                "/pg_xlog/*",
1252                "/pg_log/*",
1253                "/log/*",
1254                "/recovery.conf",
1255                "/postmaster.pid",
1256            ],
1257            exclude_and_protect=["pg_tblspc/16387", "pg_tblspc/16405"],
1258        )
1259        rcc.add_file(
1260            label="pg_control",
1261            src=":/pg/data/global/pg_control",
1262            dst="%s/global/pg_control" % backup_info.get_data_directory(),
1263            item_class=rcc.PGCONTROL_CLASS,
1264        )
1265        rcc.add_file(
1266            label="config_file",
1267            src=":/etc/postgresql.conf",
1268            dst=backup_info.get_data_directory(),
1269            item_class=rcc.CONFIG_CLASS,
1270            optional=False,
1271        )
1272        # Do the fake run
1273        rcc.copy()
1274
1275        # Calculate statistics
1276        result = rcc.statistics()
1277
1278        # We cannot check the actual result because it is not predictable,
1279        # so we check that every value is present and is a number and it is
1280        # greather than 0
1281        assert result.get("analysis_time") > 0
1282        assert "analysis_time_per_item" in result
1283        for tbs in ("pgdata", "tbs1", "tbs2"):
1284            assert result["analysis_time_per_item"][tbs] > 0
1285
1286        assert result.get("copy_time") > 0
1287        assert "copy_time_per_item" in result
1288        assert "serialized_copy_time_per_item" in result
1289        for tbs in ("pgdata", "tbs1", "tbs2", "config_file", "pg_control"):
1290            assert result["copy_time_per_item"][tbs] > 0
1291            assert result["serialized_copy_time_per_item"][tbs] > 0
1292
1293        assert result.get("number_of_workers") == rcc.workers
1294        assert result.get("total_time") > 0
1295
1296    def test_rsync_copy_item_class(self):
1297        # A value for the item_class attribute is mandatory for this resource
1298        with pytest.raises(AssertionError):
1299            _RsyncCopyItem("symbolic_name", "source", "destination")
1300