1#!/usr/bin/env python
2#
3# Public Domain 2014-2018 MongoDB, Inc.
4# Public Domain 2008-2014 WiredTiger, Inc.
5#
6# This is free and unencumbered software released into the public domain.
7#
8# Anyone is free to copy, modify, publish, use, compile, sell, or
9# distribute this software, either in source code form or as a compiled
10# binary, for any purpose, commercial or non-commercial, and by any
11# means.
12#
13# In jurisdictions that recognize copyright laws, the author or authors
14# of this software dedicate any and all copyright interest in the
15# software to the public domain. We make this dedication for the benefit
16# of the public at large and to the detriment of our heirs and
17# successors. We intend this dedication to be an overt act of
18# relinquishment in perpetuity of all present and future rights to this
19# software under copyright law.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
25# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
26# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28
29import os, re, string
30from suite_subprocess import suite_subprocess
31from wtdataset import SimpleDataSet, ComplexDataSet, ComplexLSMDataSet
32import wiredtiger, wttest
33
34from wtscenario import make_scenarios
35
36# test_util13.py
37#    Utilities: wt dump, as well as the dump cursor
38#    Test that dump and load retain table configuration information.
39#
class test_util13(wttest.WiredTigerTestCase, suite_subprocess):
    """
    Test wt dump.  We check for specific output and preservation of
    non-default table create parameters.

    Each scenario creates and populates an object with a non-default
    configuration, dumps it with "wt dump" and checks that the
    configuration lines of the dump header contain the expected settings.
    The dump is then loaded into a fresh home directory, dumped again,
    and the same check is applied to the second dump.
    """

    pfx = 'test_util13'
    nentries = 100
    dir = "dump_dir"
    #
    # Select table configuration settings that are not the default.
    #
    # Each scenario supplies:
    #   uri          - the object that is created and dumped
    #   dataset      - the dataset class used to create and populate it
    #   table_config - passed to the dataset as its "config" argument
    #   cfg          - the setting expected in the dump output; when it is
    #                  empty, table_config itself is the expected setting
    #   cg_config    - passed to the dataset as its "cgconfig" argument
    #
    types = [
        ('file-simple', dict(uri='file:' + pfx, dataset=SimpleDataSet,
            table_config='prefix_compression_min=3', cfg='',
            cg_config='')),
        ('lsm-simple', dict(uri='lsm:' + pfx, dataset=SimpleDataSet,
            table_config='lsm=(bloom_bit_count=29)', cfg='bloom_bit_count=29',
            cg_config='')),
        ('table-simple', dict(uri='table:' + pfx, dataset=SimpleDataSet,
            table_config='split_pct=50', cfg='',
            cg_config='')),
        ('table-complex',
            dict(uri='table:' + pfx, dataset=ComplexDataSet,
            table_config='allocation_size=512B', cfg='',
            cg_config='allocation_size=512B')),
        ('table-complex-lsm',
            dict(uri='table:' + pfx, dataset=ComplexLSMDataSet,
            table_config='lsm=(merge_max=5)', cfg='merge_max=5',
            cg_config='lsm=(merge_max=5)'))
    ]

    scenarios = make_scenarios(types)

    @staticmethod
    def _config_to_dict(cfg):
        """
        Flatten a configuration string into a dict of key/value strings.

        '(' characters are replaced by ',' so that the members of a
        configuration group are split like top-level items.  If we ever
        want to look for config groups this will need to change.

        Tokens without an '=' are skipped and only the first '=' in a
        token separates the key from the value, so an unusual token
        cannot abort the comparison with a ValueError.
        """
        pairs = {}
        for token in cfg.strip().replace('(', ',').split(','):
            key, sep, value = token.partition('=')
            if sep:
                pairs[key] = value
        return pairs

    def compare_config(self, expected_cfg, actual_cfg):
        """
        Check that every setting in expected_cfg appears, with the same
        value, in actual_cfg.

        expected_cfg -- configuration line holding the expected subset
        actual_cfg   -- configuration line taken from the dump output

        Returns True when all expected settings are present and equal,
        otherwise prints both parsed configurations and returns False.
        """
        cfg_orig = actual_cfg
        if self.dataset != SimpleDataSet:
            #
            # If we have a complex config, strip out the colgroups and
            # columns from the config.  Doing so allows us to keep the
            # splitting in _config_to_dict usable because those two items
            # don't have assignments in them.
            #
            nocolgrp = re.sub(r"colgroups=\((.+?)\),", '', actual_cfg)
            cfg_orig = re.sub(r"columns=\((.+?)\),", '', nocolgrp)

        da = self._config_to_dict(cfg_orig)
        dx = self._config_to_dict(expected_cfg)

        # Check that all items in our expected config subset are in
        # the actual configuration and they match.
        match = all(key in da and da[key] == value
            for key, value in dx.items())
        if not match:
            # Single-argument print calls behave identically whether or
            # not print is a statement in the running interpreter.
            print("MISMATCH:")
            print("Original dict: ")
            print(da)
            print("Expected config: ")
            print(dx)
        return match

    def compare_files(self, expect_subset, dump_out):
        """
        Compare the header of a dump file against the expected subset.

        expect_subset -- name of the file holding the expected header
        dump_out      -- name of the file holding the "wt dump" output

        The two files are read in lockstep.  After the 'Header' line the
        lines alternate between URI and configuration; only configuration
        lines are compared, using compare_config.  Reading stops at the
        'Data' line of the expected file, or when either file ends.

        Returns False as soon as a configuration line fails to match,
        True otherwise.
        """
        inheader = isconfig = False
        # Read as text so the lines compare equal to the string literals
        # below, and use a with statement so both files are closed on
        # every exit path.
        with open(expect_subset, "r") as expectfp, \
            open(dump_out, "r") as dumpfp:
            for l1, l2 in zip(expectfp, dumpfp):
                if isconfig:
                    if not self.compare_config(l1, l2):
                        return False
                if inheader:
                    # This works because the expected subset has a format
                    # of URI and config lines alternating.
                    isconfig = not isconfig
                if l1.strip() == 'Header':
                    inheader = True
                if l1.strip() == 'Data':
                    break
        return True

    def load_recheck(self, ds, expect_subset, dump_out):
        """
        Load a dump into a new home directory and verify it again.

        ds            -- the dataset object that was populated by the test
        expect_subset -- name of the file holding the expected header
        dump_out      -- name of the dump file to load

        Creates self.dir, runs "wt load" on dump_out there, opens the
        loaded object, runs ds.check(), then dumps the object from the
        new home and compares that dump with expect_subset.

        Returns True; a failed comparison fails the test through
        assertTrue instead of returning False.
        """
        newdump = "newdump.out"
        os.mkdir(self.dir)
        self.runWt(['-h', self.dir, 'load', '-f', dump_out])
        # Check the contents
        conn = self.wiredtiger_open(self.dir)
        try:
            session = conn.open_session()
            # Opening a cursor confirms the object can be opened in the
            # newly loaded home; the cursor itself is not used further.
            session.open_cursor(self.uri, None, None)
            # NOTE(review): ds was constructed by the caller, not from the
            # connection opened here, so this may be checking the original
            # database rather than the loaded copy -- confirm.
            ds.check()
        finally:
            # Close the connection even when one of the checks raises.
            conn.close()
        self.runWt(['-h', self.dir, 'dump', self.uri], outfilename=newdump)

        self.assertTrue(self.compare_files(expect_subset, newdump))
        return True

    def test_dump_config(self):
        """
        Dump a populated object and verify that its non-default
        configuration is in the dump, both before and after a reload.
        """
        # The number of btree_entries reported is influenced by the
        # number of column groups and indices.  Each insert will have
        # a multiplied effect.
        ds = self.dataset(self, self.uri, self.nentries,
                          config=self.table_config, cgconfig=self.cg_config)
        ds.populate()

        ver = wiredtiger.wiredtiger_version()
        verstring = str(ver[1]) + '.' + str(ver[2]) + '.' + str(ver[3])
        expectfile = "expect.out"
        with open(expectfile, "w") as expectout:
            # Note: this output is sensitive to the precise output format
            # generated by wt dump.  If this is likely to change, we should
            # make this test more accommodating.
            expectout.write(
                'WiredTiger Dump (WiredTiger Version ' + verstring + ')\n')
            expectout.write('Format=print\n')
            expectout.write('Header\n')
            expectout.write(self.uri + '\n')
            # Check the config on the colgroup itself for complex tables.
            if self.dataset != SimpleDataSet:
                expectout.write('key_format=S\n')
                expectout.write('colgroup:' + self.pfx + ':cgroup1\n')
            # An empty cfg means the table configuration string is the
            # setting expected in the dump output.
            if self.cfg == '':
                expectout.write(self.table_config + '\n')
            else:
                expectout.write(self.cfg + '\n')
            expectout.write('Data\n')

        self.pr('calling dump')
        outfile = "dump.out"
        self.runWt(['dump', self.uri], outfilename=outfile)

        self.assertTrue(self.compare_files(expectfile, outfile))
        self.assertTrue(self.load_recheck(ds, expectfile, outfile))
183
# When this file is executed directly as a script, rather than imported,
# hand control to wttest.run().
if __name__ == '__main__':
    wttest.run()
186