1proc log_file_matches {log pattern} {
2    set fp [open $log r]
3    set content [read $fp]
4    close $fp
5    string match $pattern $content
6}
7
8start_server {tags {"repl network external:skip"}} {
9    set slave [srv 0 client]
10    set slave_host [srv 0 host]
11    set slave_port [srv 0 port]
12    set slave_log [srv 0 stdout]
13    start_server {} {
14        set master [srv 0 client]
15        set master_host [srv 0 host]
16        set master_port [srv 0 port]
17
18        # Configure the master in order to hang waiting for the BGSAVE
19        # operation, so that the slave remains in the handshake state.
20        $master config set repl-diskless-sync yes
21        $master config set repl-diskless-sync-delay 1000
22
23        # Use a short replication timeout on the slave, so that if there
24        # are no bugs the timeout is triggered in a reasonable amount
25        # of time.
26        $slave config set repl-timeout 5
27
28        # Start the replication process...
29        $slave slaveof $master_host $master_port
30
31        test {Slave enters handshake} {
32            wait_for_condition 50 1000 {
33                [string match *handshake* [$slave role]]
34            } else {
35                fail "Replica does not enter handshake state"
36            }
37        }
38
39        # But make the master unable to send
40        # the periodic newlines to refresh the connection. The slave
41        # should detect the timeout.
42        $master debug sleep 10
43
44        test {Slave is able to detect timeout during handshake} {
45            wait_for_condition 50 1000 {
46                [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
47            } else {
48                fail "Replica is not able to detect timeout"
49            }
50        }
51    }
52}
53
54start_server {tags {"repl external:skip"}} {
55    set A [srv 0 client]
56    set A_host [srv 0 host]
57    set A_port [srv 0 port]
58    start_server {} {
59        set B [srv 0 client]
60        set B_host [srv 0 host]
61        set B_port [srv 0 port]
62
63        test {Set instance A as slave of B} {
64            $A slaveof $B_host $B_port
65            wait_for_condition 50 100 {
66                [lindex [$A role] 0] eq {slave} &&
67                [string match {*master_link_status:up*} [$A info replication]]
68            } else {
69                fail "Can't turn the instance into a replica"
70            }
71        }
72
73        test {INCRBYFLOAT replication, should not remove expire} {
74            r set test 1 EX 100
75            r incrbyfloat test 0.1
76            after 1000
77            assert_equal [$A debug digest] [$B debug digest]
78        }
79
80        test {GETSET replication} {
81            $A config resetstat
82            $A config set loglevel debug
83            $B config set loglevel debug
84            r set test foo
85            assert_equal [r getset test bar] foo
86            wait_for_condition 500 10 {
87                [$A get test] eq "bar"
88            } else {
89                fail "getset wasn't propagated"
90            }
91            assert_equal [r set test vaz get] bar
92            wait_for_condition 500 10 {
93                [$A get test] eq "vaz"
94            } else {
95                fail "set get wasn't propagated"
96            }
97            assert_match {*calls=3,*} [cmdrstat set $A]
98            assert_match {} [cmdrstat getset $A]
99        }
100
101        test {BRPOPLPUSH replication, when blocking against empty list} {
102            $A config resetstat
103            set rd [redis_deferring_client]
104            $rd brpoplpush a b 5
105            r lpush a foo
106            wait_for_condition 50 100 {
107                [$A debug digest] eq [$B debug digest]
108            } else {
109                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
110            }
111            assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
112            assert_match {} [cmdrstat lmove $A]
113        }
114
115        test {BRPOPLPUSH replication, list exists} {
116            $A config resetstat
117            set rd [redis_deferring_client]
118            r lpush c 1
119            r lpush c 2
120            r lpush c 3
121            $rd brpoplpush c d 5
122            after 1000
123            assert_equal [$A debug digest] [$B debug digest]
124            assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
125            assert_match {} [cmdrstat lmove $A]
126        }
127
128        foreach wherefrom {left right} {
129            foreach whereto {left right} {
130                test "BLMOVE ($wherefrom, $whereto) replication, when blocking against empty list" {
131                    $A config resetstat
132                    set rd [redis_deferring_client]
133                    $rd blmove a b $wherefrom $whereto 5
134                    r lpush a foo
135                    wait_for_condition 50 100 {
136                        [$A debug digest] eq [$B debug digest]
137                    } else {
138                        fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
139                    }
140                    assert_match {*calls=1,*} [cmdrstat lmove $A]
141                    assert_match {} [cmdrstat rpoplpush $A]
142                }
143
144                test "BLMOVE ($wherefrom, $whereto) replication, list exists" {
145                    $A config resetstat
146                    set rd [redis_deferring_client]
147                    r lpush c 1
148                    r lpush c 2
149                    r lpush c 3
150                    $rd blmove c d $wherefrom $whereto 5
151                    after 1000
152                    assert_equal [$A debug digest] [$B debug digest]
153                    assert_match {*calls=1,*} [cmdrstat lmove $A]
154                    assert_match {} [cmdrstat rpoplpush $A]
155                }
156            }
157        }
158
159        test {BLPOP followed by role change, issue #2473} {
160            set rd [redis_deferring_client]
161            $rd blpop foo 0 ; # Block while B is a master
162
163            # Turn B into master of A
164            $A slaveof no one
165            $B slaveof $A_host $A_port
166            wait_for_condition 50 100 {
167                [lindex [$B role] 0] eq {slave} &&
168                [string match {*master_link_status:up*} [$B info replication]]
169            } else {
170                fail "Can't turn the instance into a replica"
171            }
172
173            # Push elements into the "foo" list of the new replica.
174            # If the client is still attached to the instance, we'll get
175            # a desync between the two instances.
176            $A rpush foo a b c
177            after 100
178
179            wait_for_condition 50 100 {
180                [$A debug digest] eq [$B debug digest] &&
181                [$A lrange foo 0 -1] eq {a b c} &&
182                [$B lrange foo 0 -1] eq {a b c}
183            } else {
184                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
185            }
186        }
187    }
188}
189
190start_server {tags {"repl external:skip"}} {
191    r set mykey foo
192
193    start_server {} {
194        test {Second server should have role master at first} {
195            s role
196        } {master}
197
198        test {SLAVEOF should start with link status "down"} {
199            r multi
200            r slaveof [srv -1 host] [srv -1 port]
201            r info replication
202            r exec
203        } {*master_link_status:down*}
204
205        test {The role should immediately be changed to "replica"} {
206            s role
207        } {slave}
208
209        wait_for_sync r
210        test {Sync should have transferred keys from master} {
211            r get mykey
212        } {foo}
213
214        test {The link status should be up} {
215            s master_link_status
216        } {up}
217
218        test {SET on the master should immediately propagate} {
219            r -1 set mykey bar
220
221            wait_for_condition 500 100 {
222                [r  0 get mykey] eq {bar}
223            } else {
224                fail "SET on master did not propagated on replica"
225            }
226        }
227
228        test {FLUSHALL should replicate} {
229            r -1 flushall
230            if {$::valgrind} {after 2000}
231            list [r -1 dbsize] [r 0 dbsize]
232        } {0 0}
233
234        test {ROLE in master reports master with a slave} {
235            set res [r -1 role]
236            lassign $res role offset slaves
237            assert {$role eq {master}}
238            assert {$offset > 0}
239            assert {[llength $slaves] == 1}
240            lassign [lindex $slaves 0] master_host master_port slave_offset
241            assert {$slave_offset <= $offset}
242        }
243
244        test {ROLE in slave reports slave in connected state} {
245            set res [r role]
246            lassign $res role master_host master_port slave_state slave_offset
247            assert {$role eq {slave}}
248            assert {$slave_state eq {connected}}
249        }
250    }
251}
252
253foreach mdl {no yes} {
254    foreach sdl {disabled swapdb} {
255        start_server {tags {"repl external:skip"}} {
256            set master [srv 0 client]
257            $master config set repl-diskless-sync $mdl
258            $master config set repl-diskless-sync-delay 1
259            set master_host [srv 0 host]
260            set master_port [srv 0 port]
261            set slaves {}
262            start_server {} {
263                lappend slaves [srv 0 client]
264                start_server {} {
265                    lappend slaves [srv 0 client]
266                    start_server {} {
267                        lappend slaves [srv 0 client]
268                        test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" {
269                            # start load handles only inside the test, so that the test can be skipped
270                            set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
271                            set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
272                            set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000]
273                            set load_handle3 [start_write_load $master_host $master_port 8]
274                            set load_handle4 [start_write_load $master_host $master_port 4]
275                            after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork
276
277                            # Send SLAVEOF commands to slaves
278                            [lindex $slaves 0] config set repl-diskless-load $sdl
279                            [lindex $slaves 1] config set repl-diskless-load $sdl
280                            [lindex $slaves 2] config set repl-diskless-load $sdl
281                            [lindex $slaves 0] slaveof $master_host $master_port
282                            [lindex $slaves 1] slaveof $master_host $master_port
283                            [lindex $slaves 2] slaveof $master_host $master_port
284
285                            # Wait for all the three slaves to reach the "online"
286                            # state from the POV of the master.
287                            set retry 500
288                            while {$retry} {
289                                set info [r -3 info]
290                                if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} {
291                                    break
292                                } else {
293                                    incr retry -1
294                                    after 100
295                                }
296                            }
297                            if {$retry == 0} {
298                                error "assertion:Slaves not correctly synchronized"
299                            }
300
301                            # Wait that slaves acknowledge they are online so
302                            # we are sure that DBSIZE and DEBUG DIGEST will not
303                            # fail because of timing issues.
304                            wait_for_condition 500 100 {
305                                [lindex [[lindex $slaves 0] role] 3] eq {connected} &&
306                                [lindex [[lindex $slaves 1] role] 3] eq {connected} &&
307                                [lindex [[lindex $slaves 2] role] 3] eq {connected}
308                            } else {
309                                fail "Slaves still not connected after some time"
310                            }
311
312                            # Stop the write load
313                            stop_bg_complex_data $load_handle0
314                            stop_bg_complex_data $load_handle1
315                            stop_bg_complex_data $load_handle2
316                            stop_write_load $load_handle3
317                            stop_write_load $load_handle4
318
319                            # Make sure no more commands processed
320                            wait_load_handlers_disconnected
321
322                            wait_for_ofs_sync $master [lindex $slaves 0]
323                            wait_for_ofs_sync $master [lindex $slaves 1]
324                            wait_for_ofs_sync $master [lindex $slaves 2]
325
326                            # Check digests
327                            set digest [$master debug digest]
328                            set digest0 [[lindex $slaves 0] debug digest]
329                            set digest1 [[lindex $slaves 1] debug digest]
330                            set digest2 [[lindex $slaves 2] debug digest]
331                            assert {$digest ne 0000000000000000000000000000000000000000}
332                            assert {$digest eq $digest0}
333                            assert {$digest eq $digest1}
334                            assert {$digest eq $digest2}
335                        }
336                   }
337                }
338            }
339        }
340    }
341}
342
343start_server {tags {"repl external:skip"}} {
344    set master [srv 0 client]
345    set master_host [srv 0 host]
346    set master_port [srv 0 port]
347    start_server {} {
348        test "Master stream is correctly processed while the replica has a script in -BUSY state" {
349            set load_handle0 [start_write_load $master_host $master_port 3]
350            set slave [srv 0 client]
351            $slave config set lua-time-limit 500
352            $slave slaveof $master_host $master_port
353
354            # Wait for the slave to be online
355            wait_for_condition 500 100 {
356                [lindex [$slave role] 3] eq {connected}
357            } else {
358                fail "Replica still not connected after some time"
359            }
360
361            # Wait some time to make sure the master is sending data
362            # to the slave.
363            after 5000
364
365            # Stop the ability of the slave to process data by sendig
366            # a script that will put it in BUSY state.
367            $slave eval {for i=1,3000000000 do end} 0
368
369            # Wait some time again so that more master stream will
370            # be processed.
371            after 2000
372
373            # Stop the write load
374            stop_write_load $load_handle0
375
376            # number of keys
377            wait_for_condition 500 100 {
378                [$master debug digest] eq [$slave debug digest]
379            } else {
380                fail "Different datasets between replica and master"
381            }
382        }
383    }
384}
385
386# Diskless load swapdb when NOT async_loading (different master replid)
387foreach testType {Successful Aborted} {
388    start_server {tags {"repl external:skip"}} {
389        set replica [srv 0 client]
390        set replica_host [srv 0 host]
391        set replica_port [srv 0 port]
392        set replica_log [srv 0 stdout]
393        start_server {} {
394            set master [srv 0 client]
395            set master_host [srv 0 host]
396            set master_port [srv 0 port]
397
398            # Set master and replica to use diskless replication on swapdb mode
399            $master config set repl-diskless-sync yes
400            $master config set repl-diskless-sync-delay 0
401            $master config set save ""
402            $replica config set repl-diskless-load swapdb
403            $replica config set save ""
404
405            # Put different data sets on the master and replica
406            # We need to put large keys on the master since the replica replies to info only once in 2mb
407            $replica debug populate 200 slave 10
408            $master debug populate 1000 master 100000
409            $master config set rdbcompression no
410
411            # Set a key value on replica to check status on failure and after swapping db
412            $replica set mykey myvalue
413
414            switch $testType {
415                "Aborted" {
416                    # Set master with a slow rdb generation, so that we can easily intercept loading
417                    # 10ms per key, with 1000 keys is 10 seconds
418                    $master config set rdb-key-save-delay 10000
419
420                    # Start the replication process
421                    $replica replicaof $master_host $master_port
422
423                    test {Diskless load swapdb (different replid): replica enter loading} {
424                        # Wait for the replica to start reading the rdb
425                        wait_for_condition 100 100 {
426                            [s -1 loading] eq 1
427                        } else {
428                            fail "Replica didn't get into loading mode"
429                        }
430
431                        assert_equal [s -1 async_loading] 0
432                    }
433
434                    # Make sure that next sync will not start immediately so that we can catch the replica in between syncs
435                    $master config set repl-diskless-sync-delay 5
436
437                    # Kill the replica connection on the master
438                    set killed [$master client kill type replica]
439
440                    # Wait for loading to stop (fail)
441                    wait_for_condition 100 100 {
442                        [s -1 loading] eq 0
443                    } else {
444                        fail "Replica didn't disconnect"
445                    }
446
447                    test {Diskless load swapdb (different replid): old database is exposed after replication fails} {
448                        # Ensure we see old values from replica
449                        assert_equal [$replica get mykey] "myvalue"
450
451                        # Make sure amount of replica keys didn't change
452                        assert_equal [$replica dbsize] 201
453                    }
454
455                    # Speed up shutdown
456                    $master config set rdb-key-save-delay 0
457                }
458                "Successful" {
459                    # Start the replication process
460                    $replica replicaof $master_host $master_port
461
462                    # Let replica finish sync with master
463                    wait_for_condition 100 100 {
464                        [s -1 master_link_status] eq "up"
465                    } else {
466                        fail "Master <-> Replica didn't finish sync"
467                    }
468
469                    test {Diskless load swapdb (different replid): new database is exposed after swapping} {
470                        # Ensure we don't see anymore the key that was stored only to replica and also that we don't get LOADING status
471                        assert_equal [$replica GET mykey] ""
472
473                        # Make sure amount of keys matches master
474                        assert_equal [$replica dbsize] 1000
475                    }
476                }
477            }
478        }
479    }
480}
481
482# Diskless load swapdb when async_loading (matching master replid)
483foreach testType {Successful Aborted} {
484    start_server {tags {"repl external:skip"}} {
485        set replica [srv 0 client]
486        set replica_host [srv 0 host]
487        set replica_port [srv 0 port]
488        set replica_log [srv 0 stdout]
489        start_server {} {
490            set master [srv 0 client]
491            set master_host [srv 0 host]
492            set master_port [srv 0 port]
493
494            # Set master and replica to use diskless replication on swapdb mode
495            $master config set repl-diskless-sync yes
496            $master config set repl-diskless-sync-delay 0
497            $master config set save ""
498            $replica config set repl-diskless-load swapdb
499            $replica config set save ""
500
501            # Set replica writable so we can check that a key we manually added is served
502            # during replication and after failure, but disappears on success
503            $replica config set replica-read-only no
504
505            # Initial sync to have matching replids between master and replica
506            $replica replicaof $master_host $master_port
507
508            # Let replica finish initial sync with master
509            wait_for_condition 100 100 {
510                [s -1 master_link_status] eq "up"
511            } else {
512                fail "Master <-> Replica didn't finish sync"
513            }
514
515            # Put different data sets on the master and replica
516            # We need to put large keys on the master since the replica replies to info only once in 2mb
517            $replica debug populate 2000 slave 10
518            $master debug populate 1000 master 100000
519            $master config set rdbcompression no
520
521            # Set a key value on replica to check status during loading, on failure and after swapping db
522            $replica set mykey myvalue
523
524            # Force the replica to try another full sync (this time it will have matching master replid)
525            $master multi
526            $master client kill type replica
527            # Fill replication backlog with new content
528            $master config set repl-backlog-size 16384
529            for {set keyid 0} {$keyid < 10} {incr keyid} {
530                $master set "$keyid string_$keyid" [string repeat A 16384]
531            }
532            $master exec
533
534            switch $testType {
535                "Aborted" {
536                    # Set master with a slow rdb generation, so that we can easily intercept loading
537                    # 10ms per key, with 1000 keys is 10 seconds
538                    $master config set rdb-key-save-delay 10000
539
540                    test {Diskless load swapdb (async_loading): replica enter async_loading} {
541                        # Wait for the replica to start reading the rdb
542                        wait_for_condition 100 100 {
543                            [s -1 async_loading] eq 1
544                        } else {
545                            fail "Replica didn't get into async_loading mode"
546                        }
547
548                        assert_equal [s -1 loading] 0
549                    }
550
551                    test {Diskless load swapdb (async_loading): old database is exposed while async replication is in progress} {
552                        # Ensure we still see old values while async_loading is in progress and also not LOADING status
553                        assert_equal [$replica get mykey] "myvalue"
554
555                        # Make sure we're still async_loading to validate previous assertion
556                        assert_equal [s -1 async_loading] 1
557
558                        # Make sure amount of replica keys didn't change
559                        assert_equal [$replica dbsize] 2001
560                    }
561
562                    # Make sure that next sync will not start immediately so that we can catch the replica in between syncs
563                    $master config set repl-diskless-sync-delay 5
564
565                    # Kill the replica connection on the master
566                    set killed [$master client kill type replica]
567
568                    # Wait for loading to stop (fail)
569                    wait_for_condition 100 100 {
570                        [s -1 async_loading] eq 0
571                    } else {
572                        fail "Replica didn't disconnect"
573                    }
574
575                    test {Diskless load swapdb (async_loading): old database is exposed after async replication fails} {
576                        # Ensure we see old values from replica
577                        assert_equal [$replica get mykey] "myvalue"
578
579                        # Make sure amount of replica keys didn't change
580                        assert_equal [$replica dbsize] 2001
581                    }
582
583                    # Speed up shutdown
584                    $master config set rdb-key-save-delay 0
585                }
586                "Successful" {
587                    # Let replica finish sync with master
588                    wait_for_condition 100 100 {
589                        [s -1 master_link_status] eq "up"
590                    } else {
591                        fail "Master <-> Replica didn't finish sync"
592                    }
593
594                    test {Diskless load swapdb (async_loading): new database is exposed after swapping} {
595                        # Ensure we don't see anymore the key that was stored only to replica and also that we don't get LOADING status
596                        assert_equal [$replica GET mykey] ""
597
598                        # Make sure amount of keys matches master
599                        assert_equal [$replica dbsize] 1010
600                    }
601                }
602            }
603        }
604    }
605}
606
607test {diskless loading short read} {
608    start_server {tags {"repl"}} {
609        set replica [srv 0 client]
610        set replica_host [srv 0 host]
611        set replica_port [srv 0 port]
612        start_server {} {
613            set master [srv 0 client]
614            set master_host [srv 0 host]
615            set master_port [srv 0 port]
616
617            # Set master and replica to use diskless replication
618            $master config set repl-diskless-sync yes
619            $master config set rdbcompression no
620            $replica config set repl-diskless-load swapdb
621            $master config set hz 500
622            $replica config set hz 500
623            $master config set dynamic-hz no
624            $replica config set dynamic-hz no
625            # Try to fill the master with all types of data types / encodings
626            set start [clock clicks -milliseconds]
627            for {set k 0} {$k < 3} {incr k} {
628                for {set i 0} {$i < 10} {incr i} {
629                    r set "$k int_$i" [expr {int(rand()*10000)}]
630                    r expire "$k int_$i" [expr {int(rand()*10000)}]
631                    r set "$k string_$i" [string repeat A [expr {int(rand()*1000000)}]]
632                    r hset "$k hash_small" [string repeat A [expr {int(rand()*10)}]]  0[string repeat A [expr {int(rand()*10)}]]
633                    r hset "$k hash_large" [string repeat A [expr {int(rand()*10000)}]] [string repeat A [expr {int(rand()*1000000)}]]
634                    r sadd "$k set_small" [string repeat A [expr {int(rand()*10)}]]
635                    r sadd "$k set_large" [string repeat A [expr {int(rand()*1000000)}]]
636                    r zadd "$k zset_small" [expr {rand()}] [string repeat A [expr {int(rand()*10)}]]
637                    r zadd "$k zset_large" [expr {rand()}] [string repeat A [expr {int(rand()*1000000)}]]
638                    r lpush "$k list_small" [string repeat A [expr {int(rand()*10)}]]
639                    r lpush "$k list_large" [string repeat A [expr {int(rand()*1000000)}]]
640                    for {set j 0} {$j < 10} {incr j} {
641                        r xadd "$k stream" * foo "asdf" bar "1234"
642                    }
643                    r xgroup create "$k stream" "mygroup_$i" 0
644                    r xreadgroup GROUP "mygroup_$i" Alice COUNT 1 STREAMS "$k stream" >
645                }
646            }
647
648            if {$::verbose} {
649                set end [clock clicks -milliseconds]
650                set duration [expr $end - $start]
651                puts "filling took $duration ms (TODO: use pipeline)"
652                set start [clock clicks -milliseconds]
653            }
654
655            # Start the replication process...
656            set loglines [count_log_lines -1]
657            $master config set repl-diskless-sync-delay 0
658            $replica replicaof $master_host $master_port
659
660            # kill the replication at various points
661            set attempts 100
662            if {$::accurate} { set attempts 500 }
663            for {set i 0} {$i < $attempts} {incr i} {
664                # wait for the replica to start reading the rdb
665                # using the log file since the replica only responds to INFO once in 2mb
666                set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
667                set loglines [lindex $res 1]
668
669                # add some additional random sleep so that we kill the master on a different place each time
670                after [expr {int(rand()*50)}]
671
672                # kill the replica connection on the master
673                set killed [$master client kill type replica]
674
675                set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 500 10]
676                if {$::verbose} { puts $res }
677                set log_text [lindex $res 0]
678                set loglines [lindex $res 1]
679                if {![string match "*Internal error in RDB*" $log_text]} {
680                    # force the replica to try another full sync
681                    $master multi
682                    $master client kill type replica
683                    $master set asdf asdf
684                    # fill replication backlog with new content
685                    $master config set repl-backlog-size 16384
686                    for {set keyid 0} {$keyid < 10} {incr keyid} {
687                        $master set "$keyid string_$keyid" [string repeat A 16384]
688                    }
689                    $master exec
690                }
691
692                # wait for loading to stop (fail)
693                # After a loading successfully, next loop will enter `async_loading`
694                wait_for_condition 1000 1 {
695                    [s -1 async_loading] eq 0 &&
696                    [s -1 loading] eq 0
697                } else {
698                    fail "Replica didn't disconnect"
699                }
700            }
701            if {$::verbose} {
702                set end [clock clicks -milliseconds]
703                set duration [expr $end - $start]
704                puts "test took $duration ms"
705            }
706            # enable fast shutdown
707            $master config set rdb-key-save-delay 0
708        }
709    }
710} {} {external:skip}
711
712# get current stime and utime metrics for a thread (since it's creation)
713proc get_cpu_metrics { statfile } {
714    if { [ catch {
715        set fid   [ open $statfile r ]
716        set data  [ read $fid 1024 ]
717        ::close $fid
718        set data  [ split $data ]
719
720        ;## number of jiffies it has been scheduled...
721        set utime [ lindex $data 13 ]
722        set stime [ lindex $data 14 ]
723    } err ] } {
724        error "assertion:can't parse /proc: $err"
725    }
726    set mstime [clock milliseconds]
727    return [ list $mstime $utime $stime ]
728}
729
730# compute %utime and %stime of a thread between two measurements
731proc compute_cpu_usage {start end} {
732    set clock_ticks [exec getconf CLK_TCK]
733    # convert ms time to jiffies and calc delta
734    set dtime [ expr { ([lindex $end 0] - [lindex $start 0]) * double($clock_ticks) / 1000 } ]
735    set utime [ expr { [lindex $end 1] - [lindex $start 1] } ]
736    set stime [ expr { [lindex $end 2] - [lindex $start 2] } ]
737    set pucpu  [ expr { ($utime / $dtime) * 100 } ]
738    set pscpu  [ expr { ($stime / $dtime) * 100 } ]
739    return [ list $pucpu $pscpu ]
740}
741
742
743# test diskless rdb pipe with multiple replicas, which may drop half way
744start_server {tags {"repl external:skip"}} {
745    set master [srv 0 client]
746    $master config set repl-diskless-sync yes
747    $master config set repl-diskless-sync-delay 1
748    set master_host [srv 0 host]
749    set master_port [srv 0 port]
750    set master_pid [srv 0 pid]
751    # put enough data in the db that the rdb file will be bigger than the socket buffers
752    # and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds
753    # we also need the replica to process requests during transfer (which it does only once in 2mb)
754    $master debug populate 20000 test 10000
755    $master config set rdbcompression no
756    # If running on Linux, we also measure utime/stime to detect possible I/O handling issues
757    set os [catch {exec uname}]
758    set measure_time [expr {$os == "Linux"} ? 1 : 0]
759    foreach all_drop {no slow fast all timeout} {
760        test "diskless $all_drop replicas drop during rdb pipe" {
761            set replicas {}
762            set replicas_alive {}
763            # start one replica that will read the rdb fast, and one that will be slow
764            start_server {} {
765                lappend replicas [srv 0 client]
766                lappend replicas_alive [srv 0 client]
767                start_server {} {
768                    lappend replicas [srv 0 client]
769                    lappend replicas_alive [srv 0 client]
770
771                    # start replication
772                    # it's enough for just one replica to be slow, and have it's write handler enabled
773                    # so that the whole rdb generation process is bound to that
774                    set loglines [count_log_lines -2]
775                    [lindex $replicas 0] config set repl-diskless-load swapdb
776                    [lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds
777                    [lindex $replicas 0] replicaof $master_host $master_port
778                    [lindex $replicas 1] replicaof $master_host $master_port
779
780                    # wait for the replicas to start reading the rdb
781                    # using the log file since the replica only responds to INFO once in 2mb
782                    wait_for_log_messages -1 {"*Loading DB in memory*"} 0 800 10
783
784                    if {$measure_time} {
785                        set master_statfile "/proc/$master_pid/stat"
786                        set master_start_metrics [get_cpu_metrics $master_statfile]
787                        set start_time [clock seconds]
788                    }
789
790                    # wait a while so that the pipe socket writer will be
791                    # blocked on write (since replica 0 is slow to read from the socket)
792                    after 500
793
794                    # add some command to be present in the command stream after the rdb.
795                    $master incr $all_drop
796
797                    # disconnect replicas depending on the current test
798                    if {$all_drop == "all" || $all_drop == "fast"} {
799                        exec kill [srv 0 pid]
800                        set replicas_alive [lreplace $replicas_alive 1 1]
801                    }
802                    if {$all_drop == "all" || $all_drop == "slow"} {
803                        exec kill [srv -1 pid]
804                        set replicas_alive [lreplace $replicas_alive 0 0]
805                    }
806                    if {$all_drop == "timeout"} {
807                        $master config set repl-timeout 2
808                        # we want the slow replica to hang on a key for very long so it'll reach repl-timeout
809                        exec kill -SIGSTOP [srv -1 pid]
810                        after 2000
811                    }
812
813                    # wait for rdb child to exit
814                    wait_for_condition 500 100 {
815                        [s -2 rdb_bgsave_in_progress] == 0
816                    } else {
817                        fail "rdb child didn't terminate"
818                    }
819
820                    # make sure we got what we were aiming for, by looking for the message in the log file
821                    if {$all_drop == "all"} {
822                        wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
823                    }
824                    if {$all_drop == "no"} {
825                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
826                    }
827                    if {$all_drop == "slow" || $all_drop == "fast"} {
828                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
829                    }
830                    if {$all_drop == "timeout"} {
831                        wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1
832                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
833                        # master disconnected the slow replica, remove from array
834                        set replicas_alive [lreplace $replicas_alive 0 0]
835                        # release it
836                        exec kill -SIGCONT [srv -1 pid]
837                    }
838
839                    # make sure we don't have a busy loop going thought epoll_wait
840                    if {$measure_time} {
841                        set master_end_metrics [get_cpu_metrics $master_statfile]
842                        set time_elapsed [expr {[clock seconds]-$start_time}]
843                        set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
844                        set master_utime [lindex $master_cpu 0]
845                        set master_stime [lindex $master_cpu 1]
846                        if {$::verbose} {
847                            puts "elapsed: $time_elapsed"
848                            puts "master utime: $master_utime"
849                            puts "master stime: $master_stime"
850                        }
851                        if {!$::no_latency && ($all_drop == "all" || $all_drop == "slow" || $all_drop == "timeout")} {
852                            assert {$master_utime < 70}
853                            assert {$master_stime < 70}
854                        }
855                        if {!$::no_latency && ($all_drop == "none" || $all_drop == "fast")} {
856                            assert {$master_utime < 15}
857                            assert {$master_stime < 15}
858                        }
859                    }
860
861                    # verify the data integrity
862                    foreach replica $replicas_alive {
863                        # Wait that replicas acknowledge they are online so
864                        # we are sure that DBSIZE and DEBUG DIGEST will not
865                        # fail because of timing issues.
866                        wait_for_condition 150 100 {
867                            [lindex [$replica role] 3] eq {connected}
868                        } else {
869                            fail "replicas still not connected after some time"
870                        }
871
872                        # Make sure that replicas and master have same
873                        # number of keys
874                        wait_for_condition 50 100 {
875                            [$master dbsize] == [$replica dbsize]
876                        } else {
877                            fail "Different number of keys between master and replicas after too long time."
878                        }
879
880                        # Check digests
881                        set digest [$master debug digest]
882                        set digest0 [$replica debug digest]
883                        assert {$digest ne 0000000000000000000000000000000000000000}
884                        assert {$digest eq $digest0}
885                    }
886                }
887            }
888        }
889    }
890}
891
892test "diskless replication child being killed is collected" {
893    # when diskless master is waiting for the replica to become writable
894    # it removes the read event from the rdb pipe so if the child gets killed
895    # the replica will hung. and the master may not collect the pid with waitpid
896    start_server {tags {"repl"}} {
897        set master [srv 0 client]
898        set master_host [srv 0 host]
899        set master_port [srv 0 port]
900        set master_pid [srv 0 pid]
901        $master config set repl-diskless-sync yes
902        $master config set repl-diskless-sync-delay 0
903        # put enough data in the db that the rdb file will be bigger than the socket buffers
904        $master debug populate 20000 test 10000
905        $master config set rdbcompression no
906        start_server {} {
907            set replica [srv 0 client]
908            set loglines [count_log_lines 0]
909            $replica config set repl-diskless-load swapdb
910            $replica config set key-load-delay 1000000
911            $replica replicaof $master_host $master_port
912
913            # wait for the replicas to start reading the rdb
914            wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
915
916            # wait to be sure the eplica is hung and the master is blocked on write
917            after 500
918
919            # simulate the OOM killer or anyone else kills the child
920            set fork_child_pid [get_child_pid -1]
921            exec kill -9 $fork_child_pid
922
923            # wait for the parent to notice the child have exited
924            wait_for_condition 50 100 {
925                [s -1 rdb_bgsave_in_progress] == 0
926            } else {
927                fail "rdb child didn't terminate"
928            }
929        }
930    }
931} {} {external:skip}
932
933test "diskless replication read pipe cleanup" {
934    # In diskless replication, we create a read pipe for the RDB, between the child and the parent.
935    # When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it still registered).
936    # Otherwise, next time we will use the same fd, the registration will be fail (panic), because
937    # we will use EPOLL_CTL_MOD (the fd still register in the event loop), on fd that already removed from epoll_ctl
938    start_server {tags {"repl"}} {
939        set master [srv 0 client]
940        set master_host [srv 0 host]
941        set master_port [srv 0 port]
942        set master_pid [srv 0 pid]
943        $master config set repl-diskless-sync yes
944        $master config set repl-diskless-sync-delay 0
945
946        # put enough data in the db, and slowdown the save, to keep the parent busy at the read process
947        $master config set rdb-key-save-delay 100000
948        $master debug populate 20000 test 10000
949        $master config set rdbcompression no
950        start_server {} {
951            set replica [srv 0 client]
952            set loglines [count_log_lines 0]
953            $replica config set repl-diskless-load swapdb
954            $replica replicaof $master_host $master_port
955
956            # wait for the replicas to start reading the rdb
957            wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 800 10
958
959            set loglines [count_log_lines -1]
960            # send FLUSHALL so the RDB child will be killed
961            $master flushall
962
963            # wait for another RDB child process to be started
964            wait_for_log_messages -1 {"*Background RDB transfer started by pid*"} $loglines 800 10
965
966            # make sure master is alive
967            $master ping
968        }
969    }
970} {} {external:skip}
971
972test {replicaof right after disconnection} {
973    # this is a rare race condition that was reproduced sporadically by the psync2 unit.
974    # see details in #7205
975    start_server {tags {"repl"}} {
976        set replica1 [srv 0 client]
977        set replica1_host [srv 0 host]
978        set replica1_port [srv 0 port]
979        set replica1_log [srv 0 stdout]
980        start_server {} {
981            set replica2 [srv 0 client]
982            set replica2_host [srv 0 host]
983            set replica2_port [srv 0 port]
984            set replica2_log [srv 0 stdout]
985            start_server {} {
986                set master [srv 0 client]
987                set master_host [srv 0 host]
988                set master_port [srv 0 port]
989                $replica1 replicaof $master_host $master_port
990                $replica2 replicaof $master_host $master_port
991
992                wait_for_condition 50 100 {
993                    [string match {*master_link_status:up*} [$replica1 info replication]] &&
994                    [string match {*master_link_status:up*} [$replica2 info replication]]
995                } else {
996                    fail "Can't turn the instance into a replica"
997                }
998
999                set rd [redis_deferring_client -1]
1000                $rd debug sleep 1
1001                after 100
1002
1003                # when replica2 will wake up from the sleep it will find both disconnection
1004                # from it's master and also a replicaof command at the same event loop
1005                $master client kill type replica
1006                $replica2 replicaof $replica1_host $replica1_port
1007                $rd read
1008
1009                wait_for_condition 50 100 {
1010                    [string match {*master_link_status:up*} [$replica2 info replication]]
1011                } else {
1012                    fail "role change failed."
1013                }
1014
1015                # make sure psync succeeded, and there were no unexpected full syncs.
1016                assert_equal [status $master sync_full] 2
1017                assert_equal [status $replica1 sync_full] 0
1018                assert_equal [status $replica2 sync_full] 0
1019            }
1020        }
1021    }
1022} {} {external:skip}
1023
1024test {Kill rdb child process if its dumping RDB is not useful} {
1025    start_server {tags {"repl"}} {
1026        set slave1 [srv 0 client]
1027        start_server {} {
1028            set slave2 [srv 0 client]
1029            start_server {} {
1030                set master [srv 0 client]
1031                set master_host [srv 0 host]
1032                set master_port [srv 0 port]
1033                for {set i 0} {$i < 10} {incr i} {
1034                    $master set $i $i
1035                }
1036                # Generating RDB will cost 10s(10 * 1s)
1037                $master config set rdb-key-save-delay 1000000
1038                $master config set repl-diskless-sync no
1039                $master config set save ""
1040
1041                $slave1 slaveof $master_host $master_port
1042                $slave2 slaveof $master_host $master_port
1043
1044                # Wait for starting child
1045                wait_for_condition 50 100 {
1046                    ([s 0 rdb_bgsave_in_progress] == 1) &&
1047                    ([string match "*wait_bgsave*" [s 0 slave0]]) &&
1048                    ([string match "*wait_bgsave*" [s 0 slave1]])
1049                } else {
1050                    fail "rdb child didn't start"
1051                }
1052
1053                # Slave1 disconnect with master
1054                $slave1 slaveof no one
1055                # Shouldn't kill child since another slave wait for rdb
1056                after 100
1057                assert {[s 0 rdb_bgsave_in_progress] == 1}
1058
1059                # Slave2 disconnect with master
1060                $slave2 slaveof no one
1061                # Should kill child
1062                wait_for_condition 100 10 {
1063                    [s 0 rdb_bgsave_in_progress] eq 0
1064                } else {
1065                    fail "can't kill rdb child"
1066                }
1067
1068                # If have save parameters, won't kill child
1069                $master config set save "900 1"
1070                $slave1 slaveof $master_host $master_port
1071                $slave2 slaveof $master_host $master_port
1072                wait_for_condition 50 100 {
1073                    ([s 0 rdb_bgsave_in_progress] == 1) &&
1074                    ([string match "*wait_bgsave*" [s 0 slave0]]) &&
1075                    ([string match "*wait_bgsave*" [s 0 slave1]])
1076                } else {
1077                    fail "rdb child didn't start"
1078                }
1079                $slave1 slaveof no one
1080                $slave2 slaveof no one
1081                after 200
1082                assert {[s 0 rdb_bgsave_in_progress] == 1}
1083                catch {$master shutdown nosave}
1084            }
1085        }
1086    }
1087} {} {external:skip}
1088
1089start_server {tags {"repl external:skip"}} {
1090    set master1_host [srv 0 host]
1091    set master1_port [srv 0 port]
1092    r set a b
1093
1094    start_server {} {
1095        set master2 [srv 0 client]
1096        set master2_host [srv 0 host]
1097        set master2_port [srv 0 port]
1098        # Take 10s for dumping RDB
1099        $master2 debug populate 10 master2 10
1100        $master2 config set rdb-key-save-delay 1000000
1101
1102        start_server {} {
1103            set sub_replica [srv 0 client]
1104
1105            start_server {} {
1106                # Full sync with master1
1107                r slaveof $master1_host $master1_port
1108                wait_for_sync r
1109                assert_equal "b" [r get a]
1110
1111                # Let sub replicas sync with me
1112                $sub_replica slaveof [srv 0 host] [srv 0 port]
1113                wait_for_sync $sub_replica
1114                assert_equal "b" [$sub_replica get a]
1115
1116                # Full sync with master2, and then kill master2 before finishing dumping RDB
1117                r slaveof $master2_host $master2_port
1118                wait_for_condition 50 100 {
1119                    ([s -2 rdb_bgsave_in_progress] == 1) &&
1120                    ([string match "*wait_bgsave*" [s -2 slave0]])
1121                } else {
1122                    fail "full sync didn't start"
1123                }
1124                catch {$master2 shutdown nosave}
1125
1126                test {Don't disconnect with replicas before loading transferred RDB when full sync} {
1127                    assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"]
1128                    # The replication id is not changed in entire replication chain
1129                    assert_equal [s master_replid] [s -3 master_replid]
1130                    assert_equal [s master_replid] [s -1 master_replid]
1131                }
1132
1133                test {Discard cache master before loading transferred RDB when full sync} {
1134                    set full_sync [s -3 sync_full]
1135                    set partial_sync [s -3 sync_partial_ok]
1136                    # Partial sync with master1
1137                    r slaveof $master1_host $master1_port
1138                    wait_for_sync r
1139                    # master1 accepts partial sync instead of full sync
1140                    assert_equal $full_sync [s -3 sync_full]
1141                    assert_equal [expr $partial_sync+1] [s -3 sync_partial_ok]
1142
1143                    # Since master only partially sync replica, and repl id is not changed,
1144                    # the replica doesn't disconnect with its sub-replicas
1145                    assert_equal [s master_replid] [s -3 master_replid]
1146                    assert_equal [s master_replid] [s -1 master_replid]
1147                    assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"]
1148                    # Sub replica just has one full sync, no partial resync.
1149                    assert_equal 1 [s sync_full]
1150                    assert_equal 0 [s sync_partial_ok]
1151                }
1152            }
1153        }
1154    }
1155}
1156
1157test {replica can handle EINTR if use diskless load} {
1158    start_server {tags {"repl"}} {
1159        set replica [srv 0 client]
1160        set replica_log [srv 0 stdout]
1161        start_server {} {
1162            set master [srv 0 client]
1163            set master_host [srv 0 host]
1164            set master_port [srv 0 port]
1165
1166            $master debug populate 100 master 100000
1167            $master config set rdbcompression no
1168            $master config set repl-diskless-sync yes
1169            $master config set repl-diskless-sync-delay 0
1170            $replica config set repl-diskless-load on-empty-db
1171            # Construct EINTR error by using the built in watchdog
1172            $replica config set watchdog-period 200
1173            # Block replica in read()
1174            $master config set rdb-key-save-delay 10000
1175            # set speedy shutdown
1176            $master config set save ""
1177            # Start the replication process...
1178            $replica replicaof $master_host $master_port
1179
1180            # Wait for the replica to start reading the rdb
1181            set res [wait_for_log_messages -1 {"*Loading DB in memory*"} 0 200 10]
1182            set loglines [lindex $res 1]
1183
1184            # Wait till we see the watchgod log line AFTER the loading started
1185            wait_for_log_messages -1 {"*WATCHDOG TIMER EXPIRED*"} $loglines 200 10
1186
1187            # Make sure we're still loading, and that there was just one full sync attempt
1188            assert ![log_file_matches [srv -1 stdout] "*Reconnecting to MASTER*"]
1189            assert_equal 1 [s 0 sync_full]
1190            assert_equal 1 [s -1 loading]
1191        }
1192    }
1193} {} {external:skip}
1194