# pg_probackup: locking_test.py
import unittest
import os
from time import sleep
from .helpers.ptrack_helpers import ProbackupTest, ProbackupException


class LockingTest(ProbackupTest, unittest.TestCase):
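    # The tests in this class exercise pg_probackup's backup catalog locking,
    # as seen through the lock files used below: 'backup.pid' for the
    # exclusive (write) lock and 'backup_ro.pid' for the shared (read-only)
    # lock inside a backup directory.  A RUNNING backup whose pid file points
    # to a live process is left alone; a RUNNING backup with a stale or
    # missing pid file is downgraded to ERROR; read-only locks do not
    # conflict with each other.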

    # @unittest.skip("skip")
    # @unittest.expectedFailure
    def test_locking_running_validate_1(self):
        """
        make node, take full backup, stop it in the middle,
        run validate, expect it to succeed;
        a concurrent RUNNING backup with a pid file and an active process is legal
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        self.backup_node(backup_dir, 'node', node)

        gdb = self.backup_node(
            backup_dir, 'node', node, gdb=True)

        gdb.set_breakpoint('backup_non_data_file')
        gdb.run_until_break()

        gdb.continue_execution_until_break(20)

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'RUNNING', self.show_pb(backup_dir, 'node')[1]['status'])

        validate_output = self.validate_pb(
            backup_dir, options=['--log-level-console=LOG'])

        backup_id = self.show_pb(backup_dir, 'node')[1]['id']

        self.assertIn(
            "is using backup {0}, and is still running".format(backup_id),
            validate_output,
            '\n Unexpected Validate Output: {0}\n'.format(repr(validate_output)))

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'RUNNING', self.show_pb(backup_dir, 'node')[1]['status'])

        # Clean after yourself
        gdb.kill()
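
    # In the next two tests the backup process, stopped at the
    # backup_non_data_file breakpoint, is killed with SIGKILL through gdb, so
    # its backup.pid is left behind with no live process; a later validate is
    # expected to downgrade the stale RUNNING backup to ERROR.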

    def test_locking_running_validate_2(self):
        """
        make node, take full backup, stop it in the middle,
        kill the process so no cleanup is done - the pid file is left in place,
        run validate, expect it to fail;
        a RUNNING backup with a pid file AND without an active pid is legal,
        but its status must be changed to ERROR and the pid file deleted
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        self.backup_node(backup_dir, 'node', node)

        gdb = self.backup_node(
            backup_dir, 'node', node, gdb=True)

        gdb.set_breakpoint('backup_non_data_file')
        gdb.run_until_break()

        gdb.continue_execution_until_break(20)

        gdb._execute('signal SIGKILL')
        gdb.continue_execution_until_error()

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'RUNNING', self.show_pb(backup_dir, 'node')[1]['status'])

        backup_id = self.show_pb(backup_dir, 'node')[1]['id']

        try:
            self.validate_pb(backup_dir)
            self.assertEqual(
                1, 0,
                "Expecting Error because RUNNING backup is no longer active.\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                "which used backup {0} no longer exists".format(
                    backup_id) in e.message and
                "Backup {0} has status RUNNING, change it "
                "to ERROR and skip validation".format(
                    backup_id) in e.message and
                "WARNING: Some backups are not valid" in
                e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'ERROR', self.show_pb(backup_dir, 'node')[1]['status'])

        # Clean after yourself
        gdb.kill()

    def test_locking_running_validate_2_specific_id(self):
        """
        make node, take full backup, stop it in the middle,
        kill the process so no cleanup is done - the pid file is left in place,
        run validate on this specific backup, expect it to fail;
        a RUNNING backup with a pid file AND without an active pid is legal,
        but its status must be changed to ERROR and the pid file deleted
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        self.backup_node(backup_dir, 'node', node)

        gdb = self.backup_node(
            backup_dir, 'node', node, gdb=True)

        gdb.set_breakpoint('backup_non_data_file')
        gdb.run_until_break()

        gdb.continue_execution_until_break(20)

        gdb._execute('signal SIGKILL')
        gdb.continue_execution_until_error()

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'RUNNING', self.show_pb(backup_dir, 'node')[1]['status'])

        backup_id = self.show_pb(backup_dir, 'node')[1]['id']

        try:
            self.validate_pb(backup_dir, 'node', backup_id)
            self.assertEqual(
                1, 0,
                "Expecting Error because RUNNING backup is no longer active.\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                "which used backup {0} no longer exists".format(
                    backup_id) in e.message and
                "Backup {0} has status RUNNING, change it "
                "to ERROR and skip validation".format(
                    backup_id) in e.message and
                "ERROR: Backup {0} has status: ERROR".format(backup_id) in
                e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'ERROR', self.show_pb(backup_dir, 'node')[1]['status'])

        try:
            self.validate_pb(backup_dir, 'node', backup_id)
            self.assertEqual(
                1, 0,
                "Expecting Error because backup has status ERROR.\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertIn(
                "ERROR: Backup {0} has status: ERROR".format(backup_id),
                e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        try:
            self.validate_pb(backup_dir)
            self.assertEqual(
                1, 0,
                "Expecting Error because backup has status ERROR.\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                "WARNING: Backup {0} has status ERROR. Skip validation".format(
                    backup_id) in e.message and
                "WARNING: Some backups are not valid" in e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        # Clean after yourself
        gdb.kill()
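
    # Same scenario as above, except that the leftover backup.pid is removed
    # by hand, so validate sees a RUNNING backup with neither a pid file nor
    # a live process and must still downgrade it to ERROR.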

    def test_locking_running_3(self):
        """
        make node, take full backup, stop it in the middle,
        terminate the process, delete the pid file,
        run validate, expect it to fail;
        a RUNNING backup without a pid file AND without an active pid is legal,
        its status must be changed to ERROR
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        self.backup_node(backup_dir, 'node', node)

        gdb = self.backup_node(
            backup_dir, 'node', node, gdb=True)

        gdb.set_breakpoint('backup_non_data_file')
        gdb.run_until_break()

        gdb.continue_execution_until_break(20)

        gdb._execute('signal SIGKILL')
        gdb.continue_execution_until_error()

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'RUNNING', self.show_pb(backup_dir, 'node')[1]['status'])

        backup_id = self.show_pb(backup_dir, 'node')[1]['id']

        os.remove(
            os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.pid'))

        try:
            self.validate_pb(backup_dir)
            self.assertEqual(
                1, 0,
                "Expecting Error because RUNNING backup is no longer active.\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                "Backup {0} has status RUNNING, change it "
                "to ERROR and skip validation".format(
                    backup_id) in e.message and
                "WARNING: Some backups are not valid" in
                e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        self.assertEqual(
            'OK', self.show_pb(backup_dir, 'node')[0]['status'])

        self.assertEqual(
            'ERROR', self.show_pb(backup_dir, 'node')[1]['status'])

        # Clean after yourself
        gdb.kill()
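
    # validate and restore only read the backups they touch, so their
    # read-only locks do not conflict: a restore of PAGE2 is expected to
    # proceed while a validate of PAGE1 is parked at pgBackupValidate.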

    def test_locking_restore_locked(self):
        """
        make node, take full backup, take two page backups,
        launch validate on PAGE1 and stop it in the middle,
        launch restore of PAGE2.
        Expect restore to succeed because read-only locks
        do not conflict
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # FULL
        full_id = self.backup_node(backup_dir, 'node', node)

        # PAGE1
        backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page')

        # PAGE2
        self.backup_node(backup_dir, 'node', node, backup_type='page')

        gdb = self.validate_pb(
            backup_dir, 'node', backup_id=backup_id, gdb=True)

        gdb.set_breakpoint('pgBackupValidate')
        gdb.run_until_break()

        node.cleanup()

        self.restore_node(backup_dir, 'node', node)

        # Clean after yourself
        gdb.kill()
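
    # A concurrent delete does conflict: with delete of the FULL backup
    # suspended at delete_backup_files, restoring the dependent PAGE backup
    # is expected to fail with "Cannot lock backup".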

    def test_concurrent_delete_and_restore(self):
        """
        make node, take full backup, take page backup,
        launch delete of FULL and stop it in the middle,
        launch restore of PAGE.
        Expect restore to fail because the FULL backup
        is locked by the concurrent delete
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # FULL
        backup_id = self.backup_node(backup_dir, 'node', node)

        # PAGE1
        restore_id = self.backup_node(backup_dir, 'node', node, backup_type='page')

        gdb = self.delete_pb(
            backup_dir, 'node', backup_id=backup_id, gdb=True)

        # gdb.set_breakpoint('pgFileDelete')
        gdb.set_breakpoint('delete_backup_files')
        gdb.run_until_break()

        node.cleanup()

        try:
            self.restore_node(
                backup_dir, 'node', node, options=['--no-validate'])
            self.assertEqual(
                1, 0,
                "Expecting Error because the parent FULL backup is locked "
                "by the concurrent delete.\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertTrue(
                "Backup {0} is used without validation".format(
                    restore_id) in e.message and
                'is using backup {0}, and is still running'.format(
                    backup_id) in e.message and
                'ERROR: Cannot lock backup' in e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        # Clean after yourself
        gdb.kill()
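
    # A validate holding a read-only lock on an existing PAGE backup must not
    # prevent a new PAGE backup of the same instance from being taken.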

    def test_locking_concurrent_validate_and_backup(self):
        """
        make node, take full backup, launch validate
        and stop it in the middle, take page backup.
        Expect the PAGE backup to be executed successfully
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # FULL
        self.backup_node(backup_dir, 'node', node)

        # PAGE1
        backup_id = self.backup_node(backup_dir, 'node', node, backup_type='page')

        gdb = self.validate_pb(
            backup_dir, 'node', backup_id=backup_id, gdb=True)

        gdb.set_breakpoint('pgBackupValidate')
        gdb.run_until_break()

        # This PAGE backup is expected to be successful
        self.backup_node(backup_dir, 'node', node, backup_type='page')

        # Clean after yourself
        gdb.kill()
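
    # The reverse direction: while a restore of the FULL backup is suspended
    # at create_data_directories, deleting that backup must fail with the
    # "Cannot lock backup ... directory" error checked below.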

    def test_locking_concurren_restore_and_delete(self):
        """
        make node, take full backup, launch restore
        and stop it in the middle, delete the full backup.
        Expect the delete to fail.
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # FULL
        full_id = self.backup_node(backup_dir, 'node', node)

        node.cleanup()
        gdb = self.restore_node(backup_dir, 'node', node, gdb=True)

        gdb.set_breakpoint('create_data_directories')
        gdb.run_until_break()

        try:
            self.delete_pb(backup_dir, 'node', full_id)
            self.assertEqual(
                1, 0,
                "Expecting Error because backup is locked\n "
                "Output: {0} \n CMD: {1}".format(
                    repr(self.output), self.cmd))
        except ProbackupException as e:
            self.assertIn(
                "ERROR: Cannot lock backup {0} directory".format(full_id),
                e.message,
                '\n Unexpected Error Message: {0}\n CMD: {1}'.format(
                    repr(e.message), self.cmd))

        # Clean after yourself
        gdb.kill()
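
    # The catalog must keep working when a backup directory is renamed on
    # disk (here to 'hello_kitty'): show, validate, restore and delete by the
    # original backup ID are all expected to succeed.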

    def test_backup_directory_name(self):
        """
        rename a backup directory on disk,
        expect catalog operations on that backup to keep working
        """
        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # FULL
        full_id_1 = self.backup_node(backup_dir, 'node', node)
        page_id_1 = self.backup_node(backup_dir, 'node', node, backup_type='page')

        full_id_2 = self.backup_node(backup_dir, 'node', node)
        page_id_2 = self.backup_node(backup_dir, 'node', node, backup_type='page')

        node.cleanup()

        old_path = os.path.join(backup_dir, 'backups', 'node', full_id_1)
        new_path = os.path.join(backup_dir, 'backups', 'node', 'hello_kitty')

        os.rename(old_path, new_path)

        # show must still find the renamed backup by its ID
        self.show_pb(backup_dir, 'node', full_id_1)

        self.validate_pb(backup_dir)
        self.validate_pb(backup_dir, 'node')
        self.validate_pb(backup_dir, 'node', full_id_1)

        self.restore_node(backup_dir, 'node', node, backup_id=full_id_1)

        self.delete_pb(backup_dir, 'node', full_id_1)

        old_path = os.path.join(backup_dir, 'backups', 'node', full_id_2)
        new_path = os.path.join(backup_dir, 'backups', 'node', 'hello_kitty')

        self.set_backup(
            backup_dir, 'node', full_id_2, options=['--note=hello'])

        self.merge_backup(backup_dir, 'node', page_id_2, options=["-j", "4"])

        self.assertNotIn(
            'note',
            self.show_pb(backup_dir, 'node', page_id_2))

        # Clean after yourself
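
    # Regression test for issue #308: an empty (zero-length) backup.pid must
    # not be treated as a live lock holder; validate reports waiting up to
    # 30 seconds on the empty exclusive lock and is then expected to succeed.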

    def test_empty_lock_file(self):
        """
        https://github.com/postgrespro/pg_probackup/issues/308
        """
        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # Fill with data
        node.pgbench_init(scale=100)

        # FULL
        backup_id = self.backup_node(backup_dir, 'node', node)

        lockfile = os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.pid')
        with open(lockfile, "w+") as f:
            f.truncate()

        out = self.validate_pb(backup_dir, 'node', backup_id)

        self.assertIn(
            "Waiting 30 seconds on empty exclusive lock for backup", out)

#        lockfile = os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.pid')
#        with open(lockfile, "w+") as f:
#            f.truncate()
#
#        p1 = self.validate_pb(backup_dir, 'node', backup_id, asynchronous=True,
#            options=['--log-level-file=LOG', '--log-filename=validate.log'])
#        sleep(3)
#        p2 = self.delete_pb(backup_dir, 'node', backup_id, asynchronous=True,
#            options=['--log-level-file=LOG', '--log-filename=delete.log'])
#
#        p1.wait()
#        p2.wait()
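
    # Read-only operations use the shared lock file backup_ro.pid; it must be
    # gone after a normal validate and is only left behind here because the
    # validate process is killed while stopped at validate_one_page.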

    def test_shared_lock(self):
        """
        Make sure that a shared lock leaves no pid files behind
        """
        self._check_gdb_flag_or_skip_test()

        node = self.make_simple_node(
            base_dir=os.path.join(self.module_name, self.fname, 'node'),
            initdb_params=['--data-checksums'])

        backup_dir = os.path.join(self.tmp_path, self.module_name, self.fname, 'backup')
        self.init_pb(backup_dir)
        self.add_instance(backup_dir, 'node', node)
        self.set_archiving(backup_dir, 'node', node)
        node.slow_start()

        # Fill with data
        node.pgbench_init(scale=1)

        # FULL
        backup_id = self.backup_node(backup_dir, 'node', node)

        lockfile_excl = os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup.pid')
        lockfile_shr = os.path.join(backup_dir, 'backups', 'node', backup_id, 'backup_ro.pid')

        self.validate_pb(backup_dir, 'node', backup_id)

        self.assertFalse(
            os.path.exists(lockfile_excl),
            "File should not exist: {0}".format(lockfile_excl))

        self.assertFalse(
            os.path.exists(lockfile_shr),
            "File should not exist: {0}".format(lockfile_shr))

        gdb = self.validate_pb(backup_dir, 'node', backup_id, gdb=True)

        gdb.set_breakpoint('validate_one_page')
        gdb.run_until_break()
        gdb.kill()

        self.assertTrue(
            os.path.exists(lockfile_shr),
            "File should exist: {0}".format(lockfile_shr))

        self.validate_pb(backup_dir, 'node', backup_id)

        self.assertFalse(
            os.path.exists(lockfile_excl),
            "File should not exist: {0}".format(lockfile_excl))

        self.assertFalse(
            os.path.exists(lockfile_shr),
            "File should not exist: {0}".format(lockfile_shr))