import unittest

import numpy as np

from caffe2.python import core, workspace, muji, test_util
@unittest.skipIf(not workspace.has_gpu_support, "no gpu")
class TestMuji(test_util.TestCase):
    """Tests for muji's multi-GPU allreduce strategies.

    Each test builds a small Caffe2 net that fills one blob per GPU, runs
    one of the muji allreduce implementations over those blobs, and checks
    that every "<blob>_reduced" output holds the expected sum.

    NOTE(review): reconstructed from a garbled extraction (dropped lines,
    interleaved line-number residue). The ConstantFill shape and value are
    inferred from the visible ``target_value`` arithmetic — confirm against
    upstream caffe2/python/muji_test.py.
    """

    def RunningAllreduceWithGPUs(self, gpu_ids, allreduce_function):
        """A base function to test different scenarios.

        Args:
            gpu_ids: list of GPU device ids to participate in the allreduce.
            allreduce_function: one of the muji.Allreduce* entry points,
                called as f(net, input_blobs, suffix, gpu_ids).
        """
        net = core.Net("mujitest")
        for gpu_id in gpu_ids:
            # One constant blob per GPU, filled with (gpu_id + 1) so that
            # the allreduced result equals sum(gpu_ids) + len(gpu_ids).
            net.ConstantFill(
                [],
                "testblob_gpu_" + str(gpu_id),
                shape=[1, 2, 3, 4],
                value=float(gpu_id + 1),
                device_option=muji.OnGPU(gpu_id)
            )
        allreduce_function(
            net, ["testblob_gpu_" + str(i)
                  for i in gpu_ids], "_reduced", gpu_ids
        )
        workspace.RunNetOnce(net)
        # Every input blob was filled with (gpu_id + 1), so each element of
        # every reduced blob must equal this scalar.
        target_value = sum(gpu_ids) + len(gpu_ids)
        all_blobs = workspace.Blobs()
        all_blobs.sort()
        # Dump all blobs to aid debugging on failure.
        for blob in all_blobs:
            print('{} {}'.format(blob, workspace.FetchBlob(blob)))

        for idx in gpu_ids:
            blob = workspace.FetchBlob("testblob_gpu_" + str(idx) + "_reduced")
            np.testing.assert_array_equal(
                blob,
                target_value,
                err_msg="gpu id %d of %s" % (idx, str(gpu_ids))
            )

    def testAllreduceFallback(self):
        """Fallback allreduce over all available GPUs (no peer access needed)."""
        self.RunningAllreduceWithGPUs(
            list(range(workspace.NumGpuDevices())), muji.AllreduceFallback
        )

    def testAllreduceSingleGPU(self):
        """Degenerate allreduce: each GPU alone forms its own group."""
        for i in range(workspace.NumGpuDevices()):
            self.RunningAllreduceWithGPUs([i], muji.Allreduce)

    def testAllreduceWithTwoGPUs(self):
        """Allreduce2 requires mutual peer access between GPUs 0 and 1."""
        pattern = workspace.GetGpuPeerAccessPattern()
        if pattern.shape[0] >= 2 and np.all(pattern[:2, :2]):
            self.RunningAllreduceWithGPUs([0, 1], muji.Allreduce2)
        else:
            print('Skipping allreduce with 2 gpus. Not peer access ready.')

    def testAllreduceWithFourGPUs(self):
        """Allreduce4 requires full peer access among GPUs 0-3."""
        pattern = workspace.GetGpuPeerAccessPattern()
        if pattern.shape[0] >= 4 and np.all(pattern[:4, :4]):
            self.RunningAllreduceWithGPUs([0, 1, 2, 3], muji.Allreduce4)
        else:
            print('Skipping allreduce with 4 gpus. Not peer access ready.')

    def testAllreduceWithFourGPUsAndTwoGroups(self):
        """Allreduce4Group2 requires peer access within {0,1} and within {2,3}."""
        pattern = workspace.GetGpuPeerAccessPattern()
        if pattern.shape[0] >= 4 and np.all(pattern[:2, :2]) and np.all(pattern[2:4, 2:4]):
            self.RunningAllreduceWithGPUs([0, 1, 2, 3], muji.Allreduce4Group2)
        else:
            print('Skipping allreduce with 4 gpus and 2 groups. Not peer access ready.')

    def testAllreduceWithEightGPUs(self):
        """Allreduce8 requires peer access within GPUs 0-3 and within 4-7."""
        pattern = workspace.GetGpuPeerAccessPattern()
        if (
            pattern.shape[0] >= 8 and np.all(pattern[:4, :4]) and
            np.all(pattern[4:, 4:])
        ):
            self.RunningAllreduceWithGPUs(
                list(range(8)), muji.Allreduce8)
        else:
            print('Skipping allreduce with 8 gpus. Not peer access ready.')
81
if __name__ == '__main__':