5
from caffe2.python import core, workspace
6
from caffe2.proto import caffe2_pb2
11
# Number of bytes in one GB (2**30); used as the divisor when converting a
# bytes-per-second figure into GB/s.
GB = 1024 ** 3
15
def _cpu_numa_option(numa_node_id):
    """Return a fresh CPU DeviceOption pinned to the given NUMA node."""
    option = caffe2_pb2.DeviceOption()
    option.device_type = caffe2_pb2.CPU
    option.numa_node_id = numa_node_id
    return option


def build_net(net_name, cross_socket):
    """Build the init net and the copy net for one NUMA copy benchmark.

    The init net fills NUM_REPLICAS square blobs
    (``<net_name>/input_blob_<i>``, shape SHAPE_LEN x SHAPE_LEN) with a
    device option targeting NUMA node 0. The copy net copies each of them
    to ``<net_name>/output_blob_<i>``.

    Args:
        net_name: Name of the copy net; also the prefix of every blob name
            and, with an ``_init`` suffix, the name of the init net.
        cross_socket: If true, the Copy ops target NUMA node 1 so every
            copy crosses nodes; otherwise they target node 0, the same
            node the inputs were filled on.

    Returns:
        A ``(init_net, net)`` tuple of ``core.Net`` objects.
    """
    # Separate option objects per placement, so the result does not depend
    # on whether an op copies or references the option it was given.
    source_option = _cpu_numa_option(0)
    target_option = _cpu_numa_option(1 if cross_socket else 0)

    init_net = core.Net(net_name + "_init")
    init_net.Proto().type = "async_scheduling"
    for replica_id in range(NUM_REPLICAS):
        init_net.XavierFill(
            [], net_name + "/input_blob_" + str(replica_id),
            shape=[SHAPE_LEN, SHAPE_LEN], device_option=source_option)

    net = core.Net(net_name)
    net.Proto().type = "async_scheduling"
    for replica_id in range(NUM_REPLICAS):
        net.Copy(net_name + "/input_blob_" + str(replica_id),
                 net_name + "/output_blob_" + str(replica_id),
                 device_option=target_option)

    # Callers unpack both nets: the init net is run once, the copy net is
    # the one that gets benchmarked.
    return init_net, net
37
def _time_net(net_name):
    """Run an already-created net NUM_ITER times; return elapsed seconds."""
    # Function-scope import keeps this block self-contained even if the
    # module does not import `time` at the top.
    import time

    start = time.time()
    workspace.RunNet(net_name, NUM_ITER)
    return time.time() - start


def _copy_bandwidth(elapsed):
    """Convert the elapsed seconds of one timed run into GB/s."""
    # The factor 4 is presumably bytes per element (float32 blobs) --
    # TODO confirm against the XavierFill output type.
    return 4 * SHAPE_LEN * SHAPE_LEN * NUM_REPLICAS * NUM_ITER / elapsed / GB


def main():
    """Time the single-socket and cross-socket copy nets and print results.

    Prints the elapsed time and bandwidth of each net, then the ratio of
    the single-socket bandwidth to the cross-socket bandwidth.

    Raises:
        AssertionError: If NUMA support is not enabled or fewer than two
            NUMA nodes are reported.
    """
    assert workspace.IsNUMAEnabled() and workspace.GetNumNUMANodes() >= 2, \
        "NUMA must be enabled and at least 2 NUMA nodes available"

    single_init, single_net = build_net("single_net", False)
    cross_init, cross_net = build_net("cross_net", True)

    # Populate the input blobs of both benchmarks before anything is timed.
    for init_net in (single_init, cross_init):
        workspace.CreateNet(init_net)
        workspace.RunNet(init_net.Name())

    # Create both copy nets up front so net creation stays outside the
    # timed regions.
    workspace.CreateNet(single_net)
    workspace.CreateNet(cross_net)

    dt = _time_net(single_net.Name())
    print("Single socket time:", dt)
    single_bw = _copy_bandwidth(dt)
    print("Single socket BW: {} GB/s".format(single_bw))

    dt = _time_net(cross_net.Name())
    print("Cross socket time:", dt)
    cross_bw = _copy_bandwidth(dt)
    print("Cross socket BW: {} GB/s".format(cross_bw))
    print("Single BW / Cross BW: {}".format(single_bw / cross_bw))


if __name__ == '__main__':
    # The NUMA flag has to be passed to GlobalInit before any net is
    # created or run, so initialise first and only then start the benchmark.
    core.GlobalInit(["caffe2", "--caffe2_cpu_numa_enabled=1"])
    main()