# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.distributed as dist
from paddle.distributed import fleet
from paddle.distributed.fleet.base.orthogonal_strategy import OrthogonalStrategy
from paddle.distributed.fleet.base.strategy_group import (
    DPGroup,
    MPGroup,
    PPGroup,
    ShardingGroup,
)


def create_hcg(strategy, hcg_name):
    # fleet's built-in HybridCommunicateGroup is obtained through fleet.init();
    # any other name is resolved to a class defined in this module.
    if hcg_name == "HybridCommunicateGroup":
        fleet.init(is_collective=True, strategy=strategy)
        hcg = fleet.get_hybrid_communicate_group()
    else:
        dist.init_parallel_env()
        hcg = eval("{}".format(hcg_name))(strategy)

    return hcg


class Hybrid4DCommGroup(OrthogonalStrategy):
    """4D hybrid communication topology composing data parallel ("dp"),
    model parallel ("mp"), pipeline parallel ("pp") and sharding
    ("sharding") strategies on top of OrthogonalStrategy."""

    def __init__(self, list_of_strategy=None, fused_strategy_dict=None):
        # Avoid sharing a mutable default argument across instances.
        if fused_strategy_dict is None:
            fused_strategy_dict = {}
        # Default topology: every strategy has degree 1 (single-card fallback).
        list_of_strategy = (
            [
                ("dp", 1, DPGroup),
                ("mp", 1, MPGroup),
                ("pp", 1, PPGroup),
                ("sharding", 1, ShardingGroup),
            ]
            if list_of_strategy is None
            else list_of_strategy
        )
        fused_strategy_dict["check"] = ["mp", "pp"]

        super().__init__(list_of_strategy, fused_strategy_dict)

    # data parallel
    def get_data_parallel_rank(self):
        return self.rank_in_strategy("dp")

    def get_data_parallel_world_size(self):
        return self.strategy_group("dp").world_size

    def get_data_parallel_group(self):
        return self.strategy_group("dp").group

    def get_data_parallel_group_src_rank(self):
        return self.strategy_group("dp").group.ranks[0]

    # model parallel
    def get_model_parallel_rank(self):
        return self.rank_in_strategy("mp")

    def get_model_parallel_world_size(self):
        return self.strategy_group("mp").world_size

    def get_model_parallel_group(self):
        return self.strategy_group("mp").group

    def get_model_parallel_group_src_rank(self):
        return self.strategy_group("mp").group.ranks[0]

    # pipeline parallel
    def get_stage_id(self):
        return self.rank_in_strategy("pp")

    def get_pipe_parallel_world_size(self):
        return self.strategy_group("pp").world_size

    def get_pipe_parallel_group(self):
        return self.strategy_group("pp").group

    def get_p2p_groups(self):
        return self.strategy_group("pp").p2p_groups

    # group sharded parallel
    def get_sharding_parallel_rank(self):
        return self.rank_in_strategy("sharding")

    def get_sharding_parallel_world_size(self):
        return self.strategy_group("sharding").world_size

    def get_sharding_parallel_group(self):
        return self.strategy_group("sharding").group

    def get_sharding_parallel_group_src_rank(self):
        return self.strategy_group("sharding").group.ranks[0]

    # check parallel group
    def get_check_parallel_group(self):
        return self.strategy_group("check").group
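

# Minimal usage sketch (illustrative only, not part of the original module):
# under a multi-card launch such as ``python -m paddle.distributed.launch``,
# the topology can be built via create_hcg and queried per strategy, e.g.
#
#     hcg = create_hcg(strategy=None, hcg_name="Hybrid4DCommGroup")
#     dp_rank = hcg.get_data_parallel_rank()
#     mp_group = hcg.get_model_parallel_group()
#
# The ``strategy=None`` argument here is an assumption for illustration; real
# callers pass whatever strategy object or list their launch script builds.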