import torch
from modules import devices, shared

module_in_gpu = None
cpu = torch.device("cpu")


def send_everything_to_cpu():
    global module_in_gpu

    if module_in_gpu is not None:
        module_in_gpu.to(cpu)

    module_in_gpu = None


def is_needed(sd_model):
    # --medvram-sdxl only counts when the checkpoint is SDXL (it exposes a
    # `conditioner`); `and` binds tighter than `or`, parentheses make that explicit
    return shared.cmd_opts.lowvram or shared.cmd_opts.medvram or (shared.cmd_opts.medvram_sdxl and hasattr(sd_model, 'conditioner'))


def apply(sd_model):
    enable = is_needed(sd_model)
    shared.parallel_processing_allowed = not enable

    if enable:
        setup_for_low_vram(sd_model, not shared.cmd_opts.lowvram)
    else:
        sd_model.lowvram = False


def setup_for_low_vram(sd_model, use_medvram):
    if getattr(sd_model, 'lowvram', False):
        return

    sd_model.lowvram = True

    parents = {}
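    # maps a hooked submodule to the bigger module that should be moved to the
    # GPU as one unit when that submodule's forward() fires; send_me_to_gpu
    # below resolves through this dict before swapping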

    def send_me_to_gpu(module, _):
        """send this module to GPU; send whatever tracked module was previously in GPU to CPU;
        we add this as a forward_pre_hook to a lot of modules, and this way all but one of them
        will stay on the CPU
        """
        global module_in_gpu

        module = parents.get(module, module)

        if module_in_gpu is module:
            return

        if module_in_gpu is not None:
            module_in_gpu.to(cpu)

        module.to(devices.device)
        module_in_gpu = module
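    # forward_pre_hook fires just before a hooked module's forward(), so
    # weights migrate to the GPU lazily, right when a module is about to run,
    # and at most one tracked module occupies VRAM at a time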

    # see below for register_forward_pre_hook;
    # first_stage_model does not use forward(), it uses encode/decode, so register_forward_pre_hook is
    # useless here, and we just replace those methods

    first_stage_model = sd_model.first_stage_model
    first_stage_model_encode = sd_model.first_stage_model.encode
    first_stage_model_decode = sd_model.first_stage_model.decode

    def first_stage_model_encode_wrap(x):
        send_me_to_gpu(first_stage_model, None)
        return first_stage_model_encode(x)

    def first_stage_model_decode_wrap(z):
        send_me_to_gpu(first_stage_model, None)
        return first_stage_model_decode(z)

    to_remain_in_cpu = [
        (sd_model, 'first_stage_model'),
        (sd_model, 'depth_model'),
        (sd_model, 'embedder'),
        (sd_model, 'model'),
    ]

    is_sdxl = hasattr(sd_model, 'conditioner')
    is_sd2 = not is_sdxl and hasattr(sd_model.cond_stage_model, 'model')

    if is_sdxl:
        to_remain_in_cpu.append((sd_model, 'conditioner'))
    elif is_sd2:
        to_remain_in_cpu.append((sd_model.cond_stage_model, 'model'))
    else:
        to_remain_in_cpu.append((sd_model.cond_stage_model, 'transformer'))

    # remove several big modules: cond, first_stage, depth/embedder (if applicable), and unet from the model
    stored = []
    for obj, field in to_remain_in_cpu:
        module = getattr(obj, field, None)
        stored.append(module)
        setattr(obj, field, None)

    # send the model to GPU.
    sd_model.to(devices.device)

    # put modules back. the modules will be in CPU.
    for (obj, field), module in zip(to_remain_in_cpu, stored):
        setattr(obj, field, module)
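    # this works because Module.to() only moves what is still attached: with
    # the big submodules detached above, the .to(devices.device) call relocates
    # just the small remainder of the model, and reattaching afterwards leaves
    # the big submodules on the CPU where they started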

    # register hooks for the first three models
    if is_sdxl:
        sd_model.conditioner.register_forward_pre_hook(send_me_to_gpu)
    elif is_sd2:
        sd_model.cond_stage_model.model.register_forward_pre_hook(send_me_to_gpu)
        sd_model.cond_stage_model.model.token_embedding.register_forward_pre_hook(send_me_to_gpu)
        parents[sd_model.cond_stage_model.model] = sd_model.cond_stage_model
        parents[sd_model.cond_stage_model.model.token_embedding] = sd_model.cond_stage_model
    else:
        sd_model.cond_stage_model.transformer.register_forward_pre_hook(send_me_to_gpu)
        parents[sd_model.cond_stage_model.transformer] = sd_model.cond_stage_model

    sd_model.first_stage_model.register_forward_pre_hook(send_me_to_gpu)
    sd_model.first_stage_model.encode = first_stage_model_encode_wrap
    sd_model.first_stage_model.decode = first_stage_model_decode_wrap
    if sd_model.depth_model:
        sd_model.depth_model.register_forward_pre_hook(send_me_to_gpu)
    if sd_model.embedder:
        sd_model.embedder.register_forward_pre_hook(send_me_to_gpu)

    if use_medvram:
        sd_model.model.register_forward_pre_hook(send_me_to_gpu)
    else:
        diff_model = sd_model.model.diffusion_model

        # the third remaining model is still too big for 4 GB, so we also do the same for its submodules
        # so that only one of them is in GPU at a time
        stored = diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed
        diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = None, None, None, None
        sd_model.model.to(devices.device)
        diff_model.input_blocks, diff_model.middle_block, diff_model.output_blocks, diff_model.time_embed = stored

        # install hooks for bits of the third model
        diff_model.time_embed.register_forward_pre_hook(send_me_to_gpu)
        for block in diff_model.input_blocks:
            block.register_forward_pre_hook(send_me_to_gpu)
        diff_model.middle_block.register_forward_pre_hook(send_me_to_gpu)
        for block in diff_model.output_blocks:
            block.register_forward_pre_hook(send_me_to_gpu)


def is_enabled(sd_model):
    return sd_model.lowvram
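

# ---------------------------------------------------------------------------
# Minimal standalone sketch of the swapping pattern used above, runnable
# outside webui (illustration only; `blocks`, `swap_in`, and `current` are
# made-up names, not part of this module's API). At most one toy submodule
# sits on the target device at a time, courtesy of forward_pre_hook.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import torch.nn as nn

    target = torch.device("cuda") if torch.cuda.is_available() else cpu

    blocks = nn.ModuleList([nn.Linear(8, 8) for _ in range(3)])
    current = None  # toy counterpart of module_in_gpu

    def swap_in(module, _inputs):
        global current
        if current is module:
            return
        if current is not None:
            current.to(cpu)  # evict whichever block ran last
        module.to(target)
        current = module

    for block in blocks:
        block.register_forward_pre_hook(swap_in)

    x = torch.randn(1, 8)
    for block in blocks:
        x = block(x.to(target))  # each forward() evicts the previous block

    # only the block that ran last should still be on `target`
    print({name: p.device.type for name, p in blocks.named_parameters()})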