pytorch-image-models
/
clean_checkpoint.py
115 lines · 4.1 KB
#!/usr/bin/env python3
""" Checkpoint Cleaning Script

Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, etc.
and outputs a CPU tensor checkpoint with only the `state_dict` along with SHA256
calculation for model zoo compatibility.

Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
"""
import torch
import argparse
import os
import hashlib
import shutil
import tempfile
from timm.models import load_state_dict

# safetensors is an optional dependency; record its availability so
# clean_checkpoint() can assert on it only when safetensors output is requested.
try:
    import safetensors.torch
    _has_safetensors = True
except ImportError:
    _has_safetensors = False
23parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')24parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',25help='path to latest checkpoint (default: none)')26parser.add_argument('--output', default='', type=str, metavar='PATH',27help='output path')28parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',29help='use ema version of weights if present')30parser.add_argument('--no-hash', dest='no_hash', action='store_true',31help='no hash in output filename')32parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true',33help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint')34parser.add_argument('--safetensors', action='store_true',35help='Save weights using safetensors instead of the default torch way (pickle).')36
37
def main():
    """CLI entry point: parse arguments and clean the given checkpoint.

    Refuses to overwrite an existing output file and exits with status 1.
    """
    args = parser.parse_args()

    if os.path.exists(args.output):
        print("Error: Output filename ({}) already exists.".format(args.output))
        # Raise SystemExit directly instead of the site-provided exit() builtin,
        # which is not guaranteed to exist (e.g. when run with python -S).
        raise SystemExit(1)

    clean_checkpoint(
        args.checkpoint,
        args.output,
        not args.no_use_ema,
        args.no_hash,
        args.clean_aux_bn,
        safe_serialization=args.safetensors,
    )
54
def clean_checkpoint(
        checkpoint,
        output,
        use_ema=True,
        no_hash=False,
        clean_aux_bn=False,
        safe_serialization: bool = False,
):
    """Strip a training checkpoint down to a bare, CPU-resident ``state_dict``.

    Args:
        checkpoint: path to the source checkpoint file.
        output: destination path; when empty, output goes to the current
            directory using the source file's basename.
        use_ema: prefer the EMA copy of the weights when present.
        no_hash: skip appending the SHA256 prefix to the output filename.
        clean_aux_bn: drop SplitBN auxiliary batch-norm keys so the result
            loads into an unmodified model using plain BatchNorm2d.
        safe_serialization: write with safetensors instead of torch pickle.

    Returns:
        The final output filename, or ``''`` when the checkpoint is missing.
    """
    # Load an existing checkpoint to CPU, strip everything but the state_dict and re-save
    if checkpoint and os.path.isfile(checkpoint):
        print("=> Loading checkpoint '{}'".format(checkpoint))
        state_dict = load_state_dict(checkpoint, use_ema=use_ema)
        new_state_dict = {}
        for k, v in state_dict.items():
            if clean_aux_bn and 'aux_bn' in k:
                # If all aux_bn keys are removed, the SplitBN layers will end up as normal and
                # load with the unmodified model using BatchNorm2d.
                continue
            # Strip DataParallel/DistributedDataParallel 'module.' prefixes.
            name = k[7:] if k.startswith('module.') else k
            new_state_dict[name] = v
        print("=> Loaded state_dict from '{}'".format(checkpoint))

        if output:
            checkpoint_root, checkpoint_base = os.path.split(output)
            checkpoint_base, ext = os.path.splitext(checkpoint_base)
        else:
            checkpoint_root = ''
            checkpoint_base = os.path.splitext(os.path.split(checkpoint)[1])[0]
            ext = ''

        # Write to a temp name in the destination directory (not the CWD as
        # before) so the final shutil.move is a same-filesystem rename and no
        # stray '__*' file is left in an unrelated working directory on failure.
        temp_filename = os.path.join(checkpoint_root, '__' + checkpoint_base)
        if safe_serialization:
            assert _has_safetensors, "`pip install safetensors` to use .safetensors"
            safetensors.torch.save_file(new_state_dict, temp_filename)
        else:
            torch.save(new_state_dict, temp_filename)

        # Hash in fixed-size chunks to avoid loading a potentially multi-GB
        # checkpoint into memory all at once.
        sha256 = hashlib.sha256()
        with open(temp_filename, 'rb') as f:
            for chunk in iter(lambda: f.read(1024 * 1024), b''):
                sha256.update(chunk)
        sha_hash = sha256.hexdigest()

        if ext:
            final_ext = ext
        else:
            final_ext = ('.safetensors' if safe_serialization else '.pth')

        if no_hash:
            final_filename = checkpoint_base + final_ext
        else:
            # Model-zoo convention: append the first 8 hex chars of the SHA256.
            final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + final_ext

        shutil.move(temp_filename, os.path.join(checkpoint_root, final_filename))
        print("=> Saved state_dict to '{}, SHA256: {}'".format(final_filename, sha_hash))
        return final_filename
    else:
        print("Error: Checkpoint ({}) doesn't exist".format(checkpoint))
        return ''
113
114if __name__ == '__main__':115main()116