20
from multiprocessing import Process
23
Multi-process batch processing tool
25
This tool provides a multi-process batch processing method.
26
For example: downloading data in batches across multiple processes, preprocessing data in parallel, etc.
28
The tool relies on executable shell commands or scripts. Its essence is to use Python's
29
multi-process library to create multiple processes, and call executable commands or
30
scripts through the os.system API.
32
Executable commands or scripts are passed in via a plain-text (.txt) file, one command per line.
33
For example, the following command file downloads, extracts, and then deletes a series of archives.
37
wget http://xxxx.com/0.tar && tar -xf 0.tar && rm 0.tar
38
wget http://xxxx.com/1.tar && tar -xf 1.tar && rm 1.tar
40
wget http://xxxx.com/99.tar && tar -xf 99.tar && rm 99.tar
44
python multiprocess_tool.py --num_proc 10 --shell_cmd_list_filename batch_cmd.txt
49
def process_fn(cmd_list):
54
raise Exception(f"execute command: {cmd} failed.")
55
except Exception as e:
59
def read_command(shell_cmd_list_filename):
61
with open(shell_cmd_list_filename, "r") as f:
64
shell_cmd_list.append(cmd)
68
def parallel_process(cmd_list, nproc=20):
69
if nproc > multiprocessing.cpu_count():
71
"The set number of processes exceeds the number of cpu cores, please confirm whether it is reasonable."
73
num_cmd = len(cmd_list)
74
num_cmd_part = (num_cmd + nproc - 1) // nproc
76
for i in range(min(nproc, num_cmd)):
77
start = i * num_cmd_part
78
end = min(start + num_cmd_part, num_cmd)
79
p = Process(target=process_fn, args=(cmd_list[start:end],))
89
shell_cmd_list = read_command(args.shell_cmd_list_filename)
90
parallel_process(shell_cmd_list, args.num_proc)
92
print("Cost time: {:.2f}".format(end - start))
95
if __name__ == "__main__":
96
parse = argparse.ArgumentParser(description="multi-process batch processing tool")
97
parse.add_argument("--num_proc", type=int, default=20)
99
"--shell_cmd_list_filename", type=str, help="a txt file contains shell command list to be execute."
101
args = parse.parse_args()