# consolidator/make_dump.py (46 lines, 2.0 KB)
1import glob
2import ntpath
3import pandas as pd
4
def vm_handler(row: pd.Series) -> pd.Series:
    """Truncate the SAP ERP material-type value of a row to four characters.

    The value stored under ``"Вид материала в SAP ERP"`` is converted to
    ``str`` and cut to its first four characters. The row is modified in
    place and the same object is returned, which is the shape
    ``DataFrame.apply(..., axis=1)`` expects from a row handler.

    If reading or writing that value fails with an ordinary exception, the
    exception text is written to ``"Полное наименование материала"`` instead
    of being raised, so one bad row does not abort a whole ``apply`` pass.

    Args:
        row: A single record holding the two columns named above.

    Returns:
        The same ``row`` object, after modification.
    """
    try:
        row["Вид материала в SAP ERP"] = str(row["Вид материала в SAP ERP"])[0:4]
    except Exception as exp:
        # Best-effort: record the failure in the row rather than raising.
        row["Полное наименование материала"] = f"{exp}"
    # Returned after the try statement, not from a ``finally`` clause: a
    # ``return`` inside ``finally`` would also discard KeyboardInterrupt and
    # SystemExit, making the handler impossible to interrupt.
    return row
12
if __name__ == "__main__":
    # Script entry point: gather three columns from every non-empty sheet of
    # the matching Excel workbooks, drop duplicate rows and write the result
    # to a single workbook in ``dump_dir``.
    #
    # This is the top-level boundary of the script, so any failure is
    # reported as a single "error: ..." line on stdout instead of a traceback.
    try:
        pattern = "D:\\work\\Укрупнение\\current\\data\\2024*\\*.xlsx"
        dump_dir = "D:\\work\\Укрупнение\\current\\debug_data\\"
        # Paths containing this substring are skipped; None disables the filter.
        exclude_pattern = "нение_"
        neuro_columns = [
            "ID в SAP ERP VMZ",
            "Вид материала в SAP ERP",
            "Полное наименование материала",
        ]
        files = [
            f
            for f in glob.glob(pattern)
            if exclude_pattern is None or exclude_pattern not in f
        ]
        list_data = []
        for f in files:
            short_name = ntpath.basename(f)
            print(f"{f} -> {short_name}")
            # The context manager closes the workbook handle even when a
            # sheet fails to parse or lacks one of the required columns.
            with pd.ExcelFile(f) as xl:
                for sheet_name in xl.sheet_names:
                    df = xl.parse(sheet_name=sheet_name)
                    if not df.empty:
                        # A sheet missing any of these columns raises
                        # KeyError, which aborts the run via the handler below.
                        list_data.append(df[neuro_columns])
        if list_data:
            result = pd.concat(list_data).drop_duplicates()
            result.to_excel(f"{dump_dir}neuro_dump.xlsx", index=False)
            print(f"{result.shape} -> done")
        else:
            # pd.concat rejects an empty list; say what actually happened.
            print("error: no non-empty sheets found, nothing to dump")
    except Exception as exp:
        print(f"error: {exp}")