# promptflow
# 190 lines · 6.2 KB
1import os
2import glob
3import argparse
4from pathlib import Path
5import ntpath
6import re
7import hashlib
8import json
9from jinja2 import Environment, FileSystemLoader
10from ghactions_driver.readme_step import ReadmeStepsManage
11from ghactions_driver.resource_resolver import resolve_tutorial_resource
12from ghactions_driver.telemetry_obj import Telemetry
13
14
15def format_ipynb(notebooks):
16# run code formatter on .ipynb files
17for notebook in notebooks:
18os.system(f"black-nb --clear-output {notebook}")
19
20
21def _get_paths(paths_list):
22"""
23Convert the path list to unix format.
24:param paths_list: The input path list.
25:returns: The same list with unix-like paths.
26"""
27paths_list.sort()
28if ntpath.sep == os.path.sep:
29return [pth.replace(ntpath.sep, "/") for pth in paths_list]
30return paths_list
31
32
33def write_notebook_workflow(notebook, name, output_telemetry=Telemetry()):
34temp_name_list = re.split(r"/|\.", notebook)
35temp_name_list = [
36x
37for x in temp_name_list
38if x != "tutorials" and x != "examples" and x != "ipynb"
39]
40temp_name_list = [x.replace("-", "") for x in temp_name_list]
41workflow_name = "_".join(["samples"] + temp_name_list)
42
43place_to_write = (
44Path(ReadmeStepsManage.git_base_dir())
45/ ".github"
46/ "workflows"
47/ f"{workflow_name}.yml"
48)
49
50gh_working_dir = "/".join(notebook.split("/")[:-1])
51env = Environment(
52loader=FileSystemLoader("./scripts/readme/ghactions_driver/workflow_templates")
53)
54template = env.get_template("basic_workflow.yml.jinja2")
55
56# Schedule notebooks at different times to reduce maximum quota usage.
57name_hash = int(hashlib.sha512(workflow_name.encode()).hexdigest(), 16)
58schedule_minute = name_hash % 60
59schedule_hour = (name_hash // 60) % 4 + 19 # 19-22 UTC
60
61notebook_path = Path(ReadmeStepsManage.git_base_dir()) / str(notebook)
62try:
63# resolve tutorial resources
64path_filter = resolve_tutorial_resource(workflow_name, notebook_path.resolve(), output_telemetry)
65except Exception:
66if "examples/tutorials" in gh_working_dir:
67raise
68else:
69pass
70if "samples_configuration" in workflow_name:
71# exception, samples configuration is very simple and not related to other prompt flow examples
72path_filter = (
73"[ examples/configuration.ipynb, .github/workflows/samples_configuration.yml ]"
74)
75else:
76path_filter = f"[ {gh_working_dir}/**, examples/*requirements.txt, .github/workflows/{workflow_name}.yml ]"
77
78# these workflows require config.json to init PF/ML client
79workflows_require_config_json = [
80"configuration",
81"runflowwithpipeline",
82"quickstartazure",
83"cloudrunmanagement",
84"chatwithclassbasedflowazure",
85]
86if any(keyword in workflow_name for keyword in workflows_require_config_json):
87template = env.get_template("workflow_config_json.yml.jinja2")
88elif "chatwithpdf" in workflow_name:
89template = env.get_template("pdf_workflow.yml.jinja2")
90elif "flowasfunction" in workflow_name:
91template = env.get_template("flow_as_function.yml.jinja2")
92elif "traceautogengroupchat" in workflow_name:
93template = env.get_template("autogen_workflow.yml.jinja2")
94
95content = template.render(
96{
97"workflow_name": workflow_name,
98"ci_name": "samples_notebook_ci",
99"name": name,
100"gh_working_dir": gh_working_dir,
101"path_filter": path_filter,
102"crontab": f"{schedule_minute} {schedule_hour} * * *",
103"crontab_comment": f"Every day starting at {schedule_hour - 16}:{schedule_minute} BJT",
104}
105)
106
107# To customize workflow, add new steps in steps.py
108# make another function for special cases.
109with open(place_to_write.resolve(), "w") as f:
110f.write(content)
111print(f"Write workflow: {place_to_write.resolve()}")
112output_telemetry.workflow_name = workflow_name
113output_telemetry.name = name
114output_telemetry.gh_working_dir = gh_working_dir
115output_telemetry.path_filter = path_filter
116
117
118def write_workflows(notebooks, output_telemetries=[]):
119# process notebooks
120for notebook in notebooks:
121# get notebook name
122output_telemetry = Telemetry()
123nb_path = Path(notebook)
124name, _ = os.path.splitext(nb_path.parts[-1])
125
126# write workflow file
127write_notebook_workflow(notebook, name, output_telemetry)
128output_telemetry.notebook = nb_path
129output_telemetries.append(output_telemetry)
130
131
132def local_filter(callback, array):
133results = []
134for index, item in enumerate(array):
135result = callback(item, index, array)
136# if returned true, append item to results
137if result:
138results.append(item)
139return results
140
141
142def no_readme_generation_filter(item, index, array) -> bool:
143"""
144Set each ipynb metadata no_readme_generation to "true" to skip readme generation
145"""
146try:
147if item.endswith("test.ipynb"):
148return False
149# read in notebook
150with open(item, "r", encoding="utf-8") as f:
151data = json.load(f)
152try:
153if data["metadata"]["no_readme_generation"] is not None:
154# no_readme_generate == "true", then no generation
155return data["metadata"]["no_readme_generation"] != "true"
156except Exception:
157return True # generate readme
158except Exception:
159return False # not generate readme
160
161
162def main(input_glob, output_files=[], check=False):
163# get list of workflows
164
165notebooks = _get_paths(
166[j for i in [glob.glob(p, recursive=True) for p in input_glob] for j in i]
167)
168
169# check each workflow, get metadata.
170notebooks = local_filter(no_readme_generation_filter, notebooks)
171
172# format code
173if not check:
174format_ipynb(notebooks)
175
176# write workflows
177write_workflows(notebooks, output_files)
178
179
180# run functions
181if __name__ == "__main__":
182# setup argparse
183parser = argparse.ArgumentParser()
184parser.add_argument(
185"-g", "--input-glob", nargs="+", help="Input glob example 'examples/**/*.ipynb'"
186)
187args = parser.parse_args()
188
189# call main
190main(input_glob=args.input_glob)
191