promptflow

workflow_generator.py 
190 lines · 6.2 KB
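"""Generate GitHub Actions workflow YAML files for the example notebooks.

For every notebook matched by the input glob this script:
  * skips notebooks that opt out via the "no_readme_generation" metadata flag,
  * runs the black-nb formatter over the notebook,
  * renders a Jinja2 workflow template into .github/workflows/<workflow_name>.yml.

Typical invocation (script path and working directory assumed from the template
loader below, which resolves ./scripts/readme/... relative to the repository root):

    python scripts/readme/workflow_generator.py -g "examples/**/*.ipynb"
"""
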
import os
import glob
import argparse
from pathlib import Path
import ntpath
import re
import hashlib
import json
from jinja2 import Environment, FileSystemLoader
from ghactions_driver.readme_step import ReadmeStepsManage
from ghactions_driver.resource_resolver import resolve_tutorial_resource
from ghactions_driver.telemetry_obj import Telemetry


def format_ipynb(notebooks):
    # run code formatter on .ipynb files
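    # Note: black-nb is an external notebook formatter invoked via the shell;
    # its --clear-output flag additionally clears cell outputs.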
    for notebook in notebooks:
        os.system(f"black-nb --clear-output {notebook}")


def _get_paths(paths_list):
    """
    Convert the path list to unix format.
    :param paths_list: The input path list.
    :returns: The same list with unix-like paths.
    """
    paths_list.sort()
    if ntpath.sep == os.path.sep:
        return [pth.replace(ntpath.sep, "/") for pth in paths_list]
    return paths_list


def write_notebook_workflow(notebook, name, output_telemetry=Telemetry()):
    temp_name_list = re.split(r"/|\.", notebook)
    temp_name_list = [
        x
        for x in temp_name_list
        if x != "tutorials" and x != "examples" and x != "ipynb"
    ]
    temp_name_list = [x.replace("-", "") for x in temp_name_list]
    workflow_name = "_".join(["samples"] + temp_name_list)
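    # Illustrative example (hypothetical path): "examples/flows/standard/basic-chat/quickstart.ipynb"
    # -> ["flows", "standard", "basicchat", "quickstart"] -> "samples_flows_standard_basicchat_quickstart"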

    place_to_write = (
        Path(ReadmeStepsManage.git_base_dir())
        / ".github"
        / "workflows"
        / f"{workflow_name}.yml"
    )

    gh_working_dir = "/".join(notebook.split("/")[:-1])
    env = Environment(
        loader=FileSystemLoader("./scripts/readme/ghactions_driver/workflow_templates")
    )
    template = env.get_template("basic_workflow.yml.jinja2")

    # Schedule notebooks at different times to reduce maximum quota usage.
    name_hash = int(hashlib.sha512(workflow_name.encode()).hexdigest(), 16)
    schedule_minute = name_hash % 60
    schedule_hour = (name_hash // 60) % 4 + 19  # 19-22 UTC
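    # e.g. a name hashing to minute 37 and hour 21 renders the crontab "37 21 * * *"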

    notebook_path = Path(ReadmeStepsManage.git_base_dir()) / str(notebook)
    path_filter = None
    try:
        # resolve tutorial resources
        path_filter = resolve_tutorial_resource(
            workflow_name, notebook_path.resolve(), output_telemetry
        )
    except Exception:
        # only tutorials are required to declare their resources;
        # other samples fall through to the default path filter below
        if "examples/tutorials" in gh_working_dir:
            raise
    if "samples_configuration" in workflow_name:
        # exception: samples configuration is very simple and not related to
        # other prompt flow examples
        path_filter = (
            "[ examples/configuration.ipynb, .github/workflows/samples_configuration.yml ]"
        )
    elif path_filter is None:
        path_filter = (
            f"[ {gh_working_dir}/**, examples/*requirements.txt, "
            f".github/workflows/{workflow_name}.yml ]"
        )
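    # e.g. for the hypothetical notebook under examples/flows/standard/basic-chat/ the
    # default filter is "[ examples/flows/standard/basic-chat/**, examples/*requirements.txt,
    # .github/workflows/samples_flows_standard_basicchat_quickstart.yml ]"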

    # these workflows require config.json to init PF/ML client
    workflows_require_config_json = [
        "configuration",
        "runflowwithpipeline",
        "quickstartazure",
        "cloudrunmanagement",
        "chatwithclassbasedflowazure",
    ]
    if any(keyword in workflow_name for keyword in workflows_require_config_json):
        template = env.get_template("workflow_config_json.yml.jinja2")
    elif "chatwithpdf" in workflow_name:
        template = env.get_template("pdf_workflow.yml.jinja2")
    elif "flowasfunction" in workflow_name:
        template = env.get_template("flow_as_function.yml.jinja2")
    elif "traceautogengroupchat" in workflow_name:
        template = env.get_template("autogen_workflow.yml.jinja2")

    content = template.render(
        {
            "workflow_name": workflow_name,
            "ci_name": "samples_notebook_ci",
            "name": name,
            "gh_working_dir": gh_working_dir,
            "path_filter": path_filter,
            "crontab": f"{schedule_minute} {schedule_hour} * * *",
            "crontab_comment": f"Every day starting at {schedule_hour - 16}:{schedule_minute} BJT",
        }
    )

    # To customize a workflow, add new steps in steps.py;
    # for special cases, make another function.
    with open(place_to_write.resolve(), "w") as f:
        f.write(content)
    print(f"Write workflow: {place_to_write.resolve()}")
    output_telemetry.workflow_name = workflow_name
    output_telemetry.name = name
    output_telemetry.gh_working_dir = gh_working_dir
    output_telemetry.path_filter = path_filter


def write_workflows(notebooks, output_telemetries=None):
    # avoid a shared mutable default argument
    if output_telemetries is None:
        output_telemetries = []
    # process notebooks
    for notebook in notebooks:
        # get notebook name
        output_telemetry = Telemetry()
        nb_path = Path(notebook)
        name, _ = os.path.splitext(nb_path.parts[-1])

        # write workflow file
        write_notebook_workflow(notebook, name, output_telemetry)
        output_telemetry.notebook = nb_path
        output_telemetries.append(output_telemetry)


def local_filter(callback, array):
    results = []
    for index, item in enumerate(array):
        result = callback(item, index, array)
        # if returned true, append item to results
        if result:
            results.append(item)
    return results


def no_readme_generation_filter(item, index, array) -> bool:
    """
    Set the ipynb metadata field no_readme_generation to "true" to skip
    generation for that notebook.
    """
    try:
        if item.endswith("test.ipynb"):
            return False
        # read in notebook
        with open(item, "r", encoding="utf-8") as f:
            data = json.load(f)
        try:
            if data["metadata"]["no_readme_generation"] is not None:
                # no_readme_generation == "true" means no generation
                return data["metadata"]["no_readme_generation"] != "true"
        except Exception:
            return True  # generate readme
    except Exception:
        return False  # do not generate readme


def main(input_glob, output_files=None, check=False):
    # avoid a shared mutable default argument
    if output_files is None:
        output_files = []

    # get the list of notebooks
    notebooks = _get_paths(
        [j for i in [glob.glob(p, recursive=True) for p in input_glob] for j in i]
    )

    # check each notebook, get metadata.
    notebooks = local_filter(no_readme_generation_filter, notebooks)

    # format code
    if not check:
        format_ipynb(notebooks)

    # write workflows
    write_workflows(notebooks, output_files)


# run functions
if __name__ == "__main__":
    # setup argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-g", "--input-glob", nargs="+", help="Input glob example 'examples/**/*.ipynb'"
    )
    args = parser.parse_args()

    # call main
    main(input_glob=args.input_glob)