promptflow
445 строк · 16.0 Кб
# Generate the README file for the examples folder.
import argparse
import contextlib
import json
import os
import re
import sys
from operator import itemgetter
from pathlib import Path

from jinja2 import Environment, FileSystemLoader

import readme_generator
import workflow_generator
from ghactions_driver.readme_step import ReadmeStepsManage

# Branch name; write_readme passes it to the README template as "branch".
BRANCH = "main"

16
def get_notebook_readme_description(notebook) -> str:
    """Return the description stored at ``.metadata.description`` of a notebook.

    The notebook is opened as raw JSON (UTF-8).

    Args:
        notebook: Path of the ``.ipynb`` file to read.

    Returns:
        The value found at ``metadata.description``. When the file cannot be
        read, is not valid JSON, or does not carry that key, a message is
        printed and ``""`` is returned.
    """
    try:
        # read in notebook
        with open(notebook, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data["metadata"]["description"]
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: unreadable file; ValueError: invalid JSON or encoding;
        # KeyError/TypeError: metadata missing or not shaped as expected.
        print(f"{notebook} metadata description not set")
        return ""

30
def get_notebook_buildDoc_description(notebook) -> dict:
    """Return the ``build_doc`` entry stored at ``.metadata.build_doc`` of a notebook.

    The notebook is opened as raw JSON (UTF-8).

    Args:
        notebook: Path of the ``.ipynb`` file to read.

    Returns:
        The value found at ``metadata.build_doc`` (presumably a mapping, since
        the fallback is an empty dict). When the file cannot be read, is not
        valid JSON, or does not carry that key, a message is printed and
        ``{}`` is returned.
    """
    try:
        # read in notebook
        with open(notebook, "r", encoding="utf-8") as f:
            data = json.load(f)
        return data["metadata"]["build_doc"]
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: unreadable file; ValueError: invalid JSON or encoding;
        # KeyError/TypeError: metadata missing or not shaped as expected.
        print(f"{notebook} metadata build_doc not set")
        return {}

44
def get_readme_description_first_sentence(readme) -> str:
    """Return the first sentence of the first paragraph of a readme.

    The first line of the file is always discarded. After that, lines are
    accumulated until one contains a ``.`` (the text before the first ``.`` is
    kept), or until a blank/heading/metadata line follows collected text, or
    until the end of the file.

    Args:
        readme: Path of the markdown file to read (UTF-8).

    Returns:
        The collected sentence. When the sentence ends on the very first
        content line it carries a leading space (kept as-is so generated
        output does not change). ``""`` is returned when the file has no
        content lines, or - after printing a message - when it cannot be read.
    """
    # Lines starting with any of these are headings or front-matter metadata
    # and are treated like blank lines.
    ignored_prefixes = (
        "#",
        "---",
        "resources:",
        "title:",
        "cloud:",
        "category:",
        "weight:",
    )
    try:
        with open(readme, "r", encoding="utf-8") as f:
            # read (and drop) first line
            f.readline()
            sentence = ""
            while True:
                line = f.readline()
                if not line:
                    # readline() returns "" at end of file; without this check
                    # a readme with no content lines would loop forever.
                    break
                if line.startswith(ignored_prefixes):
                    line = ""
                if line.strip() == "" and sentence != "":
                    break
                elif "." in line:
                    sentence += " " + line.split(".")[0].strip()
                    break
                else:
                    if sentence == "":
                        sentence += line.strip()
                    elif line.strip() != "":
                        sentence += " " + line.strip()
            return sentence
    except (OSError, ValueError, TypeError):
        # OSError: unreadable file; ValueError: not valid UTF-8;
        # TypeError: path argument of an unsupported type.
        print(f"Error during reading {readme}")
        return ""

83
# (path prefix, section key) pairs, checked in this order, that decide which
# README section an example belongs to. "tutorials" is further split into
# "quickstarts" by _section_key.
_SECTION_PREFIXES = (
    ("examples/flows/standard", "flows"),
    ("examples/connections", "connections"),
    ("examples/flows/evaluation", "evaluations"),
    ("examples/tutorials", "tutorials"),
    ("examples/flows/chat", "chats"),
    ("examples/flex-flows", "flex_flows"),
    ("examples/prompty", "prompty"),
    ("examples/tools/use-cases", "toolusecases"),
)

# Section keys in the order they are handed to the README template; the
# tutorial index walks the sections in this same order.
_SECTION_ORDER = (
    "tutorials",
    "flex_flows",
    "prompty",
    "flows",
    "evaluations",
    "chats",
    "toolusecases",
    "connections",
    "quickstarts",
)

# Categories listed in the tutorial index, in output order. Items with any
# other category are left out of the index.
_TUTORIAL_INDEX_CATEGORIES = ("Tracing", "Prompty", "Flow", "Deployment", "Rag")


def _telemetry_display_fields(telemetry) -> dict:
    """Return title/cloud/category/weight of a telemetry object, with defaults."""
    return {
        "title": telemetry.title.capitalize()
        if hasattr(telemetry, "title")
        else "Empty title",
        "cloud": telemetry.cloud.capitalize()
        if hasattr(telemetry, "cloud")
        else "NOT DEFINED",
        "category": telemetry.category.capitalize()
        if hasattr(telemetry, "category")
        else "General",
        "weight": telemetry.weight if hasattr(telemetry, "weight") else 0,
    }


def _section_key(folder, name):
    """Return the README section key for an example folder, or None if unknown."""
    for prefix, key in _SECTION_PREFIXES:
        if folder.startswith(prefix):
            if key == "tutorials" and "quickstart" in name:
                return "quickstarts"
            return key
    return None


def _example_url(path) -> str:
    """Return the GitHub URL of a path relative to the examples folder."""
    return f"https://github.com/microsoft/promptflow/blob/main/examples/{path}"


def write_readme(workflow_telemetries, readme_telemetries):
    """Render ``examples/README.md`` and ``docs/tutorials/index.md``.

    Both files are rendered from the jinja2 templates found under
    ``scripts/readme/ghactions_driver/readme_templates`` of the git base
    directory and are written as UTF-8.

    Args:
        workflow_telemetries: Iterable of notebook telemetry objects. Each is
            read for ``name``, ``gh_working_dir``, ``workflow_name`` and
            ``notebook``; ``title``, ``cloud``, ``category`` and ``weight``
            are optional.
        readme_telemetries: Iterable of readme telemetry objects. Each is read
            for ``readme_name`` and ``readme_folder``; ``workflow_name``,
            ``title``, ``cloud``, ``category`` and ``weight`` are optional.
            Entries without ``workflow_name`` are only considered for the
            tutorial index.
    """
    # Resolve the repository root once and reuse it for every path below.
    base_dir = Path(ReadmeStepsManage.git_base_dir())
    readme_file = base_dir / "examples/README.md"

    sections = {key: {"readmes": [], "notebooks": []} for key in _SECTION_ORDER}

    for workflow_telemetry in workflow_telemetries:
        notebook_name = f"{workflow_telemetry.name}.ipynb"
        gh_working_dir = workflow_telemetry.gh_working_dir
        pipeline_name = workflow_telemetry.workflow_name
        yaml_name = f"{pipeline_name}.yml"

        # For workflows, open ipynb as raw json and
        # setup description at .metadata.description
        description = get_notebook_readme_description(workflow_telemetry.notebook)
        build_doc = get_notebook_buildDoc_description(workflow_telemetry.notebook)
        notebook_path = gh_working_dir.replace("examples/", "") + f"/{notebook_name}"
        workflow_item = {
            "name": notebook_name,
            "path": notebook_path,
            "pipeline_name": pipeline_name,
            "yaml_name": yaml_name,
            "description": description,
            "build_doc": build_doc,
            **_telemetry_display_fields(workflow_telemetry),
        }
        section = _section_key(gh_working_dir, notebook_name)
        if section is None:
            print(f"Unknown workflow type: {gh_working_dir}")
        else:
            sections[section]["notebooks"].append(workflow_item)

    no_workflow_readmes = []

    for readme_telemetry in readme_telemetries:
        readme_name = readme_telemetry.readme_name
        if readme_name.endswith("README.md"):
            # A folder-level README is named after its folder.
            item_name = readme_telemetry.readme_folder.split("/")[-1]
        else:
            item_name = readme_name.split("/")[-1].replace(".md", "")
        item_path = readme_name.replace("examples/", "")

        if not hasattr(readme_telemetry, "workflow_name"):
            no_workflow_readmes.append(
                {
                    "name": item_name,
                    "path": item_path,
                    "description": get_readme_description_first_sentence(readme_name),
                    **_telemetry_display_fields(readme_telemetry),
                }
            )
            continue

        pipeline_name = readme_telemetry.workflow_name
        yaml_name = f"{pipeline_name}.yml"
        description = get_readme_description_first_sentence(readme_name)
        readme_folder = readme_telemetry.readme_folder

        readme_item = {
            "name": item_name,
            "path": item_path,
            "pipeline_name": pipeline_name,
            "yaml_name": yaml_name,
            "description": description,
            **_telemetry_display_fields(readme_telemetry),
        }
        section = _section_key(readme_folder, item_name)
        if section is None:
            print(f"Unknown workflow type: {readme_folder}")
        else:
            sections[section]["readmes"].append(readme_item)

    # Quickstart notebooks are listed in reverse name order.
    sections["quickstarts"]["notebooks"] = sorted(
        sections["quickstarts"]["notebooks"],
        key=itemgetter("name"),
        reverse=True,
    )

    # Debug this replacement to check if generated correctly
    replacement = {"branch": BRANCH, **sections}

    print("writing README.md...")
    env = Environment(
        loader=FileSystemLoader(
            base_dir / "scripts/readme/ghactions_driver/readme_templates"
        )
    )
    template = env.get_template("README.md.jinja2")
    # Inputs are read as UTF-8, so write UTF-8 explicitly instead of relying
    # on the platform's locale encoding.
    with open(readme_file, "w", encoding="utf-8") as f:
        f.write(template.render(replacement))
    print(f"finished writing {str(readme_file)}")

    # Build a table out of replacement
    # |Area|Cloud|Category|Sample|Description|
    # The README is already rendered at this point, so the in-place additions
    # below (url, area, weight bump) only affect the tutorial index.
    new_items = []
    for section in sections.values():
        for item in section["notebooks"]:
            item["url"] = _example_url(item["path"])
            item["area"] = "SDK"
            if "azure" in item["name"].lower():
                # Notebooks with "azure" in their name sort after the others
                # of the same category.
                item["weight"] += 1000
            new_items.append(item)
        for item in section["readmes"]:
            if item.get("category", "General") == "General":
                print(
                    f"Tutorial Index: Skipping {item['path']} for not having a category"
                )
                continue
            item["url"] = _example_url(item["path"])
            item["area"] = "CLI"
            new_items.append(item)
    for item in no_workflow_readmes:
        if not item["path"].startswith("tutorials"):
            print(f"Tutorial Index: Skipping {item['path']} for not being in tutorials")
            continue
        if item.get("category", "General") == "General":
            print(f"Tutorial Index: Skipping {item['path']} for not having a category")
            continue
        item["url"] = _example_url(item["path"])
        item["area"] = "CLI"
        new_items.append(item)

    # Group by category (fixed order), each group sorted by weight; the sort
    # is stable, so ties keep their collection order.
    real_new_items = []
    for category in _TUTORIAL_INDEX_CATEGORIES:
        real_new_items.extend(
            sorted(
                (item for item in new_items if item["category"] == category),
                key=itemgetter("weight"),
            )
        )

    tutorial_items = {"items": real_new_items}
    tutorial_index_file = base_dir / "docs/tutorials/index.md"
    template_tutorial = env.get_template("tutorial_index.md.jinja2")
    with open(tutorial_index_file, "w", encoding="utf-8") as f:
        f.write(template_tutorial.render(tutorial_items))
    print(f"Tutorial Index: finished writing {str(tutorial_index_file)}")

371
def _expand_path_filter(path_filter):
    """Split a path-filter string into a list of individual path patterns."""
    paths = []
    # The filter is split on "[", "]", "," and spaces; empty pieces are dropped.
    for item in filter(None, re.split(r"\[|,| |\]", path_filter)):
        if item == "examples/*requirements.txt":
            # The wildcard entry is reported as the two concrete files.
            paths.append("examples/requirements.txt")
            paths.append("examples/dev_requirements.txt")
        else:
            paths.append(item)
    return paths


def main(check):
    """Regenerate workflows and readmes, optionally reporting path filters.

    Args:
        check: When true, everything printed during generation is discarded
            and a mapping of workflow name to watched paths is returned.

    Returns:
        With ``check`` set, a dict mapping each ``workflow_name`` to the list
        of paths taken from its ``path_filter``; otherwise ``""``.
    """
    with contextlib.ExitStack() as stack:
        if check:
            # Disable print; the stack restores stdout and closes the devnull
            # handle even if generation raises.
            devnull = stack.enter_context(open(os.devnull, "w"))
            stack.enter_context(contextlib.redirect_stdout(devnull))

        input_glob = ["examples/**/*.ipynb"]
        workflow_telemetry = []
        workflow_generator.main(input_glob, workflow_telemetry, check=check)

        input_glob_readme = [
            "examples/flows/**/README.md",
            "examples/flex-flows/**/README.md",
            "examples/prompty/**/README.md",
            "examples/connections/**/README.md",
            "examples/tutorials/**/*.md",
            "examples/tools/use-cases/**/README.md",
        ]
        # exclude the readme since this is 3p integration folder, pipeline generation is not included
        input_glob_readme_exclude = ["examples/flows/integrations/**/README.md"]
        readme_telemetry = []
        readme_generator.main(
            input_glob_readme, input_glob_readme_exclude, readme_telemetry
        )

        write_readme(workflow_telemetry, readme_telemetry)

    if not check:
        return ""

    output_object = {}
    for workflow in workflow_telemetry:
        output_object[workflow.workflow_name] = _expand_path_filter(
            workflow.path_filter
        )
    for readme in readme_telemetry:
        # Test the individual entry: the telemetry list itself never has a
        # workflow_name, so checking the list skipped every readme.
        if not hasattr(readme, "workflow_name"):
            continue
        output_object[readme.workflow_name] = _expand_path_filter(readme.path_filter)
    return output_object

436
if __name__ == "__main__":
    # Command-line entry point: the only option is the -c/--check switch.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-c",
        "--check",
        help="Check what file is affected",
        action="store_true",
    )
    options = cli.parse_args()
    # Whatever main() returns is emitted on stdout as JSON.
    print(json.dumps(main(options.check)))