3
from __future__ import annotations
6
import concurrent.futures
17
from dataclasses import dataclass
18
from multiprocessing import current_process
19
from pathlib import Path
20
from typing import Dict, Optional, Tuple, Union
22
from termcolor import colored
27
print("pyyaml not found.\n\nPlease install pyyaml:\n\tpip install pyyaml\n")
32
from nbclient.client import (
38
if current_process().name == "MainProcess":
39
print("nbclient not found.\n\nPlease install nbclient:\n\tpip install nbclient\n")
40
print("test won't work without nbclient")
44
from nbformat import NotebookNode
46
if current_process().name == "MainProcess":
47
print("nbformat not found.\n\nPlease install nbformat:\n\tpip install nbformat\n")
48
print("test won't work without nbclient")
52
def __init__(self, returncode: int, stdout: str, stderr: str):
53
self.returncode = returncode
58
def check_quarto_bin(quarto_bin: str = "quarto") -> None:
59
"""Check if quarto is installed."""
61
version = subprocess.check_output([quarto_bin, "--version"], text=True).strip()
62
version = tuple(map(int, version.split(".")))
63
if version < (1, 5, 23):
64
print("Quarto version is too old. Please upgrade to 1.5.23 or later.")
67
except FileNotFoundError:
68
print("Quarto is not installed. Please install it from https://quarto.org")
72
def notebooks_target_dir(website_directory: Path) -> Path:
    """Locate the folder, under the website root, that holds notebook output.

    Args:
        website_directory: Root directory of the website.

    Returns:
        The ``docs/notebooks`` directory beneath ``website_directory``.
    """
    return website_directory.joinpath("docs", "notebooks")
77
def load_metadata(notebook: Path) -> typing.Dict:
    """Read a notebook file and return its top-level ``metadata`` mapping.

    Args:
        notebook: Path to a notebook stored as UTF-8 encoded JSON.

    Returns:
        The value stored under the ``"metadata"`` key of the notebook JSON.

    Raises:
        KeyError: If the notebook JSON has no ``"metadata"`` key.
    """
    # Open through a context manager so the handle is closed deterministically
    # rather than being left open until the garbage collector reclaims it.
    with notebook.open(encoding="utf-8") as f:
        content = json.load(f)
    return content["metadata"]
82
def skip_reason_or_none_if_ok(notebook: Path) -> typing.Optional[str]:
83
"""Return a reason to skip the notebook, or None if it should not be skipped."""
85
if notebook.suffix != ".ipynb":
86
return "not a notebook"
88
if not notebook.exists():
89
return "file does not exist"
91
# Extra checks for notebooks in the notebook directory
92
if "notebook" not in notebook.parts:
95
with open(notebook, "r", encoding="utf-8") as f:
98
# Load the json and get the first cell
99
json_content = json.loads(content)
100
first_cell = json_content["cells"][0]
102
# <!-- and --> must exist on lines of their own
103
if first_cell["cell_type"] == "markdown" and first_cell["source"][0].strip() == "<!--":
105
f"Error in {str(notebook.resolve())} - Front matter should be defined in the notebook metadata now."
108
metadata = load_metadata(notebook)
110
if "skip_render" in metadata:
111
return metadata["skip_render"]
113
if "front_matter" not in metadata:
114
return "front matter missing from notebook metadata ⚠️"
116
front_matter = metadata["front_matter"]
118
if "tags" not in front_matter:
119
return "tags is not in front matter"
121
if "description" not in front_matter:
122
return "description is not in front matter"
124
# Make sure tags is a list of strings
125
if not all([isinstance(tag, str) for tag in front_matter["tags"]]):
126
return "tags must be a list of strings"
128
# Make sure description is a string
129
if not isinstance(front_matter["description"], str):
130
return "description must be a string"
135
def extract_title(notebook: Path) -> Optional[str]:
136
"""Extract the title of the notebook."""
137
with open(notebook, "r", encoding="utf-8") as f:
140
# Load the json and get the first cell
141
json_content = json.loads(content)
142
first_cell = json_content["cells"][0]
145
for line in first_cell["source"]:
146
if line.startswith("# "):
147
title = line[2:].strip()
148
# Strip off the { if it exists
150
title = title[: title.find("{")].strip()
156
def process_notebook(src_notebook: Path, website_dir: Path, notebook_dir: Path, quarto_bin: str, dry_run: bool) -> str:
157
"""Process a single notebook."""
159
in_notebook_dir = "notebook" in src_notebook.parts
161
metadata = load_metadata(src_notebook)
163
title = extract_title(src_notebook)
165
return fmt_error(src_notebook, "Title not found in notebook")
168
if "front_matter" in metadata:
169
front_matter = metadata["front_matter"]
171
front_matter["title"] = title
174
relative_notebook = src_notebook.resolve().relative_to(notebook_dir.resolve())
175
dest_dir = notebooks_target_dir(website_directory=website_dir)
176
target_file = dest_dir / relative_notebook.with_suffix(".mdx")
177
intermediate_notebook = dest_dir / relative_notebook
179
# If the target file already exists, check if it is newer than the source file
180
if target_file.exists():
181
if target_file.stat().st_mtime > src_notebook.stat().st_mtime:
182
return fmt_skip(src_notebook, f"target file ({target_file.name}) is newer ☑️")
185
return colored(f"Would process {src_notebook.name}", "green")
187
# Copy notebook to target dir
188
# The reason we copy the notebook is that quarto does not support rendering from a different directory
189
shutil.copy(src_notebook, intermediate_notebook)
191
# Check if another file has to be copied too
192
# Solely added for the purpose of agent_library_example.json
193
if "extra_files_to_copy" in metadata:
194
for file in metadata["extra_files_to_copy"]:
195
shutil.copy(src_notebook.parent / file, dest_dir / file)
198
result = subprocess.run(
199
[quarto_bin, "render", intermediate_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
201
if result.returncode != 0:
203
src_notebook, f"Failed to render {src_notebook}\n\nstderr:\n{result.stderr}\nstdout:\n{result.stdout}"
206
# Unlink intermediate files
207
intermediate_notebook.unlink()
209
target_file = src_notebook.with_suffix(".mdx")
211
# If the target file already exists, check if it is newer than the source file
212
if target_file.exists():
213
if target_file.stat().st_mtime > src_notebook.stat().st_mtime:
214
return fmt_skip(src_notebook, f"target file ({target_file.name}) is newer ☑️")
217
return colored(f"Would process {src_notebook.name}", "green")
219
result = subprocess.run(
220
[quarto_bin, "render", src_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
222
if result.returncode != 0:
224
src_notebook, f"Failed to render {src_notebook}\n\nstderr:\n{result.stderr}\nstdout:\n{result.stdout}"
227
post_process_mdx(target_file, src_notebook, front_matter)
229
return fmt_ok(src_notebook)
232
# Notebook execution based on nbmake: https://github.com/treebeardtech/nbmake
236
error_value: Optional[str]
249
def test_notebook(notebook_path: Path, timeout: int = 300) -> Tuple[Path, Optional[Union[NotebookError, NotebookSkip]]]:
250
nb = nbformat.read(str(notebook_path), NB_VERSION)
252
if "skip_test" in nb.metadata:
253
return notebook_path, NotebookSkip(reason=nb.metadata.skip_test)
262
os.environ["PYDEVD_DISABLE_FILE_VALIDATION"] = "1"
263
os.environ["TOKENIZERS_PARALLELISM"] = "false"
264
with tempfile.TemporaryDirectory() as tempdir:
265
c.execute(cwd=tempdir)
266
except CellExecutionError:
267
error = get_error_info(nb)
268
assert error is not None
269
return notebook_path, error
270
except CellTimeoutError:
271
error = get_timeout_info(nb)
272
assert error is not None
273
return notebook_path, error
275
return notebook_path, None
278
# Find the first code cell which did not complete.
281
) -> Optional[NotebookError]:
282
for i, cell in enumerate(nb.cells):
283
if cell.cell_type != "code":
285
if "shell.execute_reply" not in cell.metadata.execution:
286
return NotebookError(
287
error_name="timeout",
290
cell_source="".join(cell["source"]),
296
def get_error_info(nb: NotebookNode) -> Optional[NotebookError]:
297
for cell in nb["cells"]: # get LAST error
298
if cell["cell_type"] != "code":
300
errors = [output for output in cell["outputs"] if output["output_type"] == "error" or "ename" in output]
303
traceback = "\n".join(errors[0].get("traceback", ""))
304
return NotebookError(
305
error_name=errors[0].get("ename", ""),
306
error_value=errors[0].get("evalue", ""),
308
cell_source="".join(cell["source"]),
313
# rendered_notebook is the final mdx file
314
def post_process_mdx(rendered_mdx: Path, source_notebooks: Path, front_matter: Dict) -> None:
315
with open(rendered_mdx, "r", encoding="utf-8") as f:
318
# If there is front matter in the mdx file, we need to remove it
319
if content.startswith("---"):
320
front_matter_end = content.find("---", 3)
321
front_matter = yaml.safe_load(content[4:front_matter_end])
322
content = content[front_matter_end + 3 :]
324
# Each intermediate path needs to be resolved for this to work reliably
325
repo_root = Path(__file__).parent.resolve().parent.resolve()
326
repo_relative_notebook = source_notebooks.resolve().relative_to(repo_root)
327
front_matter["source_notebook"] = f"/{repo_relative_notebook}"
328
front_matter["custom_edit_url"] = f"https://github.com/microsoft/autogen/edit/main/{repo_relative_notebook}"
330
# Is there a title on the content? Only search up until the first code cell
331
first_code_cell = content.find("```")
332
if first_code_cell != -1:
333
title_search_content = content[:first_code_cell]
335
title_search_content = content
337
title_exists = title_search_content.find("\n# ") != -1
339
content = f"# {front_matter['title']}\n{content}"
341
# Inject the badge links into the content directly after the markdown title
342
# Find the end of the line with the title
343
title_end = content.find("\n", content.find("#"))
346
title = content[content.find("#") + 1 : content.find("\n", content.find("#"))].strip()
347
# If there is a { in the title we trim off the { and everything after it
349
title = title[: title.find("{")].strip()
351
github_link = f"https://github.com/microsoft/autogen/blob/main/{repo_relative_notebook}"
354
+ "\n[![Open on GitHub](https://img.shields.io/badge/Open%20on%20GitHub-grey?logo=github)]("
357
+ content[title_end:]
360
# If no colab link is present, insert one
361
if "colab-badge.svg" not in content:
362
colab_link = f"https://colab.research.google.com/github/microsoft/autogen/blob/main/{repo_relative_notebook}"
365
+ "\n[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]("
368
+ content[title_end:]
371
# Dump front_matter to yaml
372
front_matter = yaml.dump(front_matter, default_flow_style=False)
374
# Rewrite the content as
379
new_content = f"---\n{front_matter}---\n{content}"
380
with open(rendered_mdx, "w", encoding="utf-8") as f:
384
def path(path_str: str) -> Path:
    """Turn a path string into a ``Path``; passed as ``type=path`` to argparse arguments."""
    converted = Path(path_str)
    return converted
389
def collect_notebooks(notebook_directory: Path, website_directory: Path) -> typing.List[Path]:
390
notebooks = list(notebook_directory.glob("*.ipynb"))
391
notebooks.extend(list(website_directory.glob("docs/**/*.ipynb")))
395
def fmt_skip(notebook: Path, reason: str) -> str:
    """Build the status line for a skipped notebook.

    Args:
        notebook: Notebook path; only its file name is shown.
        reason: Explanation appended after the notebook name.

    Returns:
        A string with a yellow ``[Skip]`` tag, the blue notebook name and the reason.
    """
    tag = colored("[Skip]", "yellow")
    shown_name = colored(notebook.name, "blue")
    return f"{tag} {shown_name}: {reason}"
399
def fmt_ok(notebook: Path) -> str:
    """Build the status line for a notebook that was handled successfully.

    Args:
        notebook: Notebook path; only its file name is shown.

    Returns:
        A string with a green ``[OK]`` tag, the blue notebook name and a check mark.
    """
    tag = colored("[OK]", "green")
    shown_name = colored(notebook.name, "blue")
    return f"{tag} {shown_name} ✅"
403
def fmt_error(notebook: Path, error: Union[NotebookError, str]) -> str:
    """Build the status line for a notebook that failed.

    Args:
        notebook: Notebook path; only its file name is shown.
        error: Either a plain message, or a ``NotebookError`` whose
            ``error_name`` and ``error_value`` are shown.

    Returns:
        A string with a red ``[Error]`` tag, the blue notebook name and the
        error details.

    Raises:
        ValueError: If ``error`` is neither a string nor a ``NotebookError``.
    """
    # Reject unsupported types up front, before any formatting happens.
    if not isinstance(error, (str, NotebookError)):
        raise ValueError("error must be a string or a NotebookError")

    prefix = f"{colored('[Error]', 'red')} {colored(notebook.name, 'blue')}"
    if isinstance(error, str):
        detail = error
    else:
        detail = f"{error.error_name} - {error.error_value}"
    return f"{prefix}: {detail}"
412
def start_thread_to_terminate_when_parent_process_dies(ppid: int):
420
os.kill(pid, signal.SIGTERM)
423
thread = threading.Thread(target=f, daemon=True)
428
script_dir = Path(__file__).parent.absolute()
429
parser = argparse.ArgumentParser()
430
subparsers = parser.add_subparsers(dest="subcommand")
433
"--notebook-directory",
435
help="Directory containing notebooks to process",
436
default=script_dir / "../notebook",
439
"--website-directory", type=path, help="Root directory of docusarus website", default=script_dir
442
render_parser = subparsers.add_parser("render")
443
render_parser.add_argument("--quarto-bin", help="Path to quarto binary", default="quarto")
444
render_parser.add_argument("--dry-run", help="Don't render", action="store_true")
445
render_parser.add_argument("notebooks", type=path, nargs="*", default=None)
447
test_parser = subparsers.add_parser("test")
448
test_parser.add_argument("--timeout", help="Timeout for each notebook", type=int, default=60)
449
test_parser.add_argument("--exit-on-first-fail", "-e", help="Exit after first test fail", action="store_true")
450
test_parser.add_argument("notebooks", type=path, nargs="*", default=None)
451
test_parser.add_argument("--workers", help="Number of workers to use", type=int, default=-1)
453
args = parser.parse_args()
455
if args.subcommand is None:
456
print("No subcommand specified")
460
collected_notebooks = args.notebooks
462
collected_notebooks = collect_notebooks(args.notebook_directory, args.website_directory)
464
filtered_notebooks = []
465
for notebook in collected_notebooks:
466
reason = skip_reason_or_none_if_ok(notebook)
468
print(fmt_skip(notebook, reason))
470
filtered_notebooks.append(notebook)
472
if args.subcommand == "test":
473
if args.workers == -1:
476
with concurrent.futures.ProcessPoolExecutor(
477
max_workers=args.workers,
478
initializer=start_thread_to_terminate_when_parent_process_dies,
479
initargs=(os.getpid(),),
481
futures = [executor.submit(test_notebook, f, args.timeout) for f in filtered_notebooks]
482
for future in concurrent.futures.as_completed(futures):
483
notebook, optional_error_or_skip = future.result()
484
if isinstance(optional_error_or_skip, NotebookError):
485
if optional_error_or_skip.error_name == "timeout":
486
print(fmt_error(notebook, optional_error_or_skip.error_name))
491
print(fmt_error(notebook, optional_error_or_skip))
492
print(optional_error_or_skip.traceback)
494
if args.exit_on_first_fail:
497
elif isinstance(optional_error_or_skip, NotebookSkip):
498
print(fmt_skip(notebook, optional_error_or_skip.reason))
500
print(fmt_ok(notebook))
505
elif args.subcommand == "render":
506
check_quarto_bin(args.quarto_bin)
508
if not notebooks_target_dir(args.website_directory).exists():
509
notebooks_target_dir(args.website_directory).mkdir(parents=True)
511
for notebook in filtered_notebooks:
514
notebook, args.website_directory, args.notebook_directory, args.quarto_bin, args.dry_run
518
print("Unknown subcommand")
522
if __name__ == "__main__":