import ast
import hashlib
from pathlib import Path
from typing import Iterator


def docstrings_checksum(python_files: Iterator[Path]) -> str:
    """Return an MD5 hex digest computed over the docstrings of the given files.

    Each file is parsed with ``ast`` and the docstrings of every module,
    class, function and async function definition are collected. The
    collected docstrings are sorted before hashing, so the digest does not
    depend on the order in which files are supplied or nodes are visited.

    :param python_files: Paths of the Python source files to inspect.
    :return: Hexadecimal MD5 digest of ``str()`` of the sorted docstring list.
    :raises SyntaxError: If one of the files is not valid Python source.
    """
    # Only these node types may carry a docstring; ast.get_docstring()
    # raises TypeError for anything else.
    docstring_nodes = (ast.AsyncFunctionDef, ast.FunctionDef, ast.ClassDef, ast.Module)

    # Get all docstrings from async functions, functions,
    # classes and modules definitions
    docstrings = []
    for python_file in python_files:
        # Python source is UTF-8 by default; don't rely on the platform's
        # locale encoding when reading it.
        source = python_file.read_text(encoding="utf-8")
        tree = ast.parse(source, filename=str(python_file))
        for node in ast.walk(tree):
            if not isinstance(node, docstring_nodes):
                # Skip all node types that can't have docstrings to prevent failures
                continue
            docstring = ast.get_docstring(node)
            # Definitions without a docstring yield None, which can't be
            # sorted together with strings; skip those (and empty ones).
            if docstring:
                docstrings.append(docstring)

    # Sort them to be safe, since ast.walk() returns
    # nodes in no specified order.
    # See https://docs.python.org/3/library/ast.html#ast.walk
    docstrings.sort()

    # MD5 is used purely as a change-detection checksum here.
    return hashlib.md5(str(docstrings).encode("utf-8")).hexdigest()


if __name__ == "__main__":
    # Imported here because they are only needed when run as a script.
    import argparse
    import itertools

    parser = argparse.ArgumentParser()
    parser.add_argument("--root", help="Haystack root folder", required=True, type=Path)
    args = parser.parse_args()

    # Get all Haystack and rest_api python files
    root: Path = args.root.absolute()
    haystack_files = root.glob("haystack/**/*.py")
    rest_api_files = root.glob("rest_api/**/*.py")

    # Chain both lazy globs so a single checksum covers both packages
    python_files = itertools.chain(haystack_files, rest_api_files)

    md5 = docstrings_checksum(python_files)
    # Emit the checksum on stdout; without this the script has no output.
    print(md5)