instructor
115 строк · 4.7 Кб
1from openai import OpenAI
2from io import StringIO
3from typing import Annotated, Any
4from pydantic import (
5BaseModel,
6BeforeValidator,
7PlainSerializer,
8InstanceOf,
9WithJsonSchema,
10)
11import instructor
12import pandas as pd
13from rich.console import Console
14
# Rich console instance; it is not referenced again in the visible part of
# this file.
console = Console()
# OpenAI client wrapped by instructor, configured with Mode.TOOLS. `extract`
# sends its request through this object and passes it a `response_model`.
client = instructor.from_openai(
    client=OpenAI(),
    mode=instructor.Mode.TOOLS,
)
20
21
def md_to_df(data: Any) -> Any:
    """Turn a pipe-delimited markdown table string into a DataFrame.

    Anything that is not a ``str`` is returned unchanged, so a value that
    is already a DataFrame passes straight through.

    For string input the text is parsed with ``|`` as the separator and the
    second parsed column (the first real table column) becomes the index.
    Columns that are entirely empty are dropped, the first row after the
    header is discarded (it is expected to be the ``|---|---|`` alignment
    row), and surrounding whitespace is stripped from every string cell.
    Header labels and index values are left exactly as parsed.

    Args:
        data: A markdown table as a string, or any other value.

    Returns:
        The parsed DataFrame for string input, otherwise ``data`` itself.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(StringIO(data), sep="|", index_col=1)
        # The pipes at the start and end of each row produce all-empty columns.
        .dropna(axis=1, how="all")
        # Skip the row directly under the header.
        .iloc[1:]
    )

    def _strip_cell(cell: Any) -> Any:
        # An empty cell (e.g. "||") is parsed as NaN, which is a float and has
        # no .strip(); leave every non-string value untouched.
        return cell.strip() if isinstance(cell, str) else cell

    # DataFrame.map was added in pandas 2.1; earlier releases only offer
    # applymap. Look it up on the class so a column named "map" cannot shadow it.
    apply_cells = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return apply_cells(frame, _strip_cell)
35
36
# JSON schema attached to the field type below: the table is described as a
# single string carrying markdown.
# NOTE(review): "seperate" is misspelled in this text; it is left as-is
# because the string is part of the schema emitted at runtime.
_MARKDOWN_TABLE_SCHEMA = {
    "type": "string",
    "description": """
The markdown representation of the table,
each one should be tidy, do not try to join tables
that should be seperate""",
}

# Field type for a pandas DataFrame that is exchanged as markdown text:
#   * validation: the value is first passed through md_to_df, then must be a
#     pd.DataFrame instance;
#   * serialization: the frame is rendered with DataFrame.to_markdown();
#   * JSON schema: the string schema defined above.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(lambda frame: frame.to_markdown()),
    WithJsonSchema(_MARKDOWN_TABLE_SCHEMA),
]
51
52
# Response model passed to the client as `response_model` in `extract`: one
# table made of a caption and its contents.
# NOTE(review): documented with comments rather than a class docstring on
# purpose; pydantic puts a model's docstring into its JSON schema description,
# which would change what is sent with the request. Confirm before adding one.
class Table(BaseModel):
    # Caption text for the table.
    caption: str
    # Table contents. MarkdownDataFrame converts a markdown string to a
    # DataFrame on validation and renders it back to markdown when dumped.
    dataframe: MarkdownDataFrame
56
57
def extract(url: str):
    """Request a ``Table`` extraction for the image at ``url``.

    Builds a single user message holding the image reference followed by
    the text instructions, and sends it through the module-level ``client``
    with ``Table`` as the response model.

    Args:
        url: Address of the image, passed as the ``image_url`` part.

    Returns:
        Whatever ``client.chat.completions.create`` returns for
        ``response_model=Table`` (the demo below calls ``.model_dump()``
        on it).
    """
    # NOTE(review): the instructions speak of People / Organization objects,
    # while the response model is Table. Text kept unchanged; confirm intent.
    instructions = """
Analyze the organizational chart image and extract the relevant information to reconstruct the hierarchy.

Create a list of People objects, where each person has the following attributes:
- id: A unique identifier for the person
- name: The person's name
- role: The person's role or position in the organization
- manager_name: The name of the person who manages this person
- manager_role: The role of the person who manages this person

Ensure that the relationships between people are accurately captured in the reports and manages attributes.

Return the list of People objects as the people attribute of an Organization object.
"""
    image_part = {"type": "image_url", "image_url": {"url": url}}
    text_part = {"type": "text", "text": instructions}
    user_message = {"role": "user", "content": [image_part, text_part]}

    return client.chat.completions.create(
        model="gpt-4-turbo",
        max_tokens=4000,
        response_model=Table,
        messages=[user_message],
    )
92
93
# Demo: run `extract` on a sample org-chart image and print the "dataframe"
# entry of the dumped model. This executes when the module is run or imported
# and goes through the API client. The string literal that follows records a
# sample of the printed output.
print(
    extract(
        "https://www.mindmanager.com/static/mm/images/features/org-chart/hierarchical-chart.png"
    ).model_dump()["dataframe"]
)
99"""
100| id | name | role | manager_name | manager_role |
101|-------:|:-------------------|:-----------------------------------------|:------------------|:-----------------------------|
102| 1 | Adele Morana | Founder, Chairman & CEO | | |
103| 2 | Winston Cole | COO | Adele Morana | Founder, Chairman & CEO |
104| 3 | Marcus Kim | CFO | Adele Morana | Founder, Chairman & CEO |
105| 4 | Karin Ludovicus | CPO | Adele Morana | Founder, Chairman & CEO |
106| 5 | Lea Erastos | Chief Business Officer | Winston Cole | COO |
107| 6 | John McKinley | Chief Accounting Officer | Winston Cole | COO |
108| 7 | Zahida Mahtab | VP, Global Affairs & Communication | Winston Cole | COO |
109| 8 | Adelaide Zhu | VP, Central Services | Winston Cole | COO |
110| 9 | Gabriel Drummond | VP, Investor Relations | Marcus Kim | CFO |
111| 10 | Felicie Vasili | VP, Finance | Marcus Kim | CFO |
112| 11 | Ayda Williams | VP, Global Customer & Business Marketing | Karin Ludovicius | CPO |
113| 12 | Nicholas Brambilla | VP, Company Brand | Karin Ludovicius | CPO |
114| 13 | Sandra Herminius | VP, Product Marketing | Karin Ludovicius | CPO |
115"""
116