instructor

Форк
0
/
run_vision_org_table.py 
115 строк · 4.7 Кб
1
from openai import OpenAI
2
from io import StringIO
3
from typing import Annotated, Any
4
from pydantic import (
5
    BaseModel,
6
    BeforeValidator,
7
    PlainSerializer,
8
    InstanceOf,
9
    WithJsonSchema,
10
)
11
import instructor
12
import pandas as pd
13
from rich.console import Console
14

15
# Rich console handle; nothing in the visible part of this script writes to it.
console = Console()

# instructor-wrapped OpenAI client running in TOOLS mode. The wrapper is what
# accepts the `response_model=` argument passed to
# `client.chat.completions.create(...)` further down in this script.
client = instructor.from_openai(client=OpenAI(), mode=instructor.Mode.TOOLS)
20

21

22
def md_to_df(data: Any) -> Any:
    """Convert a pipe-delimited markdown table string into a DataFrame.

    Non-string input is returned untouched, so a value that is already a
    DataFrame (or anything else) passes straight through.

    Args:
        data: Either the markdown text of a single table or any other object.

    Returns:
        For ``str`` input, a ``pandas.DataFrame`` whose string cells have
        surrounding whitespace removed; otherwise ``data`` itself.

    Note:
        Only cell values are stripped. Column labels and index values keep
        whatever padding the markdown contained.
    """
    if not isinstance(data, str):
        return data

    frame = (
        pd.read_csv(
            StringIO(data),
            sep="|",
            # For rows that start with "|", field 0 is the empty text before
            # the first pipe, so field 1 is the table's first real column.
            index_col=1,
        )
        # The empty text outside the outer pipes parses as all-NaN columns.
        .dropna(axis=1, how="all")
        # Row 0 is the markdown header separator ("|---|:--|"), not data.
        .iloc[1:]
    )

    def _strip_cell(cell: Any) -> Any:
        # Empty cells are parsed as NaN (a float), which has no .strip();
        # leave every non-string value exactly as pandas produced it.
        return cell.strip() if isinstance(cell, str) else cell

    # DataFrame.map only exists in pandas >= 2.1; older releases expose the
    # same element-wise operation as DataFrame.applymap.
    cellwise = frame.map if hasattr(pd.DataFrame, "map") else frame.applymap
    return cellwise(_strip_cell)
35

36

37
# Text placed in the "description" entry of the JSON schema advertised for
# MarkdownDataFrame. It is a runtime string, so its wording and whitespace are
# kept exactly as they were.
_MARKDOWN_TABLE_DESCRIPTION = """
                The markdown representation of the table, 
                each one should be tidy, do not try to join tables
                that should be seperate"""

# A pandas DataFrame field that is exchanged as a markdown string:
#   * BeforeValidator  - md_to_df runs on the raw input before the
#                        InstanceOf[pd.DataFrame] check is applied.
#   * PlainSerializer  - dumping the field calls DataFrame.to_markdown().
#   * WithJsonSchema   - the field is described in the JSON schema as a
#                        plain string carrying the description above.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(lambda frame: frame.to_markdown()),
    WithJsonSchema(
        {"type": "string", "description": _MARKDOWN_TABLE_DESCRIPTION}
    ),
]
51

52

53
class Table(BaseModel):
    # Response model handed to the LLM call in `extract`.
    # Documented with comments on purpose: a class docstring on a pydantic
    # model is emitted as the schema description, which would change what is
    # sent to the model.

    # Free-text caption for the table (presumably its title - the schema
    # itself carries no further description for this field).
    caption: str
    # The table body; validated from markdown text and dumped back to
    # markdown via the MarkdownDataFrame annotation.
    dataframe: MarkdownDataFrame
56

57

58
def extract(url: str):
    """Ask the vision model to turn an org-chart image into a ``Table``.

    Sends one user message holding the image reference followed by the text
    instructions, and returns whatever the instructor client produces for
    ``response_model=Table``.

    Args:
        url: Value placed in the ``image_url`` part of the message.

    Returns:
        The result of ``client.chat.completions.create`` for the request.
    """
    # NOTE(review): the instructions talk about People / Organization objects
    # and "reports"/"manages" attributes, while the response model is Table -
    # confirm whether the wording is intentional before touching it.
    instructions = """
                            Analyze the organizational chart image and extract the relevant information to reconstruct the hierarchy.
                            
                            Create a list of People objects, where each person has the following attributes:
                            - id: A unique identifier for the person
                            - name: The person's name
                            - role: The person's role or position in the organization
                            - manager_name: The name of the person who manages this person
                            - manager_role: The role of the person who manages this person
                            
                            Ensure that the relationships between people are accurately captured in the reports and manages attributes.
                            
                            Return the list of People objects as the people attribute of an Organization object.
                        """

    # The image part comes first, then the text part - same order as before.
    image_part = {
        "type": "image_url",
        "image_url": {"url": url},
    }
    text_part = {
        "type": "text",
        "text": instructions,
    }
    user_message = {
        "role": "user",
        "content": [image_part, text_part],
    }

    return client.chat.completions.create(
        model="gpt-4-turbo",
        max_tokens=4000,
        response_model=Table,
        messages=[user_message],
    )
92

93

94
# Demo run: extract the table from a sample org-chart image and print the
# serialized "dataframe" field (the markdown produced by the field serializer).
chart_url = "https://www.mindmanager.com/static/mm/images/features/org-chart/hierarchical-chart.png"
org_table = extract(chart_url)
print(org_table.model_dump()["dataframe"])
99
"""
100
|    id  |  name              |  role                                    |  manager_name     |  manager_role                |
101
|-------:|:-------------------|:-----------------------------------------|:------------------|:-----------------------------|
102
|    1   | Adele Morana       | Founder, Chairman & CEO                  |                   |                              |
103
|    2   | Winston Cole       | COO                                      | Adele Morana      | Founder, Chairman & CEO      |
104
|    3   | Marcus Kim         | CFO                                      | Adele Morana      | Founder, Chairman & CEO      |
105
|    4   | Karin Ludovicus    | CPO                                      | Adele Morana      | Founder, Chairman & CEO      |
106
|    5   | Lea Erastos        | Chief Business Officer                   | Winston Cole      | COO                          |
107
|    6   | John McKinley      | Chief Accounting Officer                 | Winston Cole      | COO                          |
108
|    7   | Zahida Mahtab      | VP, Global Affairs & Communication       | Winston Cole      | COO                          |
109
|    8   | Adelaide Zhu       | VP, Central Services                     | Winston Cole      | COO                          |
110
|    9   | Gabriel Drummond   | VP, Investor Relations                   | Marcus Kim        | CFO                          |
111
|    10  | Felicie Vasili     | VP, Finance                              | Marcus Kim        | CFO                          |
112
|    11  | Ayda Williams      | VP, Global Customer & Business Marketing | Karin Ludovicius  | CPO                          |
113
|    12  | Nicholas Brambilla | VP, Company Brand                        | Karin Ludovicius  | CPO                          |
114
|    13  | Sandra Herminius   | VP, Product Marketing                    | Karin Ludovicius  | CPO                          |
115
"""
116

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.