instructor

Форк
0
152 строки · 5.2 Кб
1
from openai import OpenAI
2
from io import StringIO
3
from typing import Annotated, Any
4
from pydantic import (
5
    BaseModel,
6
    BeforeValidator,
7
    PlainSerializer,
8
    InstanceOf,
9
    WithJsonSchema,
10
)
11
import instructor
12
import pandas as pd
13
from rich.console import Console
14

15
# Rich console used further down to print each extracted table.
console = Console()

# OpenAI client wrapped by instructor in TOOLS mode; `extract` calls it with a
# `response_model` to get a structured result back.
client = instructor.from_openai(client=OpenAI(), mode=instructor.Mode.TOOLS)
20

21

22
def md_to_df(data: Any) -> Any:
    """Convert a markdown pipe table into a ``pandas.DataFrame``.

    Non-string input is returned untouched, so the function can sit in front
    of a field that may already hold a DataFrame.

    The text is parsed as ``|``-separated CSV. The leading and trailing pipe
    of every row yield two all-empty columns, which are dropped; the first
    data row (the ``---`` header separator) is discarded; and the table's
    first real column becomes the index.

    Surrounding whitespace is removed from cells, column headers, index
    labels and the index name. Missing cells (NaN from an empty cell or a
    short row) are left as they are instead of raising ``AttributeError``.

    Args:
        data: Markdown table text, or any other value.

    Returns:
        The parsed DataFrame for string input, otherwise ``data`` itself.
    """
    if not isinstance(data, str):
        return data

    def _strip(value: Any) -> Any:
        # Only text can be stripped; NaN / None / numbers pass through.
        return value.strip() if isinstance(value, str) else value

    frame = (
        pd.read_csv(
            StringIO(data),
            sep="|",
            index_col=1,  # column 0 is the empty field left of the leading pipe
        )
        .dropna(axis=1, how="all")  # drop the empty edge columns
        .iloc[1:]  # drop the |---|---| separator row
    )
    # Series.map (not DataFrame.map) keeps this working on pandas < 2.1.
    frame = frame.apply(lambda column: column.map(_strip))
    frame.columns = [_strip(name) for name in frame.columns]
    frame.index = pd.Index(
        [_strip(label) for label in frame.index],
        name=_strip(frame.index.name),
    )
    return frame
35

36

37
# JSON schema advertised for the field: a plain string holding a markdown
# table. The description text is sent as-is, so it is kept verbatim.
_MARKDOWN_TABLE_SCHEMA = {
    "type": "string",
    "description": """
                The markdown representation of the table, 
                each one should be tidy, do not try to join tables
                that should be seperate""",
}


# Deliberately unannotated so pydantic sees the same bare one-argument
# callable as the lambda it replaces.
def _df_to_markdown(frame):
    return frame.to_markdown()


# DataFrame-valued field type: input goes through `md_to_df` before the
# instance check, output is serialized with `DataFrame.to_markdown`, and the
# schema above replaces the generated one.
MarkdownDataFrame = Annotated[
    InstanceOf[pd.DataFrame],
    BeforeValidator(md_to_df),
    PlainSerializer(_df_to_markdown),
    WithJsonSchema(_MARKDOWN_TABLE_SCHEMA),
]
51

52

53
class Table(BaseModel):
    # One extracted table: a caption plus the table body.
    # NOTE(review): documented with comments rather than a docstring because
    # pydantic copies a model docstring into the generated JSON schema, which
    # would change what is sent to the model — confirm before converting.

    # Caption text for the table.
    caption: str
    # Table body, held as a DataFrame (see MarkdownDataFrame for conversion).
    dataframe: MarkdownDataFrame
56

57

58
class MultipleTables(BaseModel):
    # Container used as the `response_model` in `extract`.
    # NOTE(review): comments instead of a docstring, since pydantic copies a
    # model docstring into the generated JSON schema — confirm before changing.

    # Every table found, each with its own caption.
    tables: list[Table]
60

61

62
# Hand-built sample instance: a single captioned table with three columns
# and two rows.
example = MultipleTables(
    tables=[
        Table(
            caption="This is a caption",
            dataframe=pd.DataFrame(
                [[10, 20, 30], [40, 50, 60]],
                columns=["Chart A", "Chart B", "Chart C"],
            ),
        )
    ]
)
76

77

78
def extract(
    url: str,
    *,
    model: str = "gpt-4-turbo",
    max_tokens: int = 4000,
) -> MultipleTables:
    """Ask the chat model to transcribe the tables shown in an image.

    Sends one user message made of the image reference and a fixed text
    prompt, with ``MultipleTables`` as the ``response_model``.

    Args:
        url: Image location, passed through as the ``image_url`` content part.
        model: Model name for the request. Defaults to the value that was
            previously hard-coded.
        max_tokens: Completion token limit for the request. Defaults to the
            value that was previously hard-coded.

    Returns:
        Whatever the client returns for ``response_model=MultipleTables``.
    """
    return client.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        response_model=MultipleTables,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": url},
                    },
                    {
                        "type": "text",
                        # Prompt text is sent verbatim, indentation included.
                        "text": """
                            First, analyze the image to determine the most appropriate headers for the tables.
                            Generate a descriptive h1 for the overall image, followed by a brief summary of the data it contains. 
                            For each identified table, create an informative h2 title and a concise description of its contents.
                            Finally, output the markdown representation of each table.


                            Make sure to escape the markdown table properly, and make sure to include the caption and the dataframe.
                            including escaping all the newlines and quotes. Only return a markdown table in dataframe, nothing else.
                        """,
                    },
                ],
            }
        ],
    )
108

109

110
# Image URLs the script runs the extraction on.
urls = [
    "https://a.storyblok.com/f/47007/2400x1260/f816b031cb/uk-ireland-in-three-charts_chart_a.png/m/2880x0",
    "https://a.storyblok.com/f/47007/2400x2000/bf383abc3c/231031_uk-ireland-in-three-charts_table_v01_b.png/m/2880x0",
]

# Process the images one at a time, printing each table's caption and body.
for url in urls:
    extracted = extract(url)
    for table in extracted.tables:
        console.print(table.caption, "\n", table.dataframe)
118
"""
Growth in app installations and sessions across different app categories in Q3 2022 compared to Q2 2022 for Ireland and U.K. 
              Install Growth (%)  Session Growth (%) 
 Category                                           
Education                      7                   6
Games                         13                   3
Social                         4                  -3
Utilities                      6                -0.4
Top 10 Grossing Android Apps in Ireland, October 2023 
                              App Name           Category 
 Rank                                                    
1                           Google One       Productivity
2                              Disney+      Entertainment
3        TikTok - Videos, Music & LIVE      Entertainment
4                     Candy Crush Saga              Games
5       Tinder: Dating, Chat & Friends  Social networking
6                          Coin Master              Games
7                               Roblox              Games
8       Bumble - Dating & Make Friends             Dating
9                          Royal Match              Games
10         Spotify: Music and Podcasts      Music & Audio
Top 10 Grossing iOS Apps in Ireland, October 2023 
                              App Name           Category 
 Rank                                                    
1       Tinder: Dating, Chat & Friends  Social networking
2                              Disney+      Entertainment
3       YouTube: Watch, Listen, Stream      Entertainment
4         Audible: Audio Entertainment      Entertainment
5                     Candy Crush Saga              Games
6        TikTok - Videos, Music & LIVE      Entertainment
7       Bumble - Dating & Make Friends             Dating
8                               Roblox              Games
9          LinkedIn: Job Search & News           Business
10         Duolingo - Language Lessons          Education
"""
153

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.