cohere-python

base_client.py 
3100 lines · 194.2 KB
1
# This file was auto-generated by Fern from our API Definition.
2

3
import json
4
import os
5
import typing
6
import urllib.parse
7
from json.decoder import JSONDecodeError
8

9
import httpx
10

11
from .connectors.client import AsyncConnectorsClient, ConnectorsClient
12
from .core.api_error import ApiError
13
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
14
from .core.jsonable_encoder import jsonable_encoder
15
from .core.remove_none_from_dict import remove_none_from_dict
16
from .core.request_options import RequestOptions
17
from .datasets.client import AsyncDatasetsClient, DatasetsClient
18
from .embed_jobs.client import AsyncEmbedJobsClient, EmbedJobsClient
19
from .environment import ClientEnvironment
20
from .errors.bad_request_error import BadRequestError
21
from .errors.internal_server_error import InternalServerError
22
from .errors.too_many_requests_error import TooManyRequestsError
23
from .finetuning.client import AsyncFinetuningClient, FinetuningClient
24
from .models.client import AsyncModelsClient, ModelsClient
25
from .types.chat_connector import ChatConnector
26
from .types.chat_document import ChatDocument
27
from .types.chat_message import ChatMessage
28
from .types.chat_request_prompt_truncation import ChatRequestPromptTruncation
29
from .types.chat_request_tool_results_item import ChatRequestToolResultsItem
30
from .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation
31
from .types.chat_stream_request_tool_results_item import ChatStreamRequestToolResultsItem
32
from .types.classify_example import ClassifyExample
33
from .types.classify_request_truncate import ClassifyRequestTruncate
34
from .types.classify_response import ClassifyResponse
35
from .types.detokenize_response import DetokenizeResponse
36
from .types.embed_input_type import EmbedInputType
37
from .types.embed_request_truncate import EmbedRequestTruncate
38
from .types.embed_response import EmbedResponse
39
from .types.embedding_type import EmbeddingType
40
from .types.generate_request_return_likelihoods import GenerateRequestReturnLikelihoods
41
from .types.generate_request_truncate import GenerateRequestTruncate
42
from .types.generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods
43
from .types.generate_stream_request_truncate import GenerateStreamRequestTruncate
44
from .types.generate_streamed_response import GenerateStreamedResponse
45
from .types.generation import Generation
46
from .types.non_streamed_chat_response import NonStreamedChatResponse
47
from .types.rerank_request_documents_item import RerankRequestDocumentsItem
48
from .types.rerank_response import RerankResponse
49
from .types.streamed_chat_response import StreamedChatResponse
50
from .types.summarize_request_extractiveness import SummarizeRequestExtractiveness
51
from .types.summarize_request_format import SummarizeRequestFormat
52
from .types.summarize_request_length import SummarizeRequestLength
53
from .types.summarize_response import SummarizeResponse
54
from .types.tokenize_response import TokenizeResponse
55
from .types.tool import Tool
56

57
try:
58
    import pydantic.v1 as pydantic  # type: ignore
59
except ImportError:
60
    import pydantic  # type: ignore
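# On pydantic v2 installations the `pydantic.v1` compatibility namespace is used
# so the generated models keep the v1 API; on pydantic v1 the fallback above
# imports the top-level package directly.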
61

62
# this is used as the default value for optional parameters
63
OMIT = typing.cast(typing.Any, ...)
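# The Ellipsis sentinel distinguishes "argument not provided" from an explicit
# `None`: parameters left at OMIT are dropped from the request body entirely
# (see the `is not OMIT` checks below) instead of being sent as `null`.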
64

65

66
class BaseCohere:
67
    """
68
    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configurations that will propagate to these functions.
69

70
    Parameters:
71
        - base_url: typing.Optional[str]. The base url to use for requests from the client.
72

73
        - environment: ClientEnvironment. The environment to use for requests from the client (import it with `from .environment import ClientEnvironment`).
74

75
                                          Defaults to ClientEnvironment.PRODUCTION
76

77
        - client_name: typing.Optional[str].
78

79
        - token: typing.Optional[typing.Union[str, typing.Callable[[], str]]].
80

81
        - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case a default is not set.
82

83
        - httpx_client: typing.Optional[httpx.Client]. The httpx client to use for making requests. A preconfigured client is used by default; this parameter is useful should you want to pass in any custom httpx configuration.
84
    ---
85
    from cohere.client import Client
86

87
    client = Client(
88
        client_name="YOUR_CLIENT_NAME",
89
        token="YOUR_TOKEN",
90
    )
91
    """
92

93
    def __init__(
94
        self,
95
        *,
96
        base_url: typing.Optional[str] = None,
97
        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,
98
        client_name: typing.Optional[str] = None,
99
        token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CO_API_KEY"),
100
        timeout: typing.Optional[float] = None,
101
        httpx_client: typing.Optional[httpx.Client] = None,
102
    ):
103
        _defaulted_timeout = timeout if timeout is not None else 300 if httpx_client is None else None
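        # An explicit `timeout` always wins; otherwise a 300 second default is
        # applied only when the SDK constructs its own httpx.Client below, and no
        # default is imposed when the caller supplied their own client.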
104
        if token is None:
105
            raise ApiError(body="The client must be instantiated by either passing in a token or setting CO_API_KEY")
106
        self._client_wrapper = SyncClientWrapper(
107
            base_url=_get_base_url(base_url=base_url, environment=environment),
108
            client_name=client_name,
109
            token=token,
110
            httpx_client=httpx.Client(timeout=_defaulted_timeout) if httpx_client is None else httpx_client,
111
            timeout=_defaulted_timeout,
112
        )
113
        self.embed_jobs = EmbedJobsClient(client_wrapper=self._client_wrapper)
114
        self.datasets = DatasetsClient(client_wrapper=self._client_wrapper)
115
        self.connectors = ConnectorsClient(client_wrapper=self._client_wrapper)
116
        self.models = ModelsClient(client_wrapper=self._client_wrapper)
117
        self.finetuning = FinetuningClient(client_wrapper=self._client_wrapper)
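        # The resource clients above all share self._client_wrapper, so they reuse
        # the same base URL, auth token, timeout and httpx client. Illustrative
        # sketch (attribute access only), using the `Client` from the class docstring:
        #   client = Client(client_name="YOUR_CLIENT_NAME", token="YOUR_TOKEN")
        #   client.models    # ModelsClient bound to the same connection settings
        #   client.datasets  # DatasetsClient, likewise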
118

119
    def chat_stream(
120
        self,
121
        *,
122
        message: str,
123
        model: typing.Optional[str] = OMIT,
124
        preamble: typing.Optional[str] = OMIT,
125
        chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
126
        conversation_id: typing.Optional[str] = OMIT,
127
        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,
128
        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
129
        search_queries_only: typing.Optional[bool] = OMIT,
130
        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
131
        temperature: typing.Optional[float] = OMIT,
132
        max_tokens: typing.Optional[int] = OMIT,
133
        k: typing.Optional[int] = OMIT,
134
        p: typing.Optional[float] = OMIT,
135
        seed: typing.Optional[float] = OMIT,
136
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
137
        frequency_penalty: typing.Optional[float] = OMIT,
138
        presence_penalty: typing.Optional[float] = OMIT,
139
        raw_prompting: typing.Optional[bool] = OMIT,
140
        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
141
        tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]] = OMIT,
142
        request_options: typing.Optional[RequestOptions] = None,
143
    ) -> typing.Iterator[StreamedChatResponse]:
144
        """
145
        Generates a text response to a user message.
146
        To learn how to use Chat with Streaming and RAG follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
147

148
        Parameters:
149
            - message: str. Text input for the model to respond to.
150

151
            - model: typing.Optional[str]. Defaults to `command`.
152

153
                                           The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
154

155
            - preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
156

157
                                              The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
158

159
            - chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
160

161
                                                                           Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
162

163
                                                                           The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
164

165
            - conversation_id: typing.Optional[str]. An alternative to `chat_history`.
166

167
                                                     Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
168

169
            - prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
170

171
                                                                                     Dictates how the prompt will be constructed.
172

173
                                                                                     With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
174

175
                                                                                     With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
176

177
                                                                                     With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
178

179
            - connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
180

181
                                                                           When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
182

183
            - search_queries_only: typing.Optional[bool]. Defaults to `false`.
184

185
                                                          When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
186

187
            - documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
188

189
                                                                         Example:
190
                                                                         `[
191
                                                                           { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
192
                                                                           { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
193
                                                                         ]`
194

195
                                                                         Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
196

197
                                                                         Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
198

199
                                                                         An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
200

201
                                                                         An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
202

203
                                                                         See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
204

205
            - temperature: typing.Optional[float]. Defaults to `0.3`.
206

207
                                                   A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
208

209
                                                   Randomness can be further maximized by increasing the value of the `p` parameter.
210

211
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
212

213
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
214
                                       Defaults to `0`, min value of `0`, max value of `500`.
215

216
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
217
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
218

219
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
220

221
            - stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.
222

223
            - frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
224

225
                                                         Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
226

227
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
228

229
                                                        Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
230

231
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
232

233
            - tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
234

235
                                                             When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
236

237
            - tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
238
                                                                                                Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
239

240
                                                                                                **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list.
241
                                                                                                ```
242
                                                                                                tool_results = [
243
                                                                                                  {
244
                                                                                                    "call": {
245
                                                                                                      "name": <tool name>,
246
                                                                                                      "parameters": {
247
                                                                                                        <param name>: <param value>
248
                                                                                                      }
249
                                                                                                    },
250
                                                                                                    "outputs": [{
251
                                                                                                      <key>: <value>
252
                                                                                                    }]
253
                                                                                                  },
254
                                                                                                  ...
255
                                                                                                ]
256
                                                                                                ```
257
                                                                                                **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
258

259
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
260
        ---
261
        from cohere import (
262
            ChatConnector,
263
            ChatMessage,
264
            ChatStreamRequestConnectorsSearchOptions,
265
            ChatStreamRequestPromptOverride,
266
            ChatStreamRequestToolResultsItem,
267
            Tool,
268
            ToolCall,
269
            ToolParameterDefinitionsValue,
270
        )
271
        from cohere.client import Client
272

273
        client = Client(
274
            client_name="YOUR_CLIENT_NAME",
275
            token="YOUR_TOKEN",
276
        )
277
        client.chat_stream(
278
            message="string",
279
            model="string",
280
            preamble="string",
281
            chat_history=[
282
                ChatMessage(
283
                    role="CHATBOT",
284
                    message="string",
285
                )
286
            ],
287
            conversation_id="string",
288
            prompt_truncation="OFF",
289
            connectors=[
290
                ChatConnector(
291
                    id="string",
292
                    user_access_token="string",
293
                    continue_on_failure=True,
294
                    options={"string": {"key": "value"}},
295
                )
296
            ],
297
            search_queries_only=True,
298
            documents=[{"string": "string"}],
299
            citation_quality="fast",
300
            temperature=1.1,
301
            max_tokens=1,
302
            k=1,
303
            p=1.1,
304
            seed=1.1,
305
            stop_sequences=["string"],
306
            connectors_search_options=ChatStreamRequestConnectorsSearchOptions(
307
                model={"key": "value"},
308
                temperature={"key": "value"},
309
                max_tokens={"key": "value"},
310
                preamble={"key": "value"},
311
                seed=1.1,
312
            ),
313
            prompt_override=ChatStreamRequestPromptOverride(
314
                preamble={"key": "value"},
315
                task_description={"key": "value"},
316
                style_guide={"key": "value"},
317
            ),
318
            frequency_penalty=1.1,
319
            presence_penalty=1.1,
320
            raw_prompting=True,
321
            tools=[
322
                Tool(
323
                    name="string",
324
                    description="string",
325
                    parameter_definitions={
326
                        "string": ToolParameterDefinitionsValue(
327
                            description="string",
328
                            type="string",
329
                            required=True,
330
                        )
331
                    },
332
                )
333
            ],
334
            tool_results=[
335
                ChatStreamRequestToolResultsItem(
336
                    call=ToolCall(),
337
                    outputs=[{"string": {"key": "value"}}],
338
                )
339
            ],
340
        )
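
        # Note: chat_stream returns a generator of StreamedChatResponse events and
        # the request is only sent once iteration begins. Minimal consumption sketch:
        #   for event in client.chat_stream(message="Hello"):
        #       print(event)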
341
        """
342
        _request: typing.Dict[str, typing.Any] = {"message": message, "stream": True}
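        # Only parameters the caller actually supplied (anything that is not the
        # OMIT sentinel) are copied into the request body, so the API's own
        # defaults apply to everything that was omitted.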
343
        if model is not OMIT:
344
            _request["model"] = model
345
        if preamble is not OMIT:
346
            _request["preamble"] = preamble
347
        if chat_history is not OMIT:
348
            _request["chat_history"] = chat_history
349
        if conversation_id is not OMIT:
350
            _request["conversation_id"] = conversation_id
351
        if prompt_truncation is not OMIT:
352
            _request["prompt_truncation"] = prompt_truncation
353
        if connectors is not OMIT:
354
            _request["connectors"] = connectors
355
        if search_queries_only is not OMIT:
356
            _request["search_queries_only"] = search_queries_only
357
        if documents is not OMIT:
358
            _request["documents"] = documents
359
        if temperature is not OMIT:
360
            _request["temperature"] = temperature
361
        if max_tokens is not OMIT:
362
            _request["max_tokens"] = max_tokens
363
        if k is not OMIT:
364
            _request["k"] = k
365
        if p is not OMIT:
366
            _request["p"] = p
367
        if seed is not OMIT:
368
            _request["seed"] = seed
369
        if stop_sequences is not OMIT:
370
            _request["stop_sequences"] = stop_sequences
371
        if frequency_penalty is not OMIT:
372
            _request["frequency_penalty"] = frequency_penalty
373
        if presence_penalty is not OMIT:
374
            _request["presence_penalty"] = presence_penalty
375
        if raw_prompting is not OMIT:
376
            _request["raw_prompting"] = raw_prompting
377
        if tools is not OMIT:
378
            _request["tools"] = tools
379
        if tool_results is not OMIT:
380
            _request["tool_results"] = tool_results
381
        with self._client_wrapper.httpx_client.stream(
382
            "POST",
383
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
384
            params=jsonable_encoder(
385
                request_options.get("additional_query_parameters") if request_options is not None else None
386
            ),
387
            json=jsonable_encoder(_request)
388
            if request_options is None or request_options.get("additional_body_parameters") is None
389
            else {
390
                **jsonable_encoder(_request),
391
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
392
            },
393
            headers=jsonable_encoder(
394
                remove_none_from_dict(
395
                    {
396
                        **self._client_wrapper.get_headers(),
397
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
398
                    }
399
                )
400
            ),
401
            timeout=request_options.get("timeout_in_seconds")
402
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
403
            else self._client_wrapper.get_timeout(),
404
            retries=0,
405
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
406
        ) as _response:
407
            if 200 <= _response.status_code < 300:
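                # Each non-empty line of the streamed body is one JSON-encoded
                # StreamedChatResponse event; blank keep-alive lines are skipped.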
408
                for _text in _response.iter_lines():
409
                    if len(_text) == 0:
410
                        continue
411
                    yield pydantic.parse_obj_as(StreamedChatResponse, json.loads(_text))  # type: ignore
412
                return
413
            _response.read()
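            # Error path: the streamed body must be read in full before the
            # status-specific handling below can inspect `.json()` / `.text`.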
414
            if _response.status_code == 429:
415
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
416
            try:
417
                _response_json = _response.json()
418
            except JSONDecodeError:
419
                raise ApiError(status_code=_response.status_code, body=_response.text)
420
            raise ApiError(status_code=_response.status_code, body=_response_json)
421

422
    def chat(
423
        self,
424
        *,
425
        message: str,
426
        model: typing.Optional[str] = OMIT,
427
        preamble: typing.Optional[str] = OMIT,
428
        chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
429
        conversation_id: typing.Optional[str] = OMIT,
430
        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,
431
        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
432
        search_queries_only: typing.Optional[bool] = OMIT,
433
        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
434
        temperature: typing.Optional[float] = OMIT,
435
        max_tokens: typing.Optional[int] = OMIT,
436
        k: typing.Optional[int] = OMIT,
437
        p: typing.Optional[float] = OMIT,
438
        seed: typing.Optional[float] = OMIT,
439
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
440
        frequency_penalty: typing.Optional[float] = OMIT,
441
        presence_penalty: typing.Optional[float] = OMIT,
442
        raw_prompting: typing.Optional[bool] = OMIT,
443
        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
444
        tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]] = OMIT,
445
        request_options: typing.Optional[RequestOptions] = None,
446
    ) -> NonStreamedChatResponse:
447
        """
448
        Generates a text response to a user message.
449
        To learn how to use Chat with Streaming and RAG follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
450

451
        Parameters:
452
            - message: str. Text input for the model to respond to.
453

454
            - model: typing.Optional[str]. Defaults to `command`.
455

456
                                           The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
457

458
            - preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
459

460
                                              The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
461

462
            - chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
463

464
                                                                           Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
465

466
                                                                           The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
467

468
            - conversation_id: typing.Optional[str]. An alternative to `chat_history`.
469

470
                                                     Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
471

472
            - prompt_truncation: typing.Optional[ChatRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
473

474
                                                                               Dictates how the prompt will be constructed.
475

476
                                                                               With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
477

478
                                                                               With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
479

480
                                                                               With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
481

482
            - connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
483

484
                                                                           When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
485

486
            - search_queries_only: typing.Optional[bool]. Defaults to `false`.
487

488
                                                          When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
489

490
            - documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
491

492
                                                                         Example:
493
                                                                         `[
494
                                                                           { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
495
                                                                           { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
496
                                                                         ]`
497

498
                                                                         Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
499

500
                                                                         Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
501

502
                                                                         An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
503

504
                                                                         An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
505

506
                                                                         See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
507

508
            - temperature: typing.Optional[float]. Defaults to `0.3`.
509

510
                                                   A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
511

512
                                                   Randomness can be further maximized by increasing the value of the `p` parameter.
513

514
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
515

516
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
517
                                       Defaults to `0`, min value of `0`, max value of `500`.
518

519
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
520
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
521

522
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
523

524
            - stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.
525

526
            - frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
527

528
                                                         Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
529

530
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
531

532
                                                        Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
533

534
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
535

536
            - tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
537

538
                                                             When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
539

540
            - tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
541
                                                                                          Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
542

543
                                                                                          **Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list.
544
                                                                                          ```
545
                                                                                          tool_results = [
546
                                                                                            {
547
                                                                                              "call": {
548
                                                                                                "name": <tool name>,
549
                                                                                                "parameters": {
550
                                                                                                  <param name>: <param value>
551
                                                                                                }
552
                                                                                              },
553
                                                                                              "outputs": [{
554
                                                                                                <key>: <value>
555
                                                                                              }]
556
                                                                                            },
557
                                                                                            ...
558
                                                                                          ]
559
                                                                                          ```
560
                                                                                          **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
561

562
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
563
        ---
564
        from cohere import ChatMessage
565
        from cohere.client import Client
566

567
        client = Client(
568
            client_name="YOUR_CLIENT_NAME",
569
            token="YOUR_TOKEN",
570
        )
571
        client.chat(
572
            message="Can you give me a global market overview of solar panels?",
573
            chat_history=[
574
                ChatMessage(
575
                    role="CHATBOT",
576
                    message="Hi!",
577
                ),
578
                ChatMessage(
579
                    role="CHATBOT",
580
                    message="How can I help you today?",
581
                ),
582
            ],
583
            prompt_truncation="OFF",
584
            temperature=0.3,
585
        )
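
        # chat returns a NonStreamedChatResponse; in this SDK the generated reply
        # is typically read from its `text` field (sketch):
        #   response = client.chat(message="...")
        #   print(response.text)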
586
        """
587
        _request: typing.Dict[str, typing.Any] = {"message": message, "stream": False}
588
        if model is not OMIT:
589
            _request["model"] = model
590
        if preamble is not OMIT:
591
            _request["preamble"] = preamble
592
        if chat_history is not OMIT:
593
            _request["chat_history"] = chat_history
594
        if conversation_id is not OMIT:
595
            _request["conversation_id"] = conversation_id
596
        if prompt_truncation is not OMIT:
597
            _request["prompt_truncation"] = prompt_truncation
598
        if connectors is not OMIT:
599
            _request["connectors"] = connectors
600
        if search_queries_only is not OMIT:
601
            _request["search_queries_only"] = search_queries_only
602
        if documents is not OMIT:
603
            _request["documents"] = documents
604
        if temperature is not OMIT:
605
            _request["temperature"] = temperature
606
        if max_tokens is not OMIT:
607
            _request["max_tokens"] = max_tokens
608
        if k is not OMIT:
609
            _request["k"] = k
610
        if p is not OMIT:
611
            _request["p"] = p
612
        if seed is not OMIT:
613
            _request["seed"] = seed
614
        if stop_sequences is not OMIT:
615
            _request["stop_sequences"] = stop_sequences
616
        if frequency_penalty is not OMIT:
617
            _request["frequency_penalty"] = frequency_penalty
618
        if presence_penalty is not OMIT:
619
            _request["presence_penalty"] = presence_penalty
620
        if raw_prompting is not OMIT:
621
            _request["raw_prompting"] = raw_prompting
622
        if tools is not OMIT:
623
            _request["tools"] = tools
624
        if tool_results is not OMIT:
625
            _request["tool_results"] = tool_results
626
        _response = self._client_wrapper.httpx_client.request(
627
            "POST",
628
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
629
            params=jsonable_encoder(
630
                request_options.get("additional_query_parameters") if request_options is not None else None
631
            ),
632
            json=jsonable_encoder(_request)
633
            if request_options is None or request_options.get("additional_body_parameters") is None
634
            else {
635
                **jsonable_encoder(_request),
636
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
637
            },
638
            headers=jsonable_encoder(
639
                remove_none_from_dict(
640
                    {
641
                        **self._client_wrapper.get_headers(),
642
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
643
                    }
644
                )
645
            ),
646
            timeout=request_options.get("timeout_in_seconds")
647
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
648
            else self._client_wrapper.get_timeout(),
649
            retries=0,
650
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
651
        )
652
        if 200 <= _response.status_code < 300:
653
            return pydantic.parse_obj_as(NonStreamedChatResponse, _response.json())  # type: ignore
654
        if _response.status_code == 429:
655
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
656
        try:
657
            _response_json = _response.json()
658
        except JSONDecodeError:
659
            raise ApiError(status_code=_response.status_code, body=_response.text)
660
        raise ApiError(status_code=_response.status_code, body=_response_json)
661

662
    def generate_stream(
663
        self,
664
        *,
665
        prompt: str,
666
        model: typing.Optional[str] = OMIT,
667
        num_generations: typing.Optional[int] = OMIT,
668
        max_tokens: typing.Optional[int] = OMIT,
669
        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,
670
        temperature: typing.Optional[float] = OMIT,
671
        seed: typing.Optional[float] = OMIT,
672
        preset: typing.Optional[str] = OMIT,
673
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
674
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
675
        k: typing.Optional[int] = OMIT,
676
        p: typing.Optional[float] = OMIT,
677
        frequency_penalty: typing.Optional[float] = OMIT,
678
        presence_penalty: typing.Optional[float] = OMIT,
679
        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,
680
        raw_prompting: typing.Optional[bool] = OMIT,
681
        request_options: typing.Optional[RequestOptions] = None,
682
    ) -> typing.Iterator[GenerateStreamedResponse]:
683
        """
684
        > 🚧 Warning
685
        >
686
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
687

688
        Generates realistic text conditioned on a given input.
689

690
        Parameters:
691
            - prompt: str. The input text that serves as the starting point for generating the response.
692
                           Note: The prompt will be pre-processed and modified before reaching the model.
693

694
            - model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
695
                                           Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
696
            - num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
697

698
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
699

700
                                                This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
701

702
                                                Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
703

704
            - truncate: typing.Optional[GenerateStreamRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
705

706
                                                                        Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
707

708
                                                                        If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
709
            - temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
710
                                                   Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
711

712
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
713

714
            - preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
715
                                            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
716

717
            - end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
718

719
            - stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
720

721
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
722
                                       Defaults to `0`, min value of `0`, max value of `500`.
723

724
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
725
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
726

727
            - frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
728

729
                                                         Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
730

731
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
732

733
                                                        Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
734

735
                                                        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
736

737
            - return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
738

739
                                                                                           If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
740

741
                                                                                           If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
742
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
743

744
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
745
        ---
746
        from cohere.client import Client
747

748
        client = Client(
749
            client_name="YOUR_CLIENT_NAME",
750
            token="YOUR_TOKEN",
751
        )
752
        client.generate_stream(
753
            prompt="string",
754
            model="string",
755
            num_generations=1,
756
            max_tokens=1,
757
            truncate="NONE",
758
            temperature=1.1,
759
            seed=1.1,
760
            preset="string",
761
            end_sequences=["string"],
762
            stop_sequences=["string"],
763
            k=1,
764
            p=1.1,
765
            frequency_penalty=1.1,
766
            presence_penalty=1.1,
767
            return_likelihoods="GENERATION",
768
            raw_prompting=True,
769
        )
770
        """
771
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": True}
772
        if model is not OMIT:
773
            _request["model"] = model
774
        if num_generations is not OMIT:
775
            _request["num_generations"] = num_generations
776
        if max_tokens is not OMIT:
777
            _request["max_tokens"] = max_tokens
778
        if truncate is not OMIT:
779
            _request["truncate"] = truncate
780
        if temperature is not OMIT:
781
            _request["temperature"] = temperature
782
        if seed is not OMIT:
783
            _request["seed"] = seed
784
        if preset is not OMIT:
785
            _request["preset"] = preset
786
        if end_sequences is not OMIT:
787
            _request["end_sequences"] = end_sequences
788
        if stop_sequences is not OMIT:
789
            _request["stop_sequences"] = stop_sequences
790
        if k is not OMIT:
791
            _request["k"] = k
792
        if p is not OMIT:
793
            _request["p"] = p
794
        if frequency_penalty is not OMIT:
795
            _request["frequency_penalty"] = frequency_penalty
796
        if presence_penalty is not OMIT:
797
            _request["presence_penalty"] = presence_penalty
798
        if return_likelihoods is not OMIT:
799
            _request["return_likelihoods"] = return_likelihoods
800
        if raw_prompting is not OMIT:
801
            _request["raw_prompting"] = raw_prompting
802
        with self._client_wrapper.httpx_client.stream(
803
            "POST",
804
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
805
            params=jsonable_encoder(
806
                request_options.get("additional_query_parameters") if request_options is not None else None
807
            ),
808
            json=jsonable_encoder(_request)
809
            if request_options is None or request_options.get("additional_body_parameters") is None
810
            else {
811
                **jsonable_encoder(_request),
812
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
813
            },
814
            headers=jsonable_encoder(
815
                remove_none_from_dict(
816
                    {
817
                        **self._client_wrapper.get_headers(),
818
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
819
                    }
820
                )
821
            ),
822
            timeout=request_options.get("timeout_in_seconds")
823
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
824
            else self._client_wrapper.get_timeout(),
825
            retries=0,
826
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
827
        ) as _response:
828
            if 200 <= _response.status_code < 300:
829
                for _text in _response.iter_lines():
830
                    if len(_text) == 0:
831
                        continue
832
                    yield pydantic.parse_obj_as(GenerateStreamedResponse, json.loads(_text))  # type: ignore
833
                return
834
            _response.read()
835
            if _response.status_code == 400:
836
                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
837
            if _response.status_code == 429:
838
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
839
            if _response.status_code == 500:
840
                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
841
            try:
842
                _response_json = _response.json()
843
            except JSONDecodeError:
844
                raise ApiError(status_code=_response.status_code, body=_response.text)
845
            raise ApiError(status_code=_response.status_code, body=_response_json)
846

847
    def generate(
848
        self,
849
        *,
850
        prompt: str,
851
        model: typing.Optional[str] = OMIT,
852
        num_generations: typing.Optional[int] = OMIT,
853
        max_tokens: typing.Optional[int] = OMIT,
854
        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,
855
        temperature: typing.Optional[float] = OMIT,
856
        seed: typing.Optional[float] = OMIT,
857
        preset: typing.Optional[str] = OMIT,
858
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
859
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
860
        k: typing.Optional[int] = OMIT,
861
        p: typing.Optional[float] = OMIT,
862
        frequency_penalty: typing.Optional[float] = OMIT,
863
        presence_penalty: typing.Optional[float] = OMIT,
864
        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,
865
        raw_prompting: typing.Optional[bool] = OMIT,
866
        request_options: typing.Optional[RequestOptions] = None,
867
    ) -> Generation:
868
        """
869
        > 🚧 Warning
870
        >
871
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
872

873
        Generates realistic text conditioned on a given input.
874

875
        Parameters:
876
            - prompt: str. The input text that serves as the starting point for generating the response.
877
                           Note: The prompt will be pre-processed and modified before reaching the model.
878

879
            - model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
880
                                           Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
881
            - num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
882

883
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
884

885
                                                This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
886

887
                                                Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
888

889
            - truncate: typing.Optional[GenerateRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
890

891
                                                                  Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
892

893
                                                                  If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
894
            - temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
895
                                                   Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
896

897
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
898

899
            - preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
900
                                            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
901

902
            - end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
903

904
            - stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
905

906
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
907
                                       Defaults to `0`, min value of `0`, max value of `500`.
908

909
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
910
                                         Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
911

912
            - frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
913

914
                                                         Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
915

916
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
917

918
                                                        Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
919

920
                                                        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
921

922
            - return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
923

924
                                                                                     If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
925

926
                                                                                     If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
927
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
928

929
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
930
        ---
931
        from cohere.client import Client
932

933
        client = Client(
934
            client_name="YOUR_CLIENT_NAME",
935
            token="YOUR_TOKEN",
936
        )
937
        client.generate(
938
            prompt="Please explain to me how LLMs work",
939
            preset="my-preset-a58sbd",
940
        )
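        A follow-up sketch (illustrative only) for reading the generated text back out of the
        returned `Generation`; the `generations[0].text` access is an assumption about its shape.

        response = client.generate(
            prompt="Please explain to me how LLMs work",
        )
        print(response.generations[0].text)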
        """
942
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": False}
943
        if model is not OMIT:
944
            _request["model"] = model
945
        if num_generations is not OMIT:
946
            _request["num_generations"] = num_generations
947
        if max_tokens is not OMIT:
948
            _request["max_tokens"] = max_tokens
949
        if truncate is not OMIT:
950
            _request["truncate"] = truncate
951
        if temperature is not OMIT:
952
            _request["temperature"] = temperature
953
        if seed is not OMIT:
954
            _request["seed"] = seed
955
        if preset is not OMIT:
956
            _request["preset"] = preset
957
        if end_sequences is not OMIT:
958
            _request["end_sequences"] = end_sequences
959
        if stop_sequences is not OMIT:
960
            _request["stop_sequences"] = stop_sequences
961
        if k is not OMIT:
962
            _request["k"] = k
963
        if p is not OMIT:
964
            _request["p"] = p
965
        if frequency_penalty is not OMIT:
966
            _request["frequency_penalty"] = frequency_penalty
967
        if presence_penalty is not OMIT:
968
            _request["presence_penalty"] = presence_penalty
969
        if return_likelihoods is not OMIT:
970
            _request["return_likelihoods"] = return_likelihoods
971
        if raw_prompting is not OMIT:
972
            _request["raw_prompting"] = raw_prompting
973
        _response = self._client_wrapper.httpx_client.request(
974
            "POST",
975
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
976
            params=jsonable_encoder(
977
                request_options.get("additional_query_parameters") if request_options is not None else None
978
            ),
979
            json=jsonable_encoder(_request)
980
            if request_options is None or request_options.get("additional_body_parameters") is None
981
            else {
982
                **jsonable_encoder(_request),
983
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
984
            },
985
            headers=jsonable_encoder(
986
                remove_none_from_dict(
987
                    {
988
                        **self._client_wrapper.get_headers(),
989
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
990
                    }
991
                )
992
            ),
993
            timeout=request_options.get("timeout_in_seconds")
994
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
995
            else self._client_wrapper.get_timeout(),
996
            retries=0,
997
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
998
        )
999
        if 200 <= _response.status_code < 300:
1000
            return pydantic.parse_obj_as(Generation, _response.json())  # type: ignore
1001
        if _response.status_code == 400:
1002
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1003
        if _response.status_code == 429:
1004
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1005
        if _response.status_code == 500:
1006
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1007
        try:
1008
            _response_json = _response.json()
1009
        except JSONDecodeError:
1010
            raise ApiError(status_code=_response.status_code, body=_response.text)
1011
        raise ApiError(status_code=_response.status_code, body=_response_json)
1012

1013
    def embed(
1014
        self,
1015
        *,
1016
        texts: typing.Sequence[str],
1017
        model: typing.Optional[str] = OMIT,
1018
        input_type: typing.Optional[EmbedInputType] = OMIT,
1019
        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
1020
        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,
1021
        request_options: typing.Optional[RequestOptions] = None,
1022
    ) -> EmbedResponse:
1023
        """
1024
        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.
1025

1026
        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.
1027

1028
        If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search).
1029

1030
        Parameters:
1031
            - texts: typing.Sequence[str]. An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.
1032

1033
            - model: typing.Optional[str]. Defaults to `embed-english-v2.0`.
1034

1035
                                           The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
1036

1037
                                           Available models and corresponding embedding dimensions:
1038

1039
                                           * `embed-english-v3.0`  1024
1040
                                           * `embed-multilingual-v3.0`  1024
1041
                                           * `embed-english-light-v3.0`  384
1042
                                           * `embed-multilingual-light-v3.0`  384
1043

1044
                                           * `embed-english-v2.0`  4096
1045
                                           * `embed-english-light-v2.0`  1024
1046
                                           * `embed-multilingual-v2.0`  768
1047
            - input_type: typing.Optional[EmbedInputType]. Specifies the type of input passed to the model; the example below uses `search_document`.
1048

1049
            - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]]. Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.
1050

1051
                                                                                * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
1052
                                                                                * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
1053
                                                                                * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
1054
                                                                                * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models.
1055
                                                                                * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
1056
            - truncate: typing.Optional[EmbedRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
1057

1058
                                                               Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
1059

1060
                                                               If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
1061
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1062
        ---
1063
        from cohere.client import Client
1064

1065
        client = Client(
1066
            client_name="YOUR_CLIENT_NAME",
1067
            token="YOUR_TOKEN",
1068
        )
1069
        client.embed(
1070
            texts=["string"],
1071
            model="string",
1072
            input_type="search_document",
1073
            embedding_types=["float"],
1074
            truncate="NONE",
1075
        )
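        A hedged sketch of requesting compressed `int8` embeddings from a v3 model, as described
        under `embedding_types` above; the model name is only an example, and whether a given
        account has access to it is not guaranteed.

        response = client.embed(
            texts=["hello", "goodbye"],
            model="embed-english-v3.0",
            input_type="classification",
            embedding_types=["int8"],
        )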
        """
1077
        _request: typing.Dict[str, typing.Any] = {"texts": texts}
1078
        if model is not OMIT:
1079
            _request["model"] = model
1080
        if input_type is not OMIT:
1081
            _request["input_type"] = input_type
1082
        if embedding_types is not OMIT:
1083
            _request["embedding_types"] = embedding_types
1084
        if truncate is not OMIT:
1085
            _request["truncate"] = truncate
1086
        _response = self._client_wrapper.httpx_client.request(
1087
            "POST",
1088
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "embed"),
1089
            params=jsonable_encoder(
1090
                request_options.get("additional_query_parameters") if request_options is not None else None
1091
            ),
1092
            json=jsonable_encoder(_request)
1093
            if request_options is None or request_options.get("additional_body_parameters") is None
1094
            else {
1095
                **jsonable_encoder(_request),
1096
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1097
            },
1098
            headers=jsonable_encoder(
1099
                remove_none_from_dict(
1100
                    {
1101
                        **self._client_wrapper.get_headers(),
1102
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1103
                    }
1104
                )
1105
            ),
1106
            timeout=request_options.get("timeout_in_seconds")
1107
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1108
            else self._client_wrapper.get_timeout(),
1109
            retries=0,
1110
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1111
        )
1112
        if 200 <= _response.status_code < 300:
1113
            return pydantic.parse_obj_as(EmbedResponse, _response.json())  # type: ignore
1114
        if _response.status_code == 400:
1115
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1116
        if _response.status_code == 429:
1117
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1118
        if _response.status_code == 500:
1119
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1120
        try:
1121
            _response_json = _response.json()
1122
        except JSONDecodeError:
1123
            raise ApiError(status_code=_response.status_code, body=_response.text)
1124
        raise ApiError(status_code=_response.status_code, body=_response_json)
1125

1126
    def rerank(
1127
        self,
1128
        *,
1129
        model: typing.Optional[str] = OMIT,
1130
        query: str,
1131
        documents: typing.Sequence[RerankRequestDocumentsItem],
1132
        top_n: typing.Optional[int] = OMIT,
1133
        return_documents: typing.Optional[bool] = OMIT,
1134
        max_chunks_per_doc: typing.Optional[int] = OMIT,
1135
        request_options: typing.Optional[RequestOptions] = None,
1136
    ) -> RerankResponse:
1137
        """
1138
        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.
1139

1140
        Parameters:
1141
            - model: typing.Optional[str]. The identifier of the model to use, one of: `rerank-english-v2.0`, `rerank-multilingual-v2.0`.
1142

1143
            - query: str. The search query
1144

1145
            - documents: typing.Sequence[RerankRequestDocumentsItem]. A list of document objects or strings to rerank.
1146
                                                                      If a document object is provided, the `text` field is required and all other fields will be preserved in the response.
1147

1148
                                                                      The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.
1149

1150
                                                                      We recommend a maximum of 1,000 documents for optimal endpoint performance.
1151
            - top_n: typing.Optional[int]. The number of most relevant documents or indices to return, defaults to the length of the documents
1152

1153
            - return_documents: typing.Optional[bool]. - If false, returns results without the document text - the API will return a list of {index, relevance score} where index is inferred from the list passed into the request.
                                                       - If true, returns results with the document text passed in - the API will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
1155
            - max_chunks_per_doc: typing.Optional[int]. The maximum number of chunks to produce internally from a document
1156

1157
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1158
        ---
1159
        from cohere.client import Client
1160

1161
        client = Client(
1162
            client_name="YOUR_CLIENT_NAME",
1163
            token="YOUR_TOKEN",
1164
        )
1165
        client.rerank(
1166
            model="rerank-english-v2.0",
1167
            query="What is the capital of the United States?",
1168
            documents=[
1169
                "Carson City is the capital city of the American state of Nevada.",
1170
                "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
1171
                "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
1172
                "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
1173
            ],
1174
        )
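        A hedged sketch of reading the ranked results back; the `results`, `index` and
        `relevance_score` attributes are assumptions about the shape of `RerankResponse`.

        response = client.rerank(
            model="rerank-english-v2.0",
            query="What is the capital of the United States?",
            documents=["Carson City is the capital city of the American state of Nevada."],
            top_n=1,
        )
        for result in response.results:
            print(result.index, result.relevance_score)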
        """
1176
        _request: typing.Dict[str, typing.Any] = {"query": query, "documents": documents}
1177
        if model is not OMIT:
1178
            _request["model"] = model
1179
        if top_n is not OMIT:
1180
            _request["top_n"] = top_n
1181
        if return_documents is not OMIT:
1182
            _request["return_documents"] = return_documents
1183
        if max_chunks_per_doc is not OMIT:
1184
            _request["max_chunks_per_doc"] = max_chunks_per_doc
1185
        _response = self._client_wrapper.httpx_client.request(
1186
            "POST",
1187
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "rerank"),
1188
            params=jsonable_encoder(
1189
                request_options.get("additional_query_parameters") if request_options is not None else None
1190
            ),
1191
            json=jsonable_encoder(_request)
1192
            if request_options is None or request_options.get("additional_body_parameters") is None
1193
            else {
1194
                **jsonable_encoder(_request),
1195
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1196
            },
1197
            headers=jsonable_encoder(
1198
                remove_none_from_dict(
1199
                    {
1200
                        **self._client_wrapper.get_headers(),
1201
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1202
                    }
1203
                )
1204
            ),
1205
            timeout=request_options.get("timeout_in_seconds")
1206
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1207
            else self._client_wrapper.get_timeout(),
1208
            retries=0,
1209
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1210
        )
1211
        if 200 <= _response.status_code < 300:
1212
            return pydantic.parse_obj_as(RerankResponse, _response.json())  # type: ignore
1213
        if _response.status_code == 429:
1214
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1215
        try:
1216
            _response_json = _response.json()
1217
        except JSONDecodeError:
1218
            raise ApiError(status_code=_response.status_code, body=_response.text)
1219
        raise ApiError(status_code=_response.status_code, body=_response_json)
1220

1221
    def classify(
1222
        self,
1223
        *,
1224
        inputs: typing.Sequence[str],
1225
        examples: typing.Sequence[ClassifyExample],
1226
        model: typing.Optional[str] = OMIT,
1227
        preset: typing.Optional[str] = OMIT,
1228
        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,
1229
        request_options: typing.Optional[RequestOptions] = None,
1230
    ) -> ClassifyResponse:
1231
        """
1232
        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.
1233
        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
1234

1235
        Parameters:
1236
            - inputs: typing.Sequence[str]. A list of up to 96 texts to be classified. Each one must be a non-empty string.
1237
                                            There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models).
1238
                                            Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.
1239
            - examples: typing.Sequence[ClassifyExample]. An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`.
1240
                                                          Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
1241
            - model: typing.Optional[str]. The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID.
1242

1243
            - preset: typing.Optional[str]. The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.ai/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.
1244

1245
            - truncate: typing.Optional[ClassifyRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
1246
                                                                  Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
1247
                                                                  If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
1248
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1249
        ---
1250
        from cohere import ClassifyExample
1251
        from cohere.client import Client
1252

1253
        client = Client(
1254
            client_name="YOUR_CLIENT_NAME",
1255
            token="YOUR_TOKEN",
1256
        )
1257
        client.classify(
1258
            inputs=["Confirm your email address", "hey i need u to send some $"],
1259
            examples=[
1260
                ClassifyExample(
1261
                    text="Dermatologists don't like her!",
1262
                    label="Spam",
1263
                ),
1264
                ClassifyExample(
1265
                    text="Hello, open to this?",
1266
                    label="Spam",
1267
                ),
1268
                ClassifyExample(
1269
                    text="I need help please wire me $1000 right now",
1270
                    label="Spam",
1271
                ),
1272
                ClassifyExample(
1273
                    text="Nice to know you ;)",
1274
                    label="Spam",
1275
                ),
1276
                ClassifyExample(
1277
                    text="Please help me?",
1278
                    label="Spam",
1279
                ),
1280
                ClassifyExample(
1281
                    text="Your parcel will be delivered today",
1282
                    label="Not spam",
1283
                ),
1284
                ClassifyExample(
1285
                    text="Review changes to our Terms and Conditions",
1286
                    label="Not spam",
1287
                ),
1288
                ClassifyExample(
1289
                    text="Weekly sync notes",
1290
                    label="Not spam",
1291
                ),
1292
                ClassifyExample(
1293
                    text="Re: Follow up from today’s meeting",
1294
                    label="Not spam",
1295
                ),
1296
                ClassifyExample(
1297
                    text="Pre-read for tomorrow",
1298
                    label="Not spam",
1299
                ),
1300
            ],
1301
            preset="my-preset-a58sbd",
1302
        )
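        A hedged sketch of reading the predictions back; the `classifications`, `prediction` and
        `confidence` attributes are assumptions about the shape of `ClassifyResponse`.

        response = client.classify(
            inputs=["Confirm your email address"],
            examples=examples,  # the same list of ClassifyExample objects shown above
        )
        for classification in response.classifications:
            print(classification.prediction, classification.confidence)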
        """
1304
        _request: typing.Dict[str, typing.Any] = {"inputs": inputs, "examples": examples}
1305
        if model is not OMIT:
1306
            _request["model"] = model
1307
        if preset is not OMIT:
1308
            _request["preset"] = preset
1309
        if truncate is not OMIT:
1310
            _request["truncate"] = truncate
1311
        _response = self._client_wrapper.httpx_client.request(
1312
            "POST",
1313
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "classify"),
1314
            params=jsonable_encoder(
1315
                request_options.get("additional_query_parameters") if request_options is not None else None
1316
            ),
1317
            json=jsonable_encoder(_request)
1318
            if request_options is None or request_options.get("additional_body_parameters") is None
1319
            else {
1320
                **jsonable_encoder(_request),
1321
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1322
            },
1323
            headers=jsonable_encoder(
1324
                remove_none_from_dict(
1325
                    {
1326
                        **self._client_wrapper.get_headers(),
1327
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1328
                    }
1329
                )
1330
            ),
1331
            timeout=request_options.get("timeout_in_seconds")
1332
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1333
            else self._client_wrapper.get_timeout(),
1334
            retries=0,
1335
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1336
        )
1337
        if 200 <= _response.status_code < 300:
1338
            return pydantic.parse_obj_as(ClassifyResponse, _response.json())  # type: ignore
1339
        if _response.status_code == 400:
1340
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1341
        if _response.status_code == 429:
1342
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1343
        if _response.status_code == 500:
1344
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1345
        try:
1346
            _response_json = _response.json()
1347
        except JSONDecodeError:
1348
            raise ApiError(status_code=_response.status_code, body=_response.text)
1349
        raise ApiError(status_code=_response.status_code, body=_response_json)
1350

1351
    def summarize(
1352
        self,
1353
        *,
1354
        text: str,
1355
        length: typing.Optional[SummarizeRequestLength] = OMIT,
1356
        format: typing.Optional[SummarizeRequestFormat] = OMIT,
1357
        model: typing.Optional[str] = OMIT,
1358
        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,
1359
        temperature: typing.Optional[float] = OMIT,
1360
        additional_command: typing.Optional[str] = OMIT,
1361
        request_options: typing.Optional[RequestOptions] = None,
1362
    ) -> SummarizeResponse:
1363
        """
1364
        > 🚧 Warning
1365
        >
1366
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
1367

1368
        Generates a summary in English for a given text.
1369

1370
        Parameters:
1371
            - text: str. The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.
1372

1373
            - length: typing.Optional[SummarizeRequestLength]. One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.
1374

1375
            - format: typing.Optional[SummarizeRequestFormat]. One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.
1376

1377
            - model: typing.Optional[str]. The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better.
1378

1379
            - extractiveness: typing.Optional[SummarizeRequestExtractiveness]. One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.
1380

1381
            - temperature: typing.Optional[float]. Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.
1382

1383
            - additional_command: typing.Optional[str]. A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda".
1384

1385
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1386
        ---
1387
        from cohere.client import Client
1388

1389
        client = Client(
1390
            client_name="YOUR_CLIENT_NAME",
1391
            token="YOUR_TOKEN",
1392
        )
1393
        client.summarize(
1394
            text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used fo\r all variants. Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.',
1395
        )
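        A hedged sketch of steering the summary and reading it back; the `summary` attribute is an
        assumption about the shape of `SummarizeResponse`, and the text placeholder stands in for
        the long passage shown above.

        response = client.summarize(
            text="<the ice cream passage above>",
            length="short",
            format="bullets",
        )
        print(response.summary)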
        """
1397
        _request: typing.Dict[str, typing.Any] = {"text": text}
1398
        if length is not OMIT:
1399
            _request["length"] = length
1400
        if format is not OMIT:
1401
            _request["format"] = format
1402
        if model is not OMIT:
1403
            _request["model"] = model
1404
        if extractiveness is not OMIT:
1405
            _request["extractiveness"] = extractiveness
1406
        if temperature is not OMIT:
1407
            _request["temperature"] = temperature
1408
        if additional_command is not OMIT:
1409
            _request["additional_command"] = additional_command
1410
        _response = self._client_wrapper.httpx_client.request(
1411
            "POST",
1412
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "summarize"),
1413
            params=jsonable_encoder(
1414
                request_options.get("additional_query_parameters") if request_options is not None else None
1415
            ),
1416
            json=jsonable_encoder(_request)
1417
            if request_options is None or request_options.get("additional_body_parameters") is None
1418
            else {
1419
                **jsonable_encoder(_request),
1420
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1421
            },
1422
            headers=jsonable_encoder(
1423
                remove_none_from_dict(
1424
                    {
1425
                        **self._client_wrapper.get_headers(),
1426
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1427
                    }
1428
                )
1429
            ),
1430
            timeout=request_options.get("timeout_in_seconds")
1431
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1432
            else self._client_wrapper.get_timeout(),
1433
            retries=0,
1434
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1435
        )
1436
        if 200 <= _response.status_code < 300:
1437
            return pydantic.parse_obj_as(SummarizeResponse, _response.json())  # type: ignore
1438
        if _response.status_code == 429:
1439
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1440
        try:
1441
            _response_json = _response.json()
1442
        except JSONDecodeError:
1443
            raise ApiError(status_code=_response.status_code, body=_response.text)
1444
        raise ApiError(status_code=_response.status_code, body=_response_json)
1445

1446
    def tokenize(
1447
        self, *, text: str, model: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None
1448
    ) -> TokenizeResponse:
1449
        """
1450
        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.
1451

1452
        Parameters:
1453
            - text: str. The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.
1454

1455
            - model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model.
1456

1457
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1458
        ---
1459
        from cohere.client import Client
1460

1461
        client = Client(
1462
            client_name="YOUR_CLIENT_NAME",
1463
            token="YOUR_TOKEN",
1464
        )
1465
        client.tokenize(
1466
            text="tokenize me! :D",
1467
            model="command",
1468
        )
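        A hedged sketch of per-request configuration through `request_options`; the
        `timeout_in_seconds` and `additional_headers` keys are the ones this client reads, while
        the header name below and the `tokens` attribute on `TokenizeResponse` are assumptions.

        response = client.tokenize(
            text="tokenize me! :D",
            model="command",
            request_options={
                "timeout_in_seconds": 30,
                "additional_headers": {"X-Request-Source": "docs-example"},
            },
        )
        print(response.tokens)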
        """
1470
        _request: typing.Dict[str, typing.Any] = {"text": text}
1471
        if model is not OMIT:
1472
            _request["model"] = model
1473
        _response = self._client_wrapper.httpx_client.request(
1474
            "POST",
1475
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "tokenize"),
1476
            params=jsonable_encoder(
1477
                request_options.get("additional_query_parameters") if request_options is not None else None
1478
            ),
1479
            json=jsonable_encoder(_request)
1480
            if request_options is None or request_options.get("additional_body_parameters") is None
1481
            else {
1482
                **jsonable_encoder(_request),
1483
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1484
            },
1485
            headers=jsonable_encoder(
1486
                remove_none_from_dict(
1487
                    {
1488
                        **self._client_wrapper.get_headers(),
1489
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1490
                    }
1491
                )
1492
            ),
1493
            timeout=request_options.get("timeout_in_seconds")
1494
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1495
            else self._client_wrapper.get_timeout(),
1496
            retries=0,
1497
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1498
        )
1499
        if 200 <= _response.status_code < 300:
1500
            return pydantic.parse_obj_as(TokenizeResponse, _response.json())  # type: ignore
1501
        if _response.status_code == 400:
1502
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1503
        if _response.status_code == 429:
1504
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1505
        if _response.status_code == 500:
1506
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1507
        try:
1508
            _response_json = _response.json()
1509
        except JSONDecodeError:
1510
            raise ApiError(status_code=_response.status_code, body=_response.text)
1511
        raise ApiError(status_code=_response.status_code, body=_response_json)
1512

1513
    def detokenize(
1514
        self,
1515
        *,
1516
        tokens: typing.Sequence[int],
1517
        model: typing.Optional[str] = OMIT,
1518
        request_options: typing.Optional[RequestOptions] = None,
1519
    ) -> DetokenizeResponse:
1520
        """
1521
        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.
1522

1523
        Parameters:
1524
            - tokens: typing.Sequence[int]. The list of tokens to be detokenized.
1525

1526
            - model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.
1527

1528
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1529
        ---
1530
        from cohere.client import Client
1531

1532
        client = Client(
1533
            client_name="YOUR_CLIENT_NAME",
1534
            token="YOUR_TOKEN",
1535
        )
1536
        client.detokenize(
1537
            tokens=[10104, 12221, 1315, 34, 1420, 69],
1538
        )
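        A hedged sketch of reading the detokenized text back; the `text` attribute is an
        assumption about the shape of `DetokenizeResponse`.

        response = client.detokenize(
            tokens=[10104, 12221, 1315, 34, 1420, 69],
        )
        print(response.text)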
        """
1540
        _request: typing.Dict[str, typing.Any] = {"tokens": tokens}
1541
        if model is not OMIT:
1542
            _request["model"] = model
1543
        _response = self._client_wrapper.httpx_client.request(
1544
            "POST",
1545
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "detokenize"),
1546
            params=jsonable_encoder(
1547
                request_options.get("additional_query_parameters") if request_options is not None else None
1548
            ),
1549
            json=jsonable_encoder(_request)
1550
            if request_options is None or request_options.get("additional_body_parameters") is None
1551
            else {
1552
                **jsonable_encoder(_request),
1553
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1554
            },
1555
            headers=jsonable_encoder(
1556
                remove_none_from_dict(
1557
                    {
1558
                        **self._client_wrapper.get_headers(),
1559
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1560
                    }
1561
                )
1562
            ),
1563
            timeout=request_options.get("timeout_in_seconds")
1564
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1565
            else self._client_wrapper.get_timeout(),
1566
            retries=0,
1567
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1568
        )
1569
        if 200 <= _response.status_code < 300:
1570
            return pydantic.parse_obj_as(DetokenizeResponse, _response.json())  # type: ignore
1571
        if _response.status_code == 429:
1572
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1573
        try:
1574
            _response_json = _response.json()
1575
        except JSONDecodeError:
1576
            raise ApiError(status_code=_response.status_code, body=_response.text)
1577
        raise ApiError(status_code=_response.status_code, body=_response_json)
1578


class AsyncBaseCohere:
    """
    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configurations that will propagate to these functions.

    Parameters:
        - base_url: typing.Optional[str]. The base url to use for requests from the client.

        - environment: ClientEnvironment. The environment to use for requests from the client. Import it with `from .environment import ClientEnvironment`.

                                          Defaults to ClientEnvironment.PRODUCTION

        - client_name: typing.Optional[str].

        - token: typing.Optional[typing.Union[str, typing.Callable[[], str]]]. The API key (or a callable returning one) used to authenticate requests; defaults to the `CO_API_KEY` environment variable.

        - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case a default is not set.

        - httpx_client: typing.Optional[httpx.AsyncClient]. The httpx client to use for making requests. A preconfigured client is used by default; this is useful should you want to pass in any custom httpx configuration.
    ---
    from cohere.client import AsyncClient

    client = AsyncClient(
        client_name="YOUR_CLIENT_NAME",
        token="YOUR_TOKEN",
    )
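    Because this client is asynchronous, its methods are awaited, and streaming methods such as
    `chat_stream` are consumed with `async for`. A minimal sketch, assuming a valid token is
    configured as above:

    import asyncio

    async def main() -> None:
        async for event in client.chat_stream(message="Hello!"):
            print(event)

    asyncio.run(main())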
    """
1606

1607
    def __init__(
1608
        self,
1609
        *,
1610
        base_url: typing.Optional[str] = None,
1611
        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,
1612
        client_name: typing.Optional[str] = None,
1613
        token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CO_API_KEY"),
1614
        timeout: typing.Optional[float] = None,
1615
        httpx_client: typing.Optional[httpx.AsyncClient] = None,
1616
    ):
1617
        _defaulted_timeout = timeout if timeout is not None else 300 if httpx_client is None else None
1618
        if token is None:
1619
            raise ApiError(body="The client must be instantiated be either passing in token or setting CO_API_KEY")
1620
        self._client_wrapper = AsyncClientWrapper(
1621
            base_url=_get_base_url(base_url=base_url, environment=environment),
1622
            client_name=client_name,
1623
            token=token,
1624
            httpx_client=httpx.AsyncClient(timeout=_defaulted_timeout) if httpx_client is None else httpx_client,
1625
            timeout=_defaulted_timeout,
1626
        )
1627
        self.embed_jobs = AsyncEmbedJobsClient(client_wrapper=self._client_wrapper)
1628
        self.datasets = AsyncDatasetsClient(client_wrapper=self._client_wrapper)
1629
        self.connectors = AsyncConnectorsClient(client_wrapper=self._client_wrapper)
1630
        self.models = AsyncModelsClient(client_wrapper=self._client_wrapper)
1631
        self.finetuning = AsyncFinetuningClient(client_wrapper=self._client_wrapper)
1632

1633
    async def chat_stream(
1634
        self,
1635
        *,
1636
        message: str,
1637
        model: typing.Optional[str] = OMIT,
1638
        preamble: typing.Optional[str] = OMIT,
1639
        chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
1640
        conversation_id: typing.Optional[str] = OMIT,
1641
        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,
1642
        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
1643
        search_queries_only: typing.Optional[bool] = OMIT,
1644
        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
1645
        temperature: typing.Optional[float] = OMIT,
1646
        max_tokens: typing.Optional[int] = OMIT,
1647
        k: typing.Optional[int] = OMIT,
1648
        p: typing.Optional[float] = OMIT,
1649
        seed: typing.Optional[float] = OMIT,
1650
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
1651
        frequency_penalty: typing.Optional[float] = OMIT,
1652
        presence_penalty: typing.Optional[float] = OMIT,
1653
        raw_prompting: typing.Optional[bool] = OMIT,
1654
        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
1655
        tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]] = OMIT,
1656
        request_options: typing.Optional[RequestOptions] = None,
1657
    ) -> typing.AsyncIterator[StreamedChatResponse]:
1658
        """
1659
        Generates a text response to a user message.
1660
        To learn how to use Chat with Streaming and RAG follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
1661

1662
        Parameters:
1663
            - message: str. Text input for the model to respond to.
1664

1665
            - model: typing.Optional[str]. Defaults to `command`.
1666

1667
                                           The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
1668

1669
            - preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
1670

1671
                                              The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
1672

1673
            - chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
1674

1675
                                                                           Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
1676

1677
                                                                           The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
1678

1679
            - conversation_id: typing.Optional[str]. An alternative to `chat_history`.
1680

1681
                                                     Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
1682

1683
            - prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
1684

1685
                                                                                     Dictates how the prompt will be constructed.
1686

1687
                                                                                     With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
1688

1689
                                                                                     With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
1690

1691
                                                                                     With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
1692

1693
            - connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
1694

1695
                                                                           When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
1696

1697
            - search_queries_only: typing.Optional[bool]. Defaults to `false`.
1698

1699
                                                          When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
1700

1701
            - documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
1702

1703
                                                                         Example:
1704
                                                                         `[
1705
                                                                           { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
1706
                                                                           { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
1707
                                                                         ]`
1708

1709
                                                                         Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
1710

1711
                                                                         Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
1712

1713
                                                                         An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
1714

1715
                                                                         An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
1716

1717
                                                                         See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
1718

1719
            - temperature: typing.Optional[float]. Defaults to `0.3`.
1720

1721
                                                   A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
1722

1723
                                                   Randomness can be further maximized by increasing the value of the `p` parameter.
1724

1725
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
1726

1727
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
1728
                                       Defaults to `0`, min value of `0`, max value of `500`.
1729

1730
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
1731
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
1732

1733
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
1734

1735
            - stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.
1736

1737
            - frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
1738

1739
                                                         Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
1740

1741
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
1742

1743
                                                        Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
1744

1745
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
1746

1747
            - tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
1748

1749
                                                             When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
1750

1751
            - tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
1752
                                                                                                Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
1753

1754
                                                                                                **Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list.
1755
                                                                                                ```
1756
                                                                                                tool_results = [
1757
                                                                                                  {
1758
                                                                                                    "call": {
1759
                                                                                                      "name": <tool name>,
1760
                                                                                                      "parameters": {
1761
                                                                                                        <param name>: <param value>
1762
                                                                                                      }
1763
                                                                                                    },
1764
                                                                                                    "outputs": [{
1765
                                                                                                      <key>: <value>
1766
                                                                                                    }]
1767
                                                                                                  },
1768
                                                                                                  ...
1769
                                                                                                ]
1770
                                                                                                ```
1771
                                                                                                **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
1772

1773
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
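
            A minimal sketch of a `request_options` mapping, limited to the keys this
            method actually reads when building the request below:

            request_options = {
                "timeout_in_seconds": 30,  # per-request timeout override
                "max_retries": 2,  # retry budget for this call
                "additional_headers": {"X-Request-Source": "example"},  # illustrative header
            }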
1774
        ---
1775
        from cohere import (
1776
            ChatConnector,
1777
            ChatMessage,
1780
            ChatStreamRequestToolResultsItem,
1781
            Tool,
1782
            ToolCall,
1783
            ToolParameterDefinitionsValue,
1784
        )
1785
        from cohere.client import AsyncClient
1786

1787
        client = AsyncClient(
1788
            client_name="YOUR_CLIENT_NAME",
1789
            token="YOUR_TOKEN",
1790
        )
1791
        async for event in client.chat_stream(
1792
            message="string",
1793
            model="string",
1794
            preamble="string",
1795
            chat_history=[
1796
                ChatMessage(
1797
                    role="CHATBOT",
1798
                    message="string",
1799
                )
1800
            ],
1801
            conversation_id="string",
1802
            prompt_truncation="OFF",
1803
            connectors=[
1804
                ChatConnector(
1805
                    id="string",
1806
                    user_access_token="string",
1807
                    continue_on_failure=True,
1808
                    options={"string": {"key": "value"}},
1809
                )
1810
            ],
1811
            search_queries_only=True,
1812
            documents=[{"string": "string"}],
1814
            temperature=1.1,
1815
            max_tokens=1,
1816
            k=1,
1817
            p=1.1,
1818
            seed=1.1,
1819
            stop_sequences=["string"],
1832
            frequency_penalty=1.1,
1833
            presence_penalty=1.1,
1834
            raw_prompting=True,
1835
            tools=[
1836
                Tool(
1837
                    name="string",
1838
                    description="string",
1839
                    parameter_definitions={
1840
                        "string": ToolParameterDefinitionsValue(
1841
                            description="string",
1842
                            type="string",
1843
                            required=True,
1844
                        )
1845
                    },
1846
                )
1847
            ],
1848
            tool_results=[
1849
                ChatStreamRequestToolResultsItem(
1850
                    call=ToolCall(name="string", parameters={"key": "value"}),
1851
                    outputs=[{"string": {"key": "value"}}],
1852
                )
1853
            ],
1854
        ):
            print(event)
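
        A minimal sketch of dispatching on streamed event types. The `event_type`
        values below are assumptions about the members of the `StreamedChatResponse`
        union, not something this snippet verifies:

        async for event in client.chat_stream(message="Hi there"):
            if event.event_type == "text-generation":  # assumed event name
                print(event.text, end="")
            elif event.event_type == "stream-end":  # assumed event name
                print()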
1855
        """
1856
        _request: typing.Dict[str, typing.Any] = {"message": message, "stream": True}
1857
        if model is not OMIT:
1858
            _request["model"] = model
1859
        if preamble is not OMIT:
1860
            _request["preamble"] = preamble
1861
        if chat_history is not OMIT:
1862
            _request["chat_history"] = chat_history
1863
        if conversation_id is not OMIT:
1864
            _request["conversation_id"] = conversation_id
1865
        if prompt_truncation is not OMIT:
1866
            _request["prompt_truncation"] = prompt_truncation
1867
        if connectors is not OMIT:
1868
            _request["connectors"] = connectors
1869
        if search_queries_only is not OMIT:
1870
            _request["search_queries_only"] = search_queries_only
1871
        if documents is not OMIT:
1872
            _request["documents"] = documents
1873
        if temperature is not OMIT:
1874
            _request["temperature"] = temperature
1875
        if max_tokens is not OMIT:
1876
            _request["max_tokens"] = max_tokens
1877
        if k is not OMIT:
1878
            _request["k"] = k
1879
        if p is not OMIT:
1880
            _request["p"] = p
1881
        if seed is not OMIT:
1882
            _request["seed"] = seed
1883
        if stop_sequences is not OMIT:
1884
            _request["stop_sequences"] = stop_sequences
1885
        if frequency_penalty is not OMIT:
1886
            _request["frequency_penalty"] = frequency_penalty
1887
        if presence_penalty is not OMIT:
1888
            _request["presence_penalty"] = presence_penalty
1889
        if raw_prompting is not OMIT:
1890
            _request["raw_prompting"] = raw_prompting
1891
        if tools is not OMIT:
1892
            _request["tools"] = tools
1893
        if tool_results is not OMIT:
1894
            _request["tool_results"] = tool_results
1895
        async with self._client_wrapper.httpx_client.stream(
1896
            "POST",
1897
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
1898
            params=jsonable_encoder(
1899
                request_options.get("additional_query_parameters") if request_options is not None else None
1900
            ),
1901
            json=jsonable_encoder(_request)
1902
            if request_options is None or request_options.get("additional_body_parameters") is None
1903
            else {
1904
                **jsonable_encoder(_request),
1905
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1906
            },
1907
            headers=jsonable_encoder(
1908
                remove_none_from_dict(
1909
                    {
1910
                        **self._client_wrapper.get_headers(),
1911
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
1912
                    }
1913
                )
1914
            ),
1915
            timeout=request_options.get("timeout_in_seconds")
1916
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
1917
            else self._client_wrapper.get_timeout(),
1918
            retries=0,
1919
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
1920
        ) as _response:
1921
            if 200 <= _response.status_code < 300:
1922
                async for _text in _response.aiter_lines():
1923
                    if len(_text) == 0:
1924
                        continue
1925
                    yield pydantic.parse_obj_as(StreamedChatResponse, json.loads(_text))  # type: ignore
1926
                return
1927
            await _response.aread()
1928
            if _response.status_code == 429:
1929
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
1930
            try:
1931
                _response_json = _response.json()
1932
            except JSONDecodeError:
1933
                raise ApiError(status_code=_response.status_code, body=_response.text)
1934
            raise ApiError(status_code=_response.status_code, body=_response_json)
1935

1936
    async def chat(
1937
        self,
1938
        *,
1939
        message: str,
1940
        model: typing.Optional[str] = OMIT,
1941
        preamble: typing.Optional[str] = OMIT,
1942
        chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
1943
        conversation_id: typing.Optional[str] = OMIT,
1944
        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,
1945
        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
1946
        search_queries_only: typing.Optional[bool] = OMIT,
1947
        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
1948
        temperature: typing.Optional[float] = OMIT,
1949
        max_tokens: typing.Optional[int] = OMIT,
1950
        k: typing.Optional[int] = OMIT,
1951
        p: typing.Optional[float] = OMIT,
1952
        seed: typing.Optional[float] = OMIT,
1953
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
1954
        frequency_penalty: typing.Optional[float] = OMIT,
1955
        presence_penalty: typing.Optional[float] = OMIT,
1956
        raw_prompting: typing.Optional[bool] = OMIT,
1957
        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
1958
        tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]] = OMIT,
1959
        request_options: typing.Optional[RequestOptions] = None,
1960
    ) -> NonStreamedChatResponse:
1961
        """
1962
        Generates a text response to a user message.
1963
        To learn how to use Chat with Streaming and RAG, follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
1964

1965
        Parameters:
1966
            - message: str. Text input for the model to respond to.
1967

1968
            - model: typing.Optional[str]. Defaults to `command`.
1969

1970
                                           The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
1971

1972
            - preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
1973

1974
                                              The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
1975

1976
            - chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
1977

1978
                                                                           Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
1979

1980
                                                                           The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
1981

1982
            - conversation_id: typing.Optional[str]. An alternative to `chat_history`.
1983

1984
                                                     Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
1985

1986
            - prompt_truncation: typing.Optional[ChatRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
1987

1988
                                                                               Dictates how the prompt will be constructed.
1989

1990
                                                                               With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
1991

1992
                                                                               With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
1993

1994
                                                                               With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
1995

1996
            - connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
1997

1998
                                                                           When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
1999

2000
            - search_queries_only: typing.Optional[bool]. Defaults to `false`.
2001

2002
                                                          When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
2003

2004
            - documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
2005

2006
                                                                         Example:
2007
                                                                         `[
2008
                                                                           { "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
2009
                                                                           { "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
2010
                                                                         ]`
2011

2012
                                                                         Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
2013

2014
                                                                         Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
2015

2016
                                                                         An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
2017

2018
                                                                         An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
2019

2020
                                                                         See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
2021

2022
            - temperature: typing.Optional[float]. Defaults to `0.3`.
2023

2024
                                                   A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
2025

2026
                                                   Randomness can be further maximized by increasing the value of the `p` parameter.
2027

2028
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
2029

2030
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
2031
                                       Defaults to `0`, min value of `0`, max value of `500`.
2032

2033
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
2034
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
2035

2036
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
2037

2038
            - stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point, not including the stop sequence.
2039

2040
            - frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
2041

2042
                                                         Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
2043

2044
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
2045

2046
                                                        Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
2047

2048
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
2049

2050
            - tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
2051

2052
                                                             When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
2053

2054
            - tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
2055
                                                                                          Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
2056

2057
                                                                                          **Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list.
2058
                                                                                          ```
2059
                                                                                          tool_results = [
2060
                                                                                            {
2061
                                                                                              "call": {
2062
                                                                                                "name": <tool name>,
2063
                                                                                                "parameters": {
2064
                                                                                                  <param name>: <param value>
2065
                                                                                                }
2066
                                                                                              },
2067
                                                                                              "outputs": [{
2068
                                                                                                <key>: <value>
2069
                                                                                              }]
2070
                                                                                            },
2071
                                                                                            ...
2072
                                                                                          ]
2073
                                                                                          ```
2074
                                                                                          **Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
2075

2076
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2077
        ---
2078
        from cohere import ChatMessage
2079
        from cohere.client import AsyncClient
2080

2081
        client = AsyncClient(
2082
            client_name="YOUR_CLIENT_NAME",
2083
            token="YOUR_TOKEN",
2084
        )
2085
        await client.chat(
2086
            message="Can you give me a global market overview of solar panels?",
2087
            chat_history=[
2088
                ChatMessage(
2089
                    role="CHATBOT",
2090
                    message="Hi!",
2091
                ),
2092
                ChatMessage(
2093
                    role="CHATBOT",
2094
                    message="How can I help you today?",
2095
                ),
2096
            ],
2097
            prompt_truncation="OFF",
2098
            temperature=0.3,
2099
        )
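
        A minimal sketch of a tool-use round trip. `my_tools` and `run_tool` are
        hypothetical stand-ins for your own tool definitions and dispatcher; the
        `tool_calls` and `text` fields are the response fields described in the
        parameters above:

        first = await client.chat(
            message="What's the weather in Toronto?",
            tools=my_tools,  # hypothetical list of Tool definitions
        )
        tool_results = [
            {"call": call, "outputs": [run_tool(call)]}  # run_tool is a hypothetical dispatcher
            for call in (first.tool_calls or [])
        ]
        answer = await client.chat(
            message="What's the weather in Toronto?",
            tools=my_tools,
            tool_results=tool_results,
        )
        print(answer.text)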
2100
        """
2101
        _request: typing.Dict[str, typing.Any] = {"message": message, "stream": False}
2102
        if model is not OMIT:
2103
            _request["model"] = model
2104
        if preamble is not OMIT:
2105
            _request["preamble"] = preamble
2106
        if chat_history is not OMIT:
2107
            _request["chat_history"] = chat_history
2108
        if conversation_id is not OMIT:
2109
            _request["conversation_id"] = conversation_id
2110
        if prompt_truncation is not OMIT:
2111
            _request["prompt_truncation"] = prompt_truncation
2112
        if connectors is not OMIT:
2113
            _request["connectors"] = connectors
2114
        if search_queries_only is not OMIT:
2115
            _request["search_queries_only"] = search_queries_only
2116
        if documents is not OMIT:
2117
            _request["documents"] = documents
2118
        if temperature is not OMIT:
2119
            _request["temperature"] = temperature
2120
        if max_tokens is not OMIT:
2121
            _request["max_tokens"] = max_tokens
2122
        if k is not OMIT:
2123
            _request["k"] = k
2124
        if p is not OMIT:
2125
            _request["p"] = p
2126
        if seed is not OMIT:
2127
            _request["seed"] = seed
2128
        if stop_sequences is not OMIT:
2129
            _request["stop_sequences"] = stop_sequences
2130
        if frequency_penalty is not OMIT:
2131
            _request["frequency_penalty"] = frequency_penalty
2132
        if presence_penalty is not OMIT:
2133
            _request["presence_penalty"] = presence_penalty
2134
        if raw_prompting is not OMIT:
2135
            _request["raw_prompting"] = raw_prompting
2136
        if tools is not OMIT:
2137
            _request["tools"] = tools
2138
        if tool_results is not OMIT:
2139
            _request["tool_results"] = tool_results
2140
        _response = await self._client_wrapper.httpx_client.request(
2141
            "POST",
2142
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
2143
            params=jsonable_encoder(
2144
                request_options.get("additional_query_parameters") if request_options is not None else None
2145
            ),
2146
            json=jsonable_encoder(_request)
2147
            if request_options is None or request_options.get("additional_body_parameters") is None
2148
            else {
2149
                **jsonable_encoder(_request),
2150
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2151
            },
2152
            headers=jsonable_encoder(
2153
                remove_none_from_dict(
2154
                    {
2155
                        **self._client_wrapper.get_headers(),
2156
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2157
                    }
2158
                )
2159
            ),
2160
            timeout=request_options.get("timeout_in_seconds")
2161
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2162
            else self._client_wrapper.get_timeout(),
2163
            retries=0,
2164
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2165
        )
2166
        if 200 <= _response.status_code < 300:
2167
            return pydantic.parse_obj_as(NonStreamedChatResponse, _response.json())  # type: ignore
2168
        if _response.status_code == 429:
2169
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2170
        try:
2171
            _response_json = _response.json()
2172
        except JSONDecodeError:
2173
            raise ApiError(status_code=_response.status_code, body=_response.text)
2174
        raise ApiError(status_code=_response.status_code, body=_response_json)
2175

2176
    async def generate_stream(
2177
        self,
2178
        *,
2179
        prompt: str,
2180
        model: typing.Optional[str] = OMIT,
2181
        num_generations: typing.Optional[int] = OMIT,
2182
        max_tokens: typing.Optional[int] = OMIT,
2183
        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,
2184
        temperature: typing.Optional[float] = OMIT,
2185
        seed: typing.Optional[float] = OMIT,
2186
        preset: typing.Optional[str] = OMIT,
2187
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
2188
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
2189
        k: typing.Optional[int] = OMIT,
2190
        p: typing.Optional[float] = OMIT,
2191
        frequency_penalty: typing.Optional[float] = OMIT,
2192
        presence_penalty: typing.Optional[float] = OMIT,
2193
        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,
2194
        raw_prompting: typing.Optional[bool] = OMIT,
2195
        request_options: typing.Optional[RequestOptions] = None,
2196
    ) -> typing.AsyncIterator[GenerateStreamedResponse]:
2197
        """
2198
        > 🚧 Warning
2199
        >
2200
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
2201

2202
        Generates realistic text conditioned on a given input.
2203

2204
        Parameters:
2205
            - prompt: str. The input text that serves as the starting point for generating the response.
2206
                           Note: The prompt will be pre-processed and modified before reaching the model.
2207

2208
            - model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
2209
                                           Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
2210
            - num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
2211

2212
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
2213

2214
                                                This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
2215

2216
                                                Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
2217

2218
            - truncate: typing.Optional[GenerateStreamRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
2219

2220
                                                                        Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
2221

2222
                                                                        If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
2223
            - temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
2224
                                                   Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
2225

2226
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
2227

2228
            - preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
2229
                                            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
2230

2231
            - end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
2232

2233
            - stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
2234

2235
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
2236
                                       Defaults to `0`, min value of `0`, max value of `500`.
2237

2238
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
2239
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
2240

2241
            - frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
2242

2243
                                                         Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
2244

2245
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
2246

2247
                                                        Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
2248

2249
                                                        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
2250

2251
            - return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
2252

2253
                                                                                           If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
2254

2255
                                                                                           If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
2256
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
2257

2258
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2259
        ---
2260
        from cohere.client import AsyncClient
2261

2262
        client = AsyncClient(
2263
            client_name="YOUR_CLIENT_NAME",
2264
            token="YOUR_TOKEN",
2265
        )
2266
        async for chunk in client.generate_stream(
2267
            prompt="string",
2268
            model="string",
2269
            num_generations=1,
2270
            max_tokens=1,
2271
            truncate="NONE",
2272
            temperature=1.1,
2273
            seed=1.1,
2274
            preset="string",
2275
            end_sequences=["string"],
2276
            stop_sequences=["string"],
2277
            k=1,
2278
            p=1.1,
2279
            frequency_penalty=1.1,
2280
            presence_penalty=1.1,
2281
            return_likelihoods="GENERATION",
2282
            raw_prompting=True,
2283
        ):
            print(chunk)
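
        A minimal sketch of collecting only the generated text from the stream. The
        `event_type` value below is an assumption about the members of the
        `GenerateStreamedResponse` union:

        pieces = []
        async for chunk in client.generate_stream(prompt="Write a haiku about the sea"):
            if chunk.event_type == "text-generation":  # assumed event name
                pieces.append(chunk.text)
        print("".join(pieces))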
2284
        """
2285
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": True}
2286
        if model is not OMIT:
2287
            _request["model"] = model
2288
        if num_generations is not OMIT:
2289
            _request["num_generations"] = num_generations
2290
        if max_tokens is not OMIT:
2291
            _request["max_tokens"] = max_tokens
2292
        if truncate is not OMIT:
2293
            _request["truncate"] = truncate
2294
        if temperature is not OMIT:
2295
            _request["temperature"] = temperature
2296
        if seed is not OMIT:
2297
            _request["seed"] = seed
2298
        if preset is not OMIT:
2299
            _request["preset"] = preset
2300
        if end_sequences is not OMIT:
2301
            _request["end_sequences"] = end_sequences
2302
        if stop_sequences is not OMIT:
2303
            _request["stop_sequences"] = stop_sequences
2304
        if k is not OMIT:
2305
            _request["k"] = k
2306
        if p is not OMIT:
2307
            _request["p"] = p
2308
        if frequency_penalty is not OMIT:
2309
            _request["frequency_penalty"] = frequency_penalty
2310
        if presence_penalty is not OMIT:
2311
            _request["presence_penalty"] = presence_penalty
2312
        if return_likelihoods is not OMIT:
2313
            _request["return_likelihoods"] = return_likelihoods
2314
        if raw_prompting is not OMIT:
2315
            _request["raw_prompting"] = raw_prompting
2316
        async with self._client_wrapper.httpx_client.stream(
2317
            "POST",
2318
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
2319
            params=jsonable_encoder(
2320
                request_options.get("additional_query_parameters") if request_options is not None else None
2321
            ),
2322
            json=jsonable_encoder(_request)
2323
            if request_options is None or request_options.get("additional_body_parameters") is None
2324
            else {
2325
                **jsonable_encoder(_request),
2326
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2327
            },
2328
            headers=jsonable_encoder(
2329
                remove_none_from_dict(
2330
                    {
2331
                        **self._client_wrapper.get_headers(),
2332
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2333
                    }
2334
                )
2335
            ),
2336
            timeout=request_options.get("timeout_in_seconds")
2337
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2338
            else self._client_wrapper.get_timeout(),
2339
            retries=0,
2340
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2341
        ) as _response:
2342
            if 200 <= _response.status_code < 300:
2343
                async for _text in _response.aiter_lines():
2344
                    if len(_text) == 0:
2345
                        continue
2346
                    yield pydantic.parse_obj_as(GenerateStreamedResponse, json.loads(_text))  # type: ignore
2347
                return
2348
            await _response.aread()
2349
            if _response.status_code == 400:
2350
                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2351
            if _response.status_code == 429:
2352
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2353
            if _response.status_code == 500:
2354
                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2355
            try:
2356
                _response_json = _response.json()
2357
            except JSONDecodeError:
2358
                raise ApiError(status_code=_response.status_code, body=_response.text)
2359
            raise ApiError(status_code=_response.status_code, body=_response_json)
2360

2361
    async def generate(
2362
        self,
2363
        *,
2364
        prompt: str,
2365
        model: typing.Optional[str] = OMIT,
2366
        num_generations: typing.Optional[int] = OMIT,
2367
        max_tokens: typing.Optional[int] = OMIT,
2368
        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,
2369
        temperature: typing.Optional[float] = OMIT,
2370
        seed: typing.Optional[float] = OMIT,
2371
        preset: typing.Optional[str] = OMIT,
2372
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
2373
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
2374
        k: typing.Optional[int] = OMIT,
2375
        p: typing.Optional[float] = OMIT,
2376
        frequency_penalty: typing.Optional[float] = OMIT,
2377
        presence_penalty: typing.Optional[float] = OMIT,
2378
        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,
2379
        raw_prompting: typing.Optional[bool] = OMIT,
2380
        request_options: typing.Optional[RequestOptions] = None,
2381
    ) -> Generation:
2382
        """
2383
        > 🚧 Warning
2384
        >
2385
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
2386

2387
        Generates realistic text conditioned on a given input.
2388

2389
        Parameters:
2390
            - prompt: str. The input text that serves as the starting point for generating the response.
2391
                           Note: The prompt will be pre-processed and modified before reaching the model.
2392

2393
            - model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
2394
                                           Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
2395
            - num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
2396

2397
            - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
2398

2399
                                                This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
2400

2401
                                                Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
2402

2403
            - truncate: typing.Optional[GenerateRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
2404

2405
                                                                  Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
2406

2407
                                                                  If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
2408
            - temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
2409
                                                   Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
2410

2411
            - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
2412

2413
            - preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
2414
                                            When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
2415

2416
            - end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
2417

2418
            - stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.
2419

2420
            - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
2421
                                       Defaults to `0`, min value of `0`, max value of `500`.
2422

2423
            - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
2424
                                         Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
2425

2426
            - frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
2427

2428
                                                         Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
2429

2430
            - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
2431

2432
                                                        Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
2433

2434
                                                        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
2435

2436
            - return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
2437

2438
                                                                                     If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
2439

2440
                                                                                     If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
2441
            - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
2442

2443
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2444
        ---
2445
        from cohere.client import AsyncClient
2446

2447
        client = AsyncClient(
2448
            client_name="YOUR_CLIENT_NAME",
2449
            token="YOUR_TOKEN",
2450
        )
2451
        await client.generate(
2452
            prompt="Please explain to me how LLMs work",
2453
            preset="my-preset-a58sbd",
2454
        )
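
        A hedged sketch of per-request configuration: the `request_options` keys
        shown (timeout_in_seconds, max_retries, additional_headers) are the ones
        this method reads, but treat the exact shape as an assumption rather than
        a guarantee.

        await client.generate(
            prompt="Please explain to me how LLMs work",
            request_options={
                "timeout_in_seconds": 30,
                "max_retries": 2,
                "additional_headers": {"X-Trace-Id": "example"},  # hypothetical header
            },
        )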
2455
        """
2456
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": False}
2457
        if model is not OMIT:
2458
            _request["model"] = model
2459
        if num_generations is not OMIT:
2460
            _request["num_generations"] = num_generations
2461
        if max_tokens is not OMIT:
2462
            _request["max_tokens"] = max_tokens
2463
        if truncate is not OMIT:
2464
            _request["truncate"] = truncate
2465
        if temperature is not OMIT:
2466
            _request["temperature"] = temperature
2467
        if seed is not OMIT:
2468
            _request["seed"] = seed
2469
        if preset is not OMIT:
2470
            _request["preset"] = preset
2471
        if end_sequences is not OMIT:
2472
            _request["end_sequences"] = end_sequences
2473
        if stop_sequences is not OMIT:
2474
            _request["stop_sequences"] = stop_sequences
2475
        if k is not OMIT:
2476
            _request["k"] = k
2477
        if p is not OMIT:
2478
            _request["p"] = p
2479
        if frequency_penalty is not OMIT:
2480
            _request["frequency_penalty"] = frequency_penalty
2481
        if presence_penalty is not OMIT:
2482
            _request["presence_penalty"] = presence_penalty
2483
        if return_likelihoods is not OMIT:
2484
            _request["return_likelihoods"] = return_likelihoods
2485
        if raw_prompting is not OMIT:
2486
            _request["raw_prompting"] = raw_prompting
2487
        _response = await self._client_wrapper.httpx_client.request(
2488
            "POST",
2489
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
2490
            params=jsonable_encoder(
2491
                request_options.get("additional_query_parameters") if request_options is not None else None
2492
            ),
2493
            json=jsonable_encoder(_request)
2494
            if request_options is None or request_options.get("additional_body_parameters") is None
2495
            else {
2496
                **jsonable_encoder(_request),
2497
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2498
            },
2499
            headers=jsonable_encoder(
2500
                remove_none_from_dict(
2501
                    {
2502
                        **self._client_wrapper.get_headers(),
2503
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2504
                    }
2505
                )
2506
            ),
2507
            timeout=request_options.get("timeout_in_seconds")
2508
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2509
            else self._client_wrapper.get_timeout(),
2510
            retries=0,
2511
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2512
        )
2513
        if 200 <= _response.status_code < 300:
2514
            return pydantic.parse_obj_as(Generation, _response.json())  # type: ignore
2515
        if _response.status_code == 400:
2516
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2517
        if _response.status_code == 429:
2518
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2519
        if _response.status_code == 500:
2520
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2521
        try:
2522
            _response_json = _response.json()
2523
        except JSONDecodeError:
2524
            raise ApiError(status_code=_response.status_code, body=_response.text)
2525
        raise ApiError(status_code=_response.status_code, body=_response_json)
2526

2527
    async def embed(
2528
        self,
2529
        *,
2530
        texts: typing.Sequence[str],
2531
        model: typing.Optional[str] = OMIT,
2532
        input_type: typing.Optional[EmbedInputType] = OMIT,
2533
        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
2534
        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,
2535
        request_options: typing.Optional[RequestOptions] = None,
2536
    ) -> EmbedResponse:
2537
        """
2538
        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.
2539

2540
        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.
2541

2542
        If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search).
2543

2544
        Parameters:
2545
            - texts: typing.Sequence[str]. An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.
2546

2547
            - model: typing.Optional[str]. Defaults to `embed-english-v2.0`.
2548

2549
                                           The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
2550

2551
                                           Available models and corresponding embedding dimensions:
2552

2553
                                           * `embed-english-v3.0`  1024
2554
                                           * `embed-multilingual-v3.0`  1024
2555
                                           * `embed-english-light-v3.0`  384
2556
                                           * `embed-multilingual-light-v3.0`  384
2557

2558
                                           * `embed-english-v2.0`  4096
2559
                                           * `embed-english-light-v2.0`  1024
2560
                                           * `embed-multilingual-v2.0`  768
2561
            - input_type: typing.Optional[EmbedInputType].
2562

2563
            - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]]. Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.
2564

2565
                                                                                * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
2566
                                                                                * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
2567
                                                                                * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
2568
                                                                                * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models.
2569
                                                                                * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
2570
            - truncate: typing.Optional[EmbedRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
2571

2572
                                                               Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
2573

2574
                                                               If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
2575
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2576
        ---
2577
        from cohere.client import AsyncClient
2578

2579
        client = AsyncClient(
2580
            client_name="YOUR_CLIENT_NAME",
2581
            token="YOUR_TOKEN",
2582
        )
2583
        await client.embed(
2584
            texts=["string"],
2585
            model="string",
2586
            input_type="search_document",
2587
            embedding_types=["float"],
2588
            truncate="NONE",
2589
        )
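
        A hedged sketch of reading the default (Embed Floats) response; the
        `embeddings` attribute name is an assumption based on that response type.

        response = await client.embed(
            texts=["hello", "goodbye"],
            model="embed-english-v3.0",
            input_type="search_document",
        )
        first_vector = response.embeddings[0]  # assumed: one float vector per input text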
2590
        """
2591
        _request: typing.Dict[str, typing.Any] = {"texts": texts}
2592
        if model is not OMIT:
2593
            _request["model"] = model
2594
        if input_type is not OMIT:
2595
            _request["input_type"] = input_type
2596
        if embedding_types is not OMIT:
2597
            _request["embedding_types"] = embedding_types
2598
        if truncate is not OMIT:
2599
            _request["truncate"] = truncate
2600
        _response = await self._client_wrapper.httpx_client.request(
2601
            "POST",
2602
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "embed"),
2603
            params=jsonable_encoder(
2604
                request_options.get("additional_query_parameters") if request_options is not None else None
2605
            ),
2606
            json=jsonable_encoder(_request)
2607
            if request_options is None or request_options.get("additional_body_parameters") is None
2608
            else {
2609
                **jsonable_encoder(_request),
2610
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2611
            },
2612
            headers=jsonable_encoder(
2613
                remove_none_from_dict(
2614
                    {
2615
                        **self._client_wrapper.get_headers(),
2616
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2617
                    }
2618
                )
2619
            ),
2620
            timeout=request_options.get("timeout_in_seconds")
2621
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2622
            else self._client_wrapper.get_timeout(),
2623
            retries=0,
2624
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2625
        )
2626
        if 200 <= _response.status_code < 300:
2627
            return pydantic.parse_obj_as(EmbedResponse, _response.json())  # type: ignore
2628
        if _response.status_code == 400:
2629
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2630
        if _response.status_code == 429:
2631
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2632
        if _response.status_code == 500:
2633
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2634
        try:
2635
            _response_json = _response.json()
2636
        except JSONDecodeError:
2637
            raise ApiError(status_code=_response.status_code, body=_response.text)
2638
        raise ApiError(status_code=_response.status_code, body=_response_json)
2639

2640
    async def rerank(
2641
        self,
2642
        *,
2643
        model: typing.Optional[str] = OMIT,
2644
        query: str,
2645
        documents: typing.Sequence[RerankRequestDocumentsItem],
2646
        top_n: typing.Optional[int] = OMIT,
2647
        return_documents: typing.Optional[bool] = OMIT,
2648
        max_chunks_per_doc: typing.Optional[int] = OMIT,
2649
        request_options: typing.Optional[RequestOptions] = None,
2650
    ) -> RerankResponse:
2651
        """
2652
        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.
2653

2654
        Parameters:
2655
            - model: typing.Optional[str]. The identifier of the model to use, one of: `rerank-english-v2.0`, `rerank-multilingual-v2.0`
2656

2657
            - query: str. The search query
2658

2659
            - documents: typing.Sequence[RerankRequestDocumentsItem]. A list of document objects or strings to rerank.
2660
                                                                      If a document is provided, the `text` field is required and all other fields will be preserved in the response.
2661

2662
                                                                      The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.
2663

2664
                                                                      We recommend a maximum of 1,000 documents for optimal endpoint performance.
2665
            - top_n: typing.Optional[int]. The number of most relevant documents or indices to return, defaults to the length of the documents
2666

2667
            - return_documents: typing.Optional[bool]. - If false, returns results without the doc text - the API will return a list of {index, relevance score} where index is inferred from the list passed into the request.
2668
                                                       - If true, returns results with the doc text passed in - the API will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
2669
            - max_chunks_per_doc: typing.Optional[int]. The maximum number of chunks to produce internally from a document
2670

2671
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2672
        ---
2673
        from cohere.client import AsyncClient
2674

2675
        client = AsyncClient(
2676
            client_name="YOUR_CLIENT_NAME",
2677
            token="YOUR_TOKEN",
2678
        )
2679
        await client.rerank(
2680
            model="rerank-english-v2.0",
2681
            query="What is the capital of the United States?",
2682
            documents=[
2683
                "Carson City is the capital city of the American state of Nevada.",
2684
                "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
2685
                "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
2686
                "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
2687
            ],
2688
        )
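
        A hedged sketch of consuming the ordered results; the `results`, `index`,
        and `relevance_score` attribute names are assumptions based on the
        {index, relevance score} shape described above.

        response = await client.rerank(
            model="rerank-english-v2.0",
            query="What is the capital of the United States?",
            documents=["Carson City is the capital city of the American state of Nevada."],
            top_n=1,
        )
        for result in response.results:
            print(result.index, result.relevance_score)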
2689
        """
2690
        _request: typing.Dict[str, typing.Any] = {"query": query, "documents": documents}
2691
        if model is not OMIT:
2692
            _request["model"] = model
2693
        if top_n is not OMIT:
2694
            _request["top_n"] = top_n
2695
        if return_documents is not OMIT:
2696
            _request["return_documents"] = return_documents
2697
        if max_chunks_per_doc is not OMIT:
2698
            _request["max_chunks_per_doc"] = max_chunks_per_doc
2699
        _response = await self._client_wrapper.httpx_client.request(
2700
            "POST",
2701
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "rerank"),
2702
            params=jsonable_encoder(
2703
                request_options.get("additional_query_parameters") if request_options is not None else None
2704
            ),
2705
            json=jsonable_encoder(_request)
2706
            if request_options is None or request_options.get("additional_body_parameters") is None
2707
            else {
2708
                **jsonable_encoder(_request),
2709
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2710
            },
2711
            headers=jsonable_encoder(
2712
                remove_none_from_dict(
2713
                    {
2714
                        **self._client_wrapper.get_headers(),
2715
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2716
                    }
2717
                )
2718
            ),
2719
            timeout=request_options.get("timeout_in_seconds")
2720
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2721
            else self._client_wrapper.get_timeout(),
2722
            retries=0,
2723
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2724
        )
2725
        if 200 <= _response.status_code < 300:
2726
            return pydantic.parse_obj_as(RerankResponse, _response.json())  # type: ignore
2727
        if _response.status_code == 429:
2728
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2729
        try:
2730
            _response_json = _response.json()
2731
        except JSONDecodeError:
2732
            raise ApiError(status_code=_response.status_code, body=_response.text)
2733
        raise ApiError(status_code=_response.status_code, body=_response_json)
2734

2735
    async def classify(
2736
        self,
2737
        *,
2738
        inputs: typing.Sequence[str],
2739
        examples: typing.Sequence[ClassifyExample],
2740
        model: typing.Optional[str] = OMIT,
2741
        preset: typing.Optional[str] = OMIT,
2742
        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,
2743
        request_options: typing.Optional[RequestOptions] = None,
2744
    ) -> ClassifyResponse:
2745
        """
2746
        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.
2747
        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
2748

2749
        Parameters:
2750
            - inputs: typing.Sequence[str]. A list of up to 96 texts to be classified. Each one must be a non-empty string.
2751
                                            There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models).
2752
                                            Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.
2753
            - examples: typing.Sequence[ClassifyExample]. An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`.
2754
                                                          Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
2755
            - model: typing.Optional[str]. The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID.
2756

2757
            - preset: typing.Optional[str]. The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.ai/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.
2758

2759
            - truncate: typing.Optional[ClassifyRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
2760
                                                                  Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
2761
                                                                  If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
2762
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2763
        ---
2764
        from cohere import ClassifyExample
2765
        from cohere.client import AsyncClient
2766

2767
        client = AsyncClient(
2768
            client_name="YOUR_CLIENT_NAME",
2769
            token="YOUR_TOKEN",
2770
        )
2771
        await client.classify(
2772
            inputs=["Confirm your email address", "hey i need u to send some $"],
2773
            examples=[
2774
                ClassifyExample(
2775
                    text="Dermatologists don't like her!",
2776
                    label="Spam",
2777
                ),
2778
                ClassifyExample(
2779
                    text="Hello, open to this?",
2780
                    label="Spam",
2781
                ),
2782
                ClassifyExample(
2783
                    text="I need help please wire me $1000 right now",
2784
                    label="Spam",
2785
                ),
2786
                ClassifyExample(
2787
                    text="Nice to know you ;)",
2788
                    label="Spam",
2789
                ),
2790
                ClassifyExample(
2791
                    text="Please help me?",
2792
                    label="Spam",
2793
                ),
2794
                ClassifyExample(
2795
                    text="Your parcel will be delivered today",
2796
                    label="Not spam",
2797
                ),
2798
                ClassifyExample(
2799
                    text="Review changes to our Terms and Conditions",
2800
                    label="Not spam",
2801
                ),
2802
                ClassifyExample(
2803
                    text="Weekly sync notes",
2804
                    label="Not spam",
2805
                ),
2806
                ClassifyExample(
2807
                    text="Re: Follow up from today’s meeting",
2808
                    label="Not spam",
2809
                ),
2810
                ClassifyExample(
2811
                    text="Pre-read for tomorrow",
2812
                    label="Not spam",
2813
                ),
2814
            ],
2815
            preset="my-preset-a58sbd",
2816
        )
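
        A hedged sketch of reading the predictions; the `classifications` and
        `prediction` attribute names are assumptions about the response shape.

        response = await client.classify(
            inputs=["Confirm your email address"],
            examples=[...],  # the same ClassifyExample list as above
        )
        for classification in response.classifications:
            print(classification.prediction)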
2817
        """
2818
        _request: typing.Dict[str, typing.Any] = {"inputs": inputs, "examples": examples}
2819
        if model is not OMIT:
2820
            _request["model"] = model
2821
        if preset is not OMIT:
2822
            _request["preset"] = preset
2823
        if truncate is not OMIT:
2824
            _request["truncate"] = truncate
2825
        _response = await self._client_wrapper.httpx_client.request(
2826
            "POST",
2827
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "classify"),
2828
            params=jsonable_encoder(
2829
                request_options.get("additional_query_parameters") if request_options is not None else None
2830
            ),
2831
            json=jsonable_encoder(_request)
2832
            if request_options is None or request_options.get("additional_body_parameters") is None
2833
            else {
2834
                **jsonable_encoder(_request),
2835
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2836
            },
2837
            headers=jsonable_encoder(
2838
                remove_none_from_dict(
2839
                    {
2840
                        **self._client_wrapper.get_headers(),
2841
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2842
                    }
2843
                )
2844
            ),
2845
            timeout=request_options.get("timeout_in_seconds")
2846
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2847
            else self._client_wrapper.get_timeout(),
2848
            retries=0,
2849
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2850
        )
2851
        if 200 <= _response.status_code < 300:
2852
            return pydantic.parse_obj_as(ClassifyResponse, _response.json())  # type: ignore
2853
        if _response.status_code == 400:
2854
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2855
        if _response.status_code == 429:
2856
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2857
        if _response.status_code == 500:
2858
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2859
        try:
2860
            _response_json = _response.json()
2861
        except JSONDecodeError:
2862
            raise ApiError(status_code=_response.status_code, body=_response.text)
2863
        raise ApiError(status_code=_response.status_code, body=_response_json)
2864

2865
    async def summarize(
2866
        self,
2867
        *,
2868
        text: str,
2869
        length: typing.Optional[SummarizeRequestLength] = OMIT,
2870
        format: typing.Optional[SummarizeRequestFormat] = OMIT,
2871
        model: typing.Optional[str] = OMIT,
2872
        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,
2873
        temperature: typing.Optional[float] = OMIT,
2874
        additional_command: typing.Optional[str] = OMIT,
2875
        request_options: typing.Optional[RequestOptions] = None,
2876
    ) -> SummarizeResponse:
2877
        """
2878
        > 🚧 Warning
2879
        >
2880
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
2881

2882
        Generates a summary in English for a given text.
2883

2884
        Parameters:
2885
            - text: str. The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.
2886

2887
            - length: typing.Optional[SummarizeRequestLength]. One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.
2888

2889
            - format: typing.Optional[SummarizeRequestFormat]. One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.
2890

2891
            - model: typing.Optional[str]. The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better.
2892

2893
            - extractiveness: typing.Optional[SummarizeRequestExtractiveness]. One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.
2894

2895
            - temperature: typing.Optional[float]. Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.
2896

2897
            - additional_command: typing.Optional[str]. A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda"
2898

2899
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2900
        ---
2901
        from cohere.client import AsyncClient
2902

2903
        client = AsyncClient(
2904
            client_name="YOUR_CLIENT_NAME",
2905
            token="YOUR_TOKEN",
2906
        )
2907
        await client.summarize(
2908
            text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used for all variants. Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.',
2909
        )
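
        A hedged sketch of reading the result; the `summary` attribute name is an
        assumption about the response shape.

        response = await client.summarize(
            text="...",  # the same long passage as above
        )
        print(response.summary)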
2910
        """
2911
        _request: typing.Dict[str, typing.Any] = {"text": text}
2912
        if length is not OMIT:
2913
            _request["length"] = length
2914
        if format is not OMIT:
2915
            _request["format"] = format
2916
        if model is not OMIT:
2917
            _request["model"] = model
2918
        if extractiveness is not OMIT:
2919
            _request["extractiveness"] = extractiveness
2920
        if temperature is not OMIT:
2921
            _request["temperature"] = temperature
2922
        if additional_command is not OMIT:
2923
            _request["additional_command"] = additional_command
2924
        _response = await self._client_wrapper.httpx_client.request(
2925
            "POST",
2926
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "summarize"),
2927
            params=jsonable_encoder(
2928
                request_options.get("additional_query_parameters") if request_options is not None else None
2929
            ),
2930
            json=jsonable_encoder(_request)
2931
            if request_options is None or request_options.get("additional_body_parameters") is None
2932
            else {
2933
                **jsonable_encoder(_request),
2934
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2935
            },
2936
            headers=jsonable_encoder(
2937
                remove_none_from_dict(
2938
                    {
2939
                        **self._client_wrapper.get_headers(),
2940
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
2941
                    }
2942
                )
2943
            ),
2944
            timeout=request_options.get("timeout_in_seconds")
2945
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
2946
            else self._client_wrapper.get_timeout(),
2947
            retries=0,
2948
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
2949
        )
2950
        if 200 <= _response.status_code < 300:
2951
            return pydantic.parse_obj_as(SummarizeResponse, _response.json())  # type: ignore
2952
        if _response.status_code == 429:
2953
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
2954
        try:
2955
            _response_json = _response.json()
2956
        except JSONDecodeError:
2957
            raise ApiError(status_code=_response.status_code, body=_response.text)
2958
        raise ApiError(status_code=_response.status_code, body=_response_json)
2959

2960
    async def tokenize(
2961
        self, *, text: str, model: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None
2962
    ) -> TokenizeResponse:
2963
        """
2964
        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.
2965

2966
        Parameters:
2967
            - text: str. The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.
2968

2969
            - model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model.
2970

2971
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2972
        ---
2973
        from cohere.client import AsyncClient
2974

2975
        client = AsyncClient(
2976
            client_name="YOUR_CLIENT_NAME",
2977
            token="YOUR_TOKEN",
2978
        )
2979
        await client.tokenize(
2980
            text="tokenize me! :D",
2981
            model="command",
2982
        )
2983
        """
2984
        _request: typing.Dict[str, typing.Any] = {"text": text}
2985
        if model is not OMIT:
2986
            _request["model"] = model
2987
        _response = await self._client_wrapper.httpx_client.request(
2988
            "POST",
2989
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "tokenize"),
2990
            params=jsonable_encoder(
2991
                request_options.get("additional_query_parameters") if request_options is not None else None
2992
            ),
2993
            json=jsonable_encoder(_request)
2994
            if request_options is None or request_options.get("additional_body_parameters") is None
2995
            else {
2996
                **jsonable_encoder(_request),
2997
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2998
            },
2999
            headers=jsonable_encoder(
3000
                remove_none_from_dict(
3001
                    {
3002
                        **self._client_wrapper.get_headers(),
3003
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
3004
                    }
3005
                )
3006
            ),
3007
            timeout=request_options.get("timeout_in_seconds")
3008
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
3009
            else self._client_wrapper.get_timeout(),
3010
            retries=0,
3011
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
3012
        )
3013
        if 200 <= _response.status_code < 300:
3014
            return pydantic.parse_obj_as(TokenizeResponse, _response.json())  # type: ignore
3015
        if _response.status_code == 400:
3016
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
3017
        if _response.status_code == 429:
3018
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
3019
        if _response.status_code == 500:
3020
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
3021
        try:
3022
            _response_json = _response.json()
3023
        except JSONDecodeError:
3024
            raise ApiError(status_code=_response.status_code, body=_response.text)
3025
        raise ApiError(status_code=_response.status_code, body=_response_json)
3026

3027
    async def detokenize(
3028
        self,
3029
        *,
3030
        tokens: typing.Sequence[int],
3031
        model: typing.Optional[str] = OMIT,
3032
        request_options: typing.Optional[RequestOptions] = None,
3033
    ) -> DetokenizeResponse:
3034
        """
3035
        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.
3036

3037
        Parameters:
3038
            - tokens: typing.Sequence[int]. The list of tokens to be detokenized.
3039

3040
            - model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.
3041

3042
            - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
3043
        ---
3044
        from cohere.client import AsyncClient
3045

3046
        client = AsyncClient(
3047
            client_name="YOUR_CLIENT_NAME",
3048
            token="YOUR_TOKEN",
3049
        )
3050
        await client.detokenize(
3051
            tokens=[10104, 12221, 1315, 34, 1420, 69],
3052
        )
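
        A hedged round-trip sketch combining tokenize and detokenize; the `tokens`
        attribute on the tokenize response is an assumption.

        tokenized = await client.tokenize(
            text="tokenize me! :D",
            model="command",
        )
        restored = await client.detokenize(
            tokens=tokenized.tokens,
            model="command",
        )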
3053
        """
3054
        _request: typing.Dict[str, typing.Any] = {"tokens": tokens}
3055
        if model is not OMIT:
3056
            _request["model"] = model
3057
        _response = await self._client_wrapper.httpx_client.request(
3058
            "POST",
3059
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "detokenize"),
3060
            params=jsonable_encoder(
3061
                request_options.get("additional_query_parameters") if request_options is not None else None
3062
            ),
3063
            json=jsonable_encoder(_request)
3064
            if request_options is None or request_options.get("additional_body_parameters") is None
3065
            else {
3066
                **jsonable_encoder(_request),
3067
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
3068
            },
3069
            headers=jsonable_encoder(
3070
                remove_none_from_dict(
3071
                    {
3072
                        **self._client_wrapper.get_headers(),
3073
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
3074
                    }
3075
                )
3076
            ),
3077
            timeout=request_options.get("timeout_in_seconds")
3078
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
3079
            else self._client_wrapper.get_timeout(),
3080
            retries=0,
3081
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
3082
        )
3083
        if 200 <= _response.status_code < 300:
3084
            return pydantic.parse_obj_as(DetokenizeResponse, _response.json())  # type: ignore
3085
        if _response.status_code == 429:
3086
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
3087
        try:
3088
            _response_json = _response.json()
3089
        except JSONDecodeError:
3090
            raise ApiError(status_code=_response.status_code, body=_response.text)
3091
        raise ApiError(status_code=_response.status_code, body=_response_json)
3092

3093

3094
def _get_base_url(*, base_url: typing.Optional[str] = None, environment: ClientEnvironment) -> str:
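    # An explicitly supplied base_url takes precedence over the environment's
    # default URL; if neither is available, the exception below asks the caller
    # to pass one of them.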
3095
    if base_url is not None:
3096
        return base_url
3097
    elif environment is not None:
3098
        return environment.value
3099
    else:
3100
        raise Exception("Please pass in either base_url or environment to construct the client")
3101
