# This file was auto-generated by Fern from our API Definition.

import json
import os
import typing
import urllib.parse
from json.decoder import JSONDecodeError

import httpx

from .connectors.client import AsyncConnectorsClient, ConnectorsClient
from .core.api_error import ApiError
from .core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
from .core.jsonable_encoder import jsonable_encoder
from .core.remove_none_from_dict import remove_none_from_dict
from .core.request_options import RequestOptions
from .datasets.client import AsyncDatasetsClient, DatasetsClient
from .embed_jobs.client import AsyncEmbedJobsClient, EmbedJobsClient
from .environment import ClientEnvironment
from .errors.bad_request_error import BadRequestError
from .errors.internal_server_error import InternalServerError
from .errors.too_many_requests_error import TooManyRequestsError
from .finetuning.client import AsyncFinetuningClient, FinetuningClient
from .models.client import AsyncModelsClient, ModelsClient
from .types.chat_connector import ChatConnector
from .types.chat_document import ChatDocument
from .types.chat_message import ChatMessage
from .types.chat_request_prompt_truncation import ChatRequestPromptTruncation
from .types.chat_request_tool_results_item import ChatRequestToolResultsItem
from .types.chat_stream_request_prompt_truncation import ChatStreamRequestPromptTruncation
from .types.chat_stream_request_tool_results_item import ChatStreamRequestToolResultsItem
from .types.classify_example import ClassifyExample
from .types.classify_request_truncate import ClassifyRequestTruncate
from .types.classify_response import ClassifyResponse
from .types.detokenize_response import DetokenizeResponse
from .types.embed_input_type import EmbedInputType
from .types.embed_request_truncate import EmbedRequestTruncate
from .types.embed_response import EmbedResponse
from .types.embedding_type import EmbeddingType
from .types.generate_request_return_likelihoods import GenerateRequestReturnLikelihoods
from .types.generate_request_truncate import GenerateRequestTruncate
from .types.generate_stream_request_return_likelihoods import GenerateStreamRequestReturnLikelihoods
from .types.generate_stream_request_truncate import GenerateStreamRequestTruncate
from .types.generate_streamed_response import GenerateStreamedResponse
from .types.generation import Generation
from .types.non_streamed_chat_response import NonStreamedChatResponse
from .types.rerank_request_documents_item import RerankRequestDocumentsItem
from .types.rerank_response import RerankResponse
from .types.streamed_chat_response import StreamedChatResponse
from .types.summarize_request_extractiveness import SummarizeRequestExtractiveness
from .types.summarize_request_format import SummarizeRequestFormat
from .types.summarize_request_length import SummarizeRequestLength
from .types.summarize_response import SummarizeResponse
from .types.tokenize_response import TokenizeResponse
from .types.tool import Tool

try:
    import pydantic.v1 as pydantic  # type: ignore
except ImportError:
    import pydantic  # type: ignore

# this is used as the default value for optional parameters
OMIT = typing.cast(typing.Any, ...)

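# A minimal sketch of why a sentinel is used instead of None (illustrative, not
# part of the generated client): None is a value a caller may legitimately want
# to send in the request body, so "parameter not provided" needs a distinct
# marker. The hypothetical helper below mirrors the `is not OMIT` checks used
# throughout this file:
#
#     def _build_request(k: typing.Optional[int] = OMIT) -> typing.Dict[str, typing.Any]:
#         request: typing.Dict[str, typing.Any] = {}
#         if k is not OMIT:
#             request["k"] = k  # an explicit None is sent; an omitted value is dropped
#         return request
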
class BaseCohere:
    """
    Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configuration that will propagate to these functions.

    Parameters:
        - base_url: typing.Optional[str]. The base url to use for requests from the client.

        - environment: ClientEnvironment. The environment to use for requests from the client. from .environment import ClientEnvironment

          Defaults to ClientEnvironment.PRODUCTION

        - client_name: typing.Optional[str].

        - token: typing.Optional[typing.Union[str, typing.Callable[[], str]]].

        - timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case a default is not set.

        - httpx_client: typing.Optional[httpx.Client]. The httpx client to use for making requests; a preconfigured client is used by default, however this is useful should you want to pass in any custom httpx configuration.
    ---
    from cohere.client import Client

    client = Client(
        client_name="YOUR_CLIENT_NAME",
        token="YOUR_TOKEN",
    )
    """

    def __init__(
        self,
        *,
        base_url: typing.Optional[str] = None,
        environment: ClientEnvironment = ClientEnvironment.PRODUCTION,
        client_name: typing.Optional[str] = None,
        token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CO_API_KEY"),
        timeout: typing.Optional[float] = None,
        httpx_client: typing.Optional[httpx.Client] = None,
    ):
        _defaulted_timeout = timeout if timeout is not None else 300 if httpx_client is None else None
        if token is None:
            raise ApiError(body="The client must be instantiated by either passing in token or setting CO_API_KEY")
        self._client_wrapper = SyncClientWrapper(
            base_url=_get_base_url(base_url=base_url, environment=environment),
            client_name=client_name,
            token=token,
            httpx_client=httpx.Client(timeout=_defaulted_timeout) if httpx_client is None else httpx_client,
            timeout=_defaulted_timeout,
        )
        self.embed_jobs = EmbedJobsClient(client_wrapper=self._client_wrapper)
        self.datasets = DatasetsClient(client_wrapper=self._client_wrapper)
        self.connectors = ConnectorsClient(client_wrapper=self._client_wrapper)
        self.models = ModelsClient(client_wrapper=self._client_wrapper)
        self.finetuning = FinetuningClient(client_wrapper=self._client_wrapper)

    def chat_stream(
        self,
        *,
        message: str,
        model: typing.Optional[str] = OMIT,
        preamble: typing.Optional[str] = OMIT,
        chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
        conversation_id: typing.Optional[str] = OMIT,
        prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,
        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
        search_queries_only: typing.Optional[bool] = OMIT,
        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        seed: typing.Optional[float] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        raw_prompting: typing.Optional[bool] = OMIT,
        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
        tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[StreamedChatResponse]:
144"""
145Generates a text response to a user message.
146To learn how to use Chat with Streaming and RAG follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
147
148Parameters:
149- message: str. Text input for the model to respond to.
150
151- model: typing.Optional[str]. Defaults to `command`.
152
153The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
154
155- preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
156
157The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
158
159- chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
160
161Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
162
163The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
164
165- conversation_id: typing.Optional[str]. An alternative to `chat_history`.
166
167Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.
168
169- prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
170
171Dictates how the prompt will be constructed.
172
173With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
174
175With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
176
177With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
178
179- connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
180
181When specified, the model's reply will be enriched with information found by quering each of the connectors (RAG).
182
183- search_queries_only: typing.Optional[bool]. Defaults to `false`.
184
185When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
186
187- documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
188
189Example:
190`[
191{ "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
192{ "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
193]`
194
195Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
196
197Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
198
199An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
200
201An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
202
203See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
204
205- temperature: typing.Optional[float]. Defaults to `0.3`.
206
207A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
208
209Randomness can be further maximized by increasing the value of the `p` parameter.
210
211- max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
212
213- k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
214Defaults to `0`, min value of `0`, max value of `500`.
215
216- p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
217Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
218
219- seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
220
221- stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.
222
223- frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
224
225Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
226
227- presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
228
229Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
230
231- raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
232
233- tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
234
235When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
236
237- tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
238Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
239
240**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list.
241```
242tool_results = [
243{
244"call": {
245"name": <tool name>,
246"parameters": {
247<param name>: <param value>
248}
249},
250"outputs": [{
251<key>: <value>
252}]
253},
254...
255]
256```
257**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
258
259- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
260---
261from cohere import (
262ChatConnector,
263ChatMessage,
264ChatStreamRequestConnectorsSearchOptions,
265ChatStreamRequestPromptOverride,
266ChatStreamRequestToolResultsItem,
267Tool,
268ToolCall,
269ToolParameterDefinitionsValue,
270)
271from cohere.client import Client
272
273client = Client(
274client_name="YOUR_CLIENT_NAME",
275token="YOUR_TOKEN",
276)
277client.chat_stream(
278message="string",
279model="string",
280preamble="string",
281chat_history=[
282ChatMessage(
283role="CHATBOT",
284message="string",
285)
286],
287conversation_id="string",
288prompt_truncation="OFF",
289connectors=[
290ChatConnector(
291id="string",
292user_access_token="string",
293continue_on_failure=True,
294options={"string": {"key": "value"}},
295)
296],
297search_queries_only=True,
298documents=[{"string": "string"}],
299citation_quality="fast",
300temperature=1.1,
301max_tokens=1,
302k=1,
303p=1.1,
304seed=1.1,
305stop_sequences=["string"],
306connectors_search_options=ChatStreamRequestConnectorsSearchOptions(
307model={"key": "value"},
308temperature={"key": "value"},
309max_tokens={"key": "value"},
310preamble={"key": "value"},
311seed=1.1,
312),
313prompt_override=ChatStreamRequestPromptOverride(
314preamble={"key": "value"},
315task_description={"key": "value"},
316style_guide={"key": "value"},
317),
318frequency_penalty=1.1,
319presence_penalty=1.1,
320raw_prompting=True,
321tools=[
322Tool(
323name="string",
324description="string",
325parameter_definitions={
326"string": ToolParameterDefinitionsValue(
327description="string",
328type="string",
329required=True,
330)
331},
332)
333],
334tool_results=[
335ChatStreamRequestToolResultsItem(
336call=ToolCall(),
337outputs=[{"string": {"key": "value"}}],
338)
339],
340)
341"""
        _request: typing.Dict[str, typing.Any] = {"message": message, "stream": True}
        if model is not OMIT:
            _request["model"] = model
        if preamble is not OMIT:
            _request["preamble"] = preamble
        if chat_history is not OMIT:
            _request["chat_history"] = chat_history
        if conversation_id is not OMIT:
            _request["conversation_id"] = conversation_id
        if prompt_truncation is not OMIT:
            _request["prompt_truncation"] = prompt_truncation
        if connectors is not OMIT:
            _request["connectors"] = connectors
        if search_queries_only is not OMIT:
            _request["search_queries_only"] = search_queries_only
        if documents is not OMIT:
            _request["documents"] = documents
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if max_tokens is not OMIT:
            _request["max_tokens"] = max_tokens
        if k is not OMIT:
            _request["k"] = k
        if p is not OMIT:
            _request["p"] = p
        if seed is not OMIT:
            _request["seed"] = seed
        if stop_sequences is not OMIT:
            _request["stop_sequences"] = stop_sequences
        if frequency_penalty is not OMIT:
            _request["frequency_penalty"] = frequency_penalty
        if presence_penalty is not OMIT:
            _request["presence_penalty"] = presence_penalty
        if raw_prompting is not OMIT:
            _request["raw_prompting"] = raw_prompting
        if tools is not OMIT:
            _request["tools"] = tools
        if tool_results is not OMIT:
            _request["tool_results"] = tool_results
        with self._client_wrapper.httpx_client.stream(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        ) as _response:
            if 200 <= _response.status_code < 300:
                for _text in _response.iter_lines():
                    if len(_text) == 0:
                        continue
                    yield pydantic.parse_obj_as(StreamedChatResponse, json.loads(_text))  # type: ignore
                return
            _response.read()
            if _response.status_code == 429:
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)

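    # A hedged usage sketch for the streaming call above (illustrative, not part
    # of the generated client). Event names follow the documented
    # StreamedChatResponse variants:
    #
    #     for event in client.chat_stream(message="Tell me a fact about penguins"):
    #         if event.event_type == "text-generation":
    #             print(event.text, end="")
    #         elif event.event_type == "stream-end":
    #             print()  # the full non-streamed response is on event.response
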
    def chat(
        self,
        *,
        message: str,
        model: typing.Optional[str] = OMIT,
        preamble: typing.Optional[str] = OMIT,
        chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
        conversation_id: typing.Optional[str] = OMIT,
        prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,
        connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
        search_queries_only: typing.Optional[bool] = OMIT,
        documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        seed: typing.Optional[float] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        raw_prompting: typing.Optional[bool] = OMIT,
        tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
        tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> NonStreamedChatResponse:
447"""
448Generates a text response to a user message.
449To learn how to use Chat with Streaming and RAG follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
450
451Parameters:
452- message: str. Text input for the model to respond to.
453
454- model: typing.Optional[str]. Defaults to `command`.
455
456The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
457
458- preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
459
460The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
461
462- chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
463
464Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
465
466The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
467
468- conversation_id: typing.Optional[str]. An alternative to `chat_history`.
469
470Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non empty string.
471
472- prompt_truncation: typing.Optional[ChatRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
473
474Dictates how the prompt will be constructed.
475
476With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
477
478With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
479
480With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
481
482- connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
483
484When specified, the model's reply will be enriched with information found by quering each of the connectors (RAG).
485
486- search_queries_only: typing.Optional[bool]. Defaults to `false`.
487
488When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
489
490- documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
491
492Example:
493`[
494{ "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
495{ "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
496]`
497
498Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
499
500Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
501
502An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
503
504An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
505
506See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
507
508- temperature: typing.Optional[float]. Defaults to `0.3`.
509
510A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
511
512Randomness can be further maximized by increasing the value of the `p` parameter.
513
514- max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
515
516- k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
517Defaults to `0`, min value of `0`, max value of `500`.
518
519- p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
520Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
521
522- seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
523
524- stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.
525
526- frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
527
528Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
529
530- presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
531
532Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
533
534- raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
535
536- tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
537
538When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
539
540- tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
541Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
542
543**Note**: `outputs` must be a list of objects. If your tool returns a single object (eg `{"status": 200}`), make sure to wrap it in a list.
544```
545tool_results = [
546{
547"call": {
548"name": <tool name>,
549"parameters": {
550<param name>: <param value>
551}
552},
553"outputs": [{
554<key>: <value>
555}]
556},
557...
558]
559```
560**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
561
562- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
563---
564from cohere import ChatMessage
565from cohere.client import Client
566
567client = Client(
568client_name="YOUR_CLIENT_NAME",
569token="YOUR_TOKEN",
570)
571client.chat(
572message="Can you give me a global market overview of solar panels?",
573chat_history=[
574ChatMessage(
575role="CHATBOT",
576message="Hi!",
577),
578ChatMessage(
579role="CHATBOT",
580message="How can I help you today?",
581),
582],
583prompt_truncation="OFF",
584temperature=0.3,
585)
586"""
        _request: typing.Dict[str, typing.Any] = {"message": message, "stream": False}
        if model is not OMIT:
            _request["model"] = model
        if preamble is not OMIT:
            _request["preamble"] = preamble
        if chat_history is not OMIT:
            _request["chat_history"] = chat_history
        if conversation_id is not OMIT:
            _request["conversation_id"] = conversation_id
        if prompt_truncation is not OMIT:
            _request["prompt_truncation"] = prompt_truncation
        if connectors is not OMIT:
            _request["connectors"] = connectors
        if search_queries_only is not OMIT:
            _request["search_queries_only"] = search_queries_only
        if documents is not OMIT:
            _request["documents"] = documents
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if max_tokens is not OMIT:
            _request["max_tokens"] = max_tokens
        if k is not OMIT:
            _request["k"] = k
        if p is not OMIT:
            _request["p"] = p
        if seed is not OMIT:
            _request["seed"] = seed
        if stop_sequences is not OMIT:
            _request["stop_sequences"] = stop_sequences
        if frequency_penalty is not OMIT:
            _request["frequency_penalty"] = frequency_penalty
        if presence_penalty is not OMIT:
            _request["presence_penalty"] = presence_penalty
        if raw_prompting is not OMIT:
            _request["raw_prompting"] = raw_prompting
        if tools is not OMIT:
            _request["tools"] = tools
        if tool_results is not OMIT:
            _request["tool_results"] = tool_results
        _response = self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(NonStreamedChatResponse, _response.json())  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

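    # A minimal error-handling sketch for the non-streaming call above
    # (illustrative): rate limits surface as the typed TooManyRequestsError,
    # anything else unhandled as ApiError.
    #
    #     try:
    #         response = client.chat(message="Hello!")
    #         print(response.text)
    #     except TooManyRequestsError:
    #         pass  # back off and retry
    #     except ApiError as e:
    #         print(e.status_code, e.body)
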
    def generate_stream(
        self,
        *,
        prompt: str,
        model: typing.Optional[str] = OMIT,
        num_generations: typing.Optional[int] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[float] = OMIT,
        preset: typing.Optional[str] = OMIT,
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,
        raw_prompting: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[GenerateStreamedResponse]:
683"""
684> 🚧 Warning
685>
686> This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
687
688Generates realistic text conditioned on a given input.
689
690Parameters:
691- prompt: str. The input text that serves as the starting point for generating the response.
692Note: The prompt will be pre-processed and modified before reaching the model.
693
694- model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
695Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
696- num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
697
698- max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
699
700This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
701
702Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
703
704- truncate: typing.Optional[GenerateStreamRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
705
706Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
707
708If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
709- temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
710Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
711
712- seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinsim cannot be totally guaranteed.
713
714- preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
715When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
716
717- end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
718
719- stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text.
720
721- k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
722Defaults to `0`, min value of `0`, max value of `500`.
723
724- p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
725Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
726
727- frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
728
729Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
730
731- presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
732
733Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
734
735Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
736
737- return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
738
739If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
740
741If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
742- raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
743
744- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
745---
746from cohere.client import Client
747
748client = Client(
749client_name="YOUR_CLIENT_NAME",
750token="YOUR_TOKEN",
751)
752client.generate_stream(
753prompt="string",
754model="string",
755num_generations=1,
756max_tokens=1,
757truncate="NONE",
758temperature=1.1,
759seed=1.1,
760preset="string",
761end_sequences=["string"],
762stop_sequences=["string"],
763k=1,
764p=1.1,
765frequency_penalty=1.1,
766presence_penalty=1.1,
767return_likelihoods="GENERATION",
768raw_prompting=True,
769)
770"""
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": True}
        if model is not OMIT:
            _request["model"] = model
        if num_generations is not OMIT:
            _request["num_generations"] = num_generations
        if max_tokens is not OMIT:
            _request["max_tokens"] = max_tokens
        if truncate is not OMIT:
            _request["truncate"] = truncate
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if seed is not OMIT:
            _request["seed"] = seed
        if preset is not OMIT:
            _request["preset"] = preset
        if end_sequences is not OMIT:
            _request["end_sequences"] = end_sequences
        if stop_sequences is not OMIT:
            _request["stop_sequences"] = stop_sequences
        if k is not OMIT:
            _request["k"] = k
        if p is not OMIT:
            _request["p"] = p
        if frequency_penalty is not OMIT:
            _request["frequency_penalty"] = frequency_penalty
        if presence_penalty is not OMIT:
            _request["presence_penalty"] = presence_penalty
        if return_likelihoods is not OMIT:
            _request["return_likelihoods"] = return_likelihoods
        if raw_prompting is not OMIT:
            _request["raw_prompting"] = raw_prompting
        with self._client_wrapper.httpx_client.stream(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        ) as _response:
            if 200 <= _response.status_code < 300:
                for _text in _response.iter_lines():
                    if len(_text) == 0:
                        continue
                    yield pydantic.parse_obj_as(GenerateStreamedResponse, json.loads(_text))  # type: ignore
                return
            _response.read()
            if _response.status_code == 400:
                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            if _response.status_code == 429:
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            if _response.status_code == 500:
                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)

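    # Illustrative sketch of consuming the legacy generate stream above,
    # assuming the documented GenerateStreamedResponse "text-generation"
    # variant (not part of the generated client):
    #
    #     for event in client.generate_stream(prompt="Once upon a time"):
    #         if event.event_type == "text-generation":
    #             print(event.text, end="")
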
    def generate(
        self,
        *,
        prompt: str,
        model: typing.Optional[str] = OMIT,
        num_generations: typing.Optional[int] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[float] = OMIT,
        preset: typing.Optional[str] = OMIT,
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,
        raw_prompting: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> Generation:
868"""
869> 🚧 Warning
870>
871> This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
872
873Generates realistic text conditioned on a given input.
874
875Parameters:
876- prompt: str. The input text that serves as the starting point for generating the response.
877Note: The prompt will be pre-processed and modified before reaching the model.
878
879- model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
880Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
881- num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
882
883- max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
884
885This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
886
887Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
888
889- truncate: typing.Optional[GenerateRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
890
891Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
892
893If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
894- temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
895Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
896
897- seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinsim cannot be totally guaranteed.
898
899- preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
900When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
901
902- end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.
903
904- stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included the text.
905
906- k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
907Defaults to `0`, min value of `0`, max value of `500`.
908
909- p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
910Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
911
912- frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
913
914Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
915
916- presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
917
918Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
919
920Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.
921
922- return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
923
924If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
925
926If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
927- raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
928
929- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
930---
931from cohere.client import Client
932
933client = Client(
934client_name="YOUR_CLIENT_NAME",
935token="YOUR_TOKEN",
936)
937client.generate(
938prompt="Please explain to me how LLMs work",
939preset="my-preset-a58sbd",
940)
941"""
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": False}
        if model is not OMIT:
            _request["model"] = model
        if num_generations is not OMIT:
            _request["num_generations"] = num_generations
        if max_tokens is not OMIT:
            _request["max_tokens"] = max_tokens
        if truncate is not OMIT:
            _request["truncate"] = truncate
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if seed is not OMIT:
            _request["seed"] = seed
        if preset is not OMIT:
            _request["preset"] = preset
        if end_sequences is not OMIT:
            _request["end_sequences"] = end_sequences
        if stop_sequences is not OMIT:
            _request["stop_sequences"] = stop_sequences
        if k is not OMIT:
            _request["k"] = k
        if p is not OMIT:
            _request["p"] = p
        if frequency_penalty is not OMIT:
            _request["frequency_penalty"] = frequency_penalty
        if presence_penalty is not OMIT:
            _request["presence_penalty"] = presence_penalty
        if return_likelihoods is not OMIT:
            _request["return_likelihoods"] = return_likelihoods
        if raw_prompting is not OMIT:
            _request["raw_prompting"] = raw_prompting
        _response = self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(Generation, _response.json())  # type: ignore
        if _response.status_code == 400:
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 500:
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

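    # Illustrative sketch of reading a non-streaming Generation result; with
    # num_generations unset, `generations` holds a single candidate:
    #
    #     response = client.generate(prompt="Say hi")
    #     print(response.generations[0].text)
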
    def embed(
        self,
        *,
        texts: typing.Sequence[str],
        model: typing.Optional[str] = OMIT,
        input_type: typing.Optional[EmbedInputType] = OMIT,
        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> EmbedResponse:
1023"""
1024This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.
1025
1026Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.
1027
1028If you want to learn more how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search).
1029
1030Parameters:
1031- texts: typing.Sequence[str]. An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.
1032
1033- model: typing.Optional[str]. Defaults to embed-english-v2.0
1034
1035The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
1036
1037Available models and corresponding embedding dimensions:
1038
1039* `embed-english-v3.0` 1024
1040* `embed-multilingual-v3.0` 1024
1041* `embed-english-light-v3.0` 384
1042* `embed-multilingual-light-v3.0` 384
1043
1044* `embed-english-v2.0` 4096
1045* `embed-english-light-v2.0` 1024
1046* `embed-multilingual-v2.0` 768
1047- input_type: typing.Optional[EmbedInputType].
1048
1049- embedding_types: typing.Optional[typing.Sequence[EmbeddingType]]. Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.
1050
1051* `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
1052* `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
1053* `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
1054* `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models.
1055* `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
1056- truncate: typing.Optional[EmbedRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
1057
1058Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
1059
1060If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
1061- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1062---
1063from cohere.client import Client
1064
1065client = Client(
1066client_name="YOUR_CLIENT_NAME",
1067token="YOUR_TOKEN",
1068)
1069client.embed(
1070texts=["string"],
1071model="string",
1072input_type="search_document",
1073embedding_types=["float"],
1074truncate="NONE",
1075)
1076"""
        _request: typing.Dict[str, typing.Any] = {"texts": texts}
        if model is not OMIT:
            _request["model"] = model
        if input_type is not OMIT:
            _request["input_type"] = input_type
        if embedding_types is not OMIT:
            _request["embedding_types"] = embedding_types
        if truncate is not OMIT:
            _request["truncate"] = truncate
        _response = self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "embed"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(EmbedResponse, _response.json())  # type: ignore
        if _response.status_code == 400:
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 500:
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

1126def rerank(
1127self,
1128*,
1129model: typing.Optional[str] = OMIT,
1130query: str,
1131documents: typing.Sequence[RerankRequestDocumentsItem],
1132top_n: typing.Optional[int] = OMIT,
1133return_documents: typing.Optional[bool] = OMIT,
1134max_chunks_per_doc: typing.Optional[int] = OMIT,
1135request_options: typing.Optional[RequestOptions] = None,
1136) -> RerankResponse:
1137"""
1138This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.
1139
1140Parameters:
- model: typing.Optional[str]. The identifier of the model to use, one of: `rerank-english-v2.0`, `rerank-multilingual-v2.0`
1142
1143- query: str. The search query
1144
1145- documents: typing.Sequence[RerankRequestDocumentsItem]. A list of document objects or strings to rerank.
If a document is provided, the `text` field is required and all other fields will be preserved in the response.
1147
1148The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.
1149
1150We recommend a maximum of 1,000 documents for optimal endpoint performance.
1151- top_n: typing.Optional[int]. The number of most relevant documents or indices to return, defaults to the length of the documents
1152
- return_documents: typing.Optional[bool]. - If false, returns results without the doc text - the API will return a list of {index, relevance score} where index is inferred from the list passed into the request.
- If true, returns results with the doc text passed in - the API will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
1155- max_chunks_per_doc: typing.Optional[int]. The maximum number of chunks to produce internally from a document
1156
1157- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1158---
1159from cohere.client import Client
1160
1161client = Client(
1162client_name="YOUR_CLIENT_NAME",
1163token="YOUR_TOKEN",
1164)
1165client.rerank(
1166model="rerank-english-v2.0",
1167query="What is the capital of the United States?",
1168documents=[
1169"Carson City is the capital city of the American state of Nevada.",
1170"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
1171"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
1172"Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
1173],
1174)
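
A sketch of reading the ranked results; the attribute names assume this SDK's
`RerankResponse` model:

response = client.rerank(
    model="rerank-english-v2.0",
    query="What is the capital of the United States?",
    documents=[
        "Carson City is the capital city of the American state of Nevada.",
        "Washington, D.C. is the capital of the United States.",
    ],
)
for result in response.results:
    # `result.index` points back into the `documents` list passed in;
    # a higher `relevance_score` means more relevant to the query.
    print(result.index, result.relevance_score)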
1175"""
1176_request: typing.Dict[str, typing.Any] = {"query": query, "documents": documents}
1177if model is not OMIT:
1178_request["model"] = model
1179if top_n is not OMIT:
1180_request["top_n"] = top_n
1181if return_documents is not OMIT:
1182_request["return_documents"] = return_documents
1183if max_chunks_per_doc is not OMIT:
1184_request["max_chunks_per_doc"] = max_chunks_per_doc
1185_response = self._client_wrapper.httpx_client.request(
1186"POST",
1187urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "rerank"),
1188params=jsonable_encoder(
1189request_options.get("additional_query_parameters") if request_options is not None else None
1190),
1191json=jsonable_encoder(_request)
1192if request_options is None or request_options.get("additional_body_parameters") is None
1193else {
1194**jsonable_encoder(_request),
1195**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1196},
1197headers=jsonable_encoder(
1198remove_none_from_dict(
1199{
1200**self._client_wrapper.get_headers(),
1201**(request_options.get("additional_headers", {}) if request_options is not None else {}),
1202}
1203)
1204),
1205timeout=request_options.get("timeout_in_seconds")
1206if request_options is not None and request_options.get("timeout_in_seconds") is not None
1207else self._client_wrapper.get_timeout(),
1208retries=0,
1209max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
1210)
1211if 200 <= _response.status_code < 300:
1212return pydantic.parse_obj_as(RerankResponse, _response.json()) # type: ignore
1213if _response.status_code == 429:
1214raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1215try:
1216_response_json = _response.json()
1217except JSONDecodeError:
1218raise ApiError(status_code=_response.status_code, body=_response.text)
1219raise ApiError(status_code=_response.status_code, body=_response_json)
1220
1221def classify(
1222self,
1223*,
1224inputs: typing.Sequence[str],
1225examples: typing.Sequence[ClassifyExample],
1226model: typing.Optional[str] = OMIT,
1227preset: typing.Optional[str] = OMIT,
1228truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,
1229request_options: typing.Optional[RequestOptions] = None,
1230) -> ClassifyResponse:
1231"""
1232This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.
1233Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
1234
1235Parameters:
1236- inputs: typing.Sequence[str]. A list of up to 96 texts to be classified. Each one must be a non-empty string.
1237There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models).
1238Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.
1239- examples: typing.Sequence[ClassifyExample]. An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`.
1240Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
1241- model: typing.Optional[str]. The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID.
1242
1243- preset: typing.Optional[str]. The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.ai/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.
1244
1245- truncate: typing.Optional[ClassifyRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
1246Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
1247If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
1248- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1249---
1250from cohere import ClassifyExample
1251from cohere.client import Client
1252
1253client = Client(
1254client_name="YOUR_CLIENT_NAME",
1255token="YOUR_TOKEN",
1256)
1257client.classify(
1258inputs=["Confirm your email address", "hey i need u to send some $"],
1259examples=[
1260ClassifyExample(
1261text="Dermatologists don't like her!",
1262label="Spam",
1263),
1264ClassifyExample(
1265text="Hello, open to this?",
1266label="Spam",
1267),
1268ClassifyExample(
1269text="I need help please wire me $1000 right now",
1270label="Spam",
1271),
1272ClassifyExample(
1273text="Nice to know you ;)",
1274label="Spam",
1275),
1276ClassifyExample(
1277text="Please help me?",
1278label="Spam",
1279),
1280ClassifyExample(
1281text="Your parcel will be delivered today",
1282label="Not spam",
1283),
1284ClassifyExample(
1285text="Review changes to our Terms and Conditions",
1286label="Not spam",
1287),
1288ClassifyExample(
1289text="Weekly sync notes",
1290label="Not spam",
1291),
1292ClassifyExample(
1293text="Re: Follow up from today’s meeting",
1294label="Not spam",
1295),
1296ClassifyExample(
1297text="Pre-read for tomorrow",
1298label="Not spam",
1299),
1300],
1301preset="my-preset-a58sbd",
1302)
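
A sketch of reading the predictions; the attribute names assume this SDK's
`ClassifyResponse` model (note each unique label needs at least 2 examples):

response = client.classify(
    inputs=["hey i need u to send some $"],
    examples=[
        ClassifyExample(text="I need help please wire me $1000 right now", label="Spam"),
        ClassifyExample(text="Dermatologists don't like her!", label="Spam"),
        ClassifyExample(text="Your parcel will be delivered today", label="Not spam"),
        ClassifyExample(text="Weekly sync notes", label="Not spam"),
    ],
)
for classification in response.classifications:
    print(classification.input, classification.prediction, classification.confidence)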
1303"""
1304_request: typing.Dict[str, typing.Any] = {"inputs": inputs, "examples": examples}
1305if model is not OMIT:
1306_request["model"] = model
1307if preset is not OMIT:
1308_request["preset"] = preset
1309if truncate is not OMIT:
1310_request["truncate"] = truncate
1311_response = self._client_wrapper.httpx_client.request(
1312"POST",
1313urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "classify"),
1314params=jsonable_encoder(
1315request_options.get("additional_query_parameters") if request_options is not None else None
1316),
1317json=jsonable_encoder(_request)
1318if request_options is None or request_options.get("additional_body_parameters") is None
1319else {
1320**jsonable_encoder(_request),
1321**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1322},
1323headers=jsonable_encoder(
1324remove_none_from_dict(
1325{
1326**self._client_wrapper.get_headers(),
1327**(request_options.get("additional_headers", {}) if request_options is not None else {}),
1328}
1329)
1330),
1331timeout=request_options.get("timeout_in_seconds")
1332if request_options is not None and request_options.get("timeout_in_seconds") is not None
1333else self._client_wrapper.get_timeout(),
1334retries=0,
1335max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
1336)
1337if 200 <= _response.status_code < 300:
1338return pydantic.parse_obj_as(ClassifyResponse, _response.json()) # type: ignore
1339if _response.status_code == 400:
1340raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1341if _response.status_code == 429:
1342raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1343if _response.status_code == 500:
1344raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1345try:
1346_response_json = _response.json()
1347except JSONDecodeError:
1348raise ApiError(status_code=_response.status_code, body=_response.text)
1349raise ApiError(status_code=_response.status_code, body=_response_json)
1350
1351def summarize(
1352self,
1353*,
1354text: str,
1355length: typing.Optional[SummarizeRequestLength] = OMIT,
1356format: typing.Optional[SummarizeRequestFormat] = OMIT,
1357model: typing.Optional[str] = OMIT,
1358extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,
1359temperature: typing.Optional[float] = OMIT,
1360additional_command: typing.Optional[str] = OMIT,
1361request_options: typing.Optional[RequestOptions] = None,
1362) -> SummarizeResponse:
1363"""
1364> 🚧 Warning
1365>
1366> This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.
1367
1368Generates a summary in English for a given text.
1369
1370Parameters:
1371- text: str. The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.
1372
1373- length: typing.Optional[SummarizeRequestLength]. One of `short`, `medium`, `long`, or `auto` defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.
1374
1375- format: typing.Optional[SummarizeRequestFormat]. One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.
1376
1377- model: typing.Optional[str]. The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better.
1378
1379- extractiveness: typing.Optional[SummarizeRequestExtractiveness]. One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.
1380
1381- temperature: typing.Optional[float]. Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.
1382
- additional_command: typing.Optional[str]. A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda".
1384
1385- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1386---
1387from cohere.client import Client
1388
1389client = Client(
1390client_name="YOUR_CLIENT_NAME",
1391token="YOUR_TOKEN",
1392)
1393client.summarize(
text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used for all variants. Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.',
1395)
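
The generated summary comes back on the response; a sketch, where the field
name assumes this SDK's `SummarizeResponse` model:

response = client.summarize(
    text=long_text,  # placeholder for a long English input, e.g. the passage above
    length="short",
    format="bullets",
)
print(response.summary)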
1396"""
1397_request: typing.Dict[str, typing.Any] = {"text": text}
1398if length is not OMIT:
1399_request["length"] = length
1400if format is not OMIT:
1401_request["format"] = format
1402if model is not OMIT:
1403_request["model"] = model
1404if extractiveness is not OMIT:
1405_request["extractiveness"] = extractiveness
1406if temperature is not OMIT:
1407_request["temperature"] = temperature
1408if additional_command is not OMIT:
1409_request["additional_command"] = additional_command
1410_response = self._client_wrapper.httpx_client.request(
1411"POST",
1412urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "summarize"),
1413params=jsonable_encoder(
1414request_options.get("additional_query_parameters") if request_options is not None else None
1415),
1416json=jsonable_encoder(_request)
1417if request_options is None or request_options.get("additional_body_parameters") is None
1418else {
1419**jsonable_encoder(_request),
1420**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1421},
1422headers=jsonable_encoder(
1423remove_none_from_dict(
1424{
1425**self._client_wrapper.get_headers(),
1426**(request_options.get("additional_headers", {}) if request_options is not None else {}),
1427}
1428)
1429),
1430timeout=request_options.get("timeout_in_seconds")
1431if request_options is not None and request_options.get("timeout_in_seconds") is not None
1432else self._client_wrapper.get_timeout(),
1433retries=0,
1434max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
1435)
1436if 200 <= _response.status_code < 300:
1437return pydantic.parse_obj_as(SummarizeResponse, _response.json()) # type: ignore
1438if _response.status_code == 429:
1439raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1440try:
1441_response_json = _response.json()
1442except JSONDecodeError:
1443raise ApiError(status_code=_response.status_code, body=_response.text)
1444raise ApiError(status_code=_response.status_code, body=_response_json)
1445
1446def tokenize(
1447self, *, text: str, model: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None
1448) -> TokenizeResponse:
1449"""
1450This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.
1451
1452Parameters:
1453- text: str. The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.
1454
1455- model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model.
1456
1457- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1458---
1459from cohere.client import Client
1460
1461client = Client(
1462client_name="YOUR_CLIENT_NAME",
1463token="YOUR_TOKEN",
1464)
1465client.tokenize(
1466text="tokenize me! :D",
1467model="command",
1468)
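
A sketch of reading the result; the field names assume this SDK's
`TokenizeResponse` model:

response = client.tokenize(text="tokenize me! :D", model="command")
print(response.tokens)         # token ids (the `detokenize` example below feeds these back)
print(response.token_strings)  # the string piece each token id maps to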
1469"""
1470_request: typing.Dict[str, typing.Any] = {"text": text}
1471if model is not OMIT:
1472_request["model"] = model
1473_response = self._client_wrapper.httpx_client.request(
1474"POST",
1475urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "tokenize"),
1476params=jsonable_encoder(
1477request_options.get("additional_query_parameters") if request_options is not None else None
1478),
1479json=jsonable_encoder(_request)
1480if request_options is None or request_options.get("additional_body_parameters") is None
1481else {
1482**jsonable_encoder(_request),
1483**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1484},
1485headers=jsonable_encoder(
1486remove_none_from_dict(
1487{
1488**self._client_wrapper.get_headers(),
1489**(request_options.get("additional_headers", {}) if request_options is not None else {}),
1490}
1491)
1492),
1493timeout=request_options.get("timeout_in_seconds")
1494if request_options is not None and request_options.get("timeout_in_seconds") is not None
1495else self._client_wrapper.get_timeout(),
1496retries=0,
1497max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
1498)
1499if 200 <= _response.status_code < 300:
1500return pydantic.parse_obj_as(TokenizeResponse, _response.json()) # type: ignore
1501if _response.status_code == 400:
1502raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1503if _response.status_code == 429:
1504raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1505if _response.status_code == 500:
1506raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1507try:
1508_response_json = _response.json()
1509except JSONDecodeError:
1510raise ApiError(status_code=_response.status_code, body=_response.text)
1511raise ApiError(status_code=_response.status_code, body=_response_json)
1512
1513def detokenize(
1514self,
1515*,
1516tokens: typing.Sequence[int],
1517model: typing.Optional[str] = OMIT,
1518request_options: typing.Optional[RequestOptions] = None,
1519) -> DetokenizeResponse:
1520"""
1521This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.
1522
1523Parameters:
1524- tokens: typing.Sequence[int]. The list of tokens to be detokenized.
1525
1526- model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.
1527
1528- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1529---
1530from cohere.client import Client
1531
1532client = Client(
1533client_name="YOUR_CLIENT_NAME",
1534token="YOUR_TOKEN",
1535)
1536client.detokenize(
1537tokens=[10104, 12221, 1315, 34, 1420, 69],
1538)
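
The decoded text is returned on the response; a sketch, where the field name
assumes this SDK's `DetokenizeResponse` model:

response = client.detokenize(
    tokens=[10104, 12221, 1315, 34, 1420, 69],
    model="command",
)
print(response.text)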
1539"""
1540_request: typing.Dict[str, typing.Any] = {"tokens": tokens}
1541if model is not OMIT:
1542_request["model"] = model
1543_response = self._client_wrapper.httpx_client.request(
1544"POST",
1545urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "detokenize"),
1546params=jsonable_encoder(
1547request_options.get("additional_query_parameters") if request_options is not None else None
1548),
1549json=jsonable_encoder(_request)
1550if request_options is None or request_options.get("additional_body_parameters") is None
1551else {
1552**jsonable_encoder(_request),
1553**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1554},
1555headers=jsonable_encoder(
1556remove_none_from_dict(
1557{
1558**self._client_wrapper.get_headers(),
1559**(request_options.get("additional_headers", {}) if request_options is not None else {}),
1560}
1561)
1562),
1563timeout=request_options.get("timeout_in_seconds")
1564if request_options is not None and request_options.get("timeout_in_seconds") is not None
1565else self._client_wrapper.get_timeout(),
1566retries=0,
1567max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
1568)
1569if 200 <= _response.status_code < 300:
1570return pydantic.parse_obj_as(DetokenizeResponse, _response.json()) # type: ignore
1571if _response.status_code == 429:
1572raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1573try:
1574_response_json = _response.json()
1575except JSONDecodeError:
1576raise ApiError(status_code=_response.status_code, body=_response.text)
1577raise ApiError(status_code=_response.status_code, body=_response_json)
1578
1579
1580class AsyncBaseCohere:
1581"""
Use this class to access the different functions within the SDK. You can instantiate any number of clients with different configurations that will propagate to these functions.
1583
1584Parameters:
1585- base_url: typing.Optional[str]. The base url to use for requests from the client.
1586
1587- environment: ClientEnvironment. The environment to use for requests from the client. from .environment import ClientEnvironment
1588
1589Defaults to ClientEnvironment.PRODUCTION
1590
1591- client_name: typing.Optional[str].
1592
1593- token: typing.Optional[typing.Union[str, typing.Callable[[], str]]].
1594
- timeout: typing.Optional[float]. The timeout to be used, in seconds, for requests. By default the timeout is 300 seconds, unless a custom httpx client is used, in which case no default is set.
1596
- httpx_client: typing.Optional[httpx.AsyncClient]. The httpx client to use for making requests. A preconfigured client is used by default; pass your own should you want any custom httpx configuration.
1598---
1599from cohere.client import AsyncClient
1600
1601client = AsyncClient(
1602client_name="YOUR_CLIENT_NAME",
1603token="YOUR_TOKEN",
1604)
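
The async client's methods are coroutines, so they must be awaited inside a
running event loop; a minimal sketch:

import asyncio

async def main() -> None:
    response = await client.chat(message="hello")
    print(response.text)

asyncio.run(main())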
1605"""
1606
1607def __init__(
1608self,
1609*,
1610base_url: typing.Optional[str] = None,
1611environment: ClientEnvironment = ClientEnvironment.PRODUCTION,
1612client_name: typing.Optional[str] = None,
1613token: typing.Optional[typing.Union[str, typing.Callable[[], str]]] = os.getenv("CO_API_KEY"),
1614timeout: typing.Optional[float] = None,
1615httpx_client: typing.Optional[httpx.AsyncClient] = None,
1616):
1617_defaulted_timeout = timeout if timeout is not None else 300 if httpx_client is None else None
1618if token is None:
raise ApiError(body="The client must be instantiated by either passing in token or setting CO_API_KEY")
1620self._client_wrapper = AsyncClientWrapper(
1621base_url=_get_base_url(base_url=base_url, environment=environment),
1622client_name=client_name,
1623token=token,
1624httpx_client=httpx.AsyncClient(timeout=_defaulted_timeout) if httpx_client is None else httpx_client,
1625timeout=_defaulted_timeout,
1626)
1627self.embed_jobs = AsyncEmbedJobsClient(client_wrapper=self._client_wrapper)
1628self.datasets = AsyncDatasetsClient(client_wrapper=self._client_wrapper)
1629self.connectors = AsyncConnectorsClient(client_wrapper=self._client_wrapper)
1630self.models = AsyncModelsClient(client_wrapper=self._client_wrapper)
1631self.finetuning = AsyncFinetuningClient(client_wrapper=self._client_wrapper)
1632
1633async def chat_stream(
1634self,
1635*,
1636message: str,
1637model: typing.Optional[str] = OMIT,
1638preamble: typing.Optional[str] = OMIT,
1639chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
1640conversation_id: typing.Optional[str] = OMIT,
1641prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation] = OMIT,
1642connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
1643search_queries_only: typing.Optional[bool] = OMIT,
1644documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
1645temperature: typing.Optional[float] = OMIT,
1646max_tokens: typing.Optional[int] = OMIT,
1647k: typing.Optional[int] = OMIT,
1648p: typing.Optional[float] = OMIT,
1649seed: typing.Optional[float] = OMIT,
1650stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
1651frequency_penalty: typing.Optional[float] = OMIT,
1652presence_penalty: typing.Optional[float] = OMIT,
1653raw_prompting: typing.Optional[bool] = OMIT,
1654tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
1655tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]] = OMIT,
1656request_options: typing.Optional[RequestOptions] = None,
1657) -> typing.AsyncIterator[StreamedChatResponse]:
1658"""
1659Generates a text response to a user message.
To learn how to use Chat with Streaming and RAG, follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
1661
1662Parameters:
1663- message: str. Text input for the model to respond to.
1664
1665- model: typing.Optional[str]. Defaults to `command`.
1666
1667The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
1668
1669- preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
1670
1671The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
1672
1673- chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
1674
1675Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
1676
1677The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
1678
1679- conversation_id: typing.Optional[str]. An alternative to `chat_history`.
1680
Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
1682
1683- prompt_truncation: typing.Optional[ChatStreamRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
1684
1685Dictates how the prompt will be constructed.
1686
1687With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
1688
1689With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
1690
1691With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
1692
1693- connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
1694
When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
1696
1697- search_queries_only: typing.Optional[bool]. Defaults to `false`.
1698
1699When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
1700
1701- documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
1702
1703Example:
1704`[
1705{ "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
1706{ "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
1707]`
1708
1709Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
1710
1711Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
1712
1713An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
1714
1715An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
1716
1717See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
1718
1719- temperature: typing.Optional[float]. Defaults to `0.3`.
1720
1721A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
1722
1723Randomness can be further maximized by increasing the value of the `p` parameter.
1724
1725- max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
1726
1727- k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
1728Defaults to `0`, min value of `0`, max value of `500`.
1729
1730- p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
1732
1733- seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
1734
1735- stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.
1736
1737- frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
1738
1739Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
1740
1741- presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
1742
1743Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
1744
1745- raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
1746
1747- tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
1748
1749When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
1750
1751- tool_results: typing.Optional[typing.Sequence[ChatStreamRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
1752Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
1753
**Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list.
1755```
1756tool_results = [
1757{
1758"call": {
1759"name": <tool name>,
1760"parameters": {
1761<param name>: <param value>
1762}
1763},
1764"outputs": [{
1765<key>: <value>
1766}]
1767},
1768...
1769]
1770```
1771**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
1772
1773- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
1774---
1775from cohere import (
1776ChatConnector,
1777ChatMessage,
1780ChatStreamRequestToolResultsItem,
1781Tool,
1782ToolCall,
1783ToolParameterDefinitionsValue,
1784)
1785from cohere.client import AsyncClient
1786
1787client = AsyncClient(
1788client_name="YOUR_CLIENT_NAME",
1789token="YOUR_TOKEN",
1790)
stream = client.chat_stream(
1792message="string",
1793model="string",
1794preamble="string",
1795chat_history=[
1796ChatMessage(
1797role="CHATBOT",
1798message="string",
1799)
1800],
1801conversation_id="string",
1802prompt_truncation="OFF",
1803connectors=[
1804ChatConnector(
1805id="string",
1806user_access_token="string",
1807continue_on_failure=True,
1808options={"string": {"key": "value"}},
1809)
1810],
1811search_queries_only=True,
1812documents=[{"string": "string"}],
1814temperature=1.1,
1815max_tokens=1,
1816k=1,
1817p=1.1,
1818seed=1.1,
1819stop_sequences=["string"],
1832frequency_penalty=1.1,
1833presence_penalty=1.1,
1834raw_prompting=True,
1835tools=[
1836Tool(
1837name="string",
1838description="string",
1839parameter_definitions={
1840"string": ToolParameterDefinitionsValue(
1841description="string",
1842type="string",
1843required=True,
1844)
1845},
1846)
1847],
1848tool_results=[
1849ChatStreamRequestToolResultsItem(
call=ToolCall(name="string", parameters={"string": {"key": "value"}}),
1851outputs=[{"string": {"key": "value"}}],
1852)
1853],
1854)
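
Since `chat_stream` returns an async iterator rather than an awaitable, the
stream built above is consumed with `async for`; a sketch, where the
`event_type` and `text` fields assume this SDK's streamed response models:

async for event in stream:
    # Text arrives incrementally in "text-generation" events; a final
    # "stream-end" event carries the aggregated response.
    if event.event_type == "text-generation":
        print(event.text, end="")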
1855"""
1856_request: typing.Dict[str, typing.Any] = {"message": message, "stream": True}
1857if model is not OMIT:
1858_request["model"] = model
1859if preamble is not OMIT:
1860_request["preamble"] = preamble
1861if chat_history is not OMIT:
1862_request["chat_history"] = chat_history
1863if conversation_id is not OMIT:
1864_request["conversation_id"] = conversation_id
1865if prompt_truncation is not OMIT:
1866_request["prompt_truncation"] = prompt_truncation
1867if connectors is not OMIT:
1868_request["connectors"] = connectors
1869if search_queries_only is not OMIT:
1870_request["search_queries_only"] = search_queries_only
1871if documents is not OMIT:
1872_request["documents"] = documents
1873if temperature is not OMIT:
1874_request["temperature"] = temperature
1875if max_tokens is not OMIT:
1876_request["max_tokens"] = max_tokens
1877if k is not OMIT:
1878_request["k"] = k
1879if p is not OMIT:
1880_request["p"] = p
1881if seed is not OMIT:
1882_request["seed"] = seed
1883if stop_sequences is not OMIT:
1884_request["stop_sequences"] = stop_sequences
1885if frequency_penalty is not OMIT:
1886_request["frequency_penalty"] = frequency_penalty
1887if presence_penalty is not OMIT:
1888_request["presence_penalty"] = presence_penalty
1889if raw_prompting is not OMIT:
1890_request["raw_prompting"] = raw_prompting
1891if tools is not OMIT:
1892_request["tools"] = tools
1893if tool_results is not OMIT:
1894_request["tool_results"] = tool_results
1895async with self._client_wrapper.httpx_client.stream(
1896"POST",
1897urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
1898params=jsonable_encoder(
1899request_options.get("additional_query_parameters") if request_options is not None else None
1900),
1901json=jsonable_encoder(_request)
1902if request_options is None or request_options.get("additional_body_parameters") is None
1903else {
1904**jsonable_encoder(_request),
1905**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
1906},
1907headers=jsonable_encoder(
1908remove_none_from_dict(
1909{
1910**self._client_wrapper.get_headers(),
1911**(request_options.get("additional_headers", {}) if request_options is not None else {}),
1912}
1913)
1914),
1915timeout=request_options.get("timeout_in_seconds")
1916if request_options is not None and request_options.get("timeout_in_seconds") is not None
1917else self._client_wrapper.get_timeout(),
1918retries=0,
1919max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
1920) as _response:
1921if 200 <= _response.status_code < 300:
1922async for _text in _response.aiter_lines():
1923if len(_text) == 0:
1924continue
1925yield pydantic.parse_obj_as(StreamedChatResponse, json.loads(_text)) # type: ignore
1926return
1927await _response.aread()
1928if _response.status_code == 429:
1929raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
1930try:
1931_response_json = _response.json()
1932except JSONDecodeError:
1933raise ApiError(status_code=_response.status_code, body=_response.text)
1934raise ApiError(status_code=_response.status_code, body=_response_json)
1935
1936async def chat(
1937self,
1938*,
1939message: str,
1940model: typing.Optional[str] = OMIT,
1941preamble: typing.Optional[str] = OMIT,
1942chat_history: typing.Optional[typing.Sequence[ChatMessage]] = OMIT,
1943conversation_id: typing.Optional[str] = OMIT,
1944prompt_truncation: typing.Optional[ChatRequestPromptTruncation] = OMIT,
1945connectors: typing.Optional[typing.Sequence[ChatConnector]] = OMIT,
1946search_queries_only: typing.Optional[bool] = OMIT,
1947documents: typing.Optional[typing.Sequence[ChatDocument]] = OMIT,
1948temperature: typing.Optional[float] = OMIT,
1949max_tokens: typing.Optional[int] = OMIT,
1950k: typing.Optional[int] = OMIT,
1951p: typing.Optional[float] = OMIT,
1952seed: typing.Optional[float] = OMIT,
1953stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
1954frequency_penalty: typing.Optional[float] = OMIT,
1955presence_penalty: typing.Optional[float] = OMIT,
1956raw_prompting: typing.Optional[bool] = OMIT,
1957tools: typing.Optional[typing.Sequence[Tool]] = OMIT,
1958tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]] = OMIT,
1959request_options: typing.Optional[RequestOptions] = None,
1960) -> NonStreamedChatResponse:
1961"""
1962Generates a text response to a user message.
To learn how to use Chat with Streaming and RAG, follow [this guide](https://docs.cohere.com/docs/cochat-beta#various-ways-of-using-the-chat-endpoint).
1964
1965Parameters:
1966- message: str. Text input for the model to respond to.
1967
1968- model: typing.Optional[str]. Defaults to `command`.
1969
1970The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
1971
1972- preamble: typing.Optional[str]. When specified, the default Cohere preamble will be replaced with the provided one. Preambles are a part of the prompt used to adjust the model's overall behavior and conversation style, and use the `SYSTEM` role.
1973
1974The `SYSTEM` role is also used for the contents of the optional `chat_history=` parameter. When used with the `chat_history=` parameter it adds content throughout a conversation. Conversely, when used with the `preamble=` parameter it adds content at the start of the conversation only.
1975
1976- chat_history: typing.Optional[typing.Sequence[ChatMessage]]. A list of previous messages between the user and the model, giving the model conversational context for responding to the user's `message`.
1977
1978Each item represents a single message in the chat history, excluding the current user turn. It has two properties: `role` and `message`. The `role` identifies the sender (`CHATBOT`, `SYSTEM`, or `USER`), while the `message` contains the text content.
1979
1980The chat_history parameter should not be used for `SYSTEM` messages in most cases. Instead, to add a `SYSTEM` role message at the beginning of a conversation, the `preamble` parameter should be used.
1981
1982- conversation_id: typing.Optional[str]. An alternative to `chat_history`.
1983
Providing a `conversation_id` creates or resumes a persisted conversation with the specified ID. The ID can be any non-empty string.
1985
1986- prompt_truncation: typing.Optional[ChatRequestPromptTruncation]. Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
1987
1988Dictates how the prompt will be constructed.
1989
1990With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be changed and ranked by relevance.
1991
1992With `prompt_truncation` set to "AUTO_PRESERVE_ORDER", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit. During this process the order of the documents and chat history will be preserved as they are inputted into the API.
1993
1994With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
1995
1996- connectors: typing.Optional[typing.Sequence[ChatConnector]]. Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
1997
When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
1999
2000- search_queries_only: typing.Optional[bool]. Defaults to `false`.
2001
2002When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
2003
2004- documents: typing.Optional[typing.Sequence[ChatDocument]]. A list of relevant documents that the model can cite to generate a more accurate reply. Each document is a string-string dictionary.
2005
2006Example:
2007`[
2008{ "title": "Tall penguins", "text": "Emperor penguins are the tallest." },
2009{ "title": "Penguin habitats", "text": "Emperor penguins only live in Antarctica." },
2010]`
2011
2012Keys and values from each document will be serialized to a string and passed to the model. The resulting generation will include citations that reference some of these documents.
2013
2014Some suggested keys are "text", "author", and "date". For better generation quality, it is recommended to keep the total word count of the strings in the dictionary to under 300 words.
2015
2016An `id` field (string) can be optionally supplied to identify the document in the citations. This field will not be passed to the model.
2017
2018An `_excludes` field (array of strings) can be optionally supplied to omit some key-value pairs from being shown to the model. The omitted fields will still show up in the citation object. The "_excludes" field will not be passed to the model.
2019
2020See ['Document Mode'](https://docs.cohere.com/docs/retrieval-augmented-generation-rag#document-mode) in the guide for more information.
2021
2022- temperature: typing.Optional[float]. Defaults to `0.3`.
2023
2024A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
2025
2026Randomness can be further maximized by increasing the value of the `p` parameter.
2027
2028- max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
2029
2030- k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
2031Defaults to `0`, min value of `0`, max value of `500`.
2032
2033- p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
2035
2036- seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.
2037
2038- stop_sequences: typing.Optional[typing.Sequence[str]]. A list of up to 5 strings that the model will use to stop generation. If the model generates a string that matches any of the strings in the list, it will stop generating tokens and return the generated text up to that point not including the stop sequence.
2039
2040- frequency_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
2041
2042Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
2043
2044- presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.
2045
2046Used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.
2047
2048- raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.
2049
2050- tools: typing.Optional[typing.Sequence[Tool]]. A list of available tools (functions) that the model may suggest invoking before producing a text response.
2051
2052When `tools` is passed (without `tool_results`), the `text` field in the response will be `""` and the `tool_calls` field in the response will be populated with a list of tool calls that need to be made. If no calls need to be made, the `tool_calls` array will be empty.
2053
2054- tool_results: typing.Optional[typing.Sequence[ChatRequestToolResultsItem]]. A list of results from invoking tools recommended by the model in the previous chat turn. Results are used to produce a text response and will be referenced in citations. When using `tool_results`, `tools` must be passed as well.
2055Each tool_result contains information about how it was invoked, as well as a list of outputs in the form of dictionaries.
2056
**Note**: `outputs` must be a list of objects. If your tool returns a single object (e.g. `{"status": 200}`), make sure to wrap it in a list.
2058```
2059tool_results = [
2060{
2061"call": {
2062"name": <tool name>,
2063"parameters": {
2064<param name>: <param value>
2065}
2066},
2067"outputs": [{
2068<key>: <value>
2069}]
2070},
2071...
2072]
2073```
2074**Note**: Chat calls with `tool_results` should not be included in the Chat history to avoid duplication of the message text.
2075
2076- request_options: typing.Optional[RequestOptions]. Request-specific configuration.
2077---
2078from cohere import ChatMessage
2079from cohere.client import AsyncClient
2080
2081client = AsyncClient(
2082client_name="YOUR_CLIENT_NAME",
2083token="YOUR_TOKEN",
2084)
2085await client.chat(
2086message="Can you give me a global market overview of solar panels?",
2087chat_history=[
2088ChatMessage(
2089role="CHATBOT",
2090message="Hi!",
2091),
2092ChatMessage(
2093role="CHATBOT",
2094message="How can I help you today?",
2095),
2096],
2097prompt_truncation="OFF",
2098temperature=0.3,
2099)
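
The reply is available on the returned object; a sketch, where the field name
assumes this SDK's `NonStreamedChatResponse` model:

response = await client.chat(message="Can you give me a global market overview of solar panels?")
print(response.text)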
2100"""
2101_request: typing.Dict[str, typing.Any] = {"message": message, "stream": False}
2102if model is not OMIT:
2103_request["model"] = model
2104if preamble is not OMIT:
2105_request["preamble"] = preamble
2106if chat_history is not OMIT:
2107_request["chat_history"] = chat_history
2108if conversation_id is not OMIT:
2109_request["conversation_id"] = conversation_id
2110if prompt_truncation is not OMIT:
2111_request["prompt_truncation"] = prompt_truncation
2112if connectors is not OMIT:
2113_request["connectors"] = connectors
2114if search_queries_only is not OMIT:
2115_request["search_queries_only"] = search_queries_only
2116if documents is not OMIT:
2117_request["documents"] = documents
2118if temperature is not OMIT:
2119_request["temperature"] = temperature
2120if max_tokens is not OMIT:
2121_request["max_tokens"] = max_tokens
2122if k is not OMIT:
2123_request["k"] = k
2124if p is not OMIT:
2125_request["p"] = p
2126if seed is not OMIT:
2127_request["seed"] = seed
2128if stop_sequences is not OMIT:
2129_request["stop_sequences"] = stop_sequences
2130if frequency_penalty is not OMIT:
2131_request["frequency_penalty"] = frequency_penalty
2132if presence_penalty is not OMIT:
2133_request["presence_penalty"] = presence_penalty
2134if raw_prompting is not OMIT:
2135_request["raw_prompting"] = raw_prompting
2136if tools is not OMIT:
2137_request["tools"] = tools
2138if tool_results is not OMIT:
2139_request["tool_results"] = tool_results
2140_response = await self._client_wrapper.httpx_client.request(
2141"POST",
2142urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "chat"),
2143params=jsonable_encoder(
2144request_options.get("additional_query_parameters") if request_options is not None else None
2145),
2146json=jsonable_encoder(_request)
2147if request_options is None or request_options.get("additional_body_parameters") is None
2148else {
2149**jsonable_encoder(_request),
2150**(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
2151},
2152headers=jsonable_encoder(
2153remove_none_from_dict(
2154{
2155**self._client_wrapper.get_headers(),
2156**(request_options.get("additional_headers", {}) if request_options is not None else {}),
2157}
2158)
2159),
2160timeout=request_options.get("timeout_in_seconds")
2161if request_options is not None and request_options.get("timeout_in_seconds") is not None
2162else self._client_wrapper.get_timeout(),
2163retries=0,
2164max_retries=request_options.get("max_retries") if request_options is not None else 0, # type: ignore
2165)
2166if 200 <= _response.status_code < 300:
2167return pydantic.parse_obj_as(NonStreamedChatResponse, _response.json()) # type: ignore
2168if _response.status_code == 429:
2169raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json())) # type: ignore
2170try:
2171_response_json = _response.json()
2172except JSONDecodeError:
2173raise ApiError(status_code=_response.status_code, body=_response.text)
2174raise ApiError(status_code=_response.status_code, body=_response_json)
2175
    async def generate_stream(
        self,
        *,
        prompt: str,
        model: typing.Optional[str] = OMIT,
        num_generations: typing.Optional[int] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        truncate: typing.Optional[GenerateStreamRequestTruncate] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[float] = OMIT,
        preset: typing.Optional[str] = OMIT,
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods] = OMIT,
        raw_prompting: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.AsyncIterator[GenerateStreamedResponse]:
        """
        > 🚧 Warning
        >
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.

        Generates realistic text conditioned on a given input.

        Parameters:
        - prompt: str. The input text that serves as the starting point for generating the response.
        Note: The prompt will be pre-processed and modified before reaching the model.

        - model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
        Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
        - num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.

        - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.

        This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.

        Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.

        - truncate: typing.Optional[GenerateStreamRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.

        Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.

        If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
        - temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
        Defaults to `0.75`, min value of `0.0`, max value of `5.0`.

        - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.

        - preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
        When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.

        - end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.

        - stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.

        - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
        Defaults to `0`, min value of `0`, max value of `500`.

        - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
        Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

        - frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.

        - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.

        Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.

        - return_likelihoods: typing.Optional[GenerateStreamRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.

        If `GENERATION` is selected, the token likelihoods will only be provided for generated text.

        If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
        - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.

        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        async for chunk in client.generate_stream(
            prompt="string",
            model="string",
            num_generations=1,
            max_tokens=1,
            truncate="NONE",
            temperature=1.1,
            seed=1.1,
            preset="string",
            end_sequences=["string"],
            stop_sequences=["string"],
            k=1,
            p=1.1,
            frequency_penalty=1.1,
            presence_penalty=1.1,
            return_likelihoods="GENERATION",
            raw_prompting=True,
        ):
            pass
        """
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": True}
        if model is not OMIT:
            _request["model"] = model
        if num_generations is not OMIT:
            _request["num_generations"] = num_generations
        if max_tokens is not OMIT:
            _request["max_tokens"] = max_tokens
        if truncate is not OMIT:
            _request["truncate"] = truncate
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if seed is not OMIT:
            _request["seed"] = seed
        if preset is not OMIT:
            _request["preset"] = preset
        if end_sequences is not OMIT:
            _request["end_sequences"] = end_sequences
        if stop_sequences is not OMIT:
            _request["stop_sequences"] = stop_sequences
        if k is not OMIT:
            _request["k"] = k
        if p is not OMIT:
            _request["p"] = p
        if frequency_penalty is not OMIT:
            _request["frequency_penalty"] = frequency_penalty
        if presence_penalty is not OMIT:
            _request["presence_penalty"] = presence_penalty
        if return_likelihoods is not OMIT:
            _request["return_likelihoods"] = return_likelihoods
        if raw_prompting is not OMIT:
            _request["raw_prompting"] = raw_prompting
        async with self._client_wrapper.httpx_client.stream(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        ) as _response:
            if 200 <= _response.status_code < 300:
                async for _text in _response.aiter_lines():
                    if len(_text) == 0:
                        continue
                    yield pydantic.parse_obj_as(GenerateStreamedResponse, json.loads(_text))  # type: ignore
                return
            await _response.aread()
            if _response.status_code == 400:
                raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            if _response.status_code == 429:
                raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            if _response.status_code == 500:
                raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
            try:
                _response_json = _response.json()
            except JSONDecodeError:
                raise ApiError(status_code=_response.status_code, body=_response.text)
            raise ApiError(status_code=_response.status_code, body=_response_json)

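    # Illustrative sketch (not part of the generated client): generate_stream is
    # an async generator, so it is consumed with `async for` rather than `await`.
    # The shape of each event varies by event type; see GenerateStreamedResponse.
    #
    #   async for event in client.generate_stream(prompt="Hello"):
    #       print(event)
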
    async def generate(
        self,
        *,
        prompt: str,
        model: typing.Optional[str] = OMIT,
        num_generations: typing.Optional[int] = OMIT,
        max_tokens: typing.Optional[int] = OMIT,
        truncate: typing.Optional[GenerateRequestTruncate] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        seed: typing.Optional[float] = OMIT,
        preset: typing.Optional[str] = OMIT,
        end_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        stop_sequences: typing.Optional[typing.Sequence[str]] = OMIT,
        k: typing.Optional[int] = OMIT,
        p: typing.Optional[float] = OMIT,
        frequency_penalty: typing.Optional[float] = OMIT,
        presence_penalty: typing.Optional[float] = OMIT,
        return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods] = OMIT,
        raw_prompting: typing.Optional[bool] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> Generation:
        """
        > 🚧 Warning
        >
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.

        Generates realistic text conditioned on a given input.

        Parameters:
        - prompt: str. The input text that serves as the starting point for generating the response.
        Note: The prompt will be pre-processed and modified before reaching the model.

        - model: typing.Optional[str]. The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
        Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
        - num_generations: typing.Optional[int]. The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.

        - max_tokens: typing.Optional[int]. The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.

        This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.

        Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.

        - truncate: typing.Optional[GenerateRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.

        Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.

        If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
        - temperature: typing.Optional[float]. A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
        Defaults to `0.75`, min value of `0.0`, max value of `5.0`.

        - seed: typing.Optional[float]. If specified, the backend will make a best effort to sample tokens deterministically, such that repeated requests with the same seed and parameters should return the same result. However, determinism cannot be totally guaranteed.

        - preset: typing.Optional[str]. Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
        When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.

        - end_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text.

        - stop_sequences: typing.Optional[typing.Sequence[str]]. The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text.

        - k: typing.Optional[int]. Ensures only the top `k` most likely tokens are considered for generation at each step.
        Defaults to `0`, min value of `0`, max value of `500`.

        - p: typing.Optional[float]. Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
        Defaults to `0.75`, min value of `0.01`, max value of `0.99`.

        - frequency_penalty: typing.Optional[float]. Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.

        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.

        - presence_penalty: typing.Optional[float]. Defaults to `0.0`, min value of `0.0`, max value of `1.0`.

        Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies.

        Using `frequency_penalty` in combination with `presence_penalty` is not supported on newer models.

        - return_likelihoods: typing.Optional[GenerateRequestReturnLikelihoods]. One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.

        If `GENERATION` is selected, the token likelihoods will only be provided for generated text.

        If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
        - raw_prompting: typing.Optional[bool]. When enabled, the user's prompt will be sent to the model without any pre-processing.

        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.generate(
            prompt="Please explain to me how LLMs work",
            preset="my-preset-a58sbd",
        )
        """
        _request: typing.Dict[str, typing.Any] = {"prompt": prompt, "stream": False}
        if model is not OMIT:
            _request["model"] = model
        if num_generations is not OMIT:
            _request["num_generations"] = num_generations
        if max_tokens is not OMIT:
            _request["max_tokens"] = max_tokens
        if truncate is not OMIT:
            _request["truncate"] = truncate
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if seed is not OMIT:
            _request["seed"] = seed
        if preset is not OMIT:
            _request["preset"] = preset
        if end_sequences is not OMIT:
            _request["end_sequences"] = end_sequences
        if stop_sequences is not OMIT:
            _request["stop_sequences"] = stop_sequences
        if k is not OMIT:
            _request["k"] = k
        if p is not OMIT:
            _request["p"] = p
        if frequency_penalty is not OMIT:
            _request["frequency_penalty"] = frequency_penalty
        if presence_penalty is not OMIT:
            _request["presence_penalty"] = presence_penalty
        if return_likelihoods is not OMIT:
            _request["return_likelihoods"] = return_likelihoods
        if raw_prompting is not OMIT:
            _request["raw_prompting"] = raw_prompting
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "generate"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(Generation, _response.json())  # type: ignore
        if _response.status_code == 400:
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 500:
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

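    # Illustrative sketch (not part of the generated client): a Generation
    # response carries its completions under `generations`, one entry per
    # requested generation.
    #
    #   response = await client.generate(prompt="Please explain to me how LLMs work")
    #   print(response.generations[0].text)
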
    async def embed(
        self,
        *,
        texts: typing.Sequence[str],
        model: typing.Optional[str] = OMIT,
        input_type: typing.Optional[EmbedInputType] = OMIT,
        embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
        truncate: typing.Optional[EmbedRequestTruncate] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> EmbedResponse:
        """
        This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents.

        Embeddings can be used to create text classifiers as well as empower semantic search. To learn more about embeddings, see the embedding page.

        If you want to learn more about how to use the embedding model, have a look at the [Semantic Search Guide](/docs/semantic-search).

        Parameters:
        - texts: typing.Sequence[str]. An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.

        - model: typing.Optional[str]. Defaults to `embed-english-v2.0`.

        The identifier of the model. Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.

        Available models and corresponding embedding dimensions:

        * `embed-english-v3.0` 1024
        * `embed-multilingual-v3.0` 1024
        * `embed-english-light-v3.0` 384
        * `embed-multilingual-light-v3.0` 384

        * `embed-english-v2.0` 4096
        * `embed-english-light-v2.0` 1024
        * `embed-multilingual-v2.0` 768
        - input_type: typing.Optional[EmbedInputType].

        - embedding_types: typing.Optional[typing.Sequence[EmbeddingType]]. Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.

        * `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
        * `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
        * `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
        * `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models.
        * `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
        - truncate: typing.Optional[EmbedRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.

        Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.

        If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.embed(
            texts=["string"],
            model="string",
            input_type="search_document",
            embedding_types=["float"],
            truncate="NONE",
        )
        """
        _request: typing.Dict[str, typing.Any] = {"texts": texts}
        if model is not OMIT:
            _request["model"] = model
        if input_type is not OMIT:
            _request["input_type"] = input_type
        if embedding_types is not OMIT:
            _request["embedding_types"] = embedding_types
        if truncate is not OMIT:
            _request["truncate"] = truncate
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "embed"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(EmbedResponse, _response.json())  # type: ignore
        if _response.status_code == 400:
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 500:
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

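    # Illustrative sketch (not part of the generated client): requesting typed
    # embeddings. EmbedResponse is a union of the float and by-type response
    # shapes, so the attribute layout of the result should be checked against
    # those generated types before use.
    #
    #   response = await client.embed(
    #       texts=["hello", "goodbye"],
    #       model="embed-english-v3.0",
    #       input_type="search_document",
    #       embedding_types=["float"],
    #   )
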
    async def rerank(
        self,
        *,
        model: typing.Optional[str] = OMIT,
        query: str,
        documents: typing.Sequence[RerankRequestDocumentsItem],
        top_n: typing.Optional[int] = OMIT,
        return_documents: typing.Optional[bool] = OMIT,
        max_chunks_per_doc: typing.Optional[int] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> RerankResponse:
        """
        This endpoint takes in a query and a list of texts and produces an ordered array with each text assigned a relevance score.

        Parameters:
        - model: typing.Optional[str]. The identifier of the model to use, one of: `rerank-english-v2.0`, `rerank-multilingual-v2.0`

        - query: str. The search query

        - documents: typing.Sequence[RerankRequestDocumentsItem]. A list of document objects or strings to rerank.
        If a document is provided, the `text` field is required and all other fields will be preserved in the response.

        The total max chunks (length of documents * max_chunks_per_doc) must be less than 10000.

        We recommend a maximum of 1,000 documents for optimal endpoint performance.
        - top_n: typing.Optional[int]. The number of most relevant documents or indices to return, defaults to the length of the documents

        - return_documents: typing.Optional[bool]. - If false, returns results without the doc text - the api will return a list of {index, relevance score} where index is inferred from the list passed into the request.
        - If true, returns results with the doc text passed in - the api will return an ordered list of {index, text, relevance score} where index + text refers to the list passed into the request.
        - max_chunks_per_doc: typing.Optional[int]. The maximum number of chunks to produce internally from a document

        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.rerank(
            model="rerank-english-v2.0",
            query="What is the capital of the United States?",
            documents=[
                "Carson City is the capital city of the American state of Nevada.",
                "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
                "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.",
                "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
            ],
        )
        """
        _request: typing.Dict[str, typing.Any] = {"query": query, "documents": documents}
        if model is not OMIT:
            _request["model"] = model
        if top_n is not OMIT:
            _request["top_n"] = top_n
        if return_documents is not OMIT:
            _request["return_documents"] = return_documents
        if max_chunks_per_doc is not OMIT:
            _request["max_chunks_per_doc"] = max_chunks_per_doc
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "rerank"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(RerankResponse, _response.json())  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

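    # Illustrative sketch (not part of the generated client): rerank results come
    # back ordered by relevance; each result carries an `index` into the input
    # documents and a `relevance_score` (names assumed from RerankResponse).
    #
    #   response = await client.rerank(query="...", documents=["...", "..."])
    #   for result in response.results:
    #       print(result.index, result.relevance_score)
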
    async def classify(
        self,
        *,
        inputs: typing.Sequence[str],
        examples: typing.Sequence[ClassifyExample],
        model: typing.Optional[str] = OMIT,
        preset: typing.Optional[str] = OMIT,
        truncate: typing.Optional[ClassifyRequestTruncate] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> ClassifyResponse:
        """
        This endpoint makes a prediction about which label fits the specified text inputs best. To make a prediction, Classify uses the provided `examples` of text + label pairs as a reference.
        Note: [Fine-tuned models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.

        Parameters:
        - inputs: typing.Sequence[str]. A list of up to 96 texts to be classified. Each one must be a non-empty string.
        There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models).
        Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.
        - examples: typing.Sequence[ClassifyExample]. An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...", label: "..."}`.
        Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
        - model: typing.Optional[str]. The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID.

        - preset: typing.Optional[str]. The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.ai/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters.

        - truncate: typing.Optional[ClassifyRequestTruncate]. One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
        Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
        If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere import ClassifyExample
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.classify(
            inputs=["Confirm your email address", "hey i need u to send some $"],
            examples=[
                ClassifyExample(
                    text="Dermatologists don't like her!",
                    label="Spam",
                ),
                ClassifyExample(
                    text="Hello, open to this?",
                    label="Spam",
                ),
                ClassifyExample(
                    text="I need help please wire me $1000 right now",
                    label="Spam",
                ),
                ClassifyExample(
                    text="Nice to know you ;)",
                    label="Spam",
                ),
                ClassifyExample(
                    text="Please help me?",
                    label="Spam",
                ),
                ClassifyExample(
                    text="Your parcel will be delivered today",
                    label="Not spam",
                ),
                ClassifyExample(
                    text="Review changes to our Terms and Conditions",
                    label="Not spam",
                ),
                ClassifyExample(
                    text="Weekly sync notes",
                    label="Not spam",
                ),
                ClassifyExample(
                    text="Re: Follow up from today’s meeting",
                    label="Not spam",
                ),
                ClassifyExample(
                    text="Pre-read for tomorrow",
                    label="Not spam",
                ),
            ],
            preset="my-preset-a58sbd",
        )
        """
        _request: typing.Dict[str, typing.Any] = {"inputs": inputs, "examples": examples}
        if model is not OMIT:
            _request["model"] = model
        if preset is not OMIT:
            _request["preset"] = preset
        if truncate is not OMIT:
            _request["truncate"] = truncate
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "classify"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(ClassifyResponse, _response.json())  # type: ignore
        if _response.status_code == 400:
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 500:
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

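    # Illustrative sketch (not part of the generated client): each input yields
    # one classification with a predicted label (attribute names assumed from
    # ClassifyResponse).
    #
    #   response = await client.classify(inputs=["..."], examples=[...])
    #   for classification in response.classifications:
    #       print(classification.prediction)
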
    async def summarize(
        self,
        *,
        text: str,
        length: typing.Optional[SummarizeRequestLength] = OMIT,
        format: typing.Optional[SummarizeRequestFormat] = OMIT,
        model: typing.Optional[str] = OMIT,
        extractiveness: typing.Optional[SummarizeRequestExtractiveness] = OMIT,
        temperature: typing.Optional[float] = OMIT,
        additional_command: typing.Optional[str] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> SummarizeResponse:
        """
        > 🚧 Warning
        >
        > This API is marked as "Legacy" and is no longer maintained. Follow the [migration guide](/docs/migrating-from-cogenerate-to-cochat) to start using the Chat API.

        Generates a summary in English for a given text.

        Parameters:
        - text: str. The text to generate a summary for. Can be up to 100,000 characters long. Currently the only supported language is English.

        - length: typing.Optional[SummarizeRequestLength]. One of `short`, `medium`, `long`, or `auto`, defaults to `auto`. Indicates the approximate length of the summary. If `auto` is selected, the best option will be picked based on the input text.

        - format: typing.Optional[SummarizeRequestFormat]. One of `paragraph`, `bullets`, or `auto`, defaults to `auto`. Indicates the style in which the summary will be delivered - in a free form paragraph or in bullet points. If `auto` is selected, the best option will be picked based on the input text.

        - model: typing.Optional[str]. The identifier of the model to generate the summary with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental). Smaller, "light" models are faster, while larger models will perform better.

        - extractiveness: typing.Optional[SummarizeRequestExtractiveness]. One of `low`, `medium`, `high`, or `auto`, defaults to `auto`. Controls how close to the original text the summary is. `high` extractiveness summaries will lean towards reusing sentences verbatim, while `low` extractiveness summaries will tend to paraphrase more. If `auto` is selected, the best option will be picked based on the input text.

        - temperature: typing.Optional[float]. Ranges from 0 to 5. Controls the randomness of the output. Lower values tend to generate more “predictable” output, while higher values tend to generate more “creative” output. The sweet spot is typically between 0 and 1.

        - additional_command: typing.Optional[str]. A free-form instruction for modifying how the summaries get generated. Should complete the sentence "Generate a summary _". E.g. "focusing on the next steps" or "written by Yoda"

        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.summarize(
            text='Ice cream is a sweetened frozen food typically eaten as a snack or dessert. It may be made from milk or cream and is flavoured with a sweetener, either sugar or an alternative, and a spice, such as cocoa or vanilla, or with fruit such as strawberries or peaches. It can also be made by whisking a flavored cream base and liquid nitrogen together. Food coloring is sometimes added, in addition to stabilizers. The mixture is cooled below the freezing point of water and stirred to incorporate air spaces and to prevent detectable ice crystals from forming. The result is a smooth, semi-solid foam that is solid at very low temperatures (below 2 °C or 35 °F). It becomes more malleable as its temperature increases.\n\nThe meaning of the name "ice cream" varies from one country to another. In some countries, such as the United States, "ice cream" applies only to a specific variety, and most governments regulate the commercial use of the various terms according to the relative quantities of the main ingredients, notably the amount of cream. Products that do not meet the criteria to be called ice cream are sometimes labelled "frozen dairy dessert" instead. In other countries, such as Italy and Argentina, one word is used for all variants. Analogues made from dairy alternatives, such as goat\'s or sheep\'s milk, or milk substitutes (e.g., soy, cashew, coconut, almond milk or tofu), are available for those who are lactose intolerant, allergic to dairy protein or vegan.',
        )
        """
        _request: typing.Dict[str, typing.Any] = {"text": text}
        if length is not OMIT:
            _request["length"] = length
        if format is not OMIT:
            _request["format"] = format
        if model is not OMIT:
            _request["model"] = model
        if extractiveness is not OMIT:
            _request["extractiveness"] = extractiveness
        if temperature is not OMIT:
            _request["temperature"] = temperature
        if additional_command is not OMIT:
            _request["additional_command"] = additional_command
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "summarize"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(SummarizeResponse, _response.json())  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

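    # Illustrative sketch (not part of the generated client): the generated
    # summary is returned on the response object (field name assumed to be
    # `summary`, per SummarizeResponse).
    #
    #   response = await client.summarize(text=long_text)
    #   print(response.summary)
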
    async def tokenize(
        self, *, text: str, model: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None
    ) -> TokenizeResponse:
        """
        This endpoint splits input text into smaller units called tokens using byte-pair encoding (BPE). To learn more about tokenization and byte pair encoding, see the tokens page.

        Parameters:
        - text: str. The string to be tokenized, the minimum text length is 1 character, and the maximum text length is 65536 characters.

        - model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the tokenization uses the tokenizer used by that model.

        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.tokenize(
            text="tokenize me! :D",
            model="command",
        )
        """
        _request: typing.Dict[str, typing.Any] = {"text": text}
        if model is not OMIT:
            _request["model"] = model
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "tokenize"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(TokenizeResponse, _response.json())  # type: ignore
        if _response.status_code == 400:
            raise BadRequestError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        if _response.status_code == 500:
            raise InternalServerError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)

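    # Illustrative sketch (not part of the generated client): the BPE token ids
    # come back under `tokens` (name assumed from TokenizeResponse).
    #
    #   response = await client.tokenize(text="tokenize me! :D", model="command")
    #   print(response.tokens)
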
    async def detokenize(
        self,
        *,
        tokens: typing.Sequence[int],
        model: typing.Optional[str] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> DetokenizeResponse:
        """
        This endpoint takes tokens using byte-pair encoding and returns their text representation. To learn more about tokenization and byte pair encoding, see the tokens page.

        Parameters:
        - tokens: typing.Sequence[int]. The list of tokens to be detokenized.

        - model: typing.Optional[str]. An optional parameter to provide the model name. This will ensure that the detokenization is done by the tokenizer used by that model.

        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from cohere.client import AsyncClient

        client = AsyncClient(
            client_name="YOUR_CLIENT_NAME",
            token="YOUR_TOKEN",
        )
        await client.detokenize(
            tokens=[10104, 12221, 1315, 34, 1420, 69],
        )
        """
        _request: typing.Dict[str, typing.Any] = {"tokens": tokens}
        if model is not OMIT:
            _request["model"] = model
        _response = await self._client_wrapper.httpx_client.request(
            "POST",
            urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "detokenize"),
            params=jsonable_encoder(
                request_options.get("additional_query_parameters") if request_options is not None else None
            ),
            json=jsonable_encoder(_request)
            if request_options is None or request_options.get("additional_body_parameters") is None
            else {
                **jsonable_encoder(_request),
                **(jsonable_encoder(remove_none_from_dict(request_options.get("additional_body_parameters", {})))),
            },
            headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
            timeout=request_options.get("timeout_in_seconds")
            if request_options is not None and request_options.get("timeout_in_seconds") is not None
            else self._client_wrapper.get_timeout(),
            retries=0,
            max_retries=request_options.get("max_retries") if request_options is not None else 0,  # type: ignore
        )
        if 200 <= _response.status_code < 300:
            return pydantic.parse_obj_as(DetokenizeResponse, _response.json())  # type: ignore
        if _response.status_code == 429:
            raise TooManyRequestsError(pydantic.parse_obj_as(typing.Any, _response.json()))  # type: ignore
        try:
            _response_json = _response.json()
        except JSONDecodeError:
            raise ApiError(status_code=_response.status_code, body=_response.text)
        raise ApiError(status_code=_response.status_code, body=_response_json)
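
    # Illustrative sketch (not part of the generated client): detokenize reverses
    # tokenize for the same model, so a round trip should recover the original
    # text (field names assumed from the response types).
    #
    #   tokenized = await client.tokenize(text="tokenize me! :D", model="command")
    #   restored = await client.detokenize(tokens=tokenized.tokens, model="command")
    #   print(restored.text)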


def _get_base_url(*, base_url: typing.Optional[str] = None, environment: ClientEnvironment) -> str:
    if base_url is not None:
        return base_url
    elif environment is not None:
        return environment.value
    else:
        raise Exception("Please pass in either base_url or environment to construct the client")

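# Illustrative sketch (not part of the generated client): base-URL resolution
# prefers an explicit `base_url` and otherwise falls back to the environment's
# value (the PRODUCTION member shown here is an assumption about ClientEnvironment).
#
#   _get_base_url(base_url=None, environment=ClientEnvironment.PRODUCTION)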