AutoGPT

Форк
0
/
test_benchmark_workflow.py 
83 строки · 2.9 Кб
1
import datetime
import time

import pytest
import requests

URL_BENCHMARK = "http://localhost:8080/ap/v1"
URL_AGENT = "http://localhost:8000/ap/v1"
11
@pytest.mark.parametrize(
    "eval_id, input_text, expected_artifact_length, test_name, should_be_successful",
    [
        (
            "021c695a-6cc4-46c2-b93a-f3a9b0f4d123",
            "Write the word 'Washington' to a .txt file",
            0,
            "WriteFile",
            True,
        ),
        (
            "f219f3d3-a41b-45a9-a3d0-389832086ee8",
            "Read the file called file_to_read.txt and write its content to a file called output.txt",
            1,
            "ReadFile",
            False,
        ),
    ],
)
def test_entire_workflow(
    eval_id, input_text, expected_artifact_length, test_name, should_be_successful
):
    """End-to-end smoke test of the benchmark -> agent workflow.

    Creates a task through the benchmark proxy, verifies the agent recorded
    it, executes a single step, then requests an evaluation and checks the
    reported test name, success flag, and benchmark start time.

    Requires the benchmark service (URL_BENCHMARK, :8080) and the agent
    (URL_AGENT, :8000) to be running locally.
    """
    # A timeout keeps a dead/unreachable service from hanging the test run
    # forever — requests blocks indefinitely without one.
    timeout = 30

    task_request = {"eval_id": eval_id, "input": input_text}
    response = requests.get(f"{URL_AGENT}/agent/tasks", timeout=timeout)
    task_count_before = response.json()["pagination"]["total_items"]

    # Create the task via the benchmark proxy; it should be forwarded to the
    # agent, so the agent's task count must grow by exactly one.
    task_response_benchmark = requests.post(
        f"{URL_BENCHMARK}/agent/tasks", json=task_request, timeout=timeout
    )
    response = requests.get(f"{URL_AGENT}/agent/tasks", timeout=timeout)
    task_count_after = response.json()["pagination"]["total_items"]
    assert task_count_after == task_count_before + 1

    timestamp_after_task_eval_created = datetime.datetime.now(datetime.timezone.utc)
    time.sleep(1.1)  # To make sure the 2 timestamps to compare are different

    assert task_response_benchmark.status_code == 200
    task_response_benchmark = task_response_benchmark.json()
    assert task_response_benchmark["input"] == input_text

    task_response_benchmark_id = task_response_benchmark["task_id"]

    # The agent must expose the task under the same id the benchmark returned.
    response_task_agent = requests.get(
        f"{URL_AGENT}/agent/tasks/{task_response_benchmark_id}", timeout=timeout
    )
    assert response_task_agent.status_code == 200
    response_task_agent = response_task_agent.json()
    assert len(response_task_agent["artifacts"]) == expected_artifact_length

    # Run a single step for the task.
    step_request = {"input": input_text}
    step_response = requests.post(
        f"{URL_BENCHMARK}/agent/tasks/{task_response_benchmark_id}/steps",
        json=step_request,
        timeout=timeout,
    )
    assert step_response.status_code == 200
    step_response = step_response.json()
    # Identity check instead of `== True` (flake8 E712); tasks here are
    # assumed to complete in a single step.
    assert step_response["is_last"] is True

    # Ask the benchmark to evaluate the finished task.
    eval_response = requests.post(
        f"{URL_BENCHMARK}/agent/tasks/{task_response_benchmark_id}/evaluations",
        json={},
        timeout=timeout,
    )
    assert eval_response.status_code == 200
    eval_response = eval_response.json()
    print("eval_response")
    print(eval_response)
    assert eval_response["run_details"]["test_name"] == test_name
    assert eval_response["metrics"]["success"] == should_be_successful

    # The benchmark run must have started before the task created above
    # (NOTE(review): assumes benchmark_start_time is a tz-aware UTC ISO
    # string, since it is compared against an aware datetime — confirm).
    benchmark_start_time = datetime.datetime.fromisoformat(
        eval_response["run_details"]["benchmark_start_time"]
    )
    assert benchmark_start_time < timestamp_after_task_eval_created
84

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.