# Source: scenario/python/examples/test_testing_remote_agents_json.py (langwatch/scenario, main branch)
"""
Example: Testing an agent that returns JSON responses

This test demonstrates handling agents that return complete JSON responses via HTTP POST.
"""

from aiohttp import web
import aiohttp
import pytest
import pytest_asyncio
import scenario

# Base URL of the locally started test server. It stays empty until the
# `test_server` fixture assigns it; `JsonAgentAdapter.call` reads it to build
# the `/chat` request URL.
base_url: str = ""


class JsonAgentAdapter(scenario.AgentAdapter):
    """
    Adapter for testing agents that return JSON responses.

    This adapter:
    1. Takes the last message in the conversation history (presumably the
       user's turn when the scenario invokes the agent)
    2. Makes an HTTP POST request to the agent endpoint
    3. Checks the HTTP status, parses the JSON response and returns the
       agent's message
    """

    async def call(self, input: scenario.AgentInput) -> scenario.AgentReturnTypes:
        """
        Forward the latest message to ``{base_url}/chat`` and return the reply.

        Args:
            input: Scenario input; only ``input.messages[-1]["content"]`` is read.

        Returns:
            The value stored under the ``"response"`` key of the endpoint's
            JSON body.

        Raises:
            ValueError: If the message content is not a plain string.
            aiohttp.ClientResponseError: If the endpoint answers with an HTTP
                error status (400 or above).
        """
        # Take the content of the last message in the history
        last_message = input.messages[-1]
        content = last_message["content"]  # type: ignore[typeddict-item]

        # For this example, we assume content is a string
        if not isinstance(content, str):
            raise ValueError("This example only handles string content")

        # Make HTTP POST request to your agent's endpoint
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{base_url}/chat",
                json={"message": content},
                timeout=aiohttp.ClientTimeout(total=30),
            ) as response:
                # Fail loudly on 4xx/5xx instead of trying to interpret an
                # error body as a successful agent reply.
                response.raise_for_status()

                # Parse JSON response and return the agent's message
                result = await response.json()
                return result["response"]


async def chat_handler(request: web.Request) -> web.Response:
    """
    Answer a chat request with a canned weather reply.

    Reads the ``"message"`` field from the JSON request body and returns a
    JSON object whose ``"response"`` field embeds that message. Stands in for
    a production agent endpoint.
    """
    payload = await request.json()
    user_message = payload["message"]

    # A real application would call its LLM here; the example replies with
    # a fixed sentence that echoes the query back.
    return web.json_response(
        {"response": f"The weather is sunny and 72°F. Your query was: {user_message}"}
    )


@pytest_asyncio.fixture
async def test_server():
    """
    Start a test HTTP server before tests and shut it down after.

    This server simulates a deployed agent endpoint. It serves
    ``chat_handler`` on ``POST /chat`` and publishes its address through the
    module-level ``base_url``.

    Raises:
        RuntimeError: If the server started but reports no bound address.
    """
    global base_url

    # Create web application
    app = web.Application()
    app.router.add_post("/chat", chat_handler)

    runner = web.AppRunner(app)
    await runner.setup()
    try:
        # Port 0 lets the OS pick any available port
        site = web.TCPSite(runner, "localhost", 0)
        await site.start()

        # Read the assigned port from the runner's public address list
        # rather than reaching into the site's private server object.
        addresses = runner.addresses
        if not addresses:
            raise RuntimeError("Test server did not bind to any address")
        port = addresses[0][1]
        base_url = f"http://localhost:{port}"

        yield
    finally:
        # Cleanup: stop server, including when start-up fails part-way
        await runner.cleanup()


@pytest.mark.flaky(reruns=2)
@pytest.mark.asyncio
async def test_json_response(test_server):
    """
    Run the full scenario against the JSON HTTP endpoint.

    Exercised end to end:
    - the adapter calling the HTTP endpoint
    - parsing of the JSON response
    - the judge's criteria on relevant weather information
    """
    # Participants, built in the order they are handed to the scenario
    user_simulator = scenario.UserSimulatorAgent(model="openai/gpt-4o-mini")
    agent_under_test = JsonAgentAdapter()
    judge = scenario.JudgeAgent(
        model="openai/gpt-4o-mini",
        criteria=[
            "Agent should provide weather information",
            "Response should be relevant to the query",
        ],
    )

    # Scripted flow: fixed user question, one agent turn, then the verdict
    steps = [
        scenario.user("What's the weather like today?"),
        scenario.agent(),
        scenario.judge(),
    ]

    outcome = await scenario.run(
        name="JSON weather inquiry",
        description="User asks about weather and receives JSON response",
        agents=[user_simulator, agent_under_test, judge],
        script=steps,
        set_id="python-examples",
    )

    assert outcome.success