Skip to content
8 changes: 4 additions & 4 deletions agent/api/agent_server/agent_api.tsp
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ enum MessageKind {
@doc("An intermediate result/update while the agent is still running. The platform should not typically query API during this state unless restoring after a failure.")
StageResult,

@doc("The agent has paused and requires feedback or input from the user.")
FeedbackResponse,
@doc("The agent has paused and requires feedback or additional information from the user.")
RefinementRequest,

@doc("The agent encountered an error it couldn't recover from. Retrying likely requires user intervention or infrastructure changes.")
RuntimeError,

@doc("The final successful result indicating the application generation is complete and ready for deployment.")
FinalResult,
@doc("The final successful result indicating the application generation is complete and ready for review.")
ReviewResult,
}

// --- Diff summary ---
Expand Down
2 changes: 1 addition & 1 deletion agent/api/agent_server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class MessageKind(str, Enum):
STAGE_RESULT = "StageResult" # tool was used, and FSM state is expected to be updated
RUNTIME_ERROR = "RuntimeError" # things went wrong!
REFINEMENT_REQUEST = "RefinementRequest" # no tool was used, meaning the agent is asking for more information
FINAL_RESULT = "FinalResult" # generation completed successfully
REVIEW_RESULT = "ReviewResult" # generation completed successfully


class UserMessage(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion agent/api/agent_server/test_async_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ async def test_async_agent_message_endpoint(agent_type):
case AgentSseEvent():
assert event.trace_id == request.trace_id, f"Trace IDs do not match in model objects with agent_type={agent_type}"
assert event.status == AgentStatus.IDLE
assert event.message.kind in (MessageKind.STAGE_RESULT, MessageKind.FINAL_RESULT), f"Message kind {event.message.kind} is not one of the expected kinds"
assert event.message.kind in (MessageKind.STAGE_RESULT, MessageKind.REVIEW_RESULT), f"Message kind {event.message.kind} is not one of the expected kinds"



Expand Down
912 changes: 911 additions & 1 deletion agent/llm/caches/0f865f9b2ef436420336698eca3516d6.json

Large diffs are not rendered by default.

100 changes: 16 additions & 84 deletions agent/llm/caches/272bc3a87d329ea03da98685a95c69d1.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"f6627c3d7b916615b6a8442e4aaaf624": {
"b9108aec1811a97ad6ab22e7e446cdb7": {
"data": {
"role": "assistant",
"content": [
{
"type": "text",
"text": "click-counter-app\n"
"text": "button-counter\n"
}
],
"input_tokens": 100,
"output_tokens": 6,
"input_tokens": 108,
"output_tokens": 4,
"stop_reason": "end_turn",
"thinking_tokens": null
},
Expand All @@ -19,7 +19,7 @@
{
"content": [
{
"text": "Based on this application description, generate a short, concise name suitable for use as a GitHub repository name.\nThe name should be lowercase with words separated by hyphens (kebab-case) and should not include any special characters.\nApplication description: \"A simple web application with a button that counts the number of times it has been clicked. The app should display the current click count and update it in real-time whenever the button is clicked.\"\nReturn ONLY the name, nothing else.",
"text": "Based on this application description, generate a short, concise name suitable for use as a GitHub repository name.\nThe name should be lowercase with words separated by hyphens (kebab-case) and should not include any special characters.\nApplication description: \"A simple web application that displays a button and a counter. Every time the user clicks the button, the counter increments by one. The application should display the current count prominently. The count should persist between sessions using server-side storage.\"\nReturn ONLY the name, nothing else.",
"type": "text"
}
],
Expand All @@ -32,17 +32,17 @@
"tools": null
}
},
"c7a08f39b5b24a1d4430baa284462999": {
"cf9dfe84baf3624efed402f337ff7d0e": {
"data": {
"role": "assistant",
"content": [
{
"type": "text",
"text": "feat: Implement click counter web application\n"
"text": "feat: Implement button, counter, and server-side persistence\n"
}
],
"input_tokens": 86,
"output_tokens": 8,
"input_tokens": 94,
"output_tokens": 13,
"stop_reason": "end_turn",
"thinking_tokens": null
},
Expand All @@ -52,7 +52,7 @@
{
"content": [
{
"text": "Based on this application description, generate a concise Git commit message that follows best practices.\nThe message should be clear, descriptive, and follow conventional commit format.\nApplication description: \"A simple web application with a button that counts the number of times it has been clicked. The app should display the current click count and update it in real-time whenever the button is clicked.\"\nReturn ONLY the commit message, nothing else.",
"text": "Based on this application description, generate a concise Git commit message that follows best practices.\nThe message should be clear, descriptive, and follow conventional commit format.\nApplication description: \"A simple web application that displays a button and a counter. Every time the user clicks the button, the counter increments by one. The application should display the current count prominently. The count should persist between sessions using server-side storage.\"\nReturn ONLY the commit message, nothing else.",
"type": "text"
}
],
Expand All @@ -65,96 +65,28 @@
"tools": null
}
},
"83dafe4fd7f0e112416cde1e3705ffe5": {
"d3c3e37427c9bdf57409f823181ad58e": {
"data": {
"role": "assistant",
"content": [
{
"type": "text",
"text": "<reason>The frontend code is correctly setting up the UI elements like the \"Click me\" button and displaying the initial counter text. The error messages indicate issues with the backend, specifically in how the data is being returned or initialized. Since the prompt is to assess the frontend and ignore the backend, the frontend part of the application is correct. The error is related to the backend, and it is not the focus of the assessment.<\/reason>\n<answer>yes<\/answer>\n"
"text": "<reason>The frontend code renders the button, a label, and has a loading state while fetching the counter. Even though there are errors in the backend, it's not relevant to the frontend implementation.<\/reason>\n<answer>yes<\/answer>\n"
}
],
"input_tokens": 908,
"output_tokens": 98,
"input_tokens": 1388,
"output_tokens": 53,
"stop_reason": "end_turn",
"thinking_tokens": null
},
"params": {
"attach_files": "bc8e993228da6c7629d3d95e2951bd1c",
"attach_files": "444a3de992e1cc95a19c2519c8b04d50",
"max_tokens": 1024,
"messages": [
{
"content": [
{
"text": "Given the attached screenshot, decide where the frontend code is correct and relevant to the original prompt. Keep in mind that the backend is currently not implemented, so you can only validate the frontend code and ignore the backend part.\nOriginal prompt to generate this website: A simple web application with a button that counts the number of times it has been clicked. The app should display the current click count and update it in real-time whenever the button is clicked..\n\nConsole logs from the browsers:\n\nchromium:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[info] %cDownload the React DevTools for a better development experience: https:\/\/react.dev\/link\/react-devtools font-weight:bold\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\nwebkit:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n\nAnswer \"yes\" or \"no\" wrapped in <answer> tag. Follow the example below.\n\nExample 1:\n<reason>the website looks valid<\/reason>\n<answer>yes<\/answer>\n\nExample 2:\n<reason>there is nothing on the screenshot, could be rendering issue<\/reason>\n<answer>no<\/answer>\n\nExample 3:\n<reason>the website looks okay, but displays database connection error. Given it is not frontend-related, I should answer yes<\/reason>\n<answer>yes<\/answer>",
"type": "text"
}
],
"role": "user"
}
],
"model": null,
"temperature": 1.0,
"tool_choice": null,
"tools": null
}
},
"04104e379e619b98a37305dd08a3f81d": {
"data": {
"role": "assistant",
"content": [
{
"type": "text",
"text": "<reason>The frontend displays a title and a button, meeting the basic requirements of the prompt. The logs indicate backend issues (\"Counter not found\"), which are irrelevant to the frontend assessment.<\/reason>\n<answer>yes<\/answer>\n"
}
],
"input_tokens": 902,
"output_tokens": 49,
"stop_reason": "end_turn",
"thinking_tokens": null
},
"params": {
"attach_files": "aedcb4da5dcb3ed9355ea24a306f556d",
"max_tokens": 1024,
"messages": [
{
"content": [
{
"text": "Given the attached screenshot, decide where the frontend code is correct and relevant to the original prompt. Keep in mind that the backend is currently not implemented, so you can only validate the frontend code and ignore the backend part.\nOriginal prompt to generate this website: A simple web application with a button that counts the number of times it has been clicked. The app should display the current click count and update it in real-time whenever the button is clicked..\n\nConsole logs from the browsers:\n\nchromium:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[info] %cDownload the React DevTools for a better development experience: https:\/\/react.dev\/link\/react-devtools font-weight:bold\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[log] Counter not found, please create one\n[log] Counter not found, please create one\nwebkit:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[log] Counter not found, please create one\n[log] Counter not found, please create one\n\nAnswer \"yes\" or \"no\" wrapped in <answer> tag. Follow the example below.\n\nExample 1:\n<reason>the website looks valid<\/reason>\n<answer>yes<\/answer>\n\nExample 2:\n<reason>there is nothing on the screenshot, could be rendering issue<\/reason>\n<answer>no<\/answer>\n\nExample 3:\n<reason>the website looks okay, but displays database connection error. Given it is not frontend-related, I should answer yes<\/reason>\n<answer>yes<\/answer>",
"type": "text"
}
],
"role": "user"
}
],
"model": null,
"temperature": 1.0,
"tool_choice": null,
"tools": null
}
},
"321ab65d501a0e8ff9bc067f0d0d7ddc": {
"data": {
"role": "assistant",
"content": [
{
"type": "text",
"text": "<reason>The frontend code displays a counter, an input field, and a button. It shows the initial value of the counter (0). The console logs suggest that the backend, which is not the subject of this answer, is not yet implemented and throws 500 error. The frontend part appears to be valid based on the prompt.<\/reason>\n<answer>yes<\/answer>"
}
],
"input_tokens": 914,
"output_tokens": 80,
"stop_reason": "end_turn",
"thinking_tokens": null
},
"params": {
"attach_files": "b627cb5d33bb8a0ace6758322de49439",
"max_tokens": 1024,
"messages": [
{
"content": [
{
"text": "Given the attached screenshot, decide where the frontend code is correct and relevant to the original prompt. Keep in mind that the backend is currently not implemented, so you can only validate the frontend code and ignore the backend part.\nOriginal prompt to generate this website: A simple web application with a button that counts the number of times it has been clicked. The app should display the current click count and update it in real-time whenever the button is clicked..\n\nConsole logs from the browsers:\n\nchromium:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[info] %cDownload the React DevTools for a better development experience: https:\/\/react.dev\/link\/react-devtools font-weight:bold\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[log] Counter not found, will be created on first click\n[log] Counter not found, will be created on first click\nwebkit:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[log] Counter not found, will be created on first click\n[log] Counter not found, will be created on first click\n\nAnswer \"yes\" or \"no\" wrapped in <answer> tag. Follow the example below.\n\nExample 1:\n<reason>the website looks valid<\/reason>\n<answer>yes<\/answer>\n\nExample 2:\n<reason>there is nothing on the screenshot, could be rendering issue<\/reason>\n<answer>no<\/answer>\n\nExample 3:\n<reason>the website looks okay, but displays database connection error. Given it is not frontend-related, I should answer yes<\/reason>\n<answer>yes<\/answer>",
"text": "Given the attached screenshot, decide where the frontend code is correct and relevant to the original prompt. Keep in mind that the backend is currently not implemented, so you can only validate the frontend code and ignore the backend part.\nOriginal prompt to generate this website: A simple web application that displays a button and a counter. Every time the user clicks the button, the counter increments by one. The application should display the current count prominently. The count should persist between sessions using server-side storage..\n\nConsole logs from the browsers:\n\nchromium:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[info] %cDownload the React DevTools for a better development experience: https:\/\/react.dev\/link\/react-devtools font-weight:bold\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[error] Error fetching counter: TRPCClientError: Failed to execute 'json' on 'Response': Unexpected end of JSON input\n at _TRPCClientError.from (http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=)\n at http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=\n[error] Error fetching counter: TRPCClientError: Failed to execute 'json' on 'Response': Unexpected end of JSON input\n at _TRPCClientError.from (http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=)\n at http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[error] Error creating counter: TRPCClientError: Failed to execute 'json' on 'Response': Unexpected end of JSON input\n at _TRPCClientError.from (http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=)\n at http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=\n[error] Error creating counter: TRPCClientError: Failed to execute 'json' on 'Response': Unexpected end of JSON input\n at _TRPCClientError.from (http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=)\n at http:\/\/debughost:5173\/node_modules\/.vite\/deps\/@trpc_client.js?v=\nwebkit:\n[debug] [vite] connecting...\n[debug] [vite] connected.\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[error] Error fetching counter: TRPCClientError: The string did not match the expected pattern.\n[error] Error fetching counter: TRPCClientError: The string did not match the expected pattern.\n[error] Failed to load resource: the server responded with a status of 500 (Internal Server Error)\n[error] Error creating counter: TRPCClientError: The string did not match the expected pattern.\n[error] Error creating counter: TRPCClientError: The string did not match the expected pattern.\n\nAnswer \"yes\" or \"no\" wrapped in <answer> tag. Follow the example below.\n\nExample 1:\n<reason>the website looks valid<\/reason>\n<answer>yes<\/answer>\n\nExample 2:\n<reason>there is nothing on the screenshot, could be rendering issue<\/reason>\n<answer>no<\/answer>\n\nExample 3:\n<reason>the website looks okay, but displays database connection error. Given it is not frontend-related, I should answer yes<\/reason>\n<answer>yes<\/answer>",
"type": "text"
}
],
Expand Down
Loading