2222 - judge
2323 - stuck
2424 - compression
25+ - agent
2526
2627permissions :
2728 contents : read
5657 run : mkdir -p evals/results
5758
5859 - name : Run Judge Evaluation
59- if : ${{ github.event.inputs.eval_type == 'all' || github.event.inputs.eval_type == 'judge' || github.event_name == 'pull_request' }}
60+ if : ${{ ( github.event.inputs.eval_type == 'all' || github.event.inputs.eval_type == 'judge' || github.event_name == 'pull_request') && secrets.AZURE_OPENAI_API_KEY != '' && secrets.AZURE_OPENAI_BASE_URL != ' ' }}
6061 env :
6162 AZURE_OPENAI_API_KEY : ${{ secrets.AZURE_OPENAI_API_KEY }}
6263 # Must be the base host, e.g. https://vibebrowser-dev.openai.azure.com
@@ -65,21 +66,29 @@ jobs:
6566 run : npm run eval:judge -- --no-progress-bar -o evals/results/judge-results.json
6667
- name: Run Stuck Detection Evaluation
  # Runs when the eval_type input is 'all' or 'stuck' AND both Azure OpenAI
  # settings are non-empty. The `secrets` context is not available inside a
  # step-level `if`, so the guard reads the values through the `env` entries
  # declared on this step instead of referencing `secrets.*` directly.
  if: ${{ (github.event.inputs.eval_type == 'all' || github.event.inputs.eval_type == 'stuck') && env.AZURE_OPENAI_API_KEY != '' && env.AZURE_OPENAI_BASE_URL != '' }}
  env:
    AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
    AZURE_OPENAI_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
    # Same secret exported under a second variable name.
    AZURE_OPENAI_API_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
  # Writes the results JSON into evals/results.
  run: npm run eval:stuck -- --no-progress-bar -o evals/results/stuck-results.json
7475
- name: Run Post-Compression Evaluation
  # Runs when the eval_type input is 'all' or 'compression' AND both Azure
  # OpenAI settings are non-empty. The `secrets` context is not available
  # inside a step-level `if`, so the guard reads the values through the `env`
  # entries declared on this step instead of referencing `secrets.*` directly.
  if: ${{ (github.event.inputs.eval_type == 'all' || github.event.inputs.eval_type == 'compression') && env.AZURE_OPENAI_API_KEY != '' && env.AZURE_OPENAI_BASE_URL != '' }}
  env:
    AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
    AZURE_OPENAI_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
    # Same secret exported under a second variable name.
    AZURE_OPENAI_API_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
  # Writes the results JSON into evals/results.
  run: npm run eval:compression -- --no-progress-bar -o evals/results/compression-results.json
8283
- name: Run Agent Evaluation
  # Runs when the eval_type input is 'all' or 'agent' AND both Azure OpenAI
  # settings are non-empty. The `secrets` context is not available inside a
  # step-level `if`, so the guard reads the values through the `env` entries
  # declared on this step instead of referencing `secrets.*` directly.
  if: ${{ (github.event.inputs.eval_type == 'all' || github.event.inputs.eval_type == 'agent') && env.AZURE_OPENAI_API_KEY != '' && env.AZURE_OPENAI_BASE_URL != '' }}
  env:
    AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
    AZURE_OPENAI_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
    # Same secret exported under a second variable name.
    AZURE_OPENAI_API_BASE_URL: ${{ secrets.AZURE_OPENAI_BASE_URL }}
  # Writes the results JSON into evals/results.
  run: npm run eval:agent -- --no-progress-bar -o evals/results/agent-results.json
91+
8392 - name : Upload Evaluation Results
8493 if : ${{ always() }}
8594 uses : actions/upload-artifact@v4
0 commit comments