Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
234 changes: 207 additions & 27 deletions .github/workflows/deepeval-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,52 +3,227 @@ name: DeepEval RAG System Tests
on:
pull_request:
types: [opened, synchronize, reopened]
branches: ["rag-33-debug", "RAG-33-31okt", "wip_3_12"]
paths:
- 'src/**'
- 'tests/**'
- 'data/**'
- 'docker-compose-eval.yml'
- 'Dockerfile.llm_orchestration_service'
- '.github/workflows/deepeval-tests.yml'

jobs:
deepeval-tests:
runs-on: ubuntu-latest
timeout-minutes: 40
timeout-minutes: 80

steps:
- name: Checkout code
uses: actions/checkout@v4


- name: Validate required secrets
id: validate_secrets
run: |
echo "Validating required environment variables..."
MISSING_SECRETS=()

# Check Azure OpenAI secrets
if [ -z "${{ secrets.AZURE_OPENAI_ENDPOINT }}" ]; then
MISSING_SECRETS+=("AZURE_OPENAI_ENDPOINT")
fi

if [ -z "${{ secrets.AZURE_OPENAI_API_KEY }}" ]; then
MISSING_SECRETS+=("AZURE_OPENAI_API_KEY")
fi

if [ -z "${{ secrets.AZURE_OPENAI_DEPLOYMENT }}" ]; then
MISSING_SECRETS+=("AZURE_OPENAI_DEPLOYMENT")
fi

if [ -z "${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}" ]; then
MISSING_SECRETS+=("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
fi

if [ -z "${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}" ]; then
MISSING_SECRETS+=("AZURE_OPENAI_DEEPEVAL_DEPLOYMENT")
fi



if [ -z "${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}" ]; then
MISSING_SECRETS+=("AZURE_STORAGE_CONNECTION_STRING")
fi

if [ -z "${{ secrets.AZURE_STORAGE_CONTAINER_NAME }}" ]; then
MISSING_SECRETS+=("AZURE_STORAGE_CONTAINER_NAME")
fi

if [ -z "${{ secrets.AZURE_STORAGE_BLOB_NAME }}" ]; then
MISSING_SECRETS+=("AZURE_STORAGE_BLOB_NAME")
fi


# If any secrets are missing, fail
if [ ${#MISSING_SECRETS[@]} -gt 0 ]; then
echo "missing=true" >> $GITHUB_OUTPUT
echo "secrets_list=${MISSING_SECRETS[*]}" >> $GITHUB_OUTPUT
echo "Missing required secrets: ${MISSING_SECRETS[*]}"
exit 1
else
echo "missing=false" >> $GITHUB_OUTPUT
echo "All required secrets are configured"
fi

- name: Comment PR with missing secrets error
if: failure() && steps.validate_secrets.outputs.missing == 'true'
uses: actions/github-script@v7
with:
script: |
const missingSecrets = '${{ steps.validate_secrets.outputs.secrets_list }}'.split(' ');
const secretsList = missingSecrets.map(s => `- \`${s}\``).join('\n');

const comment = `## DeepEval Tests: Missing Required Secrets

The DeepEval RAG system tests cannot run because the following GitHub secrets are not configured:

${secretsList}

### How to Fix

1. Go to **Settings** → **Secrets and variables** → **Actions**
2. Add the missing secrets with the appropriate values:

**Azure OpenAI Configuration:**
- \`AZURE_OPENAI_ENDPOINT\` - Your Azure OpenAI resource endpoint (e.g., \`https://your-resource.openai.azure.com/\`)
- \`AZURE_OPENAI_API_KEY\` - Your Azure OpenAI API key
- \`AZURE_OPENAI_DEPLOYMENT\` - Chat model deployment name (e.g., \`gpt-4o-mini\`)
- \`AZURE_OPENAI_EMBEDDING_DEPLOYMENT\` - Embedding model deployment name (e.g., \`text-embedding-3-large\`)
- \`AZURE_STORAGE_CONNECTION_STRING\` - Connection string for Azure Blob Storage
- \`AZURE_STORAGE_CONTAINER_NAME\` - Container name in Azure Blob Storage
- \`AZURE_STORAGE_BLOB_NAME\` - Blob name for dataset in Azure
- \`AZURE_OPENAI_DEEPEVAL_DEPLOYMENT\` - DeepEval model deployment name (e.g., \`gpt-4.1\`)

3. Re-run the workflow after adding the secrets

### Note
Tests will not run until all required secrets are configured.

---
*Workflow: ${context.workflow} | Run: [#${context.runNumber}](${context.payload.repository.html_url}/actions/runs/${context.runId})*`;

// Find existing comment
const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number
});

const existingComment = comments.data.find(
comment => comment.user.login === 'github-actions[bot]' &&
comment.body.includes('DeepEval Tests: Missing Required Secrets')
);

if (existingComment) {
await github.rest.issues.updateComment({
owner: context.repo.owner,
repo: context.repo.repo,
comment_id: existingComment.id,
body: comment
});
} else {
await github.rest.issues.createComment({
owner: context.repo.owner,
repo: context.repo.repo,
issue_number: context.issue.number,
body: comment
});
}

- name: Set up Python
if: success()
uses: actions/setup-python@v5
with:
python-version-file: '.python-version'

- name: Set up uv
if: success()
uses: astral-sh/setup-uv@v6

- name: Install dependencies (locked)
if: success()
run: uv sync --frozen

- name: Run DeepEval tests

- name: Create test directories with proper permissions
if: success()
run: |
mkdir -p test-vault/agents/llm
mkdir -p test-vault/agent-out
# Set ownership to current user and make writable
sudo chown -R $(id -u):$(id -g) test-vault
chmod -R 777 test-vault
# Ensure the agent-out directory is world-readable after writes
sudo chmod -R a+rwX test-vault/agent-out

- name: Set up Deepeval with azure
if: success()
run: |
uv run deepeval set-azure-openai \
--openai-endpoint "${{ secrets.AZURE_OPENAI_ENDPOINT }}" \
--openai-api-key "${{ secrets.AZURE_OPENAI_API_KEY }}" \
--deployment-name "${{ secrets.AZURE_OPENAI_DEPLOYMENT }}" \
--openai-model-name "${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}" \
--openai-api-version="2024-12-01-preview"

- name: Run DeepEval tests with testcontainers
if: success()
id: run_tests
continue-on-error: true
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short

# LLM API Keys
AZURE_OPENAI_DEEPEVAL_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_DEEPEVAL_DEPLOYMENT }}
# Azure OpenAI - Chat Model
AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_DEPLOYMENT }}
# Azure OpenAI - Embedding Model
AZURE_OPENAI_EMBEDDING_DEPLOYMENT: ${{ secrets.AZURE_OPENAI_EMBEDDING_DEPLOYMENT }}
# Azure Blob Storage (evaluation dataset)
AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
AZURE_STORAGE_CONTAINER_NAME: ${{ secrets.AZURE_STORAGE_CONTAINER_NAME }}
AZURE_STORAGE_BLOB_NAME: ${{ secrets.AZURE_STORAGE_BLOB_NAME }}
EVAL_MODE: "true"
run: |
# Run tests with testcontainers managing Docker Compose
uv run python -m pytest tests/deepeval_tests/standard_tests.py -v --tb=short --log-cli-level=INFO

- name: Fix permissions on test artifacts
if: always()
run: |
sudo chown -R $(id -u):$(id -g) test-vault || true
sudo chmod -R a+rX test-vault || true

- name: Generate evaluation report
if: always()
run: python tests/deepeval_tests/report_generator.py

run: uv run python tests/deepeval_tests/report_generator.py

- name: Save test artifacts
if: always()
uses: actions/upload-artifact@v4
with:
name: test-results
path: |
pytest_captured_results.json
test_report.md
retention-days: 30

- name: Comment PR with test results
if: always() && github.event_name == 'pull_request'
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');

try {
const reportContent = fs.readFileSync('test_report.md', 'utf8');

const comments = await github.rest.issues.listComments({
owner: context.repo.owner,
repo: context.repo.repo,
Expand All @@ -57,7 +232,7 @@ jobs:

const existingComment = comments.data.find(
comment => comment.user.login === 'github-actions[bot]' &&
comment.body.includes('RAG System Evaluation Report')
comment.body.includes('RAG System Evaluation Report')
);

if (existingComment) {
Expand All @@ -75,36 +250,35 @@ jobs:
body: reportContent
});
}

} catch (error) {
console.error('Failed to post test results:', error);

await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: `## RAG System Evaluation Report\n\n**Error generating test report**\n\nFailed to read or post test results. Check workflow logs for details.\n\nError: ${error.message}`
});
}

- name: Check test results and fail if needed
if: always()
run: |
# Check if pytest ran (look at step output)
if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
# Check if pytest ran (look at step output)
if [ "${{ steps.run_tests.outcome }}" == "failure" ]; then
echo "Tests ran but failed - this is expected if RAG performance is below threshold"
fi
if [ -f "pytest_captured_results.json" ]; then
fi

if [ -f "pytest_captured_results.json" ]; then
total_tests=$(jq '.total_tests // 0' pytest_captured_results.json)
passed_tests=$(jq '.passed_tests // 0' pytest_captured_results.json)

if [ "$total_tests" -eq 0 ]; then
echo "ERROR: No tests were executed"
exit 1
fi

pass_rate=$(awk "BEGIN {print ($passed_tests / $total_tests) * 100}")

echo "DeepEval Test Results:"
echo "Total Tests: $total_tests"
echo "Passed Tests: $passed_tests"
Expand All @@ -117,7 +291,13 @@ jobs:
else
echo "TEST SUCCESS: Pass rate $pass_rate% meets threshold 70%"
fi
else
else
echo "ERROR: No test results file found"
exit 1
fi
fi

- name: Cleanup Docker resources
if: always()
run: |
docker compose -f docker-compose-eval.yml down -v --remove-orphans || true
docker system prune -f || true
Loading
Loading