Smoke tests: split compaction test and use debug build (#6984)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Jack Amadeo
2026-02-07 21:35:58 -05:00
committed by GitHub
parent a251fec55e
commit e670f348bd
5 changed files with 59 additions and 30 deletions
+48 -19
View File
@@ -45,7 +45,7 @@ jobs:
- '!documentation/**' - '!documentation/**'
build-binary: build-binary:
name: Build Release Binary name: Build Binary
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: changes needs: changes
if: needs.changes.outputs.code == 'true' || github.event_name == 'workflow_dispatch' if: needs.changes.outputs.code == 'true' || github.event_name == 'workflow_dispatch'
@@ -65,15 +65,15 @@ jobs:
- name: Cache Rust dependencies - name: Cache Rust dependencies
uses: Swatinem/rust-cache@v2 uses: Swatinem/rust-cache@v2
- name: Build Release Binary for Smoke Tests - name: Build Binary for Smoke Tests
run: | run: |
cargo build --release cargo build --bin goose
- name: Upload Binary for Smoke Tests - name: Upload Binary for Smoke Tests
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with: with:
name: goose-binary name: goose-binary
path: target/release/goose path: target/debug/goose
retention-days: 1 retention-days: 1
smoke-tests: smoke-tests:
@@ -90,10 +90,10 @@ jobs:
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with: with:
name: goose-binary name: goose-binary
path: target/release path: target/debug
- name: Make Binary Executable - name: Make Binary Executable
run: chmod +x target/release/goose run: chmod +x target/debug/goose
- name: Run Smoke Tests with Provider Script - name: Run Smoke Tests with Provider Script
env: env:
@@ -152,17 +152,6 @@ jobs:
run: | run: |
bash scripts/test_subrecipes.sh bash scripts/test_subrecipes.sh
- name: Run Compaction Tests
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GOOSE_PROVIDER: anthropic
GOOSE_MODEL: claude-sonnet-4-5-20250929
HOME: /tmp/goose-home
GOOSE_DISABLE_KEYRING: 1
SKIP_BUILD: 1
run: |
bash scripts/test_compaction.sh
smoke-tests-code-exec: smoke-tests-code-exec:
name: Smoke Tests (Code Execution) name: Smoke Tests (Code Execution)
runs-on: ubuntu-latest runs-on: ubuntu-latest
@@ -177,10 +166,10 @@ jobs:
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0 uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with: with:
name: goose-binary name: goose-binary
path: target/release path: target/debug
- name: Make Binary Executable - name: Make Binary Executable
run: chmod +x target/release/goose run: chmod +x target/debug/goose
- name: Run Provider Tests (Code Execution Mode) - name: Run Provider Tests (Code Execution Mode)
env: env:
@@ -200,3 +189,43 @@ jobs:
mkdir -p $HOME/.local/share/goose/sessions mkdir -p $HOME/.local/share/goose/sessions
mkdir -p $HOME/.config/goose mkdir -p $HOME/.config/goose
bash scripts/test_providers.sh --code-exec bash scripts/test_providers.sh --code-exec
# Compaction tests run as their own job, separate from the smoke-test jobs.
# The job reuses the debug `goose` binary uploaded by `build-binary` instead of
# compiling again (SKIP_BUILD makes the script skip its `cargo build` step).
compaction-tests:
  name: Compaction Tests
  runs-on: ubuntu-latest
  needs: build-binary
  steps:
    - name: Checkout Code
      uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}

    # The artifact contains only the binary; place it where the test script
    # looks for it (target/debug/goose relative to the working directory).
    - name: Download Binary
      uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
      with:
        name: goose-binary
        path: target/debug

    - name: Make Binary Executable
      run: chmod +x target/debug/goose

    # Pinned to a full commit SHA like every other action in this job, so a
    # retagged release cannot change what runs with access to the API key.
    # NOTE(review): Python and uv are presumably needed by the compaction
    # script or the tools it launches - confirm against the script.
    - name: Set up Python
      uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
      with:
        python-version: '3.12'

    - name: Install uv
      uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b # v7.2.0

    - name: Run Compaction Tests
      env:
        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
        GOOSE_PROVIDER: anthropic
        GOOSE_MODEL: claude-sonnet-4-5-20250929
        # HOME is redirected to a scratch directory for this step.
        HOME: /tmp/goose-home
        GOOSE_DISABLE_KEYRING: '1'
        SKIP_BUILD: '1'
      run: |
        # Create the session and config directories under the scratch HOME
        # before the script runs.
        mkdir -p "$HOME/.local/share/goose/sessions"
        mkdir -p "$HOME/.config/goose"
        bash scripts/test_compaction.sh
+2 -2
View File
@@ -14,7 +14,7 @@ fi
if [ -z "$SKIP_BUILD" ]; then if [ -z "$SKIP_BUILD" ]; then
echo "Building goose..." echo "Building goose..."
cargo build --release --bin goose cargo build --bin goose
echo "" echo ""
else else
echo "Skipping build (SKIP_BUILD is set)..." echo "Skipping build (SKIP_BUILD is set)..."
@@ -22,7 +22,7 @@ else
fi fi
SCRIPT_DIR=$(pwd) SCRIPT_DIR=$(pwd)
GOOSE_BIN="$SCRIPT_DIR/target/release/goose" GOOSE_BIN="$SCRIPT_DIR/target/debug/goose"
# Apply provider/model overrides if set # Apply provider/model overrides if set
if [ -n "$COMPACTION_PROVIDER" ]; then if [ -n "$COMPACTION_PROVIDER" ]; then
+2 -2
View File
@@ -3,7 +3,7 @@ set -e
if [ -z "$SKIP_BUILD" ]; then if [ -z "$SKIP_BUILD" ]; then
echo "Building goose..." echo "Building goose..."
cargo build --release --bin goose cargo build --bin goose
echo "" echo ""
else else
echo "Skipping build (SKIP_BUILD is set)..." echo "Skipping build (SKIP_BUILD is set)..."
@@ -11,7 +11,7 @@ else
fi fi
SCRIPT_DIR=$(pwd) SCRIPT_DIR=$(pwd)
GOOSE_BIN="$SCRIPT_DIR/target/release/goose" GOOSE_BIN="$SCRIPT_DIR/target/debug/goose"
TEST_PROVIDER=${GOOSE_PROVIDER:-anthropic} TEST_PROVIDER=${GOOSE_PROVIDER:-anthropic}
TEST_MODEL=${GOOSE_MODEL:-claude-haiku-4-5-20251001} TEST_MODEL=${GOOSE_MODEL:-claude-haiku-4-5-20251001}
+2 -2
View File
@@ -31,7 +31,7 @@ fi
if [ -z "$SKIP_BUILD" ]; then if [ -z "$SKIP_BUILD" ]; then
echo "Building goose..." echo "Building goose..."
cargo build --release --bin goose cargo build --bin goose
echo "" echo ""
else else
echo "Skipping build (SKIP_BUILD is set)..." echo "Skipping build (SKIP_BUILD is set)..."
@@ -247,7 +247,7 @@ run_test() {
( (
export GOOSE_PROVIDER="$provider" export GOOSE_PROVIDER="$provider"
export GOOSE_MODEL="$model" export GOOSE_MODEL="$model"
cd "$testdir" && "$SCRIPT_DIR/target/release/goose" run --text "Immediately use the shell tool to run 'ls'. Do not ask for confirmation." --with-builtin "$BUILTINS" 2>&1 cd "$testdir" && "$SCRIPT_DIR/target/debug/goose" run --text "Immediately use the shell tool to run 'ls'. Do not ask for confirmation." --with-builtin "$BUILTINS" 2>&1
) > "$output_file" 2>&1 ) > "$output_file" 2>&1
# Check result # Check result
+5 -5
View File
@@ -7,7 +7,7 @@ fi
if [ -z "$SKIP_BUILD" ]; then if [ -z "$SKIP_BUILD" ]; then
echo "Building goose..." echo "Building goose..."
cargo build --release --bin goose cargo build --bin goose
echo "" echo ""
else else
echo "Skipping build (SKIP_BUILD is set)..." echo "Skipping build (SKIP_BUILD is set)..."
@@ -17,7 +17,7 @@ fi
SCRIPT_DIR=$(pwd) SCRIPT_DIR=$(pwd)
# Add goose binary to PATH so subagents can find it when spawning # Add goose binary to PATH so subagents can find it when spawning
export PATH="$SCRIPT_DIR/target/release:$PATH" export PATH="$SCRIPT_DIR/target/debug:$PATH"
# Set default provider and model if not already set # Set default provider and model if not already set
# Use fast model for CI to speed up tests # Use fast model for CI to speed up tests
@@ -76,7 +76,7 @@ RESULTS=()
check_recipe_output() { check_recipe_output() {
local tmpfile=$1 local tmpfile=$1
local mode=$2 local mode=$2
# Check for unified subagent tool invocation (new format: "─── subagent |") # Check for unified subagent tool invocation (new format: "─── subagent |")
if grep -q "─── subagent" "$tmpfile"; then if grep -q "─── subagent" "$tmpfile"; then
echo "✓ SUCCESS: Subagent tool invoked" echo "✓ SUCCESS: Subagent tool invoked"
@@ -85,7 +85,7 @@ check_recipe_output() {
echo "✗ FAILED: No evidence of subagent tool invocation" echo "✗ FAILED: No evidence of subagent tool invocation"
RESULTS+=("✗ Subagent tool invocation ($mode)") RESULTS+=("✗ Subagent tool invocation ($mode)")
fi fi
# Check that both subrecipes were called (shown as "subrecipe: <name>" in output) # Check that both subrecipes were called (shown as "subrecipe: <name>" in output)
if grep -q "subrecipe:.*file_stats\|file_stats.*subrecipe" "$tmpfile" && grep -q "subrecipe:.*code_patterns\|code_patterns.*subrecipe" "$tmpfile"; then if grep -q "subrecipe:.*file_stats\|file_stats.*subrecipe" "$tmpfile" && grep -q "subrecipe:.*code_patterns\|code_patterns.*subrecipe" "$tmpfile"; then
echo "✓ SUCCESS: Both subrecipes (file_stats, code_patterns) found in output" echo "✓ SUCCESS: Both subrecipes (file_stats, code_patterns) found in output"
@@ -98,7 +98,7 @@ check_recipe_output() {
echo "Running recipe with parallel subrecipes..." echo "Running recipe with parallel subrecipes..."
TMPFILE=$(mktemp) TMPFILE=$(mktemp)
if (cd "$TESTDIR" && "$SCRIPT_DIR/target/release/goose" run --recipe project_analyzer_parallel.yaml --no-session 2>&1) | tee "$TMPFILE"; then if (cd "$TESTDIR" && "$SCRIPT_DIR/target/debug/goose" run --recipe project_analyzer_parallel.yaml --no-session 2>&1) | tee "$TMPFILE"; then
echo "✓ SUCCESS: Recipe completed successfully" echo "✓ SUCCESS: Recipe completed successfully"
RESULTS+=("✓ Recipe exit code") RESULTS+=("✓ Recipe exit code")
check_recipe_output "$TMPFILE" "parallel" check_recipe_output "$TMPFILE" "parallel"