mirror of
https://github.com/ruvnet/RuView.git
synced 2026-06-02 00:58:56 +02:00
a6808568a2
AetherArena ("AA") — the official, project-agnostic Spatial-Intelligence Benchmark
(ADR-149, Accepted). Iteration 1 of the long-horizon build:
- ADR-149 accepted: name locked (ruvnet/aether-arena), v0 metrics locked
(pose/presence/latency/determinism), dataset legality resolved (MM-Fi CC BY-NC
only; Wi-Pose excluded). Adds four-part framing, threat model, arena_score
formula, submission state machine, neutrality/governance, and the §7 acceptance test.
- aa_score_runner: deterministic scorer bin reusing the real ruview_metrics pose
harness on a fixed seed=42 fixture → RuViewTier-style verdict + cross-platform
SHA-256 proof hash. Builds with --no-default-features (no torch/GPU). VERDICT: PASS.
- CI harness gate: .github/workflows/aether-arena-harness.yml runs the scorer on
every PR — the "PR that runs the harness as part of the build" requirement.
- Scaffold: aether-arena/{README,VERIFY,STATUS}.md + schema/aa-submission.toml.
- Horizon record persisted (.claude-flow/horizons/aether-arena-aa.json).
Infra = the deliverable; model SOTA (MM-Fi PCK@20) is a separate effort blocked on
ADR-079 data collection, tracked as a stretch goal, not an infra exit criterion.
Co-Authored-By: claude-flow <ruv@ruv.net>
120 lines
8.0 KiB
JSON
120 lines
8.0 KiB
JSON
{
|
|
"id": "aether-arena-aa",
|
|
"name": "AetherArena (AA) — Official Spatial-Intelligence Benchmark",
|
|
"adr": "ADR-149",
|
|
"adrPath": "docs/adr/ADR-149-public-community-leaderboard-huggingface.md",
|
|
"status": "Accepted",
|
|
"initializedDate": "2026-05-30",
|
|
"targetDate": "2026-08-31",
|
|
"exitCriteria": "Benchmark INFRASTRUCTURE done, tested, CI-gated, deploy-ready: aa_score_runner.rs passes deterministic fixture test; CI harness-gate green on every PR; aether-arena repo scaffold committed (README four-part framing + aa-submission.toml schema + VERIFY.md); public smoke split committed; HF Space lifecycle skeleton deployed; signed Parquet ledger functional; RuView baseline PCK@20 ~2.5% entered; ADR-149 §7 acceptance test (five-step stranger test) passes. NOTE: ML SOTA (MM-Fi PCK@20 ~72%) is a separate long-running stretch goal blocked on ADR-079 camera-ground-truth — it is NOT an infra exit criterion.",
|
|
"baselineState": {
|
|
"adrStatus": "Accepted, committed 2026-05-30",
|
|
"scorerCode": "ruview_metrics.rs + ablation.rs + proof.rs exist in wifi-densepose-train; aa_score_runner.rs not yet created",
|
|
"aetherArenaRepo": "does not exist yet — needs user authorization to create ruvnet/aether-arena public repo",
|
|
"hfSpace": "does not exist yet — needs HF_TOKEN and user authorization to deploy ruvnet/aether-arena HF Space",
|
|
"smokeDataset": "not committed",
|
|
"resultsLedger": "not created",
|
|
"ruviewBaseline": "PCK@20 ~2.5% self-reported, not formally entered",
|
|
"ciGate": "not added to workflow"
|
|
},
|
|
"milestones": {
|
|
"m1": {
|
|
"name": "ADR-149 Accepted + committed",
|
|
"status": "DONE",
|
|
"completedDate": "2026-05-30",
|
|
"completionCriteria": "ADR-149 file committed to docs/adr/ with status Accepted",
|
|
"notes": "Completed in the 2026-05-30 initialization session. File at docs/adr/ADR-149-public-community-leaderboard-huggingface.md"
|
|
},
|
|
"m2": {
|
|
"name": "Deterministic scorer runner bin (aa_score_runner.rs)",
|
|
"status": "NOT_STARTED",
|
|
"completionCriteria": "aa_score_runner.rs compiles, runs ruview_metrics on a committed fixture, emits RuViewTier + SHA-256 proof hash, mirrors existing *_proof_runner.rs pattern; cargo test passes",
|
|
"estimatedEffort": "3-5 days",
|
|
"owner": "wifi-densepose-train crate or new aa-scorer crate"
|
|
},
|
|
"m3": {
|
|
"name": "CI harness-gate: GitHub Actions workflow",
|
|
"status": "NOT_STARTED",
|
|
"completionCriteria": "A GitHub Actions workflow runs aa_score_runner on every PR as a build gate; PR fails if scorer fails determinism check; workflow committed and green",
|
|
"estimatedEffort": "2-3 days",
|
|
"dependency": "M2 must be done first"
|
|
},
|
|
"m4": {
|
|
"name": "aether-arena repo scaffold",
|
|
"status": "NOT_STARTED",
|
|
"completionCriteria": "ruvnet/aether-arena repo created with: README (four-part framing: Public leaderboard / Private eval split / Open scorer / Signed results); aa-submission.toml manifest schema; VERIFY.md (ADR-149 §7 stranger acceptance test); neutrality/governance section (§2.8); contribution guide",
|
|
"estimatedEffort": "3-5 days",
|
|
"blockers": ["Needs user authorization to create public ruvnet/aether-arena repo on GitHub"]
|
|
},
|
|
"m5": {
|
|
"name": "Public smoke split committed + private MM-Fi held-out split prep",
|
|
"status": "NOT_STARTED",
|
|
"completionCriteria": "Public smoke split committed to aether-arena repo (stranger can score locally); private MM-Fi held-out split prepared under non-public path with CC BY-NC 4.0 attribution; Wi-Pose explicitly excluded from v0",
|
|
"estimatedEffort": "5-7 days",
|
|
"riskNotes": "MM-Fi CC BY-NC 4.0: AA must remain non-commercial and carry MM-Fi attribution; raw frames stay in private split; only derived CSI features + scores may be exposed"
|
|
},
|
|
"m6": {
|
|
"name": "HF Space (Gradio) skeleton",
|
|
"status": "BLOCKED",
|
|
"completionCriteria": "HF Space deployed at ruvnet/aether-arena with submission lifecycle (submitted->validated->quarantined->smoke_scored->full_scored->published/rejected); sandboxed scorer container wired; basic leaderboard table rendered",
|
|
"estimatedEffort": "7-10 days",
|
|
"blockers": [
|
|
"Needs HF_TOKEN — check .env for HF_TOKEN or HUGGINGFACE_TOKEN",
|
|
"Needs user authorization to create/deploy ruvnet/aether-arena HF Space (outward-facing public deployment)"
|
|
]
|
|
},
|
|
"m7": {
|
|
"name": "Signed append-only Parquet results ledger",
|
|
"status": "NOT_STARTED",
|
|
"completionCriteria": "HF dataset ruvnet/aether-arena-results created; append-only Parquet ledger with signed rows; determinism_gate enforced; no row can be silently edited",
|
|
"estimatedEffort": "3-5 days",
|
|
"ledgerSchema": "submitter, model_ref, category, feature_set, tier, pck20, oks, mota, vitals_bpm_err, latency_p50, latency_p95, privacy_leakage, cross_room_deg, proof_sha256, scored_at, harness_version",
|
|
"dependency": "M6 must be scaffolded first"
|
|
},
|
|
"m8": {
|
|
"name": "RuView baseline entry + public launch",
|
|
"status": "NOT_STARTED",
|
|
"completionCriteria": "RuView wifi-densepose-pretrained baseline entered (honest PCK@20 ~2.5%); ADR-149 §7 five-step stranger acceptance test passes; v0 live with Presence + Pose + Edge-latency + Determinism categories active; Privacy and Cross-room shown as gated/coming-soon",
|
|
"estimatedEffort": "3-5 days",
|
|
"dependency": "M4+M5+M6+M7 complete",
|
|
"notes": "ML SOTA improvement (PCK@20 ~72%) is a SEPARATE stretch goal blocked on ADR-079 P7-P9 camera ground truth. NOT a blocker for infra launch."
|
|
}
|
|
},
|
|
"activeMilestone": "m2",
|
|
"completedMilestones": ["m1"],
|
|
"knownRisks": [
|
|
"HF_TOKEN not confirmed present in .env — check before M6 work begins",
|
|
"ruvnet/aether-arena public repo creation is outward-facing — needs explicit user authorization",
|
|
"MM-Fi CC BY-NC 4.0: AA must stay legally non-commercial and brand-distinct from the commercial RuView product; alternatively, seek an MM-Fi commercial grant before any paid tier",
|
|
"Wi-Pose has research-use-only terms (no redistribution grant) — excluded from v0; revisit only if terms are clarified with authors",
|
|
"HF Space free CPU tier may be too slow for Candle/tch inference pipeline — may need ZeroGPU or self-hosted scorer on cognitum-20260110 GCloud A100/L4",
|
|
"ADR-079 camera-ground-truth (PCK@20 SOTA) is P7-P9 pending — NOT an infra blocker; must not be conflated with AA infra completion",
|
|
"Neutrality/governance risk: RuView seeded the scorer — must be demonstrably scored through the same public pipeline as any other entrant (§2.8 controls)"
|
|
],
|
|
"driftSignals": {
|
|
"timeline": "GREEN — initialized 2026-05-30, no timeline pressure yet",
|
|
"scope": "GREEN — scope locked at four-part structure per ADR-149 §2 decision",
|
|
"approach": "GREEN — reuse pattern (existing ruview_metrics + proof.rs) confirmed in ADR-149",
|
|
"dependency": "YELLOW — HF_TOKEN and ruvnet/aether-arena repo authorization are external blockers with unknown ETA",
|
|
"priority": "GREEN — active feature branch feat/adr-136-146-streaming-engine in progress; AA infra can proceed in parallel on its own branch"
|
|
},
|
|
"stretchGoals": {
|
|
"sotaML": "MM-Fi PCK@20 SOTA ~72% — separate ML effort blocked on ADR-079 P7-P9 camera-ground-truth data collection; NOT an infra exit criterion",
|
|
"privacyAxis": "ADR-145 §10 membership-inference attacker — activate Privacy leaderboard axis once attacker is implemented and published",
|
|
"crossRoom": "Multi-room held-out split — activate Cross-room generalization axis",
|
|
"multiOrgSteering": "Invite co-maintainers from other projects once >=N external entries land (N is a placeholder; the threshold is not yet specified in this record)"
|
|
},
|
|
"sessionHistory": [
|
|
{
|
|
"date": "2026-05-30",
|
|
"type": "initialization",
|
|
"accomplished": [
|
|
"ADR-149 Accepted and committed to docs/adr/",
|
|
"Horizon record initialized in .claude-flow/horizons/aether-arena-aa.json",
|
|
"Memory stored in horizons namespace under key horizon-aether-arena-aa",
|
|
"Session check-in record stored in horizon-sessions namespace"
|
|
]
|
|
}
|
|
]
|
|
}
|