Files
wifi-ruview/aether-arena/calibration/test_calibration.py
T
ruv 76cc57294d test(calibration): self-contained end-to-end regression test
The committed calibration service (model.py/calibrate.py/infer.py) had no
automated test — only ad-hoc verification. Adds a CPU-only, no-real-checkpoint
test that exercises the CLI end-to-end on synthetic data: build base ->
calibrate.py fits adapter -> infer.py runs base+adapter, asserting adapter
size (<200KB), keypoint shape [N,17,2], finiteness, [0,1] range, and that the
adapter actually changes the output. Passes on Windows CPU (torch 2.11).

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-31 05:02:24 -04:00

104 lines
3.8 KiB
Python

"""Self-contained regression test for the RuView calibration service.
Exercises the committed CLI end-to-end on synthetic data (CPU, no GPU, no real checkpoint):
build a base -> calibrate.py fits an adapter -> infer.py runs base+adapter -> assert the
adapter is small, inference is shape-correct and finite, and the adapter actually changes output.
Run: python test_calibration.py (or via pytest)
"""
import json
import subprocess
import sys
import tempfile
from pathlib import Path
import numpy as np
import torch
HERE = Path(__file__).parent
sys.path.insert(0, str(HERE))
from model import PoseNet, standardize # noqa: E402
def _make_base(path: Path):
torch.manual_seed(0)
net = PoseNet()
# Save without the deterministic gr.A buffer (mirrors the published checkpoint;
# calibrate.py/infer.py load with strict=False).
sd = {k: v for k, v in net.state_dict().items() if k != "gr.A"}
torch.save(sd, path)
def _make_data(path: Path, n: int, seed: int):
rng = np.random.default_rng(seed)
X = rng.standard_normal((n, 3, 114, 10)).astype(np.float32)
Y = rng.random((n, 17, 2)).astype(np.float32) # keypoints in [0,1]
np.savez(path, X=X, Y=Y)
def _run(*args):
r = subprocess.run(
[sys.executable, str(HERE / args[0]), *map(str, args[1:])],
capture_output=True, text=True,
)
assert r.returncode == 0, f"{args[0]} failed:\n{r.stdout}\n{r.stderr}"
return r.stdout
def test_calibration_end_to_end():
with tempfile.TemporaryDirectory() as d:
d = Path(d)
base = d / "base.pt"
calib = d / "calib.npz"
frames = d / "frames.npz"
adapter = d / "room.adapter.npz"
kp = d / "kp.npy"
_make_base(base)
_make_data(calib, n=40, seed=1) # ≥20 → no underfit warning
_make_data(frames, n=16, seed=2)
# 1) calibrate -> adapter
out = _run("calibrate.py", "--base", base, "--data", calib, "--out", adapter,
"--iters", "50", "--device", "cpu")
assert adapter.exists(), "adapter not written"
assert "saved" in out.lower()
sz = adapter.stat().st_size
assert sz < 200_000, f"adapter unexpectedly large ({sz} bytes)"
# adapter contains the expected LoRA tensors (materialize + close so the
# Windows tempdir can be cleaned up — np.load keeps a lazy file handle).
with np.load(adapter) as z:
keys = [k for k in z.files if k.endswith(".A") or k.endswith(".B")]
assert keys, f"adapter has no LoRA tensors: {z.files}"
lora = {k: z[k].astype(np.float32) for k in keys}
# 2) infer with adapter -> keypoints
_run("infer.py", "--base", base, "--adapter", adapter, "--data", frames,
"--out", kp, "--device", "cpu")
out_kp = np.load(kp)
assert out_kp.shape == (16, 17, 2), f"bad keypoint shape {out_kp.shape}"
assert np.isfinite(out_kp).all(), "non-finite keypoints"
assert (out_kp >= 0).all() and (out_kp <= 1).all(), "keypoints out of [0,1]"
# 3) adapter must actually change the output vs the zero-shot base
with np.load(frames) as fz:
frames_x = fz["X"][:]
net = PoseNet()
net.load_state_dict(torch.load(base, map_location="cpu"), strict=False)
net.eval()
x = standardize(torch.tensor(frames_x))
with torch.no_grad():
base_kp = net(x).reshape(16, 17, 2).numpy()
net.add_lora()
net.load_lora(lora)
net.eval()
with torch.no_grad():
cal_kp = net(x).reshape(16, 17, 2).numpy()
assert np.abs(base_kp - cal_kp).sum() > 1e-4, "adapter did not change output"
if __name__ == "__main__":
test_calibration_end_to_end()
print("PASS: calibration service end-to-end (calibrate -> adapter -> infer)")