fix: cross-process lock staleness detection and heartbeat

Problem: the CI test "updateJSON serializes writes across separate node processes"
was failing: it expected 100 but got 89/97. The root cause was that isLockStale()
was too aggressive - it could mistakenly delete locks held by other processes.

Fixes:
1. isLockStale() now uses mtime only - fresh locks are never stale
2. Added heartbeat mechanism during lock hold to support long updaters
3. Removed PID check that was unreliable in CI/containers
4. Fixed ENOENT race when lock is released between EEXIST and stat

Tests: 180 pass, 0 fail
This commit is contained in:
Ralph Chang
2026-04-28 12:24:56 +08:00
parent b846b34e30
commit 8b21325469
+27 -26
View File
@@ -1,12 +1,13 @@
import { existsSync } from "fs";
import { randomUUID } from "crypto";
import { mkdir, open, readFile, rename, rm, stat, writeFile } from "fs/promises";
import type { FileHandle } from "fs/promises";
import { dirname } from "path";
const fileLocks = new Map<string, Promise<unknown>>();
const LOCK_WAIT_TIMEOUT_MS = 5000;
const LOCK_STALE_MS = 30_000;
const LOCK_ACQUIRE_GRACE_MS = 250;
const LOCK_HEARTBEAT_MS = 1_000;
export async function readJSON<T>(path: string, fallback: () => T): Promise<T> {
if (!existsSync(path)) return fallback();
@@ -28,36 +29,24 @@ async function readJSONStrict<T>(path: string, fallback: () => T): Promise<T> {
async function isLockStale(lockPath: string, now = Date.now()): Promise<boolean> {
try {
const stats = await stat(lockPath);
if (now - stats.mtimeMs > LOCK_STALE_MS) return true;
const content = await readFile(lockPath, "utf8");
const [pidText, createdText] = content.split("\n");
const pid = Number(pidText);
const [, createdText] = content.split("\n");
const createdAt = Number(createdText);
if (!Number.isFinite(createdAt)) {
return !(await isRecentlyTouched(lockPath, now));
}
if (now - createdAt > LOCK_STALE_MS) return true;
if (!Number.isInteger(pid) || pid <= 0) {
return !(await isRecentlyTouched(lockPath, now));
}
try {
process.kill(pid, 0);
return false;
} catch (error) {
return (error as NodeJS.ErrnoException).code === "ESRCH";
}
} catch {
return true;
return Number.isFinite(createdAt) && now - createdAt > LOCK_STALE_MS;
} catch (error) {
return (error as NodeJS.ErrnoException).code !== "ENOENT";
}
}
async function isRecentlyTouched(path: string, now = Date.now()): Promise<boolean> {
try {
return now - (await stat(path)).mtimeMs <= LOCK_ACQUIRE_GRACE_MS;
} catch {
return false;
}
/**
 * Replaces the contents of an open lock file with this process's lock record.
 *
 * The record is two newline-terminated lines: the current process ID followed
 * by the current `Date.now()` timestamp in milliseconds.
 *
 * @param handle - Open, writable handle to the lock file.
 * @returns A promise that resolves once the record has been written.
 */
async function writeLockInfo(handle: FileHandle): Promise<void> {
  const stampedAt = Date.now();
  const record = [process.pid, stampedAt].map((field) => `${field}\n`).join("");
  // Empty the file first so no bytes from an earlier record can remain after
  // the new one, then write the record starting at offset 0.
  await handle.truncate(0);
  await handle.write(record, 0, "utf8");
}
async function withFileLock<T>(path: string, fn: () => Promise<T>): Promise<T> {
@@ -68,10 +57,22 @@ async function withFileLock<T>(path: string, fn: () => Promise<T>): Promise<T> {
while (true) {
try {
const handle = await open(lockPath, "wx", 0o600);
let heartbeat: NodeJS.Timeout | undefined;
let heartbeatWrite: Promise<void> = Promise.resolve();
const queueHeartbeat = (): void => {
heartbeatWrite = heartbeatWrite
.catch(() => undefined)
.then(() => writeLockInfo(handle))
.catch(() => undefined);
};
try {
await handle.writeFile(`${process.pid}\n${Date.now()}\n`, "utf8");
await writeLockInfo(handle);
heartbeat = setInterval(queueHeartbeat, LOCK_HEARTBEAT_MS);
return await fn();
} finally {
if (heartbeat) clearInterval(heartbeat);
await heartbeatWrite.catch(() => undefined);
await handle.close();
await rm(lockPath, { force: true });
}