Skip to content

Commit 0143ade

Browse files
committed
fix(webapp): close pass-2 cross-table gaps (span-detail 500, one-time-token claim)
- The strengthened findRuns guard threw on GET /api/v1/runs/:runId/spans/:spanId, which pages child runs with take and no orderBy across both tables. Add a createdAt order so it takes the bounded cross-table merge (and the 50-row cap is now deterministic, most recent first) instead of throwing for every org. - Key the one-time-use-token cross-table claim on the token alone (a reserved task slot), matching the task-independent oneTimeUseToken unique constraint, so a multi-task token cannot mint twice across the flip. Stop excluding triggerAndWait from the token claim. Always resolve a held claim on the success path (publish, else release) so it cannot leak until its TTL.
1 parent 8ee83c5 commit 0143ade

4 files changed

Lines changed: 64 additions & 32 deletions

File tree

apps/webapp/app/routes/api.v1.runs.$runId.spans.$spanId.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,11 @@ export const loader = createLoaderApiRoute(
126126
const triggeredRuns = await runStore.findRuns(
127127
{
128128
take: 50,
129+
// A parentSpanId predicate spans both run tables (it carries no id), so
130+
// the cross-table store requires a total-order key to bound the merge;
131+
// createdAt also makes the 50-row cap deterministic (most recent first)
132+
// rather than an arbitrary single-table slice.
133+
orderBy: { createdAt: "desc" },
129134
select: {
130135
friendlyId: true,
131136
taskIdentifier: true,

apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@ import type { TraceEventConcern, TriggerTaskRequest } from "../types";
2121
// handleTriggerRequest.
2222
const resolveOrgMollifierFlag = makeResolveMollifierFlag();
2323

24+
// Reserved task slot for the cross-table one-time-use-token claim. The DB
25+
// constraint `@@unique([oneTimeUseToken])` is TASK-INDEPENDENT, so the claim
26+
// must be keyed on the token alone, not (task, token): a single token can
27+
// authorise more than one task, and two presentations for different tasks
28+
// straddling a `runTableV2` flip would otherwise build different claim keys and
29+
// both proceed. Folding the token into one constant task slot makes the claim
30+
// key (envId, token)-scoped, matching the DB constraint's scope. Paired with
31+
// the `otu:` idempotencyKey prefix, collision with a real task's idempotency
32+
// claim would require a task literally named this AND an idempotency key of the
33+
// form `otu:<token-hash>`.
34+
const ONE_TIME_USE_TOKEN_CLAIM_TASK = "__one_time_use_token__";
35+
2436
// Claim ownership context returned to the caller when the
2537
// IdempotencyKeyConcern won a pre-gate claim. Caller MUST publish the
2638
// winning runId on pipeline success (`publishClaim`) or release the
@@ -222,29 +234,30 @@ export class IdempotencyKeyConcern {
222234
// unique constraint on `oneTimeUseToken` can't see across the two tables,
223235
// so neither INSERT raises P2002 and one token spawns two runs. For
224236
// v2-cutover orgs, serialise on the token via a Redis claim so the first
225-
// presentation wins and the rest resolve to it. Excludes
226-
// resumeParentOnCompletion (triggerAndWait) to match the buffer
227-
// fallback's handling — a one-time PUBLIC_JWT token is a fire-and-forget
228-
// public trigger, not a parent/child wait, so that case is left to the
229-
// per-table constraint.
237+
// presentation wins and the rest are rejected as already-used. Not
238+
// excluded for resumeParentOnCompletion: for v2 orgs the idempotency-keyed
239+
// claim covers triggerAndWait too (claimEligible short-circuits on
240+
// shouldUseV2RunTable), so the token claim is consistent in doing the same;
241+
// the loser is rejected (not returned a cached run), so there is no
242+
// waitpoint-blocking subtlety to avoid.
230243
const oneTimeUseToken = request.options?.oneTimeUseToken;
231-
if (oneTimeUseToken && !request.body.options?.resumeParentOnCompletion) {
244+
if (oneTimeUseToken) {
232245
const orgFeatureFlags =
233246
(request.environment.organization?.featureFlags as
234247
| Record<string, unknown>
235248
| null
236249
| undefined) ?? null;
237250
if (shouldUseV2RunTable(orgFeatureFlags)) {
238-
// Namespace the claim key so a token can never collide with a real
239-
// idempotency key in the same (envId, taskIdentifier) slot. The TTL is
240-
// a fixed pipeline-dwell bound, NOT the customer idempotencyKeyTTL:
241-
// there is no idempotency key in this path, so a client-supplied TTL
242-
// has no meaning here, and a tiny value would expire the claim
243-
// mid-flight and reopen the cross-table dup window.
251+
// Key the claim on (envId, token), task-independent, to match the DB's
252+
// task-independent oneTimeUseToken constraint (see the constant's
253+
// comment). The TTL is a fixed pipeline-dwell bound, NOT the customer
254+
// idempotencyKeyTTL: there is no idempotency key in this path, so a
255+
// client-supplied TTL has no meaning here, and a tiny value would
256+
// expire the claim mid-flight and reopen the cross-table dup window.
244257
const claimKey = `otu:${oneTimeUseToken}`;
245258
const outcome = await claimOrAwait({
246259
envId: request.environment.id,
247-
taskIdentifier: request.taskId,
260+
taskIdentifier: ONE_TIME_USE_TOKEN_CLAIM_TASK,
248261
idempotencyKey: claimKey,
249262
ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS,
250263
safetyNetMs: env.TRIGGER_MOLLIFIER_CLAIM_WAIT_MS,
@@ -274,7 +287,7 @@ export class IdempotencyKeyConcern {
274287
idempotencyKeyExpiresAt,
275288
claim: {
276289
envId: request.environment.id,
277-
taskIdentifier: request.taskId,
290+
taskIdentifier: ONE_TIME_USE_TOKEN_CLAIM_TASK,
278291
idempotencyKey: claimKey,
279292
token: outcome.token,
280293
},

apps/webapp/app/runEngine/services/triggerTask.server.ts

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -716,17 +716,24 @@ export class RunEngineTriggerTaskService {
716716
}
717717
},
718718
);
719-
// Pipeline returned successfully — publish the claim if we held
720-
// one. Waiters polling for our key resolve to this runId.
721-
if (idempotencyClaim && result?.run?.friendlyId) {
722-
await publishMollifierClaim({
723-
envId: idempotencyClaim.envId,
724-
taskIdentifier: idempotencyClaim.taskIdentifier,
725-
idempotencyKey: idempotencyClaim.idempotencyKey,
726-
token: idempotencyClaim.token,
727-
runId: result.run.friendlyId,
728-
ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS,
729-
});
719+
// Pipeline returned — resolve the claim if we held one. On success (a run
720+
// with a friendlyId) publish it so waiters resolve to this runId;
721+
// otherwise release it. Never leave a held claim unresolved on the success
722+
// path: an orphaned claim would block concurrent waiters for the full
723+
// safety-net window even though this request did not produce a run.
724+
if (idempotencyClaim) {
725+
if (result?.run?.friendlyId) {
726+
await publishMollifierClaim({
727+
envId: idempotencyClaim.envId,
728+
taskIdentifier: idempotencyClaim.taskIdentifier,
729+
idempotencyKey: idempotencyClaim.idempotencyKey,
730+
token: idempotencyClaim.token,
731+
runId: result.run.friendlyId,
732+
ttlSeconds: env.TRIGGER_MOLLIFIER_CLAIM_TTL_SECONDS,
733+
});
734+
} else {
735+
await releaseMollifierClaim(idempotencyClaim);
736+
}
730737
}
731738
return result;
732739
} catch (err) {

apps/webapp/test/oneTimeUseTokenClaim.test.ts

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,18 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () =>
7070

7171
expect(result.isCached).toBe(false);
7272
if (result.isCached === false) {
73-
// The trigger pipeline must publish/release this claim — keyed on the
74-
// namespaced token so it can never collide with a real idempotency key.
73+
// The trigger pipeline must publish/release this claim. It is keyed on
74+
// the namespaced token AND a reserved, task-independent slot — matching
75+
// the task-independent oneTimeUseToken DB constraint, NOT request.taskId.
7576
expect(result.claim?.idempotencyKey).toBe("otu:tok-1");
7677
expect(result.claim?.envId).toBe("env_a");
77-
expect(result.claim?.taskIdentifier).toBe("my-task");
78+
expect(result.claim?.taskIdentifier).toBe("__one_time_use_token__");
7879
}
7980
expect(claimIdempotency).toHaveBeenCalledTimes(1);
80-
expect(claimIdempotency.mock.calls[0][0]).toMatchObject({ idempotencyKey: "otu:tok-1" });
81+
expect(claimIdempotency.mock.calls[0][0]).toMatchObject({
82+
idempotencyKey: "otu:tok-1",
83+
taskIdentifier: "__one_time_use_token__",
84+
});
8185
});
8286

8387
it("v2 org: a concurrent winner (claim resolved) rejects the second presentation as already-used", async () => {
@@ -113,7 +117,7 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () =>
113117
expect(claimIdempotency).not.toHaveBeenCalled();
114118
});
115119

116-
it("triggerAndWait one-time token: left to the per-table constraint (not claimed here)", async () => {
120+
it("triggerAndWait one-time token IS claimed (v2 orgs serialise it like the keyed claim)", async () => {
117121
const claimIdempotency = vi.fn(async () => ({ kind: "claimed" as const }));
118122
h.buffer = {
119123
claimIdempotency,
@@ -127,9 +131,12 @@ describe("IdempotencyKeyConcern · one-time-use token cross-table claim", () =>
127131

128132
expect(result.isCached).toBe(false);
129133
if (result.isCached === false) {
130-
expect(result.claim).toBeUndefined();
134+
// resumeParentOnCompletion is NOT excluded from the token claim: for a v2
135+
// org the cross-table dup hole is identical, and the loser is rejected
136+
// (no cached-run waitpoint subtlety to avoid).
137+
expect(result.claim?.idempotencyKey).toBe("otu:tok-1");
131138
}
132-
expect(claimIdempotency).not.toHaveBeenCalled();
139+
expect(claimIdempotency).toHaveBeenCalledTimes(1);
133140
});
134141

135142
it("no one-time token: ordinary no-idempotency-key trigger is unaffected", async () => {

0 commit comments

Comments
 (0)