From 039f92cc6be94f0b2cb20ddf8f1d988b6b540df8 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Thu, 25 Jun 2026 02:28:22 +0200 Subject: [PATCH 1/4] feat(results): temp-branch fallback for needs_human_merge sync conflicts When the auto-merge loop hits a genuine conflict, push the diverged local work to a fresh flat timestamped temp branch (agentv/results-sync/- -) instead of only blocking. The flat name avoids a git D/F conflict against the canonical results branch. Surface a pending_merge wire block (snake_case) so the Dashboard can link to a GitHub compare/PR. Adds confirmResultsMergeAndPull (the explicit "OK" action): fetch the remote, ancestor-guarded fast-forward of the target into the local results checkout, then resume normal sync. A premature OK is a safe no-op that leaves local work diverged and intact. Never force-pushes and never touches the canonical branch. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- packages/core/src/evaluation/results-repo.ts | 349 +++++++++++++++++-- packages/core/src/index.ts | 4 + 2 files changed, 331 insertions(+), 22 deletions(-) diff --git a/packages/core/src/evaluation/results-repo.ts b/packages/core/src/evaluation/results-repo.ts index eba76ace3..65f3130b9 100644 --- a/packages/core/src/evaluation/results-repo.ts +++ b/packages/core/src/evaluation/results-repo.ts @@ -1,5 +1,5 @@ import { execFile, spawn } from 'node:child_process'; -import { createHash } from 'node:crypto'; +import { createHash, randomBytes } from 'node:crypto'; import { existsSync, mkdirSync, @@ -215,6 +215,20 @@ export type ResultsRepoSyncStatus = | 'needs_human_merge' | 'syncing'; +/** + * Layer 2 of the no-force-push results sync: when a genuine conflict cannot be + * auto-merged, the diverged local work is pushed to a fresh timestamped temp + * branch (never the canonical branch) and surfaced here so the user can merge it + * on GitHub and click OK to resume sync. Wire shape (snake_case). + */ +export interface ResultsPendingMerge { + readonly temp_branch: string; + readonly target_branch: string; + readonly compare_url?: string; + readonly contributed_run_count?: number; + readonly created_at: string; +} + export interface ResultsRepoStatus { readonly configured: boolean; readonly available: boolean; @@ -250,6 +264,16 @@ export interface ResultsRepoStatus { readonly previous_remote_commit?: string; readonly force_pushed_commit?: string; readonly lease_commit?: string; + readonly pending_merge?: ResultsPendingMerge; +} + +/** Internal camelCase mirror of {@link ResultsPendingMerge}. */ +export interface PendingMergeDetails { + readonly tempBranch: string; + readonly targetBranch: string; + readonly compareUrl?: string; + readonly contributedRunCount?: number; + readonly createdAt: string; } export interface NormalizedResultsConfig { @@ -284,6 +308,7 @@ export interface DirectPushResultsResult { readonly previous_remote_commit?: string; readonly force_pushed_commit?: string; readonly lease_commit?: string; + readonly pending_merge?: ResultsPendingMerge; } export interface CheckedOutResultsRepoBranch { @@ -1412,6 +1437,7 @@ type ResultsBranchPushDetails = { readonly previousRemoteCommit?: string; readonly forcePushedCommit?: string; readonly leaseCommit?: string; + readonly pendingMerge?: PendingMergeDetails; }; type ResultsBranchPushOutcome = @@ -1433,6 +1459,18 @@ class ResultsBranchPushConflictError extends Error { } } +function pendingMergeToWire(details: PendingMergeDetails): ResultsPendingMerge { + return { + temp_branch: details.tempBranch, + target_branch: details.targetBranch, + ...(details.compareUrl !== undefined && { compare_url: details.compareUrl }), + ...(details.contributedRunCount !== undefined && { + contributed_run_count: details.contributedRunCount, + }), + created_at: details.createdAt, + }; +} + function pushDetailsToWire( details?: ResultsBranchPushDetails, ): Pick< @@ -1446,6 +1484,7 @@ function pushDetailsToWire( | 'previous_remote_commit' | 'force_pushed_commit' | 'lease_commit' + | 'pending_merge' > { if (!details) { return {}; @@ -1464,6 +1503,9 @@ function pushDetailsToWire( force_pushed_commit: details.forcePushedCommit, }), ...(details.leaseCommit !== undefined && { lease_commit: details.leaseCommit }), + ...(details.pendingMerge !== undefined && { + pending_merge: pendingMergeToWire(details.pendingMerge), + }), }; } @@ -1672,6 +1714,172 @@ function buildNeedsHumanMergeReason( )}). The remote branch is unchanged and no history was rewritten; resolve it with a GitHub pull request.`; } +// Stable, flat, slug-based naming for the Layer 2 temp branches. A flat name is +// required: a nested `agentv/results/v1/sync-...` ref would D/F-conflict with the +// canonical `agentv/results/v1` branch (git cannot store a ref as both a file and +// a directory). Mirrors the old backup-ref scheme. +function timestampForResultsRef(date = new Date()): string { + const pad = (value: number) => String(value).padStart(2, '0'); + return `${date.getUTCFullYear()}${pad(date.getUTCMonth() + 1)}${pad(date.getUTCDate())}T${pad( + date.getUTCHours(), + )}${pad(date.getUTCMinutes())}${pad(date.getUTCSeconds())}Z`; +} + +function slugifyResultsTargetBranch(branch: string): string { + return ( + branch + .trim() + .replace(/[^A-Za-z0-9._-]+/g, '-') + .replace(/^-+|-+$/g, '') || 'results' + ); +} + +function randomResultsRefToken(): string { + return randomBytes(4).toString('hex').slice(0, 6); +} + +// `agentv/results-sync/--` — a flat ref under a +// dedicated namespace so it never collides with (or D/F-conflicts against) the +// canonical results branch. `` avoids same-second collisions between +// concurrent writers. +function buildResultsSyncBranchName(targetBranch: string, token = randomResultsRefToken()): string { + return `agentv/results-sync/${timestampForResultsRef()}-${slugifyResultsTargetBranch( + targetBranch, + )}-${token}`; +} + +// Build a GitHub compare URL for the temp branch, but only when the remote is a +// GitHub remote. Returns undefined for non-GitHub remotes (zero-infra: URL +// construction only, never a `gh` shell-out). +export function buildResultsCompareUrl( + remoteUrl: string | undefined, + targetBranch: string, + syncBranch: string, +): string | undefined { + if (!remoteUrl) { + return undefined; + } + const match = remoteUrl.trim().match(/github\.com[:/]([^/\s]+)\/([^/\s]+?)(?:\.git)?\/?$/i); + if (!match) { + return undefined; + } + const [, owner, repo] = match; + return `https://github.com/${owner}/${repo}/compare/${encodeURIComponent( + targetBranch, + )}...${encodeURIComponent(syncBranch)}?expand=1`; +} + +async function resolveRemotePushUrl(repoDir: string, remote: string): Promise { + for (const args of [ + ['remote', 'get-url', '--push', remote], + ['remote', 'get-url', remote], + ]) { + const { stdout, exitCode } = await runGit(args, { cwd: repoDir, check: false }); + const url = stdout.trim(); + if (exitCode === 0 && url.length > 0) { + return url; + } + } + return undefined; +} + +// Count the unique top-level `runs//` run directories that the diverged +// local commit adds on top of the remote target tip. Best-effort: returns +// undefined when the diff cannot be computed. +async function countContributedRunDirs( + repoDir: string, + remoteTargetCommit: string | undefined, + sourceCommit: string, +): Promise { + if (!remoteTargetCommit) { + return undefined; + } + const { stdout, exitCode } = await runGit( + [ + 'diff', + '--name-only', + `${remoteTargetCommit}...${sourceCommit}`, + '--', + `${RESULTS_REPO_RUNS_DIR}/`, + ], + { cwd: repoDir, check: false }, + ); + if (exitCode !== 0) { + return undefined; + } + const runDirs = new Set(); + for (const line of stdout.split(/\r?\n/)) { + const file = line.trim(); + if (!file) { + continue; + } + const segments = file.split('/'); + // runs///... + if (segments[0] === RESULTS_REPO_RUNS_DIR && segments.length >= 3) { + runDirs.add(`${segments[0]}/${segments[1]}/${segments[2]}`); + } + } + return runDirs.size; +} + +// Layer 2 push: create-only push of the diverged local commit to a fresh +// timestamped temp branch. Never force, never the canonical branch. On the rare +// same-second ref collision, regenerate the random token and retry once. +async function pushResultsSyncBranch(params: { + readonly repoDir: string; + readonly remote: string; + readonly sourceCommit: string; + readonly targetBranch: string; +}): Promise { + const { repoDir, remote, sourceCommit, targetBranch } = params; + const remoteTargetCommit = await getCommitSha(repoDir, remoteBranchRef(targetBranch, remote)); + + let syncBranch = buildResultsSyncBranchName(targetBranch); + await assertValidResultsBranchName(repoDir, syncBranch); + try { + await runGit(['push', '--porcelain', remote, `${sourceCommit}:refs/heads/${syncBranch}`], { + cwd: repoDir, + }); + } catch (error) { + if (!isRefAlreadyExistsPushError(error)) { + throw error; + } + syncBranch = buildResultsSyncBranchName(targetBranch); + await assertValidResultsBranchName(repoDir, syncBranch); + await runGit(['push', '--porcelain', remote, `${sourceCommit}:refs/heads/${syncBranch}`], { + cwd: repoDir, + }); + } + + const compareUrl = buildResultsCompareUrl( + await resolveRemotePushUrl(repoDir, remote), + targetBranch, + syncBranch, + ); + const contributedRunCount = await countContributedRunDirs( + repoDir, + remoteTargetCommit, + sourceCommit, + ).catch(() => undefined); + + return { + tempBranch: syncBranch, + targetBranch, + ...(compareUrl !== undefined && { compareUrl }), + ...(contributedRunCount !== undefined && { contributedRunCount }), + createdAt: new Date().toISOString(), + }; +} + +function isRefAlreadyExistsPushError(error: unknown): boolean { + const text = gitErrorText(error); + return ( + text.includes('already exists') || + text.includes('cannot lock ref') || + text.includes('reference already exists') + ); +} + // Merge `remoteCommit` into the currently checked-out branch via a real // `git merge`, so the working tree and index advance together (the ref-only // path uses merge-tree + update-ref instead). Returns the merge exit code; a @@ -1729,6 +1937,43 @@ async function resolveResultBranchPushConflict(params: { targetBranch, }; + // Layer 2: a genuine conflict could not be auto-merged. Push the diverged + // local work to a fresh temp branch (never canonical, never force) and surface + // a `pending_merge` block so the user can merge it on GitHub and click OK. If + // the temp-branch push itself fails, still return the blocked outcome with the + // error appended to the reason — never crash the sync. + const buildHumanMergeOutcome = async ( + reason: string, + details: ResultsBranchPushDetails, + ): Promise => { + if (!details.localCommit) { + return { blocked: true, blockReason: reason, details, syncStatus: 'needs_human_merge' }; + } + try { + const pendingMerge = await pushResultsSyncBranch({ + repoDir, + remote, + sourceCommit: details.localCommit, + targetBranch: details.targetBranch, + }); + return { + blocked: true, + blockReason: reason, + details: { ...details, pendingMerge }, + syncStatus: 'needs_human_merge', + }; + } catch (error) { + return { + blocked: true, + blockReason: `${reason} (could not create the temp branch for the GitHub merge: ${getStatusMessage( + error, + )})`, + details, + syncStatus: 'needs_human_merge', + }; + } + }; + for (let attempt = 1; attempt <= RESULTS_PUSH_MERGE_MAX_ATTEMPTS; attempt += 1) { await fetchResultsRepo(repoDir, remote, targetBranch); const remoteCommit = await getCommitSha(repoDir, remoteRef); @@ -1780,23 +2025,19 @@ async function resolveResultBranchPushConflict(params: { const exitCode = await mergeRemoteIntoCheckedOutBranch(repoDir, remoteCommit); if (exitCode !== 0) { await runGit(['merge', '--abort'], { cwd: repoDir, check: false }); - return { - blocked: true, - blockReason: buildNeedsHumanMergeReason(targetBranch, remoteCommit, localCommit), + return await buildHumanMergeOutcome( + buildNeedsHumanMergeReason(targetBranch, remoteCommit, localCommit), details, - syncStatus: 'needs_human_merge', - }; + ); } pushSpec = 'HEAD'; } else { const base = await getMergeBaseCommit(repoDir, localCommit, remoteCommit); if (!base) { - return { - blocked: true, - blockReason: buildNeedsHumanMergeReason(targetBranch, remoteCommit, localCommit), + return await buildHumanMergeOutcome( + buildNeedsHumanMergeReason(targetBranch, remoteCommit, localCommit), details, - syncStatus: 'needs_human_merge', - }; + ); } const merge = await runGit( ['merge-tree', '--write-tree', `--merge-base=${base}`, localCommit, remoteCommit], @@ -1804,12 +2045,10 @@ async function resolveResultBranchPushConflict(params: { ); const mergedTree = merge.stdout.trim().split(/\r?\n/)[0]?.trim(); if (merge.exitCode !== 0 || !mergedTree) { - return { - blocked: true, - blockReason: buildNeedsHumanMergeReason(targetBranch, remoteCommit, localCommit), + return await buildHumanMergeOutcome( + buildNeedsHumanMergeReason(targetBranch, remoteCommit, localCommit), details, - syncStatus: 'needs_human_merge', - }; + ); } const { stdout: mergeCommitOut } = await runGitWithFallbackCommitIdentity( [ @@ -1848,12 +2087,10 @@ async function resolveResultBranchPushConflict(params: { }; } - return { - blocked: true, - blockReason: `Results branch ${targetBranch} could not be reconciled after ${RESULTS_PUSH_MERGE_MAX_ATTEMPTS} attempts because the remote kept advancing; retry sync. The remote branch is unchanged.`, - details: lastDetails, - syncStatus: 'needs_human_merge', - }; + return await buildHumanMergeOutcome( + `Results branch ${targetBranch} could not be reconciled after ${RESULTS_PUSH_MERGE_MAX_ATTEMPTS} attempts because the remote kept advancing; retry sync. The remote branch is unchanged.`, + lastDetails, + ); } async function statusFromInspection( @@ -2467,6 +2704,74 @@ export async function syncResultsRepoForProject(config: ResultsConfig): Promise< } } +// Ancestor-guarded fast-forward of the local results checkout toward the merged +// target branch. Only advances when the local commit is already an ancestor of +// the remote target (i.e. the target genuinely contains the local work), so a +// premature OK — where the user has not actually merged — is a no-op that leaves +// the local work diverged and intact. Operates only on the results checkout, +// never the user's source repo branch. +async function fastForwardResultsTowardTarget( + repoDir: string, + normalized: NormalizedResultsConfig, + targetBranch: string, +): Promise { + const upstreamRef = remoteBranchRef(targetBranch, normalized.remote); + if (!(await gitRefExists(repoDir, upstreamRef))) { + return; + } + const upstreamCommit = await getCommitSha(repoDir, upstreamRef); + if (!upstreamCommit) { + return; + } + + if (usesStorageBranchWorktree(normalized) && normalized.branch) { + const localRef = `refs/heads/${normalized.branch}`; + if (!(await gitRefExists(repoDir, localRef))) { + await runGit(['update-ref', localRef, upstreamCommit], { cwd: repoDir, check: false }); + await runGit(['branch', '--set-upstream-to', upstreamRef, normalized.branch], { + cwd: repoDir, + check: false, + }); + return; + } + const localCommit = await getCommitSha(repoDir, localRef); + if (localCommit && (await isAncestorCommit(repoDir, localCommit, upstreamCommit))) { + await runGit(['update-ref', localRef, upstreamCommit, localCommit], { + cwd: repoDir, + check: false, + }); + } + return; + } + + // Checkout mode: a plain fast-forward-only merge. It succeeds when the target + // contains the local work and is a harmless no-op (non-zero, ignored) when the + // local checkout is still diverged. + await runGit(['merge', '--ff-only', upstreamRef], { cwd: repoDir, check: false }); +} + +/** + * The explicit "OK" action of the Layer 2 human-merge flow. The user has merged + * the temp branch into the target on GitHub; AgentV fetches the remote, pulls the + * merged target into the local results checkout (ancestor-guarded fast-forward), + * and resumes the normal sync path which clears the `pending_merge` state. + * + * A premature OK (target not actually merged) is safe: the fast-forward is a + * no-op, the local work stays diverged and intact, and the resumed sync simply + * re-creates a fresh temp branch — no data loss, no force push. Only the results + * checkout is touched; the user's source repo branch is never modified. + */ +export async function confirmResultsMergeAndPull( + config: ResultsConfig, +): Promise { + const normalized = normalizeResultsConfig(config); + const repoDir = await ensureResultsRepoClone(normalized); + const targetBranch = normalized.branch ?? (await resolveDefaultBranch(repoDir)); + await fetchResultsRepo(repoDir, normalized.remote, normalized.branch).catch(() => undefined); + await fastForwardResultsTowardTarget(repoDir, normalized, targetBranch); + return syncResultsRepoForProject(config); +} + export async function checkoutResultsRepoBranch( config: ResultsConfig, branchName: string, diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index ad24e28a2..bd41ac4bf 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -107,6 +107,8 @@ export { ensureResultsRepoClone, syncResultsRepo, syncResultsRepoForProject, + confirmResultsMergeAndPull, + buildResultsCompareUrl, getResultsRepoLocalPaths, getResultsRepoStatus, getResultsRepoSyncStatus, @@ -139,6 +141,8 @@ export { type ResultsRepoLocalPaths, type ResultsRepoSyncStatus, type ResultsRepoStatus, + type ResultsPendingMerge, + type PendingMergeDetails, type WipWorktreeHandle, } from './evaluation/results-repo.js'; export { From 47085ef0b77ce1802559f9ff38af1212b310a68e Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Thu, 25 Jun 2026 02:28:44 +0200 Subject: [PATCH 2/4] feat(cli): add confirm-merge API for the results human-merge flow Expose confirmRemoteResultsMerge (remote.ts) mirroring syncRemoteResults' config-load + error-wrap pattern, and wire it to two HTTP routes (POST /api/remote/sync/confirm-merge and the project-scoped variant) mirroring the existing sync handlers. RemoteResultsStatus inherits the new pending_merge block from ResultsRepoStatus. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- apps/cli/src/commands/results/remote.ts | 38 +++++++++++++++++++++++++ apps/cli/src/commands/results/serve.ts | 9 ++++++ 2 files changed, 47 insertions(+) diff --git a/apps/cli/src/commands/results/remote.ts b/apps/cli/src/commands/results/remote.ts index 1581dab1b..22c5c5e3c 100644 --- a/apps/cli/src/commands/results/remote.ts +++ b/apps/cli/src/commands/results/remote.ts @@ -8,6 +8,7 @@ import { type NormalizedResultsConfig, type ResultsConfig, type ResultsRepoStatus, + confirmResultsMergeAndPull, directPushResultsWithDetails, directorySizeBytes, getProject, @@ -384,6 +385,43 @@ export async function syncRemoteResults( } } +/** + * The Layer 2 "OK" action: the user has merged the pending temp branch into the + * target on GitHub. Pull the merged target into the local results checkout and + * resume normal sync. Mirrors {@link syncRemoteResults}'s config-load and + * error-wrap behavior. + */ +export async function confirmRemoteResultsMerge( + cwd: string, + projectId?: string, +): Promise { + const config = await loadNormalizedResultsConfig(cwd, projectId); + if (!config) { + return { + ...(await getResultsRepoSyncStatus()), + run_count: 0, + }; + } + + try { + const status = await confirmResultsMergeAndPull(config); + invalidateGitRunsCache(config.path); + return { + ...status, + run_count: await getRemoteRunCount(config, status), + }; + } catch (error) { + const status = await getResultsRepoSyncStatus(config); + return { + ...status, + run_count: await getRemoteRunCount(config, status), + last_error: getStatusMessage(error), + blocked: true, + block_reason: getStatusMessage(error), + }; + } +} + function dedupeSyncedRunCopies(runs: SourcedResultFileMeta[]): SourcedResultFileMeta[] { const byRunId = new Map(); diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts index f4251fbb8..7685f7660 100644 --- a/apps/cli/src/commands/results/serve.ts +++ b/apps/cli/src/commands/results/serve.ts @@ -96,6 +96,7 @@ import { import { type SourcedResultFileMeta, clearRemoteRunTags, + confirmRemoteResultsMerge, ensureRemoteRunAvailable, findRunById, getRemoteResultsStatus, @@ -3002,6 +3003,9 @@ export function createApp( app.post('/api/remote/sync', async (c) => c.json(await syncRemoteResults(searchDir, defaultCtx.projectId)), ); + app.post('/api/remote/sync/confirm-merge', async (c) => + c.json(await confirmRemoteResultsMerge(searchDir, defaultCtx.projectId)), + ); app.get('/api/runs', (c) => handleRuns(c, defaultCtx)); app.post('/api/runs/combine', (c) => { if (readOnly) { @@ -3122,6 +3126,11 @@ export function createApp( ctx.json(await syncRemoteResults(dataCtx.searchDir, dataCtx.projectId)), ), ); + app.post('/api/projects/:projectId/remote/sync/confirm-merge', (c) => + withProject(c, async (ctx, dataCtx) => + ctx.json(await confirmRemoteResultsMerge(dataCtx.searchDir, dataCtx.projectId)), + ), + ); app.get('/api/projects/:projectId/runs', (c) => withProject(c, handleRuns)); app.post('/api/projects/:projectId/runs/combine', (c) => { if (readOnly) { From 91d1ab22cc98cd91b566a1d3d5ab9b20981d3126 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Thu, 25 Jun 2026 02:28:44 +0200 Subject: [PATCH 3/4] test(results): cover temp-branch fallback and confirm-merge resync Core tests: a genuine overlay conflict yields needs_human_merge + a pending_merge whose temp_branch exists on the remote and carries the local work (no backups ref, no force push, canonical untouched, flat D/F-safe name); confirm-merge clears pending after the temp branch is merged on the remote; a premature confirm-merge keeps local artifacts intact and re-creates a temp branch on the next sync; and a direct unit test of the GitHub compare-URL builder (built for GitHub remotes, omitted for non-GitHub). Serve tests: confirm-merge routes (unscoped + project-scoped) resume sync. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- apps/cli/test/commands/results/serve.test.ts | 111 ++++++++++ .../core/test/evaluation/results-repo.test.ts | 199 ++++++++++++++++++ 2 files changed, 310 insertions(+) diff --git a/apps/cli/test/commands/results/serve.test.ts b/apps/cli/test/commands/results/serve.test.ts index 8a6c5ef62..64fe8d68a 100644 --- a/apps/cli/test/commands/results/serve.test.ts +++ b/apps/cli/test/commands/results/serve.test.ts @@ -2452,6 +2452,117 @@ describe('serve app', () => { }, 20000); }); + describe('POST /api/remote/sync/confirm-merge', () => { + it('resumes sync by pulling the target branch (unscoped)', async () => { + const { remoteDir, cloneDir, seedDir } = initializeRemoteRepo(tempDir); + const runId = writeRemoteRunArtifact( + seedDir, + 'confirm-merge-unscoped', + '2026-03-26T14-00-00-000Z', + RESULT_A, + ); + + writeResultsConfig(tempDir, { remote: `file://${remoteDir}`, path: cloneDir }); + + const app = createApp([], tempDir, tempDir, undefined, { studioDir }); + const res = await app.request('/api/remote/sync/confirm-merge', { method: 'POST' }); + + expect(res.status).toBe(200); + const data = (await res.json()) as { + sync_status: string; + blocked?: boolean; + pull_performed?: boolean; + run_count: number; + }; + expect(data.sync_status).toBe('clean'); + expect(data.blocked).toBe(false); + expect(data.run_count).toBe(1); + expect( + existsSync( + path.join( + cloneDir, + 'runs', + 'confirm-merge-unscoped', + '2026-03-26T14-00-00-000Z', + 'index.jsonl', + ), + ), + ).toBe(true); + expect(runId).toBe('confirm-merge-unscoped::2026-03-26T14-00-00-000Z'); + }, 15000); + + it('resumes sync by pulling the target branch (project-scoped)', async () => { + const previousHome = process.env.AGENTV_HOME; + const homeDir = path.join(tempDir, 'agentv-home-confirm-merge-scoped'); + process.env.AGENTV_HOME = homeDir; + + try { + const { remoteDir, cloneDir, seedDir } = initializeRemoteRepo(tempDir); + const projectDir = path.join(tempDir, 'source-project-confirm-merge'); + mkdirSync(path.join(projectDir, '.agentv'), { recursive: true }); + mkdirSync(homeDir, { recursive: true }); + saveProjectRegistry({ + projects: [ + { + id: 'project-confirm-merge', + name: 'Project Confirm Merge', + path: projectDir, + results: { + repoUrl: `file://${remoteDir}`, + path: cloneDir, + sync: { autoPush: false }, + }, + addedAt: '2026-01-01T00:00:00.000Z', + lastOpenedAt: '2026-01-01T00:00:00.000Z', + }, + ], + }); + const runId = writeRemoteRunArtifact( + seedDir, + 'project-confirm-merge', + '2026-03-26T15-00-00-000Z', + RESULT_A, + ); + + const app = createApp([], tempDir, tempDir, undefined, { studioDir }); + const res = await app.request( + '/api/projects/project-confirm-merge/remote/sync/confirm-merge', + { method: 'POST' }, + ); + + expect(res.status).toBe(200); + const data = (await res.json()) as { + sync_status: string; + blocked?: boolean; + pull_performed?: boolean; + run_count: number; + }; + expect(data).toMatchObject({ + sync_status: 'clean', + blocked: false, + run_count: 1, + }); + expect( + existsSync( + path.join( + cloneDir, + 'runs', + 'project-confirm-merge', + runId.replace('project-confirm-merge::', ''), + 'index.jsonl', + ), + ), + ).toBe(true); + } finally { + if (previousHome === undefined) { + process.env.AGENTV_HOME = undefined; + } else { + process.env.AGENTV_HOME = previousHome; + } + } + }, 15000); + }); + // ── GET /api/runs/:filename ───────────────────────────────────────── describe('GET /api/runs/:filename', () => { diff --git a/packages/core/test/evaluation/results-repo.test.ts b/packages/core/test/evaluation/results-repo.test.ts index 38dc2b6de..280089205 100644 --- a/packages/core/test/evaluation/results-repo.test.ts +++ b/packages/core/test/evaluation/results-repo.test.ts @@ -18,7 +18,9 @@ import type { ResultsConfig } from '../../src/evaluation/loaders/config-loader.j import { AGENTV_RESULTS_REFS } from '../../src/evaluation/result-artifact-contract.js'; import { DEFAULT_RESULTS_BRANCH, + buildResultsCompareUrl, buildWipBranchName, + confirmResultsMergeAndPull, deleteWipBranch, directPushResults, directPushResultsWithDetails, @@ -164,6 +166,77 @@ async function createStaleResultBranchPushFixture(params: { }; } +// Sets up a genuine (non-auto-mergeable) overlay conflict where the canonical +// results branch is `agentv/results/v1` (the clone's default/checked-out +// branch). The local checkout and the remote both commit a conflicting scalar +// field on the same overlay file, so a sync routes to the Layer 2 human-merge +// path (needs_human_merge + a temp branch). The canonical branch name is +// deliberately the nested-looking `agentv/results/v1` to prove the flat +// temp-branch name never D/F-conflicts with it. +async function setupCanonicalOverlayConflict(rootDir: string): Promise<{ + remoteDir: string; + seedDir: string; + cloneDir: string; + targetBranch: string; + tagsRel: string; + config: ResultsConfig; + remoteBefore: string; +}> { + const targetBranch = DEFAULT_RESULTS_BRANCH; + const remoteDir = path.join(rootDir, `remote-${randomToken()}.git`); + const seedDir = path.join(rootDir, `seed-${randomToken()}`); + git(`git init --bare --initial-branch=${targetBranch} --quiet "${remoteDir}"`, rootDir); + git(`git clone --quiet "${remoteDir}" "${seedDir}"`, rootDir); + git('git config user.email "test@example.com"', seedDir); + git('git config user.name "Test User"', seedDir); + + const tagsRel = path.join( + 'metadata', + 'runs', + 'conflict', + '2026-06-24T10-00-00-000Z', + 'tags.json', + ); + const writeOverlay = (repoDir: string, rating: number) => { + const tagPath = path.join(repoDir, tagsRel); + mkdirSync(path.dirname(tagPath), { recursive: true }); + writeFileSync(tagPath, `${JSON.stringify({ tags: ['keep'], rating }, null, 2)}\n`); + }; + + writeFileSync(path.join(seedDir, 'README.md'), '# results\n'); + writeOverlay(seedDir, 1); + git('git add . && git commit --quiet -m "seed overlay"', seedDir); + git(`git push --quiet origin HEAD:${targetBranch}`, seedDir); + + const cloneDir = path.join(rootDir, `results-clone-${randomToken()}`); + const config: ResultsConfig = { + mode: 'github', + repo: `file://${remoteDir}`, + path: cloneDir, + auto_push: true, + }; + await ensureResultsRepoClone(config); + git('git config user.email "test@example.com"', cloneDir); + git('git config user.name "Test User"', cloneDir); + git('git pull --ff-only --quiet', cloneDir); + + // Local edit: a conflicting scalar (rating=3) committed on the canonical branch. + writeOverlay(cloneDir, 3); + git('git add metadata && git commit --quiet -m "local overlay scalar"', cloneDir); + + // Remote advances with a different conflicting scalar (rating=5). + writeOverlay(seedDir, 5); + git('git add metadata && git commit --quiet -m "remote overlay scalar"', seedDir); + git(`git push --quiet origin HEAD:${targetBranch}`, seedDir); + const remoteBefore = git(`git --git-dir "${remoteDir}" rev-parse ${targetBranch}`, rootDir); + + return { remoteDir, seedDir, cloneDir, targetBranch, tagsRel, config, remoteBefore }; +} + +function randomToken(): string { + return Math.random().toString(36).slice(2, 8); +} + function writeRunArtifacts(runDir: string, experiment: string, timestamp: string): void { mkdirSync(runDir, { recursive: true }); writeFileSync(path.join(runDir, 'index.jsonl'), '{"test_id":"alpha"}\n'); @@ -2100,6 +2173,132 @@ describe('results repo write path', () => { expect(git('git status --porcelain', cloneDir)).toBe(''); }, 20000); + it('pushes diverged work to a flat temp branch and surfaces pending_merge on a genuine conflict', async () => { + const { remoteDir, cloneDir, targetBranch, tagsRel, config, remoteBefore } = + await setupCanonicalOverlayConflict(rootDir); + + const status = await syncResultsRepoForProject(config); + + expect(status.sync_status).toBe('needs_human_merge'); + expect(status.blocked).toBe(true); + expect(status.block_reason).not.toMatch(/force/i); + + // A pending_merge wire block (snake_case) is surfaced for the Dashboard. + const pending = status.pending_merge; + expect(pending).toBeDefined(); + expect(pending?.target_branch).toBe(targetBranch); + expect(typeof pending?.created_at).toBe('string'); + expect(typeof pending?.contributed_run_count).toBe('number'); + // The branch is flat under a dedicated namespace, never nested under the + // canonical branch, so it cannot D/F-conflict with agentv/results/v1. + expect(pending?.temp_branch).toMatch(/^agentv\/results-sync\/.+/); + expect(pending?.temp_branch.startsWith(`${targetBranch}/`)).toBe(false); + // Snake_case keys only on the wire shape. + expect(Object.keys(pending ?? {})).toEqual( + expect.arrayContaining(['temp_branch', 'target_branch', 'created_at']), + ); + // A non-GitHub (file://) remote yields no compare_url. + expect(pending?.compare_url).toBeUndefined(); + + const tempBranch = pending?.temp_branch ?? ''; + // The temp branch exists on the remote alongside the canonical branch (the + // create-only push succeeded — no D/F-conflict) and carries the local work. + const remoteRefs = git( + `git --git-dir "${remoteDir}" for-each-ref --format="%(refname:short)" refs/heads`, + rootDir, + ); + expect(remoteRefs).toContain(tempBranch); + expect(remoteRefs).toContain(targetBranch); + const overlayOnTemp = JSON.parse( + git(`git --git-dir "${remoteDir}" show ${tempBranch}:${tagsRel}`, rootDir), + ); + expect(overlayOnTemp.rating).toBe(3); + + // The canonical branch is untouched: no merge commit, no force, no backup. + expect(git(`git --git-dir "${remoteDir}" rev-parse ${targetBranch}`, rootDir)).toBe( + remoteBefore, + ); + expect( + git(`git --git-dir "${remoteDir}" for-each-ref refs/heads/agentv/backups`, rootDir), + ).toBe(''); + + // The local work is intact on disk. + expect(JSON.parse(readFileSync(path.join(cloneDir, tagsRel), 'utf8')).rating).toBe(3); + }, 30000); + + it('clears the pending merge after the temp branch is merged into the target', async () => { + const { remoteDir, cloneDir, targetBranch, tagsRel, config } = + await setupCanonicalOverlayConflict(rootDir); + + const blocked = await syncResultsRepoForProject(config); + expect(blocked.sync_status).toBe('needs_human_merge'); + const tempBranch = blocked.pending_merge?.temp_branch ?? ''; + expect(tempBranch).not.toBe(''); + + // Simulate the user merging the temp branch into the target on GitHub: a + // fresh clone merges the temp branch (resolving the scalar toward the temp + // side) and pushes the result onto the canonical branch. + const mergeDir = path.join(rootDir, `merge-${randomToken()}`); + git(`git clone --quiet "${remoteDir}" "${mergeDir}"`, rootDir); + git('git config user.email "merge@example.com"', mergeDir); + git('git config user.name "Merge Bot"', mergeDir); + git(`git fetch --quiet origin "+refs/heads/*:refs/remotes/origin/*"`, mergeDir); + git(`git checkout --quiet -B ${targetBranch} origin/${targetBranch}`, mergeDir); + git(`git merge --no-edit -X theirs origin/${tempBranch}`, mergeDir); + git(`git push --quiet origin HEAD:${targetBranch}`, mergeDir); + + const resumed = await confirmResultsMergeAndPull(config); + + expect(resumed.blocked ?? false).toBe(false); + expect(resumed.sync_status).not.toBe('needs_human_merge'); + expect(resumed.sync_status).toBe('clean'); + expect(resumed.pending_merge).toBeUndefined(); + // The local checkout now matches the merged target which carries the work. + expect(JSON.parse(readFileSync(path.join(cloneDir, tagsRel), 'utf8')).rating).toBe(3); + }, 30000); + + it('keeps local work intact on a premature confirm-merge (target not yet merged)', async () => { + const { cloneDir, targetBranch, tagsRel, config } = + await setupCanonicalOverlayConflict(rootDir); + + const blocked = await syncResultsRepoForProject(config); + expect(blocked.sync_status).toBe('needs_human_merge'); + const firstTempBranch = blocked.pending_merge?.temp_branch ?? ''; + + // OK clicked without merging on GitHub: pulling the target is a no-op, the + // local work stays diverged, and the resumed sync re-creates a temp branch. + const resumed = await confirmResultsMergeAndPull(config); + + expect(resumed.sync_status).toBe('needs_human_merge'); + expect(resumed.blocked).toBe(true); + expect(resumed.pending_merge?.temp_branch).toMatch(/^agentv\/results-sync\/.+/); + expect(resumed.pending_merge?.target_branch).toBe(targetBranch); + // No data loss: the local work survives the premature OK. + expect(JSON.parse(readFileSync(path.join(cloneDir, tagsRel), 'utf8')).rating).toBe(3); + expect(firstTempBranch).not.toBe(''); + }, 30000); + + it('builds a GitHub compare URL only for GitHub remotes', () => { + expect( + buildResultsCompareUrl( + 'https://github.com/EntityProcess/agentv.git', + 'agentv/results/v1', + 'agentv/results-sync/20260624T100000Z-agentv-results-v1-ab12cd', + ), + ).toBe( + 'https://github.com/EntityProcess/agentv/compare/agentv%2Fresults%2Fv1...agentv%2Fresults-sync%2F20260624T100000Z-agentv-results-v1-ab12cd?expand=1', + ); + expect(buildResultsCompareUrl('git@github.com:EntityProcess/agentv.git', 'main', 'temp')).toBe( + 'https://github.com/EntityProcess/agentv/compare/main...temp?expand=1', + ); + // Non-GitHub remotes (local file://, other hosts) get no compare URL. + expect(buildResultsCompareUrl('file:///tmp/remote.git', 'main', 'temp')).toBeUndefined(); + expect( + buildResultsCompareUrl('https://gitlab.com/owner/repo.git', 'main', 'temp'), + ).toBeUndefined(); + expect(buildResultsCompareUrl(undefined, 'main', 'temp')).toBeUndefined(); + }); + it('supersedes stale sync errors with the current conflicted status', async () => { const { remoteDir, seedDir } = initializeRemoteRepo(rootDir); const cloneDir = path.join(rootDir, 'results-clone'); From 722247e1ae8b89499638d4d868aee4ba9a4349ab Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Thu, 25 Jun 2026 02:28:44 +0200 Subject: [PATCH 4/4] docs(plan): correct results temp-branch naming to a flat D/F-safe scheme The nested agentv/results/v1/sync- name D/F-conflicts with the canonical agentv/results/v1 ref. Use the flat agentv/results-sync/-- scheme so code and design doc agree. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- ...24-001-feat-conflict-free-results-sync-plan.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/plans/2026-06-24-001-feat-conflict-free-results-sync-plan.md b/docs/plans/2026-06-24-001-feat-conflict-free-results-sync-plan.md index 2edef97db..e3b94bc45 100644 --- a/docs/plans/2026-06-24-001-feat-conflict-free-results-sync-plan.md +++ b/docs/plans/2026-06-24-001-feat-conflict-free-results-sync-plan.md @@ -147,8 +147,13 @@ already manages a dedicated checkout / storage-branch worktree, so this is a one When Layer 1 returns `needs_human_merge`: 1. **Push to a new timestamped temp branch**, never canonical: - `agentv/results/v1/sync--` (create-only push; `` avoids - same-second collisions between concurrent writers). + `agentv/results-sync/--` (create-only push; + `` is the slugified target branch and `` avoids same-second + collisions between concurrent writers). The name is deliberately **flat** under + a dedicated `agentv/results-sync/` namespace: a nested + `agentv/results/v1/sync-...` ref would D/F-conflict with the canonical + `agentv/results/v1` branch (git cannot store one ref as both a file and a + directory). 2. **Surface a link** in the Dashboard: - A **compare/PR URL**. With a GitHub remote and `gh`, build `https://github.com///compare/...?expand=1` @@ -183,7 +188,7 @@ push. #### Concurrency -Each writer uses a unique `sync--` branch, so temp pushes never collide, +Each writer uses a unique `agentv/results-sync/--` branch, so temp pushes never collide, and the runs they carry live in disjoint `runs///` dirs. The target branch absorbs N temp PRs through N normal merges. The only true contention is the mutable overlay, which Layer 1's JSON-union driver already handles for add/remove; a genuine @@ -284,7 +289,7 @@ conflicts itself, which keeps the core tiny. ### Phase 2 — Temp-branch + OK-to-resync - Core helpers: `pushResultsSyncBranch()` (create-only push to - `sync--`) and `pullResultsTargetBranch()` (fetch + FF/merge target into + `agentv/results-sync/--`) and `pullResultsTargetBranch()` (fetch + FF/merge target into the local checkout, invoked on OK). - API: extend `POST /api/remote/sync` to return a `pending_merge` block (`temp_branch`, `compare_url`, `contributed_run_count`); add @@ -316,7 +321,7 @@ conflicts itself, which keeps the core tiny. - Automatic merge detection (tree-equality/ancestor/deletion watching) — replaced by an explicit OK. - **Temp-branch deletion/cleanup.** The user owns the merge on GitHub, so deleting - the merged `sync--` branch belongs to that same GitHub flow + the merged `agentv/results-sync/--` branch belongs to that same GitHub flow (auto-delete-on-merge, or the user's manual cleanup). AgentV does not delete temp branches and does not track them for cleanup. AgentV only creates the temp branch and reads the target on OK.