diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 7f2c0b6..6d36a6b 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agent-eval-rpc" -version = "0.99.0" +version = "0.100.0" description = "Python RPC client for @tangle-network/agent-eval — judge content against rubrics over HTTP or stdio RPC. Eval logic runs in the Node runtime; this package is a thin wire client." readme = "README.md" requires-python = ">=3.10" diff --git a/clients/python/src/agent_eval_rpc/__init__.py b/clients/python/src/agent_eval_rpc/__init__.py index 45ac544..dc207aa 100644 --- a/clients/python/src/agent_eval_rpc/__init__.py +++ b/clients/python/src/agent_eval_rpc/__init__.py @@ -58,7 +58,7 @@ try: __version__ = version("agent-eval-rpc") except PackageNotFoundError: - __version__ = "0.99.0" + __version__ = "0.100.0" __all__ = [ "Client", diff --git a/package.json b/package.json index d463106..7cd0413 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@tangle-network/agent-eval", - "version": "0.99.0", + "version": "0.100.0", "description": "Evaluate and improve AI agents from runs, traces, judges, and feedback. 
Compare candidates, cluster failures, measure lift, and gate releases.", "homepage": "https://github.com/tangle-network/agent-eval#readme", "repository": { diff --git a/src/hidden-criteria-grading.test.ts b/src/hidden-criteria-grading.test.ts new file mode 100644 index 0000000..8554076 --- /dev/null +++ b/src/hidden-criteria-grading.test.ts @@ -0,0 +1,214 @@ +import { describe, expect, it } from 'vitest' +import type { JudgeScore } from './campaign/types' +import { ValidationError } from './errors' +import { + agentVisibleFields, + assertNoHiddenLeak, + blendHeldout, + defaultBlendWeights, + type FieldDestination, + gradeOnHidden, + type HiddenCriteriaGrader, + hiddenGrade, + isHiddenDestination, + routeFields, + withHeldoutBlend, +} from './hidden-criteria-grading' + +// A NON-coding domain proves the firewall has no domain coupling: here it is a +// legal-brief task. The agent sees the question + a sample citation; the hidden +// "answer key" (the required holdings) and the rubric anchors are graded but +// never reach the agent. Any domain plugs in its own grader the same way. +const legalRouting = { + question: 'agent-visible', + sampleCitation: 'develop-against', + requiredHoldings: 'grading-only', + rubricNote: 'judge-only', +} as const satisfies Record + +const legalValues = { + question: 'Draft a brief arguing the search violated the Fourth Amendment.', + sampleCitation: 'See Katz v. United States, 389 U.S. 347 (1967).', + requiredHoldings: 'Must cite Carpenter v. 
United States and the third-party-doctrine limit.', + rubricNote: 'Reward a clean reasonable-expectation-of-privacy framing.', +} as const + +describe('field routing by destination', () => { + it('classifies grading-only and judge-only as hidden, the rest as visible', () => { + expect(isHiddenDestination('grading-only')).toBe(true) + expect(isHiddenDestination('judge-only')).toBe(true) + expect(isHiddenDestination('agent-visible')).toBe(false) + expect(isHiddenDestination('develop-against')).toBe(false) + }) + + it('routes a domain field map into RoutedFields', () => { + const fields = routeFields(legalRouting, legalValues) + expect(fields).toHaveLength(4) + expect(fields.find((f) => f.name === 'requiredHoldings')?.destination).toBe('grading-only') + }) + + it('fails loud when a routed field has no value', () => { + expect(() => + routeFields({ a: 'agent-visible', b: 'grading-only' }, { a: 'present' } as unknown as Record< + 'a' | 'b', + string + >), + ).toThrow(ValidationError) + }) + + it('agentVisibleFields keeps only the non-hidden fields', () => { + const visible = agentVisibleFields(routeFields(legalRouting, legalValues)) + const names = visible.map((f) => f.name).sort() + expect(names).toEqual(['question', 'sampleCitation']) + }) +}) + +describe('assertNoHiddenLeak — the firewall', () => { + const fields = routeFields(legalRouting, legalValues) + + it('passes when the agent context holds only visible fields', () => { + const cleanContext = `${legalValues.question}\n${legalValues.sampleCitation}` + expect(() => assertNoHiddenLeak(fields, cleanContext)).not.toThrow() + }) + + it('REJECTS when a grading-only field reaches the agent context', () => { + const leakyContext = `${legalValues.question}\n${legalValues.requiredHoldings}` + let thrown: unknown + try { + assertNoHiddenLeak(fields, leakyContext) + } catch (err) { + thrown = err + } + expect(thrown).toBeInstanceOf(ValidationError) + expect((thrown as Error).message).toMatch(/requiredHoldings/) + 
expect((thrown as Error).message).toMatch(/grading-only/) + }) + + it('REJECTS when a judge-only field reaches the agent context', () => { + const leakyContext = `${legalValues.question}\n${legalValues.rubricNote}` + expect(() => assertNoHiddenLeak(fields, leakyContext)).toThrow(/judge-only/) + }) + + it('does not flag a develop-against field that appears in the context (intentional)', () => { + const tddContext = `${legalValues.question}\n${legalValues.sampleCitation}` + expect(() => assertNoHiddenLeak(fields, tddContext)).not.toThrow() + }) + + it('skips a too-short hidden value (no spurious substring match)', () => { + const fields = routeFields( + { task: 'agent-visible', key: 'grading-only' }, + { task: 'Summarize the contract clause about indemnity.', key: 'A' }, + ) + expect(() => assertNoHiddenLeak(fields, 'A wholly innocent prompt.')).not.toThrow() + }) +}) + +describe('hiddenGrade — honest pass-rate normalization', () => { + it('computes passRate = passed / total', () => { + expect(hiddenGrade(3, 4).passRate).toBeCloseTo(0.75) + }) + + it('returns 0 (honest no-run) when total is 0', () => { + const g = hiddenGrade(0, 0, 'criteria did not run') + expect(g.passRate).toBe(0) + expect(g.total).toBe(0) + expect(g.notes).toBe('criteria did not run') + }) + + it('never reports passed above total', () => { + expect(hiddenGrade(9, 4).passed).toBe(4) + }) +}) + +describe('gradeOnHidden — firewall + domain grader wired', () => { + // The domain's OWN grader: check the brief artifact against the hidden + // required-holdings string. The substrate bakes in NO node/test/exec — this + // grader is pure string matching; a coding domain would run node --test here. 
+ const legalGrader: HiddenCriteriaGrader<{ brief: string }, { mustCite: string[] }> = ( + artifact, + hidden, + ) => { + const passed = hidden.mustCite.filter((c) => artifact.brief.includes(c)).length + return hiddenGrade(passed, hidden.mustCite.length) + } + + const fields = routeFields(legalRouting, legalValues) + const agentContext = `${legalValues.question}\n${legalValues.sampleCitation}` + + it('grades against the hidden criteria behind the firewall', async () => { + const result = await gradeOnHidden({ + artifact: { brief: 'We rely on Carpenter v. United States and Katz.' }, + hiddenCriteria: { mustCite: ['Carpenter v. United States', 'Katz'] }, + grader: legalGrader, + firewall: { fields, agentContext }, + }) + expect(result.passRate).toBe(1) + expect(result.total).toBe(2) + }) + + it('throws BEFORE grading if the firewall is breached at grading time', async () => { + const leaky = `${agentContext}\n${legalValues.requiredHoldings}` + await expect( + gradeOnHidden({ + artifact: { brief: 'irrelevant' }, + hiddenCriteria: { mustCite: ['Carpenter v. 
United States'] }, + grader: legalGrader, + firewall: { fields, agentContext: leaky }, + }), + ).rejects.toBeInstanceOf(ValidationError) + }) +}) + +describe('blendHeldout — composite weighting', () => { + it('composes with the default 0.7 / 0.3 weights', () => { + // 0.7 * 1.0 (perfect held-out) + 0.3 * 0.5 (mediocre judge) = 0.85 + expect(blendHeldout(1, 0.5)).toBeCloseTo(0.85) + // 0.7 * 0.0 (failed held-out) + 0.3 * 1.0 (loved by judge) = 0.30 (capped low) + expect(blendHeldout(0, 1)).toBeCloseTo(0.3) + expect(defaultBlendWeights).toEqual({ heldout: 0.7, judge: 0.3 }) + }) + + it('renormalizes arbitrary positive weight ratios', () => { + // 3:1 ratio == 0.75 / 0.25 + expect(blendHeldout(1, 0, { heldout: 3, judge: 1 })).toBeCloseTo(0.75) + expect(blendHeldout(0, 1, { heldout: 1, judge: 1 })).toBeCloseTo(0.5) + }) + + it('clamps out-of-range inputs to [0,1]', () => { + expect(blendHeldout(2, -1)).toBeCloseTo(0.7) // 0.7*1 + 0.3*0 + }) + + it('throws on a non-positive weight sum', () => { + expect(() => blendHeldout(1, 1, { heldout: 0, judge: 0 })).toThrow(ValidationError) + }) +}) + +describe('withHeldoutBlend — judge composite becomes the blend', () => { + const baseScore = (_input: { artifact: { heldoutPassRate: number } }): JudgeScore => ({ + dimensions: { quality: 0.5 }, + composite: 0.5, + notes: 'style ok', + }) + + it('replaces the judge composite with the held-out-weighted blend', async () => { + const blended = withHeldoutBlend(baseScore, (a) => a.heldoutPassRate) + const out = await blended({ artifact: { heldoutPassRate: 1 } }) + // 0.7 * 1.0 + 0.3 * 0.5 = 0.85 + expect(out.composite).toBeCloseTo(0.85) + expect(out.dimensions).toEqual({ quality: 0.5 }) + expect(out.notes).toMatch(/held-out 100%/) + }) + + it('passes a failed judge verdict through untouched', async () => { + const failing = (_i: { artifact: { heldoutPassRate: number } }): JudgeScore => ({ + dimensions: {}, + composite: 0, + notes: 'judge errored', + failed: true, + }) + const blended = 
withHeldoutBlend(failing, (a) => a.heldoutPassRate) + const out = await blended({ artifact: { heldoutPassRate: 1 } }) + expect(out.failed).toBe(true) + expect(out.composite).toBe(0) + }) +}) diff --git a/src/hidden-criteria-grading.ts b/src/hidden-criteria-grading.ts new file mode 100644 index 0000000..6e4836f --- /dev/null +++ b/src/hidden-criteria-grading.ts @@ -0,0 +1,329 @@ +/** + * Hidden-criteria grading firewall — grade an agent on criteria it never saw. + * + * A trustworthy benchmark splits every scenario's data by WHERE each field is + * allowed to flow, then proves the held-out / judge-only fields never reach the + * agent during the run. The coding bench expresses this with four destinations + * (prompt / develop-against / held-out suite / rubric); this module lifts the + * domain-FREE core out of it so research, legal, tax, content — any domain — + * can declare the same routing and get the same firewall enforcement and the + * same held-out-weighted composite, plugging in its OWN grader. + * + * Two reusable pieces, both domain-agnostic: + * + * 1. FIELD ROUTING BY DESTINATION. A scenario declares each field's + * `FieldDestination`; `assertNoHiddenLeak` is a pure checker that throws if + * a grading-only or judge-only field's value appears in what reaches the + * agent. The domain decides which fields exist and where they go — the + * substrate only enforces "hidden stays hidden". + * + * 2. HIDDEN-CRITERIA GRADING. The domain supplies its own grader + * `(artifact, hiddenCriteria) => { passRate, total }` — the coding + * node-test executor is ONE such grader a consumer plugs in; the substrate + * bakes in NO node/test/TS/exec/regex. `gradeOnHidden` runs that grader + * behind the firewall and `blendHeldout` composes its pass rate with a + * judge score into the final number the leaderboard ranks on. 
+ * + * Shape mirrors `treatment-gate`/`authenticity`: pure predicates and pure + * composition over already-computed values, fail-loud, with the + * "which field / which weight / which grader" decisions left as parameters and + * no domain literal anywhere in the module. + * + * Lives next to `test-graded-scenario` and `partition-held-out` — it is a + * scorecard/grading concept that makes sense without a running agent loop. + */ + +import type { JudgeScore } from './campaign/types' +import { ValidationError } from './errors' + +// ── 1. field routing by destination ────────────────────────────────────────── + +/** + * Where one scenario field is allowed to flow. The firewall guarantee is keyed + * on this tag, not on a field name — a domain can have any number of fields per + * destination. + * + * - `agent-visible` reaches the agent's context during the run (the prompt, + * the task statement — what the agent reads to act). + * - `develop-against` seeded into the agent's environment during the run so it + * can iterate (a visible example/test/reference). The + * agent MAY read it — that is intentional (real TDD). Not + * a leak: it is example-grade, not the grading criteria. + * - `grading-only` the hidden criteria. Used ONLY at grading, after the run + * — the held-out suite / answer key / hidden requirements. + * Must NEVER reach the agent context. This is what makes a + * good score un-memorizable. + * - `judge-only` grading context for the judge only (rubric anchors, + * design intent). Lives with the judge, never in the agent + * context. + */ +export type FieldDestination = 'agent-visible' | 'develop-against' | 'grading-only' | 'judge-only' + +/** The destinations a value must be kept OUT of the agent context for. */ +const hiddenDestinations: ReadonlySet = new Set([ + 'grading-only', + 'judge-only', +]) + +/** True for the destinations whose values must never reach the agent context. 
*/ +export function isHiddenDestination(destination: FieldDestination): boolean { + return hiddenDestinations.has(destination) +} + +/** + * A scenario's fields routed by destination. The domain owns the field set + * (`TFields` — a record of its named fields to their string-renderable values) + * and declares one `FieldDestination` per field. `routeFields` builds this from + * a domain's `(value, destination)` map; the firewall reads it. + */ +export interface RoutedField { + /** The field's name — for diagnostics only. */ + name: string + /** The field's value as it would be rendered into text. The firewall compares + * this against the agent context, so a domain that ships structured data + * passes a stable string projection (e.g. JSON) of the hidden value. */ + value: string + destination: FieldDestination +} + +/** + * Route a domain's named fields by destination into the firewall's input shape. + * The `routing` declares each field's destination; the `values` carry each + * field's renderable string. A field present in `routing` but missing from + * `values` is an authoring error (fail loud) — every routed field must have a + * value the firewall can check. + */ +export function routeFields( + routing: Readonly>, + values: Readonly>, +): RoutedField[] { + const out: RoutedField[] = [] + for (const name of Object.keys(routing) as TName[]) { + const value = values[name] + if (value === undefined) { + throw new ValidationError( + `routed field "${name}" has a destination but no value — every routed field must carry its value`, + ) + } + out.push({ name, value, destination: routing[name] }) + } + return out +} + +/** A single detected leak: a hidden field whose value appears in the agent context. */ +export interface HiddenLeak { + field: string + destination: FieldDestination +} + +export interface NoLeakOptions { + /** Minimum hidden-value length to check. 
A hidden value shorter than this is + * skipped — a one-word or empty hidden field would substring-match innocuous + * prose and is not meaningful evidence of a leak. Default 12. */ + minMatchLength?: number +} + +/** + * The FIREWALL. Throws `ValidationError` if any `grading-only`/`judge-only` + * field's value is found inside `agentContext` — the exact text that reaches the + * agent during the run (its prompt, its seeded files concatenated, whatever the + * caller assembled). `agent-visible` and `develop-against` fields are never + * checked: they are meant to be there. + * + * Substring containment is the check: it is domain-free and catches the failure + * that matters — a hidden answer key, held-out case, or rubric anchor pasted + * into the prompt. Returns the routed fields on success so a caller can chain. + */ +export function assertNoHiddenLeak( + fields: readonly RoutedField[], + agentContext: string, + opts: NoLeakOptions = {}, +): readonly RoutedField[] { + const minLen = opts.minMatchLength ?? 12 + const leaks: HiddenLeak[] = [] + for (const field of fields) { + if (!isHiddenDestination(field.destination)) continue + const needle = field.value.trim() + if (needle.length === 0 || needle.length < minLen) continue // an empty needle is a substring of every context, so skip it even when minMatchLength is 0 or negative + if (agentContext.includes(needle)) { + leaks.push({ field: field.name, destination: field.destination }) + } + } + if (leaks.length > 0) { + const detail = leaks.map((l) => `"${l.field}" (${l.destination})`).join(', ') + throw new ValidationError( + `hidden-criteria firewall breached: ${leaks.length} hidden field(s) reached the agent context: ${detail}`, + ) + } + return fields +} + +/** Collect the values a domain may safely render into the agent context — the + * `agent-visible` (and, by intent, `develop-against`) fields — so a caller can + * ASSEMBLE the context from the routing rather than hand-picking fields and + * risking a slip. `develop-against` is included because it is seeded into the + * agent's environment during the run on purpose. 
*/ +export function agentVisibleFields(fields: readonly RoutedField[]): RoutedField[] { + return fields.filter((f) => !isHiddenDestination(f.destination)) +} + +// ── 2. hidden-criteria grading ──────────────────────────────────────────────── + +/** What a hidden-criteria grader reports. `passRate = passed / total` over the + * hidden checks; `total === 0` means the criteria never ran (e.g. the artifact + * did not even load) — an honest zero, never a spurious pass. */ +export interface HiddenGradeResult { + /** Hidden checks that passed. */ + passed: number + /** Total hidden checks attempted. 0 when the criteria could not run at all. */ + total: number + /** `passed / total`, or 0 when `total === 0`. The PRIMARY correctness score. */ + passRate: number + /** Free-form provenance the caller may record (runner output, reason for 0). */ + notes?: string +} + +/** + * The domain's grader: given the agent's artifact and the HIDDEN criteria, + * return a pass rate. This is the ONE seam a non-coding domain implements — the + * coding node-test executor is a single implementation of it; a legal grader + * checks the brief against hidden required holdings, a research grader checks an + * answer against held-out facts, a tax grader runs hidden return assertions. + * The substrate calls it ONLY at grading time, behind the firewall. + * + * `THidden` is the domain's hidden-criteria payload (the held-out suite, the + * answer key, the hidden requirements) — opaque to the substrate. + */ +export type HiddenCriteriaGrader = ( + artifact: TArtifact, + hiddenCriteria: THidden, + signal?: AbortSignal, +) => Promise | HiddenGradeResult + +/** Normalize a grader's raw `{passed, total}` into a `HiddenGradeResult` with a + * consistent, fail-loud `passRate` — the canonical "honest zero on no-run" + * rule, single-sourced so every domain grader gets it. 
*/ +export function hiddenGrade(passed: number, total: number, notes?: string): HiddenGradeResult { + const p = Number.isFinite(passed) && passed > 0 ? Math.floor(passed) : 0 + const t = Number.isFinite(total) && total > 0 ? Math.floor(total) : 0 + const passRate = t > 0 ? Math.min(1, p / t) : 0 + return { passed: Math.min(p, t), total: t, passRate, notes } // cap passed at total even when total is 0, so a no-run never reports passes it has no checks for +} + +/** + * Run a domain's hidden-criteria grader behind the firewall. Before grading, it + * re-asserts the firewall against the agent context the run actually used — + * proving (at grading time, on real data) that the hidden criteria never + * reached the agent — then invokes the grader and returns its pass rate. A + * domain that wants the firewall and the grader wired together in one call uses + * this; a domain that already asserted the firewall at dispatch time can call + * its grader directly and feed the result to `blendHeldout`. + */ +export async function gradeOnHidden(args: { + artifact: TArtifact + hiddenCriteria: THidden + grader: HiddenCriteriaGrader + /** The routed fields + the exact agent context, re-checked before grading. */ + firewall: { fields: readonly RoutedField[]; agentContext: string; options?: NoLeakOptions } + signal?: AbortSignal +}): Promise { + assertNoHiddenLeak(args.firewall.fields, args.firewall.agentContext, args.firewall.options) + const result = await args.grader(args.artifact, args.hiddenCriteria, args.signal) + return hiddenGrade(result.passed, result.total, result.notes) +} + +// ── the composite: hidden correctness (PRIMARY) + judge quality (secondary) ──── + +/** Weights for the held-out / judge blend. Must be finite and non-negative; + * they are renormalized to sum to 1 so a caller can pass any positive ratio. */ +export interface BlendWeights { + /** Weight on the hidden-criteria pass rate (the primary, ungameable score). */ + heldout: number + /** Weight on the judge's quality composite (the secondary style/quality score). 
*/ + judge: number +} + +/** Default blend: 0.7 hidden correctness, 0.3 judge quality. The coding bench's + * long-standing split — execution truth dominates, style refines. */ +export const defaultBlendWeights: BlendWeights = { heldout: 0.7, judge: 0.3 } + +/** The input shape a judge's `score` receives — exactly `JudgeConfig.score`'s + * argument: the artifact, plus any scenario/signal fields the judge carries. + * `withHeldoutBlend` only reads `artifact`; the rest rides through. */ +export interface JudgeScoreInput { + artifact: TArtifact + /** Pass-through for the judge's extra input fields (scenario, signal). */ + [key: string]: unknown +} + +function normalizeWeights(weights: BlendWeights): { heldout: number; judge: number } { + const h = Number.isFinite(weights.heldout) && weights.heldout >= 0 ? weights.heldout : 0 + const j = Number.isFinite(weights.judge) && weights.judge >= 0 ? weights.judge : 0 + const sum = h + j + if (sum <= 0) { + throw new ValidationError( + 'blend weights must have a positive sum (got heldout+judge <= 0) — cannot weight a composite by zero', + ) + } + return { heldout: h / sum, judge: j / sum } +} + +/** + * Compose the PRIMARY hidden-criteria pass rate with the SECONDARY judge + * composite into the single score the leaderboard ranks on. Weights are + * renormalized, so a solution that fails the hidden criteria is capped low no + * matter how the judge felt about its style, while a stylistically-mediocre but + * CORRECT solution still earns the bulk of the points. Both inputs are clamped + * to [0,1] — a judge on a non-unit scale must be normalized by the caller first. 
+ */ +export function blendHeldout( + heldoutPassRate: number, + judgeScore: number, + weights: BlendWeights = defaultBlendWeights, +): number { + const w = normalizeWeights(weights) + const heldout = clampUnit(heldoutPassRate) + const judge = clampUnit(judgeScore) + return w.heldout * heldout + w.judge * judge +} + +/** + * Wrap a judge's `score` so the `composite` it REPORTS is the held-out-weighted + * blend. The judge still scores its quality dimensions (recorded, secondary), + * but the composite that downstream selection/scorecard reads becomes + * `blendHeldout(heldoutPassRate(artifact), judgeComposite, weights)`. The held- + * out pass rate is read off the artifact via `heldoutPassRate` — already + * computed before the judge runs — so no second grading pass is needed. + * + * Generic over the artifact type, inferred from `heldoutPassRate`, so it + * composes with both a `campaign` `JudgeConfig.score` and a bare scoring + * function. The input is the judge's `{ artifact, ... }` — any extra fields + * (`scenario`, `signal`) ride through untouched via the index signature. + */ +export function withHeldoutBlend( + score: (input: JudgeScoreInput) => JudgeScore | Promise, + heldoutPassRate: (artifact: TArtifact) => number, + weights: BlendWeights = defaultBlendWeights, +): (input: JudgeScoreInput) => Promise { + return async (input: JudgeScoreInput): Promise => { + const base = await score(input) + if (base.failed) return base + const rate = clampUnit(heldoutPassRate(input.artifact)) + const composite = blendHeldout(rate, base.composite, weights) + const w = normalizeWeights(weights) + return { + ...base, + composite, + notes: + `composite=${composite.toFixed(3)} ` + + `(held-out ${(rate * 100).toFixed(0)}% × ${w.heldout.toFixed(2)} + ` + + `quality ${base.composite.toFixed(3)} × ${w.judge.toFixed(2)})` + + (base.notes ? 
` — ${base.notes}` : ''), + } + } +} + +function clampUnit(value: number): number { + if (!Number.isFinite(value)) return 0 + return Math.max(0, Math.min(1, value)) +} diff --git a/src/index.ts b/src/index.ts index 2a84b8b..0e1402f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -609,6 +609,27 @@ export { DEFAULT_RULES as DEFAULT_FAILURE_RULES, FAILURE_CLASSES, } from './failure-taxonomy' +export type { + BlendWeights, + FieldDestination, + HiddenCriteriaGrader, + HiddenGradeResult, + HiddenLeak, + JudgeScoreInput, + NoLeakOptions, + RoutedField, +} from './hidden-criteria-grading' +export { + agentVisibleFields, + assertNoHiddenLeak, + blendHeldout, + defaultBlendWeights, + gradeOnHidden, + hiddenGrade, + isHiddenDestination, + routeFields, + withHeldoutBlend, +} from './hidden-criteria-grading' export type { ProjectRuntimeTrajectoryEvidenceOptions, RuntimeTrajectoryEvidenceProjection,