anomalyco
diff --git a/‎cli.ts‎
Lines changed: 15 additions & 28 deletions b/‎cli.ts‎
Lines changed: 15 additions & 28 deletions
diff --git a/‎evals/datadog-lambda-python/diff.patch‎
Lines changed: 212 additions & 0 deletions b/‎evals/datadog-lambda-python/diff.patch‎
Lines changed: 212 additions & 0 deletions
diff --git a/‎evals/helix-db-cli-update-blocking/diff.patch‎
Lines changed: 18 additions & 0 deletions b/‎evals/helix-db-cli-update-blocking/diff.patch‎
Lines changed: 18 additions & 0 deletions
@@ -41,21 +41,15 @@ const cli = yargs(hideBin(process.argv))
       "$0 opencode --model opencode/claude-sonnet-4-5 --eval DataDog/datadog-lambda-python@93d4a07..d776378 --output results.json",
     ],
   ])
-  .fail((msg) => {
-    console.error(msg);
-    process.exit(1);
-  })
   .strict();
 
 cli.command(
   "generate",
   "Generate dataset for all evaluations",
   async (yargs) =>
     yargs.example([["orvl generate", "Generate dataset for all evaluations"]]),
-  async ({ eval: evalId }) => {
-    const logger = Logger.create();
-    logger.log(`Generating dataset...`);
-
+  async () => {
+    const logger = Logger.create("[generate]");
     await Eval.generate({ logger });
   },
 );
@@ -107,7 +101,7 @@ cli.command(
     const evals = await Eval.load();
     const agent = getAgent(agentName);
     const model = getModel(agent, modelFilter);
-    const evalDef = getEval(evals, evalId);
+    const ev = getEval(evals, evalId);
     const logger = Logger.create(`[model ${model}]`);
 
     // Run episodes
@@ -116,14 +110,11 @@ cli.command(
         const index = offset + 1;
         const childLogger = logger.child(`[episode ${index}/${episodes}]`);
         childLogger.log(`Starting episode with ${timeoutMins}min timeout...`);
-        return withRetries(
-          () => runEpisode(evalDef, agent, model, childLogger),
-          {
-            retries: 3,
-            timeoutMs: timeoutMins * 60 * 1000,
-            logger: childLogger,
-          },
-        ).then((result) => ({ index, ...result }));
+        return withRetries(() => runEpisode(ev, agent, model, childLogger), {
+          retries: 3,
+          timeoutMs: timeoutMins * 60 * 1000,
+          logger: childLogger,
+        }).then((result) => ({ index, ...result }));
       }),
     );
 
@@ -174,15 +165,11 @@ cli.command(
     });
 
     // Generate summary from all episodes' actions
-    const summary = await generateActionsSummary(
-      evalDef,
-      model,
-      episodesActions,
-    );
+    const summary = await generateActionsSummary(ev, model, episodesActions);
 
     const evaluationResult = summarizeAggregation(
       agent.name,
-      evalDef,
+      ev,
       model,
       aggregatedInputs,
       episodeExports,
@@ -229,13 +216,13 @@ function getModel(agent: Agent.Registration, modelFilter: string) {
 }
 
 function getEval(evals: Eval.Instance[], evalId: string) {
-  const evalDef = evals.find((ev) => ev.id === evalId);
-  if (!evalDef) throw new Error(`Eval ${evalId} was not found.`);
-  if (!evalDef.scores.length)
+  const ev = evals.find((ev) => ev.id === evalId);
+  if (!ev) throw new Error(`Eval ${evalId} was not found.`);
+  if (!ev.scores.length)
     throw new Error(
-      `Evaluation ${evalDef.repo} has no score assignments configured.`,
+      `Evaluation ${ev.repo} has no score assignments configured.`,
     );
-  return evalDef;
+  return ev;
 }
 
 async function runEpisode(
 
@@ -0,0 +1,212 @@
+diff --git a/datadog_lambda/metric.py b/datadog_lambda/metric.py
+index 73bbeca3..5df0812f 100644
+--- a/datadog_lambda/metric.py
++++ b/datadog_lambda/metric.py
+@@ -214,6 +214,33 @@ def submit_errors_metric(lambda_context):
+     submit_enhanced_metric("errors", lambda_context)
+ 
+ 
++def submit_batch_item_failures_metric(response, lambda_context):
++    """Submit aws.lambda.enhanced.batch_item_failures metric with the count of batch item failures
++
++    Args:
++        response (dict): Lambda function response object
++        lambda_context (object): Lambda context dict passed to the function by AWS
++    """
++    if not config.enhanced_metrics_enabled:
++        logger.debug(
++            "Not submitting batch_item_failures metric because enhanced metrics are disabled"
++        )
++        return
++
++    if not isinstance(response, dict):
++        return
++
++    batch_item_failures = response.get("batchItemFailures")
++    if batch_item_failures is not None and isinstance(batch_item_failures, list):
++        lambda_metric(
++            "aws.lambda.enhanced.batch_item_failures",
++            len(batch_item_failures),
++            timestamp=None,
++            tags=get_enhanced_metrics_tags(lambda_context),
++            force_async=True,
++        )
++
++
+ def submit_dynamodb_stream_type_metric(event):
+     stream_view_type = (
+         event.get("Records", [{}])[0].get("dynamodb", {}).get("StreamViewType")
+diff --git a/datadog_lambda/wrapper.py b/datadog_lambda/wrapper.py
+index 0cbedd9f..8dbd7e35 100644
+--- a/datadog_lambda/wrapper.py
++++ b/datadog_lambda/wrapper.py
+@@ -291,6 +291,10 @@ def _before(self, event, context):
+ 
+     def _after(self, event, context):
+         try:
++            from datadog_lambda.metric import submit_batch_item_failures_metric
++
++            submit_batch_item_failures_metric(self.response, context)
++
+             status_code = extract_http_status_code_tag(self.trigger_tags, self.response)
+ 
+             if self.span:
+diff --git a/tests/test_metric.py b/tests/test_metric.py
+index aa537d34..fe3df247 100644
+--- a/tests/test_metric.py
++++ b/tests/test_metric.py
+@@ -12,6 +12,7 @@
+     _select_metrics_handler,
+     flush_stats,
+     lambda_metric,
++    submit_batch_item_failures_metric,
+ )
+ from datadog_lambda.tags import dd_lambda_layer_tag
+ from datadog_lambda.thread_stats_writer import ThreadStatsWriter
+@@ -324,3 +325,80 @@ def decrypt(self, CiphertextBlob=None, EncryptionContext={}):
+             mock_kms_client, MOCK_ENCRYPTED_API_KEY_BASE64
+         )
+         self.assertEqual(decrypted_key, EXPECTED_DECRYPTED_API_KEY)
++
++
++class TestBatchItemFailuresMetric(unittest.TestCase):
++    def setUp(self):
++        patcher = patch("datadog_lambda.metric.lambda_metric")
++        self.mock_lambda_metric = patcher.start()
++        self.addCleanup(patcher.stop)
++
++        patcher = patch("datadog_lambda.config.Config.enhanced_metrics_enabled", True)
++        self.mock_enhanced_metrics_enabled = patcher.start()
++        self.addCleanup(patcher.stop)
++
++    def test_submit_batch_item_failures_with_failures(self):
++        response = {
++            "batchItemFailures": [
++                {"itemIdentifier": "msg-1"},
++                {"itemIdentifier": "msg-2"},
++                {"itemIdentifier": "msg-3"},
++            ]
++        }
++        context = unittest.mock.Mock()
++
++        with patch("datadog_lambda.metric.get_enhanced_metrics_tags") as mock_get_tags:
++            mock_get_tags.return_value = ["tag1:value1"]
++            submit_batch_item_failures_metric(response, context)
++
++            self.mock_lambda_metric.assert_called_once_with(
++                "aws.lambda.enhanced.batch_item_failures",
++                3,
++                timestamp=None,
++                tags=["tag1:value1"],
++                force_async=True,
++            )
++
++    def test_submit_batch_item_failures_with_no_failures(self):
++        response = {"batchItemFailures": []}
++        context = unittest.mock.Mock()
++
++        with patch("datadog_lambda.metric.get_enhanced_metrics_tags") as mock_get_tags:
++            mock_get_tags.return_value = ["tag1:value1"]
++            submit_batch_item_failures_metric(response, context)
++            self.mock_lambda_metric.assert_called_once_with(
++                "aws.lambda.enhanced.batch_item_failures",
++                0,
++                timestamp=None,
++                tags=["tag1:value1"],
++                force_async=True,
++            )
++
++    def test_submit_batch_item_failures_with_no_field(self):
++        response = {"statusCode": 200}
++        context = unittest.mock.Mock()
++        submit_batch_item_failures_metric(response, context)
++        self.mock_lambda_metric.assert_not_called()
++
++    def test_submit_batch_item_failures_with_none_response(self):
++        response = None
++        context = unittest.mock.Mock()
++        submit_batch_item_failures_metric(response, context)
++        self.mock_lambda_metric.assert_not_called()
++
++    def test_submit_batch_item_failures_with_non_list_value(self):
++        response = {"batchItemFailures": "invalid"}
++        context = unittest.mock.Mock()
++        submit_batch_item_failures_metric(response, context)
++        self.mock_lambda_metric.assert_not_called()
++
++    @patch("datadog_lambda.config.Config.enhanced_metrics_enabled", False)
++    def test_submit_batch_item_failures_enhanced_metrics_disabled(self):
++        response = {
++            "batchItemFailures": [
++                {"itemIdentifier": "msg-1"},
++            ]
++        }
++        context = unittest.mock.Mock()
++        submit_batch_item_failures_metric(response, context)
++        self.mock_lambda_metric.assert_not_called()
+diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py
+index fe7678ac..512a51f8 100644
+--- a/tests/test_wrapper.py
++++ b/tests/test_wrapper.py
+@@ -899,3 +899,61 @@ def lambda_handler(event, context):
+     assert response == expected_response
+     assert len(LLMObs_enable_calls) == 1
+     assert len(LLMObs_flush_calls) == 1
++
++
++@patch("datadog_lambda.config.Config.trace_enabled", False)
++def test_batch_item_failures_metric():
++    with patch(
++        "datadog_lambda.metric.submit_batch_item_failures_metric"
++    ) as mock_submit:
++
++        @wrapper.datadog_lambda_wrapper
++        def lambda_handler(event, context):
++            return {
++                "batchItemFailures": [
++                    {"itemIdentifier": "msg-1"},
++                    {"itemIdentifier": "msg-2"},
++                ]
++            }
++
++        lambda_handler({}, get_mock_context())
++        mock_submit.assert_called_once()
++        call_args = mock_submit.call_args[0]
++        assert call_args[0] == {
++            "batchItemFailures": [
++                {"itemIdentifier": "msg-1"},
++                {"itemIdentifier": "msg-2"},
++            ]
++        }
++
++
++@patch("datadog_lambda.config.Config.trace_enabled", False)
++def test_batch_item_failures_metric_no_failures():
++    with patch(
++        "datadog_lambda.metric.submit_batch_item_failures_metric"
++    ) as mock_submit:
++
++        @wrapper.datadog_lambda_wrapper
++        def lambda_handler(event, context):
++            return {"batchItemFailures": []}
++
++        lambda_handler({}, get_mock_context())
++        mock_submit.assert_called_once()
++        call_args = mock_submit.call_args[0]
++        assert call_args[0] == {"batchItemFailures": []}
++
++
++@patch("datadog_lambda.config.Config.trace_enabled", False)
++def test_batch_item_failures_metric_no_response():
++    with patch(
++        "datadog_lambda.metric.submit_batch_item_failures_metric"
++    ) as mock_submit:
++
++        @wrapper.datadog_lambda_wrapper
++        def lambda_handler(event, context):
++            return None
++
++        lambda_handler({}, get_mock_context())
++        mock_submit.assert_called_once()
++        call_args = mock_submit.call_args[0]
++        assert call_args[0] is None
@@ -0,0 +1,18 @@
+diff --git a/helix-cli/src/commands/update.rs b/helix-cli/src/commands/update.rs
+index 22269ae2a..137b73850 100644
+--- a/helix-cli/src/commands/update.rs
++++ b/helix-cli/src/commands/update.rs
+@@ -4,6 +4,13 @@ use self_update::cargo_crate_version;
+ use crate::utils::{print_error_with_hint, print_status, print_success};
+ 
+ pub async fn run(force: bool) -> Result<()> {
++    // We're using the self_update crate which is very handy but doesn't support async.
++    // Still, this is good enough, but because it panics in an async context we must
++    // do a spawn_blocking
++    tokio::task::spawn_blocking(move || run_sync(force)).await?
++}
++
++fn run_sync(force: bool) -> Result<()> {
+     print_status("UPDATE", "Checking for updates...");
+ 
+     let status = self_update::backends::github::Update::configure()