Skip to content

Commit 2e69e37

Browse files
authored
refactor: jq middleware improvements from gojq module review (#3026)
## Summary Implements the quick-win improvements identified in the gojq module review (#2985). ## Changes ### 1. Remove duplicate test helper Deleted `integrationPayloadMetadataToMap` from `jqschema_integration_test.go` — it was byte-for-byte identical to `payloadMetadataToMap` in `jqschema_test.go`. Both files are in `package middleware`, so the shared helper is directly usable. ### 2. Document custom `walk` vs built-in Added a comment block explaining why `jqSchemaFilter` defines its own `walk` function instead of using gojq's built-in `walk(f)`: - Built-in `walk(f)` applies `f` post-recursion but preserves structure - Our custom walk replaces leaf values with type names AND collapses arrays to first element - These behaviors are incompatible with standard `walk(f)` semantics ### 3. Document single-result iterator contract Added comment explaining why `iter.Next()` is called only once — the `walk(.)` filter produces exactly one output value. ### 4. UTF-8 safety comment on preview truncation Documented why byte-level slicing at `PayloadPreviewSize` is safe: `json.Marshal` escapes non-ASCII runes as `\uXXXX`, so every byte boundary is a valid UTF-8 boundary. ### 5. Skip redundant JSON unmarshal Added a type-switch in `WrapToolHandler` that skips the `json.Unmarshal` step when `data` is already `map[string]interface{}` or `[]interface{}`. This avoids a needless marshal→unmarshal round-trip on hot paths. Closes #2985
2 parents fde9c63 + faff0b8 commit 2e69e37

2 files changed

Lines changed: 33 additions & 22 deletions

File tree

internal/middleware/jqschema.go

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"path/filepath"
1111
"strings"
1212
"time"
13+
"unicode/utf8"
1314

1415
"github.com/github/gh-aw-mcpg/internal/logger"
1516
"github.com/itchyny/gojq"
@@ -54,6 +55,15 @@ type PayloadMetadata struct {
5455
//
5556
// For arrays, only the first element's schema is retained to represent the array structure.
5657
// Empty arrays are preserved as [].
58+
//
59+
// NOTE: This defines a custom walk function rather than using gojq's built-in walk(f).
60+
// The built-in walk(f) applies f to every node but preserves the original structure.
61+
// Our custom walk does two things the built-in cannot:
62+
// 1. Replaces leaf values with their type name (e.g., "test" → "string")
63+
// 2. Collapses arrays to only the first element for schema inference
64+
//
65+
// These behaviors are incompatible with standard walk(f) semantics, which would
66+
// apply f post-recursion without structural changes to arrays.
5767
const jqSchemaFilter = `
5868
def walk(f):
5969
. as $in |
@@ -143,6 +153,8 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro
143153
}
144154

145155
// Run the pre-compiled query with context support (much faster than Parse+Run)
156+
// The iterator is consumed only once because the walk(.) filter produces exactly
157+
// one output value (the fully-transformed schema). There is no need to drain it.
146158
iter := jqSchemaCode.RunWithContext(ctx, jsonData)
147159
v, ok := iter.Next()
148160
if !ok {
@@ -331,10 +343,16 @@ func WrapToolHandler(
331343
logger.LogDebug("payload", "Applying jq schema transformation: tool=%s, queryID=%s", toolName, queryID)
332344
var schemaObj interface{}
333345
if schemaErr := func() error {
334-
// Unmarshal to interface{} for jq processing
346+
// Prepare data for jq processing. If data is already a native Go type
347+
// (map or slice), use it directly to avoid a redundant JSON round-trip.
335348
var jsonData interface{}
336-
if err := json.Unmarshal(payloadJSON, &jsonData); err != nil {
337-
return fmt.Errorf("failed to unmarshal for schema: %w", err)
349+
switch data.(type) {
350+
case map[string]interface{}, []interface{}:
351+
jsonData = data
352+
default:
353+
if err := json.Unmarshal(payloadJSON, &jsonData); err != nil {
354+
return fmt.Errorf("failed to unmarshal for schema: %w", err)
355+
}
338356
}
339357

340358
schema, err := applyJqSchema(ctx, jsonData)
@@ -356,14 +374,22 @@ func WrapToolHandler(
356374
logger.LogDebug("payload", "Schema transformation completed: tool=%s, queryID=%s, schemaSize=%d bytes",
357375
toolName, queryID, len(schemaBytes))
358376

359-
// Build the transformed response: first PayloadPreviewSize chars + schema.
377+
// Build the transformed response: first PayloadPreviewSize bytes + schema.
360378
// Slice the bytes before converting to string to avoid allocating a full copy of the
361-
// (potentially multi-MB) payload when only the first PayloadPreviewSize bytes are needed.
379+
// (potentially multi-MB) payload when only a short preview is needed.
380+
//
381+
// json.Marshal emits raw UTF-8 for non-ASCII runes, so a naive byte slice could
382+
// split a multi-byte sequence. We adjust the cut point backward to the nearest
383+
// valid rune boundary to guarantee the preview is valid UTF-8.
362384
payloadLen := len(payloadJSON)
363385
var preview string
364386
truncated := payloadLen > PayloadPreviewSize
365387
if truncated {
366-
preview = string(payloadJSON[:PayloadPreviewSize]) + "..."
388+
cutPoint := PayloadPreviewSize
389+
for cutPoint > 0 && !utf8.RuneStart(payloadJSON[cutPoint]) {
390+
cutPoint--
391+
}
392+
preview = string(payloadJSON[:cutPoint]) + "..."
367393
logger.LogInfo("payload", "Payload truncated for preview: tool=%s, queryID=%s, originalSize=%d bytes, previewSize=%d bytes",
368394
toolName, queryID, payloadLen, PayloadPreviewSize)
369395
} else {

internal/middleware/jqschema_integration_test.go

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,6 @@ import (
1313
"github.com/stretchr/testify/require"
1414
)
1515

16-
// integrationPayloadMetadataToMap converts PayloadMetadata to map[string]interface{} for test assertions
17-
func integrationPayloadMetadataToMap(t *testing.T, data interface{}) map[string]interface{} {
18-
t.Helper()
19-
pm, ok := data.(PayloadMetadata)
20-
if !ok {
21-
t.Fatalf("expected PayloadMetadata, got %T", data)
22-
}
23-
jsonBytes, err := json.Marshal(pm)
24-
require.NoError(t, err)
25-
var result map[string]interface{}
26-
err = json.Unmarshal(jsonBytes, &result)
27-
require.NoError(t, err)
28-
return result
29-
}
30-
3116
// TestMiddlewareIntegration tests the complete middleware flow
3217
func TestMiddlewareIntegration(t *testing.T) {
3318
// Create temporary directory for test
@@ -214,7 +199,7 @@ func TestMiddlewareWithLargePayload(t *testing.T) {
214199
}
215200

216201
// Also check data return value
217-
dataMap := integrationPayloadMetadataToMap(t, data)
202+
dataMap := payloadMetadataToMap(t, data)
218203

219204
// Verify preview truncation (check if it ends with ...)
220205
preview := dataMap["payloadPreview"].(string)

0 commit comments

Comments
 (0)