Skip to content

Commit faff0b8

Browse files
lpcox and Copilot committed
fix: use rune-boundary-safe UTF-8 truncation for preview
json.Marshal emits raw UTF-8 (not \uXXXX escapes) for non-ASCII runes, so byte-level slicing can split multi-byte sequences. Adjust the cut point backward to the nearest valid rune boundary using utf8.RuneStart.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent b5b750a commit faff0b8

1 file changed

Lines changed: 11 additions & 6 deletions

File tree

internal/middleware/jqschema.go

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ import (
1010
"path/filepath"
1111
"strings"
1212
"time"
13+
"unicode/utf8"
1314

1415
"github.com/github/gh-aw-mcpg/internal/logger"
1516
"github.com/itchyny/gojq"
@@ -373,18 +374,22 @@ func WrapToolHandler(
373374
logger.LogDebug("payload", "Schema transformation completed: tool=%s, queryID=%s, schemaSize=%d bytes",
374375
toolName, queryID, len(schemaBytes))
375376

376-
// Build the transformed response: first PayloadPreviewSize chars + schema.
377+
// Build the transformed response: first PayloadPreviewSize bytes + schema.
377378
// Slice the bytes before converting to string to avoid allocating a full copy of the
378-
// (potentially multi-MB) payload when only the first PayloadPreviewSize bytes are needed.
379+
// (potentially multi-MB) payload when only a short preview is needed.
379380
//
380-
// Byte-level slicing is safe here because json.Marshal produces ASCII-clean output:
381-
// non-ASCII runes are escaped as \uXXXX sequences, so every byte boundary is a
382-
// valid UTF-8 boundary.
381+
// json.Marshal emits raw UTF-8 for non-ASCII runes, so a naive byte slice could
382+
// split a multi-byte sequence. We adjust the cut point backward to the nearest
383+
// valid rune boundary to guarantee the preview is valid UTF-8.
383384
payloadLen := len(payloadJSON)
384385
var preview string
385386
truncated := payloadLen > PayloadPreviewSize
386387
if truncated {
387-
preview = string(payloadJSON[:PayloadPreviewSize]) + "..."
388+
cutPoint := PayloadPreviewSize
389+
for cutPoint > 0 && !utf8.RuneStart(payloadJSON[cutPoint]) {
390+
cutPoint--
391+
}
392+
preview = string(payloadJSON[:cutPoint]) + "..."
388393
logger.LogInfo("payload", "Payload truncated for preview: tool=%s, queryID=%s, originalSize=%d bytes, previewSize=%d bytes",
389394
toolName, queryID, payloadLen, PayloadPreviewSize)
390395
} else {

0 commit comments

Comments
 (0)