Skip to content

Commit 25f52d0

Browse files
lpcox and Copilot committed
fix(guard): use UTF-8 safe string truncation in output preview logging
The label_response function panics when serialized JSON contains multi-byte UTF-8 characters (CJK, emoji, etc.) and byte index 500 falls in the middle of a code point. The panic causes a WASM trap that permanently poisons the guard instance — all subsequent MCP calls to that server fail for the rest of the session.

Extract a safe_preview(s, max_bytes) helper that uses str::floor_char_boundary() (stable since Rust 1.91) to find the nearest valid character boundary at or before the limit.

Replace all three preview truncation sites in label_response:
- Line ~808: path-specific output preview (was panicking)
- Line ~939: general output preview (was panicking)
- Line ~752: input preview (was silently dropping the log)

Add 8 unit tests covering ASCII, CJK (3-byte), emoji (4-byte), accented (2-byte), mixed content, empty strings, and boundary conditions.

Fixes #3711

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 20a28d2 commit 25f52d0

File tree

1 file changed

+101
-13
lines changed
  • guards/github-guard/rust-guard/src

1 file changed

+101
-13
lines changed

guards/github-guard/rust-guard/src/lib.rs

Lines changed: 101 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,19 @@ use std::sync::Mutex;
2626
const POLICY_SCOPE_ALL: &str = "all";
2727
const POLICY_SCOPE_PUBLIC: &str = "public";
2828

29+
/// Maximum number of bytes to include in a log preview of serialized JSON.
/// Passed as the `max_bytes` limit to `safe_preview` at each preview logging
/// site, so a logged preview never exceeds this many bytes.
30+
const PREVIEW_MAX_BYTES: usize = 500;
31+
32+
/// Truncate `s` to at most `max_bytes` bytes without splitting a UTF-8 code point.
///
/// Returns `s` unchanged when it already fits within `max_bytes`. Otherwise
/// returns the longest prefix of `s` whose byte length is `<= max_bytes` and
/// that ends on a character boundary; the result can therefore be up to three
/// bytes shorter than `max_bytes` when the limit falls inside a multi-byte
/// character.
///
/// This never panics: the slice end is always a verified character boundary.
/// It relies only on `str::is_char_boundary`, so it builds on toolchains that
/// predate the stabilisation of `str::floor_char_boundary`.
fn safe_preview(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }
    // Scan backwards from the limit to the nearest character boundary. A UTF-8
    // code point is at most four bytes, so this inspects at most four indices.
    // Index 0 is always a boundary, which makes the fallback unreachable in
    // practice; it is kept so the function has no panic path.
    let end = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..end]
}
41+
2942
/// Global policy context for WASM runtime entry points.
3043
///
3144
/// `label_agent` stores the parsed policy here; `label_resource` and
@@ -748,9 +761,9 @@ pub extern "C" fn label_response(
748761
// Read input bytes
749762
let input_bytes = unsafe { slice::from_raw_parts(input_ptr as *const u8, input_len as usize) };
750763

751-
// Log first 500 chars of input to debug structure
752-
let preview_len = std::cmp::min(500, input_bytes.len());
753-
if let Ok(preview) = std::str::from_utf8(&input_bytes[..preview_len]) {
764+
// Log first 500 bytes of input to debug structure (safe on char boundary)
765+
if let Ok(full_str) = std::str::from_utf8(input_bytes) {
766+
let preview = safe_preview(full_str, PREVIEW_MAX_BYTES);
754767
log_info(&format!(" input_preview={}", preview));
755768
}
756769

@@ -804,11 +817,7 @@ pub extern "C" fn label_response(
804817
};
805818

806819
// Log output preview for debugging
807-
let output_preview = if output_json.len() > 500 {
808-
&output_json[..500]
809-
} else {
810-
&output_json
811-
};
820+
let output_preview = safe_preview(&output_json, PREVIEW_MAX_BYTES);
812821
log_info(&format!(" path_output_preview={}", output_preview));
813822

814823
if output_json.len() as u32 > output_size {
@@ -935,11 +944,7 @@ pub extern "C" fn label_response(
935944
};
936945

937946
// Log output preview for debugging
938-
let output_preview = if output_json.len() > 500 {
939-
&output_json[..500]
940-
} else {
941-
&output_json
942-
};
947+
let output_preview = safe_preview(&output_json, PREVIEW_MAX_BYTES);
943948
log_info(&format!(" output_preview={}", output_preview));
944949

945950
if output_json.len() as u32 > output_size {
@@ -1177,4 +1182,87 @@ mod tests {
11771182
final_integrity
11781183
);
11791184
}
1185+
1186+
// === UTF-8 safe preview tests (issue #3711) ===
1187+
1188+
#[test]
fn test_safe_preview_ascii_under_limit() {
    // A short ASCII input is well under the limit and must come back untouched.
    assert_eq!(safe_preview("hello", 500), "hello");
}
1193+
1194+
#[test]
fn test_safe_preview_ascii_at_limit() {
    // Exactly 500 bytes: the length equals the limit, so nothing is cut.
    let input = "a".repeat(500);
    let preview = safe_preview(&input, 500);
    assert_eq!(preview, input.as_str());
}
1199+
1200+
#[test]
fn test_safe_preview_ascii_over_limit() {
    // 600 one-byte characters: every index is a character boundary, so the
    // cut lands exactly on the 500-byte limit.
    let input = "a".repeat(600);
    let preview_len = safe_preview(&input, 500).len();
    assert_eq!(preview_len, 500);
}
1205+
1206+
#[test]
fn test_safe_preview_cjk_boundary() {
    // "中" encodes to 3 bytes, and 500 is not a multiple of 3, so byte 500
    // falls inside a character: 166 chars = 498 bytes, 167 chars = 501 bytes.
    let input = "中".repeat(167);
    assert_eq!(input.len(), 501);

    // The closest boundary at or below 500 is 498, i.e. 166 whole characters.
    let truncated = safe_preview(&input, 500);
    assert_eq!(truncated.len(), 498);
    assert_eq!(truncated.chars().count(), 166);
}
1219+
1220+
#[test]
fn test_safe_preview_emoji_boundary() {
    // "🎉" encodes to 4 bytes. 125 of them fill exactly 500 bytes, so with 126
    // (504 bytes) the limit coincides with the start of the last emoji.
    let input = "🎉".repeat(126);
    assert_eq!(input.len(), 504);

    // Only the 125 complete emojis that fit in 500 bytes are kept.
    let truncated = safe_preview(&input, 500);
    assert_eq!(truncated.len(), 500);
    assert_eq!(truncated.chars().count(), 125);
}
1232+
1233+
#[test]
fn test_safe_preview_mixed_content_near_boundary() {
    // Build a JSON-like payload, {"body":"<padding>中中中中中"}, whose CJK
    // value straddles byte 500.
    let head = "{\"body\":\""; // 9 bytes
    let filler = "x".repeat(489); // running total: 498 bytes
    let cjk_value = "中中中中中"; // 5 × 3 = 15 bytes, running total: 513

    let payload = [head, filler.as_str(), cjk_value, "\"}"].concat();
    assert!(payload.len() > 500);

    // The first CJK character spans bytes 498..501, so byte 500 is inside it
    // and the cut must fall back to 498.
    let truncated = safe_preview(&payload, 500);
    assert_eq!(truncated.len(), 498);
    // Being a &str already guarantees valid UTF-8; the last kept character is
    // the final padding byte.
    assert!(truncated.ends_with('x'));
}
1251+
1252+
#[test]
fn test_safe_preview_empty_string() {
    // An empty input has nothing to truncate and is returned as-is.
    let preview = safe_preview("", 500);
    assert_eq!(preview, "");
}
1256+
1257+
#[test]
fn test_safe_preview_two_byte_chars() {
    // "é" encodes to 2 bytes. 250 of them fill exactly 500 bytes, so with 251
    // (502 bytes) byte 500 is the first byte of the final character.
    let input = "é".repeat(251);
    assert_eq!(input.len(), 502);

    // The limit is itself a boundary here, so 250 whole characters survive.
    let truncated = safe_preview(&input, 500);
    assert_eq!(truncated.len(), 500);
    assert_eq!(truncated.chars().count(), 250);
}
11801268
}

0 commit comments

Comments
 (0)