@@ -26,6 +26,19 @@ use std::sync::Mutex;
2626const POLICY_SCOPE_ALL : & str = "all" ;
2727const POLICY_SCOPE_PUBLIC : & str = "public" ;
2828
29+ /// Maximum number of bytes to include in a log preview of serialized JSON.
30+ const PREVIEW_MAX_BYTES : usize = 500 ;
31+
/// Truncate a string to at most `max_bytes` bytes, cutting only on a valid
/// UTF-8 character boundary.
///
/// Returns `s` unchanged when it is no longer than `max_bytes`. Otherwise the
/// returned slice is the longest prefix of `s` whose byte length does not
/// exceed `max_bytes` and which does not split a multi-byte character, so it
/// may be up to three bytes shorter than `max_bytes`.
///
/// This never panics: index 0 is always a character boundary, so the
/// backwards search below is guaranteed to terminate.
fn safe_preview(s: &str, max_bytes: usize) -> &str {
    if s.len() <= max_bytes {
        return s;
    }
    // `max_bytes < s.len()` here, so every probed index is in range. A UTF-8
    // scalar is at most four bytes long, so this steps back at most three
    // times. `is_char_boundary` is used instead of `floor_char_boundary`
    // because the latter is only available on recent toolchains.
    let mut end = max_bytes;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
41+
2942/// Global policy context for WASM runtime entry points.
3043///
3144/// `label_agent` stores the parsed policy here; `label_resource` and
@@ -748,9 +761,9 @@ pub extern "C" fn label_response(
748761 // Read input bytes
749762 let input_bytes = unsafe { slice:: from_raw_parts ( input_ptr as * const u8 , input_len as usize ) } ;
750763
751- // Log first 500 chars of input to debug structure
752- let preview_len = std:: cmp :: min ( 500 , input_bytes. len ( ) ) ;
753- if let Ok ( preview) = std :: str :: from_utf8 ( & input_bytes [ ..preview_len ] ) {
764+ // Log first 500 bytes of input to debug structure (safe on char boundary)
765+ if let Ok ( full_str ) = std:: str :: from_utf8 ( input_bytes) {
766+ let preview = safe_preview ( full_str , PREVIEW_MAX_BYTES ) ;
754767 log_info ( & format ! ( " input_preview={}" , preview) ) ;
755768 }
756769
@@ -804,11 +817,7 @@ pub extern "C" fn label_response(
804817 } ;
805818
806819 // Log output preview for debugging
807- let output_preview = if output_json. len ( ) > 500 {
808- & output_json[ ..500 ]
809- } else {
810- & output_json
811- } ;
820+ let output_preview = safe_preview ( & output_json, PREVIEW_MAX_BYTES ) ;
812821 log_info ( & format ! ( " path_output_preview={}" , output_preview) ) ;
813822
814823 if output_json. len ( ) as u32 > output_size {
@@ -935,11 +944,7 @@ pub extern "C" fn label_response(
935944 } ;
936945
937946 // Log output preview for debugging
938- let output_preview = if output_json. len ( ) > 500 {
939- & output_json[ ..500 ]
940- } else {
941- & output_json
942- } ;
947+ let output_preview = safe_preview ( & output_json, PREVIEW_MAX_BYTES ) ;
943948 log_info ( & format ! ( " output_preview={}" , output_preview) ) ;
944949
945950 if output_json. len ( ) as u32 > output_size {
@@ -1177,4 +1182,87 @@ mod tests {
11771182 final_integrity
11781183 ) ;
11791184 }
1185+
1186+ // === UTF-8 safe preview tests (issue #3711) ===
1187+
#[test]
fn test_safe_preview_ascii_under_limit() {
    // Input well below the limit must come back unchanged.
    let short = "hello";
    let preview = safe_preview(short, 500);
    assert_eq!(preview, "hello");
}
1193+
#[test]
fn test_safe_preview_ascii_at_limit() {
    // A string whose length equals the limit is not truncated.
    let exact = "a".repeat(500);
    let preview = safe_preview(&exact, 500);
    assert_eq!(preview, exact.as_str());
}
1199+
#[test]
fn test_safe_preview_ascii_over_limit() {
    // Single-byte characters: every index is a boundary, so the cut lands
    // exactly on the limit.
    let long_ascii = "a".repeat(600);
    let preview = safe_preview(&long_ascii, 500);
    assert_eq!(preview.len(), 500);
}
1205+
#[test]
fn test_safe_preview_cjk_boundary() {
    // "中" encodes to 3 bytes in UTF-8 and 500 is not a multiple of 3, so
    // byte 500 lands inside a character: 166 chars = 498 bytes,
    // 167 chars = 501 bytes.
    let input = "中".repeat(167);
    assert_eq!(input.len(), 501);

    // The last boundary at or below 500 is 498, i.e. 166 whole characters.
    let truncated = safe_preview(&input, 500);
    assert_eq!(truncated.len(), 498);
    assert_eq!(truncated.chars().count(), 166);
}
1219+
#[test]
fn test_safe_preview_emoji_boundary() {
    // "🎉" encodes to 4 bytes in UTF-8, so 125 of them fill exactly 500
    // bytes. With 126 (504 bytes) the limit falls right where the 126th
    // emoji starts, and that emoji must be dropped whole.
    let input = "🎉".repeat(126);
    assert_eq!(input.len(), 504);

    let truncated = safe_preview(&input, 500);
    assert_eq!(truncated.len(), 500);
    assert_eq!(truncated.chars().count(), 125);
}
1232+
#[test]
fn test_safe_preview_mixed_content_near_boundary() {
    // Simulate a JSON string with an ASCII key and a CJK value that crosses
    // byte 500: {"body":"<padding>中中中中中"}
    let prefix = "{\"body\":\""; // 9 bytes
    let padding = "x".repeat(489); // 489 bytes, running total: 498
    let cjk_tail = "中中中中中"; // 5 x 3 = 15 bytes, running total: 513

    // Pin the layout the assertions below depend on: the first CJK
    // character must occupy bytes 498..501.
    assert_eq!(prefix.len(), 9);
    assert_eq!(prefix.len() + padding.len(), 498);

    let json = format!("{}{}{}\"}}", prefix, padding, cjk_tail);
    assert!(json.len() > 500);

    let preview = safe_preview(&json, 500);
    // Byte 500 is inside the first CJK character, so the cut must fall back
    // to byte 498, leaving only the prefix and the ASCII padding.
    assert_eq!(preview.len(), 498);
    // Being a `&str` already guarantees valid UTF-8; also confirm that the
    // last retained character is padding.
    assert!(preview.ends_with('x'));
}
1251+
#[test]
fn test_safe_preview_empty_string() {
    // The empty string is below any limit and is returned as-is.
    let preview = safe_preview("", 500);
    assert_eq!(preview, "");
}
1256+
#[test]
fn test_safe_preview_two_byte_chars() {
    // "é" encodes to 2 bytes in UTF-8, so 250 of them fill exactly 500
    // bytes. With 251 (502 bytes) byte 500 is the first byte of the last
    // character, which is a valid place to cut.
    let input = "é".repeat(251);
    assert_eq!(input.len(), 502);

    let truncated = safe_preview(&input, 500);
    assert_eq!(truncated.len(), 500);
    assert_eq!(truncated.chars().count(), 250);
}
11801268}
0 commit comments