diff --git a/.changeset/four-cooks-sit.md b/.changeset/four-cooks-sit.md
new file mode 100644
index 000000000..a845151cc
--- /dev/null
+++ b/.changeset/four-cooks-sit.md
@@ -0,0 +1,2 @@
+---
+---
diff --git a/.changeset/kind-donkeys-sell.md b/.changeset/kind-donkeys-sell.md
new file mode 100644
index 000000000..29869ee22
--- /dev/null
+++ b/.changeset/kind-donkeys-sell.md
@@ -0,0 +1,5 @@
+---
+"@lingo.dev/_sdk": patch
+---
+
+Optimized the `countWordsInRecord` function for better performance using an iterative DFS approach, reducing memory allocations and improving efficiency for large payloads.
diff --git a/packages/sdk/src/index.spec.ts b/packages/sdk/src/index.spec.ts
index a88277fdf..ff6ecbb86 100644
--- a/packages/sdk/src/index.spec.ts
+++ b/packages/sdk/src/index.spec.ts
@@ -87,3 +87,72 @@ describe("ReplexicaEngine", () => {
     });
   });
 });
+
+describe("LingoDotDevEngine - countWordsInRecord", () => {
+  const engine = new LingoDotDevEngine({ apiKey: "test" });
+
+  it("should return 0 for falsy values and empty inputs", () => {
+    expect((engine as any).countWordsInRecord(null)).toBe(0);
+    expect((engine as any).countWordsInRecord(undefined)).toBe(0);
+    expect((engine as any).countWordsInRecord("")).toBe(0);
+    expect((engine as any).countWordsInRecord({})).toBe(0);
+    expect((engine as any).countWordsInRecord([])).toBe(0);
+  });
+
+  it("should count words in a simple string", () => {
+    expect((engine as any).countWordsInRecord("Hello world")).toBe(2);
+    expect((engine as any).countWordsInRecord(" one two three ")).toBe(3);
+    expect((engine as any).countWordsInRecord("\tNew\nlines and\ttabs\r\n")).toBe(4);
+  });
+
+  it("should treat every whitespace character as a word separator", () => {
+    // Same separators as the /\s+/ split used by the previous implementation.
+    expect((engine as any).countWordsInRecord("one\rtwo\fthree\vfour")).toBe(4);
+    expect((engine as any).countWordsInRecord("one\u00a0two\u2003three\u3000four")).toBe(4);
+    expect((engine as any).countWordsInRecord("\r\n\u00a0\t")).toBe(0);
+  });
+
+  it("should count words in an array of strings", () => {
+    // "a" -> 1, "b c" -> 2, "d" -> 1
+    expect((engine as any).countWordsInRecord(["a", "b c", "d"])).toBe(1 + 2 + 1);
+  });
+
+  it("should count words in a nested array", () => {
+    // Nested arrays are traversed at any depth:
+    // "a" → 1, "b c" → 2, and nested ["d e"] → "d e" → 2 words.
+    // Total = 1 + 2 + 2 = 5 words.
+    expect((engine as any).countWordsInRecord(["a", "b c", ["d e"]])).toBe(5);
+  });
+
+  it("should count words in a nested object", () => {
+    // Object { a: "hello", b: { c: "c d" } }:
+    // "hello" → 1, and "c d" → 2 words.
+    // Total = 1 + 2 = 3.
+    expect((engine as any).countWordsInRecord({ a: "hello", b: { c: "c d" } })).toBe(3);
+  });
+
+  it("should perform efficiently on large payloads", () => {
+    // Generate a large payload with many key-value pairs (in lakhs)
+    const largePayload: Record<string, string> = {};
+    const sampleText = "This is a sample text for benchmarking the countWordsInRecord function";
+    // Count words in sampleText:
+    // ["This", "is", "a", "sample", "text", "for", "benchmarking", "the", "countWordsInRecord", "function"]
+    // That is 10 words.
+    const repetitions = 100_000; // 1 lakh
+    for (let i = 0; i < repetitions; i++) {
+      largePayload[`key_${i}`] = sampleText;
+    }
+    const expectedTotalWords = repetitions * 10;
+
+    const startTime = performance.now();
+    const result = (engine as any).countWordsInRecord(largePayload);
+    const endTime = performance.now();
+    const elapsed = endTime - startTime;
+
+    console.log(`Large payload processed in ${elapsed.toFixed(2)} ms`);
+
+    expect(result).toBe(expectedTotalWords);
+    // Optionally, assert that the function finishes within a reasonable time threshold:
+    expect(elapsed).toBeLessThan(5000);
+  });
+});
diff --git a/packages/sdk/src/index.ts b/packages/sdk/src/index.ts
index 6ccbc6968..7005d40b6 100644
--- a/packages/sdk/src/index.ts
+++ b/packages/sdk/src/index.ts
@@ -174,18 +174,91 @@ export class LingoDotDevEngine {
    * @param payload - The payload to count words in
    * @returns The total number of words
    */
-  private countWordsInRecord(payload: any | Record | Array): number {
-    if (Array.isArray(payload)) {
-      return payload.reduce((acc, item) => acc + this.countWordsInRecord(item), 0);
-    } else if (typeof payload === "object" && payload !== null) {
-      return Object.values(payload).reduce((acc: number, item) => acc + this.countWordsInRecord(item), 0);
-    } else if (typeof payload === "string") {
-      return payload.trim().split(/\s+/).filter(Boolean).length;
-    } else {
-      return 0;
+  private countWordsInRecord(payload: unknown): number {
+    let wordCount = 0;
+    // An explicit stack replaces recursion, so deeply nested payloads cannot overflow the call stack.
+    const processingStack: unknown[] = [payload];
+
+    while (processingStack.length > 0) {
+      const currentItem = processingStack.pop();
+
+      if (typeof currentItem === "string") {
+        wordCount += this.countWordsInString(currentItem);
+      } else if (Array.isArray(currentItem)) {
+        // The total does not depend on visiting order, so elements are pushed as they come.
+        for (const element of currentItem) {
+          processingStack.push(element);
+        }
+      } else if (this.isRecord(currentItem)) {
+        for (const key in currentItem) {
+          // Own enumerable properties only, the same set Object.values() returned.
+          if (Object.prototype.hasOwnProperty.call(currentItem, key)) {
+            processingStack.push(currentItem[key]);
+          }
+        }
+      }
     }
-  }
+
+    return wordCount;
+  }
+
+  /**
+   * Count the whitespace-separated words in a single string
+   * @param text - The string to scan
+   * @returns The number of maximal runs of non-whitespace characters
+   */
+  private countWordsInString(text: string): number {
+    let words = 0;
+    let insideWord = false;
+
+    for (let i = 0; i < text.length; i++) {
+      if (this.isWhitespaceCode(text.charCodeAt(i))) {
+        insideWord = false;
+      } else if (!insideWord) {
+        insideWord = true;
+        words++;
+      }
+    }
+
+    return words;
+  }
+
+  /**
+   * Check whether a UTF-16 code unit is whitespace as defined by the `\s` regex class
+   * @param code - The code unit to test
+   * @returns True for every character the previous `/\s+/` split treated as a separator
+   */
+  private isWhitespaceCode(code: number): boolean {
+    // ASCII fast path: space plus the tab / LF / VT / FF / CR range.
+    if (code <= 0x20) {
+      return code === 0x20 || (code >= 0x09 && code <= 0x0d);
+    }
+    // No whitespace exists between U+0021 and U+009F, so ordinary Latin text exits here.
+    if (code < 0xa0) {
+      return false;
+    }
+    return (
+      code === 0x00a0 ||
+      code === 0x1680 ||
+      (code >= 0x2000 && code <= 0x200a) ||
+      code === 0x2028 ||
+      code === 0x2029 ||
+      code === 0x202f ||
+      code === 0x205f ||
+      code === 0x3000 ||
+      code === 0xfeff
+    );
+  }
+
+  /**
+   * Type guard for non-null, non-array objects
+   * @param value - The value to test
+   * @returns True when the value is an object whose properties can be enumerated
+   */
+  private isRecord(value: unknown): value is Record<string, unknown> {
+    return value !== null && typeof value === "object" && !Array.isArray(value);
+  }
+
   /**
    * Localize a typical JavaScript object
    * @param obj - The object to be localized (strings will be extracted and translated)