Skip to content

Commit cd836e4

Browse files
authored
feat(cli): cache processed data chunks, recover from cache (#432)
1 parent ed3e47f commit cd836e4

5 files changed

Lines changed: 204 additions & 9 deletions

File tree

.changeset/clean-swans-whisper.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"lingo.dev": minor
3+
---
4+
5+
cache processed data chunks, recover from cache

packages/cli/src/cli/cli/i18n.ts

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import chalk from "chalk";
1515
import { createTwoFilesPatch } from "diff";
1616
import inquirer from "inquirer";
1717
import externalEditor from "external-editor";
18+
import { cacheChunk, deleteCache, getNormalizedCache } from "../utils/cache";
1819

1920
export default new Command()
2021
.command("i18n")
@@ -90,6 +91,54 @@ export default new Command()
9091
ora.succeed("i18n.lock loaded");
9192
}
9293

94+
// recover cache if exists
95+
const cache = getNormalizedCache();
96+
if (cache) {
97+
console.log();
98+
ora.succeed(`Cache loaded. Attempting recovery...`);
99+
const cacheOra = Ora({ indent: 2 });
100+
101+
for (const bucket of buckets) {
102+
cacheOra.info(`Processing bucket: ${bucket.type}`);
103+
for (const bucketConfig of bucket.config) {
104+
const bucketOra = ora.info(`Processing path: ${bucketConfig.pathPattern}`);
105+
106+
const sourceLocale = resolveOverridenLocale(i18nConfig!.locale.source, bucketConfig.delimiter);
107+
const bucketLoader = createBucketLoader(bucket.type, bucketConfig.pathPattern);
108+
bucketLoader.setDefaultLocale(sourceLocale);
109+
await bucketLoader.init();
110+
const sourceData = await bucketLoader.pull(sourceLocale);
111+
const cachedSourceData: Record<string, string> = {};
112+
113+
for (const targetLocale in cache) {
114+
const targetData = await bucketLoader.pull(targetLocale);
115+
116+
for (const key in cache[targetLocale]) {
117+
const { source, result } = cache[targetLocale][key];
118+
119+
if (sourceData[key] === source && targetData[key] !== result) {
120+
targetData[key] = result;
121+
cachedSourceData[key] = source;
122+
}
123+
}
124+
125+
await bucketLoader.push(targetLocale, targetData);
126+
lockfileHelper.registerPartialSourceData(bucketConfig.pathPattern, cachedSourceData);
127+
128+
bucketOra.succeed(
129+
`[${sourceLocale} -> ${targetLocale}] Recovered ${Object.keys(cachedSourceData).length} entries from cache`,
130+
);
131+
}
132+
}
133+
}
134+
deleteCache();
135+
if (flags.verbose) {
136+
cacheOra.info("Cache file deleted.");
137+
}
138+
} else if (flags.verbose) {
139+
ora.info("Cache file not found. Skipping recovery.");
140+
}
141+
93142
if (flags.frozen) {
94143
ora.start("Checking for lockfile updates...");
95144
let requiresUpdate = false;
@@ -174,8 +223,18 @@ export default new Command()
174223
targetLocale,
175224
targetData,
176225
},
177-
(progress) => {
178-
bucketOra.text = `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length} entries] (${progress}%) AI localization in progress...`;
226+
(progress, sourceChunk, processedChunk) => {
227+
cacheChunk(targetLocale, sourceChunk, processedChunk);
228+
229+
const progressLog = `[${sourceLocale} -> ${targetLocale}] [${Object.keys(processableData).length} entries] (${progress}%) AI localization in progress...`;
230+
if (flags.verbose) {
231+
ora.info(progressLog);
232+
ora.info(
233+
`Caching chunk ${JSON.stringify(sourceChunk, null, 2)} -> ${JSON.stringify(processedChunk, null, 2)}`,
234+
);
235+
} else {
236+
ora.text = progressLog;
237+
}
179238
},
180239
);
181240

@@ -236,6 +295,10 @@ export default new Command()
236295
console.log();
237296
if (!hasErrors) {
238297
ora.succeed("Localization completed.");
298+
deleteCache();
299+
if (flags.verbose) {
300+
ora.info("Cache file deleted.");
301+
}
239302
} else {
240303
ora.warn("Localization completed with errors.");
241304
}
@@ -296,7 +359,11 @@ function createLocalizationEngineConnection(params: { apiKey: string; apiUrl: st
296359
targetLocale: string;
297360
targetData: Record<string, any>;
298361
},
299-
onProgress: (progress: number) => void,
362+
onProgress: (
363+
progress: number,
364+
sourceChunk: Record<string, string>,
365+
processedChunk: Record<string, string>,
366+
) => void,
300367
) => {
301368
return retryWithExponentialBackoff(
302369
() =>
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import path from "path";
2+
import fs from "fs";
3+
4+
// A single persisted cache entry: one translated key for one target locale,
// stored as one JSON line in the cache file.
interface CacheRow {
  targetLocale: string;
  key: string;
  // Source-locale text at the time the chunk was translated.
  source: string;
  // Translated text returned by the localization engine.
  processed: string;
}

// Normalized view of one cache entry, shaped for recovery lookups.
interface NormalizedCacheItem {
  // Source text the translation was derived from.
  source: string;
  // Translated text to restore into the target file.
  result: string;
}

// Entry key -> normalized cache item.
type NormalizedCache = Record<string, NormalizedCacheItem>;

// Target locale -> per-key cache for that locale.
interface NormalizedLocaleCache {
  [targetLocale: string]: NormalizedCache;
}
21+
22+
export const cacheChunk = (
23+
targetLocale: string,
24+
sourceChunk: Record<string, string>,
25+
processedChunk: Record<string, string>,
26+
) => {
27+
const rows = Object.entries(sourceChunk).map(([key, source]) => ({
28+
targetLocale,
29+
key,
30+
source,
31+
processed: processedChunk[key],
32+
}));
33+
_appendToCache(rows);
34+
};
35+
36+
export function getNormalizedCache() {
37+
const rows = _loadCache();
38+
if (!rows.length) {
39+
return null;
40+
}
41+
42+
const normalized: NormalizedLocaleCache = {};
43+
44+
for (const row of rows) {
45+
if (!normalized[row.targetLocale]) {
46+
normalized[row.targetLocale] = {};
47+
}
48+
49+
normalized[row.targetLocale][row.key] = {
50+
source: row.source,
51+
result: row.processed,
52+
};
53+
}
54+
55+
return normalized;
56+
}
57+
58+
export function deleteCache() {
59+
const cacheFilePath = _getCacheFilePath();
60+
try {
61+
fs.unlinkSync(cacheFilePath);
62+
} catch (e) {
63+
// file might not exist
64+
}
65+
}
66+
67+
function _loadCache() {
68+
const cacheFilePath = _getCacheFilePath();
69+
if (!fs.existsSync(cacheFilePath)) {
70+
return [];
71+
}
72+
const content = fs.readFileSync(cacheFilePath, "utf-8");
73+
const result = _parseJSONLines(content);
74+
return result;
75+
}
76+
77+
function _appendToCache(rows: CacheRow[]) {
78+
const cacheFilePath = _getCacheFilePath();
79+
const lines = _buildJSONLines(rows);
80+
fs.appendFileSync(cacheFilePath, lines);
81+
}
82+
83+
function _getCacheFilePath() {
84+
return path.join(process.cwd(), "i18n.cache");
85+
}
86+
87+
function _buildJSONLines(rows: CacheRow[]) {
88+
return rows.map((row) => JSON.stringify(row)).join("\n") + "\n";
89+
}
90+
91+
function _parseJSONLines(lines: string) {
92+
return lines
93+
.split("\n")
94+
.map(_tryParseJSON)
95+
.filter((line) => line !== null);
96+
}
97+
98+
function _tryParseJSON(line: string) {
99+
try {
100+
return JSON.parse(line);
101+
} catch (e) {
102+
return null;
103+
}
104+
}

packages/cli/src/cli/utils/lockfile.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ export function createLockfileHelper() {
2121

2222
_saveLockfile(lockfile);
2323
},
24+
// Merge checksums for just the recovered keys into the lockfile section for
// this path pattern, leaving checksums of untouched keys intact.
registerPartialSourceData: (pathPattern: string, partialSourceData: Record<string, any>) => {
  const lockfile = _loadLockfile();

  const sectionKey = MD5(pathPattern);
  const sectionChecksums = _.mapValues(partialSourceData, (value) => MD5(value));

  // Checksum values are flat MD5 strings, so a shallow merge is sufficient.
  lockfile.checksums[sectionKey] = {
    ...(lockfile.checksums[sectionKey] ?? {}),
    ...sectionChecksums,
  };

  _saveLockfile(lockfile);
},
2434
extractUpdatedData: (pathPattern: string, sourceData: Record<string, any>) => {
2535
const lockfile = _loadLockfile();
2636

packages/sdk/src/index.ts

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@ export class ReplexicaEngine {
4848
payload: Z.infer<typeof payloadSchema>,
4949
params: Z.infer<typeof localizationParamsSchema>,
5050
reference?: Z.infer<typeof referenceSchema>,
51-
progressCallback?: (progress: number) => void,
51+
progressCallback?: (
52+
progress: number,
53+
sourceChunk: Record<string, string>,
54+
processedChunk: Record<string, string>,
55+
) => void,
5256
): Promise<Record<string, string>> {
5357
const finalPayload = payloadSchema.parse(payload);
5458
const finalParams = localizationParamsSchema.parse(params);
@@ -61,17 +65,18 @@ export class ReplexicaEngine {
6165
const chunk = chunkedPayload[i];
6266
const percentageCompleted = Math.round(((i + 1) / chunkedPayload.length) * 100);
6367

64-
if (progressCallback) {
65-
progressCallback(percentageCompleted);
66-
}
67-
6868
const processedPayloadChunk = await this.localizeChunk(
6969
finalParams.sourceLocale,
7070
finalParams.targetLocale,
7171
{ data: chunk, reference },
7272
workflowId,
7373
params.fast || false,
7474
);
75+
76+
if (progressCallback) {
77+
progressCallback(percentageCompleted, chunk, processedPayloadChunk);
78+
}
79+
7580
processedPayloadChunks.push(processedPayloadChunk);
7681
}
7782

@@ -187,7 +192,11 @@ export class ReplexicaEngine {
187192
async localizeObject(
188193
obj: Record<string, any>,
189194
params: Z.infer<typeof localizationParamsSchema>,
190-
progressCallback?: (progress: number) => void,
195+
progressCallback?: (
196+
progress: number,
197+
sourceChunk: Record<string, string>,
198+
processedChunk: Record<string, string>,
199+
) => void,
191200
): Promise<Record<string, any>> {
192201
return this._localizeRaw(obj, params, undefined, progressCallback);
193202
}

0 commit comments

Comments
 (0)