Skip to content

Commit 32df512

Browse files
committed
chore(docs-watch): switch to manifest metadata and harden report escaping
1 parent 3228b84 commit 32df512

9 files changed

Lines changed: 102 additions & 128 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,5 @@ docs/planning/
138138
docs/github-documentation/docs-check.json
139139
docs/github-documentation/docs-diff.md
140140
docs/github-documentation/docs-diff.patch
141+
docs/github-documentation/rate-limit.md
142+
docs/github-documentation/rate-limits-for-the-rest-api.md

docs/MAINTAINERS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ regressions and to verify the poller and post hook behave as expected.
9898
The docs-watch process is local-only and intended for maintainers.
9999

100100
- Do not store upstream GitHub documentation bodies in this public repository.
101-
- Monitored metadata files live in `docs/github-documentation/*.md`.
101+
- Monitored metadata lives in `docs/github-documentation/watch-list.json`.
102102
- Local private state defaults to `.tmp/docs-watch/state` and stores snapshots
103103
used for diffing.
104104

docs/github-documentation/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ Copyright policy constraint: upstream GitHub documentation bodies are intentiona
66

77
## Files
88

9-
- `rate-limit.md` and `rate-limits-for-the-rest-api.md`: frontmatter metadata (`redirect-link`, `content-sha256`) used for change detection.
10-
- No upstream Markdown body text is stored here.
9+
- `watch-list.json`: monitored pages and expected hashes (`redirect_link`, `content_sha256`).
10+
- Upstream page Markdown files are intentionally not tracked.
1111

1212
## Private state (required for full diff + LLM review)
1313

docs/github-documentation/rate-limit.md

Lines changed: 0 additions & 9 deletions
This file was deleted.

docs/github-documentation/rate-limits-for-the-rest-api.md

Lines changed: 0 additions & 9 deletions
This file was deleted.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"documents": [
3+
{
4+
"file": "docs/github-documentation/rate-limit.md",
5+
"markdown_link": "https://docs.github.com/en/rest/rate-limit/rate-limit.md",
6+
"redirect_link": "https://docs.github.com/api/article/body?pathname=/en/rest/rate-limit/rate-limit",
7+
"content_sha256": "44eb46956eb56734c3b78eb0d9df796a48030cabff22361034340afe556d210f"
8+
},
9+
{
10+
"file": "docs/github-documentation/rate-limits-for-the-rest-api.md",
11+
"markdown_link": "https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api.md",
12+
"redirect_link": "https://docs.github.com/api/article/body?pathname=/en/rest/using-the-rest-api/rate-limits-for-the-rest-api",
13+
"content_sha256": "b66f477c547bfd5548cca384d92d6170b30261cdca1a0e021571f9a511e7ec17"
14+
}
15+
]
16+
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
"diff:github-docs": "node scripts/render-github-doc-diff.mjs --check .tmp/docs-watch/docs-check.json --output .tmp/docs-watch/docs-diff.md --patch-output .tmp/docs-watch/docs-diff.patch --write-summary",
2828
"report:github-docs": "node scripts/write-docs-watch-report.mjs --check .tmp/docs-watch/docs-check.json --diff .tmp/docs-watch/docs-diff.patch --diff-markdown .tmp/docs-watch/docs-diff.md --output .tmp/docs-watch/docs-watch-report.md --write-summary",
2929
"docs-watch:local": "npm run check:github-docs && npm run diff:github-docs && npm run report:github-docs",
30-
"sync:github-docs-state": "node scripts/check-github-docs.mjs --state-dir \"${DOC_WATCH_STATE_DIR:-.tmp/docs-watch/state}\" --write-state --update-frontmatter --output .tmp/docs-watch/docs-check-sync.json --write-summary",
30+
"sync:github-docs-state": "node scripts/check-github-docs.mjs --state-dir \"${DOC_WATCH_STATE_DIR:-.tmp/docs-watch/state}\" --write-state --update-manifest --output .tmp/docs-watch/docs-check-sync.json --write-summary",
3131
"clean": "rm -rf dist",
3232
"prepare": "husky",
3333
"prepublishOnly": "npm run build:all"

scripts/check-github-docs.mjs

Lines changed: 76 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -13,50 +13,21 @@ function getArgValue(flag, fallback = null) {
1313
return value ?? fallback;
1414
}
1515

16-
const docsDir = getArgValue('--dir', 'docs/github-documentation');
16+
const manifestPath = getArgValue('--manifest', 'docs/github-documentation/watch-list.json');
1717
const outputPath = getArgValue('--output', null);
1818
const stateDir = getArgValue('--state-dir', null);
1919
const remoteSnapshotsDir = getArgValue('--remote-snapshots-dir', null);
2020
const useLocal = args.includes('--use-local');
21-
const updateFrontmatter = args.includes('--update-frontmatter');
22-
const replaceBody = args.includes('--replace-body');
21+
const updateManifest = args.includes('--update-manifest') || args.includes('--update-frontmatter');
2322
const writeState = args.includes('--write-state');
2423
const failOnChange = args.includes('--fail-on-change');
2524
const writeSummary = args.includes('--write-summary');
2625
const includeDiffSnippets = args.includes('--include-diff-snippets');
2726

28-
if (replaceBody && !updateFrontmatter) {
29-
throw new Error('--replace-body requires --update-frontmatter');
30-
}
31-
3227
if (writeState && !stateDir) {
3328
throw new Error('--write-state requires --state-dir');
3429
}
3530

36-
function splitFrontmatter(text) {
37-
const match = text.match(/^---\s*\r?\n([\s\S]*?)\r?\n---\s*\r?\n?/);
38-
if (!match) {
39-
return null;
40-
}
41-
const frontmatter = match[1];
42-
const body = text.slice(match[0].length);
43-
return { frontmatter, body };
44-
}
45-
46-
function parseFrontmatterMap(frontmatter) {
47-
const map = new Map();
48-
for (const line of frontmatter.split(/\r?\n/)) {
49-
const trimmed = line.trim();
50-
if (!trimmed) continue;
51-
const idx = line.indexOf(':');
52-
if (idx === -1) continue;
53-
const key = line.slice(0, idx).trim();
54-
const value = line.slice(idx + 1).trim();
55-
map.set(key, value);
56-
}
57-
return map;
58-
}
59-
6031
function normalizeBody(text) {
6132
const normalized = text.replace(/\r\n/g, '\n');
6233
return normalized.replace(/\n?$/, '\n');
@@ -89,52 +60,18 @@ function findFirstDiff(localBody, remoteBody, withSnippets) {
8960
return null;
9061
}
9162

92-
function updateFrontmatterHash(frontmatter, newHash) {
93-
const lines = frontmatter.split(/\r?\n/);
94-
const existingIndex = lines.findIndex((line) => line.trim().startsWith('content-sha256:'));
95-
const newLine = `content-sha256: ${newHash}`;
96-
97-
if (existingIndex !== -1) {
98-
lines[existingIndex] = newLine;
99-
} else {
100-
const redirectIndex = lines.findIndex((line) => line.trim().startsWith('redirect-link:'));
101-
if (redirectIndex !== -1) {
102-
lines.splice(redirectIndex + 1, 0, newLine);
103-
} else {
104-
lines.push(newLine);
105-
}
106-
}
107-
108-
return lines.join('\n');
109-
}
110-
11163
function resolveStateSnapshotPath(filePath) {
11264
if (!stateDir) return null;
11365
return path.join(stateDir, 'snapshots', path.basename(filePath));
11466
}
11567

116-
function readBaselineBody(filePath, fallbackBody) {
68+
function readBaselineBody(filePath) {
11769
const snapshotPath = resolveStateSnapshotPath(filePath);
118-
if (snapshotPath) {
119-
if (fs.existsSync(snapshotPath)) {
120-
return {
121-
body: normalizeBody(fs.readFileSync(snapshotPath, 'utf8')),
122-
source: 'state',
123-
snapshotPath,
124-
};
125-
}
70+
if (snapshotPath && fs.existsSync(snapshotPath)) {
12671
return {
127-
body: '',
128-
source: 'none',
129-
snapshotPath: null,
130-
};
131-
}
132-
133-
if (fallbackBody.trim().length > 0) {
134-
return {
135-
body: normalizeBody(fallbackBody),
136-
source: 'repo',
137-
snapshotPath: null,
72+
body: normalizeBody(fs.readFileSync(snapshotPath, 'utf8')),
73+
source: 'state',
74+
snapshotPath,
13875
};
13976
}
14077

@@ -150,6 +87,51 @@ function writeBody(filePath, body) {
15087
fs.writeFileSync(filePath, normalizeBody(body), 'utf8');
15188
}
15289

90+
/**
 * Load and validate the docs-watch manifest.
 *
 * The manifest is either a bare JSON array of document entries or an object
 * holding a `documents` array. Every entry is normalized to the fields
 * `file`, `markdown_link`, `redirect_link` and `content_sha256` (string values
 * trimmed; absent optional values become `null`), and the normalized list is
 * sorted by `file`.
 *
 * @param {string} filePath - Path to the manifest JSON file.
 * @returns {{root: unknown, documents: Array<{file: string, markdown_link: (string|null), redirect_link: string, content_sha256: (string|null)}>, write: function(Array<object>): void}}
 *   The parsed root value, the normalized documents, and a `write` helper that
 *   persists an updated document list back to `filePath`.
 * @throws {Error} If the file is missing, is not valid JSON, has no documents
 *   array, or contains an entry that is not an object or lacks `file` /
 *   `redirect_link`.
 */
function loadManifest(filePath) {
  if (!fs.existsSync(filePath)) {
    throw new Error(`Manifest file not found: ${filePath}`);
  }

  const text = fs.readFileSync(filePath, 'utf8');

  let raw;
  try {
    raw = JSON.parse(text);
  } catch (err) {
    // Attach the path so a malformed manifest is identifiable from the message alone.
    throw new Error(`Manifest is not valid JSON: ${filePath}`, { cause: err });
  }

  // `raw?.` keeps a JSON root of `null` on the validation path below instead of
  // crashing with a TypeError on property access.
  const documents = Array.isArray(raw) ? raw : raw?.documents;

  if (!Array.isArray(documents)) {
    throw new Error(`Manifest must contain a documents array: ${filePath}`);
  }

  const normalizedDocs = documents.map((doc, idx) => {
    if (doc === null || typeof doc !== 'object' || Array.isArray(doc)) {
      throw new Error(`Manifest document at index ${idx} must be an object`);
    }

    const file = String(doc.file ?? '').trim();
    const redirectLink = String(doc.redirect_link ?? '').trim();
    const expectedHash = doc.content_sha256 ? String(doc.content_sha256).trim() : null;

    if (!file) {
      throw new Error(`Manifest document at index ${idx} is missing file`);
    }

    if (!redirectLink) {
      throw new Error(`Manifest document at index ${idx} is missing redirect_link`);
    }

    return {
      file,
      markdown_link: doc.markdown_link ? String(doc.markdown_link).trim() : null,
      redirect_link: redirectLink,
      content_sha256: expectedHash,
    };
  });

  normalizedDocs.sort((a, b) => a.file.localeCompare(b.file));

  return {
    root: raw,
    documents: normalizedDocs,
    write(updatedDocuments) {
      // Preserve the manifest's original shape: a bare array stays a bare array,
      // an object keeps its other top-level keys.
      const payload = Array.isArray(raw) ? updatedDocuments : { ...raw, documents: updatedDocuments };
      fs.writeFileSync(filePath, JSON.stringify(payload, null, 2) + '\n', 'utf8');
    },
  };
}
134+
153135
async function fetchRemoteBody(url) {
154136
const response = await fetch(url, {
155137
headers: {
@@ -163,35 +145,18 @@ async function fetchRemoteBody(url) {
163145
}
164146

165147
async function main() {
166-
const entries = fs
167-
.readdirSync(docsDir)
168-
.filter((file) => file.endsWith('.md'))
169-
.filter((file) => file.toLowerCase() !== 'readme.md')
170-
.sort()
171-
.map((file) => path.join(docsDir, file));
148+
const manifest = loadManifest(manifestPath);
172149

173150
const results = [];
174151
let changedCount = 0;
175152

176-
for (const file of entries) {
177-
const text = fs.readFileSync(file, 'utf8');
178-
const parsed = splitFrontmatter(text);
179-
if (!parsed) {
180-
throw new Error(`Missing frontmatter in ${file}`);
181-
}
182-
183-
const frontmatterMap = parseFrontmatterMap(parsed.frontmatter);
184-
const redirectLink = frontmatterMap.get('redirect-link');
185-
const expectedHash = frontmatterMap.get('content-sha256') ?? null;
186-
187-
if (!redirectLink) {
188-
throw new Error(`Missing redirect-link in ${file}`);
189-
}
153+
const updatedDocuments = [];
190154

191-
const baseline = readBaselineBody(file, parsed.body);
192-
const remoteBody = useLocal ? baseline.body : normalizeBody(await fetchRemoteBody(redirectLink));
155+
for (const doc of manifest.documents) {
156+
const baseline = readBaselineBody(doc.file);
157+
const remoteBody = useLocal ? baseline.body : normalizeBody(await fetchRemoteBody(doc.redirect_link));
193158
const actualHash = sha256(remoteBody);
194-
const changed = expectedHash !== actualHash;
159+
const changed = doc.content_sha256 !== actualHash;
195160

196161
if (changed) changedCount += 1;
197162

@@ -201,30 +166,31 @@ async function main() {
201166

202167
let remoteSnapshotFile = null;
203168
if (remoteSnapshotsDir) {
204-
const remotePath = path.join(remoteSnapshotsDir, path.basename(file));
169+
const remotePath = path.join(remoteSnapshotsDir, path.basename(doc.file));
205170
writeBody(remotePath, remoteBody);
206171
remoteSnapshotFile = path.relative(process.cwd(), remotePath);
207172
}
208173

209174
if (writeState) {
210-
const snapshotPath = resolveStateSnapshotPath(file);
175+
const snapshotPath = resolveStateSnapshotPath(doc.file);
211176
if (!snapshotPath) {
212-
throw new Error(`Unable to resolve state snapshot path for ${file}`);
177+
throw new Error(`Unable to resolve state snapshot path for ${doc.file}`);
213178
}
214179
writeBody(snapshotPath, remoteBody);
215180
}
216181

217-
if (updateFrontmatter) {
218-
const nextFrontmatter = updateFrontmatterHash(parsed.frontmatter, actualHash);
219-
const nextBody = replaceBody ? remoteBody : parsed.body;
220-
const nextText = `---\n${nextFrontmatter}\n---\n${nextBody}`;
221-
fs.writeFileSync(file, nextText, 'utf8');
222-
}
182+
const updatedDoc = {
183+
...doc,
184+
content_sha256: updateManifest ? actualHash : doc.content_sha256,
185+
};
186+
187+
updatedDocuments.push(updatedDoc);
223188

224189
results.push({
225-
file: path.relative(process.cwd(), file),
226-
redirect_link: redirectLink,
227-
expected_hash: expectedHash,
190+
file: doc.file,
191+
markdown_link: doc.markdown_link,
192+
redirect_link: doc.redirect_link,
193+
expected_hash: doc.content_sha256,
228194
actual_hash: actualHash,
229195
changed,
230196
baseline_source: baseline.source,
@@ -234,11 +200,16 @@ async function main() {
234200
});
235201
}
236202

203+
if (updateManifest) {
204+
manifest.write(updatedDocuments);
205+
}
206+
237207
const payload = {
238208
changed: changedCount > 0,
239209
changed_count: changedCount,
240210
state_dir: stateDir,
241211
remote_snapshots_dir: remoteSnapshotsDir,
212+
manifest_path: manifestPath,
242213
results,
243214
};
244215

scripts/write-docs-watch-report.mjs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ function defaultAssessment(payload) {
9393
}
9494

9595
function escapePipe(value) {
96-
return String(value ?? '').replace(/\|/g, '\\|');
96+
return String(value ?? '')
97+
.replace(/\\/g, '\\\\')
98+
.replace(/\|/g, '\\|')
99+
.replace(/\r?\n/g, ' ');
97100
}
98101

99102
async function main() {

0 commit comments

Comments
 (0)