Commit cdd38c4

feat: add retry logic to link checker and run sequentially (#7149)
1 parent 028269d

1 file changed
.github/workflows/scripts/check-existing-doc-links.js

Lines changed: 48 additions & 36 deletions
@@ -4,40 +4,57 @@ const getUrlToCheck = (currentUrl, deploymentUrl) => {
   return url.toString();
 };

-const checkUrl = async (url) => {
-  const { status } = await fetch(url, {
-    method: "GET",
-  });
-  return [status, new URL(url).pathname];
-};
+const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

-const toChunks = (array, chunkSize) => {
-  const chunks = [];
-  for (let i = 0; i < array.length; i += chunkSize) {
-    chunks.push(array.slice(i, i + chunkSize));
+const checkUrl = async (url, retries = 3) => {
+  for (let i = 0; i < retries; i++) {
+    try {
+      const { status } = await fetch(url, {
+        method: "GET",
+        headers: {
+          "User-Agent": "refine-link-checker",
+        },
+      });
+      return [status, new URL(url).pathname];
+    } catch (error) {
+      if (i === retries - 1) {
+        console.error(
+          `Failed to fetch ${new URL(url).pathname} after ${retries} attempts:`,
+          error.message,
+        );
+        return [500, new URL(url).pathname];
+      }
+      // Wait before retrying
+      await sleep(1000 * (i + 1));
+    }
   }
-  return chunks;
 };

-const checkChunk = (chunk, deploymentUrl, success, fail) => {
-  return new Promise((resolve) => {
-    const promises = chunk.map((url) => {
-      return checkUrl(getUrlToCheck(url, deploymentUrl));
-    });
+const checkUrls = async (urls, deploymentUrl, success, fail) => {
+  for (let i = 0; i < urls.length; i++) {
+    const url = urls[i];
+    const [status, pathname] = await checkUrl(
+      getUrlToCheck(url, deploymentUrl),
+    );
+    if (status === 200) {
+      success.push(pathname);
+    } else {
+      fail.push(pathname);
+    }

-    Promise.all(promises).then((results) => {
-      results.forEach(([status, url]) => {
-        if (status === 200) {
-          success.push(url);
-        } else {
-          fail.push(url);
-        }
-      });
-      resolve();
-    });
-  });
-};
+    // Log progress every 50 URLs
+    if ((i + 1) % 50 === 0 || i === urls.length - 1) {
+      console.log(
+        `Progress: ${i + 1}/${urls.length} URLs checked (${
+          success.length
+        } successful, ${fail.length} failed)`,
+      );
+    }

+    // Small delay between requests to avoid overwhelming the server
+    await sleep(100);
+  }
+};
 const checkExistingLinks = async (sitemapUrl, deploymentUrl) => {
   const data = await (await fetch(sitemapUrl)).text();

@@ -50,16 +67,11 @@ const checkExistingLinks = async (sitemapUrl, deploymentUrl) => {

   console.log("Checking for existing urls in:", sitemapUrl);
   console.log("Deployment url:", deploymentUrl);
+  console.log(`Total URLs to check: ${urls.length}`);

-  const chunks = toChunks(urls, 10);
-
-  let done = 0;
+  await checkUrls(urls, deploymentUrl, success, fail);

-  for (const chunk of chunks) {
-    console.log(`Checking chunk ${done + 1}/${chunks.length}`);
-    done++;
-    await checkChunk(chunk, deploymentUrl, success, fail);
-  }
+  console.log(`\nResults: ${success.length} successful, ${fail.length} failed`);

   if (fail.length > 0) {
     console.log("Broken links:");
