@@ -4,40 +4,57 @@ const getUrlToCheck = (currentUrl, deploymentUrl) => {
44 return url . toString ( ) ;
55} ;
66
7- const checkUrl = async ( url ) => {
8- const { status } = await fetch ( url , {
9- method : "GET" ,
10- } ) ;
11- return [ status , new URL ( url ) . pathname ] ;
12- } ;
7+ const sleep = ( ms ) => new Promise ( ( resolve ) => setTimeout ( resolve , ms ) ) ;
138
14- const toChunks = ( array , chunkSize ) => {
15- const chunks = [ ] ;
16- for ( let i = 0 ; i < array . length ; i += chunkSize ) {
17- chunks . push ( array . slice ( i , i + chunkSize ) ) ;
9+ const checkUrl = async ( url , retries = 3 ) => {
10+ for ( let i = 0 ; i < retries ; i ++ ) {
11+ try {
12+ const { status } = await fetch ( url , {
13+ method : "GET" ,
14+ headers : {
15+ "User-Agent" : "refine-link-checker" ,
16+ } ,
17+ } ) ;
18+ return [ status , new URL ( url ) . pathname ] ;
19+ } catch ( error ) {
20+ if ( i === retries - 1 ) {
21+ console . error (
22+ `Failed to fetch ${ new URL ( url ) . pathname } after ${ retries } attempts:` ,
23+ error . message ,
24+ ) ;
25+ return [ 500 , new URL ( url ) . pathname ] ;
26+ }
27+ // Wait before retrying
28+ await sleep ( 1000 * ( i + 1 ) ) ;
29+ }
1830 }
19- return chunks ;
2031} ;
2132
22- const checkChunk = ( chunk , deploymentUrl , success , fail ) => {
23- return new Promise ( ( resolve ) => {
24- const promises = chunk . map ( ( url ) => {
25- return checkUrl ( getUrlToCheck ( url , deploymentUrl ) ) ;
26- } ) ;
33+ const checkUrls = async ( urls , deploymentUrl , success , fail ) => {
34+ for ( let i = 0 ; i < urls . length ; i ++ ) {
35+ const url = urls [ i ] ;
36+ const [ status , pathname ] = await checkUrl (
37+ getUrlToCheck ( url , deploymentUrl ) ,
38+ ) ;
39+ if ( status === 200 ) {
40+ success . push ( pathname ) ;
41+ } else {
42+ fail . push ( pathname ) ;
43+ }
2744
28- Promise . all ( promises ) . then ( ( results ) => {
29- results . forEach ( ( [ status , url ] ) => {
30- if ( status === 200 ) {
31- success . push ( url ) ;
32- } else {
33- fail . push ( url ) ;
34- }
35- } ) ;
36- resolve ( ) ;
37- } ) ;
38- } ) ;
39- } ;
45+ // Log progress every 50 URLs
46+ if ( ( i + 1 ) % 50 === 0 || i === urls . length - 1 ) {
47+ console . log (
48+ `Progress: ${ i + 1 } /${ urls . length } URLs checked (${
49+ success . length
50+ } successful, ${ fail . length } failed)`,
51+ ) ;
52+ }
4053
54+ // Small delay between requests to avoid overwhelming the server
55+ await sleep ( 100 ) ;
56+ }
57+ } ;
4158const checkExistingLinks = async ( sitemapUrl , deploymentUrl ) => {
4259 const data = await ( await fetch ( sitemapUrl ) ) . text ( ) ;
4360
@@ -50,16 +67,11 @@ const checkExistingLinks = async (sitemapUrl, deploymentUrl) => {
5067
5168 console . log ( "Checking for existing urls in:" , sitemapUrl ) ;
5269 console . log ( "Deployment url:" , deploymentUrl ) ;
70+ console . log ( `Total URLs to check: ${ urls . length } ` ) ;
5371
54- const chunks = toChunks ( urls , 10 ) ;
55-
56- let done = 0 ;
72+ await checkUrls ( urls , deploymentUrl , success , fail ) ;
5773
58- for ( const chunk of chunks ) {
59- console . log ( `Checking chunk ${ done + 1 } /${ chunks . length } ` ) ;
60- done ++ ;
61- await checkChunk ( chunk , deploymentUrl , success , fail ) ;
62- }
74+ console . log ( `\nResults: ${ success . length } successful, ${ fail . length } failed` ) ;
6375
6476 if ( fail . length > 0 ) {
6577 console . log ( "Broken links:" ) ;
0 commit comments