Spaces:
Build error
Build error
fix: expect failure while loading pdf
Browse files
- src/api/crawler.ts +16 -3
src/api/crawler.ts
CHANGED
|
@@ -1010,10 +1010,23 @@ export class CrawlerHost extends RPCHost {
|
|
| 1010 |
return this.snapshotFormatter.formatSnapshot(mode, snapshotCopy, nominalUrl, urlValidMs);
|
| 1011 |
}
|
| 1012 |
|
| 1013 |
-
const r = await this.curlControl.sideLoad(new URL(pdfUrl), scrappingOptions)
|
| 1014 |
-
|
| 1015 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1016 |
}
|
|
|
|
| 1017 |
}
|
| 1018 |
}
|
| 1019 |
|
|
|
|
| 1010 |
return this.snapshotFormatter.formatSnapshot(mode, snapshotCopy, nominalUrl, urlValidMs);
|
| 1011 |
}
|
| 1012 |
|
| 1013 |
+
const r = await this.curlControl.sideLoad(new URL(pdfUrl), scrappingOptions).catch((err) => {
|
| 1014 |
+
if (err instanceof ServiceBadAttemptError) {
|
| 1015 |
+
return Promise.reject(new AssertionFailureError(`Failed to load PDF(${pdfUrl}): ${err.message}`));
|
| 1016 |
+
}
|
| 1017 |
+
|
| 1018 |
+
return Promise.reject(err);
|
| 1019 |
+
});
|
| 1020 |
+
if (r.status !== 200) {
|
| 1021 |
+
throw new AssertionFailureError(`Failed to load PDF(${pdfUrl}): Server responded status ${r.status}`);
|
| 1022 |
+
}
|
| 1023 |
+
if (!r.contentType.includes('application/pdf')) {
|
| 1024 |
+
throw new AssertionFailureError(`Failed to load PDF(${pdfUrl}): Server responded with wrong content type ${r.contentType}`);
|
| 1025 |
+
}
|
| 1026 |
+
if (!r.file) {
|
| 1027 |
+
throw new AssertionFailureError(`Failed to load PDF(${pdfUrl}): Server did not return a body`);
|
| 1028 |
}
|
| 1029 |
+
snapshotCopy.pdfs[0] = pathToFileURL(await r.file.filePath).href;
|
| 1030 |
}
|
| 1031 |
}
|
| 1032 |
|