Spaces:
Build error
Build error
fix: skip undefined snapshots in the crawler loop; make PageSnapshot.parsed optional
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -90,6 +90,10 @@ ${this.content}
|
|
| 90 |
|
| 91 |
try {
|
| 92 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
const formatted = this.formatSnapshot(scrapped);
|
| 94 |
|
| 95 |
if (scrapped.screenshot && screenshotEnabled) {
|
|
|
|
| 90 |
|
| 91 |
try {
|
| 92 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
| 93 |
+
if (!scrapped) {
|
| 94 |
+
continue;
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
const formatted = this.formatSnapshot(scrapped);
|
| 98 |
|
| 99 |
if (scrapped.screenshot && screenshotEnabled) {
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -15,7 +15,7 @@ export interface PageSnapshot {
|
|
| 15 |
href: string;
|
| 16 |
html: string;
|
| 17 |
text: string;
|
| 18 |
-
parsed: {
|
| 19 |
title: string;
|
| 20 |
content: string;
|
| 21 |
textContent: string;
|
|
@@ -78,7 +78,7 @@ export class PuppeteerControl extends AsyncService {
|
|
| 78 |
timeout: 10_000
|
| 79 |
}).catch((err) => {
|
| 80 |
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
|
| 81 |
-
process.nextTick(()=> {
|
| 82 |
process.exit(1);
|
| 83 |
});
|
| 84 |
return Promise.reject(err);
|
|
@@ -153,7 +153,7 @@ function giveSnapshot() {
|
|
| 153 |
return page;
|
| 154 |
}
|
| 155 |
|
| 156 |
-
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
|
| 157 |
const parsedUrl = new URL(url);
|
| 158 |
// parsedUrl.search = '';
|
| 159 |
parsedUrl.hash = '';
|
|
@@ -236,7 +236,7 @@ function giveSnapshot() {
|
|
| 236 |
while (true) {
|
| 237 |
await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
|
| 238 |
if (finalized) {
|
| 239 |
-
yield { ...snapshot, screenshot };
|
| 240 |
break;
|
| 241 |
}
|
| 242 |
yield snapshot;
|
|
|
|
| 15 |
href: string;
|
| 16 |
html: string;
|
| 17 |
text: string;
|
| 18 |
+
parsed?: {
|
| 19 |
title: string;
|
| 20 |
content: string;
|
| 21 |
textContent: string;
|
|
|
|
| 78 |
timeout: 10_000
|
| 79 |
}).catch((err) => {
|
| 80 |
this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
|
| 81 |
+
process.nextTick(() => {
|
| 82 |
process.exit(1);
|
| 83 |
});
|
| 84 |
return Promise.reject(err);
|
|
|
|
| 153 |
return page;
|
| 154 |
}
|
| 155 |
|
| 156 |
+
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot | undefined> {
|
| 157 |
const parsedUrl = new URL(url);
|
| 158 |
// parsedUrl.search = '';
|
| 159 |
parsedUrl.hash = '';
|
|
|
|
| 236 |
while (true) {
|
| 237 |
await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
|
| 238 |
if (finalized) {
|
| 239 |
+
yield { ...snapshot, screenshot } as PageSnapshot;
|
| 240 |
break;
|
| 241 |
}
|
| 242 |
yield snapshot;
|