Spaces:
Build error
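fix: type scrap() as AsyncGenerator<PageSnapshot>, drop the per-snapshot null check, and throw AssertionFailureError when no snapshot was scraped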
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection } from 'civkit';
|
| 2 |
import { singleton } from 'tsyringe';
|
| 3 |
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
| 4 |
import _ from 'lodash';
|
|
@@ -90,10 +90,6 @@ ${this.content}
|
|
| 90 |
|
| 91 |
try {
|
| 92 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
| 93 |
-
if (!scrapped) {
|
| 94 |
-
continue;
|
| 95 |
-
}
|
| 96 |
-
|
| 97 |
const formatted = this.formatSnapshot(scrapped);
|
| 98 |
|
| 99 |
if (scrapped.screenshot && screenshotEnabled) {
|
|
@@ -134,6 +130,10 @@ ${this.content}
|
|
| 134 |
return formatted;
|
| 135 |
}
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
return this.formatSnapshot(lastScrapped);
|
| 138 |
}
|
| 139 |
|
|
@@ -148,6 +148,10 @@ ${this.content}
|
|
| 148 |
return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null });
|
| 149 |
}
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
return `${this.formatSnapshot(lastScrapped)}`;
|
| 152 |
}
|
| 153 |
|
|
|
|
| 1 |
+
import { assignTransferProtocolMeta, marshalErrorLike, RPCHost, RPCReflection, AssertionFailureError } from 'civkit';
|
| 2 |
import { singleton } from 'tsyringe';
|
| 3 |
import { CloudHTTPv2, Ctx, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
| 4 |
import _ from 'lodash';
|
|
|
|
| 90 |
|
| 91 |
try {
|
| 92 |
for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
const formatted = this.formatSnapshot(scrapped);
|
| 94 |
|
| 95 |
if (scrapped.screenshot && screenshotEnabled) {
|
|
|
|
| 130 |
return formatted;
|
| 131 |
}
|
| 132 |
|
| 133 |
+
if (!lastScrapped) {
|
| 134 |
+
throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
return this.formatSnapshot(lastScrapped);
|
| 138 |
}
|
| 139 |
|
|
|
|
| 148 |
return assignTransferProtocolMeta(`${formatted}`, { contentType: 'text/plain', envelope: null });
|
| 149 |
}
|
| 150 |
|
| 151 |
+
if (!lastScrapped) {
|
| 152 |
+
throw new AssertionFailureError(`No content available for URL ${urlToCrawl}`);
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
return `${this.formatSnapshot(lastScrapped)}`;
|
| 156 |
}
|
| 157 |
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -153,7 +153,7 @@ function giveSnapshot() {
|
|
| 153 |
return page;
|
| 154 |
}
|
| 155 |
|
| 156 |
-
async *scrap(url: string, noCache: string | boolean = false) {
|
| 157 |
const parsedUrl = new URL(url);
|
| 158 |
// parsedUrl.search = '';
|
| 159 |
parsedUrl.hash = '';
|
|
|
|
| 153 |
return page;
|
| 154 |
}
|
| 155 |
|
| 156 |
+
async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
|
| 157 |
const parsedUrl = new URL(url);
|
| 158 |
// parsedUrl.search = '';
|
| 159 |
parsedUrl.hash = '';
|