Spaces:
Build error
Build error
feat(crawl): viewport options
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -69,7 +69,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 69 |
// Potential privacy issue; don't cache if cookies are used
|
| 70 |
return;
|
| 71 |
}
|
| 72 |
-
if (options.injectFrameScripts?.length || options.injectPageScripts?.length) {
|
| 73 |
// Potentially mangled content; don't cache if scripts are injected
|
| 74 |
return;
|
| 75 |
}
|
|
@@ -725,6 +725,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 725 |
withShadowDom: opts.withShadowDom,
|
| 726 |
locale: opts.locale,
|
| 727 |
referer: opts.referer,
|
|
|
|
| 728 |
};
|
| 729 |
|
| 730 |
if (opts.locale) {
|
|
|
|
| 69 |
// Potential privacy issue; don't cache if cookies are used
|
| 70 |
return;
|
| 71 |
}
|
| 72 |
+
if (options.injectFrameScripts?.length || options.injectPageScripts?.length || options.viewport) {
|
| 73 |
// Potentially mangled content; don't cache if scripts are injected or a custom viewport is requested
|
| 74 |
return;
|
| 75 |
}
|
|
|
|
| 725 |
withShadowDom: opts.withShadowDom,
|
| 726 |
locale: opts.locale,
|
| 727 |
referer: opts.referer,
|
| 728 |
+
viewport: opts.viewport,
|
| 729 |
};
|
| 730 |
|
| 731 |
if (opts.locale) {
|
backend/functions/src/dto/scrapping-options.ts
CHANGED
|
@@ -16,6 +16,25 @@ const CONTENT_FORMAT_VALUES = new Set<string>(Object.values(CONTENT_FORMAT));
|
|
| 16 |
export const IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'] as const;
|
| 17 |
const IMAGE_RETENTION_MODE_VALUES = new Set<string>(IMAGE_RETENTION_MODES);
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
@Also({
|
| 20 |
openapi: {
|
| 21 |
operation: {
|
|
@@ -279,6 +298,9 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 279 |
@Prop()
|
| 280 |
tokenBudget?: number;
|
| 281 |
|
|
|
|
|
|
|
|
|
|
| 282 |
static override from(input: any) {
|
| 283 |
const instance = super.from(input) as CrawlerOptions;
|
| 284 |
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
|
@@ -432,6 +454,9 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 432 |
if (this.injectFrameScript?.length || this.injectPageScript?.length) {
|
| 433 |
return false;
|
| 434 |
}
|
|
|
|
|
|
|
|
|
|
| 435 |
|
| 436 |
return true;
|
| 437 |
}
|
|
|
|
| 16 |
export const IMAGE_RETENTION_MODES = ['none', 'all', 'alt', 'all_p', 'alt_p'] as const;
|
| 17 |
const IMAGE_RETENTION_MODE_VALUES = new Set<string>(IMAGE_RETENTION_MODES);
|
| 18 |
|
| 19 |
+
/**
 * Request-side description of a browser viewport, used as the type of
 * `CrawlerOptions.viewport`.
 *
 * `width` and `height` each declare a `@Prop` default of 1024; the remaining
 * fields are optional and declare no default.
 *
 * NOTE(review): the field names look like Puppeteer's `Viewport` shape, and the
 * value ends up passed to `page.setViewport` — confirm the two stay compatible.
 */
class Viewport extends AutoCastable {
|
| 20 |
+
@Prop({
|
| 21 |
+
default: 1024
|
| 22 |
+
})
|
| 23 |
+
width!: number;
|
| 24 |
+
@Prop({
|
| 25 |
+
default: 1024
|
| 26 |
+
})
|
| 27 |
+
height!: number;
|
| 28 |
+
@Prop()
|
| 29 |
+
deviceScaleFactor?: number;
|
| 30 |
+
@Prop()
|
| 31 |
+
isMobile?: boolean;
|
| 32 |
+
@Prop()
|
| 33 |
+
isLandscape?: boolean;
|
| 34 |
+
@Prop()
|
| 35 |
+
hasTouch?: boolean;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
@Also({
|
| 39 |
openapi: {
|
| 40 |
operation: {
|
|
|
|
| 298 |
@Prop()
|
| 299 |
tokenBudget?: number;
|
| 300 |
|
| 301 |
+
@Prop()
|
| 302 |
+
viewport?: Viewport;
|
| 303 |
+
|
| 304 |
static override from(input: any) {
|
| 305 |
const instance = super.from(input) as CrawlerOptions;
|
| 306 |
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
|
|
|
| 454 |
if (this.injectFrameScript?.length || this.injectPageScript?.length) {
|
| 455 |
return false;
|
| 456 |
}
|
| 457 |
+
if (this.viewport) {
|
| 458 |
+
return false;
|
| 459 |
+
}
|
| 460 |
|
| 461 |
return true;
|
| 462 |
}
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -80,6 +80,7 @@ export interface ScrappingOptions {
|
|
| 80 |
extraHeaders?: Record<string, string>;
|
| 81 |
injectFrameScripts?: string[];
|
| 82 |
injectPageScripts?: string[];
|
|
|
|
| 83 |
}
|
| 84 |
|
| 85 |
|
|
@@ -863,6 +864,9 @@ export class PuppeteerControl extends AsyncService {
|
|
| 863 |
if (options?.overrideUserAgent) {
|
| 864 |
await page.setUserAgent(options.overrideUserAgent);
|
| 865 |
}
|
|
|
|
|
|
|
|
|
|
| 866 |
|
| 867 |
let nextSnapshotDeferred = Defer();
|
| 868 |
const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` }));
|
|
|
|
| 80 |
extraHeaders?: Record<string, string>;
|
| 81 |
injectFrameScripts?: string[];
|
| 82 |
injectPageScripts?: string[];
|
| 83 |
+
viewport?: Viewport;
|
| 84 |
}
|
| 85 |
|
| 86 |
|
|
|
|
| 864 |
if (options?.overrideUserAgent) {
|
| 865 |
await page.setUserAgent(options.overrideUserAgent);
|
| 866 |
}
|
| 867 |
+
if (options?.viewport) {
|
| 868 |
+
await page.setViewport(options.viewport);
|
| 869 |
+
}
|
| 870 |
|
| 871 |
let nextSnapshotDeferred = Defer();
|
| 872 |
const crippleListener = () => nextSnapshotDeferred.reject(new ServiceCrashedError({ message: `Browser crashed, try again` }));
|