Spaces:
Build error
Build error
feat: add referer param
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -1123,6 +1123,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|
| 1123 |
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
| 1124 |
withIframe: opts.withIframe,
|
| 1125 |
locale: opts.locale,
|
|
|
|
| 1126 |
};
|
| 1127 |
|
| 1128 |
return crawlOpts;
|
|
|
|
| 1123 |
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
| 1124 |
withIframe: opts.withIframe,
|
| 1125 |
locale: opts.locale,
|
| 1126 |
+
referer: opts.referer,
|
| 1127 |
};
|
| 1128 |
|
| 1129 |
return crawlOpts;
|
backend/functions/src/dto/scrapping-options.ts
CHANGED
|
@@ -115,6 +115,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|
| 115 |
description: 'Specify browser locale for the page.',
|
| 116 |
in: 'header',
|
| 117 |
schema: { type: 'string' }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
}
|
| 119 |
}
|
| 120 |
}
|
|
@@ -201,6 +206,9 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 201 |
@Prop()
|
| 202 |
locale?: string;
|
| 203 |
|
|
|
|
|
|
|
|
|
|
| 204 |
static override from(input: any) {
|
| 205 |
const instance = super.from(input) as CrawlerOptions;
|
| 206 |
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
|
@@ -218,6 +226,11 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 218 |
instance.locale = locale;
|
| 219 |
}
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
| 222 |
if (withGeneratedAlt !== undefined) {
|
| 223 |
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
|
|
|
| 115 |
description: 'Specify browser locale for the page.',
|
| 116 |
in: 'header',
|
| 117 |
schema: { type: 'string' }
|
| 118 |
+
},
|
| 119 |
+
'X-Referer': {
|
| 120 |
+
description: 'Specify referer for the page.',
|
| 121 |
+
in: 'header',
|
| 122 |
+
schema: { type: 'string' }
|
| 123 |
}
|
| 124 |
}
|
| 125 |
}
|
|
|
|
| 206 |
@Prop()
|
| 207 |
locale?: string;
|
| 208 |
|
| 209 |
+
@Prop()
|
| 210 |
+
referer?: string;
|
| 211 |
+
|
| 212 |
static override from(input: any) {
|
| 213 |
const instance = super.from(input) as CrawlerOptions;
|
| 214 |
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
|
|
|
| 226 |
instance.locale = locale;
|
| 227 |
}
|
| 228 |
|
| 229 |
+
const referer = ctx?.req.get('x-referer');
|
| 230 |
+
if (referer !== undefined) {
|
| 231 |
+
instance.referer = referer;
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
| 235 |
if (withGeneratedAlt !== undefined) {
|
| 236 |
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { container, singleton } from 'tsyringe';
|
|
| 4 |
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
|
| 5 |
import { Logger } from '../shared/services/logger';
|
| 6 |
|
| 7 |
-
import type { Browser, CookieParam, Page } from 'puppeteer';
|
| 8 |
import puppeteer from 'puppeteer-extra';
|
| 9 |
|
| 10 |
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|
@@ -69,6 +69,7 @@ export interface ScrappingOptions {
|
|
| 69 |
overrideUserAgent?: string;
|
| 70 |
timeoutMs?: number;
|
| 71 |
locale?: string;
|
|
|
|
| 72 |
}
|
| 73 |
|
| 74 |
|
|
@@ -545,11 +546,16 @@ document.addEventListener('load', handlePageLoad);
|
|
| 545 |
});
|
| 546 |
|
| 547 |
const timeout = options?.timeoutMs || 30_000;
|
| 548 |
-
|
| 549 |
-
const gotoPromise = page.goto(url, {
|
| 550 |
waitUntil: ['load', 'domcontentloaded', 'networkidle0'],
|
| 551 |
timeout,
|
| 552 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
.catch((err) => {
|
| 554 |
if (err instanceof TimeoutError) {
|
| 555 |
this.logger.warn(`Page ${sn}: Browsing of ${url} timed out`, { err: marshalErrorLike(err) });
|
|
|
|
| 4 |
import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
|
| 5 |
import { Logger } from '../shared/services/logger';
|
| 6 |
|
| 7 |
+
import type { Browser, CookieParam, GoToOptions, Page } from 'puppeteer';
|
| 8 |
import puppeteer from 'puppeteer-extra';
|
| 9 |
|
| 10 |
import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
|
|
|
|
| 69 |
overrideUserAgent?: string;
|
| 70 |
timeoutMs?: number;
|
| 71 |
locale?: string;
|
| 72 |
+
referer?: string;
|
| 73 |
}
|
| 74 |
|
| 75 |
|
|
|
|
| 546 |
});
|
| 547 |
|
| 548 |
const timeout = options?.timeoutMs || 30_000;
|
| 549 |
+
const goToOptions: GoToOptions = {
|
|
|
|
| 550 |
waitUntil: ['load', 'domcontentloaded', 'networkidle0'],
|
| 551 |
timeout,
|
| 552 |
+
};
|
| 553 |
+
|
| 554 |
+
if (options?.referer) {
|
| 555 |
+
goToOptions.referer = options.referer;
|
| 556 |
+
}
|
| 557 |
+
|
| 558 |
+
const gotoPromise = page.goto(url, goToOptions)
|
| 559 |
.catch((err) => {
|
| 560 |
if (err instanceof TimeoutError) {
|
| 561 |
this.logger.warn(`Page ${sn}: Browsing of ${url} timed out`, { err: marshalErrorLike(err) });
|