Spaces:
Build error
Build error
feat: expose X-Locale parameter
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -1106,6 +1106,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|
| 1106 |
overrideUserAgent: opts.userAgent,
|
| 1107 |
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
| 1108 |
withIframe: opts.withIframe,
|
|
|
|
| 1109 |
};
|
| 1110 |
|
| 1111 |
return crawlOpts;
|
|
|
|
| 1106 |
overrideUserAgent: opts.userAgent,
|
| 1107 |
timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
|
| 1108 |
withIframe: opts.withIframe,
|
| 1109 |
+
locale: opts.locale,
|
| 1110 |
};
|
| 1111 |
|
| 1112 |
return crawlOpts;
|
backend/functions/src/dto/scrapping-options.ts
CHANGED
|
@@ -111,6 +111,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|
| 111 |
in: 'header',
|
| 112 |
schema: { type: 'string' }
|
| 113 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
}
|
| 115 |
}
|
| 116 |
}
|
|
@@ -188,6 +193,9 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 188 |
})
|
| 189 |
timeout?: number | null;
|
| 190 |
|
|
|
|
|
|
|
|
|
|
| 191 |
static override from(input: any) {
|
| 192 |
const instance = super.from(input) as CrawlerOptions;
|
| 193 |
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
|
@@ -200,6 +208,11 @@ export class CrawlerOptions extends AutoCastable {
|
|
| 200 |
instance.respondWith = customMode;
|
| 201 |
}
|
| 202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
| 204 |
if (withGeneratedAlt !== undefined) {
|
| 205 |
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
|
|
|
| 111 |
in: 'header',
|
| 112 |
schema: { type: 'string' }
|
| 113 |
},
|
| 114 |
+
'X-Locale': {
|
| 115 |
+
description: 'Specify browser locale for the page.',
|
| 116 |
+
in: 'header',
|
| 117 |
+
schema: { type: 'string' }
|
| 118 |
+
}
|
| 119 |
}
|
| 120 |
}
|
| 121 |
}
|
|
|
|
| 193 |
})
|
| 194 |
timeout?: number | null;
|
| 195 |
|
| 196 |
+
@Prop()
|
| 197 |
+
locale?: string;
|
| 198 |
+
|
| 199 |
static override from(input: any) {
|
| 200 |
const instance = super.from(input) as CrawlerOptions;
|
| 201 |
const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
|
|
|
|
| 208 |
instance.respondWith = customMode;
|
| 209 |
}
|
| 210 |
|
| 211 |
+
const locale = ctx?.req.get('x-locale');
|
| 212 |
+
if (locale !== undefined) {
|
| 213 |
+
instance.locale = locale;
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
|
| 217 |
if (withGeneratedAlt !== undefined) {
|
| 218 |
instance.withGeneratedAlt = Boolean(withGeneratedAlt);
|
backend/functions/src/services/puppeteer.ts
CHANGED
|
@@ -68,6 +68,7 @@ export interface ScrappingOptions {
|
|
| 68 |
minIntervalMs?: number;
|
| 69 |
overrideUserAgent?: string;
|
| 70 |
timeoutMs?: number;
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
|
|
@@ -472,6 +473,27 @@ document.addEventListener('load', handlePageLoad);
|
|
| 472 |
const page = await this.getNextPage();
|
| 473 |
const sn = this.snMap.get(page);
|
| 474 |
this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
if (options?.proxyUrl) {
|
| 476 |
await page.useProxy(options.proxyUrl);
|
| 477 |
}
|
|
|
|
| 68 |
minIntervalMs?: number;
|
| 69 |
overrideUserAgent?: string;
|
| 70 |
timeoutMs?: number;
|
| 71 |
+
locale?: string;
|
| 72 |
}
|
| 73 |
|
| 74 |
|
|
|
|
| 473 |
const page = await this.getNextPage();
|
| 474 |
const sn = this.snMap.get(page);
|
| 475 |
this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
|
| 476 |
+
|
| 477 |
+
this.logger.info(`Locale setting: ${options?.locale}`);
|
| 478 |
+
// Apply the caller-requested locale to this page: once as an HTTP request
// header, and once inside the page's JavaScript environment.
if (options?.locale) {
    const locale = options.locale;

    // Advertise the locale through the Accept-Language request header.
    await page.setExtraHTTPHeaders({
        'Accept-Language': locale
    });

    // The callback below is serialized and executed inside the browser page,
    // so it cannot close over Node-side variables such as `options`.
    // The locale is therefore handed over as an explicit argument.
    await page.evaluateOnNewDocument((pageLocale: string) => {
        Object.defineProperty(navigator, "language", {
            get: function() {
                return pageLocale;
            }
        });
        Object.defineProperty(navigator, "languages", {
            get: function() {
                return [pageLocale];
            }
        });
    }, locale);
}
|
| 496 |
+
|
| 497 |
if (options?.proxyUrl) {
|
| 498 |
await page.useProxy(options.proxyUrl);
|
| 499 |
}
|