mapleeit committed on
Commit
de50c93
·
1 Parent(s): fb5bd58

feat: expose X-Locale parameter

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -1106,6 +1106,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
1106
  overrideUserAgent: opts.userAgent,
1107
  timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
1108
  withIframe: opts.withIframe,
 
1109
  };
1110
 
1111
  return crawlOpts;
 
1106
  overrideUserAgent: opts.userAgent,
1107
  timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
1108
  withIframe: opts.withIframe,
1109
+ locale: opts.locale,
1110
  };
1111
 
1112
  return crawlOpts;
backend/functions/src/dto/scrapping-options.ts CHANGED
@@ -111,6 +111,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
111
  in: 'header',
112
  schema: { type: 'string' }
113
  },
 
 
 
 
 
114
  }
115
  }
116
  }
@@ -188,6 +193,9 @@ export class CrawlerOptions extends AutoCastable {
188
  })
189
  timeout?: number | null;
190
 
 
 
 
191
  static override from(input: any) {
192
  const instance = super.from(input) as CrawlerOptions;
193
  const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
@@ -200,6 +208,11 @@ export class CrawlerOptions extends AutoCastable {
200
  instance.respondWith = customMode;
201
  }
202
 
 
 
 
 
 
203
  const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
204
  if (withGeneratedAlt !== undefined) {
205
  instance.withGeneratedAlt = Boolean(withGeneratedAlt);
 
111
  in: 'header',
112
  schema: { type: 'string' }
113
  },
114
+ 'X-Locale': {
115
+ description: 'Specify browser locale for the page.',
116
+ in: 'header',
117
+ schema: { type: 'string' }
118
+ }
119
  }
120
  }
121
  }
 
193
  })
194
  timeout?: number | null;
195
 
196
+ @Prop()
197
+ locale?: string;
198
+
199
  static override from(input: any) {
200
  const instance = super.from(input) as CrawlerOptions;
201
  const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
 
208
  instance.respondWith = customMode;
209
  }
210
 
211
+ const locale = ctx?.req.get('x-locale');
212
+ if (locale !== undefined) {
213
+ instance.locale = locale;
214
+ }
215
+
216
  const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
217
  if (withGeneratedAlt !== undefined) {
218
  instance.withGeneratedAlt = Boolean(withGeneratedAlt);
backend/functions/src/services/puppeteer.ts CHANGED
@@ -68,6 +68,7 @@ export interface ScrappingOptions {
68
  minIntervalMs?: number;
69
  overrideUserAgent?: string;
70
  timeoutMs?: number;
 
71
  }
72
 
73
 
@@ -472,6 +473,27 @@ document.addEventListener('load', handlePageLoad);
472
  const page = await this.getNextPage();
473
  const sn = this.snMap.get(page);
474
  this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
  if (options?.proxyUrl) {
476
  await page.useProxy(options.proxyUrl);
477
  }
 
68
  minIntervalMs?: number;
69
  overrideUserAgent?: string;
70
  timeoutMs?: number;
71
+ locale?: string;
72
  }
73
 
74
 
 
473
  const page = await this.getNextPage();
474
  const sn = this.snMap.get(page);
475
  this.logger.info(`Page ${sn}: Scraping ${url}`, { url });
476
+
477
+ this.logger.info(`Locale setting: ${options?.locale}`);
478
+ if (options?.locale) {
479
+ await page.setExtraHTTPHeaders({
480
+ 'Accept-Language': options?.locale
481
+ });
482
+
483
+ await page.evaluateOnNewDocument(() => {
484
+ Object.defineProperty(navigator, "language", {
485
+ get: function() {
486
+ return options?.locale;
487
+ }
488
+ });
489
+ Object.defineProperty(navigator, "languages", {
490
+ get: function() {
491
+ return [options?.locale];
492
+ }
493
+ });
494
+ })
495
+ }
496
+
497
  if (options?.proxyUrl) {
498
  await page.useProxy(options.proxyUrl);
499
  }