mapleeit committed on
Commit
7e6c2fc
·
1 Parent(s): 080056e

feat: add referer param

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -1123,6 +1123,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
1123
  timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
1124
  withIframe: opts.withIframe,
1125
  locale: opts.locale,
 
1126
  };
1127
 
1128
  return crawlOpts;
 
1123
  timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
1124
  withIframe: opts.withIframe,
1125
  locale: opts.locale,
1126
+ referer: opts.referer,
1127
  };
1128
 
1129
  return crawlOpts;
backend/functions/src/dto/scrapping-options.ts CHANGED
@@ -115,6 +115,11 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
115
  description: 'Specify browser locale for the page.',
116
  in: 'header',
117
  schema: { type: 'string' }
 
 
 
 
 
118
  }
119
  }
120
  }
@@ -201,6 +206,9 @@ export class CrawlerOptions extends AutoCastable {
201
  @Prop()
202
  locale?: string;
203
 
 
 
 
204
  static override from(input: any) {
205
  const instance = super.from(input) as CrawlerOptions;
206
  const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
@@ -218,6 +226,11 @@ export class CrawlerOptions extends AutoCastable {
218
  instance.locale = locale;
219
  }
220
 
 
 
 
 
 
221
  const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
222
  if (withGeneratedAlt !== undefined) {
223
  instance.withGeneratedAlt = Boolean(withGeneratedAlt);
 
115
  description: 'Specify browser locale for the page.',
116
  in: 'header',
117
  schema: { type: 'string' }
118
+ },
119
+ 'X-Referer': {
120
+ description: 'Specify referer for the page.',
121
+ in: 'header',
122
+ schema: { type: 'string' }
123
  }
124
  }
125
  }
 
206
  @Prop()
207
  locale?: string;
208
 
209
+ @Prop()
210
+ referer?: string;
211
+
212
  static override from(input: any) {
213
  const instance = super.from(input) as CrawlerOptions;
214
  const ctx = Reflect.get(input, RPC_CALL_ENVIRONMENT) as {
 
226
  instance.locale = locale;
227
  }
228
 
229
+ const referer = ctx?.req.get('x-referer');
230
+ if (referer !== undefined) {
231
+ instance.referer = referer;
232
+ }
233
+
234
  const withGeneratedAlt = ctx?.req.get('x-with-generated-alt');
235
  if (withGeneratedAlt !== undefined) {
236
  instance.withGeneratedAlt = Boolean(withGeneratedAlt);
backend/functions/src/services/puppeteer.ts CHANGED
@@ -4,7 +4,7 @@ import { container, singleton } from 'tsyringe';
4
  import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
5
  import { Logger } from '../shared/services/logger';
6
 
7
- import type { Browser, CookieParam, Page } from 'puppeteer';
8
  import puppeteer from 'puppeteer-extra';
9
 
10
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
@@ -69,6 +69,7 @@ export interface ScrappingOptions {
69
  overrideUserAgent?: string;
70
  timeoutMs?: number;
71
  locale?: string;
 
72
  }
73
 
74
 
@@ -545,11 +546,16 @@ document.addEventListener('load', handlePageLoad);
545
  });
546
 
547
  const timeout = options?.timeoutMs || 30_000;
548
-
549
- const gotoPromise = page.goto(url, {
550
  waitUntil: ['load', 'domcontentloaded', 'networkidle0'],
551
  timeout,
552
- })
 
 
 
 
 
 
553
  .catch((err) => {
554
  if (err instanceof TimeoutError) {
555
  this.logger.warn(`Page ${sn}: Browsing of ${url} timed out`, { err: marshalErrorLike(err) });
 
4
  import { AsyncService, Defer, marshalErrorLike, AssertionFailureError, delay, maxConcurrency } from 'civkit';
5
  import { Logger } from '../shared/services/logger';
6
 
7
+ import type { Browser, CookieParam, GoToOptions, Page } from 'puppeteer';
8
  import puppeteer from 'puppeteer-extra';
9
 
10
  import puppeteerBlockResources from 'puppeteer-extra-plugin-block-resources';
 
69
  overrideUserAgent?: string;
70
  timeoutMs?: number;
71
  locale?: string;
72
+ referer?: string;
73
  }
74
 
75
 
 
546
  });
547
 
548
  const timeout = options?.timeoutMs || 30_000;
549
+ const goToOptions: GoToOptions = {
 
550
  waitUntil: ['load', 'domcontentloaded', 'networkidle0'],
551
  timeout,
552
+ };
553
+
554
+ if (options?.referer) {
555
+ goToOptions.referer = options.referer;
556
+ }
557
+
558
+ const gotoPromise = page.goto(url, goToOptions)
559
  .catch((err) => {
560
  if (err instanceof TimeoutError) {
561
  this.logger.warn(`Page ${sn}: Browsing of ${url} timed out`, { err: marshalErrorLike(err) });