nomagick committed on
Commit
867636d
·
unverified ·
1 Parent(s): 15606f3

fix: apply rate limit to 100qpm per IP

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -6,6 +6,7 @@ import {
6
  } from 'civkit';
7
  import { singleton } from 'tsyringe';
8
  import { CloudHTTPv2, Ctx, FirebaseStorageBucketControl, Logger, OutputServerEventStream, RPCReflect } from '../shared';
 
9
  import _ from 'lodash';
10
  import { PageSnapshot, PuppeteerControl, ScrappingOptions } from '../services/puppeteer';
11
  import { Request, Response } from 'express';
@@ -36,6 +37,7 @@ export class CrawlerHost extends RPCHost {
36
  protected puppeteerControl: PuppeteerControl,
37
  protected altTextService: AltTextService,
38
  protected firebaseObjectStorage: FirebaseStorageBucketControl,
 
39
  ) {
40
  super(...arguments);
41
 
@@ -113,7 +115,7 @@ export class CrawlerHost extends RPCHost {
113
  }
114
 
115
  const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
116
- let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule');
117
  for (const plugin of this.turnDownPlugins) {
118
  turnDownService = turnDownService.use(plugin);
119
  }
@@ -295,6 +297,13 @@ ${this.content}
295
  res: Response,
296
  },
297
  ) {
 
 
 
 
 
 
 
298
  const noSlashURL = ctx.req.url.slice(1);
299
  if (!noSlashURL) {
300
  return assignTransferProtocolMeta(`[Usage] https://r.jina.ai/YOUR_URL
 
6
  } from 'civkit';
7
  import { singleton } from 'tsyringe';
8
  import { CloudHTTPv2, Ctx, FirebaseStorageBucketControl, Logger, OutputServerEventStream, RPCReflect } from '../shared';
9
+ import { RateLimitControl } from '../shared/services/rate-limit';
10
  import _ from 'lodash';
11
  import { PageSnapshot, PuppeteerControl, ScrappingOptions } from '../services/puppeteer';
12
  import { Request, Response } from 'express';
 
37
  protected puppeteerControl: PuppeteerControl,
38
  protected altTextService: AltTextService,
39
  protected firebaseObjectStorage: FirebaseStorageBucketControl,
40
+ protected rateLimitControl: RateLimitControl,
41
  ) {
42
  super(...arguments);
43
 
 
115
  }
116
 
117
  const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
118
+ let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule');
119
  for (const plugin of this.turnDownPlugins) {
120
  turnDownService = turnDownService.use(plugin);
121
  }
 
297
  res: Response,
298
  },
299
  ) {
300
+ if (ctx.req.ip) {
301
+ await this.rateLimitControl.simpleRpcIPBasedLimit(rpcReflect, ctx.req.ip, ['CRAWL'], [
302
+ // 100 requests per minute
303
+ new Date(Date.now() - 60 * 1000), 100
304
+ ]);
305
+ }
306
+
307
  const noSlashURL = ctx.req.url.slice(1);
308
  if (!noSlashURL) {
309
  return assignTransferProtocolMeta(`[Usage] https://r.jina.ai/YOUR_URL
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit b165c1cb0e21b7b8762a23b8ce88219aa532c293
 
1
+ Subproject commit e681cf89bd21d77469dd286b2348e4cf5fce76e7