Spaces: Build error
fix: apply a rate limit of 100 requests per minute (100 qpm) per IP
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -6,6 +6,7 @@ import {
|
|
| 6 |
} from 'civkit';
|
| 7 |
import { singleton } from 'tsyringe';
|
| 8 |
import { CloudHTTPv2, Ctx, FirebaseStorageBucketControl, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
|
|
|
| 9 |
import _ from 'lodash';
|
| 10 |
import { PageSnapshot, PuppeteerControl, ScrappingOptions } from '../services/puppeteer';
|
| 11 |
import { Request, Response } from 'express';
|
|
@@ -36,6 +37,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 36 |
protected puppeteerControl: PuppeteerControl,
|
| 37 |
protected altTextService: AltTextService,
|
| 38 |
protected firebaseObjectStorage: FirebaseStorageBucketControl,
|
|
|
|
| 39 |
) {
|
| 40 |
super(...arguments);
|
| 41 |
|
|
@@ -113,7 +115,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 113 |
}
|
| 114 |
|
| 115 |
const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
|
| 116 |
-
let turnDownService = mode === 'markdown' ?
|
| 117 |
for (const plugin of this.turnDownPlugins) {
|
| 118 |
turnDownService = turnDownService.use(plugin);
|
| 119 |
}
|
|
@@ -295,6 +297,13 @@ ${this.content}
|
|
| 295 |
res: Response,
|
| 296 |
},
|
| 297 |
) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
const noSlashURL = ctx.req.url.slice(1);
|
| 299 |
if (!noSlashURL) {
|
| 300 |
return assignTransferProtocolMeta(`[Usage] https://r.jina.ai/YOUR_URL
|
|
|
|
| 6 |
} from 'civkit';
|
| 7 |
import { singleton } from 'tsyringe';
|
| 8 |
import { CloudHTTPv2, Ctx, FirebaseStorageBucketControl, Logger, OutputServerEventStream, RPCReflect } from '../shared';
|
| 9 |
+
import { RateLimitControl } from '../shared/services/rate-limit';
|
| 10 |
import _ from 'lodash';
|
| 11 |
import { PageSnapshot, PuppeteerControl, ScrappingOptions } from '../services/puppeteer';
|
| 12 |
import { Request, Response } from 'express';
|
|
|
|
| 37 |
protected puppeteerControl: PuppeteerControl,
|
| 38 |
protected altTextService: AltTextService,
|
| 39 |
protected firebaseObjectStorage: FirebaseStorageBucketControl,
|
| 40 |
+
protected rateLimitControl: RateLimitControl,
|
| 41 |
) {
|
| 42 |
super(...arguments);
|
| 43 |
|
|
|
|
| 115 |
}
|
| 116 |
|
| 117 |
const toBeTurnedToMd = mode === 'markdown' ? snapshot.html : snapshot.parsed?.content;
|
| 118 |
+
let turnDownService = mode === 'markdown' ? this.getTurndown() : this.getTurndown('without any rule');
|
| 119 |
for (const plugin of this.turnDownPlugins) {
|
| 120 |
turnDownService = turnDownService.use(plugin);
|
| 121 |
}
|
|
|
|
| 297 |
res: Response,
|
| 298 |
},
|
| 299 |
) {
|
| 300 |
+
if (ctx.req.ip) {
|
| 301 |
+
await this.rateLimitControl.simpleRpcIPBasedLimit(rpcReflect, ctx.req.ip, ['CRAWL'], [
|
| 302 |
+
// 100 requests per minute
|
| 303 |
+
new Date(Date.now() - 60 * 1000), 100
|
| 304 |
+
]);
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
const noSlashURL = ctx.req.url.slice(1);
|
| 308 |
if (!noSlashURL) {
|
| 309 |
return assignTransferProtocolMeta(`[Usage] https://r.jina.ai/YOUR_URL
|
thinapps-shared
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit e681cf89bd21d77469dd286b2348e4cf5fce76e7
|