nomagick committed on
Commit
5a81177
·
unverified ·
1 Parent(s): 33ca164

saas: new tier policy

Browse files
src/api/crawler.ts CHANGED
@@ -240,6 +240,7 @@ export class CrawlerHost extends RPCHost {
240
  const uid = await auth.solveUID();
241
  let chargeAmount = 0;
242
  const crawlerOptions = ctx.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
 
243
 
244
  // Use koa ctx.URL, a standard URL object to avoid node.js framework prop naming confusion
245
  const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(`${ctx.URL.pathname}${ctx.URL.search}`), crawlerOptions);
@@ -298,15 +299,13 @@ export class CrawlerHost extends RPCHost {
298
  if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
299
  return;
300
  }
301
- if (chargeAmount) {
302
- apiRoll._ref?.set({
303
- chargeAmount,
304
- }, { merge: true }).catch((err) => this.logger.warn(`Failed to log charge amount in apiRoll`, { err }));
305
- }
306
  });
307
  }
308
 
309
  if (!uid) {
 
 
310
  const blockade = (await DomainBlockade.fromFirestoreQuery(
311
  DomainBlockade.COLLECTION
312
  .where('domain', '==', targetUrl.hostname.toLowerCase())
@@ -338,10 +337,7 @@ export class CrawlerHost extends RPCHost {
338
  }
339
 
340
  const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
341
- chargeAmount = this.assignChargeAmount(formatted, crawlerOptions);
342
- if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
343
- throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
344
- }
345
  sseStream.write({
346
  event: 'data',
347
  data: formatted,
@@ -379,11 +375,7 @@ export class CrawlerHost extends RPCHost {
379
  }
380
 
381
  const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
382
- chargeAmount = this.assignChargeAmount(formatted, crawlerOptions);
383
-
384
- if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
385
- throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
386
- }
387
 
388
  if (scrapped?.pdfs?.length && !chargeAmount) {
389
  continue;
@@ -405,10 +397,7 @@ export class CrawlerHost extends RPCHost {
405
  }
406
 
407
  const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
408
- chargeAmount = this.assignChargeAmount(formatted, crawlerOptions);
409
- if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
410
- throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
411
- }
412
 
413
  return formatted;
414
  }
@@ -434,10 +423,7 @@ export class CrawlerHost extends RPCHost {
434
  }
435
 
436
  const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
437
- chargeAmount = this.assignChargeAmount(formatted, crawlerOptions);
438
- if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
439
- throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
440
- }
441
 
442
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
443
  return assignTransferProtocolMeta(`${formatted.textRepresentation}`,
@@ -465,10 +451,7 @@ export class CrawlerHost extends RPCHost {
465
  throw new AssertionFailureError(`No content available for URL ${targetUrl}`);
466
  }
467
  const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
468
- chargeAmount = this.assignChargeAmount(formatted, crawlerOptions);
469
- if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
470
- throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
471
- }
472
 
473
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
474
 
@@ -840,7 +823,8 @@ export class CrawlerHost extends RPCHost {
840
  yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
841
  }
842
  let fallbackProxyIsUsed = false;
843
- if (((!crawlOpts?.allocProxy || crawlOpts.allocProxy === 'none') && !crawlOpts?.proxyUrl) &&
 
844
  (analyzed.tokens < 42 || sideLoaded.status !== 200)
845
  ) {
846
  const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
@@ -911,18 +895,14 @@ export class CrawlerHost extends RPCHost {
911
  }
912
  }
913
 
914
- assignChargeAmount(formatted: FormattedPage, crawlerOptions?: CrawlerOptions) {
915
  if (!formatted) {
916
  return 0;
917
  }
918
 
919
  let amount = 0;
920
  if (formatted.content) {
921
- const x1 = estimateToken(formatted.content);
922
- if (crawlerOptions?.respondWith?.toLowerCase().includes('lm')) {
923
- amount += x1 * 2;
924
- }
925
- amount += x1;
926
  } else if (formatted.description) {
927
  amount += estimateToken(formatted.description);
928
  }
@@ -939,6 +919,10 @@ export class CrawlerHost extends RPCHost {
939
  amount += 765;
940
  }
941
 
 
 
 
 
942
  Object.assign(formatted, { usage: { tokens: amount } });
943
  assignMeta(formatted, { usage: { tokens: amount } });
944
 
@@ -1312,4 +1296,54 @@ export class CrawlerHost extends RPCHost {
1312
 
1313
  return false;
1314
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1315
  }
 
240
  const uid = await auth.solveUID();
241
  let chargeAmount = 0;
242
  const crawlerOptions = ctx.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
243
+ const tierPolicy = await this.saasAssertTierPolicy(crawlerOptions, auth);
244
 
245
  // Use koa ctx.URL, a standard URL object to avoid node.js framework prop naming confusion
246
  const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(`${ctx.URL.pathname}${ctx.URL.search}`), crawlerOptions);
 
299
  if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
300
  return;
301
  }
302
+ apiRoll.chargeAmount = chargeAmount;
 
 
 
 
303
  });
304
  }
305
 
306
  if (!uid) {
307
+ // Enforce no proxy is allocated for anonymous users due to abuse.
308
+ crawlerOptions.proxy = 'none';
309
  const blockade = (await DomainBlockade.fromFirestoreQuery(
310
  DomainBlockade.COLLECTION
311
  .where('domain', '==', targetUrl.hostname.toLowerCase())
 
337
  }
338
 
339
  const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
340
+ chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
 
 
 
341
  sseStream.write({
342
  event: 'data',
343
  data: formatted,
 
375
  }
376
 
377
  const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
378
+ chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
 
 
 
 
379
 
380
  if (scrapped?.pdfs?.length && !chargeAmount) {
381
  continue;
 
397
  }
398
 
399
  const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
400
+ chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
 
 
 
401
 
402
  return formatted;
403
  }
 
423
  }
424
 
425
  const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
426
+ chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
 
 
 
427
 
428
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
429
  return assignTransferProtocolMeta(`${formatted.textRepresentation}`,
 
451
  throw new AssertionFailureError(`No content available for URL ${targetUrl}`);
452
  }
453
  const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
454
+ chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
 
 
 
455
 
456
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
457
 
 
823
  yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
824
  }
825
  let fallbackProxyIsUsed = false;
826
+ if (
827
+ ((!crawlOpts?.allocProxy || crawlOpts.allocProxy !== 'none') && !crawlOpts?.proxyUrl) &&
828
  (analyzed.tokens < 42 || sideLoaded.status !== 200)
829
  ) {
830
  const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
 
895
  }
896
  }
897
 
898
+ assignChargeAmount(formatted: FormattedPage, saasTierPolicy?: Parameters<typeof this.saasApplyTierPolicy>[0]) {
899
  if (!formatted) {
900
  return 0;
901
  }
902
 
903
  let amount = 0;
904
  if (formatted.content) {
905
+ amount = estimateToken(formatted.content);
 
 
 
 
906
  } else if (formatted.description) {
907
  amount += estimateToken(formatted.description);
908
  }
 
919
  amount += 765;
920
  }
921
 
922
+ if (saasTierPolicy) {
923
+ amount = this.saasApplyTierPolicy(saasTierPolicy, amount);
924
+ }
925
+
926
  Object.assign(formatted, { usage: { tokens: amount } });
927
  assignMeta(formatted, { usage: { tokens: amount } });
928
 
 
1296
 
1297
  return false;
1298
  }
1299
+
1300
/**
 * Checks, feature by feature, that the caller may use the options requested
 * in `opts`, and derives the charging policy for the request.
 *
 * Each gated feature is asserted through `auth.assertTier(...)`; any error it
 * throws propagates to the caller unchanged. Script injection, generated alt
 * text, iframes, Cloudflare browser rendering and language-model output are
 * asserted at tier 0. Proxy allocation is asserted at tier 0 for the
 * `auto` / `any` modes and at tier 2 for any other non-`none` value.
 *
 * @param opts - Crawler options of the current request.
 * @param auth - Auth DTO used to assert the caller's tier.
 * @returns The policy consumed by `saasApplyTierPolicy`:
 *   `budget` (`opts.tokenBudget`, or 0 when unset), `chargeScalar`
 *   (multiplier applied to the charge) and `minimalCharge` (floor applied to
 *   the charge before scaling).
 */
async saasAssertTierPolicy(opts: CrawlerOptions, auth: JinaEmbeddingsAuthDTO) {
    // Floor charged for any request that uses one of the costly features below.
    const PREMIUM_MINIMAL_CHARGE = 4_000;

    let chargeScalar = 1;
    let minimalCharge = 0;

    if (opts.injectPageScript || opts.injectFrameScript) {
        await auth.assertTier(0, 'Script injection');
        minimalCharge = Math.max(minimalCharge, PREMIUM_MINIMAL_CHARGE);
    }

    if (opts.withGeneratedAlt) {
        await auth.assertTier(0, 'Alt text generation');
        minimalCharge = Math.max(minimalCharge, PREMIUM_MINIMAL_CHARGE);
    }

    if (opts.withIframe) {
        // Gated, but does not affect the charge.
        await auth.assertTier(0, 'Iframe');
    }

    if (opts.engine === ENGINE_TYPE.CF_BROWSER_RENDERING) {
        await auth.assertTier(0, 'Cloudflare browser rendering');
        minimalCharge = Math.max(minimalCharge, PREMIUM_MINIMAL_CHARGE);
    }

    // `respondWith` is guarded the same way as `engine`, so a request without
    // it does not fail with a TypeError before any tier check runs.
    if (opts.respondWith?.includes('lm') || opts.engine?.includes('lm')) {
        await auth.assertTier(0, 'Language model');
        minimalCharge = Math.max(minimalCharge, PREMIUM_MINIMAL_CHARGE);
        chargeScalar = Math.max(chargeScalar, 3);
    }

    if (opts.proxy && opts.proxy !== 'none') {
        await auth.assertTier(['auto', 'any'].includes(opts.proxy) ? 0 : 2, 'Proxy allocation');
        // The largest applicable multiplier wins, independent of statement order.
        chargeScalar = Math.max(chargeScalar, 5);
    }

    return {
        budget: opts.tokenBudget || 0,
        chargeScalar,
        minimalCharge,
    };
}
1340
+
1341
/**
 * Turns a raw charge amount into the amount actually billed under `policy`.
 *
 * The raw amount is first raised to `policy.minimalCharge` if it is lower,
 * then multiplied by `policy.chargeScalar`.
 *
 * @param policy - Policy object produced by `saasAssertTierPolicy`.
 * @param chargeAmount - Raw charge amount before the policy is applied.
 * @returns The effective charge amount.
 * @throws BudgetExceededError when `policy.budget` is non-zero and smaller
 *   than the effective charge amount.
 */
saasApplyTierPolicy(policy: Awaited<ReturnType<typeof this.saasAssertTierPolicy>>, chargeAmount: number) {
    const { budget, chargeScalar, minimalCharge } = policy;

    const flooredAmount = Math.max(chargeAmount, minimalCharge);
    const billedAmount = chargeScalar * flooredAmount;

    // A budget of 0 means "no budget set", so only a truthy budget is enforced.
    const budgetExceeded = Boolean(budget) && budget < billedAmount;
    if (budgetExceeded) {
        throw new BudgetExceededError(`Token budget (${budget}) exceeded, intended charge amount ${billedAmount}`);
    }

    return billedAmount;
}
1349
  }
src/dto/jina-embeddings-auth.ts CHANGED
@@ -17,6 +17,7 @@ import { AsyncLocalContext } from '../services/async-context';
17
  import envConfig from '../shared/services/secrets';
18
  import { JinaEmbeddingsDashboardHTTP } from '../shared/3rd-party/jina-embeddings';
19
  import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
 
20
 
21
  const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
22
 
@@ -236,6 +237,30 @@ export class JinaEmbeddingsAuthDTO extends AutoCastable {
236
  return this.user!;
237
  }
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  getRateLimits(...tags: string[]) {
240
  const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
241
 
 
17
  import envConfig from '../shared/services/secrets';
18
  import { JinaEmbeddingsDashboardHTTP } from '../shared/3rd-party/jina-embeddings';
19
  import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
20
+ import { TierFeatureConstraintError } from '../services/errors';
21
 
22
  const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
23
 
 
237
  return this.user!;
238
  }
239
 
240
/**
 * Asserts that the current caller is an authenticated user whose tier is at
 * least `n`.
 *
 * The tier is read from `user.metadata?.speed_level` and parsed as an
 * integer.
 *
 * @param n - Minimum tier required.
 * @param feature - Optional feature name, appended to the error messages.
 * @returns `true` when the assertion passes.
 * @throws AuthenticationRequiredError when `assertUser()` fails with that
 *   error type; it is re-thrown with a feature-specific message.
 * @throws TierFeatureConstraintError when the parsed tier is not a number or
 *   is lower than `n`.
 */
async assertTier(n: number, feature?: string) {
    const featureNote = feature ? ` (${feature})` : '';

    let account;
    try {
        account = await this.assertUser();
    } catch (err) {
        // Anything other than a missing-authentication failure passes through untouched.
        if (!(err instanceof AuthenticationRequiredError)) {
            throw err;
        }

        throw new AuthenticationRequiredError({
            message: `Authentication is required to use this feature${featureNote}. Please provide a valid API key.`
        });
    }

    const level = parseInt(account.metadata?.speed_level);
    const tierTooLow = isNaN(level) || level < n;
    if (tierTooLow) {
        throw new TierFeatureConstraintError({
            message: `Your current plan does not support this feature${featureNote}. Please upgrade your plan.`
        });
    }

    return true;
}
263
+
264
  getRateLimits(...tags: string[]) {
265
  const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
266
 
src/services/errors.ts CHANGED
@@ -27,7 +27,7 @@ export class EmailUnverifiedError extends ApplicationError { }
27
  export class InsufficientCreditsError extends ApplicationError { }
28
 
29
  @StatusCode(40202)
30
- export class FreeFeatureLimitError extends ApplicationError { }
31
 
32
  @StatusCode(40203)
33
  export class InsufficientBalanceError extends ApplicationError { }
 
27
  export class InsufficientCreditsError extends ApplicationError { }
28
 
29
  @StatusCode(40202)
30
+ export class TierFeatureConstraintError extends ApplicationError { }
31
 
32
  @StatusCode(40203)
33
  export class InsufficientBalanceError extends ApplicationError { }