Spaces:
Build error
Build error
saas: new tier policy
Browse files
- src/api/crawler.ts +67 -33
- src/dto/jina-embeddings-auth.ts +25 -0
- src/services/errors.ts +1 -1
src/api/crawler.ts
CHANGED
|
@@ -240,6 +240,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 240 |
const uid = await auth.solveUID();
|
| 241 |
let chargeAmount = 0;
|
| 242 |
const crawlerOptions = ctx.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
|
|
|
|
| 243 |
|
| 244 |
// Use koa ctx.URL, a standard URL object to avoid node.js framework prop naming confusion
|
| 245 |
const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(`${ctx.URL.pathname}${ctx.URL.search}`), crawlerOptions);
|
|
@@ -298,15 +299,13 @@ export class CrawlerHost extends RPCHost {
|
|
| 298 |
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 299 |
return;
|
| 300 |
}
|
| 301 |
-
|
| 302 |
-
apiRoll._ref?.set({
|
| 303 |
-
chargeAmount,
|
| 304 |
-
}, { merge: true }).catch((err) => this.logger.warn(`Failed to log charge amount in apiRoll`, { err }));
|
| 305 |
-
}
|
| 306 |
});
|
| 307 |
}
|
| 308 |
|
| 309 |
if (!uid) {
|
|
|
|
|
|
|
| 310 |
const blockade = (await DomainBlockade.fromFirestoreQuery(
|
| 311 |
DomainBlockade.COLLECTION
|
| 312 |
.where('domain', '==', targetUrl.hostname.toLowerCase())
|
|
@@ -338,10 +337,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 338 |
}
|
| 339 |
|
| 340 |
const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 341 |
-
chargeAmount = this.assignChargeAmount(formatted,
|
| 342 |
-
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 343 |
-
throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
|
| 344 |
-
}
|
| 345 |
sseStream.write({
|
| 346 |
event: 'data',
|
| 347 |
data: formatted,
|
|
@@ -379,11 +375,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 379 |
}
|
| 380 |
|
| 381 |
const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 382 |
-
chargeAmount = this.assignChargeAmount(formatted,
|
| 383 |
-
|
| 384 |
-
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 385 |
-
throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
|
| 386 |
-
}
|
| 387 |
|
| 388 |
if (scrapped?.pdfs?.length && !chargeAmount) {
|
| 389 |
continue;
|
|
@@ -405,10 +397,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 405 |
}
|
| 406 |
|
| 407 |
const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 408 |
-
chargeAmount = this.assignChargeAmount(formatted,
|
| 409 |
-
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 410 |
-
throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
|
| 411 |
-
}
|
| 412 |
|
| 413 |
return formatted;
|
| 414 |
}
|
|
@@ -434,10 +423,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 434 |
}
|
| 435 |
|
| 436 |
const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 437 |
-
chargeAmount = this.assignChargeAmount(formatted,
|
| 438 |
-
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 439 |
-
throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
|
| 440 |
-
}
|
| 441 |
|
| 442 |
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
| 443 |
return assignTransferProtocolMeta(`${formatted.textRepresentation}`,
|
|
@@ -465,10 +451,7 @@ export class CrawlerHost extends RPCHost {
|
|
| 465 |
throw new AssertionFailureError(`No content available for URL ${targetUrl}`);
|
| 466 |
}
|
| 467 |
const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 468 |
-
chargeAmount = this.assignChargeAmount(formatted,
|
| 469 |
-
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 470 |
-
throw new BudgetExceededError(`Token budget (${crawlerOptions.tokenBudget}) exceeded, intended charge amount ${chargeAmount}.`);
|
| 471 |
-
}
|
| 472 |
|
| 473 |
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
| 474 |
|
|
@@ -840,7 +823,8 @@ export class CrawlerHost extends RPCHost {
|
|
| 840 |
yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
|
| 841 |
}
|
| 842 |
let fallbackProxyIsUsed = false;
|
| 843 |
-
if (
|
|
|
|
| 844 |
(analyzed.tokens < 42 || sideLoaded.status !== 200)
|
| 845 |
) {
|
| 846 |
const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
|
|
@@ -911,18 +895,14 @@ export class CrawlerHost extends RPCHost {
|
|
| 911 |
}
|
| 912 |
}
|
| 913 |
|
| 914 |
-
assignChargeAmount(formatted: FormattedPage,
|
| 915 |
if (!formatted) {
|
| 916 |
return 0;
|
| 917 |
}
|
| 918 |
|
| 919 |
let amount = 0;
|
| 920 |
if (formatted.content) {
|
| 921 |
-
|
| 922 |
-
if (crawlerOptions?.respondWith?.toLowerCase().includes('lm')) {
|
| 923 |
-
amount += x1 * 2;
|
| 924 |
-
}
|
| 925 |
-
amount += x1;
|
| 926 |
} else if (formatted.description) {
|
| 927 |
amount += estimateToken(formatted.description);
|
| 928 |
}
|
|
@@ -939,6 +919,10 @@ export class CrawlerHost extends RPCHost {
|
|
| 939 |
amount += 765;
|
| 940 |
}
|
| 941 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
Object.assign(formatted, { usage: { tokens: amount } });
|
| 943 |
assignMeta(formatted, { usage: { tokens: amount } });
|
| 944 |
|
|
@@ -1312,4 +1296,54 @@ export class CrawlerHost extends RPCHost {
|
|
| 1312 |
|
| 1313 |
return false;
|
| 1314 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1315 |
}
|
|
|
|
| 240 |
const uid = await auth.solveUID();
|
| 241 |
let chargeAmount = 0;
|
| 242 |
const crawlerOptions = ctx.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
|
| 243 |
+
const tierPolicy = await this.saasAssertTierPolicy(crawlerOptions, auth);
|
| 244 |
|
| 245 |
// Use koa ctx.URL, a standard URL object to avoid node.js framework prop naming confusion
|
| 246 |
const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(`${ctx.URL.pathname}${ctx.URL.search}`), crawlerOptions);
|
|
|
|
| 299 |
if (crawlerOptions.tokenBudget && chargeAmount > crawlerOptions.tokenBudget) {
|
| 300 |
return;
|
| 301 |
}
|
| 302 |
+
apiRoll.chargeAmount = chargeAmount;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
});
|
| 304 |
}
|
| 305 |
|
| 306 |
if (!uid) {
|
| 307 |
+
// Enforce no proxy is allocated for anonymous users due to abuse.
|
| 308 |
+
crawlerOptions.proxy = 'none';
|
| 309 |
const blockade = (await DomainBlockade.fromFirestoreQuery(
|
| 310 |
DomainBlockade.COLLECTION
|
| 311 |
.where('domain', '==', targetUrl.hostname.toLowerCase())
|
|
|
|
| 337 |
}
|
| 338 |
|
| 339 |
const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 340 |
+
chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
|
|
|
|
|
|
|
|
|
|
| 341 |
sseStream.write({
|
| 342 |
event: 'data',
|
| 343 |
data: formatted,
|
|
|
|
| 375 |
}
|
| 376 |
|
| 377 |
const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 378 |
+
chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
if (scrapped?.pdfs?.length && !chargeAmount) {
|
| 381 |
continue;
|
|
|
|
| 397 |
}
|
| 398 |
|
| 399 |
const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 400 |
+
chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
return formatted;
|
| 403 |
}
|
|
|
|
| 423 |
}
|
| 424 |
|
| 425 |
const formatted = await this.formatSnapshot(crawlerOptions, scrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 426 |
+
chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
|
|
|
|
|
|
|
|
|
|
| 427 |
|
| 428 |
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
| 429 |
return assignTransferProtocolMeta(`${formatted.textRepresentation}`,
|
|
|
|
| 451 |
throw new AssertionFailureError(`No content available for URL ${targetUrl}`);
|
| 452 |
}
|
| 453 |
const formatted = await this.formatSnapshot(crawlerOptions, lastScrapped, targetUrl, this.urlValidMs, crawlOpts);
|
| 454 |
+
chargeAmount = this.assignChargeAmount(formatted, tierPolicy);
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
|
| 457 |
|
|
|
|
| 823 |
yield this.jsdomControl.narrowSnapshot(draftSnapshot, crawlOpts);
|
| 824 |
}
|
| 825 |
let fallbackProxyIsUsed = false;
|
| 826 |
+
if (
|
| 827 |
+
((!crawlOpts?.allocProxy || crawlOpts.allocProxy !== 'none') && !crawlOpts?.proxyUrl) &&
|
| 828 |
(analyzed.tokens < 42 || sideLoaded.status !== 200)
|
| 829 |
) {
|
| 830 |
const proxyLoaded = await this.sideLoadWithAllocatedProxy(urlToCrawl, altOpts);
|
|
|
|
| 895 |
}
|
| 896 |
}
|
| 897 |
|
| 898 |
+
assignChargeAmount(formatted: FormattedPage, saasTierPolicy?: Parameters<typeof this.saasApplyTierPolicy>[0]) {
|
| 899 |
if (!formatted) {
|
| 900 |
return 0;
|
| 901 |
}
|
| 902 |
|
| 903 |
let amount = 0;
|
| 904 |
if (formatted.content) {
|
| 905 |
+
amount = estimateToken(formatted.content);
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
} else if (formatted.description) {
|
| 907 |
amount += estimateToken(formatted.description);
|
| 908 |
}
|
|
|
|
| 919 |
amount += 765;
|
| 920 |
}
|
| 921 |
|
| 922 |
+
if (saasTierPolicy) {
|
| 923 |
+
amount = this.saasApplyTierPolicy(saasTierPolicy, amount);
|
| 924 |
+
}
|
| 925 |
+
|
| 926 |
Object.assign(formatted, { usage: { tokens: amount } });
|
| 927 |
assignMeta(formatted, { usage: { tokens: amount } });
|
| 928 |
|
|
|
|
| 1296 |
|
| 1297 |
return false;
|
| 1298 |
}
|
| 1299 |
+
|
| 1300 |
+
/**
 * Checks every tier-gated crawler option on the request against the caller's
 * plan and derives the charging policy for that request.
 *
 * Each enabled feature is verified with `auth.assertTier`; the first unmet
 * requirement rejects the whole call. Features that pass may raise the minimum
 * charge and/or the charge multiplier. Both values only ever grow, so the
 * outcome does not depend on the order in which the features are checked.
 *
 * @param opts - Crawler options of the current request.
 * @param auth - Auth DTO used to assert the caller's tier.
 * @returns The policy consumed by `saasApplyTierPolicy`: `budget` (the request's
 *          `tokenBudget`, or 0 when unset), `chargeScalar` and `minimalCharge`.
 * @throws Whatever `auth.assertTier` throws for an unmet requirement.
 */
async saasAssertTierPolicy(opts: CrawlerOptions, auth: JinaEmbeddingsAuthDTO) {
    // Floor applied to the charge whenever a costly feature is enabled.
    const premiumMinimalCharge = 4_000;

    let chargeScalar = 1;
    let minimalCharge = 0;

    if (opts.injectPageScript || opts.injectFrameScript) {
        await auth.assertTier(0, 'Script injection');
        minimalCharge = Math.max(minimalCharge, premiumMinimalCharge);
    }

    if (opts.withGeneratedAlt) {
        await auth.assertTier(0, 'Alt text generation');
        minimalCharge = Math.max(minimalCharge, premiumMinimalCharge);
    }

    if (opts.withIframe) {
        // Tier-gated, but does not affect the charge.
        await auth.assertTier(0, 'Iframe');
    }

    if (opts.engine === ENGINE_TYPE.CF_BROWSER_RENDERING) {
        await auth.assertTier(0, 'Cloudflare browser rendering');
        minimalCharge = Math.max(minimalCharge, premiumMinimalCharge);
    }

    // `respondWith` is guarded so an absent value cannot raise a TypeError here.
    // NOTE(review): the match is case-sensitive; confirm `respondWith` is
    // normalized to lower case before it reaches this method.
    if (opts.respondWith?.includes('lm') || opts.engine?.includes('lm')) {
        await auth.assertTier(0, 'Language model');
        minimalCharge = Math.max(minimalCharge, premiumMinimalCharge);
        chargeScalar = Math.max(chargeScalar, 3);
    }

    if (opts.proxy && opts.proxy !== 'none') {
        // 'auto' / 'any' proxies require tier 0; any other proxy value requires tier 2.
        const requiredTier = ['auto', 'any'].includes(opts.proxy) ? 0 : 2;
        await auth.assertTier(requiredTier, 'Proxy allocation');
        chargeScalar = Math.max(chargeScalar, 5);
    }

    return {
        budget: opts.tokenBudget || 0,
        chargeScalar,
        minimalCharge,
    };
}
|
| 1340 |
+
|
| 1341 |
+
/**
 * Turns a raw token amount into the amount to charge under a tier policy.
 *
 * The raw amount is first raised to the policy's minimal charge, then
 * multiplied by the policy's charge scalar. When the policy carries a non-zero
 * budget and the resulting amount is above it, the call is rejected.
 *
 * @param policy - Policy produced by `saasAssertTierPolicy`.
 * @param chargeAmount - Raw token amount before the policy is applied.
 * @returns The effective amount to charge.
 * @throws BudgetExceededError when the effective amount exceeds a non-zero budget.
 */
saasApplyTierPolicy(policy: Awaited<ReturnType<typeof this.saasAssertTierPolicy>>, chargeAmount: number) {
    const { budget, chargeScalar, minimalCharge } = policy;

    const billable = chargeScalar * Math.max(chargeAmount, minimalCharge);

    // A budget of 0 means "no budget set" and never triggers the check.
    const overBudget = Boolean(budget) && budget < billable;
    if (overBudget) {
        throw new BudgetExceededError(`Token budget (${budget}) exceeded, intended charge amount ${billable}`);
    }

    return billable;
}
|
| 1349 |
}
|
src/dto/jina-embeddings-auth.ts
CHANGED
|
@@ -17,6 +17,7 @@ import { AsyncLocalContext } from '../services/async-context';
|
|
| 17 |
import envConfig from '../shared/services/secrets';
|
| 18 |
import { JinaEmbeddingsDashboardHTTP } from '../shared/3rd-party/jina-embeddings';
|
| 19 |
import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
|
|
|
|
| 20 |
|
| 21 |
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
|
| 22 |
|
|
@@ -236,6 +237,30 @@ export class JinaEmbeddingsAuthDTO extends AutoCastable {
|
|
| 236 |
return this.user!;
|
| 237 |
}
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
getRateLimits(...tags: string[]) {
|
| 240 |
const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
|
| 241 |
|
|
|
|
| 17 |
import envConfig from '../shared/services/secrets';
|
| 18 |
import { JinaEmbeddingsDashboardHTTP } from '../shared/3rd-party/jina-embeddings';
|
| 19 |
import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
|
| 20 |
+
import { TierFeatureConstraintError } from '../services/errors';
|
| 21 |
|
| 22 |
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });
|
| 23 |
|
|
|
|
| 237 |
return this.user!;
|
| 238 |
}
|
| 239 |
|
| 240 |
+
/**
 * Asserts that the authenticated user is on plan tier `n` or higher.
 *
 * The tier is parsed as a base-10 integer from the user's
 * `metadata.speed_level`. A missing or non-numeric value satisfies no tier.
 *
 * @param n - Minimum tier required.
 * @param feature - Optional feature name, appended to the error message.
 * @returns `true` when the requirement is met.
 * @throws AuthenticationRequiredError when `assertUser` fails with that error;
 *         it is re-raised with a feature-specific message.
 * @throws TierFeatureConstraintError when the tier is missing, unparsable or below `n`.
 */
async assertTier(n: number, feature?: string) {
    const featureHint = feature ? ` (${feature})` : '';

    let user;
    try {
        user = await this.assertUser();
    } catch (err) {
        if (err instanceof AuthenticationRequiredError) {
            throw new AuthenticationRequiredError({
                message: `Authentication is required to use this feature${featureHint}. Please provide a valid API key.`
            });
        }

        // Anything other than a missing-auth failure is propagated untouched.
        throw err;
    }

    // Explicit radix: without it a value such as "0x2" would be read as hexadecimal.
    const tier = parseInt(user.metadata?.speed_level, 10);
    if (Number.isNaN(tier) || tier < n) {
        throw new TierFeatureConstraintError({
            message: `Your current plan does not support this feature${featureHint}. Please upgrade your plan.`
        });
    }

    return true;
}
|
| 263 |
+
|
| 264 |
getRateLimits(...tags: string[]) {
|
| 265 |
const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());
|
| 266 |
|
src/services/errors.ts
CHANGED
|
@@ -27,7 +27,7 @@ export class EmailUnverifiedError extends ApplicationError { }
|
|
| 27 |
export class InsufficientCreditsError extends ApplicationError { }
|
| 28 |
|
| 29 |
@StatusCode(40202)
|
| 30 |
-
export class
|
| 31 |
|
| 32 |
@StatusCode(40203)
|
| 33 |
export class InsufficientBalanceError extends ApplicationError { }
|
|
|
|
| 27 |
export class InsufficientCreditsError extends ApplicationError { }
|
| 28 |
|
| 29 |
@StatusCode(40202)
|
| 30 |
+
/** Raised when the caller's plan tier does not permit a requested feature (thrown by `JinaEmbeddingsAuthDTO.assertTier`). */
export class TierFeatureConstraintError extends ApplicationError { }
|
| 31 |
|
| 32 |
@StatusCode(40203)
|
| 33 |
export class InsufficientBalanceError extends ApplicationError { }
|