Spaces:
Build error
Build error
fix: saas issues
Browse files- src/api/searcher-serper.ts +14 -6
- src/api/serp.ts +10 -9
- src/services/curl.ts +16 -3
- src/services/errors.ts +1 -29
- src/services/serper-search.ts +5 -1
- thinapps-shared +1 -1
src/api/searcher-serper.ts
CHANGED
|
@@ -6,7 +6,7 @@ import { marshalErrorLike } from 'civkit/lang';
|
|
| 6 |
import { objHashMd5B64Of } from 'civkit/hash';
|
| 7 |
import _ from 'lodash';
|
| 8 |
|
| 9 |
-
import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
| 10 |
|
| 11 |
import { CrawlerHost, ExtraScrappingOptions } from './crawler';
|
| 12 |
import { SerperSearchResult } from '../db/searched';
|
|
@@ -19,8 +19,16 @@ import { AsyncLocalContext } from '../services/async-context';
|
|
| 19 |
import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
|
| 20 |
import { OutputServerEventStream } from '../lib/transform-server-event-stream';
|
| 21 |
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
| 22 |
-
import { InsufficientBalanceError
|
| 23 |
-
import {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
import { toAsyncGenerator } from '../utils/misc';
|
| 25 |
import type { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
|
| 26 |
import { LRUCache } from 'lru-cache';
|
|
@@ -218,10 +226,10 @@ export class SearcherHost extends RPCHost {
|
|
| 218 |
}
|
| 219 |
const now = Date.now();
|
| 220 |
let tgtDate;
|
| 221 |
-
if (err.
|
| 222 |
-
tgtDate = new Date(now + err.retryAfter * 1000);
|
| 223 |
-
} else if (err.retryAfterDate) {
|
| 224 |
tgtDate = err.retryAfterDate;
|
|
|
|
|
|
|
| 225 |
}
|
| 226 |
|
| 227 |
if (tgtDate) {
|
|
|
|
| 6 |
import { objHashMd5B64Of } from 'civkit/hash';
|
| 7 |
import _ from 'lodash';
|
| 8 |
|
| 9 |
+
import { RateLimitControl, RateLimitDesc, RateLimitTriggeredError } from '../shared/services/rate-limit';
|
| 10 |
|
| 11 |
import { CrawlerHost, ExtraScrappingOptions } from './crawler';
|
| 12 |
import { SerperSearchResult } from '../db/searched';
|
|
|
|
| 19 |
import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
|
| 20 |
import { OutputServerEventStream } from '../lib/transform-server-event-stream';
|
| 21 |
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
| 22 |
+
import { InsufficientBalanceError } from '../services/errors';
|
| 23 |
+
import {
|
| 24 |
+
SerperImageSearchResponse,
|
| 25 |
+
SerperNewsSearchResponse,
|
| 26 |
+
SerperSearchQueryParams,
|
| 27 |
+
SerperSearchResponse,
|
| 28 |
+
SerperWebSearchResponse,
|
| 29 |
+
WORLD_COUNTRIES,
|
| 30 |
+
WORLD_LANGUAGES
|
| 31 |
+
} from '../shared/3rd-party/serper-search';
|
| 32 |
import { toAsyncGenerator } from '../utils/misc';
|
| 33 |
import type { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
|
| 34 |
import { LRUCache } from 'lru-cache';
|
|
|
|
| 226 |
}
|
| 227 |
const now = Date.now();
|
| 228 |
let tgtDate;
|
| 229 |
+
if (err.retryAfterDate) {
|
|
|
|
|
|
|
| 230 |
tgtDate = err.retryAfterDate;
|
| 231 |
+
} else if (err.retryAfter) {
|
| 232 |
+
tgtDate = new Date(now + err.retryAfter * 1000);
|
| 233 |
}
|
| 234 |
|
| 235 |
if (tgtDate) {
|
src/api/serp.ts
CHANGED
|
@@ -7,14 +7,14 @@ import {
|
|
| 7 |
import { marshalErrorLike } from 'civkit/lang';
|
| 8 |
import _ from 'lodash';
|
| 9 |
|
| 10 |
-
import { RateLimitControl, RateLimitDesc } from '../shared/services/rate-limit';
|
| 11 |
|
| 12 |
import { GlobalLogger } from '../services/logger';
|
| 13 |
import { AsyncLocalContext } from '../services/async-context';
|
| 14 |
import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
|
| 15 |
import { OutputServerEventStream } from '../lib/transform-server-event-stream';
|
| 16 |
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
| 17 |
-
import { InsufficientBalanceError
|
| 18 |
import { WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
|
| 19 |
import { GoogleSERP } from '../services/serp/google';
|
| 20 |
import { WebSearchEntry } from '../services/serp/compat';
|
|
@@ -172,10 +172,11 @@ export class SerpHost extends RPCHost {
|
|
| 172 |
const now = new Date();
|
| 173 |
const blockedTimeRemaining = (highFreqKey.blockedUntil.valueOf() - now.valueOf());
|
| 174 |
if (blockedTimeRemaining > 0) {
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
| 179 |
}
|
| 180 |
}
|
| 181 |
|
|
@@ -229,10 +230,10 @@ export class SerpHost extends RPCHost {
|
|
| 229 |
}
|
| 230 |
const now = Date.now();
|
| 231 |
let tgtDate;
|
| 232 |
-
if (err.
|
| 233 |
-
tgtDate = new Date(now + err.retryAfter * 1000);
|
| 234 |
-
} else if (err.retryAfterDate) {
|
| 235 |
tgtDate = err.retryAfterDate;
|
|
|
|
|
|
|
| 236 |
}
|
| 237 |
|
| 238 |
if (tgtDate) {
|
|
|
|
| 7 |
import { marshalErrorLike } from 'civkit/lang';
|
| 8 |
import _ from 'lodash';
|
| 9 |
|
| 10 |
+
import { RateLimitControl, RateLimitDesc, RateLimitTriggeredError } from '../shared/services/rate-limit';
|
| 11 |
|
| 12 |
import { GlobalLogger } from '../services/logger';
|
| 13 |
import { AsyncLocalContext } from '../services/async-context';
|
| 14 |
import { Context, Ctx, Method, Param, RPCReflect } from '../services/registry';
|
| 15 |
import { OutputServerEventStream } from '../lib/transform-server-event-stream';
|
| 16 |
import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
|
| 17 |
+
import { InsufficientBalanceError } from '../services/errors';
|
| 18 |
import { WORLD_COUNTRIES, WORLD_LANGUAGES } from '../shared/3rd-party/serper-search';
|
| 19 |
import { GoogleSERP } from '../services/serp/google';
|
| 20 |
import { WebSearchEntry } from '../services/serp/compat';
|
|
|
|
| 172 |
const now = new Date();
|
| 173 |
const blockedTimeRemaining = (highFreqKey.blockedUntil.valueOf() - now.valueOf());
|
| 174 |
if (blockedTimeRemaining > 0) {
|
| 175 |
+
this.logger.warn(`Rate limit triggered for ${uid}, this request should have been blocked`);
|
| 176 |
+
// throw RateLimitTriggeredError.from({
|
| 177 |
+
// message: `Per UID rate limit exceeded (async)`,
|
| 178 |
+
// retryAfter: Math.ceil(blockedTimeRemaining / 1000),
|
| 179 |
+
// });
|
| 180 |
}
|
| 181 |
}
|
| 182 |
|
|
|
|
| 230 |
}
|
| 231 |
const now = Date.now();
|
| 232 |
let tgtDate;
|
| 233 |
+
if (err.retryAfterDate) {
|
|
|
|
|
|
|
| 234 |
tgtDate = err.retryAfterDate;
|
| 235 |
+
} else if (err.retryAfter) {
|
| 236 |
+
tgtDate = new Date(now + err.retryAfter * 1000);
|
| 237 |
}
|
| 238 |
|
| 239 |
if (tgtDate) {
|
src/services/curl.ts
CHANGED
|
@@ -61,10 +61,23 @@ export class CurlControl extends AsyncService {
|
|
| 61 |
}
|
| 62 |
|
| 63 |
curlImpersonateHeader(curl: Curl, headers?: object) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
const mixinHeaders: Record<string, string> = {
|
| 65 |
-
'
|
| 66 |
-
'
|
| 67 |
-
'
|
| 68 |
'Upgrade-Insecure-Requests': '1',
|
| 69 |
'User-Agent': this.ua,
|
| 70 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
|
|
|
| 61 |
}
|
| 62 |
|
| 63 |
curlImpersonateHeader(curl: Curl, headers?: object) {
|
| 64 |
+
let uaPlatform = this.platform;
|
| 65 |
+
if (this.ua.includes('Windows')) {
|
| 66 |
+
uaPlatform = 'Windows';
|
| 67 |
+
} else if (this.ua.includes('Android')) {
|
| 68 |
+
uaPlatform = 'Android';
|
| 69 |
+
} else if (this.ua.includes('iPhone') || this.ua.includes('iPad') || this.ua.includes('iPod')) {
|
| 70 |
+
uaPlatform = 'iOS';
|
| 71 |
+
} else if (this.ua.includes('CrOS')) {
|
| 72 |
+
uaPlatform = 'Chrome OS';
|
| 73 |
+
} else if (this.ua.includes('Macintosh')) {
|
| 74 |
+
uaPlatform = 'macOS';
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
const mixinHeaders: Record<string, string> = {
|
| 78 |
+
'Sec-Ch-Ua': `Not A(Brand";v="8", "Chromium";v="${this.chromeVersion}", "Google Chrome";v="${this.chromeVersion}"`,
|
| 79 |
+
'Sec-Ch-Ua-Mobile': '?0',
|
| 80 |
+
'Sec-Ch-Ua-Platform': `"${uaPlatform}"`,
|
| 81 |
'Upgrade-Insecure-Requests': '1',
|
| 82 |
'User-Agent': this.ua,
|
| 83 |
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
src/services/errors.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
import { ApplicationError,
|
| 2 |
import _ from 'lodash';
|
| 3 |
import dayjs from 'dayjs';
|
| 4 |
import utc from 'dayjs/plugin/utc';
|
|
@@ -46,31 +46,3 @@ export class SecurityCompromiseError extends ApplicationError { }
|
|
| 46 |
|
| 47 |
@StatusCode(41201)
|
| 48 |
export class BatchSizeTooLargeError extends ApplicationError { }
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
@StatusCode(42903)
|
| 52 |
-
export class RateLimitTriggeredError extends ApplicationError {
|
| 53 |
-
|
| 54 |
-
@Prop({
|
| 55 |
-
desc: 'Retry after seconds',
|
| 56 |
-
})
|
| 57 |
-
retryAfter?: number;
|
| 58 |
-
|
| 59 |
-
@Prop({
|
| 60 |
-
desc: 'Retry after date',
|
| 61 |
-
})
|
| 62 |
-
retryAfterDate?: Date;
|
| 63 |
-
|
| 64 |
-
protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
|
| 65 |
-
const retryAfter = this.retryAfter || this.retryAfterDate;
|
| 66 |
-
if (!retryAfter) {
|
| 67 |
-
return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
|
| 68 |
-
}
|
| 69 |
-
|
| 70 |
-
return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
|
| 71 |
-
headers: {
|
| 72 |
-
'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
|
| 73 |
-
}
|
| 74 |
-
});
|
| 75 |
-
}
|
| 76 |
-
}
|
|
|
|
| 1 |
+
import { ApplicationError, StatusCode } from 'civkit/civ-rpc';
|
| 2 |
import _ from 'lodash';
|
| 3 |
import dayjs from 'dayjs';
|
| 4 |
import utc from 'dayjs/plugin/utc';
|
|
|
|
| 46 |
|
| 47 |
@StatusCode(41201)
|
| 48 |
export class BatchSizeTooLargeError extends ApplicationError { }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/services/serper-search.ts
CHANGED
|
@@ -56,6 +56,7 @@ export class SerperSearchService extends AsyncService {
|
|
| 56 |
let maxTries = 3;
|
| 57 |
|
| 58 |
while (maxTries--) {
|
|
|
|
| 59 |
try {
|
| 60 |
this.logger.debug(`Doing external search`, query);
|
| 61 |
let r;
|
|
@@ -101,11 +102,14 @@ export class SerperSearchService extends AsyncService {
|
|
| 101 |
break;
|
| 102 |
}
|
| 103 |
}
|
|
|
|
| 104 |
this.blackHoleDetector.itWorked();
|
|
|
|
| 105 |
|
| 106 |
return r.parsed;
|
| 107 |
} catch (err: any) {
|
| 108 |
-
|
|
|
|
| 109 |
if (err?.status === 429) {
|
| 110 |
await delay(500 + 1000 * Math.random());
|
| 111 |
continue;
|
|
|
|
| 56 |
let maxTries = 3;
|
| 57 |
|
| 58 |
while (maxTries--) {
|
| 59 |
+
const t0 = Date.now();
|
| 60 |
try {
|
| 61 |
this.logger.debug(`Doing external search`, query);
|
| 62 |
let r;
|
|
|
|
| 102 |
break;
|
| 103 |
}
|
| 104 |
}
|
| 105 |
+
const dt = Date.now() - t0;
|
| 106 |
this.blackHoleDetector.itWorked();
|
| 107 |
+
this.logger.debug(`External search took ${dt}ms`, { searchDt: dt, variant });
|
| 108 |
|
| 109 |
return r.parsed;
|
| 110 |
} catch (err: any) {
|
| 111 |
+
const dt = Date.now() - t0;
|
| 112 |
+
this.logger.error(`${variant} search failed: ${err?.message}`, { searchDt: dt, err: marshalErrorLike(err) });
|
| 113 |
if (err?.status === 429) {
|
| 114 |
await delay(500 + 1000 * Math.random());
|
| 115 |
continue;
|
thinapps-shared
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit 580ea72e0eddaa115b85dabf29de41d079ecd2d0
|