nomagick committed on
Commit
fb5bd58
·
unverified ·
1 Parent(s): c7860e6

feat: return usage tokens in json

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -49,6 +49,11 @@ export interface FormattedPage {
49
  pageshot?: Buffer;
50
  links?: { [k: string]: string; };
51
  images?: { [k: string]: string; };
 
 
 
 
 
52
 
53
  toString: () => string;
54
  }
@@ -743,7 +748,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
743
  }
744
 
745
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
746
- chargeAmount = this.getChargeAmount(formatted);
747
  sseStream.write({
748
  event: 'data',
749
  data: formatted,
@@ -771,7 +776,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
771
  }
772
 
773
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
774
- chargeAmount = this.getChargeAmount(formatted);
775
 
776
  if (crawlerOptions.timeout === undefined) {
777
  return formatted;
@@ -783,7 +788,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
783
  }
784
 
785
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
786
- chargeAmount = this.getChargeAmount(formatted);
787
 
788
  return formatted;
789
  }
@@ -795,7 +800,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
795
  }
796
 
797
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
798
- chargeAmount = this.getChargeAmount(formatted);
799
 
800
  if (crawlerOptions.timeout === undefined) {
801
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
@@ -820,7 +825,7 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
820
  }
821
 
822
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
823
- chargeAmount = this.getChargeAmount(formatted);
824
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
825
 
826
  return assignTransferProtocolMeta(`${formatted}`,
@@ -1005,25 +1010,31 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
1005
  }
1006
  }
1007
 
1008
- getChargeAmount(formatted: FormattedPage) {
1009
  if (!formatted) {
1010
  return undefined;
1011
  }
1012
 
1013
  const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;
 
 
 
 
 
 
1014
 
1015
- if (typeof textContent === 'string') {
1016
- return estimateToken(textContent);
1017
- }
1018
 
1019
- const imageContent = formatted.screenshotUrl || formatted.screenshot;
 
 
 
 
 
1020
 
1021
- if (imageContent) {
1022
- // OpenAI image token count for 1024x1024 image
1023
- return 765;
1024
- }
1025
 
1026
- return undefined;
1027
  }
1028
 
1029
 
 
49
  pageshot?: Buffer;
50
  links?: { [k: string]: string; };
51
  images?: { [k: string]: string; };
52
+ usage?: {
53
+ total_tokens?: number;
54
+ totalTokens?: number;
55
+ tokens?: number;
56
+ };
57
 
58
  toString: () => string;
59
  }
 
748
  }
749
 
750
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
751
+ chargeAmount = this.assignChargeAmount(formatted);
752
  sseStream.write({
753
  event: 'data',
754
  data: formatted,
 
776
  }
777
 
778
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
779
+ chargeAmount = this.assignChargeAmount(formatted);
780
 
781
  if (crawlerOptions.timeout === undefined) {
782
  return formatted;
 
788
  }
789
 
790
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
791
+ chargeAmount = this.assignChargeAmount(formatted);
792
 
793
  return formatted;
794
  }
 
800
  }
801
 
802
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, scrapped, urlToCrawl);
803
+ chargeAmount = this.assignChargeAmount(formatted);
804
 
805
  if (crawlerOptions.timeout === undefined) {
806
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
 
825
  }
826
 
827
  const formatted = await this.formatSnapshot(crawlerOptions.respondWith, lastScrapped, urlToCrawl);
828
+ chargeAmount = this.assignChargeAmount(formatted);
829
  if (crawlerOptions.respondWith === 'screenshot' && Reflect.get(formatted, 'screenshotUrl')) {
830
 
831
  return assignTransferProtocolMeta(`${formatted}`,
 
1010
  }
1011
  }
1012
 
1013
+ assignChargeAmount(formatted: FormattedPage) {
1014
  if (!formatted) {
1015
  return undefined;
1016
  }
1017
 
1018
  const textContent = formatted?.content || formatted?.description || formatted?.text || formatted?.html;
1019
+ let amount;
1020
+ do {
1021
+ if (typeof textContent === 'string') {
1022
+ amount = estimateToken(textContent);
1023
+ break;
1024
+ }
1025
 
1026
+ const imageContent = formatted.screenshotUrl || formatted.screenshot;
 
 
1027
 
1028
+ if (imageContent) {
1029
+ // OpenAI image token count for 1024x1024 image
1030
+ amount = 765;
1031
+ break;
1032
+ }
1033
+ } while (false);
1034
 
1035
+ Object.assign(formatted, { usage: { tokens: amount } });
 
 
 
1036
 
1037
+ return amount;
1038
  }
1039
 
1040
 
backend/functions/src/cloud-functions/searcher.ts CHANGED
@@ -178,7 +178,7 @@ export class SearcherHost extends RPCHost {
178
  continue;
179
  }
180
 
181
- chargeAmount = this.getChargeAmount(scrapped);
182
  sseStream.write({
183
  event: 'data',
184
  data: scrapped,
@@ -211,7 +211,7 @@ export class SearcherHost extends RPCHost {
211
  if (!lastScrapped) {
212
  return;
213
  }
214
- chargeAmount = this.getChargeAmount(lastScrapped);
215
  rpcReflect.return(lastScrapped);
216
  earlyReturn = true;
217
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@@ -228,7 +228,7 @@ export class SearcherHost extends RPCHost {
228
  if (earlyReturnTimer) {
229
  clearTimeout(earlyReturnTimer);
230
  }
231
- chargeAmount = this.getChargeAmount(scrapped);
232
 
233
  return scrapped;
234
  }
@@ -242,7 +242,7 @@ export class SearcherHost extends RPCHost {
242
  }
243
 
244
  if (!earlyReturn) {
245
- chargeAmount = this.getChargeAmount(lastScrapped);
246
  }
247
 
248
  return lastScrapped;
@@ -257,7 +257,7 @@ export class SearcherHost extends RPCHost {
257
  if (!lastScrapped) {
258
  return;
259
  }
260
- chargeAmount = this.getChargeAmount(lastScrapped);
261
  rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
262
  earlyReturn = true;
263
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
@@ -278,7 +278,7 @@ export class SearcherHost extends RPCHost {
278
  clearTimeout(earlyReturnTimer);
279
  }
280
 
281
- chargeAmount = this.getChargeAmount(scrapped);
282
 
283
  return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
284
  }
@@ -292,7 +292,7 @@ export class SearcherHost extends RPCHost {
292
  }
293
 
294
  if (!earlyReturn) {
295
- chargeAmount = this.getChargeAmount(lastScrapped);
296
  }
297
 
298
  return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
@@ -423,9 +423,9 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n')}\n` : ''}`;
423
  return resultArray;
424
  }
425
 
426
- getChargeAmount(formatted: FormattedPage[]) {
427
  return _.sum(
428
- formatted.map((x) => this.crawler.getChargeAmount(x) || 0)
429
  );
430
  }
431
 
 
178
  continue;
179
  }
180
 
181
+ chargeAmount = this.assignChargeAmount(scrapped);
182
  sseStream.write({
183
  event: 'data',
184
  data: scrapped,
 
211
  if (!lastScrapped) {
212
  return;
213
  }
214
+ chargeAmount = this.assignChargeAmount(lastScrapped);
215
  rpcReflect.return(lastScrapped);
216
  earlyReturn = true;
217
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
 
228
  if (earlyReturnTimer) {
229
  clearTimeout(earlyReturnTimer);
230
  }
231
+ chargeAmount = this.assignChargeAmount(scrapped);
232
 
233
  return scrapped;
234
  }
 
242
  }
243
 
244
  if (!earlyReturn) {
245
+ chargeAmount = this.assignChargeAmount(lastScrapped);
246
  }
247
 
248
  return lastScrapped;
 
257
  if (!lastScrapped) {
258
  return;
259
  }
260
+ chargeAmount = this.assignChargeAmount(lastScrapped);
261
  rpcReflect.return(assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null }));
262
  earlyReturn = true;
263
  }, ((crawlerOptions.timeout || 0) * 1000) || this.reasonableDelayMs);
 
278
  clearTimeout(earlyReturnTimer);
279
  }
280
 
281
+ chargeAmount = this.assignChargeAmount(scrapped);
282
 
283
  return assignTransferProtocolMeta(`${scrapped}`, { contentType: 'text/plain', envelope: null });
284
  }
 
292
  }
293
 
294
  if (!earlyReturn) {
295
+ chargeAmount = this.assignChargeAmount(lastScrapped);
296
  }
297
 
298
  return assignTransferProtocolMeta(`${lastScrapped}`, { contentType: 'text/plain', envelope: null });
 
423
  return resultArray;
424
  }
425
 
426
+ assignChargeAmount(formatted: FormattedPage[]) {
427
  return _.sum(
428
+ formatted.map((x) => this.crawler.assignChargeAmount(x) || 0)
429
  );
430
  }
431