nomagick committed on
Commit
8eee951
·
unverified ·
1 Parent(s): 1c94456

feat: index brief in JSON format

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -21,6 +21,7 @@ import { randomUUID } from 'crypto';
21
  import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
22
 
23
  import { countGPTToken as estimateToken } from '../shared/utils/openai';
 
24
 
25
  const md5Hasher = new HashManager('md5', 'hex');
26
 
@@ -44,6 +45,16 @@ export interface FormattedPage {
44
  toString: () => string;
45
  }
46
 
 
 
 
 
 
 
 
 
 
 
47
  @singleton()
48
  export class CrawlerHost extends RPCHost {
49
  logger = this.globalLogger.child({ service: this.constructor.name });
@@ -54,12 +65,6 @@ export class CrawlerHost extends RPCHost {
54
  cacheValidMs = 1000 * 3600;
55
  urlValidMs = 1000 * 3600 * 4;
56
 
57
- indexText = `[Usage1] https://r.jina.ai/YOUR_URL
58
- [Usage2] https://s.jina.ai/YOUR_SEARCH_QUERY
59
- [Homepage] https://jina.ai/reader
60
- [Source code] https://github.com/jina-ai/reader
61
- `;
62
-
63
  constructor(
64
  protected globalLogger: Logger,
65
  protected puppeteerControl: PuppeteerControl,
@@ -89,6 +94,25 @@ export class CrawlerHost extends RPCHost {
89
  this.emit('ready');
90
  }
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  getTurndown(noRules?: boolean | string) {
93
  const turnDownService = new TurndownService();
94
  if (!noRules) {
@@ -497,12 +521,11 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
497
  const noSlashURL = ctx.req.url.slice(1);
498
  if (!noSlashURL) {
499
  const latestUser = uid ? await auth.assertUser() : undefined;
500
- const authMixin = latestUser ? `
501
- [Authenticated as] ${latestUser.user_id} (${latestUser.full_name})
502
- [Balance left] ${latestUser.wallet.total_balance}
503
- ` : '';
504
 
505
- return assignTransferProtocolMeta(`${this.indexText}${authMixin}`,
506
  { contentType: 'text/plain', envelope: null }
507
  );
508
  }
 
21
  import { JinaEmbeddingsAuthDTO } from '../shared/dto/jina-embeddings-auth';
22
 
23
  import { countGPTToken as estimateToken } from '../shared/utils/openai';
24
+ import { JinaEmbeddingsTokenAccount } from '../shared/db/jina-embeddings-token-account';
25
 
26
  const md5Hasher = new HashManager('md5', 'hex');
27
 
 
45
  toString: () => string;
46
  }
47
 
48
+ const indexProto = {
49
+ toString: function (): string {
50
+ return _(this)
51
+ .toPairs()
52
+ .map(([k, v]) => k ? `[${_.upperFirst(_.lowerCase(k))}] ${v}` : '')
53
+ .value()
54
+ .join('\n') + '\n';
55
+ }
56
+ };
57
+
58
  @singleton()
59
  export class CrawlerHost extends RPCHost {
60
  logger = this.globalLogger.child({ service: this.constructor.name });
 
65
  cacheValidMs = 1000 * 3600;
66
  urlValidMs = 1000 * 3600 * 4;
67
 
 
 
 
 
 
 
68
  constructor(
69
  protected globalLogger: Logger,
70
  protected puppeteerControl: PuppeteerControl,
 
94
  this.emit('ready');
95
  }
96
 
97
+ getIndex(user?: JinaEmbeddingsTokenAccount) {
98
+ const indexObject: Record<string, string | number | undefined> = Object.create(indexProto);
99
+
100
+ Object.assign(indexObject, {
101
+ usage1: 'https://r.jina.ai/YOUR_URL',
102
+ usage2: 'https://s.jina.ai/YOUR_SEARCH_QUERY',
103
+ homepage: 'https://jina.ai/reader',
104
+ sourceCode: 'https://github.com/jina-ai/reader',
105
+ });
106
+
107
+ if (user) {
108
+ indexObject[''] = undefined;
109
+ indexObject.authenticatedAs = `${user.user_id} (${user.full_name})`;
110
+ indexObject.balanceLeft = user.wallet.total_balance;
111
+ }
112
+
113
+ return indexObject;
114
+ }
115
+
116
  getTurndown(noRules?: boolean | string) {
117
  const turnDownService = new TurndownService();
118
  if (!noRules) {
 
521
  const noSlashURL = ctx.req.url.slice(1);
522
  if (!noSlashURL) {
523
  const latestUser = uid ? await auth.assertUser() : undefined;
524
+ if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
525
+ return this.getIndex(latestUser);
526
+ }
 
527
 
528
+ return assignTransferProtocolMeta(`${this.getIndex(latestUser)}`,
529
  { contentType: 'text/plain', envelope: null }
530
  );
531
  }
backend/functions/src/cloud-functions/searcher.ts CHANGED
@@ -152,12 +152,12 @@ export class SearcherHost extends RPCHost {
152
  const noSlashPath = ctx.req.url.slice(1);
153
  if (!noSlashPath) {
154
  const latestUser = uid ? await auth.assertUser() : undefined;
155
- const authMixin = latestUser ? `
156
- [Authenticated as] ${latestUser.user_id} (${latestUser.full_name})
157
- [Balance left] ${latestUser.wallet.total_balance}
158
- ` : '';
159
 
160
- return assignTransferProtocolMeta(`${this.crawler.indexText}${authMixin}`,
 
 
 
161
  { contentType: 'text/plain', envelope: null }
162
  );
163
  }
 
152
  const noSlashPath = ctx.req.url.slice(1);
153
  if (!noSlashPath) {
154
  const latestUser = uid ? await auth.assertUser() : undefined;
155
+ if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
 
 
 
156
 
157
+ return this.crawler.getIndex(latestUser);
158
+ }
159
+
160
+ return assignTransferProtocolMeta(`${this.crawler.getIndex(latestUser)}`,
161
  { contentType: 'text/plain', envelope: null }
162
  );
163
  }