nomagick committed on
Commit
8739943
·
unverified ·
1 Parent(s): 786b182

fix: allow POST with url

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -571,12 +571,14 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
571
  res: Response,
572
  },
573
  auth: JinaEmbeddingsAuthDTO,
574
- crawlerOptions: CrawlerOptionsHeaderOnly,
 
575
  ) {
576
  const uid = await auth.solveUID();
577
  let chargeAmount = 0;
578
  const noSlashURL = ctx.req.url.slice(1);
579
- if (!noSlashURL) {
 
580
  const latestUser = uid ? await auth.assertUser() : undefined;
581
  if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
582
  return this.getIndex(latestUser);
@@ -636,7 +638,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
636
  let urlToCrawl;
637
  const normalizeUrl = (await pNormalizeUrl).default;
638
  try {
639
- urlToCrawl = new URL(normalizeUrl(noSlashURL.trim(), { stripWWW: false, removeTrailingSlash: false, removeSingleSlash: false, sortQueryParameters: false }));
 
 
 
 
 
 
 
 
 
 
640
  } catch (err) {
641
  throw new ParamValidationError({
642
  message: `${err}`,
 
571
  res: Response,
572
  },
573
  auth: JinaEmbeddingsAuthDTO,
574
+ crawlerOptionsHeaderOnly: CrawlerOptionsHeaderOnly,
575
+ crawlerOptionsParamsAllowed: CrawlerOptions,
576
  ) {
577
  const uid = await auth.solveUID();
578
  let chargeAmount = 0;
579
  const noSlashURL = ctx.req.url.slice(1);
580
+ const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
581
+ if (!noSlashURL && !crawlerOptions.url) {
582
  const latestUser = uid ? await auth.assertUser() : undefined;
583
  if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
584
  return this.getIndex(latestUser);
 
638
  let urlToCrawl;
639
  const normalizeUrl = (await pNormalizeUrl).default;
640
  try {
641
+ urlToCrawl = new URL(
642
+ normalizeUrl(
643
+ (crawlerOptions.url || noSlashURL).trim(),
644
+ {
645
+ stripWWW: false,
646
+ removeTrailingSlash: false,
647
+ removeSingleSlash: false,
648
+ sortQueryParameters: false,
649
+ }
650
+ )
651
+ );
652
  } catch (err) {
653
  throw new ParamValidationError({
654
  message: `${err}`,
backend/functions/src/dto/scrapping-options.ts CHANGED
@@ -116,6 +116,9 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
116
  })
117
  export class CrawlerOptions extends AutoCastable {
118
 
 
 
 
119
  @Prop({
120
  default: 'default',
121
  })
 
116
  })
117
  export class CrawlerOptions extends AutoCastable {
118
 
119
+ @Prop()
120
+ url?: string;
121
+
122
  @Prop({
123
  default: 'default',
124
  })
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit 128201c8a6b3b5f1145c310fb144e669b1feba30
 
1
+ Subproject commit a26b633136a2651f01dcf02b47efc35b2401d807