Spaces:
Build error
fix: allow POST with url
Browse files
backend/functions/src/cloud-functions/crawler.ts
CHANGED
|
@@ -571,12 +571,14 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|
| 571 |
res: Response,
|
| 572 |
},
|
| 573 |
auth: JinaEmbeddingsAuthDTO,
|
| 574 |
-
|
|
|
|
| 575 |
) {
|
| 576 |
const uid = await auth.solveUID();
|
| 577 |
let chargeAmount = 0;
|
| 578 |
const noSlashURL = ctx.req.url.slice(1);
|
| 579 |
-
|
|
|
|
| 580 |
const latestUser = uid ? await auth.assertUser() : undefined;
|
| 581 |
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
| 582 |
return this.getIndex(latestUser);
|
|
@@ -636,7 +638,17 @@ ${suffixMixins.length ? `\n${suffixMixins.join('\n\n')}\n` : ''}`;
|
|
| 636 |
let urlToCrawl;
|
| 637 |
const normalizeUrl = (await pNormalizeUrl).default;
|
| 638 |
try {
|
| 639 |
-
urlToCrawl = new URL(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 640 |
} catch (err) {
|
| 641 |
throw new ParamValidationError({
|
| 642 |
message: `${err}`,
|
|
|
|
| 571 |
res: Response,
|
| 572 |
},
|
| 573 |
auth: JinaEmbeddingsAuthDTO,
|
| 574 |
+
crawlerOptionsHeaderOnly: CrawlerOptionsHeaderOnly,
|
| 575 |
+
crawlerOptionsParamsAllowed: CrawlerOptions,
|
| 576 |
) {
|
| 577 |
const uid = await auth.solveUID();
|
| 578 |
let chargeAmount = 0;
|
| 579 |
const noSlashURL = ctx.req.url.slice(1);
|
| 580 |
+
const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
|
| 581 |
+
if (!noSlashURL && !crawlerOptions.url) {
|
| 582 |
const latestUser = uid ? await auth.assertUser() : undefined;
|
| 583 |
if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
|
| 584 |
return this.getIndex(latestUser);
|
|
|
|
| 638 |
let urlToCrawl;
|
| 639 |
const normalizeUrl = (await pNormalizeUrl).default;
|
| 640 |
try {
|
| 641 |
+
urlToCrawl = new URL(
|
| 642 |
+
normalizeUrl(
|
| 643 |
+
(crawlerOptions.url || noSlashURL).trim(),
|
| 644 |
+
{
|
| 645 |
+
stripWWW: false,
|
| 646 |
+
removeTrailingSlash: false,
|
| 647 |
+
removeSingleSlash: false,
|
| 648 |
+
sortQueryParameters: false,
|
| 649 |
+
}
|
| 650 |
+
)
|
| 651 |
+
);
|
| 652 |
} catch (err) {
|
| 653 |
throw new ParamValidationError({
|
| 654 |
message: `${err}`,
|
backend/functions/src/dto/scrapping-options.ts
CHANGED
|
@@ -116,6 +116,9 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
|
|
| 116 |
})
|
| 117 |
export class CrawlerOptions extends AutoCastable {
|
| 118 |
|
|
|
|
|
|
|
|
|
|
| 119 |
@Prop({
|
| 120 |
default: 'default',
|
| 121 |
})
|
|
|
|
| 116 |
})
|
| 117 |
export class CrawlerOptions extends AutoCastable {
|
| 118 |
|
| 119 |
+
@Prop()
|
| 120 |
+
url?: string;
|
| 121 |
+
|
| 122 |
@Prop({
|
| 123 |
default: 'default',
|
| 124 |
})
|
thinapps-shared
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
Subproject commit
|
|
|
|
| 1 |
+
Subproject commit a26b633136a2651f01dcf02b47efc35b2401d807
|