nomagick committed on
Commit
0e8308e
·
unverified ·
1 Parent(s): 05df989

fix: some invalid uriComponent case

Browse files
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -24,6 +24,7 @@ import { JSDomControl } from '../services/jsdom';
24
  import { FormattedPage, md5Hasher, SnapshotFormatter } from '../services/snapshot-formatter';
25
  import { CurlControl } from '../services/curl';
26
  import { LmControl } from '../services/lm';
 
27
 
28
  export interface ExtraScrappingOptions extends ScrappingOptions {
29
  withIframe?: boolean | 'quoted';
@@ -169,7 +170,8 @@ export class CrawlerHost extends RPCHost {
169
  let chargeAmount = 0;
170
  const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
171
 
172
- const targetUrl = await this.getTargetUrl(decodeURIComponent(ctx.req.url), crawlerOptions);
 
173
  if (!targetUrl) {
174
  const latestUser = uid ? await auth.assertUser() : undefined;
175
  if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
 
24
  import { FormattedPage, md5Hasher, SnapshotFormatter } from '../services/snapshot-formatter';
25
  import { CurlControl } from '../services/curl';
26
  import { LmControl } from '../services/lm';
27
+ import { tryDecodeURIComponent } from '../utils/misc';
28
 
29
  export interface ExtraScrappingOptions extends ScrappingOptions {
30
  withIframe?: boolean | 'quoted';
 
170
  let chargeAmount = 0;
171
  const crawlerOptions = ctx.req.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
172
 
173
+ // Note: req.url in express is actually the unparsed `path` (e.g. `/some-path?abc`), not a full URL.
174
+ const targetUrl = await this.getTargetUrl(tryDecodeURIComponent(ctx.req.url), crawlerOptions);
175
  if (!targetUrl) {
176
  const latestUser = uid ? await auth.assertUser() : undefined;
177
  if (!ctx.req.accepts('text/plain') && (ctx.req.accepts('text/json') || ctx.req.accepts('application/json'))) {
backend/functions/src/utils/misc.ts CHANGED
@@ -1,3 +1,18 @@
 
 
1
  export function cleanAttribute(attribute: string | null) {
2
  return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : '';
3
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ParamValidationError } from 'civkit';
2
+
3
  export function cleanAttribute(attribute: string | null) {
4
  return attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : '';
5
  }
6
+
7
+
8
+ export function tryDecodeURIComponent(input: string) {
9
+ try {
10
+ return decodeURIComponent(input);
11
+ } catch (err) {
12
+ if (URL.canParse(input, 'http://localhost:3000')) {
13
+ return input;
14
+ }
15
+
16
+ throw new ParamValidationError(`Invalid URIComponent: ${input}`);
17
+ }
18
+ }