nomagick committed on
Commit
45d1682
·
unverified ·
1 Parent(s): 7d4102e

feat: pdf upload and ip check

Browse files
package-lock.json CHANGED
@@ -17,7 +17,7 @@
17
  "axios": "^1.3.3",
18
  "bcrypt": "^5.1.0",
19
  "busboy": "^1.6.0",
20
- "civkit": "^0.8.4-ef21ac9",
21
  "core-js": "^3.37.1",
22
  "cors": "^2.8.5",
23
  "dayjs": "^1.11.9",
@@ -3989,9 +3989,9 @@
3989
  }
3990
  },
3991
  "node_modules/civkit": {
3992
- "version": "0.8.4-ef21ac9",
3993
- "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.4-ef21ac9.tgz",
3994
- "integrity": "sha512-CAGzSIcXeBbYmhweTBqTqoroIpxI/dH87KhlT6MzokOiMpRcs02NJXM5V/KPbZ5hTqT9jii2xGd1CwsvTYZezg==",
3995
  "license": "AGPL",
3996
  "dependencies": {
3997
  "lodash": "^4.17.21",
 
17
  "axios": "^1.3.3",
18
  "bcrypt": "^5.1.0",
19
  "busboy": "^1.6.0",
20
+ "civkit": "^0.9.0-f7b0ca7",
21
  "core-js": "^3.37.1",
22
  "cors": "^2.8.5",
23
  "dayjs": "^1.11.9",
 
3989
  }
3990
  },
3991
  "node_modules/civkit": {
3992
+ "version": "0.9.0-f7b0ca7",
3993
+ "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.9.0-f7b0ca7.tgz",
3994
+ "integrity": "sha512-WjF0zRY83Ewvx4fGs1O0PQD2Oyc/RlKCVGiO/LHdwEFwfldTqDE3XWdWv+brZ2GvsIsVVKVa+bEGP0SwJfrRXA==",
3995
  "license": "AGPL",
3996
  "dependencies": {
3997
  "lodash": "^4.17.21",
package.json CHANGED
@@ -26,7 +26,7 @@
26
  "axios": "^1.3.3",
27
  "bcrypt": "^5.1.0",
28
  "busboy": "^1.6.0",
29
- "civkit": "^0.8.4-ef21ac9",
30
  "core-js": "^3.37.1",
31
  "cors": "^2.8.5",
32
  "dayjs": "^1.11.9",
 
26
  "axios": "^1.3.3",
27
  "bcrypt": "^5.1.0",
28
  "busboy": "^1.6.0",
29
+ "civkit": "^0.9.0-f7b0ca7",
30
  "core-js": "^3.37.1",
31
  "cors": "^2.8.5",
32
  "dayjs": "^1.11.9",
src/api/crawler.ts CHANGED
@@ -13,6 +13,7 @@ import {
13
  import { marshalErrorLike } from 'civkit/lang';
14
  import { Defer } from 'civkit/defer';
15
  import { retryWith } from 'civkit/decorators';
 
16
 
17
  import { CONTENT_FORMAT, CrawlerOptions, CrawlerOptionsHeaderOnly, ENGINE_TYPE } from '../dto/crawler-options';
18
 
@@ -43,10 +44,8 @@ import { ProxyProvider } from '../shared/services/proxy-provider';
43
  import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
44
  import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
45
  import { RobotsTxtService } from '../services/robots-text';
46
- import { lookup } from 'dns/promises';
47
- import { isIP } from 'net';
48
-
49
- const normalizeUrl = require('@esm2cjs/normalize-url').default;
50
 
51
  export interface ExtraScrappingOptions extends ScrappingOptions {
52
  withIframe?: boolean | 'quoted';
@@ -92,6 +91,8 @@ export class CrawlerHost extends RPCHost {
92
  protected rateLimitControl: RateLimitControl,
93
  protected threadLocal: AsyncLocalContext,
94
  protected robotsTxtService: RobotsTxtService,
 
 
95
  ) {
96
  super(...arguments);
97
 
@@ -472,47 +473,28 @@ export class CrawlerHost extends RPCHost {
472
  }
473
 
474
  async getTargetUrl(originPath: string, crawlerOptions: CrawlerOptions) {
475
- let url: string;
476
 
477
  const targetUrlFromGet = originPath.slice(1);
478
  if (crawlerOptions.pdf) {
479
- url = `blob://pdf/${randomUUID()}`;
 
 
 
480
  } else if (targetUrlFromGet) {
481
  url = targetUrlFromGet.trim();
482
  } else if (crawlerOptions.url) {
483
  url = crawlerOptions.url.trim();
484
- } else {
485
- return null;
486
- }
487
-
488
- let result: URL;
489
- try {
490
- result = new URL(
491
- normalizeUrl(
492
- url,
493
- {
494
- stripWWW: false,
495
- removeTrailingSlash: false,
496
- removeSingleSlash: false,
497
- sortQueryParameters: false,
498
- }
499
- )
500
- );
501
- } catch (err) {
502
- throw new ParamValidationError({
503
- message: `${err}`,
504
- path: 'url'
505
- });
506
  }
507
 
508
- if (!['http:', 'https:', 'blob:'].includes(result.protocol)) {
509
  throw new ParamValidationError({
510
- message: `Invalid protocol ${result.protocol}`,
511
  path: 'url'
512
  });
513
  }
514
 
515
-
516
  if (this.puppeteerControl.circuitBreakerHosts.has(result.hostname.toLowerCase())) {
517
  throw new SecurityCompromiseError({
518
  message: `Circular hostname: ${result.protocol}`,
@@ -520,31 +502,6 @@ export class CrawlerHost extends RPCHost {
520
  });
521
  }
522
 
523
- const isIp = isIP(result.hostname);
524
-
525
- if (
526
- (result.hostname === 'localhost') ||
527
- (isIp && result.hostname.startsWith('127.'))
528
- ) {
529
- throw new SecurityCompromiseError({
530
- message: `Suspicious action: Request to localhost: ${result}`,
531
- path: 'url'
532
- });
533
- }
534
-
535
- if (!isIp && result.protocol !== 'blob:') {
536
- await lookup(result.hostname).catch((err) => {
537
- if (err.code === 'ENOTFOUND') {
538
- return Promise.reject(new ParamValidationError({
539
- message: `Domain '${result.hostname}' could not be resolved`,
540
- path: 'url'
541
- }));
542
- }
543
-
544
- return;
545
- });
546
- }
547
-
548
  return result;
549
  }
550
 
@@ -733,14 +690,14 @@ export class CrawlerHost extends RPCHost {
733
  }
734
 
735
  if (crawlerOpts?.pdf) {
736
- const pdfBuf = crawlerOpts.pdf instanceof Blob ? await crawlerOpts.pdf.arrayBuffer().then((x) => Buffer.from(x)) : Buffer.from(crawlerOpts.pdf, 'base64');
737
- const pdfDataUrl = `data:application/pdf;base64,${pdfBuf.toString('base64')}`;
738
  const snapshot = {
739
  href: urlToCrawl.toString(),
740
- html: `<!DOCTYPE html><html><head></head><body style="height: 100%; width: 100%; overflow: hidden; margin:0px; background-color: rgb(82, 86, 89);"><embed style="position:absolute; left: 0; top: 0;" width="100%" height="100%" src="${pdfDataUrl}"></body></html>`,
741
  title: '',
742
  text: '',
743
- pdfs: [pdfDataUrl],
744
  } as PageSnapshot;
745
 
746
  yield this.jsdomControl.narrowSnapshot(snapshot, crawlOpts);
 
13
  import { marshalErrorLike } from 'civkit/lang';
14
  import { Defer } from 'civkit/defer';
15
  import { retryWith } from 'civkit/decorators';
16
+ import { FancyFile } from 'civkit/fancy-file';
17
 
18
  import { CONTENT_FORMAT, CrawlerOptions, CrawlerOptionsHeaderOnly, ENGINE_TYPE } from '../dto/crawler-options';
19
 
 
44
  import { FirebaseStorageBucketControl } from '../shared/services/firebase-storage-bucket';
45
  import { JinaEmbeddingsAuthDTO } from '../dto/jina-embeddings-auth';
46
  import { RobotsTxtService } from '../services/robots-text';
47
+ import { TempFileManager } from '../services/temp-file';
48
+ import { MiscService } from '../services/misc';
 
 
49
 
50
  export interface ExtraScrappingOptions extends ScrappingOptions {
51
  withIframe?: boolean | 'quoted';
 
91
  protected rateLimitControl: RateLimitControl,
92
  protected threadLocal: AsyncLocalContext,
93
  protected robotsTxtService: RobotsTxtService,
94
+ protected tempFileManager: TempFileManager,
95
+ protected miscService: MiscService,
96
  ) {
97
  super(...arguments);
98
 
 
473
  }
474
 
475
  async getTargetUrl(originPath: string, crawlerOptions: CrawlerOptions) {
476
+ let url: string = '';
477
 
478
  const targetUrlFromGet = originPath.slice(1);
479
  if (crawlerOptions.pdf) {
480
+ const pdfFile = crawlerOptions.pdf;
481
+ const identifier = pdfFile instanceof FancyFile ? (await pdfFile.sha256Sum) : randomUUID();
482
+ url = `blob://pdf/${identifier}`;
483
+ crawlerOptions.url ??= url;
484
  } else if (targetUrlFromGet) {
485
  url = targetUrlFromGet.trim();
486
  } else if (crawlerOptions.url) {
487
  url = crawlerOptions.url.trim();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  }
489
 
490
+ if (!url) {
491
  throw new ParamValidationError({
492
+ message: 'No URL provided',
493
  path: 'url'
494
  });
495
  }
496
 
497
+ const result = await this.miscService.assertNormalizedUrl(url);
498
  if (this.puppeteerControl.circuitBreakerHosts.has(result.hostname.toLowerCase())) {
499
  throw new SecurityCompromiseError({
500
  message: `Circular hostname: ${result.protocol}`,
 
502
  });
503
  }
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  return result;
506
  }
507
 
 
690
  }
691
 
692
  if (crawlerOpts?.pdf) {
693
+ const pdfFile = crawlerOpts.pdf instanceof FancyFile ? crawlerOpts.pdf : this.tempFileManager.cacheBuffer(Buffer.from(crawlerOpts.pdf, 'base64'));
694
+ const pdfLocalPath = pathToFileURL((await pdfFile.filePath));
695
  const snapshot = {
696
  href: urlToCrawl.toString(),
697
+ html: `<!DOCTYPE html><html><head></head><body style="height: 100%; width: 100%; overflow: hidden; margin:0px; background-color: rgb(82, 86, 89);"><embed style="position:absolute; left: 0; top: 0;" width="100%" height="100%" src="${crawlerOpts.url}"></body></html>`,
698
  title: '',
699
  text: '',
700
+ pdfs: [pdfLocalPath.href],
701
  } as PageSnapshot;
702
 
703
  yield this.jsdomControl.narrowSnapshot(snapshot, crawlOpts);
src/dto/crawler-options.ts CHANGED
@@ -1,4 +1,5 @@
1
- import { Also, AutoCastable, ParamValidationError, Prop, RPC_CALL_ENVIRONMENT } from 'civkit'; // Adjust the import based on where your decorators are defined
 
2
  import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser';
3
  import { Context } from '../services/registry';
4
  import { TurnDownTweakableOptions } from './turndown-tweakable-options';
@@ -277,9 +278,9 @@ export class CrawlerOptions extends AutoCastable {
277
 
278
  @Prop({
279
  desc: 'Base64 encoded PDF.',
280
- type: [File, String]
281
  })
282
- pdf?: File | string;
283
 
284
  @Prop({
285
  default: CONTENT_FORMAT.CONTENT,
 
1
+ import { Also, AutoCastable, ParamValidationError, Prop, RPC_CALL_ENVIRONMENT } from 'civkit/civ-rpc';
2
+ import { FancyFile } from 'civkit/fancy-file';
3
  import { Cookie, parseString as parseSetCookieString } from 'set-cookie-parser';
4
  import { Context } from '../services/registry';
5
  import { TurnDownTweakableOptions } from './turndown-tweakable-options';
 
278
 
279
  @Prop({
280
  desc: 'Base64 encoded PDF.',
281
+ type: [FancyFile, String]
282
  })
283
+ pdf?: FancyFile | string;
284
 
285
  @Prop({
286
  default: CONTENT_FORMAT.CONTENT,
src/services/curl.ts CHANGED
@@ -109,6 +109,8 @@ export class CurlControl extends AsyncService {
109
  curl.setOpt(Curl.option.SSL_VERIFYPEER, false);
110
  curl.setOpt(Curl.option.TIMEOUT_MS, crawlOpts?.timeoutMs || 30_000);
111
  curl.setOpt(Curl.option.CONNECTTIMEOUT_MS, 3_000);
 
 
112
  if (crawlOpts?.method) {
113
  curl.setOpt(Curl.option.CUSTOMREQUEST, crawlOpts.method.toUpperCase());
114
  }
@@ -401,12 +403,12 @@ export class CurlControl extends AsyncService {
401
  digestCurlCode(code: CurlCode, msg: string) {
402
  switch (code) {
403
  // 400 User errors
404
- case CurlCode.CURLE_COULDNT_RESOLVE_HOST:
405
- {
406
- return new AssertionFailureError(msg);
407
- }
408
 
409
  // Maybe retry but dont retry with curl again
 
410
  case CurlCode.CURLE_UNSUPPORTED_PROTOCOL:
411
  case CurlCode.CURLE_PEER_FAILED_VERIFICATION: {
412
  return new ServiceBadApproachError(msg);
@@ -417,7 +419,6 @@ export class CurlControl extends AsyncService {
417
  case CurlCode.CURLE_SEND_ERROR:
418
  case CurlCode.CURLE_RECV_ERROR:
419
  case CurlCode.CURLE_GOT_NOTHING:
420
- case CurlCode.CURLE_OPERATION_TIMEDOUT:
421
  case CurlCode.CURLE_SSL_CONNECT_ERROR:
422
  case CurlCode.CURLE_QUIC_CONNECT_ERROR:
423
  case CurlCode.CURLE_COULDNT_RESOLVE_PROXY:
 
109
  curl.setOpt(Curl.option.SSL_VERIFYPEER, false);
110
  curl.setOpt(Curl.option.TIMEOUT_MS, crawlOpts?.timeoutMs || 30_000);
111
  curl.setOpt(Curl.option.CONNECTTIMEOUT_MS, 3_000);
112
+ curl.setOpt(Curl.option.LOW_SPEED_LIMIT, 32768);
113
+ curl.setOpt(Curl.option.LOW_SPEED_TIME, 5_000);
114
  if (crawlOpts?.method) {
115
  curl.setOpt(Curl.option.CUSTOMREQUEST, crawlOpts.method.toUpperCase());
116
  }
 
403
  digestCurlCode(code: CurlCode, msg: string) {
404
  switch (code) {
405
  // 400 User errors
406
+ case CurlCode.CURLE_COULDNT_RESOLVE_HOST: {
407
+ return new AssertionFailureError(msg);
408
+ }
 
409
 
410
  // Maybe retry but dont retry with curl again
411
+ case CurlCode.CURLE_OPERATION_TIMEDOUT:
412
  case CurlCode.CURLE_UNSUPPORTED_PROTOCOL:
413
  case CurlCode.CURLE_PEER_FAILED_VERIFICATION: {
414
  return new ServiceBadApproachError(msg);
 
419
  case CurlCode.CURLE_SEND_ERROR:
420
  case CurlCode.CURLE_RECV_ERROR:
421
  case CurlCode.CURLE_GOT_NOTHING:
 
422
  case CurlCode.CURLE_SSL_CONNECT_ERROR:
423
  case CurlCode.CURLE_QUIC_CONNECT_ERROR:
424
  case CurlCode.CURLE_COULDNT_RESOLVE_PROXY:
src/services/misc.ts ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { singleton } from 'tsyringe';
2
+ import { AsyncService } from 'civkit/async-service';
3
+ import { ParamValidationError } from 'civkit/civ-rpc';
4
+ import { SecurityCompromiseError } from '../shared/lib/errors';
5
+ import { isIP } from 'node:net';
6
+ import { isIPInNonPublicRange } from '../utils/ip';
7
+ import { GlobalLogger } from './logger';
8
+ import { lookup } from 'node:dns/promises';
9
+ import { Threaded } from './threaded';
10
+
11
+ const normalizeUrl = require('@esm2cjs/normalize-url').default;
12
+
13
/**
 * Miscellaneous shared helpers. Currently hosts URL normalization together
 * with the SSRF guard applied to user-supplied crawl targets.
 */
@singleton()
export class MiscService extends AsyncService {

    logger = this.globalLogger.child({ service: this.constructor.name });

    constructor(
        protected globalLogger: GlobalLogger,
    ) {
        super(...arguments);
    }

    override async init() {
        await this.dependencyReady();

        this.emit('ready');
    }

    /**
     * Normalize `input` into a `URL` and reject targets that must not be crawled.
     *
     * Checks, in order:
     *  1. the text parses as a URL after normalization;
     *  2. the protocol is `http:`, `https:` or `blob:`;
     *  3. the host is not `localhost` and not a literal non-public IP;
     *  4. for non-IP hosts (except `blob:`), every address the name resolves to
     *     is public.
     *
     * NOTE(review): the DNS check is best-effort. A lookup failure other than
     * `ENOTFOUND` lets the URL through (now with a warning), and the address
     * used by the eventual fetch is resolved separately.
     *
     * NOTE(review): `@Threaded()` presumably offloads the call to a worker
     * thread — confirm against `./threaded`.
     *
     * @param input - Raw URL text supplied by the caller.
     * @returns The normalized URL.
     * @throws ParamValidationError when the URL is malformed, uses an
     *         unsupported protocol, or its domain does not exist.
     * @throws SecurityCompromiseError when the target is localhost or a
     *         non-public address.
     */
    @Threaded()
    async assertNormalizedUrl(input: string) {
        let result: URL;
        try {
            result = new URL(
                normalizeUrl(
                    input,
                    {
                        stripWWW: false,
                        removeTrailingSlash: false,
                        removeSingleSlash: false,
                        sortQueryParameters: false,
                    }
                )
            );
        } catch (err) {
            throw new ParamValidationError({
                message: `${err}`,
                path: 'url'
            });
        }

        if (!['http:', 'https:', 'blob:'].includes(result.protocol)) {
            throw new ParamValidationError({
                message: `Invalid protocol ${result.protocol}`,
                path: 'url'
            });
        }

        // URL keeps the brackets around IPv6 literals; isIP() wants them removed.
        const normalizedHostname = result.hostname.startsWith('[') ? result.hostname.slice(1, -1) : result.hostname;
        const isIp = isIP(normalizedHostname);
        if (
            (result.hostname === 'localhost') ||
            (isIp && isIPInNonPublicRange(normalizedHostname))
        ) {
            this.logger.warn(`Suspicious action: Request to localhost or non-public IP: ${normalizedHostname}`, { href: result.href });
            throw new SecurityCompromiseError({
                message: `Suspicious action: Request to localhost or non-public IP: ${normalizedHostname}`,
                path: 'url'
            });
        }
        if (!isIp && result.protocol !== 'blob:') {
            const resolved = await lookup(result.hostname, { all: true }).catch((err) => {
                if (err.code === 'ENOTFOUND') {
                    return Promise.reject(new ParamValidationError({
                        message: `Domain '${result.hostname}' could not be resolved`,
                        path: 'url'
                    }));
                }

                // Still fail open on transient resolver errors, but leave a trace:
                // the resolved-address check below is skipped for this URL.
                this.logger.warn(`DNS lookup failed for ${result.hostname}, resolved-address check skipped`, { err, href: result.href });

                return;
            });
            if (resolved) {
                for (const x of resolved) {
                    if (isIPInNonPublicRange(x.address)) {
                        this.logger.warn(`Suspicious action: Domain resolved to non-public IP: ${result.hostname} => ${x.address}`, { href: result.href, ip: x.address });
                        throw new SecurityCompromiseError({
                            message: `Suspicious action: Domain resolved to non-public IP: ${x.address}`,
                            path: 'url'
                        });
                    }
                }

            }
        }

        return result;
    }

}
src/services/pdf-extract.ts CHANGED
@@ -274,19 +274,19 @@ export class PDFExtractor extends AsyncService {
274
  return { meta: meta.info as Record<string, any>, content: mdChunks.join(''), text: rawChunks.join('') };
275
  }
276
 
277
- async cachedExtract(url: string | URL, cacheTolerance: number = 1000 * 3600 * 24, alternativeUrl?: string) {
278
  if (!url) {
279
  return undefined;
280
  }
281
- const nameUrl = alternativeUrl || url.toString();
282
  const digest = md5Hasher.hash(nameUrl);
283
 
284
- const data = url;
285
- if (typeof url === 'string' && this.isDataUrl(url)) {
286
- url = `dataurl://digest:${digest}`;
287
  }
288
 
289
- const cache: PDFContent | undefined = (await PDFContent.fromFirestoreQuery(PDFContent.COLLECTION.where('urlDigest', '==', digest).orderBy('createdAt', 'desc').limit(1)))?.[0];
 
290
 
291
  if (cache) {
292
  const age = Date.now() - cache?.createdAt.valueOf();
@@ -324,13 +324,13 @@ export class PDFExtractor extends AsyncService {
324
  let extracted;
325
 
326
  try {
327
- extracted = await this.extract(data);
328
  } catch (err: any) {
329
  this.logger.warn(`Unable to extract from pdf ${nameUrl}`, { err, url, nameUrl });
330
  throw new AssertionFailureError(`Unable to process ${nameUrl} as pdf: ${err?.message}`);
331
  }
332
 
333
- if (!this.asyncLocalContext.ctx.DNT) {
334
  const theID = randomUUID();
335
  await this.firebaseObjectStorage.saveFile(`pdfs/${theID}`,
336
  Buffer.from(JSON.stringify(extracted), 'utf-8'), { contentType: 'application/json' });
 
274
  return { meta: meta.info as Record<string, any>, content: mdChunks.join(''), text: rawChunks.join('') };
275
  }
276
 
277
+ async cachedExtract(url: string, cacheTolerance: number = 1000 * 3600 * 24, alternativeUrl?: string) {
278
  if (!url) {
279
  return undefined;
280
  }
281
+ let nameUrl = alternativeUrl || url;
282
  const digest = md5Hasher.hash(nameUrl);
283
 
284
+ if (this.isDataUrl(url)) {
285
+ nameUrl = `blob://pdf:${digest}`;
 
286
  }
287
 
288
+ const cache: PDFContent | undefined = nameUrl.startsWith('blob:') ? undefined :
289
+ (await PDFContent.fromFirestoreQuery(PDFContent.COLLECTION.where('urlDigest', '==', digest).orderBy('createdAt', 'desc').limit(1)))?.[0];
290
 
291
  if (cache) {
292
  const age = Date.now() - cache?.createdAt.valueOf();
 
324
  let extracted;
325
 
326
  try {
327
+ extracted = await this.extract(url);
328
  } catch (err: any) {
329
  this.logger.warn(`Unable to extract from pdf ${nameUrl}`, { err, url, nameUrl });
330
  throw new AssertionFailureError(`Unable to process ${nameUrl} as pdf: ${err?.message}`);
331
  }
332
 
333
+ if (!this.asyncLocalContext.ctx.DNT && !nameUrl.startsWith('blob:')) {
334
  const theID = randomUUID();
335
  await this.firebaseObjectStorage.saveFile(`pdfs/${theID}`,
336
  Buffer.from(JSON.stringify(extracted), 'utf-8'), { contentType: 'application/json' });
src/utils/ip.ts ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { isIPv4, isIPv6 } from 'net';
2
+
3
/**
 * Parse a textual IPv4 or IPv6 address into its raw bytes (network order).
 *
 * IPv6 input may use `::` compression and may end in an embedded dotted-quad
 * IPv4 address (e.g. `::ffff:192.0.2.1`). A trailing `%zone` is ignored.
 *
 * @param ip - Address text.
 * @returns A 4-byte Buffer for IPv4, a 16-byte Buffer for IPv6.
 * @throws Error when `ip` is neither a valid IPv4 nor IPv6 address.
 */
export function parseIp(ip: string): Buffer {
    if (isIPv4(ip)) {
        return Buffer.from(ip.split('.').map(Number));
    }

    if (isIPv6(ip)) {
        // A zone/scope id is not part of the 128-bit address.
        const zoneAt = ip.indexOf('%');
        const addr = zoneAt === -1 ? ip : ip.slice(0, zoneAt);

        if (addr.includes('.')) {
            // The dotted quad stands for TWO 16-bit groups. Rewrite it as such
            // and parse the resulting pure-hex form.
            const lastColon = addr.lastIndexOf(':');
            const v4 = parseIp(addr.slice(lastColon + 1));
            const hi = v4.readUInt16BE(0).toString(16);
            const lo = v4.readUInt16BE(2).toString(16);

            return parseIp(`${addr.slice(0, lastColon + 1)}${hi}:${lo}`);
        }

        let groups: string[];
        if (addr.includes('::')) {
            // Expand the `::` shorthand into the zero groups it replaces.
            const [head, tail] = addr.split('::');
            const left = head ? head.split(':') : [];
            const right = tail ? tail.split(':') : [];
            const zeros = new Array<string>(8 - left.length - right.length).fill('0');
            groups = [...left, ...zeros, ...right];
        } else {
            groups = addr.split(':');
        }

        const buf = Buffer.alloc(16);
        groups.forEach((group, i) => {
            buf.writeUInt16BE(parseInt(group, 16), i * 2);
        });

        return buf;
    }

    throw new Error('Invalid IP address');
}


/**
 * Parse CIDR notation into a `[network, mask]` pair of equal-length Buffers.
 * Host bits of the network address are cleared.
 *
 * A bare address without `/prefix` is treated as a single host (full-length
 * prefix).
 *
 * @param cidr - e.g. `10.0.0.0/8` or `fc00::/7`.
 * @returns `[networkBytes, maskBytes]`.
 * @throws Error when the address is invalid or the prefix is not an integer
 *         within the address width.
 */
export function parseCIDR(cidr: string): [Buffer, Buffer] {
    const [ip, prefixTxt] = cidr.split('/');
    const buf = parseIp(ip ?? '');
    const totalBits = buf.byteLength * 8;

    let prefixBits = totalBits;
    if (prefixTxt !== undefined) {
        // An unparsable prefix used to yield an all-zero mask that matched
        // every address; reject it instead.
        if (!/^\d+$/.test(prefixTxt) || Number(prefixTxt) > totalBits) {
            throw new Error(`Invalid CIDR prefix: ${cidr}`);
        }
        prefixBits = Number(prefixTxt);
    }

    const maskBuf = Buffer.alloc(buf.byteLength);
    for (let i = 0; i < buf.byteLength; i++) {
        // Number of prefix bits that fall inside byte i (0..8).
        const bits = Math.min(8, Math.max(0, prefixBits - i * 8));
        const mask = (0xff << (8 - bits)) & 0xff;
        maskBuf[i] = mask;
        buf[i] = buf[i] & mask;
    }

    return [buf, maskBuf];
}

/**
 * A parsed CIDR network that can test addresses for membership.
 */
export class CIDR {
    /** Network address bytes with host bits cleared. */
    buff: Buffer;
    /** Netmask bytes, same length as `buff`. */
    mask: Buffer;
    /** The original CIDR text. */
    text: string;

    constructor(cidr: string) {
        this.text = cidr;
        [this.buff, this.mask] = parseCIDR(cidr);
    }

    toString() {
        return this.text;
    }

    /** Address family: 4 for IPv4 networks, 6 for IPv6 networks. */
    get family() {
        return this.buff.byteLength === 4 ? 4 : 6;
    }

    /**
     * Whether `ip` belongs to this network.
     *
     * @param ip - Address text, or bytes as produced by `parseIp`.
     * @returns `false` for addresses of the other family.
     * @throws Error when `ip` is a string that is not a valid address.
     */
    test(ip: string | Buffer): boolean {
        const parsedIp = typeof ip === 'string' ? parseIp(ip) : ip;

        if (parsedIp.byteLength !== this.buff.byteLength) {
            return false;
        }

        // Bytes under a zero mask compare as 0 === 0, so no special-casing is needed.
        return this.buff.every((networkByte, i) => (parsedIp[i] & this.mask[i]) === networkByte);
    }
}

const nonPublicNetworks4 = [
    // "This network"; 0.0.0.0 reaches the local host on common stacks.
    '0.0.0.0/8',

    // Private use
    '10.0.0.0/8',
    '172.16.0.0/12',
    '192.168.0.0/16',

    // Loopback, broadcast, link-local, multicast
    '127.0.0.0/8',
    '255.255.255.255/32',
    '169.254.0.0/16',
    '224.0.0.0/4',

    // Carrier-grade NAT, reserved
    '100.64.0.0/10',
    '240.0.0.0/4',

    // Documentation and benchmarking ranges; never publicly routed
    '192.0.2.0/24',
    '198.18.0.0/15',
    '198.51.100.0/24',
    '203.0.113.0/24',
];


const nonPublicNetworks6 = [
    // Unique-local, link-local, multicast
    'fc00::/7',
    'fe80::/10',
    'ff00::/8',

    // Loopback and unspecified
    '::1/128',
    '::127.0.0.0/104',
    '::/128',
];

const nonPublicCIDRs = [...nonPublicNetworks4, ...nonPublicNetworks6].map(cidr => new CIDR(cidr));

/**
 * Extract the IPv4 address carried inside an IPv6 address, if any.
 * Recognizes IPv4-mapped (`::ffff:a.b.c.d`), the deprecated IPv4-compatible
 * form (`::a.b.c.d`) and the NAT64 well-known prefix (`64:ff9b::/96`).
 */
function extractEmbeddedIPv4(addr: Buffer): Buffer | undefined {
    if (addr.byteLength !== 16) {
        return undefined;
    }
    const isZero = (from: number, to: number) => addr.subarray(from, to).every((b) => b === 0);

    const marker = addr.readUInt16BE(10);
    const mappedOrCompatible = isZero(0, 10) && (marker === 0xffff || marker === 0);
    const nat64 = addr.readUInt32BE(0) === 0x0064ff9b && isZero(4, 12);

    return (mappedOrCompatible || nat64) ? addr.subarray(12) : undefined;
}

/**
 * Whether `ip` lies in a loopback, private, link-local, multicast or otherwise
 * non-public range. IPv6 addresses that merely wrap an IPv4 address are also
 * judged by that IPv4 address, so `::ffff:10.0.0.1` cannot bypass the IPv4
 * rules.
 *
 * @param ip - IPv4 or IPv6 address text.
 * @returns `true` when the address must not be treated as a public target.
 * @throws Error when `ip` is not a valid address.
 */
export function isIPInNonPublicRange(ip: string) {
    const parsed = parseIp(ip);

    if (nonPublicCIDRs.some((cidr) => cidr.test(parsed))) {
        return true;
    }

    const embedded = extractEmbeddedIPv4(parsed);

    return embedded !== undefined && nonPublicCIDRs.some((cidr) => cidr.test(embedded));
}