nomagick committed on
Commit
629ab27
·
unverified ·
1 Parent(s): 664d4b1
backend/functions/package.json CHANGED
@@ -18,8 +18,7 @@
18
  "from-preset": "npm run build && npm run emu:reset && npm run emu:start",
19
  "start": "npm run shell",
20
  "deploy": "firebase deploy --only functions",
21
- "logs": "firebase functions:log",
22
- "gcp-build": "node node_modules/puppeteer/install.js"
23
  },
24
  "engines": {
25
  "node": "18"
 
18
  "from-preset": "npm run build && npm run emu:reset && npm run emu:start",
19
  "start": "npm run shell",
20
  "deploy": "firebase deploy --only functions",
21
+ "logs": "firebase functions:log"
 
22
  },
23
  "engines": {
24
  "node": "18"
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -30,7 +30,9 @@ export class CrawlerHost extends RPCHost {
30
  formatSnapshot(snapshot: PageSnapshot) {
31
 
32
  const toBeTurnedToMd = snapshot.parsed?.content;
33
- const contentText = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd) : snapshot.text;
 
 
34
 
35
  const formatted = {
36
  title: (snapshot.parsed?.title || snapshot.title || '').trim(),
@@ -51,6 +53,16 @@ ${contentText}
51
  return formatted;
52
  }
53
 
 
 
 
 
 
 
 
 
 
 
54
  @CloudHTTPv2({
55
  runtime: {
56
  memory: '4GiB',
 
30
  formatSnapshot(snapshot: PageSnapshot) {
31
 
32
  const toBeTurnedToMd = snapshot.parsed?.content;
33
+ const turnedDown = toBeTurnedToMd ? this.turnDownService.turndown(toBeTurnedToMd).trim() : '';
34
+
35
+ const contentText = turnedDown && !(turnedDown.startsWith('<') && turnedDown.endsWith('>')) ? turnedDown : snapshot.text.trim();
36
 
37
  const formatted = {
38
  title: (snapshot.parsed?.title || snapshot.title || '').trim(),
 
53
  return formatted;
54
  }
55
 
56
+ @CloudHTTPv2({
57
+ name: 'crawl2',
58
+ runtime: {
59
+ memory: '4GiB',
60
+ timeoutSeconds: 540,
61
+ concurrency: 4,
62
+ },
63
+ httpMethod: ['get', 'post'],
64
+ returnType: [String, OutputServerEventStream],
65
+ })
66
  @CloudHTTPv2({
67
  runtime: {
68
  memory: '4GiB',
backend/functions/src/services/puppeteer.ts CHANGED
@@ -71,8 +71,7 @@ export class PuppeteerControl extends AsyncService {
71
  }
72
  }
73
  this.browser = await puppeteer.launch({
74
- headless: true,
75
- timeout: 60_000
76
  });
77
  this.browser.once('disconnected', () => {
78
  this.logger.warn(`Browser disconnected`);
 
71
  }
72
  }
73
  this.browser = await puppeteer.launch({
74
+ headless: true
 
75
  });
76
  this.browser.once('disconnected', () => {
77
  this.logger.warn(`Browser disconnected`);