nomagick committed on
Commit
9503382
·
unverified ·
1 Parent(s): 5199b00
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -90,6 +90,10 @@ ${this.content}
90
 
91
  try {
92
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
 
 
 
 
93
  const formatted = this.formatSnapshot(scrapped);
94
 
95
  if (scrapped.screenshot && screenshotEnabled) {
 
90
 
91
  try {
92
  for await (const scrapped of this.puppeteerControl.scrap(urlToCrawl.toString(), noCache)) {
93
+ if (!scrapped) {
94
+ continue;
95
+ }
96
+
97
  const formatted = this.formatSnapshot(scrapped);
98
 
99
  if (scrapped.screenshot && screenshotEnabled) {
backend/functions/src/services/puppeteer.ts CHANGED
@@ -15,7 +15,7 @@ export interface PageSnapshot {
15
  href: string;
16
  html: string;
17
  text: string;
18
- parsed: {
19
  title: string;
20
  content: string;
21
  textContent: string;
@@ -78,7 +78,7 @@ export class PuppeteerControl extends AsyncService {
78
  timeout: 10_000
79
  }).catch((err) => {
80
  this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
81
- process.nextTick(()=> {
82
  process.exit(1);
83
  });
84
  return Promise.reject(err);
@@ -153,7 +153,7 @@ function giveSnapshot() {
153
  return page;
154
  }
155
 
156
- async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot> {
157
  const parsedUrl = new URL(url);
158
  // parsedUrl.search = '';
159
  parsedUrl.hash = '';
@@ -236,7 +236,7 @@ function giveSnapshot() {
236
  while (true) {
237
  await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
238
  if (finalized) {
239
- yield { ...snapshot, screenshot };
240
  break;
241
  }
242
  yield snapshot;
 
15
  href: string;
16
  html: string;
17
  text: string;
18
+ parsed?: {
19
  title: string;
20
  content: string;
21
  textContent: string;
 
78
  timeout: 10_000
79
  }).catch((err) => {
80
  this.logger.error(`Unknown firebase issue, just die fast, quitting process.`, { err });
81
+ process.nextTick(() => {
82
  process.exit(1);
83
  });
84
  return Promise.reject(err);
 
153
  return page;
154
  }
155
 
156
+ async *scrap(url: string, noCache: string | boolean = false): AsyncGenerator<PageSnapshot | undefined> {
157
  const parsedUrl = new URL(url);
158
  // parsedUrl.search = '';
159
  parsedUrl.hash = '';
 
236
  while (true) {
237
  await Promise.race([nextSnapshotDeferred.promise, gotoPromise]);
238
  if (finalized) {
239
+ yield { ...snapshot, screenshot } as PageSnapshot;
240
  break;
241
  }
242
  yield snapshot;