nomagick committed on
Commit
102a168
·
unverified ·
1 Parent(s): 00a1278

feat: expand shadow dom

Browse files
backend/functions/package-lock.json CHANGED
@@ -15,13 +15,13 @@
15
  "archiver": "^6.0.1",
16
  "axios": "^1.3.3",
17
  "bcrypt": "^5.1.0",
18
- "civkit": "^0.8.0-8592519",
19
  "core-js": "^3.37.1",
20
  "cors": "^2.8.5",
21
  "dayjs": "^1.11.9",
22
  "express": "^4.19.2",
23
  "firebase-admin": "^12.1.0",
24
- "firebase-functions": "^6.0.1",
25
  "htmlparser2": "^9.0.0",
26
  "jose": "^5.1.0",
27
  "langdetect": "^0.2.1",
@@ -2176,12 +2176,14 @@
2176
  }
2177
  },
2178
  "node_modules/@types/express": {
2179
- "version": "4.17.3",
2180
- "resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.3.tgz",
2181
- "integrity": "sha512-I8cGRJj3pyOLs/HndoP+25vOqhqWkAZsWMEmq1qXy/b/M3ppufecUwaK2/TVDVxcV61/iSdhykUjQQ2DLSrTdg==",
 
2182
  "dependencies": {
2183
  "@types/body-parser": "*",
2184
- "@types/express-serve-static-core": "*",
 
2185
  "@types/serve-static": "*"
2186
  }
2187
  },
@@ -3727,9 +3729,10 @@
3727
  }
3728
  },
3729
  "node_modules/civkit": {
3730
- "version": "0.8.0-8592519",
3731
- "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.0-8592519.tgz",
3732
- "integrity": "sha512-CFd6RLjYyKkNNlzE/kBqWqiYQJOzMXL2uuMiDYGy+IY4WnO5U9wzQ1VQDEWSPWDZl+czybyVGTp0Uz5s9NyA5A==",
 
3733
  "dependencies": {
3734
  "lodash": "^4.17.21",
3735
  "tslib": "^2.5.0"
@@ -5510,15 +5513,15 @@
5510
  }
5511
  },
5512
  "node_modules/firebase-functions": {
5513
- "version": "6.0.1",
5514
- "resolved": "https://registry.npmjs.org/firebase-functions/-/firebase-functions-6.0.1.tgz",
5515
- "integrity": "sha512-0rIpTU6dnLRvP3IK+okn1FDjoqjzShm0/S+i4OMY7JFu/HJoyJ1JNkrT4KjECy1/mCHK49KsmH8iYE0rzrglHg==",
5516
  "license": "MIT",
5517
  "dependencies": {
5518
  "@types/cors": "^2.8.5",
5519
- "@types/express": "4.17.3",
5520
  "cors": "^2.8.5",
5521
- "express": "^4.17.1",
5522
  "protobufjs": "^7.2.2"
5523
  },
5524
  "bin": {
@@ -7848,17 +7851,6 @@
7848
  "node": ">=14"
7849
  }
7850
  },
7851
- "node_modules/jwks-rsa/node_modules/@types/express": {
7852
- "version": "4.17.21",
7853
- "resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.21.tgz",
7854
- "integrity": "sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==",
7855
- "dependencies": {
7856
- "@types/body-parser": "*",
7857
- "@types/express-serve-static-core": "^4.17.33",
7858
- "@types/qs": "*",
7859
- "@types/serve-static": "*"
7860
- }
7861
- },
7862
  "node_modules/jwks-rsa/node_modules/jose": {
7863
  "version": "4.15.5",
7864
  "resolved": "https://registry.npmjs.org/jose/-/jose-4.15.5.tgz",
 
15
  "archiver": "^6.0.1",
16
  "axios": "^1.3.3",
17
  "bcrypt": "^5.1.0",
18
+ "civkit": "^0.8.1-1f42c5a",
19
  "core-js": "^3.37.1",
20
  "cors": "^2.8.5",
21
  "dayjs": "^1.11.9",
22
  "express": "^4.19.2",
23
  "firebase-admin": "^12.1.0",
24
+ "firebase-functions": "^6.1.0",
25
  "htmlparser2": "^9.0.0",
26
  "jose": "^5.1.0",
27
  "langdetect": "^0.2.1",
 
2176
  }
2177
  },
2178
  "node_modules/@types/express": {
2179
+ "version": "4.17.21",
2180
+ "resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.21.tgz",
2181
+ "integrity": "sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==",
2182
+ "license": "MIT",
2183
  "dependencies": {
2184
  "@types/body-parser": "*",
2185
+ "@types/express-serve-static-core": "^4.17.33",
2186
+ "@types/qs": "*",
2187
  "@types/serve-static": "*"
2188
  }
2189
  },
 
3729
  }
3730
  },
3731
  "node_modules/civkit": {
3732
+ "version": "0.8.1-1f42c5a",
3733
+ "resolved": "https://registry.npmjs.org/civkit/-/civkit-0.8.1-1f42c5a.tgz",
3734
+ "integrity": "sha512-+cXywfdiu9+QbnNmJXKCjiAdEUdGRiiZ8zg/YKRqsr4vaX6lFNEI3P0J1FOj1x3vRL9cESGucXN6rh0AfmHHTQ==",
3735
+ "license": "AGPL",
3736
  "dependencies": {
3737
  "lodash": "^4.17.21",
3738
  "tslib": "^2.5.0"
 
5513
  }
5514
  },
5515
  "node_modules/firebase-functions": {
5516
+ "version": "6.1.0",
5517
+ "resolved": "https://registry.npmjs.org/firebase-functions/-/firebase-functions-6.1.0.tgz",
5518
+ "integrity": "sha512-7Gq7XpIA2qo9wKhYA9Ksb0v2bHfXD70zQwBJO6//Q624A7D9KAb449K6DM0swrCoPO7NGExbPf2eC7j7e+4+xA==",
5519
  "license": "MIT",
5520
  "dependencies": {
5521
  "@types/cors": "^2.8.5",
5522
+ "@types/express": "^4.17.21",
5523
  "cors": "^2.8.5",
5524
+ "express": "^4.21.0",
5525
  "protobufjs": "^7.2.2"
5526
  },
5527
  "bin": {
 
7851
  "node": ">=14"
7852
  }
7853
  },
 
 
 
 
 
 
 
 
 
 
 
7854
  "node_modules/jwks-rsa/node_modules/jose": {
7855
  "version": "4.15.5",
7856
  "resolved": "https://registry.npmjs.org/jose/-/jose-4.15.5.tgz",
backend/functions/package.json CHANGED
@@ -35,13 +35,13 @@
35
  "archiver": "^6.0.1",
36
  "axios": "^1.3.3",
37
  "bcrypt": "^5.1.0",
38
- "civkit": "^0.8.0-8592519",
39
  "core-js": "^3.37.1",
40
  "cors": "^2.8.5",
41
  "dayjs": "^1.11.9",
42
  "express": "^4.19.2",
43
  "firebase-admin": "^12.1.0",
44
- "firebase-functions": "^6.0.1",
45
  "htmlparser2": "^9.0.0",
46
  "jose": "^5.1.0",
47
  "langdetect": "^0.2.1",
 
35
  "archiver": "^6.0.1",
36
  "axios": "^1.3.3",
37
  "bcrypt": "^5.1.0",
38
+ "civkit": "^0.8.1-1f42c5a",
39
  "core-js": "^3.37.1",
40
  "cors": "^2.8.5",
41
  "dayjs": "^1.11.9",
42
  "express": "^4.19.2",
43
  "firebase-admin": "^12.1.0",
44
+ "firebase-functions": "^6.1.0",
45
  "htmlparser2": "^9.0.0",
46
  "jose": "^5.1.0",
47
  "langdetect": "^0.2.1",
backend/functions/src/cloud-functions/crawler.ts CHANGED
@@ -24,6 +24,7 @@ import { FormattedPage, md5Hasher, SnapshotFormatter } from '../services/snapsho
24
 
25
  export interface ExtraScrappingOptions extends ScrappingOptions {
26
  withIframe?: boolean;
 
27
  targetSelector?: string | string[];
28
  removeSelector?: string | string[];
29
  keepImgDataUrl?: boolean;
@@ -571,7 +572,7 @@ export class CrawlerHost extends RPCHost {
571
  }
572
 
573
  try {
574
- if (crawlOpts?.targetSelector || crawlOpts?.removeSelector || crawlOpts?.withIframe) {
575
  for await (const x of this.puppeteerControl.scrap(urlToCrawl, crawlOpts)) {
576
  yield this.jsdomControl.narrowSnapshot(x, crawlOpts);
577
  }
@@ -686,6 +687,7 @@ export class CrawlerHost extends RPCHost {
686
  overrideUserAgent: opts.userAgent,
687
  timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
688
  withIframe: opts.withIframe,
 
689
  locale: opts.locale,
690
  referer: opts.referer,
691
  };
 
24
 
25
  export interface ExtraScrappingOptions extends ScrappingOptions {
26
  withIframe?: boolean;
27
+ withShadowDom?: boolean;
28
  targetSelector?: string | string[];
29
  removeSelector?: string | string[];
30
  keepImgDataUrl?: boolean;
 
572
  }
573
 
574
  try {
575
+ if (crawlOpts?.targetSelector || crawlOpts?.removeSelector || crawlOpts?.withIframe || crawlOpts?.withShadowDom) {
576
  for await (const x of this.puppeteerControl.scrap(urlToCrawl, crawlOpts)) {
577
  yield this.jsdomControl.narrowSnapshot(x, crawlOpts);
578
  }
 
687
  overrideUserAgent: opts.userAgent,
688
  timeoutMs: opts.timeout ? opts.timeout * 1000 : undefined,
689
  withIframe: opts.withIframe,
690
+ withShadowDom: opts.withShadowDom,
691
  locale: opts.locale,
692
  referer: opts.referer,
693
  };
backend/functions/src/dto/scrapping-options.ts CHANGED
@@ -101,6 +101,16 @@ import { parseString as parseSetCookieString } from 'set-cookie-parser';
101
  in: 'header',
102
  schema: { type: 'string' }
103
  },
 
 
 
 
 
 
 
 
 
 
104
  'X-User-Agent': {
105
  description: `Override User-Agent.`,
106
  in: 'header',
@@ -185,6 +195,11 @@ export class CrawlerOptions extends AutoCastable {
185
  })
186
  withIframe!: boolean;
187
 
 
 
 
 
 
188
  @Prop({
189
  arrayOf: String,
190
  })
@@ -283,6 +298,13 @@ export class CrawlerOptions extends AutoCastable {
283
  if (instance.withIframe) {
284
  instance.timeout ??= null;
285
  }
 
 
 
 
 
 
 
286
 
287
  const cookies: CookieParam[] = [];
288
  const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
 
101
  in: 'header',
102
  schema: { type: 'string' }
103
  },
104
+ 'X-With-Iframe': {
105
+ description: `Enable filling iframe contents into main. (violates standards)`,
106
+ in: 'header',
107
+ schema: { type: 'string' }
108
+ },
109
+ 'X-With-Shadow-Dom': {
110
+ description: `Enable filling shadow dom contents into main. (violates standards)`,
111
+ in: 'header',
112
+ schema: { type: 'string' }
113
+ },
114
  'X-User-Agent': {
115
  description: `Override User-Agent.`,
116
  in: 'header',
 
195
  })
196
  withIframe!: boolean;
197
 
198
+ @Prop({
199
+ default: false,
200
+ })
201
+ withShadowDom!: boolean;
202
+
203
  @Prop({
204
  arrayOf: String,
205
  })
 
298
  if (instance.withIframe) {
299
  instance.timeout ??= null;
300
  }
301
+ const withShadowDom = ctx?.req.get('x-with-shadow-dom');
302
+ if (withShadowDom) {
303
+ instance.withShadowDom = Boolean(withShadowDom);
304
+ }
305
+ if (instance.withShadowDom) {
306
+ instance.timeout ??= null;
307
+ }
308
 
309
  const cookies: CookieParam[] = [];
310
  const setCookieHeaders = ctx?.req.get('x-set-cookie')?.split(', ') || (instance.setCookies as any as string[]);
backend/functions/src/index.ts CHANGED
@@ -1,5 +1,5 @@
1
  import 'reflect-metadata';
2
- import './shared/lib/doom-domain';
3
  import { initializeApp } from 'firebase-admin/app';
4
  initializeApp();
5
 
 
1
  import 'reflect-metadata';
2
+ // import './shared/lib/doom-domain';
3
  import { initializeApp } from 'firebase-admin/app';
4
  initializeApp();
5
 
backend/functions/src/services/jsdom.ts CHANGED
@@ -5,6 +5,7 @@ import { ExtendedSnapshot, PageSnapshot } from './puppeteer';
5
  import { Readability } from '@mozilla/readability';
6
  import TurndownService from 'turndown';
7
  import { Threaded } from '../shared/services/threaded';
 
8
 
9
  const pLinkedom = import('linkedom');
10
 
@@ -27,12 +28,8 @@ export class JSDomControl extends AsyncService {
27
  this.emit('ready');
28
  }
29
 
30
- async narrowSnapshot(snapshot: PageSnapshot | undefined, options?: {
31
- targetSelector?: string | string[];
32
- removeSelector?: string | string[];
33
- withIframe?: boolean;
34
- }) {
35
- if (snapshot?.parsed && !options?.targetSelector && !options?.removeSelector && !options?.withIframe) {
36
  return snapshot;
37
  }
38
  if (!snapshot?.html) {
@@ -43,14 +40,13 @@ export class JSDomControl extends AsyncService {
43
  }
44
 
45
  @Threaded()
46
- async actualNarrowSnapshot(snapshot: PageSnapshot, options?: {
47
- targetSelector?: string | string[];
48
- removeSelector?: string | string[];
49
- withIframe?: boolean;
50
- }): Promise<PageSnapshot | undefined> {
51
-
52
  const t0 = Date.now();
53
- const jsdom = this.linkedom.parseHTML(snapshot.html);
 
 
 
 
54
  const allNodes: Node[] = [];
55
  jsdom.window.document.querySelectorAll('svg').forEach((x) => x.innerHTML = '');
56
  if (options?.withIframe) {
@@ -107,12 +103,12 @@ export class JSDomControl extends AsyncService {
107
 
108
  return snapshot;
109
  }
110
- const textChunks: string[] = [];
111
  let rootDoc: Document;
112
  if (allNodes.length === 1 && allNodes[0].nodeName === '#document') {
113
  rootDoc = allNodes[0] as any;
114
  if (rootDoc.body.innerText) {
115
- textChunks.push(rootDoc.body.innerText);
116
  }
117
  } else {
118
  rootDoc = this.linkedom.parseHTML('<html><body></body></html>').window.document;
@@ -120,10 +116,16 @@ export class JSDomControl extends AsyncService {
120
  rootDoc.body.appendChild(n);
121
  rootDoc.body.appendChild(rootDoc.createTextNode('\n\n'));
122
  if ((n as HTMLElement).innerText) {
123
- textChunks.push((n as HTMLElement).innerText);
124
  }
125
  }
126
  }
 
 
 
 
 
 
127
 
128
  let parsed;
129
  try {
@@ -229,6 +231,14 @@ export class JSDomControl extends AsyncService {
229
  snippetToElement(snippet?: string, url?: string) {
230
  const parsed = this.linkedom.parseHTML(snippet || '<html><body></body></html>');
231
 
 
 
 
 
 
 
 
 
232
  return parsed.window.document.documentElement;
233
  }
234
 
 
5
  import { Readability } from '@mozilla/readability';
6
  import TurndownService from 'turndown';
7
  import { Threaded } from '../shared/services/threaded';
8
+ import type { ExtraScrappingOptions } from '../cloud-functions/crawler';
9
 
10
  const pLinkedom = import('linkedom');
11
 
 
28
  this.emit('ready');
29
  }
30
 
31
+ async narrowSnapshot(snapshot: PageSnapshot | undefined, options?: ExtraScrappingOptions) {
32
+ if (snapshot?.parsed && !options?.targetSelector && !options?.removeSelector && !options?.withIframe && !options?.withShadowDom) {
 
 
 
 
33
  return snapshot;
34
  }
35
  if (!snapshot?.html) {
 
40
  }
41
 
42
  @Threaded()
43
+ async actualNarrowSnapshot(snapshot: PageSnapshot, options?: ExtraScrappingOptions): Promise<PageSnapshot | undefined> {
 
 
 
 
 
44
  const t0 = Date.now();
45
+ let sourceHTML = snapshot.html;
46
+ if (options?.withShadowDom && snapshot.shadowExpanded) {
47
+ sourceHTML = snapshot.shadowExpanded;
48
+ }
49
+ const jsdom = this.linkedom.parseHTML(sourceHTML);
50
  const allNodes: Node[] = [];
51
  jsdom.window.document.querySelectorAll('svg').forEach((x) => x.innerHTML = '');
52
  if (options?.withIframe) {
 
103
 
104
  return snapshot;
105
  }
106
+ const textNodes: HTMLElement[] = [];
107
  let rootDoc: Document;
108
  if (allNodes.length === 1 && allNodes[0].nodeName === '#document') {
109
  rootDoc = allNodes[0] as any;
110
  if (rootDoc.body.innerText) {
111
+ textNodes.push(rootDoc.body);
112
  }
113
  } else {
114
  rootDoc = this.linkedom.parseHTML('<html><body></body></html>').window.document;
 
116
  rootDoc.body.appendChild(n);
117
  rootDoc.body.appendChild(rootDoc.createTextNode('\n\n'));
118
  if ((n as HTMLElement).innerText) {
119
+ textNodes.push(n as HTMLElement);
120
  }
121
  }
122
  }
123
+ const textChunks = textNodes.map((x) => {
124
+ const clone = x.cloneNode(true) as HTMLElement;
125
+ clone.querySelectorAll('script,style,link,svg').forEach((s) => s.remove());
126
+
127
+ return clone.innerText;
128
+ });
129
 
130
  let parsed;
131
  try {
 
231
  snippetToElement(snippet?: string, url?: string) {
232
  const parsed = this.linkedom.parseHTML(snippet || '<html><body></body></html>');
233
 
234
+ // Hack for turndown gfm table plugin.
235
+ parsed.window.document.querySelectorAll('table').forEach((x) => {
236
+ Object.defineProperty(x, 'rows', { value: Array.from(x.querySelectorAll('tr')), enumerable: true });
237
+ });
238
+ Object.defineProperty(parsed.window.document.documentElement, 'cloneNode', {
239
+ value: function () { return this; },
240
+ });
241
+
242
  return parsed.window.document.documentElement;
243
  }
244
 
backend/functions/src/services/puppeteer.ts CHANGED
@@ -46,6 +46,7 @@ export interface PageSnapshot {
46
  href: string;
47
  rebase?: string;
48
  html: string;
 
49
  text: string;
50
  status?: number;
51
  statusText?: string;
@@ -157,6 +158,79 @@ function getMaxDepthAndCountUsingTreeWalker(root) {
157
  };
158
  }
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  function giveSnapshot(stopActiveSnapshot) {
161
  if (stopActiveSnapshot) {
162
  window.haltSnapshot = true;
@@ -174,6 +248,7 @@ function giveSnapshot(stopActiveSnapshot) {
174
  href: document.location.href,
175
  html: document.documentElement?.outerHTML,
176
  text: document.body?.innerText,
 
177
  parsed: parsed,
178
  imgs: [],
179
  maxElemDepth: domAnalysis.maxDepth,
 
46
  href: string;
47
  rebase?: string;
48
  html: string;
49
+ shadowExpanded?: string
50
  text: string;
51
  status?: number;
52
  statusText?: string;
 
158
  };
159
  }
160
 
161
+ function cloneAndExpandShadowRoots(rootElement = document.documentElement) {
162
+ // Create a shallow clone of the root element
163
+ const clone = rootElement.cloneNode(false);
164
+ // Function to process an element and its shadow root
165
+ function processShadowRoot(original, cloned) {
166
+ if (original.shadowRoot && original.shadowRoot.mode === 'open') {
167
+ shadowDomPresents = true;
168
+ const shadowContent = document.createDocumentFragment();
169
+
170
+ // Clone shadow root content normally
171
+ original.shadowRoot.childNodes.forEach(childNode => {
172
+ const clonedNode = childNode.cloneNode(true);
173
+ shadowContent.appendChild(clonedNode);
174
+ });
175
+
176
+ // Handle slots
177
+ const slots = shadowContent.querySelectorAll('slot');
178
+ slots.forEach(slot => {
179
+ const slotName = slot.getAttribute('name') || '';
180
+ const assignedElements = original.querySelectorAll(
181
+ slotName ? \`[slot="\${slotName}"]\` : ':not([slot])'
182
+ );
183
+
184
+ if (assignedElements.length > 0) {
185
+ const slotContent = document.createDocumentFragment();
186
+ assignedElements.forEach(el => {
187
+ const clonedEl = el.cloneNode(true);
188
+ slotContent.appendChild(clonedEl);
189
+ });
190
+ slot.parentNode.replaceChild(slotContent, slot);
191
+ } else if (!slotName) {
192
+ // Keep default slot content
193
+ // No need to do anything as it's already cloned
194
+ }
195
+ });
196
+
197
+ cloned.appendChild(shadowContent);
198
+ }
199
+ }
200
+
201
+ // Use a TreeWalker on the original root to clone the entire structure
202
+ const treeWalker = document.createTreeWalker(
203
+ rootElement,
204
+ NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT
205
+ );
206
+
207
+ const elementMap = new Map([[rootElement, clone]]);
208
+
209
+ let currentNode;
210
+ while (currentNode = treeWalker.nextNode()) {
211
+ const parentClone = elementMap.get(currentNode.parentNode);
212
+ const clonedNode = currentNode.cloneNode(false);
213
+ parentClone.appendChild(clonedNode);
214
+
215
+ if (currentNode.nodeType === Node.ELEMENT_NODE) {
216
+ elementMap.set(currentNode, clonedNode);
217
+ processShadowRoot(currentNode, clonedNode);
218
+ }
219
+ }
220
+
221
+ return clone;
222
+ }
223
+
224
+ function shadowDomPresent(rootElement = document.documentElement) {
225
+ const elems = rootElement.querySelectorAll('*');
226
+ for (const x of elems) {
227
+ if (x.shadowRoot && x.shadowRoot.mode === 'open') {
228
+ return true;
229
+ }
230
+ }
231
+ return false;
232
+ }
233
+
234
  function giveSnapshot(stopActiveSnapshot) {
235
  if (stopActiveSnapshot) {
236
  window.haltSnapshot = true;
 
248
  href: document.location.href,
249
  html: document.documentElement?.outerHTML,
250
  text: document.body?.innerText,
251
+ shadowExpanded: shadowDomPresent() ? cloneAndExpandShadowRoots()?.outerHTML : undefined,
252
  parsed: parsed,
253
  imgs: [],
254
  maxElemDepth: domAnalysis.maxDepth,
backend/functions/src/services/snapshot-formatter.ts CHANGED
@@ -299,12 +299,12 @@ export class SnapshotFormatter extends AsyncService {
299
  && toBeTurnedToMd !== jsDomElementOfHTML
300
  ) {
301
  try {
302
- contentText = this.jsdomControl.runTurndown(turnDownService, snapshot.html);
303
  } catch (err) {
304
  this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
305
  const vanillaTurnDownService = this.getTurndown({ url: snapshot.rebase || nominalUrl, imgDataUrlToObjectUrl });
306
  try {
307
- contentText = this.jsdomControl.runTurndown(vanillaTurnDownService, snapshot.html);
308
  } catch (err2) {
309
  this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
310
  }
 
299
  && toBeTurnedToMd !== jsDomElementOfHTML
300
  ) {
301
  try {
302
+ contentText = this.jsdomControl.runTurndown(turnDownService, jsDomElementOfHTML);
303
  } catch (err) {
304
  this.logger.warn(`Turndown failed to run, retrying without plugins`, { err });
305
  const vanillaTurnDownService = this.getTurndown({ url: snapshot.rebase || nominalUrl, imgDataUrlToObjectUrl });
306
  try {
307
+ contentText = this.jsdomControl.runTurndown(vanillaTurnDownService, jsDomElementOfHTML);
308
  } catch (err2) {
309
  this.logger.warn(`Turndown failed to run, giving up`, { err: err2 });
310
  }
thinapps-shared CHANGED
@@ -1 +1 @@
1
- Subproject commit 09a88ebec8ba6154df6cb0b5a3caab07fe7cd150
 
1
+ Subproject commit fecbdd92230de5ebd0de168b43b0358d8221769f