nomagick committed on
Commit
3dc902e
·
unverified ·
1 Parent(s): c758ec5

fix: puppeteer tricks

Browse files
src/services/minimal-stealth.js ADDED
@@ -0,0 +1,599 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ export function minimalStealth() {
4
+ /**
5
+ * A set of shared utility functions specifically for the purpose of modifying native browser APIs without leaving traces.
6
+ *
7
+ * Meant to be passed down in puppeteer and used in the context of the page (everything in here runs in NodeJS as well as a browser).
8
+ *
9
+ * Note: If for whatever reason you need to use this outside of `puppeteer-extra`:
10
+ * Just remove the `module.exports` statement at the very bottom, the rest can be copy pasted into any browser context.
11
+ *
12
+ * Alternatively take a look at the `extract-stealth-evasions` package to create a finished bundle which includes these utilities.
13
+ *
14
+ */
15
+ const utils = {};
16
+
17
+ // const toStringRedirects = new WeakMap();
18
+
19
+ utils.init = () => {
20
+ utils.preloadCache();
21
+ // const handler = {
22
+ // apply: function (target, ctx) {
23
+ // // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""`
24
+ // if (ctx === Function.prototype.toString) {
25
+ // return utils.makeNativeString('toString');
26
+ // }
27
+
28
+ // const originalObj = toStringRedirects.get(ctx);
29
+
30
+ // // `toString` targeted at our proxied Object detected
31
+ // if (originalObj) {
32
+ // const fallback = () =>
33
+ // originalObj && originalObj.name
34
+ // ? utils.makeNativeString(originalObj.name)
35
+ // : utils.makeNativeString(ctx.name);
36
+
37
+ // // Return the toString representation of our original object if possible
38
+ // return originalObj + '' || fallback();
39
+ // }
40
+
41
+ // if (typeof ctx === 'undefined' || ctx === null) {
42
+ // return target.call(ctx);
43
+ // }
44
+
45
+ // // Check if the toString prototype of the context is the same as the global prototype,
46
+ // // if not, that indicates we are doing a check across different windows, e.g. the `iframeWithdirect` test case
47
+ // const hasSameProto = Object.getPrototypeOf(
48
+ // Function.prototype.toString
49
+ // ).isPrototypeOf(ctx.toString); // eslint-disable-line no-prototype-builtins
50
+ // if (!hasSameProto) {
51
+ // // Pass the call on to the local Function.prototype.toString instead
52
+ // return ctx.toString();
53
+ // }
54
+
55
+ // return target.call(ctx);
56
+ // }
57
+ // };
58
+
59
+ // const toStringProxy = new Proxy(
60
+ // Function.prototype.toString,
61
+ // utils.stripProxyFromErrors(handler)
62
+ // );
63
+ // utils.replaceProperty(Function.prototype, 'toString', {
64
+ // value: toStringProxy
65
+ // });
66
+ };
67
+
68
+ /**
69
+ * Wraps a JS Proxy Handler and strips its presence from error stacks, in case the traps throw.
70
+ *
71
+ * The presence of a JS Proxy can be revealed as it shows up in error stack traces.
72
+ *
73
+ * @param {object} handler - The JS Proxy handler to wrap
74
+ */
75
+ utils.stripProxyFromErrors = (handler = {}) => {
76
+ const newHandler = {
77
+ setPrototypeOf: function (target, proto) {
78
+ if (proto === null)
79
+ throw new TypeError('Cannot convert object to primitive value');
80
+ if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {
81
+ throw new TypeError('Cyclic __proto__ value');
82
+ }
83
+ return Reflect.setPrototypeOf(target, proto);
84
+ }
85
+ };
86
+ // We wrap each trap in the handler in a try/catch and modify the error stack if they throw
87
+ const traps = Object.getOwnPropertyNames(handler);
88
+ traps.forEach(trap => {
89
+ newHandler[trap] = function () {
90
+ try {
91
+ // Forward the call to the defined proxy handler
92
+ return handler[trap].call(this, ...(arguments || []));
93
+ } catch (err) {
94
+ // Stack traces differ per browser, we only support chromium based ones currently
95
+ if (!err || !err.stack || !err.stack.includes(`at `)) {
96
+ throw err;
97
+ }
98
+
99
+ // When something throws within one of our traps the Proxy will show up in error stacks
100
+ // An earlier implementation of this code would simply strip lines with a blacklist,
101
+ // but it makes sense to be more surgical here and only remove lines related to our Proxy.
102
+ // We try to use a known "anchor" line for that and strip it with everything above it.
103
+ // If the anchor line cannot be found for some reason we fall back to our blacklist approach.
104
+
105
+ const stripWithBlacklist = (stack, stripFirstLine = true) => {
106
+ const blacklist = [
107
+ `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply
108
+ `at Object.${trap} `, // e.g. Object.get or Object.apply
109
+ `at Object.newHandler.<computed> [as ${trap}] ` // caused by this very wrapper :-)
110
+ ];
111
+ return (
112
+ err.stack
113
+ .split('\n')
114
+ // Always remove the first (file) line in the stack (guaranteed to be our proxy)
115
+ .filter((line, index) => !(index === 1 && stripFirstLine))
116
+ // Check if the line starts with one of our blacklisted strings
117
+ .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))
118
+ .join('\n')
119
+ );
120
+ };
121
+
122
+ const stripWithAnchor = (stack, anchor) => {
123
+ const stackArr = stack.split('\n');
124
+ anchor = anchor || `at Object.newHandler.<computed> [as ${trap}] `; // Known first Proxy line in chromium
125
+ const anchorIndex = stackArr.findIndex(line =>
126
+ line.trim().startsWith(anchor)
127
+ );
128
+ if (anchorIndex === -1) {
129
+ return false; // 404, anchor not found
130
+ }
131
+ // Strip everything from the top until we reach the anchor line
132
+ // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
133
+ stackArr.splice(1, anchorIndex);
134
+ return stackArr.join('\n');
135
+ };
136
+
137
+ // Special cases due to our nested toString proxies
138
+ err.stack = err.stack.replace(
139
+ 'at Object.toString (',
140
+ 'at Function.toString ('
141
+ );
142
+ if ((err.stack || '').includes('at Function.toString (')) {
143
+ err.stack = stripWithBlacklist(err.stack, false);
144
+ throw err;
145
+ }
146
+
147
+ // Try using the anchor method, fallback to blacklist if necessary
148
+ err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack);
149
+
150
+ throw err; // Re-throw our now sanitized error
151
+ }
152
+ };
153
+ });
154
+ return newHandler;
155
+ };
156
+
157
+ /**
158
+ * Strip error lines from stack traces until (and including) a known line in the stack.
159
+ *
160
+ * @param {object} err - The error to sanitize
161
+ * @param {string} anchor - The string the anchor line starts with
162
+ */
163
+ utils.stripErrorWithAnchor = (err, anchor) => {
164
+ const stackArr = err.stack.split('\n');
165
+ const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor));
166
+ if (anchorIndex === -1) {
167
+ return err; // 404, anchor not found
168
+ }
169
+ // Strip everything from the top until we reach the anchor line (remove anchor line as well)
170
+ // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
171
+ stackArr.splice(1, anchorIndex);
172
+ err.stack = stackArr.join('\n');
173
+ return err;
174
+ };
175
+
176
+ /**
177
+ * Replace the property of an object in a stealthy way.
178
+ *
179
+ * Note: You also want to work on the prototype of an object most often,
180
+ * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)).
181
+ *
182
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty
183
+ *
184
+ * @example
185
+ * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: "alice" })
186
+ * // or
187
+ * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] })
188
+ *
189
+ * @param {object} obj - The object which has the property to replace
190
+ * @param {string} propName - The property name to replace
191
+ * @param {object} descriptorOverrides - e.g. { value: "alice" }
192
+ */
193
+ utils.replaceProperty = (obj, propName, descriptorOverrides = {}) => {
194
+ return Object.defineProperty(obj, propName, {
195
+ // Copy over the existing descriptors (writable, enumerable, configurable, etc)
196
+ ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),
197
+ // Add our overrides (e.g. value, get())
198
+ ...descriptorOverrides
199
+ });
200
+ };
201
+
202
+ /**
203
+ * Preload a cache of function copies and data.
204
+ *
205
+ * For a determined enough observer it would be possible to overwrite and sniff usage of functions
206
+ * we use in our internal Proxies, to combat that we use a cached copy of those functions.
207
+ *
208
+ * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before,
209
+ * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups).
210
+ *
211
+ * This is evaluated once per execution context (e.g. window)
212
+ */
213
+ utils.preloadCache = () => {
214
+ if (utils.cache) {
215
+ return;
216
+ }
217
+ utils.cache = {
218
+ // Used in our proxies
219
+ Reflect: {
220
+ get: Reflect.get.bind(Reflect),
221
+ apply: Reflect.apply.bind(Reflect)
222
+ },
223
+ // Used in `makeNativeString`
224
+ nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`
225
+ };
226
+ };
227
+
228
+ /**
229
+ * Utility function to generate a cross-browser `toString` result representing native code.
230
+ *
231
+ * There are small differences: Chromium uses a single line, whereas FF & WebKit use multiline strings.
232
+ * To future-proof this we use an existing native toString result as the basis.
233
+ *
234
+ * The only advantage we have over the other team is that our JS runs first, hence we cache the result
235
+ * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it.
236
+ *
237
+ * @example
238
+ * makeNativeString('foobar') // => `function foobar() { [native code] }`
239
+ *
240
+ * @param {string} [name] - Optional function name
241
+ */
242
+ utils.makeNativeString = (name = '') => {
243
+ return utils.cache.nativeToStringStr.replace('toString', name || '');
244
+ };
245
+
246
+ /**
247
+ * Helper function to modify the `toString()` result of the provided object.
248
+ *
249
+ * Note: Use `utils.redirectToString` instead when possible.
250
+ *
251
+ * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object.
252
+ * If no string is provided we will generate a `[native code]` thing based on the name of the property object.
253
+ *
254
+ * @example
255
+ * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }')
256
+ *
257
+ * @param {object} obj - The object for which to modify the `toString()` representation
258
+ * @param {string} str - Optional string used as a return value
259
+ */
260
+ utils.patchToString = (obj, str = '') => {
261
+ // toStringRedirects.set(obj, str);
262
+ Object.defineProperty(obj, 'toString', {
263
+ value: ()=> str,
264
+ enumerable: false,
265
+ writable: true,
266
+ configurable: true,
267
+ });
268
+ };
269
+
270
+ /**
271
+ * Make all nested functions of an object native.
272
+ *
273
+ * @param {object} obj
274
+ */
275
+ utils.patchToStringNested = (obj = {}) => {
276
+ return utils.execRecursively(obj, ['function'], utils.patchToString);
277
+ };
278
+
279
+ /**
280
+ * Redirect toString requests from one object to another.
281
+ *
282
+ * @param {object} proxyObj - The object that toString will be called on
283
+ * @param {object} originalObj - The object whose toString result we want to return
284
+ */
285
+ utils.redirectToString = (proxyObj, originalObj) => {
286
+ // toStringRedirects.set(proxyObj, originalObj);
287
+ Object.defineProperty(proxyObj, 'toString', {
288
+ value: ()=> originalObj.toString(),
289
+ enumerable: false,
290
+ writable: true,
291
+ configurable: true,
292
+ });
293
+ };
294
+
295
+
296
+ /**
297
+ * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps.
298
+ *
299
+ * Will stealthify these aspects (strip error stack traces, redirect toString, etc).
300
+ * Note: This is meant to modify native Browser APIs and works best with prototype objects.
301
+ *
302
+ * @example
303
+ * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler)
304
+ *
305
+ * @param {object} obj - The object which has the property to replace
306
+ * @param {string} propName - The name of the property to replace
307
+ * @param {object} handler - The JS Proxy handler to use
308
+ */
309
+ utils.replaceWithProxy = (obj, propName, handler) => {
310
+ const originalObj = obj[propName];
311
+ const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler));
312
+
313
+ utils.replaceProperty(obj, propName, { value: proxyObj });
314
+ utils.redirectToString(proxyObj, originalObj);
315
+
316
+ return true;
317
+ };
318
+ /**
319
+ * All-in-one method to replace a getter with a JS Proxy using the provided Proxy handler with traps.
320
+ *
321
+ * @example
322
+ * replaceGetterWithProxy(Object.getPrototypeOf(navigator), 'vendor', proxyHandler)
323
+ *
324
+ * @param {object} obj - The object which has the property to replace
325
+ * @param {string} propName - The name of the property to replace
326
+ * @param {object} handler - The JS Proxy handler to use
327
+ */
328
+ utils.replaceGetterWithProxy = (obj, propName, handler) => {
329
+ const fn = Object.getOwnPropertyDescriptor(obj, propName).get;
330
+ const fnStr = fn.toString(); // special getter function string
331
+ const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler));
332
+
333
+ utils.replaceProperty(obj, propName, { get: proxyObj });
334
+ utils.patchToString(proxyObj, fnStr);
335
+
336
+ return true;
337
+ };
338
+
339
+ /**
340
+ * All-in-one method to replace a getter and/or setter. Functions get and set
341
+ * of handler have one more argument that contains the native function.
342
+ *
343
+ * @example
344
+ * replaceGetterSetter(HTMLIFrameElement.prototype, 'contentWindow', handler)
345
+ *
346
+ * @param {object} obj - The object which has the property to replace
347
+ * @param {string} propName - The name of the property to replace
348
+ * @param {object} handlerGetterSetter - The handler with get and/or set
349
+ * functions
350
+ * @see https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty#description
351
+ */
352
+ utils.replaceGetterSetter = (obj, propName, handlerGetterSetter) => {
353
+ const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName);
354
+ const handler = { ...ownPropertyDescriptor };
355
+
356
+ if (handlerGetterSetter.get !== undefined) {
357
+ const nativeFn = ownPropertyDescriptor.get;
358
+ handler.get = function () {
359
+ return handlerGetterSetter.get.call(this, nativeFn.bind(this));
360
+ };
361
+ utils.redirectToString(handler.get, nativeFn);
362
+ }
363
+
364
+ if (handlerGetterSetter.set !== undefined) {
365
+ const nativeFn = ownPropertyDescriptor.set;
366
+ handler.set = function (newValue) {
367
+ handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this));
368
+ };
369
+ utils.redirectToString(handler.set, nativeFn);
370
+ }
371
+
372
+ Object.defineProperty(obj, propName, handler);
373
+ };
374
+
375
+ /**
376
+ * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps.
377
+ *
378
+ * Will stealthify these aspects (strip error stack traces, redirect toString, etc).
379
+ *
380
+ * @example
381
+ * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler)
382
+ *
383
+ * @param {object} obj - The object which has the property to replace
384
+ * @param {string} propName - The name of the property to replace or create
385
+ * @param {object} pseudoTarget - The JS Proxy target to use as a basis
386
+ * @param {object} handler - The JS Proxy handler to use
387
+ */
388
+ utils.mockWithProxy = (obj, propName, pseudoTarget, handler) => {
389
+ const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));
390
+
391
+ utils.replaceProperty(obj, propName, { value: proxyObj });
392
+ utils.patchToString(proxyObj);
393
+
394
+ return true;
395
+ };
396
+
397
+ /**
398
+ * All-in-one method to create a new JS Proxy with stealth tweaks.
399
+ *
400
+ * This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property.
401
+ *
402
+ * Will stealthify certain aspects of the Proxy (strip error stack traces, redirect toString, etc).
403
+ *
404
+ * @example
405
+ * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy
406
+ *
407
+ * @param {object} pseudoTarget - The JS Proxy target to use as a basis
408
+ * @param {object} handler - The JS Proxy handler to use
409
+ */
410
+ utils.createProxy = (pseudoTarget, handler) => {
411
+ const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));
412
+ utils.patchToString(proxyObj);
413
+
414
+ return proxyObj;
415
+ };
416
+
417
+ /**
418
+ * Helper function to split a full path to an Object into the first part and property.
419
+ *
420
+ * @example
421
+ * splitObjPath(`HTMLMediaElement.prototype.canPlayType`)
422
+ * // => {objName: "HTMLMediaElement.prototype", propName: "canPlayType"}
423
+ *
424
+ * @param {string} objPath - The full path to an object as dot notation string
425
+ */
426
+ utils.splitObjPath = objPath => ({
427
+ // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`
428
+ objName: objPath.split('.').slice(0, -1).join('.'),
429
+ // Extract last dot entry ==> `canPlayType`
430
+ propName: objPath.split('.').slice(-1)[0]
431
+ });
432
+
433
+ /**
434
+ * Convenience method to replace a property with a JS Proxy using the provided objPath.
435
+ *
436
+ * Supports a full path (dot notation) to the object as string here, in case that makes it easier.
437
+ *
438
+ * @example
439
+ * replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler)
440
+ *
441
+ * @param {string} objPath - The full path to an object (dot notation string) to replace
442
+ * @param {object} handler - The JS Proxy handler to use
443
+ */
444
+ utils.replaceObjPathWithProxy = (objPath, handler) => {
445
+ const { objName, propName } = utils.splitObjPath(objPath);
446
+ const obj = eval(objName); // eslint-disable-line no-eval
447
+ return utils.replaceWithProxy(obj, propName, handler);
448
+ };
449
+
450
+ /**
451
+ * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types.
452
+ *
453
+ * @param {object} obj
454
+ * @param {array} typeFilter - e.g. `['function']`
455
+ * @param {Function} fn - e.g. `utils.patchToString`
456
+ */
457
+ utils.execRecursively = (obj = {}, typeFilter = [], fn) => {
458
+ function recurse(obj) {
459
+ for (const key in obj) {
460
+ if (obj[key] === undefined) {
461
+ continue;
462
+ }
463
+ if (obj[key] && typeof obj[key] === 'object') {
464
+ recurse(obj[key]);
465
+ } else {
466
+ if (obj[key] && typeFilter.includes(typeof obj[key])) {
467
+ fn.call(this, obj[key]);
468
+ }
469
+ }
470
+ }
471
+ }
472
+ recurse(obj);
473
+ return obj;
474
+ };
475
+
476
+ /**
477
+ * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one.
478
+ * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter.
479
+ *
480
+ * Unfortunately the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process.
481
+ * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings.
482
+ *
483
+ * We use this to pass down our utility functions as well as any other functions (to be able to split up code better).
484
+ *
485
+ * @see utils.materializeFns
486
+ *
487
+ * @param {object} fnObj - An object containing functions as properties
488
+ */
489
+ utils.stringifyFns = (fnObj = { hello: () => 'world' }) => {
490
+ // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine
491
+ // https://github.com/feross/fromentries
492
+ function fromEntries(iterable) {
493
+ return [...iterable].reduce((obj, [key, val]) => {
494
+ obj[key] = val;
495
+ return obj;
496
+ }, {});
497
+ }
498
+ return (Object.fromEntries || fromEntries)(
499
+ Object.entries(fnObj)
500
+ .filter(([key, value]) => typeof value === 'function')
501
+ .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval
502
+ );
503
+ };
504
+
505
+ /**
506
+ * Utility function to reverse the process of `utils.stringifyFns`.
507
+ * Will materialize an object with stringified functions (supports classic and fat arrow functions).
508
+ *
509
+ * @param {object} fnStrObj - An object containing stringified functions as properties
510
+ */
511
+ utils.materializeFns = (fnStrObj = { hello: "() => 'world'" }) => {
512
+ return Object.fromEntries(
513
+ Object.entries(fnStrObj).map(([key, value]) => {
514
+ if (value.startsWith('function')) {
515
+ // some trickery is needed to make oldschool functions work :-)
516
+ return [key, eval(`() => ${value}`)()]; // eslint-disable-line no-eval
517
+ } else {
518
+ // arrow functions just work
519
+ return [key, eval(value)]; // eslint-disable-line no-eval
520
+ }
521
+ })
522
+ );
523
+ };
524
+
525
+ // Proxy handler templates for re-usability
526
+ utils.makeHandler = () => ({
527
+ // Used by simple `navigator` getter evasions
528
+ getterValue: value => ({
529
+ apply(target, ctx, args) {
530
+ // Let's fetch the value first, to trigger and escalate potential errors
531
+ // Illegal invocations like `navigator.__proto__.vendor` will throw here
532
+ utils.cache.Reflect.apply(...arguments);
533
+ return value;
534
+ }
535
+ })
536
+ });
537
+
538
+ /**
539
+ * Compare two arrays.
540
+ *
541
+ * @param {array} array1 - First array
542
+ * @param {array} array2 - Second array
543
+ */
544
+ utils.arrayEquals = (array1, array2) => {
545
+ if (array1.length !== array2.length) {
546
+ return false;
547
+ }
548
+ for (let i = 0; i < array1.length; ++i) {
549
+ if (array1[i] !== array2[i]) {
550
+ return false;
551
+ }
552
+ }
553
+ return true;
554
+ };
555
+
556
+ /**
557
+ * Cache the method return according to its arguments.
558
+ *
559
+ * @param {Function} fn - A function that will be cached
560
+ */
561
+ utils.memoize = fn => {
562
+ const cache = [];
563
+ return function (...args) {
564
+ if (!cache.some(c => utils.arrayEquals(c.key, args))) {
565
+ cache.push({ key: args, value: fn.apply(this, args) });
566
+ }
567
+ return cache.find(c => utils.arrayEquals(c.key, args)).value;
568
+ };
569
+ };
570
+
571
+ utils.init();
572
+
573
+ const getParameterProxyHandler = {
574
+ apply: function (target, ctx, args) {
575
+ const param = (args || [])[0]
576
+ const result = utils.cache.Reflect.apply(target, ctx, args)
577
+ // UNMASKED_VENDOR_WEBGL
578
+ if (param === 37445) {
579
+ return 'Intel Inc.' // default in headless: Google Inc.
580
+ }
581
+ // UNMASKED_RENDERER_WEBGL
582
+ if (param === 37446) {
583
+ return 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader
584
+ }
585
+ return result
586
+ }
587
+ }
588
+
589
+ // There's more than one WebGL rendering context
590
+ // https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility
591
+ // To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)
592
+ const addProxy = (obj, propName) => {
593
+ utils.replaceWithProxy(obj, propName, getParameterProxyHandler)
594
+ }
595
+ // For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:
596
+ addProxy(WebGLRenderingContext.prototype, 'getParameter')
597
+ addProxy(WebGL2RenderingContext.prototype, 'getParameter')
598
+
599
+ }
src/services/puppeteer.ts CHANGED
@@ -19,6 +19,7 @@ import { CurlControl } from './curl';
19
  import { BlackHoleDetector } from './blackhole-detector';
20
  import { AsyncLocalContext } from './async-context';
21
  import { GlobalLogger } from './logger';
 
22
  const tldExtract = require('tld-extract');
23
 
24
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
@@ -234,6 +235,7 @@ const SCRIPT_TO_INJECT_INTO_FRAME = `
234
  ${READABILITY_JS}
235
  ${SIMULATE_SCROLL}
236
  ${MUTATION_IDLE_WATCH}
 
237
 
238
  (function(){
239
  function briefImgs(elem) {
@@ -559,7 +561,9 @@ export class PuppeteerControl extends AsyncService {
559
  timeout: 10_000,
560
  headless: !Boolean(process.env.DEBUG_BROWSER),
561
  executablePath: process.env.OVERRIDE_CHROME_EXECUTABLE_PATH,
562
- args: ['--disable-dev-shm-usage']
 
 
563
  }).catch((err: any) => {
564
  this.logger.error(`Unknown firebase issue, just die fast.`, { err });
565
  process.nextTick(() => {
@@ -1172,8 +1176,8 @@ export class PuppeteerControl extends AsyncService {
1172
  try {
1173
  const pSubFrameSnapshots = this.snapshotChildFrames(page);
1174
  snapshot = await page.evaluate('giveSnapshot(true)') as PageSnapshot;
1175
- screenshot = Buffer.from(await page.screenshot());
1176
- pageshot = Buffer.from(await page.screenshot({ fullPage: true }));
1177
  if (snapshot) {
1178
  snapshot.childFrames = await pSubFrameSnapshots;
1179
  }
@@ -1190,22 +1194,6 @@ export class PuppeteerControl extends AsyncService {
1190
  throw stuff;
1191
  }
1192
  }
1193
- // try {
1194
- // if ((!snapshot?.title || !snapshot?.parsed?.content) && !(snapshot?.pdfs?.length)) {
1195
- // const salvaged = await this.salvage(url, page);
1196
- // if (salvaged) {
1197
- // const pSubFrameSnapshots = this.snapshotChildFrames(page);
1198
- // snapshot = await page.evaluate('giveSnapshot(true)') as PageSnapshot;
1199
- // screenshot = Buffer.from(await page.screenshot());
1200
- // pageshot = Buffer.from(await page.screenshot({ fullPage: true }));
1201
- // if (snapshot) {
1202
- // snapshot.childFrames = await pSubFrameSnapshots;
1203
- // }
1204
- // }
1205
- // }
1206
- // } catch (err: any) {
1207
- // this.logger.warn(`Page ${sn}: Failed to salvage ${url}`, { err });
1208
- // }
1209
 
1210
  finalized = true;
1211
  if (snapshot?.html) {
@@ -1236,8 +1224,8 @@ export class PuppeteerControl extends AsyncService {
1236
  .then(async () => {
1237
  const pSubFrameSnapshots = this.snapshotChildFrames(page);
1238
  snapshot = await page.evaluate('giveSnapshot(true)') as PageSnapshot;
1239
- screenshot = Buffer.from(await page.screenshot());
1240
- pageshot = Buffer.from(await page.screenshot({ fullPage: true }));
1241
  if (snapshot) {
1242
  snapshot.childFrames = await pSubFrameSnapshots;
1243
  }
@@ -1279,8 +1267,8 @@ export class PuppeteerControl extends AsyncService {
1279
  break;
1280
  }
1281
  if (options.favorScreenshot && snapshot?.title && snapshot?.html !== lastHTML) {
1282
- screenshot = Buffer.from(await page.screenshot());
1283
- pageshot = Buffer.from(await page.screenshot({ fullPage: true }));
1284
  lastHTML = snapshot.html;
1285
  }
1286
  if (snapshot || screenshot) {
@@ -1306,28 +1294,17 @@ export class PuppeteerControl extends AsyncService {
1306
  }
1307
  }
1308
 
1309
- // async salvage(url: string, page: Page) {
1310
- // this.logger.info(`Salvaging ${url}`);
1311
- // const googleArchiveUrl = `https://webcache.googleusercontent.com/search?q=cache:${encodeURIComponent(url)}`;
1312
- // const resp = await fetch(googleArchiveUrl, {
1313
- // headers: {
1314
- // 'User-Agent': `Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)`
1315
- // }
1316
- // });
1317
- // resp.body?.cancel().catch(() => void 0);
1318
- // if (!resp.ok) {
1319
- // this.logger.warn(`No salvation found for url: ${url}`, { status: resp.status, url });
1320
- // return null;
1321
- // }
1322
-
1323
- // await page.goto(googleArchiveUrl, { waitUntil: ['load', 'domcontentloaded', 'networkidle0'], timeout: 15_000 }).catch((err) => {
1324
- // this.logger.warn(`Page salvation did not fully succeed.`, { err });
1325
- // });
1326
-
1327
- // this.logger.info(`Salvation completed.`);
1328
-
1329
- // return true;
1330
- // }
1331
 
1332
  async snapshotChildFrames(page: Page): Promise<PageSnapshot[]> {
1333
  const childFrames = page.mainFrame().childFrames();
 
19
  import { BlackHoleDetector } from './blackhole-detector';
20
  import { AsyncLocalContext } from './async-context';
21
  import { GlobalLogger } from './logger';
22
+ import { minimalStealth } from './minimal-stealth';
23
  const tldExtract = require('tld-extract');
24
 
25
  const READABILITY_JS = fs.readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf-8');
 
235
  ${READABILITY_JS}
236
  ${SIMULATE_SCROLL}
237
  ${MUTATION_IDLE_WATCH}
238
+ (${minimalStealth.toString()})();
239
 
240
  (function(){
241
  function briefImgs(elem) {
 
561
  timeout: 10_000,
562
  headless: !Boolean(process.env.DEBUG_BROWSER),
563
  executablePath: process.env.OVERRIDE_CHROME_EXECUTABLE_PATH,
564
+ args: [
565
+ '--disable-dev-shm-usage', '--disable-blink-features=AutomationControlled'
566
+ ]
567
  }).catch((err: any) => {
568
  this.logger.error(`Unknown firebase issue, just die fast.`, { err });
569
  process.nextTick(() => {
 
1176
  try {
1177
  const pSubFrameSnapshots = this.snapshotChildFrames(page);
1178
  snapshot = await page.evaluate('giveSnapshot(true)') as PageSnapshot;
1179
+ screenshot = await this.takeScreenShot(page);
1180
+ pageshot = await this.takeScreenShot(page, { fullPage: true });
1181
  if (snapshot) {
1182
  snapshot.childFrames = await pSubFrameSnapshots;
1183
  }
 
1194
  throw stuff;
1195
  }
1196
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1197
 
1198
  finalized = true;
1199
  if (snapshot?.html) {
 
1224
  .then(async () => {
1225
  const pSubFrameSnapshots = this.snapshotChildFrames(page);
1226
  snapshot = await page.evaluate('giveSnapshot(true)') as PageSnapshot;
1227
+ screenshot = await this.takeScreenShot(page);
1228
+ pageshot = await this.takeScreenShot(page, { fullPage: true });
1229
  if (snapshot) {
1230
  snapshot.childFrames = await pSubFrameSnapshots;
1231
  }
 
1267
  break;
1268
  }
1269
  if (options.favorScreenshot && snapshot?.title && snapshot?.html !== lastHTML) {
1270
+ screenshot = await this.takeScreenShot(page);
1271
+ pageshot = await this.takeScreenShot(page, { fullPage: true });
1272
  lastHTML = snapshot.html;
1273
  }
1274
  if (snapshot || screenshot) {
 
1294
  }
1295
  }
1296
 
1297
+ protected async takeScreenShot(page: Page, opts?: Parameters<typeof page.screenshot>[0]): Promise<Buffer | undefined> {
1298
+ const r = await page.screenshot(opts).catch((err) => {
1299
+ this.logger.warn(`Failed to take screenshot`, { err });
1300
+ });
1301
+
1302
+ if (r) {
1303
+ return Buffer.from(r);
1304
+ }
1305
+
1306
+ return undefined;
1307
+ }
 
 
 
 
 
 
 
 
 
 
 
1308
 
1309
  async snapshotChildFrames(page: Page): Promise<PageSnapshot[]> {
1310
  const childFrames = page.mainFrame().childFrames();