Spaces:
Build error
Build error
| ; | |
/**
 * TypeScript decorator helper. Reuses an already-installed `__decorate` when one
 * is present on `this`, otherwise applies `decorators` right-to-left.
 *
 * Called with two arguments it decorates a class (`target`); with three or four
 * it decorates the member `key` of `target`. A `desc` of exactly `null` means
 * "look the property descriptor up on the target".
 *
 * @param {Function[]} decorators - Decorators to apply; falsy entries are skipped.
 * @param {*} target - Class constructor or prototype being decorated.
 * @param {string|symbol} [key] - Member name for member decorators.
 * @param {PropertyDescriptor|null} [desc] - Member descriptor, or `null` to look it up.
 * @returns {*} The decorated class, or the resulting descriptor for members.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    }
    else if (desc === null) {
        desc = Object.getOwnPropertyDescriptor(target, key);
        result = desc;
    }
    else {
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        // Prefer a runtime-provided implementation when one is installed.
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) {
                continue;
            }
            var produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            // A decorator returning nothing leaves the current result in place.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
/**
 * TypeScript metadata helper. Reuses an already-installed `__metadata` when one
 * is present on `this`; otherwise delegates to `Reflect.metadata` when that
 * function exists and returns `undefined` when it does not.
 *
 * @param {*} metadataKey - Metadata key (e.g. "design:type").
 * @param {*} metadataValue - Metadata value to associate with the key.
 * @returns {*} Whatever `Reflect.metadata` returns, or `undefined`.
 */
var __metadata = (this && this.__metadata) || function (metadataKey, metadataValue) {
    var reflectAvailable = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    if (reflectAvailable) {
        return Reflect.metadata(metadataKey, metadataValue);
    }
};
// Scratch variable used by the emitted "design:paramtypes" metadata expression further down in this file.
| var _a; | |
// Flag this CommonJS module as a transpiled ES module.
| Object.defineProperty(exports, "__esModule", { value: true }); | |
// Pre-declare the public exports; they are assigned their real values below.
| exports.RobotsTxtService = exports.md5Hasher = void 0; | |
| const tsyringe_1 = require("tsyringe"); | |
| const url_1 = require("url"); | |
| const civ_rpc_1 = require("civkit/civ-rpc"); | |
| const async_service_1 = require("civkit/async-service"); | |
| const hash_1 = require("civkit/hash"); | |
| const lang_1 = require("civkit/lang"); | |
| const logger_1 = require("./logger"); | |
| const firebase_storage_bucket_1 = require("../shared/services/firebase-storage-bucket"); | |
| const threaded_1 = require("../services/threaded"); | |
// Shared hasher used to derive the robots.txt cache key from a site origin
// (presumably MD5 with hex output, per the constructor arguments — confirm against civkit/hash).
| exports.md5Hasher = new hash_1.HashManager('md5', 'hex'); | |
/**
 * Remove a trailing `# comment` from a robots.txt line and trim the remainder.
 *
 * @param {string} line - Raw line from robots.txt.
 * @returns {string} The directive text, or '' for blank/comment-only lines.
 */
function stripRobotsComment(line) {
    const hashAt = line.indexOf('#');
    return (hashAt === -1 ? line : line.slice(0, hashAt)).trim();
}
/**
 * Test a robots.txt path pattern against a request target (path plus query).
 *
 * Patterns are prefix matches; `*` matches any run of characters and a trailing
 * `$` anchors the pattern to the end of the target. Matching is done with plain
 * string searches (no RegExp is built from the untrusted pattern), taking the
 * leftmost occurrence of each literal segment.
 *
 * @param {string} pattern - Non-empty rule value from an Allow/Disallow line.
 * @param {string} target - `pathname + search` of the URL being checked.
 * @returns {boolean} Whether the pattern matches the target.
 */
function matchesRobotsPattern(pattern, target) {
    const anchored = pattern.endsWith('$');
    const segments = (anchored ? pattern.slice(0, -1) : pattern).split('*');
    const head = segments[0];
    if (!target.startsWith(head)) {
        return false;
    }
    if (segments.length === 1) {
        // No wildcard: plain prefix match, or exact match when anchored.
        return !anchored || target.length === head.length;
    }
    let cursor = head.length;
    for (const segment of segments.slice(1, -1)) {
        const foundAt = target.indexOf(segment, cursor);
        if (foundAt === -1) {
            return false;
        }
        cursor = foundAt + segment.length;
    }
    const tail = segments[segments.length - 1];
    if (anchored) {
        // The final literal must sit at the very end without overlapping earlier matches.
        return target.length - tail.length >= cursor && target.endsWith(tail);
    }
    return target.indexOf(tail, cursor) !== -1;
}
/**
 * Service that fetches (and caches in the storage bucket) a site's robots.txt
 * and checks whether a URL may be accessed by a given user agent.
 */
let RobotsTxtService = class RobotsTxtService extends async_service_1.AsyncService {
    /**
     * @param globalLogger - Root logger; a child logger tagged with this service's name is derived from it.
     * @param firebaseStorageBucketControl - Storage bucket client used as the robots.txt cache.
     */
    constructor(globalLogger, firebaseStorageBucketControl) {
        super(...arguments);
        this.globalLogger = globalLogger;
        this.firebaseStorageBucketControl = firebaseStorageBucketControl;
        this.logger = this.globalLogger.child({ service: this.constructor.name });
    }
    /** Wait for dependencies, then signal readiness by emitting 'ready'. */
    async init() {
        await this.dependencyReady();
        this.emit('ready');
    }
    /**
     * Return the robots.txt body for `origin`, preferring the cached copy.
     *
     * The cache key is `robots-txt/<hash of lower-cased origin>`. On a cache miss
     * the file is fetched with a 5 second timeout and written back to the cache
     * in the background; a failed write is logged, not thrown.
     *
     * @param {string} origin - Site origin, e.g. `https://example.com`.
     * @returns {Promise<string>} The robots.txt content.
     * @throws {DownstreamServiceFailureError} When the site answers with a non-OK status.
     */
    async getCachedRobotTxt(origin) {
        const digest = exports.md5Hasher.hash(origin.toLowerCase());
        const cacheLoc = `robots-txt/${digest}`;
        // Any download failure is deliberately treated as a cache miss.
        const cached = await this.firebaseStorageBucketControl.downloadFile(cacheLoc).catch(() => undefined);
        if (cached) {
            return cached.toString();
        }
        const r = await fetch(new url_1.URL('robots.txt', origin).href, { signal: AbortSignal.timeout(5000) });
        if (!r.ok) {
            throw new civ_rpc_1.DownstreamServiceFailureError(`Failed to fetch robots.txt from ${origin}: ${r.status} ${r.statusText}`);
        }
        const buff = Buffer.from(await r.arrayBuffer());
        // Best-effort write-back: not awaited so the caller is not delayed by the cache.
        this.firebaseStorageBucketControl.saveFile(cacheLoc, buff, {
            contentType: 'text/plain'
        }).catch((err) => {
            this.logger.warn(`Failed to save robots.txt to cache: ${err}`, { err: (0, lang_1.marshalErrorLike)(err) });
        });
        return buff.toString();
    }
    /**
     * Assert that `url` may be fetched by `inputMyUa` according to the site's robots.txt.
     *
     * Rule evaluation:
     * - A group applies when any of its consecutive `User-agent` lines is `*` or
     *   equals the caller's user agent (case-insensitive). Rules that appear
     *   before any `User-agent` line are treated as applying to the caller.
     * - Patterns are matched against `pathname + search`, supporting `*` and a
     *   trailing `$`.
     * - Among all matching rules the longest pattern wins; on a tie `Allow` wins.
     * - Empty `Allow:`/`Disallow:` values impose no restriction and are skipped.
     *
     * @param {URL} url - The URL about to be accessed.
     * @param {string} [inputMyUa='*'] - User agent token to evaluate rules for.
     * @returns {Promise<true>} Resolves to `true` when access is permitted.
     * @throws {ResourcePolicyDenyError} When the winning rule is a Disallow.
     * @throws {AssertionFailureError} When robots.txt could not be loaded at all (e.g. network error or timeout).
     */
    async assertAccessAllowed(url, inputMyUa = '*') {
        let robotTxt = '';
        try {
            robotTxt = await this.getCachedRobotTxt(url.origin);
        }
        catch (err) {
            if (err instanceof civ_rpc_1.DownstreamServiceFailureError) {
                // Remote server is reachable but cannot provide a robots.txt; this is treated as public access
                return true;
            }
            throw new civ_rpc_1.AssertionFailureError(`Failed to load robots.txt from ${url.origin}: ${err}`);
        }
        const myUa = inputMyUa.toLowerCase();
        const target = `${url.pathname}${url.search}`;
        let groupApplies = true;
        // True while reading the run of User-agent lines that opens a group.
        let inAgentHeader = false;
        let uaLine = 'User-Agent: *';
        let winner;
        for (const line of robotTxt.split(/\r?\n/g)) {
            const content = stripRobotsComment(line);
            if (!content) {
                continue;
            }
            const [k, ...rest] = content.split(':');
            const key = k.trim().toLowerCase();
            const value = rest.join(':').trim();
            if (key === 'user-agent') {
                if (!inAgentHeader) {
                    // First User-agent line after rules: a new group begins.
                    inAgentHeader = true;
                    groupApplies = false;
                }
                const agent = value.toLowerCase();
                if (agent === '*' || agent === myUa) {
                    groupApplies = true;
                    uaLine = line;
                }
                continue;
            }
            if (key !== 'allow' && key !== 'disallow') {
                continue;
            }
            inAgentHeader = false;
            if (!groupApplies || !value || !matchesRobotsPattern(value, target)) {
                continue;
            }
            const allow = key === 'allow';
            const moreSpecific = !winner || value.length > winner.length;
            const tieGoesToAllow = Boolean(winner) && value.length === winner.length && allow;
            if (moreSpecific || tieGoesToAllow) {
                winner = { allow, length: value.length, uaLine, line };
            }
        }
        if (winner && !winner.allow) {
            throw new civ_rpc_1.ResourcePolicyDenyError(`Access to ${url.href} is disallowed by site robots.txt: For ${winner.uaLine}, ${winner.line}`);
        }
        return true;
    }
};
// Publish the class, then apply the decorators emitted by the TypeScript compiler.
| exports.RobotsTxtService = RobotsTxtService; | |
// Method decorator: wrap assertAccessAllowed with Threaded() and attach its
// design-time type metadata (parameter types: URL when available, Object; return type: Promise).
| __decorate([ | |
| (0, threaded_1.Threaded)(), | |
| __metadata("design:type", Function), | |
| __metadata("design:paramtypes", [typeof (_a = typeof url_1.URL !== "undefined" && url_1.URL) === "function" ? _a : Object, Object]), | |
| __metadata("design:returntype", Promise) | |
| ], RobotsTxtService.prototype, "assertAccessAllowed", null); | |
// Class decorator: apply tsyringe's singleton() and record the constructor parameter
// types (GlobalLogger, FirebaseStorageBucketControl); the export is re-assigned to the decorated class.
| exports.RobotsTxtService = RobotsTxtService = __decorate([ | |
| (0, tsyringe_1.singleton)(), | |
| __metadata("design:paramtypes", [logger_1.GlobalLogger, | |
| firebase_storage_bucket_1.FirebaseStorageBucketControl]) | |
| ], RobotsTxtService); | |
| //# sourceMappingURL=robots-text.js.map |