web_reader / Dockerfile
Mohammad Shahid
Fix HF deployment: remove external deps, use pre-built files, download required assets
1ba2d7b
# syntax=docker/dockerfile:1
# Donor stage (index 0): nothing is built here; a later COPY --from=0 pulls
# libcurl-impersonate.so out of this image.
FROM lwthiker/curl-impersonate:0.6-chrome-slim-bullseye AS curl-impersonate

# Final stage: the Node.js 22 image that every following instruction builds on.
FROM node:22 AS runtime
# Install Google Chrome (stable channel) together with extra font packages,
# libxss1 and zstd.
#
# - The repository signing key is written directly to an ASCII-armored keyring in
#   /etc/apt/trusted.gpg.d instead of being piped into the deprecated
#   `apt-key add`. With no pipe in the chain, a failed download aborts the build
#   rather than being masked by the exit status of the last pipe stage.
# - The Chrome repository is fetched over HTTPS.
# - The list file is overwritten (`>`) so the entry can never be duplicated.
# - gnupg and wget are kept from the original package set.
# - The apt lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        gnupg \
        wget \
    && wget -q -O /etc/apt/trusted.gpg.d/google-linux-signing-key.asc \
        https://dl-ssl.google.com/linux/linux_signing_key.pub \
    && echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" \
        > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        fonts-freefont-ttf \
        fonts-ipafont-gothic \
        fonts-kacst \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
        google-chrome-stable \
        libxss1 \
        zstd \
    && rm -rf /var/lib/apt/lists/*
# Bring in only the impersonation shared library from the first stage (index 0);
# it is activated through LD_PRELOAD in the runtime environment set later on.
COPY --from=0 /usr/local/lib/libcurl-impersonate.so /usr/local/lib/
# Create the unprivileged "jina" account: a system group of the same name, a home
# directory, and supplementary membership in the audio and video groups.
# Every instruction after USER runs as this account.
RUN groupadd --system jina \
    && useradd --gid jina --groups audio,video --create-home jina
USER jina
# Application root; relative paths in the following instructions resolve here.
WORKDIR /app

# COPY creates files owned by UID/GID 0 no matter which USER is active, while the
# steps that follow run as jina and write under /app (downloaded assets, the
# compile cache inside node_modules). Hand ownership to jina explicitly so those
# writes cannot fail on root-owned paths.
# The manifests are copied on their own so the `npm ci` layer stays cached until
# package.json or package-lock.json changes.
COPY --chown=jina:jina package.json package-lock.json ./
RUN npm ci

# Remaining build context (pre-built output, static assets, ...).
COPY --chown=jina:jina . .
# Download required licensed files into ./licensed.
#   --fail        exit non-zero on an HTTP error instead of saving the error page
#                 as the .mmdb / .otf file and letting the build "succeed"
#   --location    follow redirects
#   --retry 3     ride out transient network failures
#   --silent --show-error   no progress meter, but errors are still printed
# NOTE(review): both URLs track a moving branch and the downloads are not
# checksum-verified; consider pinning to a commit and checking a sha256.
RUN mkdir -p licensed \
    && curl --fail --silent --show-error --location --retry 3 \
        --output licensed/GeoLite2-City.mmdb \
        https://raw.githubusercontent.com/P3TERX/GeoLite.mmdb/download/GeoLite2-City.mmdb \
    && curl --fail --silent --show-error --location --retry 3 \
        --output licensed/SourceHanSansSC-Regular.otf \
        https://raw.githubusercontent.com/adobe-fonts/source-han-sans/refs/heads/release/OTF/SimplifiedChinese/SourceHanSansSC-Regular.otf
# Set for the remaining build steps and for the running container.
# NOTE(review): presumably signals to the app that it runs inside a Hugging Face
# Space — confirm against the application code.
ENV HF_SPACE_ID=1

# Start from an empty Chromium profile directory in the jina user's home.
RUN rm -rf ~/.config/chromium && mkdir -p ~/.config/chromium

# Skip the TypeScript build since pre-built files are shipped; run the dry-run
# once to verify them. The original ran this identical command both before and
# after the profile reset; a single run after the reset is kept.
# NODE_COMPILE_CACHE=node_modules makes Node.js store its compile cache under
# node_modules during this run.
RUN NODE_COMPILE_CACHE=node_modules npm run dry-run
# Runtime environment, grouped in one instruction:
#   OVERRIDE_CHROME_EXECUTABLE_PATH  path of the Chrome binary installed via apt
#   LD_PRELOAD / CURL_IMPERSONATE*   preload libcurl-impersonate with the chrome116 profile
#   NODE_COMPILE_CACHE               same cache directory used by the build-time dry-run
#   PORT                             port value handed to the application
ENV OVERRIDE_CHROME_EXECUTABLE_PATH=/usr/bin/google-chrome-stable \
    LD_PRELOAD=/usr/local/lib/libcurl-impersonate.so \
    CURL_IMPERSONATE=chrome116 \
    CURL_IMPERSONATE_HEADERS=no \
    NODE_COMPILE_CACHE=node_modules \
    PORT=8080
# Ports the image documents (EXPOSE does not publish anything by itself).
EXPOSE 3000
EXPOSE 3001
EXPOSE 8080
EXPOSE 8081

# Default process is `node build/stand-alone/crawl.js`; the script path is the
# CMD so it can be replaced at `docker run` time while node stays the entrypoint.
ENTRYPOINT ["node"]
CMD ["build/stand-alone/crawl.js"]