# ghostlm / requirements.txt
# Last commit by Ghostgim: feat(rag): wire retrieval-augmented chat into chat_fn
# (ae0fa76, verified)
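# Hugging Face Spaces installs from this file at build time.
# Each dependency carries a conservative minimum-version floor (`>=`).
# Note these are lower bounds, not exact pins: they guarantee the features
# we rely on exist, but do not by themselves protect a Space build from a
# future breaking release of any of the deps.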
# Note: gradio is intentionally NOT listed here. HF Spaces auto-installs
# `gradio[oauth,mcp]==<sdk_version>` on top of this file based on the SDK
# selection in README.md frontmatter. Listing it here causes a pip
# version-conflict at build time when our pin disagrees with HF's.
# torch >= 2.0 for the scaled_dot_product_attention path. CPU-only is
# fine on free Spaces.
torch>=2.0.0
# tiktoken is the GPT-2 BPE backend the GhostTokenizer wraps.
tiktoken>=0.5.0
# huggingface_hub for hf_hub_download. The v0.9 chat weights and the
# RAG index both live in the Models repo Ghostgim/GhostLM-v0.9-experimental
# (not in the Space's own LFS) so the Space stays within the 1 GB
# free-tier cap. Pulled and cached on first launch.
huggingface_hub>=0.20
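# transformers + sentencepiece for the BGE-small-en-v1.5 retrieval
# embedder used by the RAG layer. transformers is already pulled in
# transitively but listing it explicitly records the minimum version we
# test against. sentencepiece is listed as BGE's tokenizer backend.
# NOTE(review): BGE-small-en-v1.5 is BERT-based (WordPiece vocab), so
# confirm sentencepiece is actually required before relying on this.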
transformers>=4.38
sentencepiece>=0.1.99
# numpy for the in-memory retrieval index (83K x 384 float32 matrix).
# Already a transitive dep of torch but listed explicitly because the
# RAG code path imports it directly.
numpy>=1.24
# Python 3.13 removed the stdlib audioop module (per PEP 594), which
# gradio's transitive pydub dep imports at module-load time. Without this
# the entire gradio import chain fails with ModuleNotFoundError: No module
# named 'pyaudioop' (the fallback name pydub tries when audioop is
# missing). The maintained drop-in replacement is audioop-lts. Conditional
# so 3.12 and earlier (where stdlib audioop still exists) skip it.
audioop-lts; python_version >= '3.13'