Spaces:

seanpedrickcase
/

document_redaction

Running

document_redaction / requirements_lightweight.txt

Sync: updated redaction modification skill to mention page image exports in more detail

111fa16 about 3 hours ago

1.36 kB

	# --- Core and data packages ---
	numpy<=2.4.4
	pandas<=2.3.3
	bleach<=6.3.0
	polars<=1.38.1
	pyarrow<=23.0.1
	openpyxl<=3.1.5
	boto3<=1.42.91
	python-dotenv<=1.0.1
	defusedxml<=0.7.1
	Faker<=40.8.0
	python-levenshtein<=0.27.3
	rapidfuzz<=3.14.5
	markdown<=3.10.2
	tabulate<=0.10.0

	# --- Machine learning / NLP ---
	scikit-learn<=1.8.0
	spacy<=3.8.14
	spaczz<=0.6.1
	en_core_web_lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz

	# --- PDF / OCR / Redaction tools ---
	pdfminer.six<=20260107
	pdf2image<=1.17.0
	pymupdf<=1.27.1
	pikepdf<=10.3.0
	opencv-python<=4.13.0.92
	presidio_analyzer<=2.2.362
	presidio_anonymizer<=2.2.362
	presidio-image-redactor<=0.0.57

	# --- Gradio and apps ---
	gradio<=6.10.0
	https://github.com/seanpedrick-case/gradio_image_annotator/releases/download/v0.5.3/gradio_image_annotation-0.5.3-py3-none-any.whl # Custom annotator version with rotation, zoom, labels, and box IDs
	spaces<=0.48.3

	# --- AWS Lambda runtime ---
	awslambdaric<=3.1.1

	# --- Document generation ---
	python-docx<=1.2.0

	# --- Testing ---
	pytest<=9.0.3
	pytest-cov<=7.1.0

	# --- LLM libraries ---
	# protobuf is listed explicitly because PaddlePaddle imports google.protobuf, and
	# layered `pip --target` installs combined with other google-* packages can leave
	# the `google` namespace incomplete unless protobuf is installed as its own requirement.
	protobuf<=7.34.0
	google-genai<=1.73.0
	openai<=2.31.0