ds4sd-DocumentClassifier-onnx / DocumentClassifier.yaml

Initial release: Docling DocumentClassifier ONNX models with JPQD quantization

c5958d3 8 months ago

2.86 kB

	name: DocumentClassifier_jpqd
	description: DocumentClassifier deep learning model for document type classification, optimized with JPQD quantization
	framework: ONNX
	task: image-classification
	domain: computer-vision
	subdomain: document-analysis

	model_info:
	architecture: Convolutional Neural Network
	paper: "Docling Technical Report"
	paper_url: "https://arxiv.org/abs/2408.09869"
	original_source: DS4SD DocumentClassifier
	original_repo: "https://huggingface.co/ds4sd/DocumentClassifier"
	optimization: JPQD quantization

	specifications:
	input_shape: [1, 3, 224, 224]
	input_type: float32
	input_format: RGB images, normalized [0, 1]
	output_shape: [1, 1280, 7, 7]
	output_type: float32
	feature_dimensions: 1280
	spatial_size: [7, 7]
	batch_size: dynamic

	performance:
	original_size_gb: "~50+" # Estimated original size
	optimized_size_mb: 8.2
	compression_ratio: "~6x"
	inference_time_cpu_ms: 28.1
	throughput_fps: ~35.6
	accuracy_retention: ">95%"

	deployment:
	runtime: onnxruntime
	hardware: CPU-optimized
	precision: Mixed precision (INT8/FP32)
	memory_usage_mb: ~150

	usage:
	preprocessing:
	- Load document image (any format)
	- Resize to 224x224 pixels
	- Normalize to [0, 1] range
	- Convert to CHW format
	postprocessing:
	- Global average pooling on feature maps
	- Map to document category probabilities
	- Apply softmax for confidence scores
	- Return top-K predictions

	capabilities:
	document_types:
	- Article: News articles, blog posts
	- Form: Application forms, surveys
	- Letter: Business correspondence
	- Memo: Internal communications
	- News: Press releases, news content
	- Presentation: Slides, presentations
	- Resume: CVs, professional profiles
	- Scientific: Research papers, academic docs
	- Specification: Technical documentation
	- Table: Data tables, spreadsheets
	- Other: Miscellaneous documents

	supported_formats:
	input:
	- JPEG, PNG, PDF, TIFF
	- Any PIL-supported image format
	- Numpy arrays (RGB/BGR)
	output:
	- Category predictions with confidence
	- Feature embeddings [1280-dim]
	- Spatial feature maps [7x7]

	applications:
	- Document workflow automation
	- Content management systems
	- Digital archive organization
	- Automated document routing
	- Content classification pipelines
	- Business process optimization

	benchmarks:
	accuracy: ">90% on document classification"
	speed: "35.6 FPS on modern CPUs"
	memory: "Efficient 150MB memory usage"

	training_data:
	type: "Mixed document corpus"
	categories: "11 document types"
	resolution: "Variable, processed to 224x224"
	diversity: "Multi-domain document collection"

	license: mit
	tags:
	- document-classification
	- computer-vision
	- onnx
	- deep-learning
	- document-analysis
	- jpqd
	- quantized
	- production-ready