File size: 895 Bytes
"""
domainTokenizer — Building small models that understand domain tokens, not just words.
Core components:
- schema: DomainSchema, FieldSpec, FieldType
- tokenizers: DomainTokenizerBuilder, per-field tokenizers
- schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
"""
from .schema import DomainSchema, FieldSpec, FieldType
from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
from .tokenizers.field_tokenizers import (
BaseFieldTokenizer,
CalendarTokenizer,
CategoricalTokenizer,
DiscreteNumericalTokenizer,
MagnitudeBucketTokenizer,
SignTokenizer,
)
# Version string exposed as a module attribute.
__version__ = "0.1.0"

# Names exported by a star-import of this module. Every entry is bound by
# one of the relative imports at the top of the file.
__all__ = [
    # From .schema
    "DomainSchema",
    "FieldSpec",
    "FieldType",
    # From .tokenizers.domain_tokenizer
    "DomainTokenizerBuilder",
    # From .tokenizers.field_tokenizers
    "BaseFieldTokenizer",
    "SignTokenizer",
    "MagnitudeBucketTokenizer",
    "DiscreteNumericalTokenizer",
    "CalendarTokenizer",
    "CategoricalTokenizer",
]
|