File size: 895 Bytes
0c1ca58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""
domainTokenizer — Building small models that understand domain tokens, not just words.

Core components:
  - schema: DomainSchema, FieldSpec, FieldType
  - tokenizers: DomainTokenizerBuilder, per-field tokenizers
  - schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
"""

from .schema import DomainSchema, FieldSpec, FieldType
from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
from .tokenizers.field_tokenizers import (
    BaseFieldTokenizer,
    CalendarTokenizer,
    CategoricalTokenizer,
    DiscreteNumericalTokenizer,
    MagnitudeBucketTokenizer,
    SignTokenizer,
)

# Version string for this package.
__version__ = "0.1.0"

# Public API of the package: the names bound by a star-import of this package.
# Each entry matches a name imported above from .schema or .tokenizers.
# NOTE(review): the module docstring lists predefined schemas (FINANCE,
# ECOMMERCE, HEALTHCARE), but nothing schema-related beyond the three classes
# below is imported or exported here — confirm whether that is intentional.
__all__ = [
    # Schema definitions (imported from .schema)
    "DomainSchema",
    "FieldSpec",
    "FieldType",
    # Tokenizer builder (imported from .tokenizers.domain_tokenizer)
    "DomainTokenizerBuilder",
    # Per-field tokenizers (imported from .tokenizers.field_tokenizers)
    "BaseFieldTokenizer",
    "SignTokenizer",
    "MagnitudeBucketTokenizer",
    "DiscreteNumericalTokenizer",
    "CalendarTokenizer",
    "CategoricalTokenizer",
]