"""
domainTokenizer v0.4.0 — Building small models that understand domain tokens, not just words.

Components:
- schema: DomainSchema, FieldSpec, FieldType
- tokenizers: DomainTokenizerBuilder, per-field tokenizers
- schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
- models: DomainTransformerForCausalLM, PLR, JointFusion
- training: prepare_clm_dataset, pretrain_domain_model, finetune_domain_model

The names imported below are re-exported at package level and listed in
``__all__``.
"""


# NOTE: the import order is kept as-is; the submodules are loaded in this
# sequence when the package is imported.
from .schema import DomainSchema, FieldSpec, FieldType
from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
from .tokenizers.field_tokenizers import (
    BaseFieldTokenizer, CalendarTokenizer, CategoricalTokenizer,
    DiscreteNumericalTokenizer, MagnitudeBucketTokenizer, SignTokenizer,
)
from .models.configuration import DomainTransformerConfig
from .models.modeling import DomainTransformerForCausalLM, DomainTransformerModel
from .models.plr_embeddings import PeriodicLinearReLU
from .models.joint_fusion import JointFusionModel, DCNv2
from .training.data_pipeline import prepare_clm_dataset, pack_sequences
from .training.pretrain import pretrain_domain_model
from .training.finetune_data import DomainFinetuneDataset, prepare_finetune_dataset
from .training.finetune import finetune_domain_model


# Explicit public API: exactly the names re-exported above. Declaring it makes
# the re-exports intentional (rather than "unused imports") and pins what
# ``from <package> import *`` provides.
__all__ = [
    # schema
    "DomainSchema",
    "FieldSpec",
    "FieldType",
    # tokenizers
    "DomainTokenizerBuilder",
    "BaseFieldTokenizer",
    "CalendarTokenizer",
    "CategoricalTokenizer",
    "DiscreteNumericalTokenizer",
    "MagnitudeBucketTokenizer",
    "SignTokenizer",
    # models
    "DomainTransformerConfig",
    "DomainTransformerForCausalLM",
    "DomainTransformerModel",
    "PeriodicLinearReLU",
    "JointFusionModel",
    "DCNv2",
    # training
    "prepare_clm_dataset",
    "pack_sequences",
    "pretrain_domain_model",
    "DomainFinetuneDataset",
    "prepare_finetune_dataset",
    "finetune_domain_model",
]

# Package version; matches the version stated in the module docstring.
__version__ = "0.4.0"

|