Update package to v0.3.0 with training exports
Browse files
src/domain_tokenizer/__init__.py
CHANGED
|
@@ -6,21 +6,20 @@ Core components:
|
|
| 6 |
- tokenizers: DomainTokenizerBuilder, per-field tokenizers
|
| 7 |
- schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
|
| 8 |
- models: DomainTransformerForCausalLM, PLR, JointFusion
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
from .schema import DomainSchema, FieldSpec, FieldType
|
| 12 |
from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
|
| 13 |
from .tokenizers.field_tokenizers import (
|
| 14 |
-
BaseFieldTokenizer,
|
| 15 |
-
|
| 16 |
-
CategoricalTokenizer,
|
| 17 |
-
DiscreteNumericalTokenizer,
|
| 18 |
-
MagnitudeBucketTokenizer,
|
| 19 |
-
SignTokenizer,
|
| 20 |
)
|
| 21 |
from .models.configuration import DomainTransformerConfig
|
| 22 |
from .models.modeling import DomainTransformerForCausalLM, DomainTransformerModel
|
| 23 |
from .models.plr_embeddings import PeriodicLinearReLU
|
| 24 |
from .models.joint_fusion import JointFusionModel, DCNv2
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
__version__ = "0.
|
|
|
|
| 6 |
- tokenizers: DomainTokenizerBuilder, per-field tokenizers
|
| 7 |
- schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
|
| 8 |
- models: DomainTransformerForCausalLM, PLR, JointFusion
|
| 9 |
+
- training: prepare_clm_dataset, pretrain_domain_model
|
| 10 |
"""
|
| 11 |
|
| 12 |
from .schema import DomainSchema, FieldSpec, FieldType
|
| 13 |
from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
|
| 14 |
from .tokenizers.field_tokenizers import (
|
| 15 |
+
BaseFieldTokenizer, CalendarTokenizer, CategoricalTokenizer,
|
| 16 |
+
DiscreteNumericalTokenizer, MagnitudeBucketTokenizer, SignTokenizer,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
)
|
| 18 |
from .models.configuration import DomainTransformerConfig
|
| 19 |
from .models.modeling import DomainTransformerForCausalLM, DomainTransformerModel
|
| 20 |
from .models.plr_embeddings import PeriodicLinearReLU
|
| 21 |
from .models.joint_fusion import JointFusionModel, DCNv2
|
| 22 |
+
from .training.data_pipeline import prepare_clm_dataset, pack_sequences
|
| 23 |
+
from .training.pretrain import pretrain_domain_model
|
| 24 |
|
| 25 |
+
__version__ = "0.3.0"
|