rtferraz committed on
Commit
7edb04f
·
verified ·
1 Parent(s): 64d55e2

Update package to v0.4.0 with fine-tuning exports

Browse files
Files changed (1) hide show
  1. src/domain_tokenizer/__init__.py +6 -4
src/domain_tokenizer/__init__.py CHANGED
@@ -1,12 +1,12 @@
1
  """
2
- domainTokenizer — Building small models that understand domain tokens, not just words.
3
 
4
- Core components:
5
  - schema: DomainSchema, FieldSpec, FieldType
6
  - tokenizers: DomainTokenizerBuilder, per-field tokenizers
7
  - schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
8
  - models: DomainTransformerForCausalLM, PLR, JointFusion
9
- - training: prepare_clm_dataset, pretrain_domain_model
10
  """
11
 
12
  from .schema import DomainSchema, FieldSpec, FieldType
@@ -21,5 +21,7 @@ from .models.plr_embeddings import PeriodicLinearReLU
21
  from .models.joint_fusion import JointFusionModel, DCNv2
22
  from .training.data_pipeline import prepare_clm_dataset, pack_sequences
23
  from .training.pretrain import pretrain_domain_model
 
 
24
 
25
- __version__ = "0.3.0"
 
1
  """
2
+ domainTokenizer v0.4.0 — Building small models that understand domain tokens, not just words.
3
 
4
+ Components:
5
  - schema: DomainSchema, FieldSpec, FieldType
6
  - tokenizers: DomainTokenizerBuilder, per-field tokenizers
7
  - schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
8
  - models: DomainTransformerForCausalLM, PLR, JointFusion
9
+ - training: prepare_clm_dataset, pretrain_domain_model, finetune_domain_model
10
  """
11
 
12
  from .schema import DomainSchema, FieldSpec, FieldType
 
21
  from .models.joint_fusion import JointFusionModel, DCNv2
22
  from .training.data_pipeline import prepare_clm_dataset, pack_sequences
23
  from .training.pretrain import pretrain_domain_model
24
+ from .training.finetune_data import DomainFinetuneDataset, prepare_finetune_dataset
25
+ from .training.finetune import finetune_domain_model
26
 
27
+ __version__ = "0.4.0"