akzaidan committed on
Commit
12276f2
·
verified ·
1 Parent(s): 2b7de37

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +46 -0
  2. config.json +12 -0
  3. norm_stats.json +14 -0
  4. pytorch_model.bin +3 -0
  5. tokenizer.json +0 -0
  6. tokenizer_config.json +23 -0
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - job-classification
5
+ - salary-prediction
6
+ - experience-prediction
7
+ - deberta
8
+ ---
9
+
10
+ # JobPredictor1
11
+
12
+ Fine-tuned DeBERTa-v3-small model that predicts:
13
+ - **Expected years of experience** required for a job
14
+ - **Lower salary bound** (USD)
15
+ - **Upper salary bound** (USD)
16
+
17
+ ## Input Format
18
+ ```
19
+ [TITLE]: <job title> [DESC]: <job description>
20
+ ```
21
+
22
+ ## Outputs
23
+ | Output | Type | Description |
24
+ |---|---|---|
25
+ | expected_experience_years | int | Years of experience required |
26
+ | pay_lower | int | Lower salary bound (USD) |
27
+ | pay_upper | int | Upper salary bound (USD) |
28
+
29
+ ## Normalization
30
+ The model's raw predictions are z-score normalized. Use the per-target `mean` and `std` in `norm_stats.json` to denormalize them:
31
+ ```python
32
+ real_value = pred * norm_stats[col]["std"] + norm_stats[col]["mean"]
33
+ ```
34
+
35
+ ## Test Set Performance
36
+ | Metric | Value |
37
+ |---|---|
38
+ | Experience MAE | 0.57 years |
39
+ | Experience Within 1yr | 83.1% |
40
+ | Pay Lower MAE | $15,511 |
41
+ | Pay Lower Within $20k | 84.5% |
42
+ | Pay Upper MAE | $20,190 |
43
+ | Pay Upper Within $20k | 76.0% |
44
+
45
+ ## Base Model
46
+ microsoft/deberta-v3-small
config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model": "microsoft/deberta-v3-small",
3
+ "architecture": "DeBERTa-v3-small + 3 regression heads",
4
+ "outputs": {
5
+ "expected_experience_years": "integer (years of experience)",
6
+ "pay_lower": "integer (lower salary bound USD)",
7
+ "pay_upper": "integer (upper salary bound USD)"
8
+ },
9
+ "normalization": "z-score \u2014 use norm_stats.json to denormalize predictions",
10
+ "max_length": 512,
11
+ "dropout": 0.1
12
+ }
norm_stats.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "expected_experience_years": {
3
+ "mean": 2.8786184883249994,
4
+ "std": 2.7134313236051546
5
+ },
6
+ "pay_lower": {
7
+ "mean": 70610.15046174698,
8
+ "std": 54165.983999683085
9
+ },
10
+ "pay_upper": {
11
+ "mean": 92701.26081987657,
12
+ "std": 69885.4087522144
13
+ }
14
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6890fbfd124d723db5d6baa86f519647315593c19da65d5d832b981ff808e5a8
3
+ size 567630955
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "backend": "tokenizers",
4
+ "bos_token": "[CLS]",
5
+ "cls_token": "[CLS]",
6
+ "do_lower_case": false,
7
+ "eos_token": "[SEP]",
8
+ "extra_special_tokens": [
9
+ "[PAD]",
10
+ "[CLS]",
11
+ "[SEP]"
12
+ ],
13
+ "is_local": false,
14
+ "mask_token": "[MASK]",
15
+ "model_max_length": 1000000000000000019884624838656,
16
+ "pad_token": "[PAD]",
17
+ "sep_token": "[SEP]",
18
+ "split_by_punct": false,
19
+ "tokenizer_class": "DebertaV2Tokenizer",
20
+ "unk_id": 3,
21
+ "unk_token": "[UNK]",
22
+ "vocab_type": "spm"
23
+ }