davanstrien
/

fineweb-c-quality-reranker-v2

@@ -31,25 +31,25 @@ model-index:
       type: fineweb_c_eval
     metrics:
     - type: accuracy
-      value: 0.849609375
       name: Accuracy
     - type: accuracy_threshold
-      value: 0.8169846534729004
       name: Accuracy Threshold
     - type: f1
-      value: 0.5108433734939759
       name: F1
     - type: f1_threshold
-      value: 0.6361271142959595
       name: F1 Threshold
     - type: precision
-      value: 0.4274193548387097
       name: Precision
     - type: recall
-      value: 0.6347305389221557
       name: Recall
     - type: average_precision
-      value: 0.4518233024149694
       name: Average Precision
 ---
@@ -110,7 +110,7 @@ pairs = [
 ]
 scores = model.predict(pairs)
 print(scores)
-# [0.1112 0.8549 0.0385 0.0726 0.0967]
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
@@ -161,13 +161,13 @@ You can finetune this model on your own dataset.
 | Metric                | Value      |
 |:----------------------|:-----------|
-| accuracy              | 0.8496     |
-| accuracy_threshold    | 0.817      |
-| f1                    | 0.5108     |
-| f1_threshold          | 0.6361     |
-| precision             | 0.4274     |
-| recall                | 0.6347     |
-| **average_precision** | **0.4518** |
 <!--
 ## Bias, Risks and Limitations
@@ -391,12 +391,22 @@ You can finetune this model on your own dataset.
 | 0.5606 | 162  | 1.0361        | -               | -                                |
 | 0.5813 | 168  | 0.9421        | -               | -                                |
 | 0.6021 | 174  | 0.9503        | 0.8597          | 0.4518                           |
 ### Training Time
-- **Training**: 2.9 minutes
-- **Evaluation**: 1.0 minutes
-- **Total**: 3.9 minutes
 ### Framework Versions
 - Python: 3.12.12

       type: fineweb_c_eval
     metrics:
     - type: accuracy
+      value: 0.869140625
       name: Accuracy
     - type: accuracy_threshold
+      value: 0.8164063692092896
       name: Accuracy Threshold
     - type: f1
+      value: 0.53276955602537
       name: F1
     - type: f1_threshold
+      value: 0.5794004201889038
       name: F1 Threshold
     - type: precision
+      value: 0.4117647058823529
       name: Precision
     - type: recall
+      value: 0.7544910179640718
       name: Recall
     - type: average_precision
+      value: 0.5406815506036883
       name: Average Precision
 ---
 ]
 scores = model.predict(pairs)
 print(scores)
+# [0.0706 0.9546 0.1859 0.1413 0.1883]
 # Or rank different texts based on similarity to a single text
 ranks = model.rank(
 | Metric                | Value      |
 |:----------------------|:-----------|
+| accuracy              | 0.8691     |
+| accuracy_threshold    | 0.8164     |
+| f1                    | 0.5328     |
+| f1_threshold          | 0.5794     |
+| precision             | 0.4118     |
+| recall                | 0.7545     |
+| **average_precision** | **0.5407** |
 <!--
 ## Bias, Risks and Limitations
 | 0.5606 | 162  | 1.0361        | -               | -                                |
 | 0.5813 | 168  | 0.9421        | -               | -                                |
 | 0.6021 | 174  | 0.9503        | 0.8597          | 0.4518                           |
+| 0.6228 | 180  | 0.9766        | -               | -                                |
+| 0.6436 | 186  | 1.1067        | -               | -                                |
+| 0.6644 | 192  | 1.0229        | -               | -                                |
+| 0.6851 | 198  | 0.9341        | -               | -                                |
+| 0.7059 | 204  | 0.7538        | -               | -                                |
+| 0.7266 | 210  | 1.1375        | -               | -                                |
+| 0.7474 | 216  | 1.0365        | -               | -                                |
+| 0.7682 | 222  | 0.9019        | -               | -                                |
+| 0.7889 | 228  | 1.0598        | -               | -                                |
+| 0.8028 | 232  | -             | 0.8322          | 0.5407                           |
 ### Training Time
+- **Training**: 3.8 minutes
+- **Evaluation**: 1.4 minutes
+- **Total**: 5.2 minutes
 ### Framework Versions
 - Python: 3.12.12

eval/CrossEncoderClassificationEvaluator_fineweb_c_eval_results.csv CHANGED Viewed

@@ -2,3 +2,4 @@ epoch,steps,Accuracy,Accuracy_Threshold,F1,F1_Threshold,Precision,Recall,Average
 0.20069204152249134,58,0.8369140625,0.8085842,0.29916897506925205,0.39676917,0.1945945945945946,0.6467065868263473,0.22527425150874691
 0.4013840830449827,116,0.8388671875,0.75488913,0.37500000000000006,0.4951936,0.3132530120481928,0.46706586826347307,0.296686339444048
 0.6020761245674741,174,0.849609375,0.81698465,0.5108433734939759,0.6361271,0.4274193548387097,0.6347305389221557,0.4518233024149694

 0.20069204152249134,58,0.8369140625,0.8085842,0.29916897506925205,0.39676917,0.1945945945945946,0.6467065868263473,0.22527425150874691
 0.4013840830449827,116,0.8388671875,0.75488913,0.37500000000000006,0.4951936,0.3132530120481928,0.46706586826347307,0.296686339444048
 0.6020761245674741,174,0.849609375,0.81698465,0.5108433734939759,0.6361271,0.4274193548387097,0.6347305389221557,0.4518233024149694
+0.8027681660899654,232,0.869140625,0.81640637,0.53276955602537,0.5794004,0.4117647058823529,0.7544910179640718,0.5406815506036883

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a37604d143cbc9b7174630f1e818d0663e10ea8901d7c0137eb8068f2eab4c36
 size 1230138348

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee20c26b299d14b87a0fd1769b8aeae4760ee528ebe2a96f2cf8c2d2e1eba1ec
 size 1230138348