ClauseGuard CUAD classifier — ONNX + dynamic INT8 quantization (Legal-BERT + LoRA merged)

Browse files

Files changed (7) hide show

config.json +113 -0
model_quantized.onnx +3 -0
ort_config.json +33 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,113 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "eos_token_id": 0,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Document Name",
+    "1": "Parties",
+    "2": "Agreement Date",
+    "3": "Effective Date",
+    "4": "Expiration Date",
+    "5": "Renewal Term",
+    "6": "Notice Period to Terminate Renewal",
+    "7": "Governing Law",
+    "8": "Most Favored Nation",
+    "9": "Non-Compete",
+    "10": "Exclusivity",
+    "11": "No-Solicit of Customers",
+    "12": "No-Solicit of Employees",
+    "13": "Non-Disparagement",
+    "14": "Termination for Convenience",
+    "15": "ROFR/ROFO/ROFN",
+    "16": "Change of Control",
+    "17": "Anti-Assignment",
+    "18": "Revenue/Profit Sharing",
+    "19": "Price Restriction",
+    "20": "Minimum Commitment",
+    "21": "Volume Restriction",
+    "22": "IP Ownership Assignment",
+    "23": "Joint IP Ownership",
+    "24": "License Grant",
+    "25": "Non-Transferable License",
+    "26": "Affiliate License-Licensor",
+    "27": "Affiliate License-Licensee",
+    "28": "Unlimited/All-You-Can-Eat License",
+    "29": "Irrevocable or Perpetual License",
+    "30": "Source Code Escrow",
+    "31": "Post-Termination Services",
+    "32": "Audit Rights",
+    "33": "Uncapped Liability",
+    "34": "Cap on Liability",
+    "35": "Liquidated Damages",
+    "36": "Warranty Duration",
+    "37": "Insurance",
+    "38": "Covenant Not to Sue",
+    "39": "Third Party Beneficiary",
+    "40": "Other"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Affiliate License-Licensee": 27,
+    "Affiliate License-Licensor": 26,
+    "Agreement Date": 2,
+    "Anti-Assignment": 17,
+    "Audit Rights": 32,
+    "Cap on Liability": 34,
+    "Change of Control": 16,
+    "Covenant Not to Sue": 38,
+    "Document Name": 0,
+    "Effective Date": 3,
+    "Exclusivity": 10,
+    "Expiration Date": 4,
+    "Governing Law": 7,
+    "IP Ownership Assignment": 22,
+    "Insurance": 37,
+    "Irrevocable or Perpetual License": 29,
+    "Joint IP Ownership": 23,
+    "License Grant": 24,
+    "Liquidated Damages": 35,
+    "Minimum Commitment": 20,
+    "Most Favored Nation": 8,
+    "No-Solicit of Customers": 11,
+    "No-Solicit of Employees": 12,
+    "Non-Compete": 9,
+    "Non-Disparagement": 13,
+    "Non-Transferable License": 25,
+    "Notice Period to Terminate Renewal": 6,
+    "Other": 40,
+    "Parties": 1,
+    "Post-Termination Services": 31,
+    "Price Restriction": 19,
+    "ROFR/ROFO/ROFN": 15,
+    "Renewal Term": 5,
+    "Revenue/Profit Sharing": 18,
+    "Source Code Escrow": 30,
+    "Termination for Convenience": 14,
+    "Third Party Beneficiary": 39,
+    "Uncapped Liability": 33,
+    "Unlimited/All-You-Can-Eat License": 28,
+    "Volume Restriction": 21,
+    "Warranty Duration": 36
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.57.6",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd34a472056553ac06cf7538107720a29f3a1b6ceea3e34921f6022c10fef664
+size 110310911

ort_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {},
+  "quantization": {
+    "activations_dtype": "QUInt8",
+    "activations_symmetric": false,
+    "format": "QOperator",
+    "is_static": false,
+    "mode": "IntegerOps",
+    "nodes_to_exclude": [],
+    "nodes_to_quantize": [],
+    "operators_to_quantize": [
+      "Conv",
+      "MatMul",
+      "Attention",
+      "LSTM",
+      "Gather",
+      "Transpose",
+      "EmbedLayerNormalization"
+    ],
+    "per_channel": false,
+    "qdq_add_pair_to_weight": false,
+    "qdq_dedicated_pair": false,
+    "qdq_op_type_per_channel_support_to_axis": {
+      "MatMul": 1
+    },
+    "reduce_range": false,
+    "weights_dtype": "QInt8",
+    "weights_symmetric": true
+  },
+  "use_external_data_format": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff