Simo76 committed on
Commit
91d7191
·
1 Parent(s): 2d544ad

Update MRPC example notebook with new code cells

Browse files
Files changed (1) hide show
  1. notebooks/mrpc_example.ipynb +38 -8
notebooks/mrpc_example.ipynb CHANGED
@@ -11,12 +11,16 @@
11
  },
12
  {
13
  "cell_type": "code",
 
14
  "source": [
15
  "!pip install -q transformers datasets evaluate scikit-learn accelerate"
16
- ]
 
 
17
  },
18
  {
19
  "cell_type": "code",
 
20
  "source": [
21
  "import torch\n",
22
  "from datasets import load_dataset\n",
@@ -33,10 +37,13 @@
33
  "\n",
34
  "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
35
  "print(device)"
36
- ]
 
 
37
  },
38
  {
39
  "cell_type": "code",
 
40
  "source": [
41
  "dataset = load_dataset('glue','mrpc')\n",
42
  "tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')\n",
@@ -54,10 +61,13 @@
54
  "val_loader = DataLoader(val, batch_size=16)\n",
55
  "\n",
56
  "metric = evaluate.load('glue','mrpc')"
57
- ]
 
 
58
  },
59
  {
60
  "cell_type": "code",
 
61
  "source": [
62
  "def eval_model(model):\n",
63
  " model.eval()\n",
@@ -70,10 +80,13 @@
70
  " p=model(input_ids=x,attention_mask=m).logits.argmax(-1)\n",
71
  " preds.extend(p.cpu().numpy()); labels.extend(y.cpu().numpy())\n",
72
  " return metric.compute(predictions=preds,references=labels)['f1']"
73
- ]
 
 
74
  },
75
  {
76
  "cell_type": "code",
 
77
  "source": [
78
  "# BASELINE\n",
79
  "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n",
@@ -90,10 +103,13 @@
90
  "\n",
91
  "f1_base = eval_model(model)\n",
92
  "print('Baseline F1:', round(f1_base,3))"
93
- ]
 
 
94
  },
95
  {
96
  "cell_type": "code",
 
97
  "source": [
98
  "# ORBITAL\n",
99
  "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n",
@@ -118,18 +134,32 @@
118
  "\n",
119
  "f1_orb = eval_model(model)\n",
120
  "print('Orbital F1:', round(f1_orb,3))"
121
- ]
 
 
122
  },
123
  {
124
  "cell_type": "code",
 
125
  "source": [
126
  "print('\\nBaseline:', round(f1_base,3))\n",
127
  "print('Orbital:', round(f1_orb,3))\n",
128
  "print('Delta:', round(f1_orb-f1_base,3))"
129
- ]
 
 
130
  }
131
  ],
132
- "metadata": {},
 
 
 
 
 
 
 
 
 
133
  "nbformat": 4,
134
  "nbformat_minor": 4
135
  }
 
11
  },
12
  {
13
  "cell_type": "code",
14
+ "metadata": {},
15
  "source": [
16
  "!pip install -q transformers datasets evaluate scikit-learn accelerate"
17
+ ],
18
+ "outputs": [],
19
+ "execution_count": null
20
  },
21
  {
22
  "cell_type": "code",
23
+ "metadata": {},
24
  "source": [
25
  "import torch\n",
26
  "from datasets import load_dataset\n",
 
37
  "\n",
38
  "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
39
  "print(device)"
40
+ ],
41
+ "outputs": [],
42
+ "execution_count": null
43
  },
44
  {
45
  "cell_type": "code",
46
+ "metadata": {},
47
  "source": [
48
  "dataset = load_dataset('glue','mrpc')\n",
49
  "tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')\n",
 
61
  "val_loader = DataLoader(val, batch_size=16)\n",
62
  "\n",
63
  "metric = evaluate.load('glue','mrpc')"
64
+ ],
65
+ "outputs": [],
66
+ "execution_count": null
67
  },
68
  {
69
  "cell_type": "code",
70
+ "metadata": {},
71
  "source": [
72
  "def eval_model(model):\n",
73
  " model.eval()\n",
 
80
  " p=model(input_ids=x,attention_mask=m).logits.argmax(-1)\n",
81
  " preds.extend(p.cpu().numpy()); labels.extend(y.cpu().numpy())\n",
82
  " return metric.compute(predictions=preds,references=labels)['f1']"
83
+ ],
84
+ "outputs": [],
85
+ "execution_count": null
86
  },
87
  {
88
  "cell_type": "code",
89
+ "metadata": {},
90
  "source": [
91
  "# BASELINE\n",
92
  "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n",
 
103
  "\n",
104
  "f1_base = eval_model(model)\n",
105
  "print('Baseline F1:', round(f1_base,3))"
106
+ ],
107
+ "outputs": [],
108
+ "execution_count": null
109
  },
110
  {
111
  "cell_type": "code",
112
+ "metadata": {},
113
  "source": [
114
  "# ORBITAL\n",
115
  "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n",
 
134
  "\n",
135
  "f1_orb = eval_model(model)\n",
136
  "print('Orbital F1:', round(f1_orb,3))"
137
+ ],
138
+ "outputs": [],
139
+ "execution_count": null
140
  },
141
  {
142
  "cell_type": "code",
143
+ "metadata": {},
144
  "source": [
145
  "print('\\nBaseline:', round(f1_base,3))\n",
146
  "print('Orbital:', round(f1_orb,3))\n",
147
  "print('Delta:', round(f1_orb-f1_base,3))"
148
+ ],
149
+ "outputs": [],
150
+ "execution_count": null
151
  }
152
  ],
153
+ "metadata": {
154
+ "kernelspec": {
155
+ "display_name": "Python 3",
156
+ "language": "python",
157
+ "name": "python3"
158
+ },
159
+ "language_info": {
160
+ "name": "python"
161
+ }
162
+ },
163
  "nbformat": 4,
164
  "nbformat_minor": 4
165
  }