Update README.md
Browse files
README.md
CHANGED
|
@@ -50,29 +50,42 @@ pip install transformers torch
|
|
| 50 |
|
| 51 |
Then run the following:
|
| 52 |
```python
|
| 53 |
-
from transformers import pipeline
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
#
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
#
|
| 74 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
# label='no' → Non-green proposal (Label 0)
|
|
|
|
|
|
|
| 76 |
```
|
| 77 |
|
| 78 |
---
|
|
|
|
| 50 |
|
| 51 |
Then run the following:
|
| 52 |
```python
|
| 53 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
| 54 |
+
from transformers.pipelines.pt_utils import KeyDataset
|
| 55 |
+
import datasets
|
| 56 |
+
from tqdm.auto import tqdm
|
| 57 |
+
|
| 58 |
+
# ── Model ─────────────────────────────────────────────────────────────────────
|
| 59 |
+
model_name = "Jidi1997/ClimateBERT_GPROP_Detector"
|
| 60 |
+
|
| 61 |
+
model = AutoModelForSequenceClassification.from_pretrained(model_name)
|
| 62 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
|
| 63 |
+
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=0) # change to device=-1 if only CPU is available
|
| 64 |
+
|
| 65 |
+
# ── Data ──────────────────────────────────────────────────────────────────────
|
| 66 |
+
# Option A: Load your own dataset from a local CSV / JSON file
|
| 67 |
+
# dataset = datasets.load_dataset("csv", data_files="your_proposals.csv", split="train")
|
| 68 |
+
|
| 69 |
+
# Option B: Construct proposals inline using the recommended input format
|
| 70 |
+
# Each entry should follow the structure below for best performance:
|
| 71 |
+
# "A(An) {sponsor_type}-type sponsor has filed a shareholder proposal to a(an)
|
| 72 |
+
# {sic2_des}-sector company. This proposal requests: {resolution}.
|
| 73 |
+
# It falls under a broader agenda class that may include items not directly
|
| 74 |
+
# relevant to this specific proposal: {AgendaCodeInformation}"
|
| 75 |
+
|
| 76 |
+
dataset = datasets.Dataset.from_dict({"text": [
|
| 77 |
+
# Replace with your own proposals following the recommended input format above
|
| 78 |
+
"""A(An) institutional-type sponsor has filed a shareholder proposal to a(an)
|
| 79 |
+
energy-sector company. This proposal requests: the company to issue a report
|
| 80 |
+
on its greenhouse gas emissions reduction targets.
|
| 81 |
+
It falls under a broader agenda class: Environmental/Social."""
|
| 82 |
+
]})
|
| 83 |
+
|
| 84 |
+
# ── Inference ─────────────────────────────────────────────────────────────────
|
| 85 |
+
# label='yes' → Green proposal (Label 1)
|
| 86 |
# label='no' → Non-green proposal (Label 0)
|
| 87 |
+
for out in tqdm(pipe(KeyDataset(dataset, "text"), padding=True, truncation=True)):
|
| 88 |
+
print(out)
|
| 89 |
```
|
| 90 |
|
| 91 |
---
|