Jidi1997
/

ClimateBERT_GPROP_Detector

@@ -50,29 +50,42 @@ pip install transformers torch
 Then run the following:
 ```python
-from transformers import pipeline
-# 1. Load the model
-classifier = pipeline(
-    "text-classification",
-    model="Jidi1997/ClimateBERT_GPROP_Detector"
-)
-# 2. Construct a proposal input
-test_proposal = """
-A(An) institutional-type sponsor has filed a shareholder proposal to a(an)
-energy-sector company. This proposal requests: the company to issue a report
-on its greenhouse gas emissions reduction targets.
-It falls under a broader agenda class that may include items not directly
-relevant to this specific proposal: Environmental/Social.
-"""
-# 3. Run inference
-result = classifier(test_proposal)
-print(result)
-# Expected output: [{'label': 'yes', 'score': 0.99...}]
-# label='yes' → Green proposal detected (Label 1)
 # label='no'  → Non-green proposal (Label 0)
 ```
 ---

 Then run the following:
 ```python
+import datasets
+import torch
+from tqdm.auto import tqdm
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
+from transformers.pipelines.pt_utils import KeyDataset
+# ── Model ──────────────────────────────────────────────────────────────────────
+model_name = "Jidi1997/ClimateBERT_GPROP_Detector"
+model     = AutoModelForSequenceClassification.from_pretrained(model_name)
+# model_max_length=512 is the length inputs are cut to when truncation is enabled.
+tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
+# Use the first CUDA GPU when one is available; otherwise fall back to CPU (-1),
+# so the snippet runs unchanged on machines without a GPU.
+device    = 0 if torch.cuda.is_available() else -1
+pipe      = pipeline("text-classification", model=model, tokenizer=tokenizer, device=device)
+# ── Data ───────────────────────────────────────────────────────────────────────
+# Option A: Load your own dataset from a local CSV / JSON file
+#   dataset = datasets.load_dataset("csv", data_files="your_proposals.csv", split="train")
+#   (the proposals must be in a column named "text", the key read at inference time)
+# Option B: Construct proposals inline using the recommended input format
+#   Each entry should follow the structure below for best performance:
+#   "A(An) {sponsor_type}-type sponsor has filed a shareholder proposal to a(an)
+#    {sic2_des}-sector company. This proposal requests: {resolution}.
+#    It falls under a broader agenda class that may include items not directly
+#    relevant to this specific proposal: {AgendaCodeInformation}"
+dataset = datasets.Dataset.from_dict({"text": [
+    # Replace with your own proposals following the recommended input format above.
+    # The example keeps the full agenda-class sentence so it matches that format.
+    """A(An) institutional-type sponsor has filed a shareholder proposal to a(an)
+    energy-sector company. This proposal requests: the company to issue a report
+    on its greenhouse gas emissions reduction targets.
+    It falls under a broader agenda class that may include items not directly
+    relevant to this specific proposal: Environmental/Social."""
+]})
+# ── Inference ──────────────────────────────────────────────────────────────────
+# Each printed prediction carries a label: 'yes' → Green proposal (Label 1)
 # label='no'  → Non-green proposal (Label 0)
+for out in tqdm(pipe(KeyDataset(dataset, "text"), padding=True, truncation=True)):
+    print(out)
 ```
 ---