Jidi1997 committed on
Commit
2b2a34f
·
verified ·
1 Parent(s): e794bf5

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +35 -22
README.md CHANGED
@@ -50,29 +50,42 @@ pip install transformers torch
50
 
51
  Then run the following:
52
  ```python
53
- from transformers import pipeline
54
-
55
- # 1. Load the model
56
- classifier = pipeline(
57
- "text-classification",
58
- model="Jidi1997/ClimateBERT_GPROP_Detector"
59
- )
60
-
61
- # 2. Construct a proposal input
62
- test_proposal = """
63
- A(An) institutional-type sponsor has filed a shareholder proposal to a(an)
64
- energy-sector company. This proposal requests: the company to issue a report
65
- on its greenhouse gas emissions reduction targets.
66
- It falls under a broader agenda class that may include items not directly
67
- relevant to this specific proposal: Environmental/Social.
68
- """
69
-
70
- # 3. Run inference
71
- result = classifier(test_proposal)
72
- print(result)
73
- # Expected output: [{'label': 'yes', 'score': 0.99...}]
74
- # label='yes' → Green proposal detected (Label 1)
 
 
 
 
 
 
 
 
 
 
 
75
  # label='no' → Non-green proposal (Label 0)
 
 
76
  ```
77
 
78
  ---
 
50
 
51
  Then run the following:
52
  ```python
53
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
54
+ from transformers.pipelines.pt_utils import KeyDataset
55
+ import datasets
56
+ from tqdm.auto import tqdm
57
+
58
+ # ── Model ──────────────────────────────────────────────────────────────────────
59
+ model_name = "Jidi1997/ClimateBERT_GPROP_Detector"
60
+
61
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
62
+ tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
63
+ pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=0) # change to device=-1 if only CPU is available
64
+
65
+ # ── Data ───────────────────────────────────────────────────────────────────────
66
+ # Option A: Load your own dataset from a local CSV / JSON file
67
+ # dataset = datasets.load_dataset("csv", data_files="your_proposals.csv", split="train")
68
+
69
+ # Option B: Construct proposals inline using the recommended input format
70
+ # Each entry should follow the structure below for best performance:
71
+ # "A(An) {sponsor_type}-type sponsor has filed a shareholder proposal to a(an)
72
+ # {sic2_des}-sector company. This proposal requests: {resolution}.
73
+ # It falls under a broader agenda class that may include items not directly
74
+ # relevant to this specific proposal: {AgendaCodeInformation}"
75
+
76
+ dataset = datasets.Dataset.from_dict({"text": [
77
+ # Replace with your own proposals following the recommended input format above
78
+ """A(An) institutional-type sponsor has filed a shareholder proposal to a(an)
79
+ energy-sector company. This proposal requests: the company to issue a report
80
+ on its greenhouse gas emissions reduction targets.
81
+ It falls under a broader agenda class: Environmental/Social."""
82
+ ]})
83
+
84
+ # ── Inference ──────────────────────────────────────────────────────────────────
85
+ # label='yes' → Green proposal (Label 1)
86
  # label='no' → Non-green proposal (Label 0)
87
+ for out in tqdm(pipe(KeyDataset(dataset, "text"), padding=True, truncation=True)):
88
+ print(out)
89
  ```
90
 
91
  ---