Raman Hasymau committed on
Commit
32903ec
·
1 Parent(s): 2afb296
ood_detector.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcf2a6c78f52242f694de098c3a0619e5cf1f0d508c562db471dba2b66c8ce4
3
+ size 698020
paper_cls_models_train.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
src/model/config.json CHANGED
@@ -11,21 +11,31 @@
11
  "eos_token_id": null,
12
  "hidden_dim": 3072,
13
  "id2label": {
14
- "0": "cmp-lg",
15
- "1": "cs",
16
- "2": "math",
17
- "3": "physics",
18
- "4": "q-bio",
19
- "5": "stat"
 
 
 
 
 
20
  },
21
  "initializer_range": 0.02,
22
  "label2id": {
23
- "cmp-lg": 0,
24
- "cs": 1,
25
- "math": 2,
26
- "physics": 3,
27
- "q-bio": 4,
28
- "stat": 5
 
 
 
 
 
29
  },
30
  "max_position_embeddings": 512,
31
  "model_type": "distilbert",
 
11
  "eos_token_id": null,
12
  "hidden_dim": 3072,
13
  "id2label": {
14
+ "0": "math.AC",
15
+ "1": "cs.CV",
16
+ "2": "cs.AI",
17
+ "3": "cs.SY",
18
+ "4": "math.GR",
19
+ "5": "cs.CE",
20
+ "6": "cs.PL",
21
+ "7": "cs.IT",
22
+ "8": "cs.DS",
23
+ "9": "cs.NE",
24
+ "10": "math.ST"
25
  },
26
  "initializer_range": 0.02,
27
  "label2id": {
28
+ "cs.AI": 2,
29
+ "cs.CE": 5,
30
+ "cs.CV": 1,
31
+ "cs.DS": 8,
32
+ "cs.IT": 7,
33
+ "cs.NE": 9,
34
+ "cs.PL": 6,
35
+ "cs.SY": 3,
36
+ "math.AC": 0,
37
+ "math.GR": 4,
38
+ "math.ST": 10
39
  },
40
  "max_position_embeddings": 512,
41
  "model_type": "distilbert",
src/model/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af50efddce3c3be23f1a10a8e36a552bc103eee2e23f3f72932c28a2e408447
3
- size 267844872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c04a94c7251860b13606a9f7f83c3fdac1ae53cbcc176d30850d475a470a83ae
3
+ size 267860252
src/model/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a32c09b0f520aeb11a0e8a1125469cb8d0108f232ec0afb0098b0992077044b5
3
  size 5137
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d46654a39fad3d312716bedc11d84a28e1cfd41badf3703ed0fc11c176fa7b
3
  size 5137
src/streamlit_app.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import time
4
 
5
  import arxiv
 
6
  import streamlit as st
7
  from transformers import pipeline
8
 
@@ -20,7 +21,14 @@ def load_pipeline():
20
  return pipeline("text-classification", model=model_path, top_k=None)
21
 
22
 
 
 
 
 
 
 
23
  classifier = load_pipeline()
 
24
 
25
  with st.sidebar:
26
  st.subheader("About the Model")
@@ -56,7 +64,11 @@ with st.expander("Load from link", expanded=True):
56
  st.text_input("Title", key="auto_title")
57
  st.text_area("Abstract", height=200, key="auto_abstract")
58
 
59
- if st.button("Classify"):
 
 
 
 
60
  title = st.session_state["auto_title"]
61
  abstract = st.session_state["auto_abstract"]
62
 
@@ -66,6 +78,14 @@ if st.button("Classify"):
66
 
67
  text = f"{title.strip()}. {abstract.strip()}" if title.strip() else abstract.strip()
68
 
 
 
 
 
 
 
 
 
69
  with st.spinner("Classifying paper"):
70
  start_time = time.time()
71
  predictions = classifier(text)[0]
 
3
  import time
4
 
5
  import arxiv
6
+ import joblib
7
  import streamlit as st
8
  from transformers import pipeline
9
 
 
21
  return pipeline("text-classification", model=model_path, top_k=None)
22
 
23
 
24
+ @st.cache_resource
25
+ def load_gatekeeper():
26
+ base_dir = os.path.dirname(os.path.abspath(__file__))
27
+ return joblib.load(os.path.join(base_dir, "ood_detector.pkl"))
28
+
29
+
30
  classifier = load_pipeline()
31
+ gatekeeper = load_gatekeeper()
32
 
33
  with st.sidebar:
34
  st.subheader("About the Model")
 
64
  st.text_input("Title", key="auto_title")
65
  st.text_area("Abstract", height=200, key="auto_abstract")
66
 
67
+ col_btn, col_bypass = st.columns([3, 1])
68
+ classify_clicked = col_btn.button("Classify", use_container_width=True)
69
+ bypass_gatekeeper = col_bypass.toggle("⚡ Bypass Gatekeeper")
70
+
71
+ if classify_clicked:
72
  title = st.session_state["auto_title"]
73
  abstract = st.session_state["auto_abstract"]
74
 
 
78
 
79
  text = f"{title.strip()}. {abstract.strip()}" if title.strip() else abstract.strip()
80
 
81
+ if not bypass_gatekeeper:
82
+ is_science = gatekeeper.predict([text])[0]
83
+ if is_science == 0:
84
+ st.warning(
85
+ "This text is NOT a scientific paper. Please enter a valid scientific abstract."
86
+ )
87
+ st.stop()
88
+
89
  with st.spinner("Classifying paper"):
90
  start_time = time.time()
91
  predictions = classifier(text)[0]
train_model.ipynb DELETED
The diff for this file is too large to render. See raw diff