elly99 committed on
Commit
9397062
·
verified ·
1 Parent(s): f7b7d61

Create ranking_originality.py

Browse files
Files changed (1) hide show
  1. src/evaluation/ranking_originality.py +109 -0
src/evaluation/ranking_originality.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# © 2025 Elena Marziali — Code released under Apache 2.0 license.
# See LICENSE in the repository for details.
# Removal of this copyright is prohibited.

# Bug fix: `np` and `RandomForestRegressor` are used below, but the file's
# only import originally appeared *after* this code ran, so the script
# failed with NameError on the first `np.array` call. Import them here.
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Sample data for ranking: one row per paper.
# Columns: citations, h-index, peer-review flag (0/1), publication year.
data = np.array([
    [120, 45, 1, 2023],  # Citations, h-index, peer review, year
    [50, 30, 1, 2020],
    [10, 15, 0, 2018]
])

labels = [95, 70, 30]  # Academic impact score (regression target)

# Model training
ranking_model = RandomForestRegressor(n_estimators=100)
ranking_model.fit(data, labels)

# **Ranking prediction**
def calculate_impact_score(citations, h_index, peer_review, publication_year):
    """Predict an academic impact score for a single paper.

    Parameters:
        citations: citation count.
        h_index: author h-index.
        peer_review: 1 if peer-reviewed, 0 otherwise.
        publication_year: year of publication.

    Returns:
        The model's predicted score, clamped to be non-negative.
    """
    paper_data = np.array([[citations, h_index, peer_review, publication_year]])
    score = ranking_model.predict(paper_data)
    return max(0, score[0])  # Ensure non-negative

# Usage example
impact_score = calculate_impact_score(80, 40, 1, 2024)
print(f"Estimated score: {impact_score}")
27
+
28
# Ranking model
# NOTE(review): this section duplicates the data/training code that appears
# earlier in the file and retrains the model from scratch — it looks like a
# copy-paste remnant and should be consolidated. Statements are kept so the
# module-level names (`data`, `labels`, `ranking_model`) keep their values.
# Bug fix: `np` was used below without ever being imported; add the import
# next to the existing sklearn one.
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Sample data for ranking: one row per paper.
data = np.array([
    [120, 45, 1, 2023],  # Citations, h-index, peer review, year
    [50, 30, 1, 2020],
    [10, 15, 0, 2018]
])

labels = [95, 70, 30]  # Academic impact score

# Model training
ranking_model = RandomForestRegressor(n_estimators=100)
ranking_model.fit(data, labels)

# Ranking prediction
new_paper = np.array([[80, 40, 1, 2024]])
score = ranking_model.predict(new_paper)
print(f"Estimated score: {score[0]}")
48
+
49
# === Scientific originality evaluation ===
def evaluate_hypothesis_novelty(hypothesis, existing_articles, threshold=0.7):
    """
    Compares the hypothesis with existing articles using semantic embeddings.

    Parameters:
        hypothesis: text of the hypothesis to evaluate.
        existing_articles: list of dicts; entries without an "abstract" key
            are skipped. Titles are read from the "title" key.
        threshold: cosine-similarity cutoff above which an article is
            reported as similar (default 0.7).

    Returns a dict with:
        - novelty_score: average similarity score (rounded to 3 decimals)
        - similar_articles: titles of articles above the threshold
        - assessment: qualitative assessment of originality
    On any error, returns a zero-score dict with an error assessment.
    """
    try:
        # Bug fix: the original built embeddings only from articles that have
        # an "abstract", but then looked titles up by index in the *unfiltered*
        # existing_articles list — mis-attributing "similar" titles whenever
        # any entry lacked an abstract. Filter once and keep both lists aligned.
        articles_with_abstract = [a for a in existing_articles if "abstract" in a]

        # NOTE(review): `model_embedding` is defined elsewhere in the project;
        # presumably a sentence-embedding model with an `.encode(list[str])`
        # API returning a 2-D array — confirm against its definition.
        emb_hypothesis = model_embedding.encode([hypothesis])
        emb_articles = model_embedding.encode(
            [a["abstract"] for a in articles_with_abstract]
        )

        # Cosine similarity between the hypothesis and each abstract.
        similarity = np.dot(emb_hypothesis, emb_articles.T) / (
            np.linalg.norm(emb_hypothesis) * np.linalg.norm(emb_articles, axis=1)
        )
        average = round(float(np.mean(similarity)), 3)

        similar_articles = [
            articles_with_abstract[i]["title"]
            for i, score in enumerate(similarity[0]) if score > threshold
        ]

        if average < 0.4:
            assessment = "High originality: hypothesis is rarely present in the literature."
        elif average < 0.7:
            assessment = "Moderate originality: related concepts exist."
        else:
            assessment = "Low originality: hypothesis is already widely discussed."

        return {
            "novelty_score": average,
            "similar_articles": similar_articles,
            "assessment": assessment
        }

    except Exception as e:
        # Best-effort fallback: log and return a neutral error result rather
        # than propagating, matching the rest of the pipeline's behavior.
        logging.error(f"[evaluate_novelty] Error during originality evaluation: {e}")
        return {
            "novelty_score": 0.0,
            "similar_articles": [],
            "assessment": "Error during originality evaluation."
        }
92
+
93
# Automated paper review with AI
async def review_paper(paper_text):
    """Run automated QA on a paper: methodology check, then citation check.

    Returns a dict with the results of both checks under the keys
    "methodology" and "citations".
    """
    methodology_result = await verify_methodology(paper_text)
    citation_result = await verify_citations(paper_text)
    return {
        "methodology": methodology_result,
        "citations": citation_result,
    }
99
+
100
async def validate_hypothesis(hypothesis):
    """Search the literature for a hypothesis and score the evidence.

    Returns a (score, summary) tuple: a numeric impact score and a
    human-readable summary of the sources found.
    """
    sources = await search_multi_database(hypothesis)
    # NOTE(review): `calculate_impact_score` defined earlier in this file
    # takes four positional arguments (citations, h_index, peer_review,
    # publication_year); calling it with a single `sources` list will raise
    # TypeError. Either a different scoring helper is intended here or the
    # sources must be unpacked per paper — confirm and fix.
    score = calculate_impact_score(sources)  # Based on citations, year, h-index, etc.
    summary = summarize_evidence(sources)
    return score, summary
105
+
106
def summarize_evidence(sources):
    """Render sources as a bulleted list of titles (truncated to 80 chars).

    Entries that are not dicts or lack a "title" key are skipped.
    Returns "No evidence found." when `sources` is empty or falsy.
    """
    if not sources:
        return "No evidence found."
    bullets = []
    for entry in sources:
        if isinstance(entry, dict) and 'title' in entry:
            bullets.append(f"- {entry['title'][:80]}…")
    return "\n".join(bullets)