Anirban Saha committed on
Commit
ac2ef7e
·
1 Parent(s): 5dd9985

Added Application Code

Browse files
Files changed (3) hide show
  1. README.md +6 -6
  2. app.py +64 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -10,28 +10,28 @@ pinned: false
10
 
11
  # Configuration
12
 
13
- `title`: _string_
14
  Display title for the Space
15
 
16
  `emoji`: _string_
17
  Space emoji (emoji-only character allowed)
18
 
19
- `colorFrom`: _string_
20
  Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
21
 
22
- `colorTo`: _string_
23
  Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
24
 
25
- `sdk`: _string_
26
  Can be either `gradio` or `streamlit`
27
 
28
  `sdk_version` : _string_
29
  Only applicable for `streamlit` SDK.
30
  See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
31
 
32
- `app_file`: _string_
33
  Path to your main application file (which contains either `gradio` or `streamlit` Python code).
34
  Path is relative to the root of the repository.
35
 
36
- `pinned`: _boolean_
37
  Whether the Space stays on top of your list.
 
10
 
11
  # Configuration
12
 
13
+ `title`: `Wikisummarizer`
14
  Display title for the Space
15
 
16
  `emoji`: _string_
17
  Space emoji (emoji-only character allowed)
18
 
19
+ `colorFrom`: `indigo`
20
  Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
21
 
22
+ `colorTo`: `pink`
23
  Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
24
 
25
+ `sdk`: `streamlit`
26
  Can be either `gradio` or `streamlit`
27
 
28
  `sdk_version` : _string_
29
  Only applicable for `streamlit` SDK.
30
  See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
31
 
32
+ `app_file`: `app.py`
33
  Path to your main application file (which contains either `gradio` or `streamlit` Python code).
34
  Path is relative to the root of the repository.
35
 
36
+ `pinned`: `True`
37
  Whether the Space stays on top of your list.
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import bs4 as bs
3
+ import urllib.request
4
+ import re
5
+
6
def _fetch_article_text(url):
    """Download *url* and return the cleaned text of its ``<p>`` elements.

    Bracketed numeric citation markers such as ``[12]`` are removed and runs
    of whitespace are collapsed to single spaces.

    Raises:
        ValueError: if *url* is not an ``http``/``https`` URL.
        OSError: if the page cannot be retrieved (``urllib.error.URLError``
            is a subclass of ``OSError``).
    """
    from urllib.parse import urlparse

    # urlopen also understands file:// and ftp:// URLs; restrict the scheme so
    # the text box cannot be used to read files from the machine running the app.
    if urlparse(url).scheme not in ('http', 'https'):
        raise ValueError('only http:// and https:// URLs are supported')

    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url) as response:
        article = response.read()

    parsed_article = bs.BeautifulSoup(article, 'lxml')
    article_text = ''.join(p.text for p in parsed_article.find_all('p'))
    article_text = re.sub(r'\[[0-9]*\]', ' ', article_text)
    return re.sub(r'\s+', ' ', article_text)


def _summarize(article_text, sent_num):
    """Return the *sent_num* highest-scoring sentences of *article_text*.

    Each non-stopword is weighted by its frequency relative to the most
    frequent word; a sentence's score is the sum of the weights of its words.
    Sentences of 30 or more space-separated tokens are never selected.
    Returns an empty string when no sentence can be scored.
    """
    import heapq
    import nltk

    sentence_list = nltk.sent_tokenize(article_text)

    # Letters-only copy of the text, used solely for counting words.
    formatted_article_text = re.sub('[^a-zA-Z]', ' ', article_text)
    formatted_article_text = re.sub(r'\s+', ' ', formatted_article_text)

    stopwords = set(nltk.corpus.stopwords.words('english'))
    word_frequencies = {}
    for word in nltk.word_tokenize(formatted_article_text):
        # Lower-case before counting: the stopword list is lower-case and the
        # scoring loop below looks words up in lower-case, so mixed-case keys
        # would let "The" through the filter and never match when scoring.
        word = word.lower()
        if word not in stopwords:
            word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        # Nothing to weight (e.g. the page had no paragraph text).
        return ''

    maximum_frequency = max(word_frequencies.values())
    word_frequencies = {
        word: count / maximum_frequency
        for word, count in word_frequencies.items()
    }

    sentence_scores = {}
    for sent in sentence_list:
        # Skip long sentences up front instead of re-checking for every word.
        if len(sent.split(' ')) >= 30:
            continue
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies:
                sentence_scores[sent] = (
                    sentence_scores.get(sent, 0) + word_frequencies[word]
                )

    summary_sentences = heapq.nlargest(
        sent_num, sentence_scores, key=sentence_scores.get
    )
    return ' '.join(summary_sentences)


def main():
    """Render the Wikipedia Summarizer page.

    Asks for a URL, downloads the page and extracts its paragraph text, then
    asks how many sentences the summary should contain and displays the
    highest-scoring sentences. Invalid input and download failures are
    reported on the page instead of raising.
    """
    st.title("Wikipedia Summarizer")
    url_topull = st.text_input("Enter the Wikipedia URL to pull - ").strip()
    if not url_topull:
        return

    try:
        article_text = _fetch_article_text(url_topull)
    except (ValueError, OSError) as err:
        st.error(f"Could not retrieve the article: {err}")
        return

    # Tokenizer model and stopword list required by _summarize.
    import nltk
    nltk.download('punkt')
    nltk.download('stopwords')

    number = st.text_input('How many sentences long do you want your summary to be?')
    if number == '':
        return

    try:
        sent_num = int(number)
    except ValueError:
        st.error("Please enter a whole number of sentences.")
        return
    if sent_num < 1:
        st.error("The summary must be at least one sentence long.")
        return

    summary = _summarize(article_text, sent_num)
    if not summary:
        st.warning("No summarizable text was found at that URL.")
        return

    st.markdown("# Summary: ")
    st.write(summary)
62
+
63
# Script entry point: build the page when this file is executed directly.
if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gensim
2
+ bs4
3
+ lxml
4
+ nltk