| """ |
| Created on Mon Mar 28 01:04:50 2022 |
| @author: adeep |
| """ |
| from fnmatch import translate |
| import cv2 as cv |
| import tempfile |
| import numpy as np |
| import pandas as pd |
| import streamlit as st |
| import joblib |
| import os |
| from moviepy.editor import VideoFileClip |
| import speech_recognition as sr |
| from pydub import AudioSegment |
| from pydub.silence import split_on_silence |
| import transformers |
| from transformers import pipeline |
| import nltk |
| nltk.download('punkt') |
| nltk.download('averaged_perceptron_tagger') |
| import nltk |
| nltk.download('punkt') |
| nltk.download('averaged_perceptron_tagger') |
| from nltk.tokenize import sent_tokenize |
| import re |
| from utils import get_translation, welcome, get_large_audio_transcription |
|
|
| from PIL import Image |
|
|
| |
|
|
def _extract_transcript(video):
    """Return the transcript of the uploaded video's audio, or None.

    Args:
        video: The object returned by ``st.file_uploader``; only its
            ``read()`` method is used here.

    Returns:
        Whatever ``get_large_audio_transcription`` returns for the extracted
        audio (presumably a string -- confirm against ``utils``), or ``None``
        when the clip has no audio track.
    """
    # A private scratch directory keeps concurrent sessions from overwriting
    # each other's files and removes both files once we are done with them.
    with tempfile.TemporaryDirectory() as workdir:
        video_path = os.path.join(workdir, "upload.mp4")
        audio_path = os.path.join(workdir, "movie.wav")

        # Closing the file before handing its path to moviepy guarantees that
        # every uploaded byte has reached the disk when it is read by name.
        with open(video_path, "wb") as handle:
            handle.write(video.read())

        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                return None
            clip.audio.write_audiofile(audio_path)
        finally:
            # Release the reader so the scratch directory can be deleted.
            clip.close()

        return get_large_audio_transcription(audio_path)


def _polish_summary(text):
    """Capitalize each sentence of *text* and drop spaces before punctuation."""
    sentences = sent_tokenize(text, language='english')
    # Upper-case only the first character: str.capitalize() would also
    # lower-case the remainder and mangle acronyms and proper nouns.
    fixed = [sentence[:1].upper() + sentence[1:] for sentence in sentences]
    return re.sub(r" (?=[\.,'!?:;])", "", ' '.join(fixed))


def _build_summary(video, min_c, max_c):
    """Transcribe *video* and summarize it; return None after reporting a problem.

    Args:
        video: Uploaded video file object from ``st.file_uploader``.
        min_c: Value passed to the summarizer as ``min_length``.
        max_c: Value passed to the summarizer as ``max_length``.

    Returns:
        The cleaned-up summary string, or ``None`` when no summary could be
        produced (the reason is shown to the user with ``st.error``).
    """
    # The two sliders are independent, so reject an inverted range before
    # doing any expensive work.
    if min_c > max_c:
        st.error("The minimum length cannot be greater than the maximum length.")
        return None

    whole_text = _extract_transcript(video)
    if not whole_text:
        st.error("No speech could be transcribed from the uploaded video.")
        return None

    summarizer = pipeline("summarization", model="t5-large", tokenizer="t5-large", framework="pt")
    summarized = summarizer(whole_text, min_length=min_c, max_length=max_c)
    return _polish_summary(summarized[0]['summary_text'])


def main():
    """Render the "Summarize Text" Streamlit page.

    The page lets the user upload an mp4, summarizes its speech when the
    "Summarize" button is pressed, keeps the latest summary in
    ``st.session_state`` so it survives reruns, and optionally shows a
    translation of that summary in the sidebar.
    """
    st.title("Summarize Text")
    video = st.file_uploader("Choose a file", type=['mp4'])
    button = st.button("Summarize")

    max_c = st.sidebar.slider('Select max words', 50, 500, step=10, value=150)
    min_c = st.sidebar.slider('Select min words', 10, 450, step=10, value=50)

    with st.spinner("Running.."):
        if button and video:
            summ = _build_summary(video, min_c, max_c)
            if summ is not None:
                # Always overwrite, so that summarizing a second video
                # replaces the summary kept from the first one.
                st.session_state.summary = True
                st.session_state.summarization = summ
                st.session_state.gen_summ = True

    # Named so that it does not shadow the file-level
    # ``from fnmatch import translate``.
    translate_choice = st.sidebar.radio(
        'Do you want to translate the text to any different language?',
        ('No', 'Yes'),
    )

    summarized_text = None
    gen_summ = False
    if 'summary' in st.session_state:
        summarized_text = st.session_state.summarization
        st.write(summarized_text)
        gen_summ = st.session_state.gen_summ

    if translate_choice != 'Yes':
        st.write('')
        return

    if not gen_summ:
        st.error("The summary has not been generated yet. Please generate the summary first and then translate")
        return

    lang_list = ['Hindi', 'Marathi', 'Malayalam', 'Kannada', 'Telugu', 'Tamil', 'Oriya', 'Bengali', 'Gujarati', 'Urdu']
    s_type = st.sidebar.selectbox('Select the Language in which you want to Translate:', lang_list)
    st.sidebar.write('You selected:', s_type)

    translation = get_translation(source='English', dest=s_type, text=summarized_text)
    st.sidebar.write(translation)
| |
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()