"""Gradio front end: a RAG chatbot tab, a keyword-search tab and an about tab."""

import re

from huggingface_hub import snapshot_download

# Download legislature vectordatabase
REPO_ID = "TomData/speeches-of-the-german-parliament"
LOCAL_DIR = "src/FAISS"
# NOTE(review): the download deliberately runs before `src.chatbot` is
# imported below -- presumably that module reads the index from LOCAL_DIR at
# import time. Keep this ordering unless that is confirmed otherwise.
snapshot_download(repo_id=REPO_ID, local_dir=LOCAL_DIR, repo_type="dataset")

import gradio as gr
#from gradio_calendar import Calendar
#from datetime import datetime
from src.chatbot import chatbot, keyword_search

# Only required when running locally
# import os
# from dotenv import load_dotenv
# from huggingface_hub import login
# load_dotenv(dotenv_path=".env")
# login(token=os.getenv("HUGGINGFACEHUB_API_TOKEN"))  # Your token here

# Define important variables
# "All" followed by the 20th down to the 1st legislature period.
legislature_periods = ["All"] + [
    f"{number}. Legislaturperiode" for number in range(20, 0, -1)
]

partys = [
    'All', 'CDU/CSU', 'SPD', 'AfD', 'Grüne', 'FDP', 'DIE LINKE.', 'GB/BHE',
    'DRP', 'WAV', 'NR', 'BP', 'FU', 'SSW', 'KPD', 'DA', 'FVP', 'DP', 'Z',
    'PDS', 'Fraktionslos', 'not found', 'Gast',
]


def _safe_filename(keyword, extension):
    """Build a download file name from a user-typed keyword.

    Every run of characters that is not a word character or a hyphen
    (path separators, dots, whitespace, ...) is collapsed into a single
    underscore, so the result can never point outside the working directory.
    Falls back to ``results`` when nothing usable remains (empty or ``None``
    keyword).

    Args:
        keyword: Raw text from the keyword textbox; may be empty or ``None``.
        extension: File extension without the leading dot.

    Returns:
        A plain file name of the form ``<stem>.<extension>``.
    """
    stem = re.sub(r"[^\w\-]+", "_", keyword or "").strip("_")
    return f"{stem or 'results'}.{extension}"


# Define Gradio App Layout
with gr.Blocks() as App:
    with gr.Tab("ChatBot"):
        with gr.Blocks(fill_height=True):
            with gr.Accordion(open=False, label="Filter"):
                # Apply RAG using chatbot function from file chatbot.py
                db_inputs = gr.Dropdown(
                    choices=legislature_periods,
                    value="All",
                    multiselect=True,
                    label="Legislature",
                    info="Select a combination of legislatures as basis for the chatbot's replies",
                    show_label=True,
                )
                prompt_language = gr.Dropdown(
                    choices=["DE", "EN"],
                    value="DE",
                    label="Language",
                    info="Choose output language",
                    multiselect=False,
                )

            gr.ChatInterface(
                chatbot,
                title="PoliticsToYou",
                description="Ask anything about your favorite political topic from any legislature period",
                examples=[
                    ["Wie steht die CDU zur Cannabislegalisierung?", "All", "DE"],
                    ["Wie steht die FDP zur Rente?", "All", "DE"],
                    ["Was sagten die Parteien in der ersten Legislaturperiode über die nazi Vergangenheit?", "1. Legislaturperiode", "DE"],
                    ["Wie wird die Ehe für alle diskutiert?", "18. Legislaturperiode", "DE"],
                    ["How is the GDR perceived?", "11. Legislaturperiode", "EN"],
                ],
                cache_examples=True,  # true increases loading time
                additional_inputs=[db_inputs, prompt_language],
            )

    with gr.Tab("KeywordSearch"):
        with gr.Blocks() as Block:
            # Keyword Input
            keyword_box = gr.Textbox(label='keyword')

            # Additional Input (hidden)
            with gr.Accordion('Filter', open=False):
                # Row orientation
                with gr.Row() as additional_input:
                    n_slider = gr.Slider(
                        label="Number of Results",
                        info="Other filters reduces the returned results",
                        minimum=1,
                        maximum=100,
                        step=1,
                        value=10,
                    )
                    party_dopdown = gr.Dropdown(value='All', choices=partys, label='Party')
                    # ToDo: Add date or legislature filter as input
                    #start_date = Calendar(value="1949-01-01", type="datetime", label="Select start date", info="Click the calendar icon to bring up the calendar.", interactive=True)
                    #end_date = Calendar(value=datetime.today().strftime('%Y-%m-%d'), type="datetime", label="Select end date", info="Click the calendar icon to bring up the calendar.", interactive=True)

            search_btn = gr.Button('Search')

            # Result area stays hidden until the first search has run.
            with gr.Column(visible=False) as output_col:
                results_df = gr.Dataframe(label='Results', interactive=False)

                # Download results from keyword search
                with gr.Accordion('Would you like to download your results?', open=False) as download_row:
                    with gr.Row():
                        ftype_dropdown = gr.Dropdown(choices=["csv", "excel", "json"], label="Format")
                        export_btn = gr.Button('Export')
                    file = gr.File(file_types=[".xlsx", ".csv", ".json"], visible=False)

            # Keyword Search on click
            def search(keyword, n, party):  # ToDo: Include party and timedate
                """Run the keyword search and reveal the result column.

                Args:
                    keyword: Text from the keyword textbox.
                    n: Value of the "Number of Results" slider.
                    party: Selected entry of the party dropdown.

                Returns:
                    A component-keyed update dict: ``output_col`` becomes
                    visible and ``results_df`` receives whatever
                    ``keyword_search`` returns for these arguments.
                """
                return {
                    output_col: gr.Column(visible=True),
                    results_df: keyword_search(query=keyword, n=n, party_filter=party),
                }

            search_btn.click(
                fn=search,
                inputs=[keyword_box, n_slider, party_dopdown],
                outputs=[output_col, results_df],
            )

            # Export data to a downloadable format
            def export(df, keyword, ftype=None):
                """Write the result table to disk and expose it for download.

                The file name is derived from the keyword via
                ``_safe_filename`` so that user input cannot inject path
                components. Any ``ftype`` other than "csv" or "json"
                (including ``None`` and "excel") produces an .xlsx file.

                Args:
                    df: Current value of the results dataframe component.
                    keyword: Text from the keyword textbox, used as file stem.
                    ftype: Selected export format, or ``None`` if unset.

                Returns:
                    A visible ``gr.File`` pointing at the written file.
                """
                if ftype == "csv":
                    path = _safe_filename(keyword, "csv")
                    df.to_csv(path, index=False)
                elif ftype == "json":
                    path = _safe_filename(keyword, "json")
                    df.to_json(path, index=True)
                else:
                    path = _safe_filename(keyword, "xlsx")
                    df.to_excel(path, index=True)
                return gr.File(value=path, visible=True)

            export_btn.click(
                fn=export,
                inputs=[results_df, keyword_box, ftype_dropdown],
                outputs=[file],
            )

    with gr.Tab("About"):
        # The text is kept at column 0 on purpose: the literal is passed to
        # Markdown verbatim, so source indentation would become part of it.
        gr.Markdown("""
Would you like to gain insights into political debates or reveal party positions on specific topics from any legislature?
You can use the ChatBot to ask all your questions or search for related speech content in the Keyword Search section.
Looking forward to your feedback!
Big thank you to the OpenDiscourse team for creating the underlying speeches corpus. Check out their website here.
""")

if __name__ == "__main__":
    App.launch(share=False)  # true not supported on hf spaces