# Source: Hugging Face Space by "Finish-him" — commit aded655 (verified), "Update app.py".
# NOTE(review): this file contains two concatenated revisions of app.py; the
# second (below the first `demo.launch()`) redefines everything from the first.
import gradio as gr
import os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
# Resolve the data directory: prefer "Data", fall back to "data", creating it
# if neither exists so PyPDFDirectoryLoader always has a valid target.
DATA_DIR = "Data" if os.path.exists("Data") else "data"
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
print(f"Usando diretorio: {DATA_DIR}")
print(f"Arquivos: {os.listdir(DATA_DIR) if os.path.exists(DATA_DIR) else 'vazio'}")

# Load every PDF in the data directory and split into overlapping chunks
# sized for embedding and retrieval.
loader = PyPDFDirectoryLoader(DATA_DIR)
documents = loader.load()
print(f"Documentos carregados: {len(documents)}")

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(documents)

# Multilingual sentence embeddings (the documents are in Portuguese).
model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

if docs:
    db = FAISS.from_documents(docs, embeddings)
else:
    # No PDFs found: build an empty FAISS store so the app still starts.
    import faiss
    from langchain_community.docstore.in_memory import InMemoryDocstore

    # Derive the vector dimension from the embedding model instead of
    # hard-coding 384, so swapping `model_name` cannot silently break
    # this fallback branch.
    dimension = len(embeddings.embed_query("dimension probe"))
    index = faiss.IndexFlatL2(dimension)
    db = FAISS(embedding_function=embeddings, index=index,
               docstore=InMemoryDocstore(), index_to_docstore_id={})

# Retrieve the 3 most similar chunks per query.
retriever = db.as_retriever(search_kwargs={"k": 3})
# Using HuggingFaceEndpoint (modern replacement for HuggingFaceHub).
# LLM served through the Hugging Face Inference API; requires the
# HUGGINGFACEHUB_API_TOKEN environment variable to be set (os.getenv
# returns None otherwise, which will fail at request time).
llm = HuggingFaceEndpoint(
repo_id="mistralai/Mistral-7B-Instruct-v0.3",
task="text-generation",
huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
max_new_tokens=512,
temperature=0.7
)
# System prompt for the RAG chain (Portuguese, user-facing); the literal
# "{context}" placeholder is filled by create_stuff_documents_chain with
# the retrieved document chunks.
system_prompt = (
"Você é o Prometheus, um assistente especializado no DETRAN-RJ. "
"Use o contexto abaixo para responder às perguntas dos usuários de forma clara e profissional. "
"Se não souber a resposta com base no contexto, diga que não encontrou a informação específica, "
"mas tente ajudar com o que for possível.\n\n"
"{context}"
)
prompt = ChatPromptTemplate.from_messages([
("system", system_prompt),
("human", "{input}"),
])
# Chain that stuffs the retrieved docs into {context} and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full pipeline: retrieve top-k chunks for {input}, then answer with the LLM.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
def respond(message, history):
    """Answer one chat message through the RAG chain.

    `history` is supplied by gr.ChatInterface but is not used; every
    query is answered statelessly from the retrieved documents.
    """
    try:
        # Any failure (network, token, retrieval) is surfaced as chat text
        # rather than crashing the UI — this is the top-level boundary.
        return rag_chain.invoke({"input": message})["answer"]
    except Exception as e:
        return f"Erro ao processar consulta: {str(e)}"
# Chat UI wired to the RAG responder.
demo = gr.ChatInterface(
    fn=respond,
    title="Prometheus — Agente Especialista DETRAN-RJ",
    description="IA treinada com regulamentações e manuais do DETRAN-RJ para suporte ao cidadão.",
    examples=[
        "Como renovar a CNH?",
        "Quais os documentos para transferência de veículo?",
        "O que é o GRT?",
    ],
)

if __name__ == "__main__":
    demo.launch()
import gradio as gr
import os
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.llms import HuggingFaceHub
# Resolve the data directory ("Data" preferred, "data" as fallback) and
# make sure it exists before pointing the PDF loader at it.
DATA_DIR = "Data" if os.path.exists("Data") else "data"
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)
print(f"Usando diretorio: {DATA_DIR}")
print(f"Arquivos: {os.listdir(DATA_DIR) if os.path.exists(DATA_DIR) else 'vazio'}")

loader = PyPDFDirectoryLoader(DATA_DIR)
documents = loader.load()
print(f"Documentos carregados: {len(documents)}")

# Split into overlapping chunks sized for embedding and retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(documents)

# Multilingual sentence embeddings (the documents are in Portuguese).
model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

if docs:
    db = FAISS.from_documents(docs, embeddings)
else:
    # No PDFs found: build an empty FAISS store so the app still starts.
    import faiss
    from langchain_community.docstore.in_memory import InMemoryDocstore

    # Derive the vector dimension from the embedding model instead of
    # hard-coding 384, so swapping `model_name` cannot silently break
    # this fallback branch.
    dimension = len(embeddings.embed_query("dimension probe"))
    index = faiss.IndexFlatL2(dimension)
    db = FAISS(embedding_function=embeddings, index=index,
               docstore=InMemoryDocstore({}), index_to_docstore_id={})
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# NOTE(review): HuggingFaceHub is deprecated in langchain-community in favor
# of HuggingFaceEndpoint (used by the other revision of this app) — consider
# migrating. Reads HUGGINGFACEHUB_API_TOKEN from the environment implicitly.
llm = HuggingFaceHub(
repo_id=repo_id,
task="text-generation",
model_kwargs={"temperature": 0.2, "max_new_tokens": 1024}
)
# System prompt for the contract-analysis chain (Portuguese, user-facing);
# "{context}" is filled by create_stuff_documents_chain with retrieved chunks.
system_prompt = (
"Voce e um assistente especializado em analise de contratos do DETRAN-RJ. "
"Use o contexto fornecido para responder de forma precisa e concisa. "
"Se voce nao souber a resposta, diga que nao sabe. "
"Contexto: {context}"
)
prompt = ChatPromptTemplate.from_messages([
("system", system_prompt),
("human", "{input}"),
])
# Chain that stuffs the retrieved docs into {context} and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG pipeline: retrieve the 3 most similar chunks, then answer.
qa_chain = create_retrieval_chain(
db.as_retriever(search_kwargs={"k": 3}),
question_answer_chain
)
def process_query(query):
    """Run `query` through the RAG chain and return the answer text.

    Falls back to a fixed Portuguese message when the chain returns no
    "answer" key, and surfaces any exception as UI text instead of raising.
    """
    try:
        outcome = qa_chain.invoke({"input": query})
        return outcome.get("answer", "Nao foi possivel encontrar uma resposta.")
    except Exception as e:
        return f"Erro ao processar: {str(e)}"
# Question/answer UI over the contract documents.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Assistente de Analise do Contrato DETRAN-RJ")
    gr.Markdown("Faca uma pergunta sobre os documentos do contrato.")
    with gr.Row():
        inp = gr.Textbox(label="Sua pergunta", placeholder="Ex: Qual o valor total do contrato?", scale=4)
        btn = gr.Button("Enviar", variant="primary", scale=1)
    out = gr.Markdown(label="Resposta")
    # Both the button click and pressing Enter run the same handler.
    btn.click(fn=process_query, inputs=inp, outputs=out)
    inp.submit(fn=process_query, inputs=inp, outputs=out)
    gr.Examples(
        examples=[
            "Qual o objeto do contrato?",
            "Qual o valor total?",
            "Quem e o gestor?",
            "Qual o prazo de vigencia?",
        ],
        inputs=inp,
    )

demo.launch()