| from langchain_core.runnables import RunnablePassthrough |
| from langchain_core.output_parsers import StrOutputParser |
| from langchain_community.chat_models import ChatOllama |
| from langchain_core.prompts import ChatPromptTemplate |
| from langchain_pinecone import PineconeVectorStore |
| from langchain_community.embeddings import SentenceTransformerEmbeddings |
|
|
| import os |
| from dotenv import load_dotenv |
| from langchain_community.retrievers import BM25Retriever |
| from langchain.retrievers import EnsembleRetriever |
| from kiwipiepy import Kiwi |
# Read a local .env file (python-dotenv) before anything below needs
# environment-based configuration.
# NOTE(review): presumably this supplies the Pinecone credentials -- confirm.
load_dotenv()


# Single Kiwi analyzer created once at import time and reused by
# kiwi_tokenize() below.
kiwi = Kiwi()
|
|
def kiwi_tokenize(text):
    """Tokenize *text* with the module-level ``kiwi`` analyzer.

    Args:
        text: The string to split into tokens.

    Returns:
        A list containing the ``form`` attribute of every token produced by
        ``kiwi.tokenize(text)``, in the order the analyzer emitted them.
    """
    forms = []
    for morpheme in kiwi.tokenize(text):
        forms.append(morpheme.form)
    return forms
| |
|
|
def retriever(pc, bm25, *, k=4, weights=(0.3, 0.7), search_type="similarity"):
    """Build a hybrid retriever mixing BM25 keyword search with vector search.

    Args:
        pc: Vector store exposing ``as_retriever``.
            NOTE(review): presumably a ``PineconeVectorStore`` -- confirm
            against the caller.
        bm25: Documents used to build the in-memory BM25 index; they are
            tokenized with ``kiwi_tokenize``.
        k: Number of documents each underlying retriever returns.
        weights: Ensemble weights, in the order ``(BM25, vector)``.
        search_type: Search type forwarded to ``pc.as_retriever``. Pass
            ``"mmr"`` to enable maximal-marginal-relevance search on the
            vector side.

    Returns:
        An ``EnsembleRetriever`` combining the BM25 and vector retrievers.
    """
    # The search type belongs on the vector-store retriever. The previous code
    # passed ``search_type="mmr"`` to EnsembleRetriever, which has no such
    # field, so the setting never took effect. The default here keeps the
    # behaviour callers actually got (plain similarity search).
    vector_retriever = pc.as_retriever(
        search_type=search_type,
        search_kwargs={"k": k},
    )

    keyword_retriever = BM25Retriever.from_documents(
        bm25,
        preprocess_func=kiwi_tokenize,
    )
    keyword_retriever.k = k

    # Order of ``retrievers`` must match the order of ``weights``.
    return EnsembleRetriever(
        retrievers=[keyword_retriever, vector_retriever],
        weights=list(weights),
    )
|
|
|
|