import os
import sys

import pinecone
from tqdm import tqdm

from langchain.llms import OpenAI
from langchain.text_splitter import SpacyTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
|
|
| |
# --- Credentials / connection settings --------------------------------------
# NOTE(security): API keys must not be hard-coded in source files. Each value
# is now read from an environment variable first; the original placeholder
# string is kept as the fallback, so behavior is unchanged when no variable
# is set. (Placeholders are Chinese for "your key" / "your index" / etc.)
openai_key = os.environ.get("OPENAI_API_KEY", "你的key")
pinecone_key = os.environ.get("PINECONE_API_KEY", "你的key")
pinecone_index = os.environ.get("PINECONE_INDEX", "你的库")
pinecone_environment = os.environ.get("PINECONE_ENVIRONMENT", "你的Environment")
pinecone_namespace = os.environ.get("PINECONE_NAMESPACE", "你的Namespace")
|
|
| |
# Route all HTTP(S) traffic through a local proxy on port 7890 (typical
# Clash/V2Ray setup) so the OpenAI and Pinecone endpoints are reachable.
_PROXY_URL = 'http://127.0.0.1:7890'
os.environ['HTTP_PROXY'] = _PROXY_URL
os.environ['HTTPS_PROXY'] = _PROXY_URL
|
|
| |
# Authenticate against Pinecone and grab a handle on the target index.
# NOTE(review): `index` is never referenced below — Pinecone.from_documents()
# resolves the index by name on its own; the handle is kept only for parity
# with the original script.
pinecone.init(api_key=pinecone_key, environment=pinecone_environment)
index = pinecone.Index(pinecone_index)
|
|
| |
# Embedding model used to vectorize each document chunk before upserting.
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
|
|
| |
# Sentence-aware splitter driven by the small Chinese spaCy pipeline:
# ~1000-character chunks with 200 characters of overlap between neighbors
# so context is not lost at chunk boundaries.
text_splitter = SpacyTextSplitter(
    pipeline='zh_core_web_sm',
    chunk_size=1000,
    chunk_overlap=200,
)
|
|
| |
# Recursively load every .txt file under ../docs, one Document per file.
# NOTE(review): TextLoader falls back to the platform default encoding —
# confirm the source files are UTF-8 (or pass an explicit encoding) before
# running on non-UTF-8 systems.
loader = DirectoryLoader('../docs', glob="**/*.txt", loader_cls=TextLoader)
documents = loader.load()
|
|
| |
# Split the loaded documents into overlapping chunks, then upsert the chunks
# into the Pinecone index one at a time so tqdm shows real upload progress.
split_text = text_splitter.split_documents(documents)
try:
    for document in tqdm(split_text):
        Pinecone.from_documents([document], embeddings, index_name=pinecone_index)
except Exception as e:
    # Top-level boundary: report the failure on stderr and exit non-zero.
    # The original used `quit()`, which is an interactive-session helper
    # injected by the `site` module (absent under `python -S` / frozen apps);
    # sys.exit(1) is the correct way to abort a script with an error status.
    print(f"Error: {e}", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|