Upload 16 files
Browse files- .env +26 -0
- .gitattributes +1 -0
- .gitignore +164 -0
- .vscode/settings.json +3 -0
- __pycache__/zhipuEmbedding.cpython-310.pyc +0 -0
- __pycache__/zhipuLLM.cpython-310.pyc +0 -0
- app.py +158 -0
- data_base/data/rag.md +1 -0
- data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/data_level0.bin +3 -0
- data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/header.bin +3 -0
- data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/length.bin +3 -0
- data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/link_lists.bin +3 -0
- data_base/vector_db/chroma/chroma.sqlite3 +3 -0
- requirements.txt +16 -0
- zhipuEmbedding.py +33 -0
- zhipuEmbeddingsData.py +118 -0
- zhipuLLM.py +172 -0
.env
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OPENAI API 访问密钥配置
|
| 2 |
+
OPENAI_API_KEY = ""
|
| 3 |
+
# 文心 API 访问密钥配置
|
| 4 |
+
# 方式1. 使用应用 AK/SK 鉴权
|
| 5 |
+
# 创建的应用的 API Key
|
| 6 |
+
QIANFAN_AK = ""
|
| 7 |
+
# 创建的应用的 Secret Key
|
| 8 |
+
QIANFAN_SK = ""
|
| 9 |
+
# 方式2. 使用安全认证 AK/SK 鉴权
|
| 10 |
+
# 安全认证方式获取的 Access Key
|
| 11 |
+
QIANFAN_ACCESS_KEY = ""
|
| 12 |
+
# 安全认证方式获取的 Secret Key
|
| 13 |
+
QIANFAN_SECRET_KEY = ""
|
| 14 |
+
|
| 15 |
+
# Ernie SDK 文心 API 访问密钥配置
|
| 16 |
+
EB_ACCESS_TOKEN = ""
|
| 17 |
+
|
| 18 |
+
# 控制台中获取的 APPID 信息
|
| 19 |
+
IFLYTEK_SPARK_APP_ID = ""
|
| 20 |
+
# 控制台中获取的 APIKey 信息
|
| 21 |
+
IFLYTEK_SPARK_API_KEY = ""
|
| 22 |
+
# 控制台中获取的 APISecret 信息
|
| 23 |
+
IFLYTEK_SPARK_API_SECRET = ""
|
| 24 |
+
|
| 25 |
+
# 智谱 API 访问密钥配置
|
| 26 |
+
# SECURITY: a real API key was committed on this line and must be considered leaked — rotate it; keep the value out of version control.
ZHIPUAI_API_KEY = ""
|
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
data_base/vector_db/chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# poetry
|
| 98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
+
# commonly ignored for libraries.
|
| 101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 102 |
+
#poetry.lock
|
| 103 |
+
|
| 104 |
+
# pdm
|
| 105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 106 |
+
#pdm.lock
|
| 107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 108 |
+
# in version control.
|
| 109 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 110 |
+
.pdm.toml
|
| 111 |
+
|
| 112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 113 |
+
__pypackages__/
|
| 114 |
+
|
| 115 |
+
# Celery stuff
|
| 116 |
+
celerybeat-schedule
|
| 117 |
+
celerybeat.pid
|
| 118 |
+
|
| 119 |
+
# SageMath parsed files
|
| 120 |
+
*.sage.py
|
| 121 |
+
|
| 122 |
+
# Environments
|
| 123 |
+
.env
|
| 124 |
+
.venv
|
| 125 |
+
env/
|
| 126 |
+
venv/
|
| 127 |
+
ENV/
|
| 128 |
+
env.bak/
|
| 129 |
+
venv.bak/
|
| 130 |
+
|
| 131 |
+
# Spyder project settings
|
| 132 |
+
.spyderproject
|
| 133 |
+
.spyproject
|
| 134 |
+
|
| 135 |
+
# Rope project settings
|
| 136 |
+
.ropeproject
|
| 137 |
+
|
| 138 |
+
# mkdocs documentation
|
| 139 |
+
/site
|
| 140 |
+
|
| 141 |
+
# mypy
|
| 142 |
+
.mypy_cache/
|
| 143 |
+
.dmypy.json
|
| 144 |
+
dmypy.json
|
| 145 |
+
|
| 146 |
+
# Pyre type checker
|
| 147 |
+
.pyre/
|
| 148 |
+
|
| 149 |
+
# pytype static type analyzer
|
| 150 |
+
.pytype/
|
| 151 |
+
|
| 152 |
+
# Cython debug symbols
|
| 153 |
+
cython_debug/
|
| 154 |
+
|
| 155 |
+
# PyCharm
|
| 156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 160 |
+
*/.idea/
|
| 161 |
+
*/.DS_Store
|
| 162 |
+
*/*/.DS_Store
|
| 163 |
+
.idea/
|
| 164 |
+
.DS_Store
|
.vscode/settings.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"editor.autoIndentOnPaste": true
|
| 3 |
+
}
|
__pycache__/zhipuEmbedding.cpython-310.pyc
ADDED
|
Binary file (1.87 kB). View file
|
|
|
__pycache__/zhipuLLM.cpython-310.pyc
ADDED
|
Binary file (4.92 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CHROMA_TELEMETRY_DISABLED"] = "true"
|
| 3 |
+
from dotenv import load_dotenv, find_dotenv
|
| 4 |
+
from zhipuLLM import ZhipuaiLLM
|
| 5 |
+
from zhipuEmbedding import ZhipuAiEmbeddings
|
| 6 |
+
from langchain_community.vectorstores import Chroma
|
| 7 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 8 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 9 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 10 |
+
from langchain_core.runnables import RunnableBranch
|
| 11 |
+
|
| 12 |
+
#ui
|
| 13 |
+
import gradio as gr
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
_ = load_dotenv(find_dotenv())
|
| 17 |
+
api_key=os.environ["ZHIPUAI_API_KEY"]
|
| 18 |
+
|
| 19 |
+
def combine_docs(docs):
    """Merge the retrieved documents under ``docs["context"]`` into one
    context string, separating individual pages with a blank line."""
    pages = [doc.page_content for doc in docs["context"]]
    return "\n\n".join(pages)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def show_switch_status(switch_state):
    """Echo the RAG checkbox state back to the UI unchanged."""
    return switch_state
|
| 25 |
+
|
| 26 |
+
# 在文件顶部定义转换函数
|
| 27 |
+
def format_chat_history(chatbot):
    """Convert Gradio Chatbot history into LangChain-style chat_history tuples.

    Each Gradio (user, assistant) pair becomes a ("human", ...) tuple
    followed by an ("ai", ...) tuple, preserving conversation order.
    """
    history = []
    for user_text, assistant_text in chatbot:
        history.extend((("human", user_text), ("ai", assistant_text)))
    return history
|
| 34 |
+
|
| 35 |
+
def chatbot_response(input, chatbot, isUseRAG):
    """Produce the next chat turn, optionally grounding the answer via RAG.

    Args:
        input: the user's new question.
        chatbot: Gradio chat history as a list of (user, assistant) pairs.
        isUseRAG: when True, retrieve context from the local vector store
            before answering; otherwise call the LLM directly.

    Returns:
        ``[updated chat history, ""]`` — the empty string clears the input
        textbox, matching the ``outputs=[chatbot, input]`` wiring.
    """
    llm = ZhipuaiLLM(model_name="glm-4-plus", temperature=0.1, api_key=api_key)
    if isUseRAG:
        # System prompt for the answering chain ({context} is filled below).
        system_prompt = (
            "你是一个问答任务的助手。 "
            "请使用检索到的上下文片段回答这个问题。 "
            "如果你不知道答案就说不知道。 "
            "请使用简洁的话语回答用户。"
            "\n\n"
            "{context}"
        )
        qa_prompt = ChatPromptTemplate(
            [
                ("system", system_prompt),
                ("placeholder", "{chat_history}"),
                ("human", "{input}"),
            ]
        )
        # QA chain: merge retrieved docs into `context`, then prompt -> LLM -> str.
        qa_chain = (
            RunnablePassthrough.assign(context=combine_docs)
            | qa_prompt
            | llm
            | StrOutputParser()  # force plain-string output
        )

        # Open the persisted Chroma vector store from disk.
        vectordb = Chroma(
            persist_directory='data_base/vector_db/chroma',
            embedding_function=ZhipuAiEmbeddings()
        )
        retriever = vectordb.as_retriever(search_kwargs={"k": 1})

        # System prompt used to condense the latest question with the history.
        condense_question_system_template = (
            "请根据聊天记录完善用户最新的问题,"
            "如果用户最新的问题不需要完善则返回用户的问题。"
        )
        condense_question_prompt = ChatPromptTemplate([
            ("system", condense_question_system_template),
            ("placeholder", "{chat_history}"),
            ("human", "{input}"),
        ])

        retrieve_docs = RunnableBranch(
            # Branch 1: no chat history -> query the store with the raw question.
            (lambda x: not x.get("chat_history", False), (lambda x: x["input"]) | retriever, ),
            # Branch 2: let the LLM rewrite the question from the history first.
            condense_question_prompt | llm | StrOutputParser() | retriever,
        )

        # History-aware QA chain: stash retrieval results under `context`,
        # then compute the final answer under `answer`.
        qa_history_chain = RunnablePassthrough.assign(
            context=(lambda x: x) | retrieve_docs
        ).assign(answer=qa_chain)

        result = qa_history_chain.invoke({
            "input": input,
            "chat_history": format_chat_history(chatbot)
        })
        chatbot.append((input, result["answer"]))
    else:
        # Plain LLM call without retrieval.
        result = llm.invoke(input)
        chatbot.append((input, result.content))
    # BUG FIX: the original returned `input` here, so the textbox kept the
    # question even though the click wiring documents that it should be
    # cleared; returning "" actually clears it.
    return [chatbot, ""]
|
| 113 |
+
|
| 114 |
+
# 创建界面
|
| 115 |
+
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🤖 RAG 智能聊天机器人
    支持直接调用大模型或结合本地知识库(RAG)回答问题
    """)

    # Conversation panel shared by every turn.
    chatbot = gr.Chatbot(
        label="对话历史",
        height=500,  # chat panel height in px
        avatar_images=(None, "https://gradio.s3-us-west-2.amazonaws.com/guides/robot.png")  # (user, bot) avatars
    )

    with gr.Row():
        # Toggle between plain LLM answers and RAG-grounded answers.
        chebox = gr.Checkbox(label="RAG", value=False)

    with gr.Row():
        input = gr.Textbox(
            label="输入你的问题",
            placeholder="例如:",
            lines=2,
            container=False
        )
        submit_btn = gr.Button("发送", variant="primary", icon="📤")

    # Wire the button: (question, history, RAG switch) -> (history, textbox value).
    submit_btn.click(
        fn=chatbot_response,
        inputs=[input, chatbot, chebox],
        outputs=[chatbot, input]
    )
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
if __name__ == "__main__":
    # Launch the Gradio app locally; show_error surfaces tracebacks in the UI
    # while debugging.
    demo.launch(
        share=False,
        show_error=True,
    )
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
|
data_base/data/rag.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
2025年乒乓球项目恭喜樊振东获得冠军,马龙为亚军
|
data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/data_level0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5707b4304f81e825ab1c96b0955b9fcbce912c03d1b0e55d9f3b70d0d68046b
|
| 3 |
+
size 8332000
|
data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/header.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de65dd7dc719eee86a1e11054bd45ee9d541ad62e7e654ea3a1c5b7d61da6baa
|
| 3 |
+
size 100
|
data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/length.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a24c0f603727245b4a01a14a3ee703614fed0d5fe14e19b71f01ac4099b3a433
|
| 3 |
+
size 4000
|
data_base/vector_db/chroma/81183b78-851d-4e82-8886-717c11558d9c/link_lists.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
| 3 |
+
size 0
|
data_base/vector_db/chroma/chroma.sqlite3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e47310452bc5312a566c477b2af85270b441763a7a127b499fd59462e4b92b89
|
| 3 |
+
size 167936
|
requirements.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
langchain==0.3.0
|
| 2 |
+
langchain-community==0.3.0
|
| 3 |
+
langchain-text-splitters==0.3.0
|
| 4 |
+
langchain-core==0.3.0
|
| 5 |
+
langchain-openai==0.2.0
|
| 6 |
+
langchain-chroma==0.1.4
|
| 7 |
+
python-dotenv==1.0.1
|
| 8 |
+
zhipuai==2.1.5.20250106
|
| 9 |
+
qianfan==0.4.12.3
|
| 10 |
+
unstructured==0.16.23
|
| 11 |
+
pymupdf==1.25.3
|
| 12 |
+
markdown==3.7
|
| 13 |
+
streamlit==1.43.0
|
| 14 |
+
jieba==0.42.1
|
| 15 |
+
pydantic==2.10.6
|
| 16 |
+
gradio==4.44.1
|
zhipuEmbedding.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import os
|
| 3 |
+
from langchain_core.embeddings import Embeddings
|
| 4 |
+
from zhipuai import ZhipuAI
|
| 5 |
+
|
| 6 |
+
class ZhipuAiEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by ZhipuAI's ``embedding-3`` model.

    The underlying client reads the API key from the ``ZHIPUAI_API_KEY``
    environment variable (``ZhipuAI()`` with no explicit key).
    """

    def __init__(self):
        self.client = ZhipuAI()
        # The embeddings endpoint accepts a limited number of texts per
        # request, so embed_documents submits the input in slices of this size.
        self.batch_size = 64

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts, batching requests to respect the API limit.

        BUG FIX: the original sent all of ``texts`` in a single request
        (failing for inputs larger than one batch) and left ``batch_size``
        unused; the dead commented-out batching code also returned ``None``
        via ``list.extend``. This version batches correctly.

        Args:
            texts: document texts to embed.

        Returns:
            One embedding vector per input text, in the original order.
        """
        all_embeddings: List[List[float]] = []
        for start in range(0, len(texts), self.batch_size):
            batch = texts[start : start + self.batch_size]
            response = self.client.embeddings.create(
                model="embedding-3",
                input=batch,
            )
            all_embeddings.extend(item.embedding for item in response.data)
        return all_embeddings

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string and return its vector."""
        return self.embed_documents([text])[0]
|
zhipuEmbeddingsData.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["USER_AGENT"] = "MyRAGApp/1.0 (https://myapp.example.com; myemail@example.com)"
|
| 3 |
+
os.environ["CHROMA_TELEMETRY_DISABLED"] = "true"
|
| 4 |
+
from zhipuai import ZhipuAI
|
| 5 |
+
from dotenv import load_dotenv, find_dotenv
|
| 6 |
+
from langchain_community.document_loaders import (
|
| 7 |
+
TextLoader, PythonLoader, CSVLoader, JSONLoader,
|
| 8 |
+
Docx2txtLoader, UnstructuredPowerPointLoader,
|
| 9 |
+
PyMuPDFLoader, UnstructuredMarkdownLoader,
|
| 10 |
+
UnstructuredImageLoader, WebBaseLoader
|
| 11 |
+
)
|
| 12 |
+
_ = load_dotenv(find_dotenv())
|
| 13 |
+
client = ZhipuAI(api_key=os.environ["ZHIPUAI_API_KEY"])
|
| 14 |
+
|
| 15 |
+
#数据存入向量库
|
| 16 |
+
from zhipuEmbedding import ZhipuAiEmbeddings
|
| 17 |
+
from langchain_community.vectorstores import Chroma
|
| 18 |
+
|
| 19 |
+
def dataLoadToVectordb(texts):
    """Embed the given document chunks and persist them into the Chroma store
    on disk, then report the collection size."""
    persist_directory = 'data_base/vector_db/chroma'
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=ZhipuAiEmbeddings(),
        persist_directory=persist_directory,
    )
    print(f"向量库中存储的数量:{vectordb._collection.count()}")
    return
|
| 29 |
+
|
| 30 |
+
def get_file_paths(folder_path):
    """Collect every file under *folder_path*, load and split the documents
    into chunks, and push the chunks into the vector store in batches.

    Args:
        folder_path: directory holding the raw knowledge-base files.
    """
    current_dir = os.getcwd()
    abs_folder_path = os.path.abspath(folder_path)
    print(f"当前工作目录:{current_dir}")
    print(f"目标文件夹绝对路径:{abs_folder_path}")
    print(f"目标路径是否存在:{os.path.exists(abs_folder_path)}")
    print(f"目标路径是否是文件夹:{os.path.isdir(abs_folder_path)}")
    # 1. Gather every file path below the folder, recursively.
    file_paths = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            file_paths.append(file_path)
    print(file_paths[:3])

    # Load every file into LangChain documents.
    texts = []
    for file_path in file_paths:
        splitDocuments(file_path, texts)

    # 2. TODO: clean the data (strip redundant newlines, symbols, spaces, ...).

    # 3. Split the documents into chunks.
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    # Maximum length of a single chunk in the knowledge base.
    CHUNK_SIZE = 500

    # Overlap between neighbouring chunks.
    OVERLAP_SIZE = 0

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=OVERLAP_SIZE
    )
    docs = text_splitter.split_documents(texts)
    # BUG FIX: the label promises a count but the original printed the whole
    # document list; report len(docs) instead.
    print(f"切分后的文件数量:{len(docs)}")

    # Store the chunks in batches of 64 (the embedding API request limit).
    for i in range(0, len(docs), 64):
        input_embeddings = docs[i : i + 64]
        dataLoadToVectordb(input_embeddings)
|
| 75 |
+
|
| 76 |
+
def splitDocuments(file_path, texts):
    """Load one file with a type-appropriate loader and append its documents
    to *texts*; unsupported extensions are reported and skipped.

    Args:
        file_path: path (or URL pseudo-path ending in ``.url``) to ingest.
        texts: list of documents, extended in place on success.
    """
    file_type = file_path.split('.')[-1].lower()
    # Map each supported extension to a lazily-invoked loader factory so the
    # loader classes are only touched for extensions we actually handle.
    loader_factories = {
        'pdf': lambda p: PyMuPDFLoader(p),                    # preferred for PDF
        'md': lambda p: UnstructuredMarkdownLoader(p),        # Markdown
        'txt': lambda p: TextLoader(p, encoding="utf-8"),     # plain text
        'py': lambda p: PythonLoader(p),                      # Python source
        'csv': lambda p: CSVLoader(p, encoding="utf-8"),      # tabular data
        'json': lambda p: JSONLoader(p, jq_schema=".content", text_content=False),
        'docx': lambda p: Docx2txtLoader(p),                  # Word
        'pptx': lambda p: UnstructuredPowerPointLoader(p),    # PowerPoint
        'ppt': lambda p: UnstructuredPowerPointLoader(p),
        'png': lambda p: UnstructuredImageLoader(p),          # image via OCR
        'jpg': lambda p: UnstructuredImageLoader(p),
        'jpeg': lambda p: UnstructuredImageLoader(p),
        'url': lambda p: WebBaseLoader(p),                    # plain web page
    }
    factory = loader_factories.get(file_type)
    if factory is None:
        # Also covers xlsx/xls and epub, whose loaders were never wired up.
        print(f"不支持的文件格式:{file_type} | 文件路径:{file_path}")
        return
    texts.extend(factory(file_path).load())
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
if __name__ == "__main__":
    # Ingest everything under the local knowledge-base folder.
    get_file_paths("data_base/data")
|
zhipuLLM.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, Iterator, List, Optional
|
| 2 |
+
from zhipuai import ZhipuAI
|
| 3 |
+
from langchain_core.callbacks import (
|
| 4 |
+
CallbackManagerForLLMRun,
|
| 5 |
+
)
|
| 6 |
+
from langchain_core.language_models import BaseChatModel
|
| 7 |
+
from langchain_core.messages import (
|
| 8 |
+
AIMessage,
|
| 9 |
+
AIMessageChunk,
|
| 10 |
+
BaseMessage,
|
| 11 |
+
SystemMessage,
|
| 12 |
+
ChatMessage,
|
| 13 |
+
HumanMessage
|
| 14 |
+
)
|
| 15 |
+
from langchain_core.messages.ai import UsageMetadata
|
| 16 |
+
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
|
| 17 |
+
import time
|
| 18 |
+
|
| 19 |
+
def _convert_message_to_dict(message: BaseMessage) -> dict:
    """Translate a LangChain message into the dict format the Zhipu API expects.

    Args:
        message: The LangChain message.

    Returns:
        The dictionary.
    """
    result: Dict[str, Any] = {"content": message.content}
    name = message.name or message.additional_kwargs.get("name")
    if name is not None:
        result["name"] = name

    # Resolve the Zhipu role from the concrete message class.
    # ChatMessage carries its own role; the rest map to fixed role strings.
    if isinstance(message, ChatMessage):
        result["role"] = message.role
        return result
    fixed_roles = (
        (HumanMessage, "user"),
        (AIMessage, "assistant"),
        (SystemMessage, "system"),
    )
    for msg_type, role in fixed_roles:
        if isinstance(message, msg_type):
            result["role"] = role
            return result
    raise TypeError(f"Got unknown type {message}")
|
| 42 |
+
|
| 43 |
+
class ZhipuaiLLM(BaseChatModel):
    """Custom LangChain chat model backed by the ZhipuAI chat-completions API."""

    model_name: Optional[str] = None     # Zhipu model id, e.g. "glm-4-plus"
    temperature: Optional[float] = None  # sampling temperature
    max_tokens: Optional[int] = None     # response token cap
    timeout: Optional[int] = None        # request timeout
    stop: Optional[List[str]] = None     # default stop sequences
    max_retries: int = 3                 # retry budget (not used by the raw calls below)
    api_key: str | None = None           # explicit key; ZhipuAI falls back to ZHIPUAI_API_KEY

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Answer the prompt with a single (non-streaming) ZhipuAI call.

        Args:
            messages: the prompt, as a list of LangChain messages.
            stop: strings that stop generation when the model emits them.
            run_manager: run manager providing callbacks for the LLM.
        """
        # Convert each LangChain message into the dict shape Zhipu expects.
        messages = [_convert_message_to_dict(message) for message in messages]
        # Start of the inference, for latency metadata.
        start_time = time.time()
        response = ZhipuAI(api_key=self.api_key).chat.completions.create(
            model=self.model_name,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            timeout=self.timeout,
            stop=stop,
            messages=messages
        )
        # Wall-clock latency of the call.
        time_in_seconds = time.time() - start_time
        message = AIMessage(
            content=response.choices[0].message.content,  # the model's answer
            additional_kwargs={},
            response_metadata={
                # Response metadata; extend with other fields as needed.
                "time_in_seconds": round(time_in_seconds, 3),
            },
            # Token usage for this inference.
            usage_metadata={
                "input_tokens": response.usage.prompt_tokens,
                "output_tokens": response.usage.completion_tokens,
                "total_tokens": response.usage.total_tokens,
            },
        )
        generation = ChatGeneration(message=message)
        return ChatResult(generations=[generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream the answer chunk by chunk from the ZhipuAI API.

        Args:
            messages: the prompt, as a list of LangChain messages.
            stop: strings that stop generation when the model emits them.
            run_manager: run manager providing callbacks for the LLM.
        """
        messages = [_convert_message_to_dict(message) for message in messages]
        # BUG FIX: pass api_key like _generate does; the original built
        # ZhipuAI() bare, silently ignoring an explicitly configured key.
        response = ZhipuAI(api_key=self.api_key).chat.completions.create(
            model=self.model_name,
            stream=True,  # stream=True yields an iterator of chunks
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            timeout=self.timeout,
            stop=stop,
            messages=messages
        )
        start_time = time.time()
        # BUG FIX: initialise up-front so the trailing metadata chunk cannot
        # raise UnboundLocalError when the stream never reports usage.
        usage_metadata = None
        for res in response:
            if res.usage:  # record token usage when a chunk carries it
                usage_metadata = UsageMetadata(
                    {
                        "input_tokens": res.usage.prompt_tokens,
                        "output_tokens": res.usage.completion_tokens,
                        "total_tokens": res.usage.total_tokens,
                    }
                )
            # Wrap each streamed delta in a ChatGenerationChunk.
            chunk = ChatGenerationChunk(
                message=AIMessageChunk(content=res.choices[0].delta.content)
            )

            if run_manager:
                # Optional in newer LangChain versions; on_llm_new_token is
                # then called automatically.
                run_manager.on_llm_new_token(res.choices[0].delta.content, chunk=chunk)
            # yield makes this a generator, consumable with a for-loop.
            yield chunk
        time_in_sec = time.time() - start_time
        # Trailing empty chunk carrying latency and token-usage metadata.
        chunk = ChatGenerationChunk(
            message=AIMessageChunk(content="", response_metadata={"time_in_sec": round(time_in_sec, 3)}, usage_metadata=usage_metadata)
        )
        if run_manager:
            run_manager.on_llm_new_token("", chunk=chunk)
        yield chunk

    @property
    def _llm_type(self) -> str:
        """Type label of this chat model (the configured model name)."""
        return self.model_name

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return a dict of identifying parameters.

        Used by LangChain's callback system for tracing, making it possible
        to monitor the LLM.
        """
        return {
            "model_name": self.model_name,
        }
|