Spaces:

Yuski
/

multi_model_detection

Runtime error

App Files Files Community

Yuski committed on Aug 25, 2025

Commit

842a433

verified ·

1 Parent(s): 919fa9d

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

.gitattributes +1 -0
__pycache__/gemini_ai.cpython-313.pyc +0 -0
__pycache__/image_converter.cpython-313.pyc +0 -0
__pycache__/mongo_lib.cpython-313.pyc +0 -0
__pycache__/target_object.cpython-313.pyc +0 -0
gemini_ai.py +43 -78
image_converter.py +1 -1
main.py +80 -120
main_ver2.py +80 -120
mongo_lib.py +38 -0
output_images/1411032040-楊宗祥.jpg +3 -0
requirements.txt +10 -6
target_object.py +59 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+output_images/1411032040-楊宗祥.jpg filter=lfs diff=lfs merge=lfs -text

__pycache__/gemini_ai.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/gemini_ai.cpython-313.pyc and b/__pycache__/gemini_ai.cpython-313.pyc differ

__pycache__/image_converter.cpython-313.pyc CHANGED Viewed

Binary files a/__pycache__/image_converter.cpython-313.pyc and b/__pycache__/image_converter.cpython-313.pyc differ

__pycache__/mongo_lib.cpython-313.pyc ADDED Viewed

Binary file (1.17 kB). View file

__pycache__/target_object.cpython-313.pyc ADDED Viewed

Binary file (1.21 kB). View file

gemini_ai.py CHANGED Viewed

@@ -5,72 +5,52 @@ import image_converter as img_converter
 import random
 import os
 import ast
-import target
-# 基本設定都放這邊----------------------------------------
 #
 #
 # 設定圖檔位置 (此處僅為範例，純文字查詢時可忽略)
-image_path = r"D:\Practice\Python_YOLO_AI_ENV\test_images\input\CAT1.png"
 # 要使用的模型種類，免費版一分鐘只能跑最多十筆
-gemini_model = "gemini-2.5-flash"
-# 要求AI的提示語放這邊
-# image_prompt = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
-# 請在各大類中選最近似的一樣，輸出結果如範例:"A[開心],B[學習],C[學校]"。
-# 若您覺得，該圖片不具上列特徵，請回覆"A[NIL]",加上NIL表示該類未再提供的選項內。
-# 以下是我們要請您分辨的種類:
-# A情感類-人物表情: A[面無表情,開心,生氣,悲傷,緊張,輕視，想睡，疲憊，興奮，自信滿滿,臉部遮蔽]。
-# B動作類-B[學習,工作,飲食,遊戲,駕駛,睡覺,冥想,醫療行為,會議,團隊討論，聽音樂，看電視，畫畫，騎車，烹飪，走路]。
-# C場景類-C[辦公室等工作空間,書房,臥室,客廳,學校,網咖,超現實場景，車內，外太空]。"""
-# image_prompt = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
 # 請在各大類中選最近似的一樣，輸出結果如範例:"物理環境[辦公室],技術應用[人工智慧,虛擬實境,其他],資訊設備[其他]"。
-# 若您覺得，該圖片不具上列特徵，請回覆"XXX[NIL]",XXX為該類別,加上NIL表示該類未再提供的選項內。
-# 以下是我們要請您分辨的種類，會以JSON標示:
-# 物理環境[辦公室,臥室,工作室,工廠]。
-# 技術應用[人工智慧,虛擬實境,大數據分析,其他]。
-# 社交關係[獨立工作(1人),,團隊合作(2人以上),遠程協作(遠端控制)]。
-# 職業情感[快樂,睡覺,壓力/焦慮,成就感]。
-# 資訊設備[AI助手,投影儀,手機,眼鏡投影,智慧手錶,機械手臂,平板,電腦,鍵盤,滑鼠,其他]。
-# 物體[床,椅子,桌子,書架,PC,肖像,監視器,窗戶,冷氣機,其他]。
-# 角色[機器人,教師,學生,動物,工作人員]。
-# """
-image_prompt = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
-請在各大類中選最近似的一樣，輸出結果如範例:"物理環境[辦公室],技術應用[人工智慧,虛擬實境,其他],資訊設備[其他]"。
-若您覺得，該圖片不具上列特徵，請回覆"XXX[NIL]",XXX為該類別,加上NIL表示該類未再提供的選項內。
-以下是我們要請您分辨的種類，會以JSON標示:""" + str(
-    target.target_JSON
-)
-# --------------------------------------------------------
 ## 替換冒號和逗號為換行符號
 def replace_colon_comma_with_newline(input_string):
-    processed_string = (
-        input_string.replace(":", "\n").replace("：", "\n").replace("],", "]\n")
-    )
-    return processed_string
 def getApiToken():
     try:
-        my_api_key = os.getenv("my_api_key")
-        my_list = ast.literal_eval(
-            my_api_key
-        )  # Convert string to list因為存在環境變數中是字串格式
-        return random.choice(my_list)
     except Exception as e:
         return ""
 # function，輸入是文字或是圖檔的位置
-def analyze_content_with_gemini(input_content, user_prompt=None):
     """
     透過 Gemini API 辨識內容，可處理純文字或圖片。
@@ -85,43 +65,30 @@ def analyze_content_with_gemini(input_content, user_prompt=None):
     Returns:
         str: 辨識結果的文字描述。
     """
-    try:
-        # 請將 'YOUR_API_KEY' 替換為您的實際 API 金鑰。
-        my_api_key = getApiToken()  # 從環境變數中獲取API金鑰
-        print(my_api_key)
-        genai.configure(api_key=my_api_key)
-    except Exception as e:
-        return f"發生錯誤：{e}"
     # 根據 user_prompt 決定要使用的 prompt
-    prompt_to_use = (
-        image_prompt + user_prompt
-        if user_prompt and user_prompt.strip()
-        else image_prompt
-    )
     try:
         # 判斷輸入的類型
         if isinstance(input_content, str):
             # 如果輸入是字串，嘗試判斷是否為圖片路徑
-            if input_content.lower().endswith(
-                (".png", ".jpg", ".jpeg", ".gif", ".webp")
-            ):
-                if input_content.lower().endswith((".webp")):
-                    input_content = img_converter.convert_webp_to_jpg(
-                        input_content
-                    )  # 如果是 webp 圖片，先轉換為 jpg
                 model = genai.GenerativeModel(gemini_model)
                 image_obj = PIL.Image.open(input_content)
                 response = model.generate_content([prompt_to_use, image_obj])
             else:
                 # 純文字輸入
                 model = genai.GenerativeModel(gemini_model)
-                response = model.generate_content(
-                    input_content
-                )  # 純文字直接使用輸入內容當 prompt
         elif isinstance(input_content, PIL.Image.Image):
             model = genai.GenerativeModel(gemini_model)
             response = model.generate_content([prompt_to_use, input_content])
@@ -134,7 +101,7 @@ def analyze_content_with_gemini(input_content, user_prompt=None):
         return f"發生錯誤：{e}"
-if __name__ == "__main__":
     # --- 程式碼使用範例 ---
     # 範例 1：傳送純文字訊息
@@ -148,12 +115,10 @@ if __name__ == "__main__":
     # 範例 2：傳送圖片路徑
     # 請確保 image_path 指向有效的圖片檔案
     print("正在處理圖片訊息...")
-    my_prompt =""
-#     my_prompt = """{
-#     "物品": ["辦公室", "臥室", "工作室", "工廠","牛","鴨","船"]
-# }"""
-    response_image = analyze_content_with_gemini(image_path)
     print("回應結果：")
     print(response_image)
     print("-" * 20)

 import random
 import os
 import ast
+import target_object
+#基本設定都放這邊----------------------------------------
 #
 #
 # 設定圖檔位置 (此處僅為範例，純文字查詢時可忽略)
+image_path = r'G:\Python\tools\input_images\1411135045-張華桀.jpg'
 # 要使用的模型種類，免費版一分鐘只能跑最多十筆
+gemini_model = 'gemini-2.5-flash'
+#要求AI扮演的角色和提示詞，這裡的提示詞會用來引導AI進行圖片分類
+# 給AI的提示詞 = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
 # 請在各大類中選最近似的一樣，輸出結果如範例:"物理環境[辦公室],技術應用[人工智慧,虛擬實境,其他],資訊設備[其他]"。
+# 若您覺得，該圖片完全不具要辨識的特徵，請回覆"XXX[NIL]",XXX為該類別,加上NIL表示該類未在提供的選項內。
+# 以下是我們要請您分辨的種類，會以JSON標示:"""
+給AI的提示詞 = """您現在扮演一位圖片分類大師，擅長解讀圖片中的一些抽象涵義並加以分類。
+請在各大類中選最近似的一樣，輸出結果如範例:[物理環境_辦公室,技術應用_人工智慧,技術應用_大數據分析,社交關係_獨立工作(1人),資訊設備_電腦,資訊設備_鍵盤,資訊設備_滑鼠,資訊設備_手機,物體_桌子,物體_椅子,角色_工作人員]。
+若您覺得，該圖片完全不具要辨識的特徵，請回覆[NIL]。
+以下是我們要請您分辨的種類，會以JSON標示:"""
+#--------------------------------------------------------
 ## 替換冒號和逗號為換行符號
 def replace_colon_comma_with_newline(input_string):
+  processed_string = input_string.replace(':', '\n').replace('：', '\n').replace('],', ']\n')
+  return processed_string
 def getApiToken():
     try:
+        my_api_key = os.getenv('my_api_key')
+        my_list = ast.literal_eval(my_api_key) # Convert string to list因為存在環境變數中是字串格式
+        return  random.choice(my_list)
     except Exception as e:
         return ""
 # function，輸入是文字或是圖檔的位置
+def analyze_content_with_gemini(input_content, 辨識目標物=None):
     """
     透過 Gemini API 辨識內容，可處理純文字或圖片。
     Returns:
         str: 辨識結果的文字描述。
     """
+    my_api_key = getApiToken()  # 從環境變數中獲取API金鑰
+    genai.configure(api_key=my_api_key)
     # 根據 user_prompt 決定要使用的 prompt
+    prompt_to_use = 給AI的提示詞+辨識目標物 if 辨識目標物 and 辨識目標物.strip() else 給AI的提示詞+ str(target_object.target_JSON)
+    # print("-"*50)
+    # print(prompt_to_use)
     try:
         # 判斷輸入的類型
         if isinstance(input_content, str):
             # 如果輸入是字串，嘗試判斷是否為圖片路徑
+            if input_content.lower().endswith(('.png', '.jpg', '.jpeg', '.gif','.webp')):
+                if input_content.lower().endswith(('.webp')):
+                    input_content = img_converter.convert_webp_to_jpg(input_content)  # 如果是 webp 圖片，先轉換為 jpg
                 model = genai.GenerativeModel(gemini_model)
                 image_obj = PIL.Image.open(input_content)
                 response = model.generate_content([prompt_to_use, image_obj])
             else:
                 # 純文字輸入
                 model = genai.GenerativeModel(gemini_model)
+                response = model.generate_content(input_content) # 純文字直接使用輸入內容當 prompt
         elif isinstance(input_content, PIL.Image.Image):
             model = genai.GenerativeModel(gemini_model)
             response = model.generate_content([prompt_to_use, input_content])
         return f"發生錯誤：{e}"
+if __name__ == '__main__':
     # --- 程式碼使用範例 ---
     # 範例 1：傳送純文字訊息
     # 範例 2：傳送圖片路徑
     # 請確保 image_path 指向有效的圖片檔案
     print("正在處理圖片訊息...")
+    我要辨識的物體 = ""
+    #我要辨識的物體 = '{"物件類別": ["人", "老虎", "獅子", "牛","書架", "PC", "窗戶", "冷氣機","其他", "雞", "車子", "企鵝","長頸鹿"]}'
+    #我要辨識的物體 = '{"物件類別": ["人", "老虎", "獅子", "牛","書架", "PC", "窗戶", "冷氣機","其他", "雞", "車子"]}'
+    response_image = analyze_content_with_gemini(image_path, 我要辨識的物體)
     print("回應結果：")
     print(response_image)
     print("-" * 20)

image_converter.py CHANGED Viewed

@@ -44,7 +44,7 @@ if __name__ == '__main__':
     try:
         # 建立一個簡單的白色圖片
         #G:\Python\tools\input_images\1411032040-楊宗祥.webp
-        dummy_webp_path = r"G:\Python\tools\input_images\1411032040-楊宗祥.webp"
         # 測試轉換函數
         # 範例 1: 轉換並儲存在相同資料夾

     try:
         # 建立一個簡單的白色圖片
         #G:\Python\tools\input_images\1411032040-楊宗祥.webp
+        dummy_webp_path = r"G:\Python\tools\input_images\1411032040.webp"
         # 測試轉換函數
         # 範例 1: 轉換並儲存在相同資料夾

main.py CHANGED Viewed

@@ -15,10 +15,12 @@ from ultralytics import YOLO
 import shutil
 import zipfile
 import uuid  # 匯入 uuid 以生成唯一的執行 ID
-from pathlib import Path  # 匯入 Path 以更方便地操作路徑
-# 假設 gemini_ai.py 在同一個目錄或 Python 路徑中
 import gemini_ai as genai
 def create_zip_archive(files, zip_filename):
@@ -32,7 +34,7 @@ def create_zip_archive(files, zip_filename):
     Returns:
         str: 產生的 zip 檔案路徑。
     """
-    with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
         for file in files:
             if os.path.exists(file):
                 # 使用 os.path.basename 確保只寫入檔案名稱，而非完整路徑
@@ -41,14 +43,13 @@ def create_zip_archive(files, zip_filename):
                 print(f"警告: 檔案 '{file}' 不存在，無法加入壓縮檔。")
     return zip_filename
 def gradio_multi_model_detection(
     image_files,
     model_files,
     conf_threshold,
     enable_mllm,
     mllm_prompt,
-    progress=gr.Progress(track_tqdm=True),
 ):
     """
     Gradio 的主要處理函式，使用生成器 (yield) 實現流式輸出。
@@ -64,19 +65,33 @@ def gradio_multi_model_detection(
     Yields:
         dict: 用於更新 Gradio 介面元件的字典。
     """
     if not image_files:
         yield {
             output_status: gr.update(value="錯誤：請至少上傳一張圖片。"),
             output_gallery: None,
             output_text: None,
-            download_button: None,
         }
         return
     # --- 1. 初始化設定 ---
     # 為本次執行創建一個唯一的子目錄
     run_id = str(uuid.uuid4())
-    base_output_dir = Path("gradio_detection_results")
     run_output_dir = base_output_dir / f"run_{run_id[:8]}"
     run_output_dir.mkdir(parents=True, exist_ok=True)
@@ -88,16 +103,12 @@ def gradio_multi_model_detection(
     loaded_models = []
     if not model_paths:
         # 如果沒有上傳模型，使用預設模型
-        default_model_path = "yolov8n.pt"
         try:
             model = YOLO(default_model_path)
             loaded_models.append((default_model_path, model))
         except Exception as e:
-            yield {
-                output_status: gr.update(
-                    value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}"
-                )
-            }
             return
     else:
         for model_path in model_paths:
@@ -125,24 +136,22 @@ def gradio_multi_model_detection(
         image_path = Path(image_path_str)
         progress(i / total_images, desc=f"處理中: {image_path.name}")
         yield {
-            output_status: gr.update(
-                value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"
-            ),
-            output_gallery: gr.update(value=annotated_image_paths),
         }
         original_image = cv2.imread(str(image_path))
         if original_image is None:
             print(f"警告: 無法讀取圖片 '{image_path}'，跳過。")
             continue
         annotated_image = original_image.copy()
         image_base_name = image_path.stem
         # --- 3a. YOLO 物件偵測 ---
         yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
         all_detections_for_image = []
         for model_path_str, model_obj in loaded_models:
             model_name = Path(model_path_str).name
             yolo_output_content.append(f"--- 模型: {model_name} ---")
@@ -155,47 +164,22 @@ def gradio_multi_model_detection(
                         x1, y1, x2, y2 = map(int, box.xyxy[0])
                         cls_id = int(box.cls[0])
                         cls_name = model_obj.names[cls_id]
-                        detection_info = {
-                            "model_name": model_name,
-                            "class_name": cls_name,
-                            "confidence": conf,
-                            "bbox": (x1, y1, x2, y2),
-                        }
                         all_detections_for_image.append(detection_info)
-                        yolo_output_content.append(
-                            f"  - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]"
-                        )
             else:
                 yolo_output_content.append("  未偵測到任何物件。")
         # 繪製偵測框
-        colors = [
-            (255, 0, 0),
-            (0, 255, 0),
-            (0, 0, 255),
-            (255, 255, 0),
-            (255, 0, 255),
-            (0, 255, 255),
-        ]
-        color_map = {
-            Path(p).name: colors[idx % len(colors)]
-            for idx, (p, _) in enumerate(loaded_models)
-        }
         for det in all_detections_for_image:
-            x1, y1, x2, y2 = det["bbox"]
-            color = color_map.get(det["model_name"], (200, 200, 200))
             label = f"{det['class_name']} {det['confidence']:.2f}"
             cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
-            cv2.putText(
-                annotated_image,
-                label,
-                (x1, y1 - 10),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.5,
-                color,
-                2,
-            )
         # 儲存 YOLO 標註圖
         output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
@@ -205,134 +189,110 @@ def gradio_multi_model_detection(
         # 儲存 YOLO 辨識資訊
         output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
-        output_yolo_txt_path.write_text(
-            "\n".join(yolo_output_content), encoding="utf-8"
-        )
         all_result_files.append(str(output_yolo_txt_path))
         # --- 3b. MLLM 分析 (如果啟用) ---
         output_mllm_txt_path = None
         if enable_mllm:
             try:
-                prompt_to_use = (
-                    mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
-                )
-                mllm_str = genai.analyze_content_with_gemini(
-                    str(image_path), prompt_to_use
-                )
                 mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
             except Exception as e:
                 mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
             output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
-            output_mllm_txt_path.write_text(mllm_result_content, encoding="utf-8")
             all_result_files.append(str(output_mllm_txt_path))
         # 將本次圖片的結果加入到總列表中
         all_texts.append("\n".join(yolo_output_content))
         if output_mllm_txt_path:
-            all_texts.append(output_mllm_txt_path.read_text(encoding="utf-8"))
     # --- 4. 完成處理，打包並更新最終結果 ---
     progress(1, desc="打包結果中...")
     zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
     created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
-    final_status = (
-        f"處理完成！共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
-    )
     combined_text_output = "\n\n".join(all_texts)
     yield {
         output_status: gr.update(value=final_status),
         download_button: gr.update(value=created_zip_path, visible=True),
         output_text: gr.update(value=combined_text_output),
-        output_gallery: gr.update(
-            value=annotated_image_paths
-        ),  # 確保最終 gallery 也被更新
     }
 def toggle_mllm_prompt(is_enabled):
     """
     根據 Checkbox 狀態，顯示或隱藏 MLLM prompt 輸入框。
     """
     return gr.update(visible=is_enabled)
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    str1 = os.getenv("mongo_1")
-    str2 = os.getenv("mongo_2")
-    str3 = os.getenv("mongo_3")
     gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
-    gr.Markdown("上傳圖片與YOLO模型進行物件偵測，並可選用MLLM進行進階圖像理解。")
-    gr.Markdown("Str1=" + str1)
-    gr.Markdown("Str2=" + str2)
-    gr.Markdown("Str3=" + str3)
     with gr.Row():
         with gr.Column(scale=1):
             # 輸入元件
-            image_input = gr.File(
-                label="上傳圖片", file_count="multiple", file_types=["image"]
-            )
-            # model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供，將使用預設的 yolov8n.pt 模型。")
-            model_input = gr.File(
-                label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"]
-            )
             with gr.Accordion("進階設定", open=False):
-                conf_slider = gr.Slider(
-                    minimum=0.1, maximum=1, value=0.40, step=0.05, label="信賴度閾值"
-                )
                 mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
-                mllm_prompt_input = gr.Textbox(
-                    label="自訂 MLLM Prompt (選填)",
-                    placeholder="例如：請描述圖中人物的穿著與場景。",
-                    visible=False,
-                )
             run_button = gr.Button("開始辨識", variant="primary")
         with gr.Column(scale=2):
             # 輸出元件
-            output_gallery = gr.Gallery(
-                label="辨識結果預覽",
-                height=500,
-                object_fit="contain",
-                allow_preview=True,
-            )
-            output_text = gr.Textbox(
-                label="詳細辨識資訊",
-                lines=15,
-                placeholder="辨識完成後，所有結果將顯示於此。",
-            )
             output_status = gr.Textbox(label="執行狀態", interactive=False)
-            download_button = gr.File(
-                label="下載所有結果 (.zip)", file_count="single", visible=False
-            )
     # --- 事件綁定 ---
     # 點擊 "開始辨識" 按鈕
     run_button.click(
         fn=gradio_multi_model_detection,
-        inputs=[
-            image_input,
-            model_input,
-            conf_slider,
-            mllm_enabled_checkbox,
-            mllm_prompt_input,
-        ],
-        outputs=[output_gallery, output_status, download_button, output_text],
     )
     # 勾選/取消 "開啟MLLM辨識"
     mllm_enabled_checkbox.change(
-        fn=toggle_mllm_prompt, inputs=mllm_enabled_checkbox, outputs=mllm_prompt_input
     )
 # 啟動 Gradio 應用
 if __name__ == "__main__":
     demo.launch(debug=True)

 import shutil
 import zipfile
 import uuid  # 匯入 uuid 以生成唯一的執行 ID
+from pathlib import Path # 匯入 Path 以更方便地操作路徑
 import gemini_ai as genai
+from datetime import datetime
+import mongo_lib as mongo
 def create_zip_archive(files, zip_filename):
     Returns:
         str: 產生的 zip 檔案路徑。
     """
+    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
         for file in files:
             if os.path.exists(file):
                 # 使用 os.path.basename 確保只寫入檔案名稱，而非完整路徑
                 print(f"警告: 檔案 '{file}' 不存在，無法加入壓縮檔。")
     return zip_filename
 def gradio_multi_model_detection(
     image_files,
     model_files,
     conf_threshold,
     enable_mllm,
     mllm_prompt,
+    progress=gr.Progress(track_tqdm=True)
 ):
     """
     Gradio 的主要處理函式，使用生成器 (yield) 實現流式輸出。
     Yields:
         dict: 用於更新 Gradio 介面元件的字典。
     """
+    global_datetime = datetime.now()
+    #寫主表log
+    document = {"log_style":"master",
+                "create_datetime": str(global_datetime),
+                "image_files": image_files,
+                "model_files": model_files,
+                "conf_threshold":conf_threshold,
+                "enable_mllm":enable_mllm,
+                "mllm_prompt":mllm_prompt
+                }
+    mongo.insert_mongodb_log("multi_model_detection",document)  #寫入log方便日後查驗
     if not image_files:
         yield {
             output_status: gr.update(value="錯誤：請至少上傳一張圖片。"),
             output_gallery: None,
             output_text: None,
+            download_button: None
         }
         return
     # --- 1. 初始化設定 ---
     # 為本次執行創建一個唯一的子目錄
     run_id = str(uuid.uuid4())
+    base_output_dir = Path('gradio_detection_results')
     run_output_dir = base_output_dir / f"run_{run_id[:8]}"
     run_output_dir.mkdir(parents=True, exist_ok=True)
     loaded_models = []
     if not model_paths:
         # 如果沒有上傳模型，使用預設模型
+        default_model_path = 'yolov8n.pt'
         try:
             model = YOLO(default_model_path)
             loaded_models.append((default_model_path, model))
         except Exception as e:
+            yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
             return
     else:
         for model_path in model_paths:
         image_path = Path(image_path_str)
         progress(i / total_images, desc=f"處理中: {image_path.name}")
         yield {
+            output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
+            output_gallery: gr.update(value=annotated_image_paths)
         }
         original_image = cv2.imread(str(image_path))
         if original_image is None:
             print(f"警告: 無法讀取圖片 '{image_path}'，跳過。")
             continue
         annotated_image = original_image.copy()
         image_base_name = image_path.stem
         # --- 3a. YOLO 物件偵測 ---
         yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
         all_detections_for_image = []
         for model_path_str, model_obj in loaded_models:
             model_name = Path(model_path_str).name
             yolo_output_content.append(f"--- 模型: {model_name} ---")
                         x1, y1, x2, y2 = map(int, box.xyxy[0])
                         cls_id = int(box.cls[0])
                         cls_name = model_obj.names[cls_id]
+                        detection_info = {'model_name': model_name, 'class_name': cls_name, 'confidence': conf, 'bbox': (x1, y1, x2, y2)}
                         all_detections_for_image.append(detection_info)
+                        yolo_output_content.append(f"  - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")
             else:
                 yolo_output_content.append("  未偵測到任何物件。")
         # 繪製偵測框
+        colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
+        color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}
         for det in all_detections_for_image:
+            x1, y1, x2, y2 = det['bbox']
+            color = color_map.get(det['model_name'], (200, 200, 200))
             label = f"{det['class_name']} {det['confidence']:.2f}"
             cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
+            cv2.putText(annotated_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
         # 儲存 YOLO 標註圖
         output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
         # 儲存 YOLO 辨識資訊
         output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
+        output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
         all_result_files.append(str(output_yolo_txt_path))
         # --- 3b. MLLM 分析 (如果啟用) ---
         output_mllm_txt_path = None
+        mllm_result_content = ""
         if enable_mllm:
             try:
+                prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
+                mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
                 mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
             except Exception as e:
                 mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
             output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
+            output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
             all_result_files.append(str(output_mllm_txt_path))
+        #寫明細表log
+        document = {"log_style":"detail",
+                    "create_datetime": str(global_datetime),
+                    "image_path": str(image_path),
+                    "yolo_result": yolo_output_content,
+                    "enable_mllm": enable_mllm,
+                    "mllm_prompt": mllm_prompt,
+                    "mllm_result": mllm_result_content}
+        mongo.insert_mongodb_log("multi_model_detection",document)  #寫入log方便日後查驗
         # 將本次圖片的結果加入到總列表中
         all_texts.append("\n".join(yolo_output_content))
         if output_mllm_txt_path:
+            all_texts.append(output_mllm_txt_path.read_text(encoding='utf-8'))
     # --- 4. 完成處理，打包並更新最終結果 ---
     progress(1, desc="打包結果中...")
     zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
     created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
+    final_status = f"處理完成！共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
     combined_text_output = "\n\n".join(all_texts)
     yield {
         output_status: gr.update(value=final_status),
         download_button: gr.update(value=created_zip_path, visible=True),
         output_text: gr.update(value=combined_text_output),
+        output_gallery: gr.update(value=annotated_image_paths) # 確保最終 gallery 也被更新
     }
 def toggle_mllm_prompt(is_enabled):
     """
     根據 Checkbox 狀態，顯示或隱藏 MLLM prompt 輸入框。
     """
     return gr.update(visible=is_enabled)
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
+    gr.Markdown("上傳圖片與YOLO模型進行物件偵測，並可選用MLLM進行進階圖像理解。   ver.250824.1")
+    # mongo_uri = os.getenv('mongo_uri')
+    # gr.Markdown(mongo_uri)
     with gr.Row():
         with gr.Column(scale=1):
             # 輸入元件
+            image_input = gr.File(label="上傳圖片", file_count="multiple", file_types=["image"])
+            #model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供，將使用預設的 yolov8n.pt 模型。")
+            model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"])
             with gr.Accordion("進階設定", open=False):
+                conf_slider = gr.Slider(minimum=0.1, maximum=1, value=0.40, step=0.05, label="信賴度閾值")
                 mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
+                mllm_prompt_input = gr.Textbox(label="自訂 MLLM Prompt (選填)", placeholder="例如：請描述圖中人物的穿著與場景。", visible=False)
             run_button = gr.Button("開始辨識", variant="primary")
         with gr.Column(scale=2):
             # 輸出元件
+            output_gallery = gr.Gallery(label="辨識結果預覽", height=500, object_fit="contain", allow_preview=True)
+            output_text = gr.Textbox(label="詳細辨識資訊", lines=15, placeholder="辨識完成後，所有結果將顯示於此。")
             output_status = gr.Textbox(label="執行狀態", interactive=False)
+            download_button = gr.File(label="下載所有結果 (.zip)", file_count="single", visible=False)
     # --- 事件綁定 ---
     # 點擊 "開始辨識" 按鈕
     run_button.click(
         fn=gradio_multi_model_detection,
+        inputs=[image_input, model_input, conf_slider, mllm_enabled_checkbox, mllm_prompt_input],
+        outputs=[output_gallery, output_status, download_button, output_text]
     )
     # 勾選/取消 "開啟MLLM辨識"
     mllm_enabled_checkbox.change(
+        fn=toggle_mllm_prompt,
+        inputs=mllm_enabled_checkbox,
+        outputs=mllm_prompt_input
     )
 # 啟動 Gradio 應用
 if __name__ == "__main__":
     demo.launch(debug=True)
+    #demo.launch(share=True)

main_ver2.py CHANGED Viewed

@@ -15,10 +15,12 @@ from ultralytics import YOLO
 import shutil
 import zipfile
 import uuid  # 匯入 uuid 以生成唯一的執行 ID
-from pathlib import Path  # 匯入 Path 以更方便地操作路徑
-# 假設 gemini_ai.py 在同一個目錄或 Python 路徑中
 import gemini_ai as genai
 def create_zip_archive(files, zip_filename):
@@ -32,7 +34,7 @@ def create_zip_archive(files, zip_filename):
     Returns:
         str: 產生的 zip 檔案路徑。
     """
-    with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
         for file in files:
             if os.path.exists(file):
                 # 使用 os.path.basename 確保只寫入檔案名稱，而非完整路徑
@@ -41,14 +43,13 @@ def create_zip_archive(files, zip_filename):
                 print(f"警告: 檔案 '{file}' 不存在，無法加入壓縮檔。")
     return zip_filename
 def gradio_multi_model_detection(
     image_files,
     model_files,
     conf_threshold,
     enable_mllm,
     mllm_prompt,
-    progress=gr.Progress(track_tqdm=True),
 ):
     """
     Gradio 的主要處理函式，使用生成器 (yield) 實現流式輸出。
@@ -64,19 +65,33 @@ def gradio_multi_model_detection(
     Yields:
         dict: 用於更新 Gradio 介面元件的字典。
     """
     if not image_files:
         yield {
             output_status: gr.update(value="錯誤：請至少上傳一張圖片。"),
             output_gallery: None,
             output_text: None,
-            download_button: None,
         }
         return
     # --- 1. 初始化設定 ---
     # 為本次執行創建一個唯一的子目錄
     run_id = str(uuid.uuid4())
-    base_output_dir = Path("gradio_detection_results")
     run_output_dir = base_output_dir / f"run_{run_id[:8]}"
     run_output_dir.mkdir(parents=True, exist_ok=True)
@@ -88,16 +103,12 @@ def gradio_multi_model_detection(
     loaded_models = []
     if not model_paths:
         # 如果沒有上傳模型，使用預設模型
-        default_model_path = "yolov8n.pt"
         try:
             model = YOLO(default_model_path)
             loaded_models.append((default_model_path, model))
         except Exception as e:
-            yield {
-                output_status: gr.update(
-                    value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}"
-                )
-            }
             return
     else:
         for model_path in model_paths:
@@ -125,24 +136,22 @@ def gradio_multi_model_detection(
         image_path = Path(image_path_str)
         progress(i / total_images, desc=f"處理中: {image_path.name}")
         yield {
-            output_status: gr.update(
-                value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"
-            ),
-            output_gallery: gr.update(value=annotated_image_paths),
         }
         original_image = cv2.imread(str(image_path))
         if original_image is None:
             print(f"警告: 無法讀取圖片 '{image_path}'，跳過。")
             continue
         annotated_image = original_image.copy()
         image_base_name = image_path.stem
         # --- 3a. YOLO 物件偵測 ---
         yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
         all_detections_for_image = []
         for model_path_str, model_obj in loaded_models:
             model_name = Path(model_path_str).name
             yolo_output_content.append(f"--- 模型: {model_name} ---")
@@ -155,47 +164,22 @@ def gradio_multi_model_detection(
                         x1, y1, x2, y2 = map(int, box.xyxy[0])
                         cls_id = int(box.cls[0])
                         cls_name = model_obj.names[cls_id]
-                        detection_info = {
-                            "model_name": model_name,
-                            "class_name": cls_name,
-                            "confidence": conf,
-                            "bbox": (x1, y1, x2, y2),
-                        }
                         all_detections_for_image.append(detection_info)
-                        yolo_output_content.append(
-                            f"  - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]"
-                        )
             else:
                 yolo_output_content.append("  未偵測到任何物件。")
         # 繪製偵測框
-        colors = [
-            (255, 0, 0),
-            (0, 255, 0),
-            (0, 0, 255),
-            (255, 255, 0),
-            (255, 0, 255),
-            (0, 255, 255),
-        ]
-        color_map = {
-            Path(p).name: colors[idx % len(colors)]
-            for idx, (p, _) in enumerate(loaded_models)
-        }
         for det in all_detections_for_image:
-            x1, y1, x2, y2 = det["bbox"]
-            color = color_map.get(det["model_name"], (200, 200, 200))
             label = f"{det['class_name']} {det['confidence']:.2f}"
             cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
-            cv2.putText(
-                annotated_image,
-                label,
-                (x1, y1 - 10),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.5,
-                color,
-                2,
-            )
         # 儲存 YOLO 標註圖
         output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
@@ -205,134 +189,110 @@ def gradio_multi_model_detection(
         # 儲存 YOLO 辨識資訊
         output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
-        output_yolo_txt_path.write_text(
-            "\n".join(yolo_output_content), encoding="utf-8"
-        )
         all_result_files.append(str(output_yolo_txt_path))
         # --- 3b. MLLM 分析 (如果啟用) ---
         output_mllm_txt_path = None
         if enable_mllm:
             try:
-                prompt_to_use = (
-                    mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
-                )
-                mllm_str = genai.analyze_content_with_gemini(
-                    str(image_path), prompt_to_use
-                )
                 mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
             except Exception as e:
                 mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
             output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
-            output_mllm_txt_path.write_text(mllm_result_content, encoding="utf-8")
             all_result_files.append(str(output_mllm_txt_path))
         # 將本次圖片的結果加入到總列表中
         all_texts.append("\n".join(yolo_output_content))
         if output_mllm_txt_path:
-            all_texts.append(output_mllm_txt_path.read_text(encoding="utf-8"))
     # --- 4. 完成處理，打包並更新最終結果 ---
     progress(1, desc="打包結果中...")
     zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
     created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
-    final_status = (
-        f"處理完成！共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
-    )
     combined_text_output = "\n\n".join(all_texts)
     yield {
         output_status: gr.update(value=final_status),
         download_button: gr.update(value=created_zip_path, visible=True),
         output_text: gr.update(value=combined_text_output),
-        output_gallery: gr.update(
-            value=annotated_image_paths
-        ),  # 確保最終 gallery 也被更新
     }
 def toggle_mllm_prompt(is_enabled):
     """Show or hide the MLLM prompt input box to match the checkbox state.

     Args:
         is_enabled: Current checkbox value, forwarded as the ``visible`` flag.

     Returns:
         The ``gr.update(...)`` result carrying the new visibility.
     """
     visibility_update = gr.update(visible=is_enabled)
     return visibility_update
 # --- Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    str1 = os.getenv("mongo_1")
-    str2 = os.getenv("mongo_2")
-    str3 = os.getenv("mongo_3")
     gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
-    gr.Markdown("上傳圖片與YOLO模型進行物件偵測，並可選用MLLM進行進階圖像理解。")
-    gr.Markdown("Str1=" + str1)
-    gr.Markdown("Str2=" + str2)
-    gr.Markdown("Str3=" + str3)
     with gr.Row():
         with gr.Column(scale=1):
             # 輸入元件
-            image_input = gr.File(
-                label="上傳圖片", file_count="multiple", file_types=["image"]
-            )
-            # model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供，將使用預設的 yolov8n.pt 模型。")
-            model_input = gr.File(
-                label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"]
-            )
             with gr.Accordion("進階設定", open=False):
-                conf_slider = gr.Slider(
-                    minimum=0.1, maximum=1, value=0.40, step=0.05, label="信賴度閾值"
-                )
                 mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
-                mllm_prompt_input = gr.Textbox(
-                    label="自訂 MLLM Prompt (選填)",
-                    placeholder="例如：請描述圖中人物的穿著與場景。",
-                    visible=False,
-                )
             run_button = gr.Button("開始辨識", variant="primary")
         with gr.Column(scale=2):
             # 輸出元件
-            output_gallery = gr.Gallery(
-                label="辨識結果預覽",
-                height=500,
-                object_fit="contain",
-                allow_preview=True,
-            )
-            output_text = gr.Textbox(
-                label="詳細辨識資訊",
-                lines=15,
-                placeholder="辨識完成後，所有結果將顯示於此。",
-            )
             output_status = gr.Textbox(label="執行狀態", interactive=False)
-            download_button = gr.File(
-                label="下載所有結果 (.zip)", file_count="single", visible=False
-            )
     # --- 事件綁定 ---
     # 點擊 "開始辨識" 按鈕
     run_button.click(
         fn=gradio_multi_model_detection,
-        inputs=[
-            image_input,
-            model_input,
-            conf_slider,
-            mllm_enabled_checkbox,
-            mllm_prompt_input,
-        ],
-        outputs=[output_gallery, output_status, download_button, output_text],
     )
     # 勾選/取消 "開啟MLLM辨識"
     mllm_enabled_checkbox.change(
-        fn=toggle_mllm_prompt, inputs=mllm_enabled_checkbox, outputs=mllm_prompt_input
     )
 # 啟動 Gradio 應用
 if __name__ == "__main__":
     demo.launch(debug=True)

 import shutil
 import zipfile
 import uuid  # 匯入 uuid 以生成唯一的執行 ID
+from pathlib import Path # 匯入 Path 以更方便地操作路徑
 import gemini_ai as genai
+from datetime import datetime
+import mongo_lib as mongo
 def create_zip_archive(files, zip_filename):
     Returns:
         str: 產生的 zip 檔案路徑。
     """
+    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
         for file in files:
             if os.path.exists(file):
                 # 使用 os.path.basename 確保只寫入檔案名稱，而非完整路徑
                 print(f"警告: 檔案 '{file}' 不存在，無法加入壓縮檔。")
     return zip_filename
 def gradio_multi_model_detection(
     image_files,
     model_files,
     conf_threshold,
     enable_mllm,
     mllm_prompt,
+    progress=gr.Progress(track_tqdm=True)
 ):
     """
     Gradio 的主要處理函式，使用生成器 (yield) 實現流式輸出。
     Yields:
         dict: 用於更新 Gradio 介面元件的字典。
     """
+    global_datetime = datetime.now()
+    #寫主表log
+    document = {"log_style":"master",
+                "create_datetime": str(global_datetime),
+                "image_files": image_files,
+                "model_files": model_files,
+                "conf_threshold":conf_threshold,
+                "enable_mllm":enable_mllm,
+                "mllm_prompt":mllm_prompt
+                }
+    mongo.insert_mongodb_log("multi_model_detection",document)  #寫入log方便日後查驗
     if not image_files:
         yield {
             output_status: gr.update(value="錯誤：請至少上傳一張圖片。"),
             output_gallery: None,
             output_text: None,
+            download_button: None
         }
         return
     # --- 1. 初始化設定 ---
     # 為本次執行創建一個唯一的子目錄
     run_id = str(uuid.uuid4())
+    base_output_dir = Path('gradio_detection_results')
     run_output_dir = base_output_dir / f"run_{run_id[:8]}"
     run_output_dir.mkdir(parents=True, exist_ok=True)
     loaded_models = []
     if not model_paths:
         # 如果沒有上傳模型，使用預設模型
+        default_model_path = 'yolov8n.pt'
         try:
             model = YOLO(default_model_path)
             loaded_models.append((default_model_path, model))
         except Exception as e:
+            yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
             return
     else:
         for model_path in model_paths:
         image_path = Path(image_path_str)
         progress(i / total_images, desc=f"處理中: {image_path.name}")
         yield {
+            output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
+            output_gallery: gr.update(value=annotated_image_paths)
         }
         original_image = cv2.imread(str(image_path))
         if original_image is None:
             print(f"警告: 無法讀取圖片 '{image_path}'，跳過。")
             continue
         annotated_image = original_image.copy()
         image_base_name = image_path.stem
         # --- 3a. YOLO 物件偵測 ---
         yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
         all_detections_for_image = []
         for model_path_str, model_obj in loaded_models:
             model_name = Path(model_path_str).name
             yolo_output_content.append(f"--- 模型: {model_name} ---")
                         x1, y1, x2, y2 = map(int, box.xyxy[0])
                         cls_id = int(box.cls[0])
                         cls_name = model_obj.names[cls_id]
+                        detection_info = {'model_name': model_name, 'class_name': cls_name, 'confidence': conf, 'bbox': (x1, y1, x2, y2)}
                         all_detections_for_image.append(detection_info)
+                        yolo_output_content.append(f"  - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")
             else:
                 yolo_output_content.append("  未偵測到任何物件。")
         # 繪製偵測框
+        colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
+        color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}
         for det in all_detections_for_image:
+            x1, y1, x2, y2 = det['bbox']
+            color = color_map.get(det['model_name'], (200, 200, 200))
             label = f"{det['class_name']} {det['confidence']:.2f}"
             cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
+            cv2.putText(annotated_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
         # 儲存 YOLO 標註圖
         output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
         # 儲存 YOLO 辨識資訊
         output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
+        output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
         all_result_files.append(str(output_yolo_txt_path))
         # --- 3b. MLLM 分析 (如果啟用) ---
         output_mllm_txt_path = None
+        mllm_result_content = ""
         if enable_mllm:
             try:
+                prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
+                mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
                 mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
             except Exception as e:
                 mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
             output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
+            output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
             all_result_files.append(str(output_mllm_txt_path))
+        #寫明細表log
+        document = {"log_style":"detail",
+                    "create_datetime": str(global_datetime),
+                    "image_path": str(image_path),
+                    "yolo_result": yolo_output_content,
+                    "enable_mllm": enable_mllm,
+                    "mllm_prompt": mllm_prompt,
+                    "mllm_result": mllm_result_content}
+        mongo.insert_mongodb_log("multi_model_detection",document)  #寫入log方便日後查驗
         # 將本次圖片的結果加入到總列表中
         all_texts.append("\n".join(yolo_output_content))
         if output_mllm_txt_path:
+            all_texts.append(output_mllm_txt_path.read_text(encoding='utf-8'))
     # --- 4. 完成處理，打包並更新最終結果 ---
     progress(1, desc="打包結果中...")
     zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
     created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
+    final_status = f"處理完成！共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
     combined_text_output = "\n\n".join(all_texts)
     yield {
         output_status: gr.update(value=final_status),
         download_button: gr.update(value=created_zip_path, visible=True),
         output_text: gr.update(value=combined_text_output),
+        output_gallery: gr.update(value=annotated_image_paths) # 確保最終 gallery 也被更新
     }
 def toggle_mllm_prompt(is_enabled):
     """Show or hide the MLLM prompt input box to match the checkbox state.

     Args:
         is_enabled: Current checkbox value, forwarded as the ``visible`` flag.

     Returns:
         The ``gr.update(...)`` result carrying the new visibility.
     """
     visibility_update = gr.update(visible=is_enabled)
     return visibility_update
 # --- Gradio Interface ---
 # Builds the Blocks UI: one Row holding an input column (scale=1) and an
 # output column (scale=2), then wires the button and checkbox events.
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
+    gr.Markdown("上傳圖片與YOLO模型進行物件偵測，並可選用MLLM進行進階圖像理解。   ver.250814.1")
+    # mongo_uri = os.getenv('mongo_uri')
+    # gr.Markdown(mongo_uri)
     with gr.Row():
         with gr.Column(scale=1):
             # Input components
+            image_input = gr.File(label="上傳圖片", file_count="multiple", file_types=["image"])
+            #model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供，將使用預設的 yolov8n.pt 模型。")
+            model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"])
             # Advanced settings live in an accordion that starts collapsed.
             with gr.Accordion("進階設定", open=False):
+                conf_slider = gr.Slider(minimum=0.1, maximum=1, value=0.40, step=0.05, label="信賴度閾值")
                 mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
                 # Created hidden (visible=False); the checkbox change handler
                 # below toggles its visibility.
+                mllm_prompt_input = gr.Textbox(label="自訂 MLLM Prompt (選填)", placeholder="例如：請描述圖中人物的穿著與場景。", visible=False)
             run_button = gr.Button("開始辨識", variant="primary")
         with gr.Column(scale=2):
             # Output components
+            output_gallery = gr.Gallery(label="辨識結果預覽", height=500, object_fit="contain", allow_preview=True)
+            output_text = gr.Textbox(label="詳細辨識資訊", lines=15, placeholder="辨識完成後，所有結果將顯示於此。")
             output_status = gr.Textbox(label="執行狀態", interactive=False)
             # Created hidden (visible=False).
+            download_button = gr.File(label="下載所有結果 (.zip)", file_count="single", visible=False)
     # --- Event bindings ---
     # Clicking the "開始辨識" (start detection) button calls
     # gradio_multi_model_detection with the five inputs listed below.
     run_button.click(
         fn=gradio_multi_model_detection,
+        inputs=[image_input, model_input, conf_slider, mllm_enabled_checkbox, mllm_prompt_input],
+        outputs=[output_gallery, output_status, download_button, output_text]
     )
     # Checking/unchecking "開啟MLLM辨識" shows or hides the prompt textbox.
     mllm_enabled_checkbox.change(
+        fn=toggle_mllm_prompt,
+        inputs=mllm_enabled_checkbox,
+        outputs=mllm_prompt_input
     )
 # Launch the Gradio app
 if __name__ == "__main__":
     demo.launch(debug=True)
+    #demo.launch(share=True)

mongo_lib.py ADDED Viewed

	@@ -0,0 +1,38 @@

+#python -m pip install "pymongo[srv]==3.11"
+from pymongo.mongo_client import MongoClient
+from datetime import datetime
+import os
def insert_mongodb_log(model_name, document: dict):
    """Best-effort insert of one log record into MongoDB.

    The record is written to the ``space-log`` collection of the
    ``huggingface-space`` database (MongoDB creates either on first write if
    it does not exist). The connection string is read from the ``mongo_uri``
    environment variable on every call.

    Args:
        model_name: Stored in the record under the ``"model_name"`` key.
        document: Fields to log. The dict is copied, so the caller's object
            is left untouched.

    Returns:
        None. Any failure (bad URI, unreachable server, rejected insert) is
        printed and swallowed so that logging can never break the caller.
    """
    # NOTE(review): when the variable is unset this is None and MongoClient
    # falls back to its own default host - confirm that is intended.
    mongo_uri = os.getenv('mongo_uri')

    # Work on a copy: update() below and insert_one() (which adds "_id")
    # would otherwise mutate the dict owned by the caller.
    record = dict(document)
    record.update({"model_name": model_name, "process_time": str(datetime.now())})

    try:
        # Build the client inside the try so connection/URI errors are handled
        # like insert errors, and use it as a context manager so the
        # connection is closed instead of leaking one client per call.
        with MongoClient(mongo_uri) as client:
            collection = client["huggingface-space"]["space-log"]
            collection.insert_one(record)
    except Exception as e:
        print(e)
+if __name__ == "__main__":
+    # 創建JSON文檔
+    document = {
+        "msg": "hello world",
+        "status": "success",
+    }
+    insert_mongodb_log("test_client",document)

output_images/1411032040-楊宗祥.jpg ADDED Viewed

Git LFS Details

SHA256: 363eb4f6e11ac17f543a3c701b21e78dce280c436d88cf9b57cfd42a169b50aa
Pointer size: 131 Bytes
Size of remote file: 358 kB

requirements.txt CHANGED Viewed

@@ -1,7 +1,11 @@
 google-generativeai
-Pillow
-gradio
-ultralytics
-numpy
-PyYAML
-transformers

+gradio>=4.0.0
+ultralytics>=8.0.0
+opencv-python>=4.8.0
+pillow>=10.0.0
+torch>=2.0.0
+torchvision>=0.15.0
+numpy>=1.24.0
+pathlib
+transformers
 google-generativeai
+pymongo[srv]==3.11

target_object.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# target_JSON = {
+#     "物理環境": ["辦公室", "臥室", "工作室", "工廠"],
+#     "技術應用": ["人工智慧", "虛擬實境", "大數據分析", "其他"],
+#     "社交關係": ["獨立工作(1人)", "團隊合作(2人以上)", "遠程協作(遠端控制)"],
+#     "職業情感": ["快樂", "睡覺", "壓力/焦慮", "成就感"],
+#     "資訊設備": ["AI助手", "投影儀", "手機", "眼鏡投影",
+#                 "智慧手錶", "機械手臂", "平板",
+#                 "電腦", "鍵盤", "滑鼠", "其他"],
+#     "物體": ["床", "椅子", "桌子",
+#             "書架", "PC",
+#             "肖像", "監視器",
+#             "窗戶",  "冷氣機","其他"],
+#     "角色": ["機器人","教師","學生","動物","工作人員"]
+# }
+target_JSON ={
+	"目標物": [
+		"物理環境_辦公室",
+		"物理環境_臥室",
+		"物理環境_工作室",
+		"物理環境_工廠",
+		"技術應用_人工智慧",
+		"技術應用_虛擬實境",
+		"技術應用_大數據分析",
+		"技術應用_其他",
+		"社交關係_獨立工作(1人)",
+		"社交關係_團隊合作(2人以上)",
+		"社交關係_遠程協作(遠端控制)",
+		"職業情感_快樂",
+		"職業情感_睡覺",
+		"職業情感_壓力/焦慮",
+		"職業情感_成就感",
+		"資訊設備_AI助手",
+		"資訊設備_投影儀",
+		"資訊設備_手機",
+		"資訊設備_眼鏡投影",
+		"資訊設備_智慧手錶",
+		"資訊設備_機械手臂",
+		"資訊設備_平板",
+		"資訊設備_電腦",
+		"資訊設備_鍵盤",
+		"資訊設備_滑鼠",
+		"資訊設備_其他",
+		"物體_床",
+		"物體_椅子",
+		"物體_桌子",
+		"物體_書架",
+		"物體_PC",
+		"物體_肖像",
+		"物體_監視器",
+		"物體_窗戶",
+		"物體_冷氣機",
+		"物體_其他",
+		"角色_機器人",
+		"角色_教師",
+		"角色_學生",
+		"角色_動物",
+		"角色_工作人員"
+	]
+}