Yuski commited on
Commit
3e8f5e6
·
verified ·
1 Parent(s): 46c9e6f

新增職業預測

Browse files
Files changed (4) hide show
  1. gemini_ai.py +1 -1
  2. gemini_ai_work.py +109 -0
  3. main.py +339 -298
  4. target_object.py +30 -1
gemini_ai.py CHANGED
@@ -11,7 +11,7 @@ import target_object
11
  #
12
  #
13
  # 設定圖檔位置 (此處僅為範例,純文字查詢時可忽略)
14
- image_path = r'G:\Python\tools\input_images\1411135045-張華桀.jpg'
15
 
16
 
17
  # 要使用的模型種類,免費版一分鐘只能跑最多十筆
 
11
  #
12
  #
13
  # 設定圖檔位置 (此處僅為範例,純文字查詢時可忽略)
14
+ image_path = r'G:\Python\tools\input_images\拾穗.jpeg'
15
 
16
 
17
  # 要使用的模型種類,免費版一分鐘只能跑最多十筆
gemini_ai_work.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!pip install -q -U google-generativeai
2
+ import google.generativeai as genai
3
+ import PIL.Image
4
+ import image_converter as img_converter
5
+ import random
6
+ import os
7
+ import ast
8
+ import target_object
9
+
10
+ #基本設定都放這邊----------------------------------------
11
+ #
12
+ #
13
+ # 設定圖檔位置 (此處僅為範例,純文字查詢時可忽略)
14
+ image_path = r'G:\Python\tools\input_images\拾穗.jpeg'
15
+
16
+
17
+ # 要使用的模型種類,免費版一分鐘只能跑最多十筆
18
+ gemini_model = 'gemini-2.5-flash'
19
+
20
+
21
+ #--------------------------------------------------------
22
+ ## 替換冒號和逗號為換行符號
23
+ def replace_colon_comma_with_newline(input_string):
24
+ processed_string = input_string.replace(':', '\n').replace(':', '\n').replace('],', ']\n')
25
+ return processed_string
26
+
27
+
28
+
29
+ def getApiToken():
30
+ try:
31
+ my_api_key = os.getenv('my_api_key')
32
+ my_list = ast.literal_eval(my_api_key) # Convert string to list因為存在環境變數中是字串格式
33
+
34
+ return random.choice(my_list)
35
+ except Exception as e:
36
+ return ""
37
+
38
+
39
+ # function,輸入是文字或是圖檔的位置
40
+ def analyze_content_with_gemini_work(input_content):
41
+ """
42
+ 透過 Gemini API 辨識內容,可處理純文字或圖片。
43
+
44
+ Args:
45
+ input_content (str or PIL.Image.Image):
46
+ 如果輸入是字串,則代表要辨識的文字訊息或圖片路徑。
47
+ 如果輸入是 PIL.Image.Image 物件,則直接使用該圖片。
48
+         Note:
49
+             prompt 固定取自 target_object.work_JSON;
50
+             本函式不接受自訂的 user_prompt 參數。
51
+
52
+ Returns:
53
+ str: 辨識結果的文字描述。
54
+ """
55
+ my_api_key = getApiToken() # 從環境變數中獲取API金鑰
56
+ genai.configure(api_key=my_api_key)
57
+
58
+ # 根據 user_prompt 決定要使用的 prompt
59
+ prompt_to_use = str(target_object.work_JSON)
60
+
61
+ # print("-"*50)
62
+ # print(prompt_to_use)
63
+
64
+ try:
65
+ # 判斷輸入的類型
66
+ if isinstance(input_content, str):
67
+ # 如果輸入是字串,嘗試判斷是否為圖片路徑
68
+ if input_content.lower().endswith(('.png', '.jpg', '.jpeg', '.gif','.webp')):
69
+ if input_content.lower().endswith(('.webp')):
70
+ input_content = img_converter.convert_webp_to_jpg(input_content) # 如果是 webp 圖片,先轉換為 jpg
71
+
72
+ model = genai.GenerativeModel(gemini_model)
73
+ image_obj = PIL.Image.open(input_content)
74
+ response = model.generate_content([prompt_to_use, image_obj])
75
+ else:
76
+ # 純文字輸入
77
+ model = genai.GenerativeModel(gemini_model)
78
+ response = model.generate_content(input_content) # 純文字直接使用輸入內容當 prompt
79
+ elif isinstance(input_content, PIL.Image.Image):
80
+ model = genai.GenerativeModel(gemini_model)
81
+ response = model.generate_content([prompt_to_use, input_content])
82
+ else:
83
+ return "錯誤:輸入必須是文字、圖片路徑(字串)或 PIL.Image 物件。"
84
+
85
+ return replace_colon_comma_with_newline(response.text)
86
+
87
+ except Exception as e:
88
+ return f"發生錯誤:{e}"
89
+
90
+
91
+ if __name__ == '__main__':
92
+ # --- 程式碼使用範例 ---
93
+
94
+ # 範例 1:傳送純文字訊息
95
+ # print("正在處理純文字訊息...")
96
+ # text_message = "你好,請簡要說明一下Python是什麼?"
97
+     # response_text = analyze_content_with_gemini_work(text_message)
98
+ # print("回應結果:")
99
+ # print(response_text)
100
+ # print("-" * 20)
101
+
102
+ # 範例 2:傳送圖片路徑
103
+ # 請確保 image_path 指向有效的圖片檔案
104
+ print("正在處理圖片訊息...")
105
+
106
+ response_image = analyze_content_with_gemini_work(image_path)
107
+ print("回應結果:")
108
+ print(response_image)
109
+ print("-" * 20)
main.py CHANGED
@@ -1,298 +1,339 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- 系統需求:
4
- - gradio: 用於建立 Web UI
5
- - opencv-python: 用於圖片處理
6
- - ultralytics: YOLOv8 官方函式庫
7
- - Pillow: 圖片處理基礎庫
8
- - transformers: (可選,若YOLO模型需要)
9
- """
10
-
11
- import gradio as gr
12
- import os
13
- import cv2
14
- from ultralytics import YOLO
15
- import shutil
16
- import zipfile
17
- import uuid # 匯入 uuid 以生成唯一的執行 ID
18
- from pathlib import Path # 匯入 Path 以更方便地操作路徑
19
- import gemini_ai as genai
20
- from datetime import datetime
21
- import mongo_lib as mongo
22
-
23
-
24
-
25
-
26
- def create_zip_archive(files, zip_filename):
27
- """
28
- 將一系列檔案壓縮成一個 zip 檔案。
29
-
30
- Args:
31
- files (list): 要壓縮檔案路徑列表
32
- zip_filename (str): 產生的 zip 檔案路徑。
33
-
34
- Returns:
35
- str: 產生的 zip 檔案路徑。
36
- """
37
- with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
38
- for file in files:
39
- if os.path.exists(file):
40
- # 使用 os.path.basename 確保只寫入檔案名稱,而非完整路徑
41
- zipf.write(file, os.path.basename(file))
42
- else:
43
- print(f"警告: 檔案 '{file}' 不存在,無法加壓縮。")
44
- return zip_filename
45
-
46
- def gradio_multi_model_detection(
47
- image_files,
48
- model_files,
49
- conf_threshold,
50
- enable_mllm,
51
- mllm_prompt,
52
- progress=gr.Progress(track_tqdm=True)
53
- ):
54
- """
55
- Gradio 的主要處理函式,使用生成器 (yield) 實現流式輸出。
56
-
57
- Args:
58
- image_files (list): Gradio File 元件回傳的圖片檔案列表。
59
- model_files (list): Gradio File 元件回傳的模型檔案列表
60
- conf_threshold (float): 置信度閾值。
61
- enable_mllm (bool): 是否啟用 MLLM 分析。
62
- mllm_prompt (str): 使用者自訂的 MLLM prompt
63
- progress (gr.Progress): Gradio 的進度條元件。
64
-
65
- Yields:
66
- dict: 用於更新 Gradio 介面元件的字典
67
- """
68
- global_datetime = datetime.now()
69
-
70
- #寫主表log
71
- document = {"log_style":"master",
72
- "create_datetime": str(global_datetime),
73
- "image_files": image_files,
74
- "model_files": model_files,
75
- "conf_threshold":conf_threshold,
76
- "enable_mllm":enable_mllm,
77
- "mllm_prompt":mllm_prompt
78
- }
79
-
80
- mongo.insert_mongodb_log("multi_model_detection",document) #寫入log方便日後查驗
81
-
82
- if not image_files:
83
- yield {
84
- output_status: gr.update(value="錯誤:請至少上傳一張圖片。"),
85
- output_gallery: None,
86
- output_text: None,
87
- download_button: None
88
- }
89
- return
90
-
91
- # --- 1. 初始化設定 ---
92
- # 為本次執行創建一個唯一的子目錄
93
- run_id = str(uuid.uuid4())
94
- base_output_dir = Path('gradio_detection_results')
95
- run_output_dir = base_output_dir / f"run_{run_id[:8]}"
96
- run_output_dir.mkdir(parents=True, exist_ok=True)
97
-
98
- image_paths = [file.name for file in image_files]
99
- model_paths = [file.name for file in model_files] if model_files else []
100
-
101
- # --- 2. 載入模型 ---
102
- yield {output_status: gr.update(value="正在載入模型...")}
103
- loaded_models = []
104
- if not model_paths:
105
- # 如果沒有上傳模型,使用預設模型
106
- default_model_path = 'yolov8n.pt'
107
- try:
108
- model = YOLO(default_model_path)
109
- loaded_models.append((default_model_path, model))
110
- except Exception as e:
111
- yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
112
- return
113
- else:
114
- for model_path in model_paths:
115
- try:
116
- model = YOLO(model_path)
117
- loaded_models.append((model_path, model))
118
- except Exception as e:
119
- print(f"警告: 無法載入模型 '{model_path}' - {e},將跳過此模型。")
120
- continue
121
-
122
- if not loaded_models:
123
- yield {output_status: gr.update(value="錯誤: 沒有任何模型成功載入。")}
124
- return
125
-
126
- # --- 3. 逐一處理圖片 ---
127
- total_images = len(image_paths)
128
- annotated_image_paths = []
129
- all_result_files = []
130
- # results_map 儲存圖片路徑與其對應的文字檔路徑,用於後續點擊查詢
131
- results_map = {}
132
- # all_texts 用於收集所有圖片的辨識結果文字
133
- all_texts = []
134
-
135
- for i, image_path_str in enumerate(image_paths):
136
- image_path = Path(image_path_str)
137
- progress(i / total_images, desc=f"處理中: {image_path.name}")
138
- yield {
139
- output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
140
- output_gallery: gr.update(value=annotated_image_paths)
141
- }
142
-
143
- original_image = cv2.imread(str(image_path))
144
- if original_image is None:
145
- print(f"警告: 無法讀取圖片 '{image_path}',跳過。")
146
- continue
147
-
148
- annotated_image = original_image.copy()
149
- image_base_name = image_path.stem
150
-
151
- # --- 3a. YOLO 物件偵測 ---
152
- yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
153
- all_detections_for_image = []
154
-
155
- for model_path_str, model_obj in loaded_models:
156
- model_name = Path(model_path_str).name
157
- yolo_output_content.append(f"--- 模型: {model_name} ---")
158
- results = model_obj(str(image_path), verbose=False, device="cpu")[0]
159
-
160
- if results.boxes:
161
- for box in results.boxes:
162
- conf = float(box.conf[0])
163
- if conf >= conf_threshold:
164
- x1, y1, x2, y2 = map(int, box.xyxy[0])
165
- cls_id = int(box.cls[0])
166
- cls_name = model_obj.names[cls_id]
167
-
168
- detection_info = {'model_name': model_name, 'class_name': cls_name, 'confidence': conf, 'bbox': (x1, y1, x2, y2)}
169
- all_detections_for_image.append(detection_info)
170
- yolo_output_content.append(f" - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")
171
- else:
172
- yolo_output_content.append(" 未偵測到任何物件。")
173
-
174
- # 繪製偵測框
175
- colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
176
- color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}
177
- for det in all_detections_for_image:
178
- x1, y1, x2, y2 = det['bbox']
179
- color = color_map.get(det['model_name'], (200, 200, 200))
180
- label = f"{det['class_name']} {det['confidence']:.2f}"
181
- cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
182
- cv2.putText(annotated_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
183
-
184
- # 儲存 YOLO 標註圖
185
- output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
186
- cv2.imwrite(str(output_image_path), annotated_image)
187
- annotated_image_paths.append(str(output_image_path))
188
- all_result_files.append(str(output_image_path))
189
-
190
- # 儲存 YOLO 辨識資訊
191
- output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
192
- output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
193
- all_result_files.append(str(output_yolo_txt_path))
194
-
195
- # --- 3b. MLLM 分析 (如果啟用) ---
196
- output_mllm_txt_path = None
197
- mllm_result_content = ""
198
- if enable_mllm:
199
- try:
200
- prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
201
- mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
202
- mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
203
- except Exception as e:
204
- mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
205
-
206
- output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
207
- output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
208
- all_result_files.append(str(output_mllm_txt_path))
209
-
210
- #寫明細表log
211
- document = {"log_style":"detail",
212
- "create_datetime": str(global_datetime),
213
- "image_path": str(image_path),
214
- "yolo_result": yolo_output_content,
215
- "enable_mllm": enable_mllm,
216
- "mllm_prompt": mllm_prompt,
217
- "mllm_result": mllm_result_content}
218
-
219
- mongo.insert_mongodb_log("multi_model_detection",document) #寫入log方便日後查驗
220
-
221
- # 將本次圖片的結果加入到總列表中
222
- all_texts.append("\n".join(yolo_output_content))
223
- if output_mllm_txt_path:
224
- all_texts.append(output_mllm_txt_path.read_text(encoding='utf-8'))
225
-
226
-
227
- # --- 4. 完成處理,打包並更新最終結果 ---
228
- progress(1, desc="打包結果中...")
229
- zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
230
- created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
231
-
232
- final_status = f"處理完成!共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
233
- combined_text_output = "\n\n".join(all_texts)
234
-
235
-
236
-
237
-
238
- yield {
239
- output_status: gr.update(value=final_status),
240
- download_button: gr.update(value=created_zip_path, visible=True),
241
- output_text: gr.update(value=combined_text_output),
242
- output_gallery: gr.update(value=annotated_image_paths) # 確保最終 gallery 也被更新
243
- }
244
-
245
- def toggle_mllm_prompt(is_enabled):
246
- """
247
- 根據 Checkbox 狀態,顯示或隱藏 MLLM prompt 輸入框。
248
- """
249
- return gr.update(visible=is_enabled)
250
-
251
- # --- Gradio Interface ---
252
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
253
- gr.Markdown("# 智慧影像分析工具 (YOLO + MLLM)")
254
- gr.Markdown("上傳圖片與YOLO模型進行物件偵測,並可選用MLLM進行進階圖像理解。 ver.250830.1")
255
- mongo_uri = os.getenv('mongo_uri')
256
- #gr.Markdown(mongo_uri)
257
-
258
- with gr.Row():
259
- with gr.Column(scale=1):
260
- # 輸入元件
261
- image_input = gr.File(label="上傳圖片", file_count="multiple", file_types=["image"])
262
- #model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供,將使用預設的 yolov8n.pt 模型。")
263
- model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"])
264
-
265
- with gr.Accordion("進階設定", open=False):
266
- conf_slider = gr.Slider(minimum=0.1, maximum=1, value=0.40, step=0.05, label="信賴度閾值")
267
- mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
268
- mllm_prompt_input = gr.Textbox(label="自訂 MLLM Prompt (選填)", placeholder="例如:請描述圖中人物的穿著與場景。", visible=False)
269
-
270
- run_button = gr.Button("開始辨識", variant="primary")
271
-
272
- with gr.Column(scale=2):
273
- # 輸出元件
274
- output_gallery = gr.Gallery(label="辨識結果預覽", height=500, object_fit="contain", allow_preview=True)
275
- output_text = gr.Textbox(label="詳細辨識資訊", lines=15, placeholder="辨識完成後,所有結果將顯示於此。")
276
- output_status = gr.Textbox(label="執行狀態", interactive=False)
277
- download_button = gr.File(label="下載所有結果 (.zip)", file_count="single", visible=False)
278
-
279
- # --- 事件綁定 ---
280
-
281
- # 點擊 "開始辨識" 按鈕
282
- run_button.click(
283
- fn=gradio_multi_model_detection,
284
- inputs=[image_input, model_input, conf_slider, mllm_enabled_checkbox, mllm_prompt_input],
285
- outputs=[output_gallery, output_status, download_button, output_text]
286
- )
287
-
288
- # 勾選/取消 "開啟MLLM辨識"
289
- mllm_enabled_checkbox.change(
290
- fn=toggle_mllm_prompt,
291
- inputs=mllm_enabled_checkbox,
292
- outputs=mllm_prompt_input
293
- )
294
-
295
- # 啟動 Gradio
296
- if __name__ == "__main__":
297
- demo.launch(debug=True)
298
- #demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 系統需求:
4
+ - gradio: 用於建立 Web UI
5
+ - opencv-python: 用於圖片處理
6
+ - ultralytics: YOLOv8 官方函式庫
7
+ - Pillow: 圖片處理基礎庫
8
+ - transformers: (可選,若YOLO模型需要)
9
+ huggingface上面是用main.py當主畫面
10
+
11
+ """
12
+
13
+ import gradio as gr
14
+ import os
15
+ import cv2
16
+ from ultralytics import YOLO
17
+ import shutil
18
+ import zipfile
19
+ import uuid # 匯入 uuid 以生成唯一的執行 ID
20
+ from pathlib import Path # 匯入 Path 以更方便地操作路徑
21
+ import gemini_ai as genai
22
+ import gemini_ai_work as genai_work
23
+ from datetime import datetime
24
+ import mongo_lib as mongo
25
+
26
+
27
+
28
+
29
+ def create_zip_archive(files, zip_filename):
30
+ """
31
+ 將一系列檔案壓縮成一個 zip 檔案。
32
+
33
+ Args:
34
+ files (list): 要壓縮的檔案路徑列表。
35
+ zip_filename (str): 產生的 zip 檔案路徑。
36
+
37
+ Returns:
38
+ str: 產生的 zip 檔案路徑。
39
+ """
40
+ with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
41
+ for file in files:
42
+ if os.path.exists(file):
43
+ # 使用 os.path.basename 確保只寫入檔案名稱,而非完整路徑
44
+ zipf.write(file, os.path.basename(file))
45
+ else:
46
+ print(f"警告: 檔案 '{file}' 不存在,無法加入壓縮檔。")
47
+ return zip_filename
48
+
49
+ def gradio_multi_model_detection(
50
+ image_files,
51
+ model_files,
52
+ conf_threshold,
53
+ enable_mllm,
54
+ mllm_prompt,
55
+ enable_career_prediction, # 新增職業預測參數
56
+ progress=gr.Progress(track_tqdm=True)
57
+ ):
58
+ """
59
+ Gradio 的主要處理函式,使用生成器 (yield) 實現流式輸出
60
+
61
+ Args:
62
+ image_files (list): Gradio File 元件回傳的圖片檔案列表
63
+ model_files (list): Gradio File 元件回傳的模型檔案列表
64
+ conf_threshold (float): 置信度閾值。
65
+ enable_mllm (bool): 是否啟用 MLLM 分析。
66
+         mllm_prompt (str): 使用者自訂的 MLLM prompt
67
+ enable_career_prediction (bool): 是否啟用職業預測分析。
68
+ progress (gr.Progress): Gradio 的進度條元件。
69
+
70
+ Yields:
71
+ dict: 用於更新 Gradio 介面元件的字典。
72
+ """
73
+ global_datetime = datetime.now()
74
+
75
+ #寫主表log
76
+ document = {"log_style":"master",
77
+ "create_datetime": str(global_datetime),
78
+ "image_files": image_files,
79
+ "model_files": model_files,
80
+ "conf_threshold":conf_threshold,
81
+ "enable_mllm":enable_mllm,
82
+ "mllm_prompt":mllm_prompt,
83
+ "enable_career_prediction":enable_career_prediction # 新增職業預測狀態到 log
84
+ }
85
+
86
+ mongo.insert_mongodb_log("multi_model_detection",document) #寫入log方便日後查驗
87
+
88
+ if not image_files:
89
+ yield {
90
+ output_status: gr.update(value="錯誤:請至少上傳一張圖片。"),
91
+ output_gallery: None,
92
+ output_text: None,
93
+ download_button: None
94
+ }
95
+ return
96
+
97
+ # --- 1. 初始化設定 ---
98
+ # 為本次執行創建一個唯一的子目錄
99
+ run_id = str(uuid.uuid4())
100
+ base_output_dir = Path('gradio_detection_results')
101
+ run_output_dir = base_output_dir / f"run_{run_id[:8]}"
102
+ run_output_dir.mkdir(parents=True, exist_ok=True)
103
+
104
+ image_paths = [file.name for file in image_files]
105
+ model_paths = [file.name for file in model_files] if model_files else []
106
+
107
+ # --- 2. 載入模型 ---
108
+ yield {output_status: gr.update(value="正在載入模型...")}
109
+ loaded_models = []
110
+ if not model_paths:
111
+ # 如果沒有上傳模型,使用預設模型
112
+ default_model_path = 'yolov8n.pt'
113
+ try:
114
+ model = YOLO(default_model_path)
115
+ loaded_models.append((default_model_path, model))
116
+ except Exception as e:
117
+ yield {output_status: gr.update(value=f"錯誤: 無法載入預設模型 '{default_model_path}' - {e}")}
118
+ return
119
+ else:
120
+ for model_path in model_paths:
121
+ try:
122
+ model = YOLO(model_path)
123
+ loaded_models.append((model_path, model))
124
+ except Exception as e:
125
+ print(f"警告: 無法載入模型 '{model_path}' - {e},將跳過此模型。")
126
+ continue
127
+
128
+ if not loaded_models:
129
+ yield {output_status: gr.update(value="錯誤: 沒有任何模型成功載入。")}
130
+ return
131
+
132
+ # --- 3. 逐一處理圖片 ---
133
+ total_images = len(image_paths)
134
+ annotated_image_paths = []
135
+ all_result_files = []
136
+ # results_map 儲存圖片路徑與其對應的文字檔路徑,用於後續點擊查詢
137
+ results_map = {}
138
+ # all_texts 用於收集所有圖片的辨識結果文字
139
+ all_texts = []
140
+
141
+ for i, image_path_str in enumerate(image_paths):
142
+ image_path = Path(image_path_str)
143
+ progress(i / total_images, desc=f"處理中: {image_path.name}")
144
+ yield {
145
+ output_status: gr.update(value=f"處理中... ({i+1}/{total_images}) - {image_path.name}"),
146
+ output_gallery: gr.update(value=annotated_image_paths)
147
+ }
148
+
149
+ original_image = cv2.imread(str(image_path))
150
+ if original_image is None:
151
+ print(f"警告: 無法讀取圖片 '{image_path}',跳過。")
152
+ continue
153
+
154
+ annotated_image = original_image.copy()
155
+ image_base_name = image_path.stem
156
+
157
+ # --- 3a. YOLO 物件偵測 ---
158
+ yolo_output_content = [f"--- 檔案: {image_path.name} ---"]
159
+ all_detections_for_image = []
160
+
161
+ for model_path_str, model_obj in loaded_models:
162
+ model_name = Path(model_path_str).name
163
+ yolo_output_content.append(f"--- 模型: {model_name} ---")
164
+ results = model_obj(str(image_path), verbose=False, device="cpu")[0]
165
+
166
+ if results.boxes:
167
+ for box in results.boxes:
168
+ conf = float(box.conf[0])
169
+ if conf >= conf_threshold:
170
+ x1, y1, x2, y2 = map(int, box.xyxy[0])
171
+ cls_id = int(box.cls[0])
172
+ cls_name = model_obj.names[cls_id]
173
+
174
+ detection_info = {'model_name': model_name, 'class_name': cls_name, 'confidence': conf, 'bbox': (x1, y1, x2, y2)}
175
+ all_detections_for_image.append(detection_info)
176
+ yolo_output_content.append(f" - {cls_name} (信賴度: {conf:.2f}) [座標: {x1},{y1},{x2},{y2}]")
177
+ else:
178
+ yolo_output_content.append(" 未偵測到任何物件。")
179
+
180
+ # 繪製偵測框
181
+ colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255)]
182
+ color_map = {Path(p).name: colors[idx % len(colors)] for idx, (p, _) in enumerate(loaded_models)}
183
+ for det in all_detections_for_image:
184
+ x1, y1, x2, y2 = det['bbox']
185
+ color = color_map.get(det['model_name'], (200, 200, 200))
186
+ label = f"{det['class_name']} {det['confidence']:.2f}"
187
+ cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
188
+ cv2.putText(annotated_image, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
189
+
190
+ # 儲存 YOLO 標註圖
191
+ output_image_path = run_output_dir / f"{image_base_name}_yolo_detected.jpg"
192
+ cv2.imwrite(str(output_image_path), annotated_image)
193
+ annotated_image_paths.append(str(output_image_path))
194
+ all_result_files.append(str(output_image_path))
195
+
196
+ # 儲存 YOLO 辨識資訊
197
+ output_yolo_txt_path = run_output_dir / f"{image_base_name}_yolo_objects.txt"
198
+ output_yolo_txt_path.write_text("\n".join(yolo_output_content), encoding='utf-8')
199
+ all_result_files.append(str(output_yolo_txt_path))
200
+
201
+ # --- 3b. MLLM 分析 (如果啟用) ---
202
+ output_mllm_txt_path = None
203
+ mllm_result_content = ""
204
+ if enable_mllm:
205
+ try:
206
+ prompt_to_use = mllm_prompt if mllm_prompt and mllm_prompt.strip() else None
207
+ mllm_str = genai.analyze_content_with_gemini(str(image_path), prompt_to_use)
208
+ mllm_result_content = f"--- MLLM 分析結果 ---\n{mllm_str}"
209
+ except Exception as e:
210
+ mllm_result_content = f"--- MLLM 分析失敗 ---\n原因: {e}"
211
+
212
+ output_mllm_txt_path = run_output_dir / f"{image_base_name}_mllm_result.txt"
213
+ output_mllm_txt_path.write_text(mllm_result_content, encoding='utf-8')
214
+ all_result_files.append(str(output_mllm_txt_path))
215
+
216
+ # --- 3c. 職業預測分析 (如果啟用) ---
217
+ output_career_prediction_txt_path = None
218
+ career_prediction_result_content = ""
219
+ if enable_career_prediction:
220
+ try:
221
+ # 呼叫 genai.analyze_content_with_gemini_work 進行職業預測
222
+ career_prediction_str = genai_work.analyze_content_with_gemini_work(str(image_path))
223
+
224
+ career_prediction_result_content = f"--- 職業預測分析結果 ---\n{career_prediction_str}"
225
+ except Exception as e:
226
+ career_prediction_result_content = f"--- 職業預測分析失敗 ---\n原因: {e}"
227
+
228
+ output_career_prediction_txt_path = run_output_dir / f"{image_base_name}_career_prediction.txt"
229
+ output_career_prediction_txt_path.write_text(career_prediction_result_content, encoding='utf-8')
230
+ all_result_files.append(str(output_career_prediction_txt_path))
231
+
232
+ #寫明細表log
233
+ document = {"log_style":"detail",
234
+ "create_datetime": str(global_datetime),
235
+ "image_path": str(image_path),
236
+ "yolo_result": yolo_output_content,
237
+ "enable_mllm": enable_mllm,
238
+ "mllm_prompt": mllm_prompt,
239
+ "mllm_result": mllm_result_content,
240
+ "enable_career_prediction": enable_career_prediction, # 新增職業預測狀態到 log
241
+ "career_prediction_result": career_prediction_result_content # 新增職業預測結果到 log
242
+ }
243
+
244
+ mongo.insert_mongodb_log("multi_model_detection",document) #寫入log方便日後查驗
245
+
246
+ # 將本次圖片的結果加入到總列表中
247
+ all_texts.append("\n".join(yolo_output_content))
248
+ if output_mllm_txt_path:
249
+ all_texts.append(output_mllm_txt_path.read_text(encoding='utf-8'))
250
+ if output_career_prediction_txt_path: # 如果有職業預測結果,也加入到總列表中
251
+ all_texts.append(output_career_prediction_txt_path.read_text(encoding='utf-8'))
252
+
253
+
254
+ # --- 4. 完成處理打包更新最終結果 ---
255
+ progress(1, desc="打包結果中...")
256
+ zip_filename = run_output_dir / f"run_{run_id[:8]}_results.zip"
257
+ created_zip_path = create_zip_archive(all_result_files, str(zip_filename))
258
+
259
+ final_status = f"處理完成!共 {total_images} 張圖片。結果儲存於: {run_output_dir.absolute()}"
260
+ combined_text_output = "\n\n".join(all_texts)
261
+
262
+
263
+
264
+
265
+ yield {
266
+ output_status: gr.update(value=final_status),
267
+ download_button: gr.update(value=created_zip_path, visible=True),
268
+ output_text: gr.update(value=combined_text_output),
269
+ output_gallery: gr.update(value=annotated_image_paths) # 確保最終 gallery 也被更新
270
+ }
271
+
272
+ def toggle_mllm_prompt(is_enabled):
273
+ """
274
+ 根據 Checkbox 狀態,顯示或隱藏 MLLM prompt 輸入框。
275
+ """
276
+ return gr.update(visible=is_enabled)
277
+
278
+ def toggle_career_prediction_checkbox(is_enabled):
279
+ """
280
+ 根據 Checkbox 狀態,處理職業預測相關邏輯 (目前無需顯示額外輸入框)。
281
+ """
282
+ return None # 職業預測目前不需要額外的輸入框,所以直接返回 None
283
+
284
+ # --- Gradio Interface ---
285
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
286
+ gr.Markdown("# ㊙️智慧影像與職業潛能分析 (YOLO + MLLM)")
287
+ gr.Markdown("上傳圖片與YOLO模型進行物件偵測,並可選用MLLM進行進階圖像理解。 ver.250830.1")
288
+ # mongo_uri = os.getenv('mongo_uri')
289
+ # gr.Markdown(mongo_uri)
290
+
291
+ with gr.Row():
292
+ with gr.Column(scale=1):
293
+ # 輸入元件
294
+ image_input = gr.File(label="上傳圖片", file_count="multiple", file_types=["image"])
295
+             #model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"], info="若不提供,將使用預設的 yolov8n.pt 模型。")
296
+ model_input = gr.File(label="上傳YOLO模型 (.pt)", file_count="multiple", file_types=[".pt"])
297
+
298
+ with gr.Accordion("進階設定(開啟越多功能速度越慢呦)", open=False):
299
+ conf_slider = gr.Slider(minimum=0.1, maximum=1, value=0.40, step=0.05, label="yolo信賴度閾值")
300
+ mllm_enabled_checkbox = gr.Checkbox(label="開啟MLLM辨識", value=False)
301
+ mllm_prompt_input = gr.Textbox(label="自訂 MLLM Prompt (選填)", placeholder="例如:請描述圖中人物的穿著與場景。", visible=False)
302
+ career_prediction_checkbox = gr.Checkbox(label="開啟職業預測", value=False) # 新增職業預測 checkbox
303
+
304
+ run_button = gr.Button("開始辨識", variant="primary")
305
+
306
+ with gr.Column(scale=2):
307
+ # 輸出元件
308
+ output_gallery = gr.Gallery(label="辨識結果預覽", height=500, object_fit="contain", allow_preview=True)
309
+ output_text = gr.Textbox(label="詳細辨識資訊", lines=15, placeholder="辨識完成後,所有結果將顯示於此。")
310
+ output_status = gr.Textbox(label="執行狀態", interactive=False)
311
+ download_button = gr.File(label="下載所有結果 (.zip)", file_count="single", visible=False)
312
+
313
+ # --- 事件綁定 ---
314
+
315
+ # 點擊 "開始辨識" 按鈕
316
+ run_button.click(
317
+ fn=gradio_multi_model_detection,
318
+ inputs=[image_input, model_input, conf_slider, mllm_enabled_checkbox, mllm_prompt_input, career_prediction_checkbox], # 新增 career_prediction_checkbox
319
+ outputs=[output_gallery, output_status, download_button, output_text]
320
+ )
321
+
322
+ # 勾選/取消 "開啟MLLM辨識"
323
+ mllm_enabled_checkbox.change(
324
+ fn=toggle_mllm_prompt,
325
+ inputs=mllm_enabled_checkbox,
326
+ outputs=mllm_prompt_input
327
+ )
328
+
329
+ # 勾選/取消 "開啟職業預測"
330
+ career_prediction_checkbox.change(
331
+ fn=toggle_career_prediction_checkbox,
332
+ inputs=career_prediction_checkbox,
333
+ outputs=[] # 職業預測目前不需要更新任何輸出元件
334
+ )
335
+
336
+ # 啟動 Gradio 應用
337
+ if __name__ == "__main__":
338
+ demo.launch(debug=True)
339
+ #demo.launch(share=True)
target_object.py CHANGED
@@ -56,4 +56,33 @@ target_JSON ={
56
  "角色_動物",
57
  "角色_工作人員"
58
  ]
59
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "角色_動物",
57
  "角色_工作人員"
58
  ]
59
+ }
60
+
61
+ work_JSON ={
62
+ "role": "您是一位藝術職涯分析師,擅長從畫作作品中推薦適合的未來職業。您不僅分析作品的意象或外在形象,更深入剖析其技法、氛圍、光影、配色、構圖、透視和隱喻等特質,並將這些特質與MBTI人格類型相結合。",
63
+ "tone": "以第一人稱如朋友般的說話語氣,以積極正面的方向推薦職業,務必讓聽者有沉浸式的感覺。並且會遵守weighting設定的權重來評分,並照著output_format中的規範輸出格式。",
64
+ "occupations": [
65
+         "軟體工程師", "系統管理員", "網路工程師", "資料科學家", "人工智慧工程師", "雲端工程師",
66
+ "資安專家", "UI/UX設計師", "遊戲開發工程師", "硬體工程師", "電子工程師", "機械工程師",
67
+ "土木工程師", "結構工程師", "建築師", "室內設計師", "工業設計師", "產品經理",
68
+ "專案經理", "品管工程師", "測試工程師", "教師", "醫師", "牙醫師",
69
+ "藥師", "護理師", "放射師", "醫檢師", "物理治療師", "職能治療師",
70
+ "心理師", "營養師", "獸醫師", "律師", "法官", "檢察官",
71
+ "會計師", "審計師", "財務分析師", "金融交易員", "投資顧問", "理財專員",
72
+ "銀行行員", "保險業務員", "不動產經紀人", "採購專員", "物流專員", "倉儲管理員",
73
+ "運輸司機", "航空駕駛員", "空服員", "船舶駕駛員", "消防員", "警察",
74
+ "軍人", "保全人員", "社工師", "公務員", "政治人物", "記者",
75
+ "編輯", "作家", "翻譯員", "攝影師", "導演", "演員",
76
+ "歌手", "音樂製作人", "舞者", "畫家", "設計師", "插畫師",
77
+ "動畫師", "攝影棚工作人員", "廣告創意人員", "行銷專員", "數位行銷專員", "公關專員",
78
+ "品牌經理", "客戶經理", "業務員", "客服人員", "餐飲廚師", "餐飲服務員",
79
+ "飯店櫃檯人員", "旅遊導遊", "旅行社專員", "美容師", "美髮師", "美甲師",
80
+ "健身教練", "運動員", "裁判", "農夫", "漁夫", "牧場管理員",
81
+ "工廠技術員", "焊接工", "水電工", "清潔人員", "網紅", "自創業"
82
+ ],
83
+     "weighting": {"外在意象": "5%", "藝術技法與特質": "45%", "MBTI特質關聯": "50%"},
84
+ "output_format": {
85
+ "推薦的工作 : ": "<工作A> %,<工作B> %,<工作C> %",
86
+ "推薦的原因 : ": "解釋作品特質如何對應到MBTI並適合推薦的職業"
87
+ }
88
+ }