Spaces:

Karan6933
/

imageGenerator

Paused

App Files Files Community

Karan6933 committed on Jan 4

Commit

fc9000d

verified ·

1 Parent(s): 07f244f

Update main.py

Browse files

Files changed (1) hide show

main.py +58 -73

main.py CHANGED Viewed

@@ -18,7 +18,7 @@ import httpx
 from duckduckgo_search import DDGS
 from PIL import Image
-# --- HuggingFace Official Client (Fix for 410 Error) ---
 from huggingface_hub import InferenceClient
 # --- LangChain / AI Core ---
@@ -40,55 +40,47 @@ BASE_URL = "http://localhost:11434"
 HF_TOKEN_GLOBAL = os.getenv("HF_TOKEN", "")
-# --- NEW MODEL: SDXL Base 1.0 (Reliable on Free Tier) ---
-# Instruct-Pix2Pix has been shut down, so we will use SDXL Image-to-Image instead
-EDIT_MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"
 http_client = httpx.AsyncClient(timeout=120.0, follow_redirects=True)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    try:
-        os.makedirs("static/images", exist_ok=True)
-        os.makedirs("static/uploads", exist_ok=True)
-    except PermissionError:
-        logger.error("Permission denied. Check Dockerfile.")
     yield
     await http_client.aclose()
-app = FastAPI(title="GenAI Fixed Agent", lifespan=lifespan)
 app.mount("/static", StaticFiles(directory="static"), name="static")
 # --------------------------------------------------------------------------------------
-# 2. Tools
 # --------------------------------------------------------------------------------------
 @tool
 async def web_search(query: str) -> str:
     """Search the web for information."""
-    def run_sync_search(q):
-        try:
-            with DDGS() as ddgs:
-                return list(ddgs.text(q, max_results=4))
-        except Exception as e:
-            return str(e)
     try:
         results = await asyncio.to_thread(run_sync_search, query)
-        if isinstance(results, str) or not results: return "No results."
-        return "\n".join([f"Link: {r.get('href')}\nSnippet: {r.get('body')}" for r in results])
     except Exception as e:
         return f"Error: {str(e)}"
 @tool
 async def generate_image(prompt: str) -> str:
-    """Create a NEW image from scratch (Pollinations AI)."""
     try:
         seed = random.randint(0, 99999)
         safe_prompt = prompt.replace(" ", "%20")
         url = f"https://image.pollinations.ai/prompt/{safe_prompt}?seed={seed}&nologo=true&width=1024&height=1024&model=flux"
         resp = await http_client.get(url)
         if resp.status_code != 200: return "Failed."
         filename = f"static/images/gen_{int(time.time())}.png"
         img = Image.open(io.BytesIO(resp.content))
         await asyncio.to_thread(img.save, filename)
@@ -96,76 +88,70 @@ async def generate_image(prompt: str) -> str:
     except Exception as e:
         return f"Error: {str(e)}"
-# --- FIXED EDIT TOOL (Uses huggingface_hub client) ---
 @tool
 async def edit_image(instruction: str, image_path: str) -> str:
     """
-    Edits an uploaded image using Stable Diffusion XL (Image-to-Image).
-    Best for: Changing background, style, or adding elements while keeping structure.
     """
     logger.info(f"🎨 Editing {image_path} | Instruction: {instruction}")
-    if not os.path.exists(image_path):
-        return "Error: Image file not found."
-    if not HF_TOKEN_GLOBAL:
-        return "Error: HuggingFace Token is missing. Please enter it in the UI."
-    # Helper function to run HF Client in thread (Sync library)
     def run_hf_edit():
         try:
-            # Initialize Client
             client = InferenceClient(model=EDIT_MODEL_ID, token=HF_TOKEN_GLOBAL)
-            # Load User Image
             image = Image.open(image_path).convert("RGB")
-            # SDXL Image-to-Image Call
-            # strength: 0.0 = exact copy, 1.0 = completely new image
-            # 0.75 is a good balance for "Make it snowy/cinematic" without losing the person completely
             output_image = client.image_to_image(
                 image=image,
-                prompt=instruction,
-                negative_prompt="bad quality, distorted face, ugly, blurry, low resolution, cartoon",
-                strength=0.75,
-                guidance_scale=8.5
             )
             return output_image
         except Exception as e:
             return str(e)
     try:
-        # Run heavy task in thread
         result = await asyncio.to_thread(run_hf_edit)
-        if isinstance(result, str): # Error returned as string
-            return f"Edit Failed: {result}"
-        # Save Result
         filename = f"static/images/edited_{int(time.time())}_{random.randint(0,999)}.png"
         await asyncio.to_thread(result.save, filename)
         return f"Image Edited Successfully: {filename}"
     except Exception as e:
         return f"System Error: {str(e)}"
 tools = [web_search, generate_image, edit_image]
 # --------------------------------------------------------------------------------------
-# 3. Agent Logic
 # --------------------------------------------------------------------------------------
 class AgentState(TypedDict):
     messages: Annotated[List[BaseMessage], "add_messages"]
-llm = ChatOllama(model=MODEL_NAME, base_url=BASE_URL, temperature=0.2).bind_tools(tools)
-SYSTEM_PROMPT = """You are an AI visual expert.
-1. **New Image:** Use `generate_image` if user asks to create/draw from scratch.
-2. **Edit Image:** Use `edit_image` ONLY if user provides an image or asks to modify "this" image.
-   - Input: User's instruction + Exact path of the uploaded image.
-   - Note: The underlying model is SDXL.
 """
 async def agent_node(state: AgentState):
@@ -176,13 +162,19 @@ async def agent_node(state: AgentState):
 workflow = StateGraph(AgentState)
 workflow.add_node("agent", agent_node)
 workflow.add_node("tools", ToolNode(tools))
 workflow.add_edge(START, "agent")
 workflow.add_conditional_edges("agent", lambda s: "tools" if s["messages"][-1].tool_calls else END)
-workflow.add_edge("tools", "agent")
 app_graph = workflow.compile(checkpointer=MemorySaver())
 # --------------------------------------------------------------------------------------
-# 4. API Endpoints
 # --------------------------------------------------------------------------------------
 class ChatRequest(BaseModel):
@@ -197,19 +189,15 @@ async def chat_endpoint(req: ChatRequest):
     if req.hf_token: HF_TOKEN_GLOBAL = req.hf_token
     initial_msg = req.query
     if req.image_base64:
         try:
-            if "," in req.image_base64: image_base64_data = req.image_base64.split(",")[1]
-            else: image_base64_data = req.image_base64
-            img_data = base64.b64decode(image_base64_data)
-            filename = f"static/uploads/user_upload_{req.thread_id}_{int(time.time())}.png"
-            with open(filename, "wb") as f: f.write(img_data)
-            initial_msg = f"User uploaded an image at path: '{filename}'. Request: {req.query}"
-        except Exception:
-            pass
     config = {"configurable": {"thread_id": req.thread_id}}
     inputs = {"messages": [HumanMessage(content=initial_msg)]}
@@ -218,19 +206,16 @@ async def chat_endpoint(req: ChatRequest):
         try:
             async for event in app_graph.astream_events(inputs, config=config, version="v1"):
                 event_type = event["event"]
-                if event_type == "on_chat_model_stream":
-                    chunk = event["data"]["chunk"].content
-                    if chunk: yield chunk
-                elif event_type == "on_tool_start":
                     yield f"\n\n⚙️ **Processing:** {event['name']}...\n\n"
                 elif event_type == "on_tool_end":
                     out = str(event['data'].get('output'))
                     if "static/" in out:
-                        match = re.search(r'(static/.*\.png)', out)
-                        path = match.group(1) if match else out
                         yield f"\n\n![Result]({path})\n\n"
                     else:
-                        yield f"\nInfo: {out[:100]}\n"
         except Exception as e:
             yield f"Error: {str(e)}"

 from duckduckgo_search import DDGS
 from PIL import Image
+# --- HuggingFace Client ---
 from huggingface_hub import InferenceClient
 # --- LangChain / AI Core ---
 HF_TOKEN_GLOBAL = os.getenv("HF_TOKEN", "")
+# --- BETTER MODEL FOR REALISM ---
+# Using RealVisXL instead of SDXL Base (better photorealism & face consistency)
+EDIT_MODEL_ID = "SG161222/RealVisXL_V4.0"
 http_client = httpx.AsyncClient(timeout=120.0, follow_redirects=True)
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    os.makedirs("static/images", exist_ok=True)
+    os.makedirs("static/uploads", exist_ok=True)
     yield
     await http_client.aclose()
+app = FastAPI(title="GenAI Stable Agent", lifespan=lifespan)
 app.mount("/static", StaticFiles(directory="static"), name="static")
 # --------------------------------------------------------------------------------------
+# 2. Tools (Tuned for Consistency)
 # --------------------------------------------------------------------------------------
 @tool
 async def web_search(query: str) -> str:
     """Search the web for information."""
     try:
+        def run_sync_search(q):
+            with DDGS() as ddgs: return list(ddgs.text(q, max_results=3))
         results = await asyncio.to_thread(run_sync_search, query)
+        if not results: return "No results."
+        return "\n".join([f"Snippet: {r.get('body')}" for r in results])
     except Exception as e:
         return f"Error: {str(e)}"
 @tool
 async def generate_image(prompt: str) -> str:
+    """Create a NEW image from scratch (No input image)."""
     try:
         seed = random.randint(0, 99999)
         safe_prompt = prompt.replace(" ", "%20")
         url = f"https://image.pollinations.ai/prompt/{safe_prompt}?seed={seed}&nologo=true&width=1024&height=1024&model=flux"
         resp = await http_client.get(url)
         if resp.status_code != 200: return "Failed."
         filename = f"static/images/gen_{int(time.time())}.png"
         img = Image.open(io.BytesIO(resp.content))
         await asyncio.to_thread(img.save, filename)
     except Exception as e:
         return f"Error: {str(e)}"
 @tool
 async def edit_image(instruction: str, image_path: str) -> str:
     """
+    Edits the uploaded image.
+    IMPORTANT: Provide the EXACT image path.
     """
     logger.info(f"🎨 Editing {image_path} | Instruction: {instruction}")
+    if not os.path.exists(image_path): return "Error: Image file not found."
+    if not HF_TOKEN_GLOBAL: return "Error: HuggingFace Token is missing."
     def run_hf_edit():
         try:
             client = InferenceClient(model=EDIT_MODEL_ID, token=HF_TOKEN_GLOBAL)
             image = Image.open(image_path).convert("RGB")
+            # --- CONSISTENCY HACKS ---
+            # 1. Prompt Booster: Force identity terms
+            full_prompt = f"photorealistic, {instruction}, same person, consistent face, high detail, 8k, sharp focus"
+            # 2. Strong Negatives: Prevent face changing
+            neg_prompt = "cartoon, painting, illustration, distorted face, changed face, different person, ugly, blur, low quality, morphing"
+            # 3. Strength Tuning (Crucial):
+            # 0.5 - 0.6 = Best for keeping face (Face won't change, but background change will be subtle)
+            # 0.7 - 0.8 = Face changes
+            # We will use 0.6 (balance)
             output_image = client.image_to_image(
                 image=image,
+                prompt=full_prompt,
+                negative_prompt=neg_prompt,
+                strength=0.6,  # <--- FIXED STRENGTH (Isse loop nahi hoga, consistency maintain rahegi)
+                guidance_scale=7.5
             )
             return output_image
         except Exception as e:
             return str(e)
     try:
         result = await asyncio.to_thread(run_hf_edit)
+        if isinstance(result, str): return f"Edit Failed: {result}"
         filename = f"static/images/edited_{int(time.time())}_{random.randint(0,999)}.png"
         await asyncio.to_thread(result.save, filename)
         return f"Image Edited Successfully: {filename}"
     except Exception as e:
         return f"System Error: {str(e)}"
 tools = [web_search, generate_image, edit_image]
 # --------------------------------------------------------------------------------------
+# 3. Agent Logic (LOOP FIX HERE)
 # --------------------------------------------------------------------------------------
 class AgentState(TypedDict):
     messages: Annotated[List[BaseMessage], "add_messages"]
+llm = ChatOllama(model=MODEL_NAME, base_url=BASE_URL, temperature=0).bind_tools(tools)
+SYSTEM_PROMPT = """You are an AI visual assistant.
+1. Use `edit_image` ONLY if user provides an image path.
+2. Use `generate_image` for new creations.
+3. Once you call a tool, your job is DONE.
 """
 async def agent_node(state: AgentState):
 workflow = StateGraph(AgentState)
 workflow.add_node("agent", agent_node)
 workflow.add_node("tools", ToolNode(tools))
 workflow.add_edge(START, "agent")
+# --- THE LOOP FIX ---
+# Logic: Agent -> Tools -> END
+# After a tool has run, do not go back to the agent. End the run directly.
 workflow.add_conditional_edges("agent", lambda s: "tools" if s["messages"][-1].tool_calls else END)
+workflow.add_edge("tools", END) # <--- STOP LOOP HERE
 app_graph = workflow.compile(checkpointer=MemorySaver())
 # --------------------------------------------------------------------------------------
+# 4. API (Same logic)
 # --------------------------------------------------------------------------------------
 class ChatRequest(BaseModel):
     if req.hf_token: HF_TOKEN_GLOBAL = req.hf_token
     initial_msg = req.query
     if req.image_base64:
         try:
+            if "," in req.image_base64: d = req.image_base64.split(",")[1]
+            else: d = req.image_base64
+            fname = f"static/uploads/user_upload_{req.thread_id}_{int(time.time())}.png"
+            with open(fname, "wb") as f: f.write(base64.b64decode(d))
+            initial_msg = f"User uploaded an image at path: '{fname}'. Request: {req.query}"
+        except: pass
     config = {"configurable": {"thread_id": req.thread_id}}
     inputs = {"messages": [HumanMessage(content=initial_msg)]}
         try:
             async for event in app_graph.astream_events(inputs, config=config, version="v1"):
                 event_type = event["event"]
+                # Stream only the tool output and the final result
+                if event_type == "on_tool_start":
                     yield f"\n\n⚙️ **Processing:** {event['name']}...\n\n"
                 elif event_type == "on_tool_end":
                     out = str(event['data'].get('output'))
                     if "static/" in out:
+                        path = re.search(r'(static/.*\.png)', out).group(1)
                         yield f"\n\n![Result]({path})\n\n"
                     else:
+                        yield f"Info: {out}\n"
         except Exception as e:
             yield f"Error: {str(e)}"