MINZO4546 committed on
Commit
6219a3a
·
verified ·
1 Parent(s): e9d564d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -27
app.py CHANGED
@@ -1,49 +1,63 @@
1
  import torch
 
2
  from fastapi import FastAPI
3
- from fastapi.middleware.cors import CORSMiddleware
4
- from pydantic import BaseModel
5
  from transformers import pipeline
6
 
7
  main = FastAPI()
8
 
9
- main.add_middleware(
10
- CORSMiddleware,
11
- allow_origins=["*"],
12
- allow_methods=["*"],
13
- allow_headers=["*"],
14
- )
15
-
16
  MODEL_ID = "google/gemma-3-1b-it"
17
 
18
- # 🔱 Optimization 1: use the KV cache (to increase speed)
19
  pipe = pipeline(
20
  "text-generation",
21
  model=MODEL_ID,
22
  device_map="cpu",
23
- torch_dtype=torch.bfloat16, # CPU එක bfloat16 වලට කැමතියි
24
- use_cache=True, # කලින් ජෙනරේට් කරපු ටෝකන් මතක තබා ගනී
25
  trust_remote_code=True
26
  )
27
 
28
- class ChatRequest(BaseModel):
29
- message: str
 
 
 
 
 
 
30
 
31
  @main.post("/v1/chat")
32
- async def chat(request_data: ChatRequest):
33
- user_query = request_data.message.strip()
 
 
 
 
 
 
 
 
34
 
35
- messages = [
36
- {"role": "user", "content": user_query},
37
- ]
 
 
 
 
38
 
39
- # 🔱 Optimization 2: leave enough room for code generation
40
  results = pipe(
41
- messages,
42
- max_new_tokens=1024, # 🔱 දැන් ඔයාට දිග කෝඩ් එකක් වුණත් ගන්න පුළුවන්
43
- do_sample=False, # Coding වලට sample ඕනේ නැහැ, Greedy search එක වේගවත්
44
- temperature=0.0, # වඩාත් නිවැරදි කෝඩ් එකක් සඳහා (Zero randomness)
45
- pad_token_id=50256
46
  )
 
 
 
 
47
 
48
- reply = results[0]['generated_text'][-1]['content']
49
- return {"reply": reply}
 
 
1
  import torch
2
+ import requests
3
  from fastapi import FastAPI
 
 
4
  from transformers import pipeline
5
 
6
  main = FastAPI()
7
 
8
+ # 🔱 Inachi Core Configuration
 
 
 
 
 
 
9
  MODEL_ID = "google/gemma-3-1b-it"
10
 
11
# Build the shared text-generation pipeline once, at import time. It is pinned
# to the CPU and loads the weights in bfloat16.
# NOTE(review): trust_remote_code=True allows the model repository to run its
# own Python code while loading -- confirm this model actually requires it.
pipe = pipeline(
    task="text-generation",
    model=MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="cpu",
    trust_remote_code=True,
)
19
 
20
+ # 🔱 Simple Web Search Tool (DuckDuckGo API - No Key Required)
21
def web_search(query):
    """Look up *query* with the DuckDuckGo Instant Answer API.

    Args:
        query: Free-text search string (here, the user's chat message).

    Returns:
        The ``AbstractText`` field of the JSON response;
        ``"No specific web data found."`` when that field is missing or
        empty; ``"Search failed."`` when the request, the HTTP status or the
        JSON decoding fails.
    """
    try:
        # Hand the query to ``params`` so requests URL-encodes it; characters
        # such as ``&`` or ``#`` in user text would otherwise corrupt the
        # query string.
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={"q": query, "format": "json"},
            timeout=10,  # never let a stalled search hang the chat request
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        # Best-effort tool: network, HTTP and decoding errors degrade to a
        # fixed message instead of failing the caller.
        return "Search failed."

    if not isinstance(payload, dict):
        # Unexpected JSON shape; treat it like any other failed lookup.
        return "Search failed."

    # The key may be present but empty, so fall back on any falsy value
    # rather than only on a missing key.
    return payload.get("AbstractText") or "No specific web data found."
28
 
29
  @main.post("/v1/chat")
30
async def chat(data: dict):
    """Answer one chat message as "Inachi AI", optionally adding a web lookup.

    Args:
        data: Parsed JSON request body; the user's text is read from its
            ``"message"`` key.

    Returns:
        ``{"reply": <text generated by the model>}``.
    """
    # A missing, null or non-string "message" must not crash the handler when
    # the text is lower-cased below, so coerce it to a string first.
    user_query = str(data.get("message") or "").strip()

    # 🔱 Inachi identity & system instruction: this is where the model is told
    # who it is.
    system_prompt = (
        "You are Inachi AI, a highly advanced assistant developed by the Inachi Team. "
        "Your goal is to provide technical, precise, and helpful information. "
        "Always identify yourself as Inachi AI when asked."
    )

    # Simple keyword check to decide whether a web search is needed.
    lowered_query = user_query.lower()
    search_context = ""
    if any(keyword in lowered_query for keyword in ("search", "latest")):
        search_context = f"\nWeb Search Result: {web_search(user_query)}"

    # Assemble the prompt.
    full_prompt = f"{system_prompt}\nContext: {search_context}\nUser: {user_query}\nInachi AI:"

    # NOTE(review): pipe() and web_search() are synchronous calls inside an
    # async handler, so other requests wait while they run.
    results = pipe(
        full_prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        # Ask for the completion only. Splitting the full text on
        # "Inachi AI:" would cut off any answer that repeats that marker.
        return_full_text=False,
    )

    # Extract the clean answer.
    reply = results[0]["generated_text"].strip()
    return {"reply": reply}
60
 
61
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app on all interfaces, port 7860.
    # uvicorn is imported here so it is only required when run as a script.
    import uvicorn

    uvicorn.run(app=main, port=7860, host="0.0.0.0")