MHD011 committed on
Commit
5d000a9
·
verified ·
1 Parent(s): 919aa75

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -2
  2. app.py +204 -279
  3. dockerignore +9 -0
  4. requirements.txt +4 -4
Dockerfile CHANGED
@@ -12,14 +12,15 @@ COPY . .
12
  RUN mkdir -p /app/model_cache && chmod -R 777 /app
13
 
14
  RUN pip install --upgrade pip setuptools wheel
15
- RUN pip install numpy==1.24.4
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
18
  ENV FLASK_APP=app.py
19
  ENV FLASK_ENV=production
20
  ENV TRANSFORMERS_CACHE=/app/model_cache
21
  ENV TORCH_CACHE=/app/model_cache
 
 
22
 
23
  EXPOSE 7860
24
 
25
- CMD ["gunicorn", "--workers", "1", "--threads", "2", "--bind", "0.0.0.0:7860", "--timeout", "300", "--preload", "app:app"]
 
12
  RUN mkdir -p /app/model_cache && chmod -R 777 /app
13
 
14
  RUN pip install --upgrade pip setuptools wheel
 
15
  RUN pip install --no-cache-dir -r requirements.txt
16
 
17
  ENV FLASK_APP=app.py
18
  ENV FLASK_ENV=production
19
  ENV TRANSFORMERS_CACHE=/app/model_cache
20
  ENV TORCH_CACHE=/app/model_cache
21
+ ENV PYTHONUNBUFFERED=1
22
+ ENV TOKENIZERS_PARALLELISM=false
23
 
24
  EXPOSE 7860
25
 
26
+ CMD ["gunicorn", "--workers", "1", "--threads", "2", "--bind", "0.0.0.0:7860", "--timeout", "120", "--preload", "app:app"]
app.py CHANGED
@@ -1,280 +1,205 @@
1
- import os
2
- import re
3
- import logging
4
- from flask import Flask, request, jsonify, Response
5
- from flask_cors import CORS
6
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
- import torch
8
- import psycopg2
9
- import json
10
-
11
- app = Flask(__name__)
12
- CORS(app)
13
-
14
- # إعدادات التسجيل
15
- logging.basicConfig(level=logging.INFO)
16
- logger = logging.getLogger(__name__)
17
-
18
- # إعدادات النموذج
19
- MODEL_NAME = "tscholak/cxmefzzi" # نموذج أصغر
20
- SUPABASE_DB_URL = os.getenv('SUPABASE_DB_URL')
21
-
22
- # تهيئة مجلد الذاكرة المؤقتة
23
- os.makedirs("model_cache", exist_ok=True)
24
- os.environ["TRANSFORMERS_CACHE"] = "model_cache"
25
- os.environ["TORCH_CACHE"] = "model_cache"
26
-
27
- tokenizer = None
28
- model = None
29
-
30
- def initialize():
31
- global tokenizer, model
32
- try:
33
- logger.info("جاري تحميل النموذج بتهيئة منخفضة الذاكرة...")
34
- tokenizer = AutoTokenizer.from_pretrained(
35
- MODEL_NAME,
36
- cache_dir="model_cache",
37
- local_files_only=False
38
- )
39
- model = AutoModelForSeq2SeqLM.from_pretrained(
40
- MODEL_NAME,
41
- cache_dir="model_cache",
42
- device_map="auto",
43
- torch_dtype=torch.float16,
44
- low_cpu_mem_usage=True
45
- )
46
- model.eval()
47
- logger.info("تم تحميل النموذج بنجاح")
48
- except Exception as e:
49
- logger.error(f"فشل في تحميل النموذج: {str(e)}")
50
- raise
51
-
52
- initialize()
53
- # --- سكيمة قاعدة البيانات ---
54
- DB_SCHEMA = """
55
- CREATE TABLE public.profiles (
56
- id uuid NOT NULL,
57
- updated_at timestamp with time zone,
58
- username text UNIQUE CHECK (char_length(username) >= 3),
59
- full_name text,
60
- avatar_url text,
61
- website text,
62
- cam_mac text UNIQUE,
63
- fcm_token text,
64
- notification_enabled boolean DEFAULT true,
65
- CONSTRAINT profiles_pkey PRIMARY KEY (id),
66
- CONSTRAINT profiles_id_fkey FOREIGN KEY (id) REFERENCES auth.users(id)
67
- );
68
-
69
- CREATE TABLE public.place (
70
- id bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
71
- created_at timestamp with time zone DEFAULT (now() AT TIME ZONE 'utc'::text),
72
- name text,
73
- CONSTRAINT place_pkey PRIMARY KEY (id)
74
- );
75
-
76
- CREATE TABLE public.user_place (
77
- id bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
78
- created_at timestamp with time zone NOT NULL DEFAULT now(),
79
- place_id bigint,
80
- user_cam_mac text,
81
- CONSTRAINT user_place_pkey PRIMARY KEY (id),
82
- CONSTRAINT user_place_place_id_fkey FOREIGN KEY (place_id) REFERENCES public.place(id),
83
- CONSTRAINT user_place_user_cam_mac_fkey FOREIGN KEY (user_cam_mac) REFERENCES public.profiles(cam_mac)
84
- );
85
-
86
- CREATE TABLE public.data (
87
- id bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
88
- created_at timestamp without time zone,
89
- caption text,
90
- image_url text,
91
- latitude double precision DEFAULT '36.1833854'::double precision,
92
- longitude double precision DEFAULT '37.1309255'::double precision,
93
- user_place_id bigint,
94
- cam_mac text,
95
- CONSTRAINT data_pkey PRIMARY KEY (id),
96
- CONSTRAINT data_user_place_id_fkey FOREIGN KEY (user_place_id) REFERENCES public.user_place(id)
97
- );
98
-
99
- CREATE TABLE public.biodata (
100
- id bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
101
- created_at timestamp with time zone NOT NULL DEFAULT now(),
102
- mac_address text,
103
- acceleration_x double precision,
104
- acceleration_y double precision,
105
- acceleration_z double precision,
106
- gyro_x double precision,
107
- gyro_y double precision,
108
- gyro_z double precision,
109
- temperature double precision,
110
- CONSTRAINT biodata_pkey PRIMARY KEY (id),
111
- CONSTRAINT biodata_mac_address_fkey FOREIGN KEY (mac_address) REFERENCES public.profiles(cam_mac)
112
- );
113
-
114
- CREATE TABLE public.notification (
115
- id bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
116
- created_at timestamp without time zone NOT NULL DEFAULT now(),
117
- user_cam_mac text,
118
- title text,
119
- message text,
120
- is_read boolean,
121
- acceleration_x double precision,
122
- acceleration_y double precision,
123
- acceleration_z double precision,
124
- gyro_x double precision,
125
- gyro_y double precision,
126
- gyro_z double precision,
127
- CONSTRAINT notification_pkey PRIMARY KEY (id),
128
- CONSTRAINT notification_user_cam_mac_fkey FOREIGN KEY (user_cam_mac) REFERENCES public.profiles(cam_mac)
129
- );
130
-
131
- CREATE TABLE public.flag (
132
- id bigint GENERATED ALWAYS AS IDENTITY NOT NULL,
133
- flag smallint,
134
- user_mac_address text,
135
- CONSTRAINT flag_pkey PRIMARY KEY (id),
136
- CONSTRAINT flag_user_mac_address_fkey FOREIGN KEY (user_mac_address) REFERENCES public.profiles(cam_mac)
137
- );
138
- """.strip()
139
-
140
- # --- الاتصال بقاعدة البيانات ---
141
- def get_db_connection():
142
- try:
143
- return psycopg2.connect(SUPABASE_DB_URL)
144
- except Exception as err:
145
- logger.error(f"Database connection error: {err}")
146
- return None
147
-
148
- # --- التحقق من صحة cam_mac ---
149
- def validate_cam_mac(cam_mac):
150
- conn = get_db_connection()
151
- if not conn:
152
- return False
153
-
154
- try:
155
- cursor = conn.cursor()
156
- cursor.execute("SELECT 1 FROM profiles WHERE cam_mac = %s;", (cam_mac,))
157
- return cursor.fetchone() is not None
158
- except Exception as e:
159
- logger.error(f"Validation error: {e}")
160
- return False
161
- finally:
162
- if conn:
163
- conn.close()
164
-
165
- @app.route('/api/query', methods=['POST'])
166
- def handle_query():
167
- if tokenizer is None or model is None:
168
- return jsonify({"error": "النموذج غير محمل، يرجى المحاولة لاحقاً"}), 503
169
-
170
- try:
171
- data = request.get_json()
172
- if not data or 'text' not in data or 'cam_mac' not in data:
173
- return jsonify({"error": "يرجى إرسال 'text' و 'cam_mac'"}), 400
174
-
175
- natural_query = data['text']
176
- cam_mac = data['cam_mac']
177
- logger.info(f"استعلام من {cam_mac}: {natural_query}")
178
-
179
- if not validate_cam_mac(cam_mac):
180
- return jsonify({"error": "عنوان MAC غير صالح"}), 403
181
-
182
- prompt = f"""
183
- ### Postgres SQL table definitions
184
- {DB_SCHEMA}
185
-
186
- ### Rules:
187
- - Always filter by cam_mac = '{cam_mac}'
188
- - Use only SELECT statements
189
- - Use proper JOINs
190
- - Use table aliases when helpful
191
- - The output must contain only the SQL query
192
-
193
- ### User question: {natural_query}
194
-
195
- ### SQL query:
196
- SELECT
197
- """.strip()
198
-
199
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
200
-
201
- with torch.no_grad():
202
- outputs = model.generate(**inputs, max_length=256)
203
-
204
- sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
205
-
206
- # تنظيف الناتج
207
- sql = re.sub(r"^```sql\s*", "", sql, flags=re.IGNORECASE)
208
- sql = re.sub(r"\s*```$", "", sql)
209
- sql = re.sub(r"^SQL:\s*", "", sql, flags=re.IGNORECASE)
210
-
211
- if not sql.upper().startswith("SELECT"):
212
- sql = "SELECT " + sql.split("SELECT")[-1] if "SELECT" in sql else f"SELECT * FROM ({sql}) AS subquery"
213
-
214
- if not sql.endswith(";"):
215
- sql += ";"
216
-
217
- logger.info(f"استعلام SQL المولد: {sql}")
218
-
219
- if not sql.upper().strip().startswith("SELECT"):
220
- return jsonify({"error": "يُسمح فقط باستعلامات SELECT"}), 403
221
-
222
- conn = get_db_connection()
223
- if not conn:
224
- return jsonify({"error": "فشل الاتصال بقاعدة البيانات"}), 500
225
-
226
- cursor = None
227
- try:
228
- cursor = conn.cursor()
229
- cursor.execute(sql)
230
- columns = [desc[0] for desc in cursor.description]
231
- rows = cursor.fetchall()
232
- data = [dict(zip(columns, row)) for row in rows]
233
-
234
- response_data = {
235
- "data": data,
236
- "generated_sql": sql
237
- }
238
-
239
- response_json = json.dumps(response_data, ensure_ascii=False)
240
-
241
- return Response(
242
- response_json,
243
- status=200,
244
- mimetype='application/json; charset=utf-8'
245
- )
246
-
247
- except Exception as e:
248
- logger.error(f"خطأ في تنفيذ SQL: {e}")
249
- return jsonify({
250
- "error": str(e),
251
- "generated_sql": sql
252
- }), 500
253
- finally:
254
- if cursor:
255
- cursor.close()
256
- if conn:
257
- conn.close()
258
-
259
- except Exception as e:
260
- logger.error(f"خطأ في التوليد: {str(e)}")
261
- return jsonify({"error": "فشل في توليد الاستعلام"}), 500
262
-
263
- @app.route('/health')
264
- def health_check():
265
- return jsonify({
266
- "status": "healthy",
267
- "model_loaded": model is not None,
268
- "db_connection": get_db_connection() is not None
269
- })
270
-
271
- @app.route('/')
272
- def home():
273
- return """
274
- <h1>Text2SQL API</h1>
275
- <p>استخدم <code>/api/query</code> مع POST {"text": "سؤالك", "cam_mac": "عنوان MAC"}</p>
276
- <p>تحقق من حالة الخدمة: <a href="/health">/health</a></p>
277
- """
278
-
279
- if __name__ == '__main__':
280
  app.run(host='0.0.0.0', port=7860)
 
1
import json
import logging
import os
import re
from datetime import datetime, timezone

import psycopg2
import torch
from flask import Flask, request, jsonify, Response
from flask_cors import CORS
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
10
+
11
# Flask application object; the route decorators further down register on it.
app = Flask(__name__)
# Enable CORS on the app using the flask-cors defaults.
CORS(app)

# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model settings
MODEL_NAME = "tscholak/cxmefzzi"
# PostgreSQL connection string; os.getenv returns None when the variable is unset.
SUPABASE_DB_URL = os.getenv('SUPABASE_DB_URL')

# Set up the cache directory
os.makedirs("model_cache", exist_ok=True)
# NOTE(review): these are assigned after `transformers` and `torch` were
# imported at the top of the file — confirm they still take effect. The
# loaders in initialize() also pass cache_dir="model_cache" explicitly.
os.environ["TRANSFORMERS_CACHE"] = "model_cache"
os.environ["TORCH_CACHE"] = "model_cache"

# Populated by initialize(); None until the model has been loaded.
tokenizer = None
model = None
29
+
30
def initialize():
    """Load the tokenizer and the seq2seq model into the module globals.

    Both artefacts are fetched for ``MODEL_NAME`` with ``model_cache`` as the
    cache directory, and the model is switched to eval mode. Any failure is
    logged and then re-raised to the caller.
    """
    global tokenizer, model

    tokenizer_options = {
        "cache_dir": "model_cache",
        "local_files_only": False,
    }
    model_options = {
        "cache_dir": "model_cache",
        "device_map": "auto",
        "torch_dtype": torch.float16,
        "low_cpu_mem_usage": True,
    }

    try:
        logger.info("🚀 جاري تحميل النموذج...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **tokenizer_options)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, **model_options)
        model.eval()
        logger.info("تم تحميل النموذج بنجاح")
    except Exception as load_error:
        logger.error(f"فشل في تحميل النموذج: {load_error}")
        raise
51
+
52
# Load the tokenizer and model at import time. initialize() re-raises on
# failure, so a load error aborts the import of this module.
initialize()

# Database schema
# This text is interpolated verbatim into the prompt built in handle_query().
# NOTE: the `...` lines below are literal characters inside the string, not
# Python elision, so the model is only told about the columns listed here.
DB_SCHEMA = """
CREATE TABLE public.profiles (
id uuid NOT NULL,
cam_mac text UNIQUE,
...
);
CREATE TABLE public.data (
id bigint GENERATED ALWAYS AS IDENTITY,
cam_mac text,
...
);
""".strip()
67
+
68
def get_db_connection():
    """Open a new PostgreSQL connection using ``SUPABASE_DB_URL``.

    Returns:
        The open psycopg2 connection, or ``None`` when connecting fails
        (the error is logged rather than raised). The caller is responsible
        for closing the returned connection.
    """
    try:
        connection = psycopg2.connect(SUPABASE_DB_URL)
    except Exception as connect_error:
        logger.error(f"❌ خطأ في الاتصال بقاعدة البيانات: {connect_error}")
        return None

    logger.info("✅ تم الاتصال بقاعدة البيانات")
    return connection
76
+
77
def validate_cam_mac(cam_mac):
    """Report whether ``cam_mac`` matches a row in the ``profiles`` table.

    Args:
        cam_mac: Value compared against ``profiles.cam_mac`` through a
            parameterised query.

    Returns:
        ``True`` when a matching row exists. ``False`` when there is no
        match, no database connection could be opened, or the lookup raised.
    """
    connection = get_db_connection()
    if connection is None:
        return False

    try:
        lookup = connection.cursor()
        lookup.execute("SELECT 1 FROM profiles WHERE cam_mac = %s;", (cam_mac,))
        match = lookup.fetchone()
        return match is not None
    except Exception as lookup_error:
        logger.error(f"❌ خطأ في التحقق من cam_mac: {lookup_error}")
        return False
    finally:
        # Runs on every path above, so the connection never outlives the call.
        connection.close()
92
+
93
def _build_prompt(question, cam_mac):
    """Assemble the text-to-SQL prompt for one question scoped to one cam_mac."""
    return "\n".join([
        "",
        "### هيكل قاعدة البيانات:",
        DB_SCHEMA,
        "",
        "### السؤال:",
        f"{question}",
        "",
        "### قم بإنشاء استعلام SQL بحيث:",
        "- يستخدم SELECT فقط",
        f"- يتضمن تصفية حسب cam_mac = '{cam_mac}'",
        "- يكون صالحًا لـ PostgreSQL",
        "",
        "SQL:",
        "",
    ])


def _clean_generated_sql(raw_sql):
    """Normalise model output to one ';'-terminated SELECT, or None if unsafe."""
    sql = re.sub(r"^```sql\s*", "", raw_sql.strip(), flags=re.IGNORECASE)
    sql = re.sub(r"\s*```$", "", sql).strip()
    # Drop trailing terminators so that any ';' still present marks a second
    # statement rather than the end of the first one.
    sql = sql.rstrip(";").strip()

    # The model sometimes omits the leading keyword; prepend it as before.
    if not sql.upper().startswith("SELECT"):
        sql = f"SELECT {sql}"

    # Conservative: refuse stacked statements such as "SELECT 1; DROP TABLE x".
    # A ';' inside a string literal is rejected too, which is acceptable here.
    if ";" in sql:
        return None
    return f"{sql};"


@app.route('/api/query', methods=['POST'])
def handle_query():
    """Translate a natural-language question into SQL, run it, return the rows.

    Expects a JSON object with the keys ``text`` (the question) and
    ``cam_mac`` (must exist in ``profiles``).

    Returns:
        200 with ``{"data", "sql", "timestamp"}`` on success.
        400 when the body is not a JSON object or a key is missing.
        403 when ``cam_mac`` is unknown or the generated SQL is not a single
        SELECT statement.
        500 with ``{"error", "sql", "details"}`` when the database rejects
        the query, or ``{"error"}`` for any other failure.

    The generated statement is executed in a read-only transaction and is
    never committed, so model output cannot modify the database.

    NOTE(review): the ``cam_mac`` filter is only requested in the prompt; the
    generated SQL is not checked for it.
    """
    try:
        # silent=True turns a malformed or non-JSON body into None so it gets
        # the 400 below instead of falling through to the generic 500.
        payload = request.get_json(silent=True)
        logger.info(f"📩 بيانات الطلب: {payload}")

        if not isinstance(payload, dict) or 'text' not in payload or 'cam_mac' not in payload:
            logger.error("❌ بيانات ناقصة في الطلب")
            return jsonify({"error": "يرجى إرسال 'text' و 'cam_mac'"}), 400

        cam_mac = payload['cam_mac']
        if not validate_cam_mac(cam_mac):
            logger.error(f"❌ cam_mac غير صالح: {cam_mac}")
            return jsonify({"error": "عنوان MAC غير صالح"}), 403

        prompt = _build_prompt(payload['text'], cam_mac)
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=256,
                num_beams=4,
                early_stopping=True
            )

        raw_sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
        logger.info(f"⚡ الاستعلام المولد: {raw_sql}")

        sql = _clean_generated_sql(raw_sql)
        if sql is None:
            return jsonify({"error": "يُسمح فقط باستعلامات SELECT"}), 403

        conn = get_db_connection()
        if not conn:
            return jsonify({"error": "فشل الاتصال بقاعدة البيانات"}), 500

        try:
            # Defence in depth: even a statement that slips past the textual
            # check cannot write inside a read-only transaction.
            conn.set_session(readonly=True)
            cursor = conn.cursor()
            cursor.execute(sql)

            if cursor.description:
                columns = [desc[0] for desc in cursor.description]
                rows = [dict(zip(columns, row)) for row in cursor.fetchall()]
                response = {
                    "data": rows,
                    "sql": sql,
                    "timestamp": datetime.now(timezone.utc).isoformat()
                }
            else:
                # No result set; nothing is committed on this path.
                response = {
                    "message": "تم تنفيذ الاستعلام بنجاح",
                    "sql": sql
                }

            return jsonify(response)

        except Exception as e:
            logger.error(f"❌ خطأ في تنفيذ SQL: {e}\nالاستعلام: {sql}")
            return jsonify({
                "error": "خطأ في تنفيذ الاستعلام",
                "sql": sql,
                "details": str(e)
            }), 500

        finally:
            conn.close()

    except Exception as e:
        logger.error(f"❌ خطأ غير متوقع: {str(e)}", exc_info=True)
        return jsonify({"error": "فشل في معالجة الطلب"}), 500
186
+
187
@app.route('/health')
def health_check():
    """Report service health as JSON.

    Returns:
        A JSON object with ``status``, ``model_loaded`` (whether the global
        model is set), ``db_connection`` (whether a database connection
        could be opened just now) and a UTC ISO-8601 ``timestamp``.
    """
    probe = get_db_connection()
    db_ok = probe is not None
    if db_ok:
        # The probe connection is only needed for the yes/no answer; close it
        # so repeated health checks do not leak database connections.
        probe.close()

    return jsonify({
        "status": "healthy",
        "model_loaded": model is not None,
        "db_connection": db_ok,
        "timestamp": datetime.now(timezone.utc).isoformat()
    })
195
+
196
@app.route('/')
def home():
    """Serve a minimal HTML landing page.

    The page names the POST endpoint ``/api/query`` and links to ``/health``.
    """
    return """
<h1>Text2SQL API</h1>
<p>استخدم <code>/api/query</code> مع POST</p>
<p>تحقق من الحالة: <a href="/health">/health</a></p>
"""
203
+
204
if __name__ == '__main__':
    # Direct-run entry point: start Flask's built-in server on all interfaces,
    # port 7860 (the same port the Dockerfile exposes and gunicorn binds to).
    app.run(host='0.0.0.0', port=7860)
dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .DS_Store
6
+ .env
7
+ venv/
8
+ model_cache/
9
+ *.log
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- numpy==1.24.4
2
- torch==2.0.1
3
- transformers==4.30.2
4
  flask==2.3.2
5
  flask-cors==3.0.10
 
 
6
  psycopg2-binary==2.9.7
7
  gunicorn==20.1.0
8
  accelerate==0.21.0
9
- python-dotenv==1.0.0
 
 
 
 
 
1
  flask==2.3.2
2
  flask-cors==3.0.10
3
+ torch==2.0.1
4
+ transformers==4.30.2
5
  psycopg2-binary==2.9.7
6
  gunicorn==20.1.0
7
  accelerate==0.21.0
8
+ python-dotenv==1.0.0
9
+ numpy==1.24.4