fokan committed on
Commit
dc4dad8
·
verified ·
1 Parent(s): 30b350e

Upload 49 files

Browse files
Files changed (4) hide show
  1. Dockerfile +10 -2
  2. app.py +81 -21
  3. main.py +7 -1
  4. setup_fonts.py +23 -4
Dockerfile CHANGED
@@ -46,6 +46,7 @@ RUN fc-cache -fv
46
  # Pre-initialize LibreOffice to avoid first-run errors
47
  # Run LibreOffice once to complete initial setup
48
  RUN mkdir -p /tmp/.config/libreoffice && \
 
49
  HOME=/tmp libreoffice --headless --version || true
50
 
51
  # Set working directory
@@ -56,7 +57,7 @@ COPY requirements.txt .
56
  RUN pip3 install --no-cache-dir -r requirements.txt
57
 
58
  # Create necessary directories with proper permissions
59
- RUN mkdir -p /tmp/libreoffice_conversion /tmp/.config && \
60
  chmod -R 777 /tmp
61
 
62
  # Create static directory
@@ -68,12 +69,19 @@ COPY . .
68
  # Setup additional Arabic fonts using Python script with proper permissions
69
  # Use /tmp for font installation to avoid permission issues
70
  RUN sed -i 's|/usr/share/fonts/truetype|/tmp/fonts/truetype|g' setup_fonts.py && \
71
- mkdir -p /tmp/fonts/truetype && \
 
72
  python3 setup_fonts.py || echo "Font setup failed, continuing with default fonts..."
73
 
74
  # Update font cache after installing additional fonts
75
  RUN fc-cache -fv
76
 
 
 
 
 
 
 
77
  # Expose port (Hugging Face Spaces requires port 7860)
78
  EXPOSE 7860
79
 
 
46
  # Pre-initialize LibreOffice to avoid first-run errors
47
  # Run LibreOffice once to complete initial setup
48
  RUN mkdir -p /tmp/.config/libreoffice && \
49
+ chmod -R 777 /tmp/.config && \
50
  HOME=/tmp libreoffice --headless --version || true
51
 
52
  # Set working directory
 
57
  RUN pip3 install --no-cache-dir -r requirements.txt
58
 
59
  # Create necessary directories with proper permissions
60
+ RUN mkdir -p /tmp/libreoffice_conversion /tmp/.config /tmp/fonts/truetype && \
61
  chmod -R 777 /tmp
62
 
63
  # Create static directory
 
69
  # Setup additional Arabic fonts using Python script with proper permissions
70
  # Use /tmp for font installation to avoid permission issues
71
  RUN sed -i 's|/usr/share/fonts/truetype|/tmp/fonts/truetype|g' setup_fonts.py && \
72
+ mkdir -p /tmp/fonts/truetype/arabic-enhanced && \
73
+ chmod -R 777 /tmp/fonts/truetype && \
74
  python3 setup_fonts.py || echo "Font setup failed, continuing with default fonts..."
75
 
76
  # Update font cache after installing additional fonts
77
  RUN fc-cache -fv
78
 
79
+ # Fix LibreOffice Java integration issues
80
+ RUN mkdir -p /tmp/.config/libreoffice/4/user && \
81
+ chmod -R 777 /tmp/.config/libreoffice && \
82
+ touch /tmp/.config/libreoffice/4/user/registrymodifications.xcu && \
83
+ chmod 666 /tmp/.config/libreoffice/4/user/registrymodifications.xcu
84
+
85
  # Expose port (Hugging Face Spaces requires port 7860)
86
  EXPOSE 7860
87
 
app.py CHANGED
@@ -122,13 +122,23 @@ def setup_local_arial_font():
122
  # Create system fonts directory for local Arial in /tmp to avoid permission issues
123
  system_fonts_dir = Path("/tmp/fonts/truetype/local-arial")
124
  system_fonts_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
125
 
126
  # Copy Arial font to system directory
127
  system_arial_path = system_fonts_dir / "arial.ttf"
128
  if not system_arial_path.exists():
129
  print("📥 Installing local Arial font...")
130
  shutil.copy2(arial_font_path, system_arial_path)
131
- os.chmod(system_arial_path, 0o644)
 
 
 
 
132
  print("✅ Local Arial font installed successfully")
133
  else:
134
  print("✅ Local Arial font already installed")
@@ -150,6 +160,12 @@ def install_arabic_fonts():
150
  # Create fonts directory in /tmp to avoid permission issues
151
  fonts_dir = Path("/tmp/fonts/truetype/arabic-custom")
152
  fonts_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
153
 
154
  print("🔤 Installing Arabic fonts for RTL support...")
155
 
@@ -172,7 +188,11 @@ def install_arabic_fonts():
172
  src = os.path.join(amiri_dir, file)
173
  dst = fonts_dir / file
174
  shutil.copy2(src, dst)
175
- os.chmod(dst, 0o644)
 
 
 
 
176
  print("✅ Amiri font installed successfully")
177
  else:
178
  print("❌ Amiri font directory not found")
@@ -198,7 +218,11 @@ def install_arabic_fonts():
198
  src = os.path.join(scheherazade_dir, file)
199
  dst = fonts_dir / file
200
  shutil.copy2(src, dst)
201
- os.chmod(dst, 0o644)
 
 
 
 
202
  print("✅ Scheherazade New font installed successfully")
203
  else:
204
  print("❌ Scheherazade New font directory not found")
@@ -216,7 +240,11 @@ def install_arabic_fonts():
216
 
217
  dst = fonts_dir / "NotoSansArabic-Regular.ttf"
218
  shutil.copy2(noto_file, dst)
219
- os.chmod(dst, 0o644)
 
 
 
 
220
  print("✅ Noto Sans Arabic font installed successfully")
221
  except Exception as e:
222
  print(f"❌ Noto Sans Arabic font installation failed: {e}")
@@ -232,14 +260,22 @@ def install_arabic_fonts():
232
 
233
  dst = fonts_dir / "Cairo-Regular.ttf"
234
  shutil.copy2(cairo_file, dst)
235
- os.chmod(dst, 0o644)
 
 
 
 
236
  print("✅ Cairo font installed successfully")
237
  except Exception as e:
238
  print(f"❌ Cairo font installation failed: {e}")
239
 
240
  # Update font cache after installation
241
  print("🔄 Updating font cache...")
242
- subprocess.run(["fc-cache", "-f"], capture_output=True, timeout=30)
 
 
 
 
243
  print("🎯 Enhanced Arabic fonts setup completed!")
244
 
245
  except Exception as e:
@@ -1198,14 +1234,31 @@ def post_process_pdf_for_perfect_formatting(pdf_path, docx_info):
1198
  Uses PyMuPDF to verify and correct any layout issues
1199
  """
1200
  try:
1201
- # Try to import PyMuPDF (fitz)
 
1202
  try:
1203
  import fitz # PyMuPDF
1204
  except ImportError:
1205
  try:
1206
  from pymupdf import fitz
1207
  except ImportError:
1208
- import pymupdf as fitz
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
 
1210
  print("🔍 Post-processing PDF for perfect formatting...")
1211
 
@@ -1326,17 +1379,6 @@ def post_process_pdf_for_perfect_formatting(pdf_path, docx_info):
1326
 
1327
  return post_process_results
1328
 
1329
- except ImportError:
1330
- print("⚠️ PyMuPDF not available - skipping advanced post-processing")
1331
- return {
1332
- 'pages_processed': 0,
1333
- 'placeholders_verified': 0,
1334
- 'tables_verified': 0,
1335
- 'arabic_text_verified': 0,
1336
- 'layout_issues_fixed': 0,
1337
- 'warnings': ['PyMuPDF not available for advanced verification'],
1338
- 'success_metrics': ['Basic PDF validation completed']
1339
- }
1340
  except Exception as e:
1341
  print(f"❌ PDF post-processing error: {e}")
1342
  return {
@@ -1692,9 +1734,24 @@ def create_libreoffice_config(temp_path):
1692
  """Create comprehensive LibreOffice configuration for PERFECT Arabic RTL formatting preservation"""
1693
  config_dir = temp_path / ".config" / "libreoffice" / "4" / "user"
1694
  config_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
1695
 
1696
  # Create comprehensive registrymodifications.xcu for maximum formatting preservation
1697
  registry_config = config_dir / "registrymodifications.xcu"
 
 
 
 
 
 
 
 
 
1698
  config_content = '''<?xml version="1.0" encoding="UTF-8"?>
1699
  <oor:items xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
1700
  <!-- PDF Export Settings for Maximum Quality with Arabic Support -->
@@ -1975,8 +2032,11 @@ def create_libreoffice_config(temp_path):
1975
  </item>
1976
  </oor:items>'''
1977
 
1978
- with open(registry_config, 'w', encoding='utf-8') as f:
1979
- f.write(config_content)
 
 
 
1980
 
1981
  return str(config_dir.parent.parent.parent)
1982
 
 
122
  # Create system fonts directory for local Arial in /tmp to avoid permission issues
123
  system_fonts_dir = Path("/tmp/fonts/truetype/local-arial")
124
  system_fonts_dir.mkdir(parents=True, exist_ok=True)
125
+ # Ensure proper permissions
126
+ try:
127
+ system_fonts_dir.chmod(0o777)
128
+ except PermissionError:
129
+ # If we can't change permissions, continue anyway
130
+ pass
131
 
132
  # Copy Arial font to system directory
133
  system_arial_path = system_fonts_dir / "arial.ttf"
134
  if not system_arial_path.exists():
135
  print("📥 Installing local Arial font...")
136
  shutil.copy2(arial_font_path, system_arial_path)
137
+ try:
138
+ system_arial_path.chmod(0o644)
139
+ except PermissionError:
140
+ # If we can't change permissions, continue anyway
141
+ pass
142
  print("✅ Local Arial font installed successfully")
143
  else:
144
  print("✅ Local Arial font already installed")
 
160
  # Create fonts directory in /tmp to avoid permission issues
161
  fonts_dir = Path("/tmp/fonts/truetype/arabic-custom")
162
  fonts_dir.mkdir(parents=True, exist_ok=True)
163
+ # Ensure proper permissions
164
+ try:
165
+ fonts_dir.chmod(0o777)
166
+ except PermissionError:
167
+ # If we can't change permissions, continue anyway
168
+ pass
169
 
170
  print("🔤 Installing Arabic fonts for RTL support...")
171
 
 
188
  src = os.path.join(amiri_dir, file)
189
  dst = fonts_dir / file
190
  shutil.copy2(src, dst)
191
+ try:
192
+ dst.chmod(0o644)
193
+ except PermissionError:
194
+ # If we can't change permissions, continue anyway
195
+ pass
196
  print("✅ Amiri font installed successfully")
197
  else:
198
  print("❌ Amiri font directory not found")
 
218
  src = os.path.join(scheherazade_dir, file)
219
  dst = fonts_dir / file
220
  shutil.copy2(src, dst)
221
+ try:
222
+ dst.chmod(0o644)
223
+ except PermissionError:
224
+ # If we can't change permissions, continue anyway
225
+ pass
226
  print("✅ Scheherazade New font installed successfully")
227
  else:
228
  print("❌ Scheherazade New font directory not found")
 
240
 
241
  dst = fonts_dir / "NotoSansArabic-Regular.ttf"
242
  shutil.copy2(noto_file, dst)
243
+ try:
244
+ dst.chmod(0o644)
245
+ except PermissionError:
246
+ # If we can't change permissions, continue anyway
247
+ pass
248
  print("✅ Noto Sans Arabic font installed successfully")
249
  except Exception as e:
250
  print(f"❌ Noto Sans Arabic font installation failed: {e}")
 
260
 
261
  dst = fonts_dir / "Cairo-Regular.ttf"
262
  shutil.copy2(cairo_file, dst)
263
+ try:
264
+ dst.chmod(0o644)
265
+ except PermissionError:
266
+ # If we can't change permissions, continue anyway
267
+ pass
268
  print("✅ Cairo font installed successfully")
269
  except Exception as e:
270
  print(f"❌ Cairo font installation failed: {e}")
271
 
272
  # Update font cache after installation
273
  print("🔄 Updating font cache...")
274
+ try:
275
+ subprocess.run(["fc-cache", "-f"], capture_output=True, timeout=30)
276
+ print("✅ Font cache updated successfully")
277
+ except Exception as e:
278
+ print(f"❌ Font cache update failed: {e}")
279
  print("🎯 Enhanced Arabic fonts setup completed!")
280
 
281
  except Exception as e:
 
1234
  Uses PyMuPDF to verify and correct any layout issues
1235
  """
1236
  try:
1237
+ # Try to import PyMuPDF (fitz) - multiple import methods for compatibility
1238
+ fitz = None
1239
  try:
1240
  import fitz # PyMuPDF
1241
  except ImportError:
1242
  try:
1243
  from pymupdf import fitz
1244
  except ImportError:
1245
+ try:
1246
+ import pymupdf as fitz
1247
+ except ImportError:
1248
+ fitz = None
1249
+
1250
+ # If PyMuPDF is not available, skip advanced post-processing
1251
+ if fitz is None:
1252
+ print("⚠️ PyMuPDF not available - skipping advanced post-processing")
1253
+ return {
1254
+ 'pages_processed': 0,
1255
+ 'placeholders_verified': 0,
1256
+ 'tables_verified': 0,
1257
+ 'arabic_text_verified': 0,
1258
+ 'layout_issues_fixed': 0,
1259
+ 'warnings': ['PyMuPDF not available for advanced verification'],
1260
+ 'success_metrics': ['Basic PDF validation completed']
1261
+ }
1262
 
1263
  print("🔍 Post-processing PDF for perfect formatting...")
1264
 
 
1379
 
1380
  return post_process_results
1381
 
 
 
 
 
 
 
 
 
 
 
 
1382
  except Exception as e:
1383
  print(f"❌ PDF post-processing error: {e}")
1384
  return {
 
1734
  """Create comprehensive LibreOffice configuration for PERFECT Arabic RTL formatting preservation"""
1735
  config_dir = temp_path / ".config" / "libreoffice" / "4" / "user"
1736
  config_dir.mkdir(parents=True, exist_ok=True)
1737
+ # Ensure proper permissions
1738
+ try:
1739
+ config_dir.chmod(0o777)
1740
+ except PermissionError:
1741
+ # If we can't change permissions, continue anyway
1742
+ pass
1743
 
1744
  # Create comprehensive registrymodifications.xcu for maximum formatting preservation
1745
  registry_config = config_dir / "registrymodifications.xcu"
1746
+
1747
+ # Create empty file first to ensure it exists
1748
+ try:
1749
+ registry_config.touch()
1750
+ registry_config.chmod(0o666)
1751
+ except PermissionError:
1752
+ # If we can't change permissions, continue anyway
1753
+ pass
1754
+
1755
  config_content = '''<?xml version="1.0" encoding="UTF-8"?>
1756
  <oor:items xmlns:oor="http://openoffice.org/2001/registry" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
1757
  <!-- PDF Export Settings for Maximum Quality with Arabic Support -->
 
2032
  </item>
2033
  </oor:items>'''
2034
 
2035
+ try:
2036
+ with open(registry_config, 'w', encoding='utf-8') as f:
2037
+ f.write(config_content)
2038
+ except Exception as e:
2039
+ print(f"❌ Failed to write LibreOffice config: {e}")
2040
 
2041
  return str(config_dir.parent.parent.parent)
2042
 
main.py CHANGED
@@ -111,7 +111,13 @@ async def startup_event():
111
 
112
  # Setup LibreOffice and fonts
113
  libreoffice_available = setup_libreoffice()
114
- setup_font_environment()
 
 
 
 
 
 
115
 
116
  # Create static directory if it doesn't exist
117
  Path("static").mkdir(exist_ok=True)
 
111
 
112
  # Setup LibreOffice and fonts
113
  libreoffice_available = setup_libreoffice()
114
+
115
+ # Setup font environment with error handling
116
+ try:
117
+ setup_font_environment()
118
+ except Exception as e:
119
+ logger.warning(f"Font environment setup failed: {e}")
120
+ logger.warning("Continuing with default system fonts...")
121
 
122
  # Create static directory if it doesn't exist
123
  Path("static").mkdir(exist_ok=True)
setup_fonts.py CHANGED
@@ -43,7 +43,14 @@ def setup_arabic_fonts():
43
 
44
  # Use /tmp for font installation to avoid permission issues
45
  fonts_dir = "/tmp/fonts/truetype/arabic-enhanced"
46
- os.makedirs(fonts_dir, exist_ok=True)
 
 
 
 
 
 
 
47
 
48
  # Try to download and install Amiri font
49
  print("📥 Installing Amiri font...")
@@ -59,7 +66,11 @@ def setup_arabic_fonts():
59
  src = os.path.join(amiri_dir, file)
60
  dst = os.path.join(fonts_dir, file)
61
  shutil.copy2(src, dst)
62
- os.chmod(dst, 0o644)
 
 
 
 
63
  print("✅ Amiri font installed successfully")
64
  amiri_installed = True
65
  else:
@@ -83,7 +94,11 @@ def setup_arabic_fonts():
83
  src = os.path.join(scheherazade_dir, file)
84
  dst = os.path.join(fonts_dir, file)
85
  shutil.copy2(src, dst)
86
- os.chmod(dst, 0o644)
 
 
 
 
87
  print("✅ Scheherazade New font installed successfully")
88
  scheherazade_installed = True
89
  else:
@@ -100,7 +115,11 @@ def setup_arabic_fonts():
100
  if os.path.exists("arial.ttf"):
101
  dst = os.path.join(fonts_dir, "arial.ttf")
102
  shutil.copy2("arial.ttf", dst)
103
- os.chmod(dst, 0o644)
 
 
 
 
104
  print("✅ Arial font copied successfully")
105
  else:
106
  print("❌ Local Arial font not found")
 
43
 
44
  # Use /tmp for font installation to avoid permission issues
45
  fonts_dir = "/tmp/fonts/truetype/arabic-enhanced"
46
+ try:
47
+ os.makedirs(fonts_dir, exist_ok=True)
48
+ # Ensure proper permissions
49
+ os.chmod(fonts_dir, 0o777)
50
+ except PermissionError:
51
+ # If we can't change permissions, continue anyway
52
+ print(f"⚠️ Permission denied when setting up {fonts_dir}, continuing anyway...")
53
+ pass
54
 
55
  # Try to download and install Amiri font
56
  print("📥 Installing Amiri font...")
 
66
  src = os.path.join(amiri_dir, file)
67
  dst = os.path.join(fonts_dir, file)
68
  shutil.copy2(src, dst)
69
+ try:
70
+ os.chmod(dst, 0o644)
71
+ except PermissionError:
72
+ # If we can't change permissions, continue anyway
73
+ pass
74
  print("✅ Amiri font installed successfully")
75
  amiri_installed = True
76
  else:
 
94
  src = os.path.join(scheherazade_dir, file)
95
  dst = os.path.join(fonts_dir, file)
96
  shutil.copy2(src, dst)
97
+ try:
98
+ os.chmod(dst, 0o644)
99
+ except PermissionError:
100
+ # If we can't change permissions, continue anyway
101
+ pass
102
  print("✅ Scheherazade New font installed successfully")
103
  scheherazade_installed = True
104
  else:
 
115
  if os.path.exists("arial.ttf"):
116
  dst = os.path.join(fonts_dir, "arial.ttf")
117
  shutil.copy2("arial.ttf", dst)
118
+ try:
119
+ os.chmod(dst, 0o644)
120
+ except PermissionError:
121
+ # If we can't change permissions, continue anyway
122
+ pass
123
  print("✅ Arial font copied successfully")
124
  else:
125
  print("❌ Local Arial font not found")