Spaces:

shiva0013
/

YT-AI-Automation

Running

github-actions

Sync Docker Space

5f3e9f5 3 days ago

23.5 kB

	"""TextBro (Screenshot Studio) — Flask Application Entry Point.

	All route logic is organized into Blueprints under routes/:
	- generate.py — Text-to-Image (generate, SSE, cancel, preview)
	- html_routes.py — HTML-to-Image (generate-html, beautify, minify)
	- image_routes.py — Image-to-Screenshots (extract, SSE workflow)
	- resources.py — File management, ZIP download, history, cache, metrics

	When the React frontend has been built (`../frontend/dist/index.html` exists),
	this app also serves that SPA at `/`. Otherwise `/` returns a short message
	telling the user to run the dev server or build the frontend.
	"""
	import io
	import json
	import logging
	import os
	import sys
	from datetime import datetime

	# Switch both standard streams to UTF-8 (unencodable characters are
	# replaced rather than raising) and line-buffered output, which keeps
	# Windows terminals/services from choking on non-ASCII log lines.
	if sys.version_info >= (3, 7):
	    for _std_stream in (sys.stdout, sys.stderr):
	        # Only real text wrappers expose reconfigure(); skip replaced streams.
	        if isinstance(_std_stream, io.TextIOWrapper):
	            _std_stream.reconfigure(
	                encoding='utf-8',
	                errors='replace',
	                line_buffering=True,
	            )

	# Add src to path so blueprints can import `core.`, `utils.` unprefixed.
	BACKEND_DIR = os.path.abspath(os.path.dirname(__file__))
	sys.path.insert(0, os.path.join(BACKEND_DIR, 'src'))

	# Optional .env loading. python-dotenv is a soft dependency: if it is not
	# installed we carry on silently and the environment has to be populated
	# some other way (export, systemd unit, etc.).
	try:
	    from dotenv import load_dotenv  # type: ignore

	    # Candidates are visited in order: the project-root .env first, then the
	    # backend-local one. Because override=False never replaces a variable
	    # that is already set, values from the earlier file win.
	    _env_files = [
	        os.path.join(BACKEND_DIR, *_parts)
	        for _parts in (('..', '.env'), ('.env',))
	    ]
	    for _candidate in filter(os.path.isfile, _env_files):
	        load_dotenv(_candidate, override=False)
	except ImportError:  # pragma: no cover
	    pass


	def _env_bool(name: str, default: bool = False) -> bool:
	    """Interpret the environment variable *name* as a boolean flag.

	    Returns *default* when the variable is not set at all. Otherwise the
	    value is stripped and lower-cased, and only ``1``, ``true``, ``yes``
	    and ``on`` count as true; every other value (including the empty
	    string) is false.
	    """
	    try:
	        text = os.environ[name]
	    except KeyError:
	        return default
	    normalized = text.strip().lower()
	    return normalized in ('1', 'true', 'yes', 'on')


	from flask import Flask, jsonify, request, send_from_directory # noqa: E402

	# Locate the built React frontend, if any.
	FRONTEND_DIST = os.path.abspath(os.path.join(BACKEND_DIR, '..', 'frontend', 'dist'))


	def has_frontend_build() -> bool:
	    """Return True when ``index.html`` exists inside ``FRONTEND_DIST``.

	    The file system is consulted on every call instead of caching the
	    answer, so building, rebuilding or deleting ``frontend/dist`` while
	    the backend is running is picked up immediately (Flask's reloader only
	    watches .py files).
	    """
	    index_html = os.path.join(FRONTEND_DIST, 'index.html')
	    return os.path.isfile(index_html)

	app = Flask(__name__)

	# Bound the size of incoming request bodies so a malicious or buggy client
	# cannot exhaust memory by streaming a multi-GB upload. The 64 MB default
	# is generous for thumbnails (a 4096×4096 PNG is a few MB) yet tight enough
	# to reject abuse; set MAX_CONTENT_LENGTH_BYTES to change it.
	_default_max_body_bytes = 64 * 1024 * 1024
	app.config['MAX_CONTENT_LENGTH'] = int(
	    os.environ.get('MAX_CONTENT_LENGTH_BYTES', _default_max_body_bytes)
	)

	# CORS — defaults to the wildcard ``*`` (any origin) when neither
	# ``CORS_ORIGINS`` nor ``ALLOWED_ORIGINS`` is set. To restrict access, set
	# ``CORS_ORIGINS`` (or ``ALLOWED_ORIGINS``) to a comma-separated allowlist
	# of origins. The wide-open ``*`` case is logged as a warning at startup.
	# CORS_ORIGINS wins over ALLOWED_ORIGINS; with neither set the value is "*".
	_cors_raw = (
	    os.environ.get('CORS_ORIGINS') or os.environ.get('ALLOWED_ORIGINS', '*')
	).strip()
	CORS_ORIGINS: list = (
	    ['*']
	    if _cors_raw == '*'
	    else [_origin.strip() for _origin in _cors_raw.split(',') if _origin.strip()]
	)
	try:
	    from flask_cors import CORS  # type: ignore

	    CORS(app, resources={r"/*": {"origins": CORS_ORIGINS}}, supports_credentials=False)
	    if CORS_ORIGINS != ['*']:
	        print(f"🛡️ CORS enabled for: {', '.join(CORS_ORIGINS)}", flush=True)
	    else:
	        print(
	            "⚠️ CORS_ORIGINS=* — wide open, suitable for prototyping only.",
	            flush=True,
	        )
	except ImportError:
	    # flask-cors is optional; without it no CORS headers are configured here.
	    print(
	        "ℹ️ flask-cors not installed — install it if you run the React dev "
	        "server on a different origin (pip install flask-cors).",
	        flush=True,
	    )

	# Optional rate-limiting. Off by default (this is a single-user local app)
	# but available for anyone who exposes the backend further. Enable with
	# RATE_LIMIT=on; RATE_LIMIT_DEFAULT only customizes the limits (a
	# ';'-separated list) and does not turn limiting on by itself.
	_rate_default = os.environ.get('RATE_LIMIT_DEFAULT', '60/minute;10/second')
	if _env_bool('RATE_LIMIT', False):
	    try:
	        from flask_limiter import Limiter  # type: ignore
	        from flask_limiter.util import get_remote_address  # type: ignore

	        # "a;b" -> ["a", "b"], dropping empty segments.
	        _rate_limits = [
	            _limit.strip() for _limit in _rate_default.split(';') if _limit.strip()
	        ]
	        Limiter(
	            get_remote_address,
	            app=app,
	            default_limits=_rate_limits,
	            storage_uri=os.environ.get('RATE_LIMIT_STORAGE', 'memory://'),
	        )
	        print(f"🚦 Rate limiting enabled: {_rate_default}", flush=True)
	    except ImportError:
	        print(
	            "⚠️ RATE_LIMIT=on but flask-limiter not installed. "
	            "`pip install flask-limiter` to enable.",
	            flush=True,
	        )


	# Log every request to terminal
	@app.before_request
	def log_request():
	    """Print the HTTP method and path of every incoming request."""
	    line = "📡 Request: {} {}".format(request.method, request.path)
	    print(line, flush=True)


	# ─── Register Blueprints ──────────────────────────────────────────────────

	from routes.generate import generate_bp  # noqa: E402
	from routes.html_routes import html_bp  # noqa: E402
	from routes.image_routes import image_bp  # noqa: E402
	from routes.resources import resources_bp  # noqa: E402
	from routes.runs import runs_bp  # noqa: E402
	from core.run_manager import mark_interrupted_active_runs  # noqa: E402

	# Registration order matches the import order above.
	for _blueprint in (generate_bp, html_bp, image_bp, resources_bp, runs_bp):
	    app.register_blueprint(_blueprint)


	@app.after_request
	def _normalize_error_shape(response):
	    """Add ``success: False`` to JSON error bodies that lack a ``success`` key.

	    I1: routes historically returned a mix of ``{"error": "…"}`` and
	    ``{"success": false, "error": "…"}``. Forcing the frontend to handle
	    both shapes is fragile. Rather than churn every callsite, the response
	    is rewritten on the way out.

	    Only one case is rewritten: a JSON *object* body that has an ``error``
	    key and no ``success`` key. Everything else is returned untouched:
	    non-JSON responses, JSON bodies that are not objects, bodies that
	    already carry ``success``, and success-path bodies without ``error``
	    (no ``success: True`` is injected).

	    Args:
	        response: The outgoing Flask response.

	    Returns:
	        The same response object, possibly with a rewritten body.
	    """
	    try:
	        if not response.is_json:
	            return response
	        data = response.get_json(silent=True)
	        if not isinstance(data, dict) or 'success' in data or 'error' not in data:
	            return response
	        # Put ``success`` first; the handler's own keys follow unchanged.
	        response.set_data(json.dumps({'success': False, **data}))
	        response.headers['Content-Length'] = str(len(response.get_data()))
	    except Exception:
	        # Never let response normalization break the response itself, but
	        # leave a trace so a systematic failure is discoverable.
	        logging.debug('Response normalization skipped', exc_info=True)
	    return response

	# Reconcile runs left in an active state and report what
	# mark_interrupted_active_runs() says it did. Anything other than a dict
	# result is treated as "nothing to report".
	_restart_recovery = mark_interrupted_active_runs()
	_recovery_counts = _restart_recovery if isinstance(_restart_recovery, dict) else {}
	_interrupted_count = _recovery_counts.get("interrupted", 0)
	_recovered_count = _recovery_counts.get("recovered", 0)
	if _interrupted_count:
	    print(
	        f"Marked {_interrupted_count} stale queued/running process(es) as interrupted",
	        flush=True,
	    )
	if _recovered_count:
	    print(
	        f"Recovered {_recovered_count} stale queued/running process(es) as completed "
	        f"(output already on disk)",
	        flush=True,
	    )


	# ─── Health & Preflight ───────────────────────────────────────────────────

	@app.route('/healthz')
	def healthz():
	    """Liveness probe — cheap, always 200 if Flask can route a request.

	    Also reports whether a generation is currently in flight, so the UI
	    can show a live "another run is in progress" hint without polling
	    every blueprint individually.

	    Returns:
	        A JSON object with ``ok`` (always True), ``service`` and
	        ``active_operation_id`` — the value of
	        ``utils.run_guard.current_run()``, or ``None`` when that lookup
	        fails for any reason.
	    """
	    try:
	        # Only add the src directory when it is missing. An unconditional
	        # insert on every probe would grow sys.path without bound.
	        src_dir = os.path.join(BACKEND_DIR, 'src')
	        if src_dir not in sys.path:
	            sys.path.insert(0, src_dir)
	        from utils.run_guard import current_run  # type: ignore # noqa: E402
	        active = current_run()
	    except Exception:
	        # The probe must stay green even if the run guard is unavailable.
	        active = None
	    return jsonify({
	        'ok': True,
	        'service': 'textbro-backend',
	        'active_operation_id': active,
	    })


	# Process-wide memo for the version payload; 'value' is None until first use.
	_version_cache: dict = {'value': None}


	def _backend_version_payload() -> dict:
	    """Return the cached version payload for this backend process.

	    The SHA is resolved in this order:

	    1. the ``BACKEND_BUILD_SHA`` environment variable,
	    2. the ``GIT_COMMIT_SHA`` environment variable,
	    3. ``git rev-parse --short HEAD`` run in the parent of ``BACKEND_DIR``
	       (2 second timeout; any failure is ignored),
	    4. the literal ``"dev"``.

	    The payload is built on the first call and reused afterwards, so
	    ``started_at`` is the UTC time of that first call (ISO 8601 with a
	    trailing ``Z``), not necessarily the moment the process booted.

	    Returns:
	        A dict with the keys ``service``, ``sha`` and ``started_at``.
	    """
	    cached = _version_cache['value']
	    if cached is not None:
	        return cached

	    sha = os.environ.get('BACKEND_BUILD_SHA') or os.environ.get('GIT_COMMIT_SHA') or ''
	    if not sha:
	        try:
	            import subprocess
	            sha = subprocess.check_output(
	                ['git', 'rev-parse', '--short', 'HEAD'],
	                cwd=os.path.dirname(BACKEND_DIR),
	                stderr=subprocess.DEVNULL,
	                timeout=2,
	            ).decode().strip()
	        except Exception:
	            # No git binary, not a checkout, timeout, … — fall back to "dev".
	            sha = ''

	    # datetime.utcnow() is deprecated; take an aware UTC timestamp and drop
	    # the tzinfo so the serialized form stays "<naive ISO timestamp>Z".
	    from datetime import timezone
	    started_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z'

	    payload = {
	        'service': 'textbro-backend',
	        'sha': sha or 'dev',
	        'started_at': started_at,
	    }
	    _version_cache['value'] = payload
	    return payload


	@app.route('/version')
	def version():
	    """Serve the backend version payload as JSON so the UI can show what's deployed."""
	    payload = _backend_version_payload()
	    return jsonify(payload)


	# Memoize the (relatively expensive) preflight result for a short window so a
	# wizard-initiated triple-call (component mount + retry + parent re-render)
	# doesn't spawn three POWERPNT.EXE processes on Windows.
	import threading as _preflight_threading
	_PREFLIGHT_TTL = float(os.environ.get('PREFLIGHT_CACHE_SECS', '30'))
	_preflight_cache: dict = {'value': None, 'fetched_at': 0.0}
	_preflight_lock = _preflight_threading.Lock()

	# API key values that count as "not configured".
	_PREFLIGHT_PLACEHOLDER_KEYS = frozenset({'', 'your-api-key-here', 'REPLACE_ME'})


	def _preflight_ai_config() -> dict:
	    """Check that config/config.py provides at least one usable API key.

	    The check passes when the top-level ``API_KEY`` OR the ``api_key`` of at
	    least one ``MODELS_CONFIG`` entry is a string that is not a placeholder.
	    Any exception while importing or inspecting the config is reported in
	    ``detail`` instead of being raised.
	    """
	    check = {'ok': False, 'detail': ''}
	    try:
	        config_dir = os.path.join(BACKEND_DIR, 'config')
	        if config_dir not in sys.path:
	            sys.path.insert(0, config_dir)
	        from config import API_KEY, MODELS_CONFIG  # type: ignore
	        top_ok = (
	            isinstance(API_KEY, str)
	            and API_KEY.strip() not in _PREFLIGHT_PLACEHOLDER_KEYS
	        )
	        model_ok = any(
	            isinstance(m.get('api_key'), str)
	            and m['api_key'].strip() not in _PREFLIGHT_PLACEHOLDER_KEYS
	            for m in MODELS_CONFIG.values()
	        )
	        if top_ok or model_ok:
	            check['ok'] = True
	            # Counts every entry with a truthy api_key (placeholders included).
	            configured = sum(1 for m in MODELS_CONFIG.values() if m.get('api_key'))
	            check['detail'] = f"{configured} model(s) configured"
	        else:
	            check['detail'] = 'API_KEY is empty or placeholder — edit backend/config/config.py'
	    except Exception as e:  # pragma: no cover — surfaces in UI
	        check['detail'] = f'Failed to load config: {e}'
	    return check


	def _preflight_powerpoint(system: str, launch: bool) -> dict:
	    """Check whether the PowerPoint COM engine looks usable on this host.

	    Args:
	        system: The value of ``platform.system()``.
	        launch: When True (and on Windows) actually start PowerPoint through
	            COM and read its version. When False only the pywin32 modules
	            are imported, so no POWERPNT.EXE process is spawned.
	    """
	    check = {'ok': False, 'detail': ''}
	    if system != 'Windows':
	        check['detail'] = f'PowerPoint COM is Windows-only; this host is {system}'
	        return check

	    if not launch:
	        try:
	            import pythoncom  # type: ignore # noqa: F401
	            import win32com.client  # type: ignore # noqa: F401
	            check['ok'] = True
	            check['detail'] = (
	                'pywin32 is installed; PowerPoint launch skipped during normal preflight'
	            )
	        except Exception as e:
	            check['detail'] = f'pywin32 not available; PowerPoint export may fail: {e}'
	        return check

	    app_obj = None
	    com_initialized = False
	    try:
	        import pythoncom  # type: ignore
	        import win32com.client  # type: ignore

	        pythoncom.CoInitialize()
	        com_initialized = True
	        app_obj = win32com.client.DispatchEx('PowerPoint.Application')
	        ppt_version = getattr(app_obj, 'Version', 'unknown')
	        check['ok'] = True
	        check['detail'] = f'PowerPoint {ppt_version} detected'
	    except Exception as e:
	        check['detail'] = f'Optional for screenshots; PowerPoint not available: {e}'
	    finally:
	        # Best-effort cleanup: close the probe instance, then release COM.
	        if app_obj is not None:
	            try:
	                app_obj.Quit()
	            except Exception:
	                pass
	        if com_initialized:
	            try:
	                pythoncom.CoUninitialize()  # type: ignore[name-defined]
	            except Exception:
	                pass
	    return check


	def _preflight_video_engine(powerpoint_ok: bool) -> dict:
	    """Report which video engines (PowerPoint COM and/or MoviePy) are available.

	    MoviePy is only imported, not exercised, so the check stays fast. The
	    check passes when at least one engine is available.
	    """
	    engines: list = []
	    if powerpoint_ok:
	        engines.append('powerpoint')
	    moviepy_ok = False
	    try:
	        import moviepy  # type: ignore # noqa: F401

	        moviepy_ok = True
	        engines.append('moviepy')
	    except Exception as exc:  # pragma: no cover — surfaces in UI
	        moviepy_detail = f'MoviePy not importable: {exc}'
	    else:
	        moviepy_detail = f'MoviePy {getattr(moviepy, "__version__", "?")} available'

	    check = {'ok': False, 'detail': '', 'engines': engines}
	    if not engines:
	        check['detail'] = (
	            f'No video engine available — install MoviePy ({moviepy_detail}) '
	            'or run on Windows with PowerPoint.'
	        )
	        return check

	    check['ok'] = True
	    if moviepy_ok and powerpoint_ok:
	        check['detail'] = 'Dual engine ready — PowerPoint (COM) + MoviePy'
	    elif moviepy_ok:
	        check['detail'] = moviepy_detail
	    else:
	        check['detail'] = 'PowerPoint COM engine ready'
	    return check


	@app.route('/preflight')
	def preflight():
	    """Report what the runtime can do so the wizard can gate outputs.

	    Response shape: ``{ok: bool, checks: {...}}`` where each check is
	    ``{ok: bool, detail: str}``:

	    - platform: always ok; reports OS / python version.
	    - backend: always ok when this handler responds.
	    - ai_config: ok when config/config.py defines a non-placeholder API_KEY
	      or at least one MODELS_CONFIG entry with a non-placeholder api_key.
	    - powerpoint: Windows only. By default ok when pywin32 imports; with
	      ``?powerpoint=1`` PowerPoint is actually launched through COM.
	    - video_engine: ok when PowerPoint or MoviePy is available; also
	      carries an ``engines`` list.

	    The top-level ``ok`` ignores the soft ``powerpoint`` and
	    ``video_engine`` checks. Results are cached for ``_PREFLIGHT_TTL``
	    seconds; bypass the cache with ``?fresh=1`` (use
	    ``?fresh=1&powerpoint=1`` for the expensive launch check).
	    """
	    import time as _time
	    import platform as _platform

	    if request.args.get('fresh') != '1':
	        with _preflight_lock:
	            cached = _preflight_cache['value']
	            age = _time.time() - _preflight_cache['fetched_at']
	        if cached is not None and age < _PREFLIGHT_TTL:
	            return jsonify(cached)

	    system = _platform.system()
	    checks: dict = {
	        'platform': {
	            'ok': True,
	            'detail': f"{system} {_platform.release()} · Python {_platform.python_version()}",
	        },
	        'backend': {'ok': True, 'detail': 'Flask responded to /preflight'},
	    }
	    checks['ai_config'] = _preflight_ai_config()
	    # Normal preflight must not launch PowerPoint; only an explicit
	    # powerpoint=1 on a Windows host triggers the real COM probe.
	    launch_powerpoint = system == 'Windows' and request.args.get('powerpoint') == '1'
	    checks['powerpoint'] = _preflight_powerpoint(system, launch_powerpoint)
	    # Dual-engine video builder: either engine satisfies this check. The
	    # frontend uses it to decide whether to expose the MP4 output option.
	    checks['video_engine'] = _preflight_video_engine(checks['powerpoint']['ok'])

	    soft_checks = {'powerpoint', 'video_engine'}
	    payload = {
	        # Top-level ok: platform, backend, and ai_config must pass. The soft
	        # checks gate individual output formats in the wizard instead.
	        'ok': all(c['ok'] for k, c in checks.items() if k not in soft_checks),
	        'checks': checks,
	    }
	    with _preflight_lock:
	        _preflight_cache['value'] = payload
	        _preflight_cache['fetched_at'] = _time.time()
	    return jsonify(payload)


	# ─── Error handlers ──────────────────────────────────────────────────────

	@app.errorhandler(413)
	def _too_large(_err):
	    """Answer HTTP 413 with JSON that states the configured body-size limit.

	    ``limit_mb`` is the limit in MiB rounded to one decimal, or ``None``
	    when no (or a zero) ``MAX_CONTENT_LENGTH`` is configured.
	    """
	    max_bytes = app.config.get('MAX_CONTENT_LENGTH', 0)
	    if max_bytes:
	        max_mb = round(max_bytes / (1024 * 1024), 1)
	    else:
	        max_mb = None
	    body = {
	        'success': False,
	        'error': 'Request body too large',
	        'limit_bytes': max_bytes,
	        'limit_mb': max_mb,
	    }
	    return jsonify(body), 413


	@app.errorhandler(500)
	def _internal(err):
	    """Log the failure and answer HTTP 500 with a JSON body.

	    This keeps the frontend from ever having to parse stack-trace HTML.
	    ``detail`` carries ``str(err)``.
	    """
	    logging.exception('Unhandled 500 in Flask handler: %s', err)
	    body = {
	        'success': False,
	        'error': 'Internal server error',
	        'detail': str(err),
	    }
	    return jsonify(body), 500


	# ─── Frontend (React SPA) ─────────────────────────────────────────────────

	# Static HTML page returned in place of the React app when no frontend
	# build is present; it tells the user how to start or build the frontend.
	INDEX_FALLBACK_HTML = """<!doctype html>
	<html><head><meta charset="utf-8"><title>TextBro backend</title>
	<style>body{font-family:system-ui;max-width:640px;margin:4rem auto;padding:0 1rem;color:#1e293b}code{background:#f1f5f9;padding:.1rem .3rem;border-radius:.25rem}</style>
	</head><body>
	<h1>TextBro backend is running</h1>
	<p>The Flask API is live on <code>http://localhost:5000</code>, but no built React
	frontend was found at <code>frontend/dist</code>.</p>
	<p>Either:</p>
	<ul>
	<li>Run the React dev server: <code>cd frontend && npm install && npm run dev</code>
	and open <a href="http://localhost:5173">http://localhost:5173</a>.</li>
	<li>Or build the frontend once: <code>cd frontend && npm install && npm run build</code>,
	then reload this page.</li>
	</ul>
	</body></html>
	"""


	@app.route('/')
	def index():
	    """Serve the built SPA's index.html, or the fallback page when no build exists."""
	    if not has_frontend_build():
	        return INDEX_FALLBACK_HTML
	    return send_from_directory(FRONTEND_DIST, 'index.html')


	@app.route('/assets/<path:filename>')
	def frontend_assets(filename: str):
	    """Serve *filename* from the build's ``assets`` directory.

	    Answers a JSON 404 when there is no frontend build.
	    """
	    if has_frontend_build():
	        assets_dir = os.path.join(FRONTEND_DIST, 'assets')
	        return send_from_directory(assets_dir, filename)
	    return jsonify({'error': 'Frontend not built'}), 404


	@app.route('/favicon.svg')
	def frontend_favicon():
	    """Serve the build's favicon.svg; answer an empty 204 when it is unavailable."""
	    favicon_available = has_frontend_build() and os.path.isfile(
	        os.path.join(FRONTEND_DIST, 'favicon.svg')
	    )
	    if not favicon_available:
	        return ('', 204)
	    return send_from_directory(FRONTEND_DIST, 'favicon.svg')


	# SPA fallback: any unknown path that isn't an API route should return the
	# React index.html so client-side routing (/workspace/html, /text-to-video, …)
	# works on a hard refresh. Two cases:
	# * Exact endpoint names — routes without a trailing path component.
	# * Path-style prefixes — require a trailing slash so `/html-to-video`
	# does NOT match the `/html/<file>` asset endpoint.
	# Endpoints matched by their exact path.
	_API_EXACT = {
	    # generation
	    '/generate', '/generate-sse', '/generate-html', '/generate-html-sse',
	    '/regenerate',
	    # HTML tooling
	    '/beautify', '/minify',
	    # image extraction
	    '/extract-from-image', '/extract-from-image-sse', '/image-to-screenshots-sse',
	    # resources / history
	    '/download-zip', '/list', '/history', '/history/clear',
	    # health
	    '/healthz', '/preflight',
	    # runs
	    '/runs', '/runs/queue',
	    # youtube / thumbnails
	    '/youtube/videos', '/upload-thumbnail', '/thumbnail-templates',
	    '/render-thumbnail-template',
	}
	# Endpoints matched by prefix. The trailing slash is part of the prefix.
	_API_PATH_PREFIXES = (
	    '/cancel/', '/runs/', '/screenshots/', '/html/', '/thumbnails/',
	    '/thumbnail-templates/', '/download/', '/delete/', '/cache/',
	    '/metrics/', '/logs/',
	)


	def _is_api_path(path: str) -> bool:
	    """Return True when *path* is an exact API endpoint or starts with an API prefix."""
	    if path in _API_EXACT:
	        return True
	    return path.startswith(_API_PATH_PREFIXES)


	@app.errorhandler(404)
	def spa_fallback(_err):
	    """Handle 404s: JSON for API paths, the SPA shell for everything else.

	    Unknown non-API paths get the built ``index.html`` so client-side
	    routing works on a hard refresh. Without a build, the fallback page is
	    returned with status 404.
	    """
	    if _is_api_path(request.path):
	        return jsonify({'error': 'Not found'}), 404
	    if not has_frontend_build():
	        return INDEX_FALLBACK_HTML, 404
	    return send_from_directory(FRONTEND_DIST, 'index.html')


	# ─── Entry Point ───────────────────────────────────────────────────────────

	if __name__ == '__main__':
	    _banner = "=" * 60
	    print(_banner)
	    print("🚀 TextBro (Screenshot Studio) - Starting Server")
	    print(_banner)
	    if not has_frontend_build():
	        print("\n⚠️ No React build found. To use the UI:")
	        print(" cd frontend && npm install && npm run dev")
	        print(" or build it: npm run build (then reload the backend URL)")
	    else:
	        print(f"\n🎨 Serving React build from: {FRONTEND_DIST}")

	    # Defaults suit a single-tenant local app: loopback only and debug off
	    # (debug=True enables the Werkzeug debugger, which allows remote code
	    # execution). HOST, PORT and DEBUG override them.
	    host = os.environ.get('HOST', '127.0.0.1')
	    port = int(os.environ.get('PORT', '5000'))
	    debug = _env_bool('DEBUG', False)

	    # Binding every interface must be acknowledged with ALLOW_PUBLIC_BIND;
	    # otherwise exit with status 2.
	    public_bind_acknowledged = _env_bool('ALLOW_PUBLIC_BIND', False)
	    if host == '0.0.0.0' and not public_bind_acknowledged:
	        print(
	            "⚠️ HOST=0.0.0.0 binds the API on every interface. "
	            "Set ALLOW_PUBLIC_BIND=1 to acknowledge this and re-launch, "
	            "or run behind a reverse proxy (nginx, Caddy) with auth.",
	            flush=True,
	        )
	        sys.exit(2)

	    print(f"\n🌐 Listening on http://{host}:{port} (debug={debug})")
	    print("💡 Press Ctrl+C to stop\n")
	    # The reloader follows the debug flag, so non-debug runs never
	    # double-launch Playwright.
	    app.run(host=host, port=port, debug=debug, use_reloader=debug)