| import os |
| import argparse |
| import subprocess |
| import time |
| import requests |
|
|
| |
# Port passed to uvicorn via --port; also used to build the readiness-check URL.
API_PORT = 8000
# Port passed to `flask run`; also used for the URL opened in the web browser.
FRONTEND_PORT = 7860
# Page limit handed to the scraper, and the file-count threshold used to decide
# whether the scraping stage is skipped.
MAX_PAGES = 1000
|
|
def run_scraper():
    """Run the web scraper unless enough pages have already been collected.

    Counts the regular files directly under ``data/raw`` (zero when that
    directory does not exist yet). When more than ``MAX_PAGES`` files are
    present the scraping stage is skipped; otherwise ``BuffaloScraper.scrape``
    is called with ``max_pages=MAX_PAGES``.
    """
    raw_dir = "data/raw"

    # A missing directory just means nothing has been scraped yet; it must
    # not cause the stage to be skipped silently.
    num_pages = 0
    if os.path.isdir(raw_dir):
        with os.scandir(raw_dir) as entries:
            num_pages = sum(1 for entry in entries if entry.is_file())

    if num_pages > MAX_PAGES:
        print(f"{num_pages} scraped data files found under data/raw. Skipping data scraper stage.")
        return

    # Imported only when a scrape is actually going to run.
    from buffalo_rag.scraper.scraper import BuffaloScraper

    print("Starting web scraper...")
    scraper = BuffaloScraper()
    scraper.scrape(max_pages=MAX_PAGES)
    print("Scraping completed!")
|
|
def build_embeddings():
    """Chunk the collected documents and generate embeddings for the chunks.

    Instantiates ``DocumentChunker``, asks it for the document chunks, and
    feeds those chunks straight back into its embedding step.
    """
    from buffalo_rag.embeddings.chunker import DocumentChunker

    print("Creating document chunks and embeddings...")
    document_chunker = DocumentChunker()
    document_chunker.create_embeddings(document_chunker.create_chunks())
    print("Embeddings created!")
|
|
def start_api_server():
    """Launch the FastAPI backend through uvicorn and wait for it to exit.

    The server binds to all interfaces on ``API_PORT`` with auto-reload on.
    """
    print("Starting API server...")
    uvicorn_command = [
        "uvicorn",
        "buffalo_rag.api.main:app",
        "--host",
        "0.0.0.0",
        "--port",
        str(API_PORT),
        "--reload",
    ]
    subprocess.run(uvicorn_command)
|
|
def start_flask_frontend():
    """Launch the Flask frontend via ``flask run`` and wait for it to exit.

    The child inherits the current environment plus ``FLASK_APP`` pointing at
    the frontend module, and binds to all interfaces on ``FRONTEND_PORT``.
    """
    print("Starting Flask frontend...")
    flask_env = dict(os.environ)
    flask_env["FLASK_APP"] = "buffalo_rag/frontend/flask_app.py"
    flask_command = ["flask", "run", "--host=0.0.0.0", f"--port={FRONTEND_PORT}"]
    subprocess.run(flask_command, env=flask_env)
|
|
def wait_for_server(url, timeout=30, interval=1):
    """Poll *url* until the server answers or *timeout* seconds have elapsed.

    Args:
        url: Address to send GET requests to.
        timeout: Maximum total number of seconds to keep polling.
        interval: Seconds to sleep between attempts; also used as the
            per-request timeout.

    Returns:
        True as soon as a response with a status code below 500 is received,
        False if the timeout elapses first.
    """
    # Use the monotonic clock so that system clock adjustments (NTP sync,
    # manual changes) can neither stretch nor cut short the waiting period.
    start_time = time.monotonic()
    print(f"Waiting for server at {url} to be ready...")
    while time.monotonic() - start_time < timeout:
        try:
            response = requests.get(url, timeout=interval)
        except requests.exceptions.RequestException:
            # The server is not accepting requests yet; keep polling.
            pass
        else:
            # Any non-5xx answer means the server is up and handling requests.
            if response.status_code < 500:
                print(f"Server at {url} is ready.")
                return True
        time.sleep(interval)
    print(f"Timeout waiting for server at {url}.")
    return False
|
|
def _stop_process(process, grace_period=5):
    """Terminate *process* if it is still running and wait for it to exit."""
    if process is None or process.poll() is not None:
        return
    process.terminate()
    try:
        # Reap the child so it does not linger as a zombie.
        process.wait(timeout=grace_period)
    except subprocess.TimeoutExpired:
        # The child ignored the termination request; force it down.
        process.kill()
        process.wait()


def _run_servers():
    """Run the API and the frontend in the background until the user presses Enter."""
    import webbrowser

    api_process = None
    flask_process = None
    try:
        api_process = subprocess.Popen(["uvicorn", "buffalo_rag.api.main:app", "--host", "0.0.0.0", "--port", str(API_PORT), "--reload"])
        if not wait_for_server(f'http://localhost:{API_PORT}/', timeout=60):
            # Without a backend there is no point in starting the frontend.
            return

        # Short pause after the API first responds, before starting the frontend.
        time.sleep(3)
        flask_process = subprocess.Popen(["flask", "run", "--host=0.0.0.0", f"--port={FRONTEND_PORT}"],
                                         env={**os.environ, "FLASK_APP": "buffalo_rag/frontend/flask_app.py"})

        webbrowser.open(f'http://localhost:{FRONTEND_PORT}')

        try:
            input("Press Enter to stop the server and exit...\n")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl+C is treated like Enter: shut down cleanly.
            pass
    finally:
        # Always stop the children, even if something above raised.
        _stop_process(flask_process)
        _stop_process(api_process)


def main():
    """Parse the command-line flags and run the requested pipeline stages.

    Flags:
        --scrape    run the web scraper
        --build     build the embeddings
        --api       run the API server in the foreground
        --frontend  run the Flask frontend in the foreground
        --run       run the API and the frontend together in the background
        --all       scrape, build, then behave like --run
    """
    parser = argparse.ArgumentParser(description="BuffaloRAG - AI Assistant for UB International Students")
    parser.add_argument("--scrape", action="store_true", help="Run web scraper")
    parser.add_argument("--build", action="store_true", help="Build embeddings")
    parser.add_argument("--api", action="store_true", help="Run API server")
    parser.add_argument("--frontend", action="store_true", help="Run Flask frontend")
    parser.add_argument("--all", action="store_true", help="Run the complete pipeline")
    parser.add_argument("--run", action="store_true", help="Run frontend & backend servers")

    args = parser.parse_args()

    if args.scrape or args.all:
        run_scraper()

    if args.build or args.all:
        build_embeddings()

    runs_both_servers = args.all or args.run
    if runs_both_servers:
        _run_servers()
    elif args.api:
        start_api_server()

    # --all/--run already ran the frontend; do not launch it a second time
    # after the user has asked to stop.
    if args.frontend and not runs_both_servers:
        start_flask_frontend()
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()