| import os |
| from flask import Flask, render_template, request, jsonify, stream_with_context, Response |
| from memvid_sdk import create, open as open_memvid |
| from huggingface_hub import hf_hub_download, upload_file, HfApi |
|
|
# Flask application object; the route decorators further down attach to it.
app = Flask(__name__)


# File name of the Memvid store; used for the local file and as the path inside the Hub repo.
FILENAME = "knowledge.mv2"
# Hugging Face access token read from the environment; None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Name of the dataset repo; get_repo_id() prefixes it with the token owner's username.
DATASET_NAME = "memvid-storage"


# Live database handle; assigned by init_db() and reset to None while add_memory() syncs.
db = None
# Absolute path of the local database file, resolved against the working directory at import time.
DB_PATH = os.path.abspath(FILENAME)
# Cache for the resolved "username/dataset_name" id; filled in by get_repo_id().
DATASET_REPO_ID = None
|
|
def get_repo_id():
    """Resolve and cache the dataset repo id in the form 'username/dataset_name'.

    Returns the cached id when one is already known. Otherwise, when HF_TOKEN
    is set, asks the Hub which account the token belongs to and builds the id
    from that name and DATASET_NAME. Returns None when there is no token or
    the lookup raises (the error is printed, not propagated).
    """
    global DATASET_REPO_ID

    # Fast path: the id was already resolved by an earlier call.
    if DATASET_REPO_ID:
        return DATASET_REPO_ID

    # Without a token there is no account to resolve against.
    if not HF_TOKEN:
        return None

    try:
        owner = HfApi(token=HF_TOKEN).whoami()['name']
    except Exception as e:
        print(f"⚠️ Error getting username: {e}")
        return None

    DATASET_REPO_ID = f"{owner}/{DATASET_NAME}"
    return DATASET_REPO_ID
|
|
def init_db():
    """Fetch the stored database from the Hub when possible, then open or create it.

    Steps:
      1. With a token and a resolved repo id, make sure the dataset repo exists.
      2. Download FILENAME into the current directory when the repo holds one.
      3. Open the file at DB_PATH for writing, or create it when it is absent.

    Any exception raised by the Hub steps is printed as a warning and the
    local open/create step still runs. If the open/create step raises
    ImportError, the Memvid class from memvid_sdk is used instead.
    """
    global db, DATASET_REPO_ID

    repo_id = get_repo_id()

    if HF_TOKEN and repo_id:
        print(f"🔄 Checking cloud storage at {repo_id}...")
        hub = HfApi(token=HF_TOKEN)

        try:
            # exist_ok=True makes this a no-op when the repo is already there.
            hub.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)

            remote_files = hub.list_repo_files(repo_id=repo_id, repo_type="dataset")

            if FILENAME not in remote_files:
                print("⚠️ Database file not found in repo. A new one will be created and synced.")
            else:
                local_copy = hf_hub_download(
                    repo_id=repo_id,
                    filename=FILENAME,
                    repo_type="dataset",
                    token=HF_TOKEN,
                    local_dir=".",
                    local_dir_use_symlinks=False
                )
                print(f"✅ Downloaded database to {local_copy}")

        except Exception as e:
            # Cloud problems are non-fatal: fall through to the local file.
            print(f"⚠️ Cloud sync warning: {e}")

    try:
        db = (
            open_memvid(DB_PATH, read_only=False)
            if os.path.exists(DB_PATH)
            else create(DB_PATH)
        )
    except ImportError:
        # Fallback path using the class-based API of memvid_sdk.
        from memvid_sdk import Memvid

        already_there = os.path.exists(DB_PATH)
        db = Memvid()
        if already_there:
            db.open(DB_PATH)
        else:
            db.create(DB_PATH)
|
|
def sync_to_hub():
    """Upload the local database file at DB_PATH to the Hugging Face dataset repo.

    Prints a warning and returns without uploading when HF_TOKEN or the repo
    id is unavailable. An exception during the upload is printed, not raised.
    """
    repo_id = get_repo_id()

    if not (HF_TOKEN and repo_id):
        print("⚠️ No HF_TOKEN or Repo ID found. Skipping sync.")
        return

    # Collected up front so the try body reads as: announce, upload, confirm.
    upload_args = {
        "path_or_fileobj": DB_PATH,
        "path_in_repo": FILENAME,
        "repo_id": repo_id,
        "repo_type": "dataset",
        "token": HF_TOKEN,
        "commit_message": "Memvid: Auto-save memory update",
    }

    try:
        print("☁️ Syncing to Hub...")
        upload_file(**upload_args)
        print("✅ Sync complete.")
    except Exception as e:
        print(f"❌ Sync failed: {e}")
|
|
| |
# Runs once at import time so the module-level `db` handle is set before any
# request handler reads it.
init_db()
|
|
@app.route('/')
def index():
    """Serve the landing page rendered from the 'index.html' template."""
    page = render_template('index.html')
    return page
|
|
@app.route('/add', methods=['POST'])
def add_memory():
    """Store a new memory and stream progress back as newline-delimited JSON.

    Reads the ``content`` form field. Returns a 400 JSON error when it is
    missing or empty. Otherwise returns a streaming response with mimetype
    ``application/x-ndjson``; every line is a JSON object with a ``status``
    ("processing", "success" or "error") and a ``message``.

    The stream performs, in order: ingest the content, release the database
    handle, upload the file via sync_to_hub(), and reopen via init_db(). Any
    exception in those steps is reported as a final "error" event instead of
    being raised.
    """
    import json  # local import: only this handler serialises events by hand

    content = request.form.get('content')

    if not content:
        return jsonify({"error": "No content provided"}), 400

    def event(status, message):
        """Serialise one progress event as a single NDJSON line."""
        # json.dumps escapes quotes, backslashes and newlines, so an arbitrary
        # exception message cannot produce invalid JSON or split the event
        # across several lines.
        return json.dumps({"status": status, "message": message}) + "\n"

    def generate():
        global db
        try:
            # The handle is None if an earlier request failed midway; retry.
            if not db:
                init_db()
                if not db:
                    yield event("error", "Database init failed")
                    return

            yield event("processing", "Ingesting content...")

            payload = {
                "text": content,
                "labels": ["web-entry"],
                "title": "User Memory"
            }
            db.put(payload)

            yield event("processing", "Flushing to disk...")
            # Rebind instead of `del db` + assign: the handle is released the
            # same way, but the global name never disappears, so a concurrent
            # /search request sees None rather than raising NameError.
            # NOTE(review): this presumably relies on the SDK finalising the
            # file when the handle is released — confirm, and prefer an
            # explicit close/commit call if the SDK provides one.
            db = None

            yield event("processing", "Syncing to cloud (this may take a moment)...")
            sync_to_hub()

            yield event("processing", "Reloading index...")
            init_db()

            yield event("success", "Memory added and synced.")

        except Exception as e:
            yield event("error", str(e))

    return Response(stream_with_context(generate()), mimetype='application/x-ndjson')
|
|
@app.route('/search', methods=['POST'])
def search_memory():
    """Search stored memories for the ``query`` form field and return cleaned hits.

    Responds with 500 when the database handle is not set, 400 when no query
    is supplied, and 500 carrying the exception text when the lookup or the
    result formatting raises. On success the JSON body is
    ``{"success": True, "results": [...]}``.

    Hits with a score below 0.65 are dropped. For the rest, snippet lines
    starting with a metadata prefix (title/tags/labels/extractous_metadata)
    are removed before the text is returned.
    """
    if not db:
        return jsonify({"error": "Database not initialized"}), 500

    query = request.form.get('query')
    if not query:
        return jsonify({"error": "No query provided"}), 400

    # Line prefixes that mark metadata rather than memory text in a snippet.
    metadata_prefixes = ('title:', 'tags:', 'labels:', 'extractous_metadata:')

    try:
        found = db.find(query)

        results = []
        for hit in found.get('hits', []):
            score = hit.get('score', 0.0)
            if score < 0.65:
                continue

            snippet = hit.get('snippet', '')
            kept_lines = [
                ln for ln in snippet.split('\n')
                if not ln.strip().startswith(metadata_prefixes)
            ]
            body_text = "\n".join(kept_lines).strip()

            hit_tags = hit.get('tags', [])
            hit_labels = hit.get('labels', [])

            entry = {
                "title": hit.get('title') or "Untitled Memory",
                "text": body_text,
                "tags": hit_tags,
                "labels": hit_labels,
                "date": hit.get('created_at', ''),
                "score": f"{score:.2f}"
            }
            results.append(entry)

        return jsonify({"success": True, "results": results})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# When the file is executed directly, start Flask's built-in server bound to
# all interfaces on port 7860.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)