Spaces:
Sleeping
Sleeping
Scale app: Added .docx support, download feature, and cleaned up DEPLOY.md
Browse files- DEPLOY.md +0 -35
- app/main.py +7 -1
- app/static/main.js +14 -1
- requirements.txt +1 -0
DEPLOY.md
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
# 🌍 Deploying to a Public URL
|
| 2 |
-
|
| 3 |
-
To share your Clinical De-identification Dashboard with others, follow these steps to deploy it to **Hugging Face Spaces**.
|
| 4 |
-
|
| 5 |
-
## Prerequisites
|
| 6 |
-
1. A [Hugging Face](https://huggingface.co/join) account.
|
| 7 |
-
2. Your project is already pushed to your [GitHub Repository](https://github.com/sarvanithin/clinical-deidentify).
|
| 8 |
-
|
| 9 |
-
## Step-by-Step Deployment
|
| 10 |
-
|
| 11 |
-
### 1. Create a New Space
|
| 12 |
-
- Go to [huggingface.co/spaces](https://huggingface.co/spaces) and click **"Create new Space"**.
|
| 13 |
-
- **Space Name**: Give it a name (e.g., `clinical-deidentify`).
|
| 14 |
-
- **SDK**: Select **Docker**.
|
| 15 |
-
- **Docker Template**: Choose **Blank**.
|
| 16 |
-
- **Pricing**: The **Free (16GB RAM)** tier is perfect for these models.
|
| 17 |
-
|
| 18 |
-
### 2. Connect to GitHub
|
| 19 |
-
- Instead of uploading files manually, click on the **"Settings"** tab of your new Space.
|
| 20 |
-
- Scroll down to **"GitHub Repo"**.
|
| 21 |
-
- Click **"Connect"** and select your `clinical-deidentify` repository.
|
| 22 |
-
- Ensure the branch is set to `main`.
|
| 23 |
-
|
| 24 |
-
### 3. Automatic Deployment
|
| 25 |
-
- Once connected, Hugging Face will detect your `Dockerfile`.
|
| 26 |
-
- It will automatically start building the image and downloading the transformer model.
|
| 27 |
-
- After a few minutes, your dashboard will be live at `https://huggingface.co/spaces/YOUR_USER/clinical-deidentify`!
|
| 28 |
-
|
| 29 |
-
## Why Hugging Face Spaces?
|
| 30 |
-
- **High RAM**: Clinical-NER models require several GBs of RAM; HF Spaces provides 16GB for free.
|
| 31 |
-
- **CI/CD**: Every time you `git push` to GitHub, your public URL will automatically update.
|
| 32 |
-
- **Privacy**: You can set the Space to **Private** if you only want specific people to access it.
|
| 33 |
-
|
| 34 |
-
---
|
| 35 |
-
*Note: Ensure you have performed the final `git push` from your local machine before connecting to GitHub.*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/main.py
CHANGED
|
@@ -3,6 +3,7 @@ from fastapi.staticfiles import StaticFiles
|
|
| 3 |
from fastapi.responses import FileResponse
|
| 4 |
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
import fitz # PyMuPDF
|
|
|
|
| 6 |
from .models import DeidRequest, DeidResponse, BatchDeidRequest, BatchDeidResponse, FeedbackRequest
|
| 7 |
from .pipeline.hybrid import DeidPipeline
|
| 8 |
import json
|
|
@@ -87,8 +88,13 @@ async def deidentify_file(file: UploadFile = File(...)):
|
|
| 87 |
doc.close()
|
| 88 |
elif filename.endswith(".txt"):
|
| 89 |
text = content.decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
else:
|
| 91 |
-
raise HTTPException(status_code=400, detail="Unsupported file type. Please upload .pdf or .
|
| 92 |
|
| 93 |
if not text.strip():
|
| 94 |
raise HTTPException(status_code=400, detail="File is empty or no text could be extracted.")
|
|
|
|
| 3 |
from fastapi.responses import FileResponse
|
| 4 |
from fastapi.middleware.cors import CORSMiddleware
|
| 5 |
import fitz # PyMuPDF
|
| 6 |
+
import docx # python-docx
|
| 7 |
from .models import DeidRequest, DeidResponse, BatchDeidRequest, BatchDeidResponse, FeedbackRequest
|
| 8 |
from .pipeline.hybrid import DeidPipeline
|
| 9 |
import json
|
|
|
|
| 88 |
doc.close()
|
| 89 |
elif filename.endswith(".txt"):
|
| 90 |
text = content.decode("utf-8")
|
| 91 |
+
elif filename.endswith(".docx"):
|
| 92 |
+
# Extract text from Word document
|
| 93 |
+
from io import BytesIO
|
| 94 |
+
doc = docx.Document(BytesIO(content))
|
| 95 |
+
text = "\n".join([para.text for para in doc.paragraphs])
|
| 96 |
else:
|
| 97 |
+
raise HTTPException(status_code=400, detail="Unsupported file type. Please upload .pdf, .txt, or .docx")
|
| 98 |
|
| 99 |
if not text.strip():
|
| 100 |
raise HTTPException(status_code=400, detail="File is empty or no text could be extracted.")
|
app/static/main.js
CHANGED
|
@@ -172,5 +172,18 @@ document.addEventListener('DOMContentLoaded', () => {
|
|
| 172 |
const originalIcon = copyBtn.innerHTML;
|
| 173 |
copyBtn.innerHTML = '<svg viewBox="0 0 24 24" width="18" height="18" style="color:var(--success)"><path fill="currentColor" d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"/></svg>';
|
| 174 |
setTimeout(() => copyBtn.innerHTML = originalIcon, 2000);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
});
|
| 176 |
-
});
|
|
|
|
| 172 |
const originalIcon = copyBtn.innerHTML;
|
| 173 |
copyBtn.innerHTML = '<svg viewBox="0 0 24 24" width="18" height="18" style="color:var(--success)"><path fill="currentColor" d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"/></svg>';
|
| 174 |
setTimeout(() => copyBtn.innerHTML = originalIcon, 2000);
|
| 175 |
+
// Download logic
|
| 176 |
+
downloadBtn.addEventListener('click', () => {
|
| 177 |
+
if (!currentDeidentifiedText) return;
|
| 178 |
+
|
| 179 |
+
const blob = new Blob([currentDeidentifiedText], { type: 'text/plain' });
|
| 180 |
+
const url = URL.createObjectURL(blob);
|
| 181 |
+
const a = document.createElement('a');
|
| 182 |
+
a.href = url;
|
| 183 |
+
a.download = `deidentified_${new Date().getTime()}.txt`;
|
| 184 |
+
document.body.appendChild(a);
|
| 185 |
+
a.click();
|
| 186 |
+
document.body.removeChild(a);
|
| 187 |
+
URL.revokeObjectURL(url);
|
| 188 |
+
});
|
| 189 |
});
|
|
|
requirements.txt
CHANGED
|
@@ -7,3 +7,4 @@ spacy>=3.7.2
|
|
| 7 |
pytest>=7.4.3
|
| 8 |
pymupdf
|
| 9 |
python-multipart
|
|
|
|
|
|
| 7 |
pytest>=7.4.3
|
| 8 |
pymupdf
|
| 9 |
python-multipart
|
| 10 |
+
python-docx
|