Spaces:
Running
Running
fix: web/app/api/parse-upload/route.ts
Browse files
web/app/api/parse-upload/route.ts
CHANGED
|
@@ -30,19 +30,21 @@ export async function POST(req: NextRequest) {
|
|
| 30 |
const buffer = Buffer.from(await file.arrayBuffer());
|
| 31 |
let text = "";
|
| 32 |
|
| 33 |
-
|
| 34 |
-
const mimeType = file.type;
|
| 35 |
-
|
| 36 |
-
if ((name.endsWith(".txt") || name.endsWith(".md")) && (mimeType.includes("text/plain") || mimeType.includes("text/markdown"))) {
|
| 37 |
text = new TextDecoder().decode(buffer);
|
| 38 |
-
} else if (name.endsWith(".pdf")
|
| 39 |
-
// pdf-parse v2
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
} else if (name.endsWith(".docx")) {
|
| 47 |
const mammoth = (await import("mammoth")).default;
|
| 48 |
const result = await mammoth.extractRawText({ buffer });
|
|
|
|
| 30 |
const buffer = Buffer.from(await file.arrayBuffer());
|
| 31 |
let text = "";
|
| 32 |
|
| 33 |
+
if (name.endsWith(".txt") || name.endsWith(".md")) {
|
|
|
|
|
|
|
|
|
|
| 34 |
text = new TextDecoder().decode(buffer);
|
| 35 |
+
} else if (name.endsWith(".pdf")) {
|
| 36 |
+
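// FIX v4.3: call pdf-parse through its default-export function (the v1.x-style API); NOTE(review): not verified against v2 — any import or call failure falls through to the catch below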
|
| 37 |
+
// Try the standard pdf-parse import (works with v1.x which is more common)
|
| 38 |
+
try {
|
| 39 |
+
const pdfParse = (await import("pdf-parse")).default;
|
| 40 |
+
const result = await pdfParse(buffer);
|
| 41 |
+
text = result.text;
|
| 42 |
+
} catch {
|
| 43 |
+
// If pdf-parse fails, do not retry here: return a 400 telling the user to paste the text or use the Gradio Space (which has OCR)
|
| 44 |
+
return NextResponse.json({
|
| 45 |
+
error: "PDF parsing failed. Please copy-paste the text directly, or use the Gradio Space which has OCR support."
|
| 46 |
+
}, { status: 400 });
|
| 47 |
+
}
|
| 48 |
} else if (name.endsWith(".docx")) {
|
| 49 |
const mammoth = (await import("mammoth")).default;
|
| 50 |
const result = await mammoth.extractRawText({ buffer });
|