gaurv007 committed on
Commit
063b349
·
verified ·
1 Parent(s): 79665bc

fix: web/app/api/parse-upload/route.ts

Browse files
Files changed (1) hide show
  1. web/app/api/parse-upload/route.ts +14 -12
web/app/api/parse-upload/route.ts CHANGED
@@ -30,19 +30,21 @@ export async function POST(req: NextRequest) {
30
  const buffer = Buffer.from(await file.arrayBuffer());
31
  let text = "";
32
 
33
- // Validate MIME types alongside extension
34
- const mimeType = file.type;
35
-
36
- if ((name.endsWith(".txt") || name.endsWith(".md")) && (mimeType.includes("text/plain") || mimeType.includes("text/markdown"))) {
37
  text = new TextDecoder().decode(buffer);
38
- } else if (name.endsWith(".pdf") && mimeType === "application/pdf") {
39
- // pdf-parse v2
40
- await import("pdf-parse/worker");
41
- const { PDFParse } = await import("pdf-parse");
42
- const parser = new PDFParse({ data: buffer });
43
- const result = await parser.getText();
44
- text = result.text;
45
- await parser.destroy();
 
 
 
 
 
46
  } else if (name.endsWith(".docx")) {
47
  const mammoth = (await import("mammoth")).default;
48
  const result = await mammoth.extractRawText({ buffer });
 
30
  const buffer = Buffer.from(await file.arrayBuffer());
31
  let text = "";
32
 
33
+ if (name.endsWith(".txt") || name.endsWith(".md")) {
 
 
 
34
  text = new TextDecoder().decode(buffer);
35
+ } else if (name.endsWith(".pdf")) {
36
+ // FIX v4.3: pdf-parse API compatible with both v1 and v2
37
+ // Try the standard pdf-parse import (works with v1.x which is more common)
38
+ try {
39
+ const pdfParse = (await import("pdf-parse")).default;
40
+ const result = await pdfParse(buffer);
41
+ text = result.text;
42
+ } catch {
43
+ // If pdf-parse fails, return a 400 telling the user to paste the text or use the Gradio Space (which has OCR)
44
+ return NextResponse.json({
45
+ error: "PDF parsing failed. Please copy-paste the text directly, or use the Gradio Space which has OCR support."
46
+ }, { status: 400 });
47
+ }
48
  } else if (name.endsWith(".docx")) {
49
  const mammoth = (await import("mammoth")).default;
50
  const result = await mammoth.extractRawText({ buffer });