Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| import time | |
| from huggingface_hub import InferenceClient | |
| from fpdf import FPDF | |
| from .prompts import REVIEWER_DIRECTION | |
# Module-level logger named after this module; Reviewer uses it for progress and error messages.
| logger = logging.getLogger(__name__) | |
class Reviewer:
    """Polish a research report with a hosted chat model and export it to PDF.

    ``review`` sends the report to the model named at construction time through
    ``InferenceClient.chat_completion`` and returns the polished text, falling
    back to the untouched report on any failure.  ``generate_pdf`` renders
    lightly-cleaned Markdown into a PDF using FPDF's built-in Helvetica font.
    """

    # Longest run of non-space characters left intact on a PDF line.  FPDF fails
    # with "Not enough horizontal space" on unbreakable words wider than the
    # page; the original author judged ~90 characters safe at font size 12 on
    # A4, so 85 leaves some slack.
    _MAX_WORD_LEN = 85

    # The built-in PDF fonts only cover latin-1.  Typographic characters that
    # language models emit all the time would otherwise degrade to "?", so
    # map them to plain ASCII look-alikes first.
    _TYPOGRAPHY_FALLBACKS = str.maketrans({
        "\u2018": "'",    # left single quote
        "\u2019": "'",    # right single quote / apostrophe
        "\u201c": '"',    # left double quote
        "\u201d": '"',    # right double quote
        "\u2013": "-",    # en dash
        "\u2014": "-",    # em dash
        "\u2212": "-",    # minus sign
        "\u2022": "-",    # bullet
        "\u2026": "...",  # horizontal ellipsis
    })

    def __init__(self, model_name: str, hf_key: str):
        """Store the model name and create the inference client.

        Args:
            model_name: Model identifier passed to every chat completion call.
            hf_key: Token handed to ``InferenceClient``.
        """
        self.model_name = model_name
        self.client = InferenceClient(token=hf_key, timeout=120)

    @staticmethod
    def _strip_reasoning(text: str) -> str:
        """Remove a model's ``<think>`` reasoning section from *text*.

        * If a closing ``</think>`` tag is present (with or without the opening
          tag), everything up to and including the last one is dropped.
        * If only an opening ``<think>`` is present, the reasoning was never
          closed (e.g. the reply was truncated), so only the text *before* the
          tag is kept; this may be empty.
        * Otherwise the text is returned unchanged.
        """
        if "</think>" in text:
            return text.rsplit("</think>", 1)[-1].strip()
        if "<think>" in text:
            return text.split("<think>", 1)[0].strip()
        return text

    @staticmethod
    def _strip_markdown(markdown_text: str) -> str:
        """Drop Markdown heading markers and ``**`` bold markers.

        Only a run of 1-6 ``#`` at the start of a line (indented by at most
        three spaces) and followed by whitespace or end of line counts as a
        heading marker, so text such as ``C# code`` or ``#hashtag`` is kept.
        """
        cleaned_lines = []
        for line in markdown_text.split("\n"):
            body = line.lstrip(" ")
            indent = len(line) - len(body)
            remainder = body.lstrip("#")
            level = len(body) - len(remainder)
            is_heading = (
                indent <= 3
                and 1 <= level <= 6
                and (not remainder or remainder[0] in " \t")
            )
            cleaned_lines.append(remainder.strip() if is_heading else line)
        return "\n".join(cleaned_lines).replace("**", "")

    @classmethod
    def _safe_encode(cls, text: str) -> str:
        """Make *text* renderable with FPDF's built-in latin-1 fonts.

        Common typographic characters are mapped to ASCII, any remaining
        character outside latin-1 becomes ``?``, and words longer than
        ``_MAX_WORD_LEN`` are split with spaces so FPDF can wrap them.
        """
        text = text.translate(cls._TYPOGRAPHY_FALLBACKS)
        text = text.encode("latin-1", "replace").decode("latin-1")

        limit = cls._MAX_WORD_LEN
        pieces = []
        for word in text.split(" "):
            if len(word) > limit:
                pieces.extend(word[i:i + limit] for i in range(0, len(word), limit))
            else:
                pieces.append(word)
        return " ".join(pieces)

    def review(self, report: str) -> str:
        """Return a polished version of *report*, or *report* itself on failure.

        The report is sent to the chat model together with the
        ``REVIEWER_DIRECTION`` system prompt.  Any ``<think>`` reasoning in the
        reply is removed.  If the request raises, or the reply contains no
        usable text, the original report is returned unchanged.
        """
        logger.info("Reviewing and polishing the final report using %s...", self.model_name)
        try:
            response = self.client.chat_completion(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": REVIEWER_DIRECTION},
                    {"role": "user", "content": f"Please review and polish this research report:\n\n{report}"},
                ],
                max_tokens=4000,
                temperature=0.7,
            )
            # ``content`` may be None; treat that like an empty reply.
            polished_report = self._strip_reasoning(response.choices[0].message.content or "")
        except Exception as e:
            # Deliberate best-effort: a failed review must not lose the report.
            logger.exception("Error during report review: %s", e)
            return report  # Fallback to original report on error

        if not polished_report.strip():
            logger.warning("Reviewer returned no usable content; keeping the original report.")
            return report
        return polished_report

    def generate_pdf(self, markdown_text: str, output_path: str) -> bool:
        """Render *markdown_text* as a simple PDF written to *output_path*.

        Heading and bold markers are stripped, then each input line becomes
        one wrapped paragraph under a centered "Research Report" title.

        Returns:
            True when the PDF was written, False if anything failed (the
            error is logged rather than raised).
        """
        logger.info("Generating PDF at %s...", output_path)
        try:
            pdf = FPDF()
            pdf.set_auto_page_break(auto=True, margin=15)
            pdf.add_page()

            # Title, then the body font used for the rest of the document.
            pdf.set_font("helvetica", "B", 16)
            pdf.cell(0, 10, "Research Report", ln=True, align="C")
            pdf.ln(5)
            pdf.set_font("helvetica", "", 12)

            for line in self._strip_markdown(markdown_text).split("\n"):
                safe_line = self._safe_encode(line)
                if safe_line.strip():
                    pdf.multi_cell(0, 6, safe_line)
                    pdf.ln(2)  # small gap after each rendered line
                else:
                    pdf.ln(5)  # larger gap for paragraph separation

            pdf.output(output_path)
        except Exception as e:
            logger.exception("Failed to generate PDF: %s", e)
            return False

        logger.info("PDF generated successfully.")
        return True