Spaces:

lvizcaya
/

research_agent

Sleeping

App Files Files Community

research_agent / src /reviewer.py

lvizcaya

pdf download fixed

6ee7713 4 months ago

raw

history blame contribute delete

3.77 kB

	import logging
	import os
	import time
	from huggingface_hub import InferenceClient
	from fpdf import FPDF
	from .prompts import REVIEWER_DIRECTION

	logger = logging.getLogger(__name__)

	class Reviewer:
	    """Polish a research report with a chat model and export it as a PDF.

	    ``review`` sends the report to the Hugging Face inference client and
	    returns the cleaned-up answer; ``generate_pdf`` renders markdown-ish text
	    into a simple PDF using FPDF's built-in Helvetica font.
	    """

	    # Typographic characters that commonly appear in generated text but are
	    # not representable in latin-1. Mapping them to ASCII look-alikes first
	    # keeps them readable instead of turning each one into "?".
	    _ASCII_FALLBACKS = str.maketrans({
	        "\u2018": "'",
	        "\u2019": "'",
	        "\u201c": '"',
	        "\u201d": '"',
	        "\u2013": "-",
	        "\u2014": "-",
	        "\u2026": "...",
	        "\u2022": "-",
	    })

	    def __init__(self, model_name: str, hf_key: str):
	        """Store the model name and create the inference client.

	        Args:
	            model_name: Model identifier passed to every chat completion.
	            hf_key: Token handed to ``InferenceClient``.
	        """
	        self.model_name = model_name
	        self.client = InferenceClient(token=hf_key, timeout=120)

	    @staticmethod
	    def _strip_think_tags(text: str) -> str:
	        """Remove ``<think>`` reasoning sections from a model reply.

	        Text without any think tag is returned untouched. Otherwise:

	        * everything up to and including the last ``</think>`` is dropped
	          (this also covers replies that contain only the closing tag);
	        * an opening ``<think>`` that is never closed discards everything
	          after it, because that tail is reasoning rather than the answer.

	        The remaining text is stripped of surrounding whitespace.
	        """
	        if "<think>" not in text and "</think>" not in text:
	            return text
	        if "</think>" in text:
	            text = text.rsplit("</think>", 1)[1]
	        if "<think>" in text:
	            text = text.split("<think>", 1)[0]
	        return text.strip()

	    @staticmethod
	    def _strip_markdown(markdown_text: str) -> str:
	        """Drop heading markers and bold markers from markdown text.

	        Only a run of 1-6 ``#`` at the start of a line (after optional
	        indentation) that is followed by a space counts as a heading marker,
	        so text such as ``C# is`` is left intact. Every ``**`` is removed.
	        """
	        cleaned_lines = []
	        for line in markdown_text.split("\n"):
	            candidate = line.lstrip()
	            marker_len = len(candidate) - len(candidate.lstrip("#"))
	            if 1 <= marker_len <= 6 and candidate[marker_len:marker_len + 1] == " ":
	                line = candidate[marker_len:].lstrip()
	            cleaned_lines.append(line.replace("**", ""))
	        return "\n".join(cleaned_lines)

	    @staticmethod
	    def _sanitize_for_pdf(text: str, max_word_len: int = 85) -> str:
	        """Make one line of text safe to pass to FPDF with a standard font.

	        Args:
	            text: A single line of text.
	            max_word_len: Longest run of non-space characters kept in one
	                piece; longer runs are split with spaces.

	        Returns:
	            The text restricted to latin-1 (unsupported characters become
	            ``?``) with over-long words broken into chunks.
	        """
	        text = text.translate(Reviewer._ASCII_FALLBACKS)
	        # The standard fonts are handled as latin-1 here.
	        text = text.encode("latin-1", "replace").decode("latin-1")

	        # Break very long words to prevent the FPDF error
	        # "Not enough horizontal space". The default of 85 was chosen by the
	        # original author as a safe single-word limit for A4 at font size 12.
	        pieces = []
	        for word in text.split(" "):
	            if len(word) > max_word_len:
	                pieces.extend(
	                    word[i:i + max_word_len]
	                    for i in range(0, len(word), max_word_len)
	                )
	            else:
	                pieces.append(word)
	        return " ".join(pieces)

	    def review(self, report: str) -> str:
	        """Ask the model to review and polish ``report``.

	        Args:
	            report: The research report text to polish.

	        Returns:
	            The polished report with think tags removed. The original
	            ``report`` is returned when the request fails or when nothing
	            usable remains after cleanup.
	        """
	        logger.info(
	            "Reviewing and polishing the final report using %s...",
	            self.model_name,
	        )

	        try:
	            response = self.client.chat_completion(
	                model=self.model_name,
	                messages=[
	                    {"role": "system", "content": REVIEWER_DIRECTION},
	                    {"role": "user", "content": f"Please review and polish this research report:\n\n{report}"},
	                ],
	                max_tokens=4000,
	                temperature=0.7,
	            )
	            # Treat a missing content field like an empty reply.
	            content = response.choices[0].message.content or ""
	            polished_report = self._strip_think_tags(content)
	        except Exception as e:
	            # Best-effort step: any failure falls back to the original report.
	            logger.error("Error during report review: %s", e)
	            return report

	        if not polished_report.strip():
	            # E.g. the reply was only an unfinished <think> section.
	            logger.warning(
	                "Reviewer returned no usable content; keeping the original report."
	            )
	            return report

	        return polished_report

	    def generate_pdf(self, markdown_text: str, output_path: str) -> bool:
	        """Render ``markdown_text`` into a PDF file at ``output_path``.

	        Args:
	            markdown_text: Report text; heading and bold markers are removed
	                before rendering.
	            output_path: Destination passed to ``FPDF.output``.

	        Returns:
	            ``True`` when the PDF was written, ``False`` when any step raised
	            (the error is logged).
	        """
	        logger.info("Generating PDF at %s...", output_path)
	        try:
	            pdf = FPDF()
	            pdf.set_auto_page_break(auto=True, margin=15)
	            pdf.add_page()

	            # Title
	            pdf.set_font("helvetica", "B", 16)
	            pdf.cell(0, 10, "Research Report", ln=True, align="C")
	            pdf.ln(5)

	            # Body
	            pdf.set_font("helvetica", "", 12)
	            clean_text = self._strip_markdown(markdown_text)

	            for line in clean_text.split("\n"):
	                safe_line = self._sanitize_for_pdf(line)
	                if safe_line.strip():
	                    pdf.multi_cell(0, 6, safe_line)
	                    pdf.ln(2)  # small gap after each text line
	                else:
	                    pdf.ln(5)  # larger gap for paragraph separation

	            pdf.output(output_path)
	            logger.info("PDF generated successfully.")
	            return True
	        except Exception as e:
	            logger.error("Failed to generate PDF: %s", e)
	            return False