| |
|
|
| import time |
| import streamlit as st |
| from typing import Dict, List, Optional, Tuple |
| from datetime import datetime |
|
|
# Import guard: if the OpenAI SDK is missing, surface a friendly Streamlit
# error instead of crashing the whole app at import time.
try:
    from openai import OpenAI
except ImportError:
    OPENAI_AVAILABLE = False
    st.error("❌ OpenAI library nie jest dostępna")
else:
    OPENAI_AVAILABLE = True
|
|
| from config import REPORT_PROMPTS, MODEL_SETTINGS, INTERVIEW_TYPES |
|
|
class ReportGenerator:
    """Multi-stage generator of long research reports with self-prompting.

    Pipeline: detect interview type -> generate an outline -> write each
    section -> expand sections that came out too short -> assemble the
    final report. Progress, warnings and statistics are surfaced through
    Streamlit widgets, so instances are expected to run inside a
    Streamlit app.
    """

    def __init__(self, api_key: str):
        """Bind the generator to an OpenAI API key.

        Args:
            api_key: OpenAI API key used for all chat-completion calls.

        Raises:
            RuntimeError: if the openai package could not be imported.
        """
        if not OPENAI_AVAILABLE:
            raise RuntimeError("OpenAI library nie jest dostępna")

        self.client = OpenAI(api_key=api_key)
        self.api_key = api_key
        # Counters accumulated over one generation run.
        self.generation_stats = {
            'sections_generated': 0,
            'sections_expanded': 0,
            'total_tokens_used': 0,
            'total_cost_estimate': 0.0,
            'generation_time': 0.0
        }

    def generate_comprehensive_report(self, transcriptions: Dict[str, str], brief: str = "") -> str:
        """Generate the complete report from the given transcriptions.

        Runs the four-stage self-prompting strategy (outline, sections,
        expansion, final assembly) and reports progress via Streamlit.

        Args:
            transcriptions: mapping of source filename -> transcription text.
            brief: optional research brief guiding the analysis.

        Returns:
            The final report text with a metadata footer appended.

        Raises:
            Exception: any stage failure is reported via st.error and
                re-raised for the caller.
        """
        start_time = time.time()

        try:
            st.info("📋 Rozpoczynam generowanie raportu...")

            combined_transcriptions = self._combine_transcriptions(transcriptions)
            interview_type = self._detect_interview_type(combined_transcriptions)

            st.info(f"🔍 Wykryto typ: {INTERVIEW_TYPES.get(interview_type, 'nieznany')}")

            st.info("📝 Etap 1/4: Tworzenie struktury raportu...")
            outline = self._generate_outline(combined_transcriptions, brief, interview_type)

            if not outline:
                raise RuntimeError("Nie udało się wygenerować struktury raportu")

            st.info("✍️ Etap 2/4: Generowanie treści sekcji...")
            sections = self._generate_sections_iteratively(
                outline, combined_transcriptions, brief, interview_type
            )

            st.info("🔍 Etap 3/4: Pogłębianie analizy...")
            expanded_sections = self._expand_short_sections(
                sections, combined_transcriptions, brief
            )

            st.info("📄 Etap 4/4: Finalne scalenie...")
            final_report = self._assemble_final_report(
                expanded_sections, brief, interview_type, len(transcriptions)
            )

            self.generation_stats['generation_time'] = time.time() - start_time

            st.success(f"🎉 Raport wygenerowany! ({self.generation_stats['generation_time']:.1f}s)")
            self._log_generation_stats()

            return final_report

        except Exception as e:
            st.error(f"❌ Błąd generowania raportu: {str(e)}")
            # Bare raise keeps the original traceback intact for the caller.
            raise

    def _combine_transcriptions(self, transcriptions: Dict[str, str]) -> str:
        """Concatenate all transcriptions, each preceded by a numbered header."""
        combined = []

        for i, (filename, transcription) in enumerate(transcriptions.items(), 1):
            # BUGFIX: the header previously hard-coded "(unknown)" and ignored
            # the unpacked filename, losing the interview's provenance.
            header = f"\n\n=== WYWIAD {i}: {filename} ===\n\n"
            combined.append(header + transcription)

        return "\n".join(combined)

    def _detect_interview_type(self, transcriptions: str) -> str:
        """Heuristically classify the material as 'fgi', 'idi' or 'auto'.

        Counts how many indicator phrases of each type occur in the text;
        'auto' is returned on a tie (including zero hits on both sides).
        """
        text_lower = transcriptions.lower()

        # Phrases typical for focus-group sessions (FGI).
        fgi_indicators = [
            'moderator', 'grupa', 'wszyscy', 'uczestnicy', 'dyskusja',
            'czy zgadzacie się', 'co myślicie', 'focus group'
        ]

        # Phrases typical for individual in-depth interviews (IDI).
        idi_indicators = [
            'wywiad indywidualny', 'jeden na jeden', 'interviewer',
            'opowiedz mi', 'jak się czujesz', 'twoje doświadczenie'
        ]

        fgi_score = sum(1 for indicator in fgi_indicators if indicator in text_lower)
        idi_score = sum(1 for indicator in idi_indicators if indicator in text_lower)

        if fgi_score > idi_score:
            return 'fgi'
        elif idi_score > fgi_score:
            return 'idi'
        else:
            return 'auto'

    def _generate_outline(self, transcriptions: str, brief: str, interview_type: str) -> Dict[str, List[str]]:
        """Ask the model for a report outline; return {} on failure.

        Only the first 8000 characters of the transcriptions are sent:
        enough context to plan structure while bounding token usage.
        """
        try:
            prompt = REPORT_PROMPTS['outline_generator'].format(
                transcriptions=transcriptions[:8000],
                brief=brief or "Brak szczegółowego briefu",
                interview_type=INTERVIEW_TYPES.get(interview_type, 'wywiad')
            )

            response = self._call_gpt(prompt)
            outline = self._parse_outline(response)

            st.success(f"✅ Outline: {len(outline)} sekcji zaplanowanych")
            return outline

        except Exception as e:
            st.error(f"❌ Błąd generowania outline: {e}")
            return {}

    def _generate_sections_iteratively(self, outline: Dict, transcriptions: str, brief: str, interview_type: str) -> Dict[str, str]:
        """Generate report sections one at a time from the outline.

        A failed section does not abort the run: an error placeholder is
        stored so the report structure stays complete.
        """
        sections = {}

        for section_title, section_points in outline.items():
            # Skip empty titles and leftover markdown heading artifacts.
            if not section_title or section_title.startswith('#'):
                continue

            st.info(f"📝 Generuję: {section_title}")

            try:
                prompt = REPORT_PROMPTS['section_generator'].format(
                    transcriptions=transcriptions,
                    brief=brief or "Brak szczegółowego briefu",
                    interview_type=INTERVIEW_TYPES.get(interview_type, 'wywiad'),
                    outline=str(outline),
                    section_title=section_title,
                    section_points=section_points
                )

                section_content = self._call_gpt(prompt)
                sections[section_title] = section_content

                self.generation_stats['sections_generated'] += 1
                st.success(f"✅ {section_title} ({len(section_content.split())} słów)")

                # Pace API calls to stay clear of rate limits.
                time.sleep(2)

            except Exception as e:
                st.warning(f"⚠️ Błąd sekcji '{section_title}': {e}")
                sections[section_title] = f"[BŁĄD GENEROWANIA SEKCJI: {e}]"

        return sections

    def _expand_short_sections(self, sections: Dict, transcriptions: str, brief: str) -> Dict[str, str]:
        """Self-prompting pass: re-prompt the model to expand short sections.

        Sections under 500 words are sent back for expansion; on failure
        the original content is kept unchanged.
        """
        expanded_sections = {}

        for section_title, section_content in sections.items():
            word_count = len(section_content.split())

            # 500 words is the minimum depth expected of a report section.
            if word_count < 500:
                st.info(f"🔍 Rozszerzam: {section_title} ({word_count} słów)")

                try:
                    prompt = REPORT_PROMPTS['section_expander'].format(
                        current_section=section_content,
                        transcriptions=transcriptions,
                        brief=brief or "Brak szczegółowego briefu"
                    )

                    expanded_content = self._call_gpt(prompt)
                    expanded_sections[section_title] = expanded_content

                    new_word_count = len(expanded_content.split())
                    self.generation_stats['sections_expanded'] += 1

                    st.success(f"✅ Rozszerzone: {section_title} ({word_count} → {new_word_count} słów)")

                    # Pace API calls to stay clear of rate limits.
                    time.sleep(2)

                except Exception as e:
                    # Best effort: fall back to the unexpanded content.
                    st.warning(f"⚠️ Nie udało się rozszerzyć '{section_title}': {e}")
                    expanded_sections[section_title] = section_content
            else:
                expanded_sections[section_title] = section_content
                st.success(f"✅ {section_title} OK ({word_count} słów)")

        return expanded_sections

    def _assemble_final_report(self, sections: Dict, brief: str, interview_type: str, interviews_count: int) -> str:
        """Merge the sections into one report via a final model pass.

        Falls back to a plain concatenated report if the assembly call
        fails, so the user never loses the generated content.
        """
        try:
            sections_text = "\n\n".join([
                f"## {title}\n\n{content}"
                for title, content in sections.items()
            ])

            prompt = REPORT_PROMPTS['final_assembly'].format(
                sections=sections_text,
                brief=brief or "Brak szczegółowego briefu",
                interview_type=INTERVIEW_TYPES.get(interview_type, 'wywiad'),
                interviews_count=interviews_count,
                date=datetime.now().strftime("%Y-%m-%d")
            )

            final_report = self._call_gpt(prompt, max_tokens=4000)

            # Machine-readable footer describing how the report was produced.
            metadata = f"""

---

## METADATA RAPORTU
- **Wygenerowano**: {datetime.now().strftime("%Y-%m-%d %H:%M")}
- **Typ badania**: {INTERVIEW_TYPES.get(interview_type, 'nieznany')}
- **Liczba wywiadów**: {interviews_count}
- **Sekcji wygenerowanych**: {self.generation_stats['sections_generated']}
- **Sekcji rozszerzonych**: {self.generation_stats['sections_expanded']}
- **Czas generowania**: {self.generation_stats['generation_time']:.1f}s
- **Generator**: FGI/IDI Research Analyzer v1.0
"""

            return final_report + metadata

        except Exception as e:
            st.error(f"❌ Błąd finalnego scalenia: {e}")
            return self._create_fallback_report(sections, brief, interview_type)

    def _call_gpt(self, prompt: str, max_tokens: int = 3000) -> str:
        """Single chat-completion call with usage tracking and retry.

        Retries after a 60 s pause whenever the error message mentions a
        rate limit; every other error propagates to the caller.

        Args:
            prompt: the user prompt to send.
            max_tokens: completion token budget for this call.

        Returns:
            The model's response text.
        """
        try:
            response = self.client.chat.completions.create(
                model=MODEL_SETTINGS['gpt']['model'],
                messages=[
                    {"role": "system", "content": "Jesteś ekspertem analizy badań jakościowych. Tworzysz profesjonalne, szczegółowe raporty badawcze."},
                    {"role": "user", "content": prompt}
                ],
                temperature=MODEL_SETTINGS['gpt']['temperature'],
                max_tokens=max_tokens
            )

            # Track usage when the SDK returns it (guard against usage=None,
            # which hasattr alone would not catch).
            if getattr(response, 'usage', None):
                self.generation_stats['total_tokens_used'] += response.usage.total_tokens
                # Rough cost estimate at $0.00015 per 1K tokens.
                self.generation_stats['total_cost_estimate'] += (response.usage.total_tokens / 1000) * 0.00015

            return response.choices[0].message.content

        except Exception as e:
            if "rate limit" in str(e).lower():
                st.warning("⏳ Rate limit - czekam 60s...")
                time.sleep(60)
                return self._call_gpt(prompt, max_tokens)
            # Bare raise keeps the original traceback intact for the caller.
            raise

    def _parse_outline(self, outline_text: str) -> Dict[str, List[str]]:
        """Parse a markdown-style outline into {section title: [bullet points]}.

        Recognizes '## ' lines as section titles and '- ' lines as points
        belonging to the most recent section; everything else is ignored.
        """
        outline = {}
        current_section = None

        for line in outline_text.split('\n'):
            line = line.strip()

            if line.startswith('## '):
                current_section = line[3:].strip()
                outline[current_section] = []
            elif line.startswith('- ') and current_section:
                outline[current_section].append(line[2:].strip())

        return outline

    def _create_fallback_report(self, sections: Dict, brief: str, interview_type: str) -> str:
        """Build a plain concatenated report when final assembly fails."""
        report_parts = [
            f"# RAPORT Z BADANIA {INTERVIEW_TYPES.get(interview_type, 'INTERVIEW').upper()}",
            f"\n**Data**: {datetime.now().strftime('%Y-%m-%d')}",
            f"**Brief**: {brief or 'Brak szczegółowego briefu'}",
            "\n---\n"
        ]

        for title, content in sections.items():
            report_parts.append(f"## {title}\n\n{content}\n\n")

        return "\n".join(report_parts)

    def _log_generation_stats(self):
        """Display the accumulated generation statistics in the UI."""
        stats = self.generation_stats

        st.info(f"""
📊 **Statystyki generowania:**
- Sekcji: {stats['sections_generated']} wygenerowanych, {stats['sections_expanded']} rozszerzonych
- Tokeny: ~{stats['total_tokens_used']:,}
- Koszt: ~${stats['total_cost_estimate']:.4f}
- Czas: {stats['generation_time']:.1f}s
""")

    def evaluate_section_quality(self, section_content: str) -> Dict:
        """Score a section via the model's quality-checker prompt (debug aid).

        Returns:
            A dict with 'scores' (criterion -> int), 'needs_improvement'
            (True if the evaluation contains 'TAK'), and the raw
            'evaluation_text'; or {'error': ...} on failure.
        """
        try:
            prompt = REPORT_PROMPTS['quality_checker'].format(section=section_content)
            evaluation = self._call_gpt(prompt, max_tokens=500)

            # Extract "criterion: N/10" lines from the free-text evaluation.
            lines = evaluation.split('\n')
            scores = {}

            for line in lines:
                if ':' in line and '/10' in line:
                    criterion = line.split(':')[0].strip()
                    score = line.split(':')[1].strip().split('/')[0]
                    try:
                        scores[criterion] = int(score)
                    except ValueError:
                        # Skip lines whose score is not a plain integer;
                        # the old bare except hid all errors here.
                        pass

            needs_improvement = 'TAK' in evaluation.upper()

            return {
                'scores': scores,
                'needs_improvement': needs_improvement,
                'evaluation_text': evaluation
            }

        except Exception as e:
            return {'error': str(e)}

    def get_generation_stats(self) -> Dict:
        """Return a copy of the generation statistics."""
        return self.generation_stats.copy()
|
|
| |
def estimate_report_length(transcriptions: Dict[str, str]) -> Dict:
    """Estimate the size of the final report from the input transcriptions.

    Heuristic: the report condenses to roughly 20% of the source word
    volume, at ~250 words per printed page, with ~120 s of generation
    time per interview.
    """
    transcription_words = sum(len(text.split()) for text in transcriptions.values())
    report_words = int(transcription_words * 0.2)

    return {
        'transcription_words': transcription_words,
        'estimated_report_words': report_words,
        'estimated_pages': report_words / 250,
        'estimated_generation_time': len(transcriptions) * 120,
    }
|
|
| |
if __name__ == "__main__":
    # Smoke test: instantiate the generator and run the length estimator
    # on synthetic transcriptions (no API calls are made here).
    print("🧪 Test ReportGenerator")

    try:
        gen = ReportGenerator("test-key")
        print("✅ ReportGenerator zainicjalizowany")

        sample = {
            "test1.mp3": "To jest przykładowa transkrypcja wywiadu. " * 100,
            "test2.mp3": "To jest druga transkrypcja z badania. " * 150,
        }

        stats = estimate_report_length(sample)
        print(f"📊 Estymacja: {stats['estimated_report_words']} słów, {stats['estimated_pages']:.1f} stron")

    except Exception as e:
        print(f"❌ Błąd testu: {e}")

    print("✅ Test zakończony")