| import os |
| import google.generativeai as genai |
| from pathlib import Path |
| import logging |
|
|
# Module-level logging: INFO threshold, logger named after this module so
# records remain attributable if the module is imported elsewhere.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
|
|
class AnalysisPostProcessor:
    """Post-process a chunked screenplay analysis with the Gemini API.

    The input file is split into '### Title ###' sections; each section is
    sent to the model to remove redundancy and improve flow, then the
    cleaned sections are reassembled into a single output file.
    """

    def __init__(self):
        # Fail fast if the key is missing rather than erroring mid-run.
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")

        genai.configure(api_key=api_key)
        # NOTE(review): 'gemini-pro' is a legacy model alias — confirm it is
        # still served by the API before deploying.
        self.model = genai.GenerativeModel('gemini-pro')

    def read_sections(self, filepath: str) -> dict:
        """Read and separate the analysis into sections.

        A header line has the exact form '### Title ###'; every line until
        the next header belongs to the current section. Lines before the
        first header are discarded.

        Returns a dict mapping section title -> section body.
        """
        # Explicit encoding avoids platform-dependent default codecs.
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        sections: dict = {}
        current_section = None
        current_content: list = []

        for line in content.split('\n'):
            if line.startswith('### ') and line.endswith(' ###'):
                if current_section:
                    sections[current_section] = '\n'.join(current_content)
                current_section = line.strip('#').strip()
                current_content = []
            else:
                current_content.append(line)

        # Flush the final section (the loop only flushes on a new header).
        if current_section:
            sections[current_section] = '\n'.join(current_content)

        return sections

    def clean_section(self, title: str, content: str) -> str:
        """Clean one section via Gemini.

        Best-effort: on any API failure the original content is returned
        unchanged so a single bad call cannot lose a section.
        """
        prompt = f"""You are processing a section of screenplay analysis titled "{title}".
The original analysis was generated by analyzing chunks of the screenplay,
which may have led to some redundancy and discontinuity.

Your task:
1. Remove any redundant observations
2. Stitch together related insights that may be separated
3. Ensure the analysis flows naturally from beginning to end
4. Preserve ALL unique insights and specific examples
5. Maintain the analytical depth while making it more coherent

Original {title} section:
{content}

Provide the cleaned and coherent version maintaining the same analytical depth."""

        try:
            response = self.model.generate_content(prompt)
            return response.text
        except Exception as e:
            # Lazy %-formatting: the message is only built if emitted.
            logger.error("Error cleaning %s: %s", title, e)
            return content

    def process_analysis(self, input_path: str, output_path: str):
        """Process the entire analysis file.

        Reads *input_path*, cleans each section, and writes the reassembled
        document to *output_path*. Returns True on success, False on any
        failure (the error is logged rather than raised).
        """
        try:
            sections = self.read_sections(input_path)

            cleaned_sections = {}
            for title, content in sections.items():
                logger.info("Processing %s", title)
                cleaned_sections[title] = self.clean_section(title, content)

            # Collect parts and join once — linear instead of the quadratic
            # cost of repeated string concatenation.
            parts = ["SCREENPLAY CREATIVE ANALYSIS\n\n"]
            for title, content in cleaned_sections.items():
                parts.append(f"### {title} ###\n\n{content}\n\n")

            with open(output_path, 'w', encoding='utf-8') as f:
                f.write("".join(parts))

            logger.info("Cleaned analysis saved to: %s", output_path)
            return True

        except Exception as e:
            logger.error("Error in post-processing: %s", e)
            return False
|
|
def main(input_file: str = "path/to/creative_analysis.txt",
         output_file: str = "path/to/cleaned_creative_analysis.txt"):
    """CLI entry point: clean *input_file* into *output_file*.

    The defaults preserve the original hard-coded placeholder paths; pass
    real paths when calling this programmatically.
    """
    processor = AnalysisPostProcessor()
    processor.process_analysis(input_file, output_file)
|
|
# Run the entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()