"""Gradio multi-agent prompt-processing demo.

Module-level setup: stdlib / third-party imports, the local text-generation
pipeline, and configuration pulled from the environment via python-dotenv.
"""

# Standard library.
import json
import os

# Third party.
import dotenv
import requests
import gradio as gr
from gradio_client import Client
from langchain_openai import ChatOpenAI
from transformers import pipeline

# Project / DSPy helpers.
from dspy.agents import Agent
from dspy import spawn_processes
from dspy.utils import SentenceSplitter, SentimentAnalyzer, NamedEntityRecognizer

# NOTE(review): unused in the visible file — the pipeline below hard-codes its
# own model id; confirm before removing.
model_name = "dolphin-phi"

# Local text-generation model.
# NOTE(review): this repo id points at GGUF quantized weights, which the plain
# transformers pipeline cannot load — verify the intended model id/format.
llm = pipeline("text-generation", model="TheBloke/dolphin-2_6-phi-2-GGUF")

# Registry for UI components (populated elsewhere, if at all).
components = {}

# Load environment variables from a local .env file, then read configuration.
dotenv.load_dotenv()
seamless_client = Client("TheBloke/dolphin-2_6-phi-2-GGUF")
HuggingFace_Token = os.getenv("HuggingFace_Token")
# NOTE(review): duplicate of HuggingFace_Token — kept because other code may
# reference either name.
hf_token = os.getenv("HuggingFace_Token")
base_model_id = os.getenv('BASE_MODEL_ID', 'default_base_model_id')
model_directory = os.getenv('MODEL_DIRECTORY', 'default_model_directory')
|
def dspy_generate_agent_prompts(prompt):
    """
    Generates prompts for different agents based on the provided prompt and DSPy functionalities.

    Args:
        prompt (str): The user-provided prompt (e.g., customer reviews).

    Returns:
        list: A list containing agent-specific prompts.
    """
    # Break the input into sentences for per-sentence analysis.
    sentences = SentenceSplitter().process(prompt)

    # One sentiment label per sentence (e.g. POSITIVE / NEGATIVE / NEUTRAL).
    analyzer = SentimentAnalyzer()
    sentiment_labels = [analyzer.analyze(sentence) for sentence in sentences]

    # Group entities of interest (food, organisations, locations) by label.
    recognizer = NamedEntityRecognizer(model_name="en_core_web_sm")
    extracted_entities = {}
    for sentence in sentences:
        for entity in recognizer.process(sentence):
            if entity.label_ in ("FOOD", "ORG", "LOCATION"):
                extracted_entities.setdefault(entity.label_, []).append(entity.text)

    # Agent 1: sentence-level sentiment analysis.
    sentiment_prompt = "Analyze the sentiment of the following sentences:\n" + "\n".join(sentences)

    # Agent 2: topic extraction over the full input text.
    topic_prompt = f"Extract the main topics discussed in the following text, focusing on food, service, and ambiance:\n{prompt}"

    # Agent 3: recommendations derived from the sentiment tallies and topics.
    positive_count = sum(label == "POSITIVE" for label in sentiment_labels)
    negative_count = sum(label == "NEGATIVE" for label in sentiment_labels)
    neutral_count = sum(label == "NEUTRAL" for label in sentiment_labels)
    topic_mentions = "\n".join(f"{k}: {','.join(v)}" for k, v in extracted_entities.items())
    recommendation_prompt = f"""Based on the sentiment analysis (positive: {positive_count}, negative: {negative_count}, neutral: {neutral_count}) and extracted topics ({topic_mentions}), suggest recommendations for the restaurant to improve."""

    return [sentiment_prompt, topic_prompt, recommendation_prompt]
|
|
def query_vectara(text):
    """Run a semantic query against the Vectara API and summarize the results.

    Args:
        text (str): The user's query text.

    Returns:
        str: A JSON-formatted string with ``summary`` and ``sources`` on
        success, or a human-readable error/empty-result message.
    """
    # Local imports: the original file used json/os without importing them,
    # which raised NameError at runtime. Importing here keeps this fix
    # self-contained.
    import json
    import os

    user_message = text

    # Vectara credentials from the environment.
    customer_id = os.getenv('CUSTOMER_ID')
    corpus_id = os.getenv('CORPUS_ID')
    api_key = os.getenv('API_KEY')

    api_key_header = {
        "customer-id": customer_id,
        "x-api-key": api_key
    }

    # Query with MMR reranking and a server-side extractive summary.
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 25,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,  # MMR reranker id
                    "mmrConfig": {
                        "diversityBias": 0.35
                    }
                },
                "corpusKey": [
                    {
                        "customerId": customer_id,
                        "corpusId": corpus_id,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0
                        },
                        "dim": []
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0"
                    }
                ]
            }
        ]
    }

    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,
        verify=True,
        headers=api_key_header
    )

    if response.status_code != 200:
        return f"Error: {response.status_code}"

    query_data = response.json()
    if not query_data:
        return "No data found in the response."

    sources_info = []

    # Guard the nested lookup: a 200 response with an unexpected shape
    # previously raised KeyError/IndexError and crashed the handler.
    try:
        summary = query_data['responseSet'][0]['summary'][0]['text']
    except (KeyError, IndexError, TypeError):
        return "No data found in the response."

    # Collect metadata (title/author/page) for the top 5 sources per set.
    for response_set in query_data.get('responseSet', []):
        for source in response_set.get('response', [])[:5]:
            source_metadata = source.get('metadata', [])
            source_info = {}

            for metadata in source_metadata:
                metadata_name = metadata.get('name', '')
                metadata_value = metadata.get('value', '')

                if metadata_name == 'title':
                    source_info['title'] = metadata_value
                elif metadata_name == 'author':
                    source_info['author'] = metadata_value
                elif metadata_name == 'pageNumber':
                    source_info['page number'] = metadata_value

            if source_info:
                sources_info.append(source_info)

    result = {"summary": summary, "sources": sources_info}
    return json.dumps(result, indent=2)
|
|
| |
def generate_outputs(user_prompt):
    """Gradio handler: fan a prompt out to several agents and collect results.

    Args:
        user_prompt (str): Prompt text entered in the UI.

    Returns:
        tuple: (report, recommendations, visualization) as produced by
        ``produce_outputs``.
    """
    # NOTE(review): generate_synthetic_data_distributed and produce_outputs
    # are not defined anywhere in this file — confirm they exist elsewhere
    # in the project, otherwise this handler raises NameError.
    synthetic_data = generate_synthetic_data_distributed(user_prompt)

    # Augment the user's prompt with the synthetic data.
    combined_data = f"{user_prompt}\n{synthetic_data}"

    # Fix: the original passed an undefined name `processed_prompt`.
    agent_prompts = dspy_generate_agent_prompts(combined_data)

    # transformers text-generation pipelines return [{"generated_text": ...}];
    # the original indexed the dict with the whole combined text.
    output_1 = llm(agent_prompts[0], max_length=100)[0]["generated_text"]
    output_2 = llm(agent_prompts[1], max_length=100)[0]["generated_text"]

    # Fix: the original referenced an undefined name `prompt`.
    output_3 = query_vectara(user_prompt)

    report, recommendations, visualization = produce_outputs(combined_data)

    return report, recommendations, visualization
|
|
| |
# Wire the handler into a three-output Gradio app and start serving it.
demo = gr.Interface(
    fn=generate_outputs,
    inputs=gr.Textbox(label="Enter a prompt"),
    outputs=["textbox", "textbox", "image"],
    title="Multi-Agent Prompt Processor",
    description="Processes a prompt using Langchain, DSPy, and a chosen Hugging Face LLM to generate diverse outputs.",
)
demo.launch()
|
|