# Source: Hugging Face Space by yoniif — "Update app.py", commit 0d6d77d (verified, 3.9 kB).
# NOTE: the original lines here ("raw", "history blame", file size) were
# Hugging Face file-viewer page residue, not part of the program.
# πŸ”§ Install dependencies (uncomment if running locally)
# !pip install gradio pandas sentence-transformers
import os
import zipfile
import requests
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer, util
### STEP 1: Download and unzip the influencer dataset from Hugging Face
# Replace this with your actual dataset ZIP URL
url = "https://huggingface.co/datasets/your-username/influencer-dataset-merged/resolve/main/top_100_influencers.zip"
zip_path = "top_100_influencers.zip"

# Download the zip file if not already present.
if not os.path.exists(zip_path):
    print("📥 Downloading influencer dataset...")
    # Fail loudly on HTTP errors (404/403) instead of silently writing an
    # HTML error page into the zip; bound the request with a timeout so a
    # hung connection cannot stall startup forever.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    with open(zip_path, "wb") as f:
        f.write(r.content)

# Unzip the archive into a folder (skipped if already extracted).
unzip_dir = "influencer_data"
if not os.path.exists(unzip_dir):
    print("📦 Unzipping dataset...")
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(unzip_dir)
### STEP 2: Merge all CSVs into one DataFrame
print("🔗 Merging influencer files...")
all_dfs = []
for file in os.listdir(unzip_dir):
    if file.endswith(".csv"):
        # Use a distinct name for the per-file frame so it does not shadow
        # the combined `df` built below.
        part = pd.read_csv(os.path.join(unzip_dir, file))
        part["Source File"] = file  # Optional: keep track of file origin
        all_dfs.append(part)

# pd.concat([]) raises a cryptic ValueError; give a clear error instead.
if not all_dfs:
    raise FileNotFoundError(f"No CSV files found in {unzip_dir!r}")

df = pd.concat(all_dfs, ignore_index=True)

# Basic cleanup: drop exact duplicates, require the two key columns used
# by the recommender, and blank out remaining missing values so string
# concatenation in STEP 3 cannot produce NaN.
df.drop_duplicates(inplace=True)
df.dropna(subset=["Name", "Niche"], inplace=True)
df.fillna("", inplace=True)

# Save combined dataset (optional)
df.to_csv("top_100_influencers_combined.csv", index=False)
print("✅ Combined dataset ready!")
### STEP 3: Build the recommender engine
# Combine the descriptive fields into one string per influencer so a
# single sentence embedding captures the whole profile.
df["profile_text"] = (
    df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]
)

# Load the sentence embedding model (MiniLM: small and fast, good quality).
print("🧠 Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Precompute embeddings for every influencer once at startup so each
# query only needs to encode the brand description.
print("🔢 Encoding influencer profiles...")
influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
### STEP 4: Define similarity search + UI
def recommend_influencers(brand_description, top_k=3):
    """Return the influencers most semantically similar to a brand description.

    Parameters
    ----------
    brand_description : str
        Free-text description of the brand or campaign.
    top_k : int, optional
        Maximum number of matches to return (default 3, preserving the
        original behavior).

    Returns
    -------
    list[dict]
        One dict per match with influencer name, platform, niche, country,
        and (when those columns exist) engagement rate and follower count.
    """
    query_embedding = model.encode(brand_description, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
    # Clamp k so a dataset with fewer than top_k rows cannot make topk() raise.
    k = min(top_k, len(cosine_scores))
    top_indices = cosine_scores.topk(k).indices.tolist()
    recommendations = []
    for idx in top_indices:
        row = df.iloc[idx]
        recommendations.append({
            "Influencer": row["Name"],
            "Platform": row["Platform"],
            "Niche": row["Niche"],
            "Country": row["Country"],
            # .get() keeps the app working if these optional columns are absent.
            "Engagement Rate": row.get("Engagement Rate", "N/A"),
            "Followers": row.get("Followers", "N/A"),
        })
    return recommendations
def format_output(brand_input):
    """Render the top influencer matches as a Markdown string for the UI."""
    sections = []
    for rank, rec in enumerate(recommend_influencers(brand_input), start=1):
        sections.append(
            f"### {rank}. {rec['Influencer']} ({rec['Platform']})\n"
            f"- Niche: {rec['Niche']}\n"
            f"- Country: {rec['Country']}\n"
            f"- Engagement Rate: {rec['Engagement Rate']}\n"
            f"- Followers: {rec['Followers']}\n\n"
        )
    # join() is equivalent to the repeated += concatenation of each section.
    return "".join(sections)
# Example prompts shown beneath the input box for one-click demos.
example_prompts = [
    ["Tech gadgets for millennial men"],
    ["Skincare brand for Gen Z in the US"],
    ["Luxury travel experiences for couples"],
    ["Eco-friendly fashion accessories"],
]

# Gradio UI: one text input (brand description) -> Markdown output.
demo = gr.Interface(
    fn=format_output,
    inputs=gr.Textbox(
        label="Enter your brand or campaign description",
        placeholder="e.g. Sustainable fashion for Gen Z",
    ),
    outputs=gr.Markdown(label="Top 3 Influencer Matches"),
    title="InfluMatch: Influencer Recommender",
    description="Describe your brand or campaign and get 3 matching influencer suggestions.",
    examples=example_prompts,
)
# Start the Gradio web server only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()