# Install dependencies (uncomment if running locally)
# !pip install gradio pandas sentence-transformers
import os
import zipfile
import requests
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer, util
### STEP 1: Download and unzip the influencer dataset from Hugging Face
# Replace this with your actual dataset ZIP URL
url = "https://huggingface.co/datasets/your-username/influencer-dataset-merged/resolve/main/top_100_influencers.zip"
zip_path = "top_100_influencers.zip"

# Download zip file if not already present (cached between runs)
if not os.path.exists(zip_path):
    print("Downloading influencer dataset...")
    # Explicit timeout so a dead host can't hang the app at startup;
    # raise_for_status() so a 404/500 fails loudly instead of writing
    # an HTML error page into the .zip.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    with open(zip_path, "wb") as f:
        f.write(r.content)

# Unzip the file into a folder (skipped if already extracted)
unzip_dir = "influencer_data"
if not os.path.exists(unzip_dir):
    print("Unzipping dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(unzip_dir)
### STEP 2: Merge all CSVs into one
print("Merging influencer files...")
all_dfs = []
for file in os.listdir(unzip_dir):
    if file.endswith(".csv"):
        df = pd.read_csv(os.path.join(unzip_dir, file))
        df["Source File"] = file  # Optional: keep track of file origin
        all_dfs.append(df)
df = pd.concat(all_dfs, ignore_index=True)

# Basic cleanup: drop exact duplicate rows, require the two columns the
# recommender depends on, and blank out remaining NaNs so string
# concatenation in STEP 3 can't produce NaN profile texts.
df.drop_duplicates(inplace=True)
df.dropna(subset=["Name", "Niche"], inplace=True)
df.fillna("", inplace=True)

# Save combined dataset (optional)
df.to_csv("top_100_influencers_combined.csv", index=False)
print("Combined dataset ready!")
### STEP 3: Build the recommender engine
# Combine fields into one free-text profile per influencer for semantic embedding
df["profile_text"] = df["Name"] + " - " + df["Platform"] + " - " + df["Niche"] + " - " + df["Country"]

# Load sentence embedding model (small, fast general-purpose encoder)
print("Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Precompute influencer embeddings once at startup; queries are compared
# against this tensor at request time.
print("Encoding influencer profiles...")
influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
### STEP 4: Define similarity search + UI
def recommend_influencers(brand_description, top_k=3):
    """Return the influencers whose profiles best match a brand description.

    Args:
        brand_description: Free-text brand/campaign description to embed.
        top_k: Number of matches to return (default 3, the original
            behavior; capped at the dataset size).

    Returns:
        A list of dicts with influencer metadata, ordered by descending
        cosine similarity between the query embedding and the precomputed
        ``influencer_embeddings``.
    """
    # Embed the query in the same vector space as the profile texts
    query_embedding = model.encode(brand_description, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
    # Guard: topk(k) raises if k exceeds the number of profiles
    k = min(top_k, len(df))
    top_indices = cosine_scores.topk(k).indices.tolist()

    recommendations = []
    for idx in top_indices:
        row = df.iloc[idx]
        recommendations.append({
            "Influencer": row["Name"],
            "Platform": row["Platform"],
            "Niche": row["Niche"],
            "Country": row["Country"],
            # These columns may be absent in some source files — degrade gracefully
            "Engagement Rate": row.get("Engagement Rate", "N/A"),
            "Followers": row.get("Followers", "N/A"),
        })
    return recommendations
def format_output(brand_input):
    """Render the top influencer matches for *brand_input* as Markdown."""
    sections = []
    for rank, rec in enumerate(recommend_influencers(brand_input), start=1):
        sections.append(
            f"### {rank}. {rec['Influencer']} ({rec['Platform']})\n"
            f"- Niche: {rec['Niche']}\n"
            f"- Country: {rec['Country']}\n"
            f"- Engagement Rate: {rec['Engagement Rate']}\n"
            f"- Followers: {rec['Followers']}\n\n"
        )
    return "".join(sections)
# Gradio UI: one textbox in, Markdown recommendations out.
demo = gr.Interface(
    fn=format_output,
    inputs=gr.Textbox(
        label="Enter your brand or campaign description",
        placeholder="e.g. Sustainable fashion for Gen Z",
    ),
    outputs=gr.Markdown(label="Top 3 Influencer Matches"),
    title="InfluMatch: Influencer Recommender",
    description="Describe your brand or campaign and get 3 matching influencer suggestions.",
    examples=[
        ["Tech gadgets for millennial men"],
        ["Skincare brand for Gen Z in the US"],
        ["Luxury travel experiences for couples"],
        ["Eco-friendly fashion accessories"],
    ],
)

if __name__ == "__main__":
    demo.launch()