# Hugging Face Spaces page header (scrape residue): Space status "Sleeping"
# π§ Install dependencies (uncomment if running locally)
# !pip install gradio pandas sentence-transformers
import os
import zipfile

import requests
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer, util

### STEP 1: Download and unzip the influencer dataset from Hugging Face
# Replace this with your actual dataset ZIP URL
url = "https://huggingface.co/datasets/your-username/influencer-dataset-merged/resolve/main/top_100_influencers.zip"
zip_path = "top_100_influencers.zip"

# Download zip file if not already present
if not os.path.exists(zip_path):
    print("π₯ Downloading influencer dataset...")
    # timeout + raise_for_status: fail loudly on a bad URL / HTTP error
    # instead of silently writing an HTML error page into the .zip file.
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    with open(zip_path, "wb") as f:
        f.write(r.content)

# Unzip the file into a folder
unzip_dir = "influencer_data"
if not os.path.exists(unzip_dir):
    print("π¦ Unzipping dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(unzip_dir)
### STEP 2: Merge all CSVs into one
print("π Merging influencer files...")
all_dfs = []
# sorted() makes the merge order deterministic (os.listdir order is not
# guaranteed), so repeated runs produce the same combined dataset.
for file in sorted(os.listdir(unzip_dir)):
    if file.endswith(".csv"):
        df = pd.read_csv(os.path.join(unzip_dir, file))
        df["Source File"] = file  # Optional: keep track of file origin
        all_dfs.append(df)

# Fail fast with a clear message if the archive contained no CSV files —
# pd.concat([]) would otherwise raise a cryptic ValueError.
if not all_dfs:
    raise FileNotFoundError(f"No CSV files found in '{unzip_dir}'")

df = pd.concat(all_dfs, ignore_index=True)

# Basic cleanup: drop exact duplicates, require the two key columns,
# and blank out remaining missing values so string ops below are safe.
df.drop_duplicates(inplace=True)
df.dropna(subset=["Name", "Niche"], inplace=True)
df.fillna("", inplace=True)

# Save combined dataset (optional)
df.to_csv("top_100_influencers_combined.csv", index=False)
print("β Combined dataset ready!")
### STEP 3: Build the recommender engine
# Combine fields for semantic embedding. astype(str) guards against
# non-string cells (e.g. numeric codes), which would otherwise make the
# "+" concatenation raise a TypeError.
df["profile_text"] = (
    df["Name"].astype(str)
    + " - " + df["Platform"].astype(str)
    + " - " + df["Niche"].astype(str)
    + " - " + df["Country"].astype(str)
)

# Load sentence embedding model
print("π§ Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Precompute influencer embeddings once so each query only encodes itself.
print("π’ Encoding influencer profiles...")
influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)
| ### STEP 4: Define similarity search + UI | |
def recommend_influencers(brand_description, top_k=3):
    """Return the influencers semantically closest to a brand description.

    Args:
        brand_description: Free-text description of the brand/campaign.
        top_k: Number of matches to return (default 3). Clamped to the
            number of available influencers so topk() cannot raise on a
            small dataset.

    Returns:
        A list of dicts with the influencer's name, platform, niche,
        country, engagement rate and follower count.
    """
    query_embedding = model.encode(brand_description, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
    # Clamp k: torch's topk raises if asked for more items than exist.
    k = min(top_k, len(df))
    top_indices = cosine_scores.topk(k).indices.tolist()

    recommendations = []
    for idx in top_indices:
        row = df.iloc[idx]
        recommendations.append({
            "Influencer": row["Name"],
            "Platform": row["Platform"],
            "Niche": row["Niche"],
            "Country": row["Country"],
            # .get keeps the app working if these optional columns are absent
            "Engagement Rate": row.get("Engagement Rate", "N/A"),
            "Followers": row.get("Followers", "N/A"),
        })
    return recommendations
def format_output(brand_input):
    """Render the top influencer matches as a Markdown string."""
    sections = []
    for rank, rec in enumerate(recommend_influencers(brand_input), start=1):
        sections.append(
            f"### {rank}. {rec['Influencer']} ({rec['Platform']})\n"
            f"- Niche: {rec['Niche']}\n"
            f"- Country: {rec['Country']}\n"
            f"- Engagement Rate: {rec['Engagement Rate']}\n"
            f"- Followers: {rec['Followers']}\n\n"
        )
    return "".join(sections)
# Gradio UI: a single free-text box in, a Markdown block of matches out.
example_queries = [
    ["Tech gadgets for millennial men"],
    ["Skincare brand for Gen Z in the US"],
    ["Luxury travel experiences for couples"],
    ["Eco-friendly fashion accessories"],
]

demo = gr.Interface(
    fn=format_output,
    inputs=gr.Textbox(
        label="Enter your brand or campaign description",
        placeholder="e.g. Sustainable fashion for Gen Z",
    ),
    outputs=gr.Markdown(label="Top 3 Influencer Matches"),
    title="InfluMatch: Influencer Recommender",
    description="Describe your brand or campaign and get 3 matching influencer suggestions.",
    examples=example_queries,
)

if __name__ == "__main__":
    demo.launch()