File size: 3,895 Bytes
0d6d77d
e815416
 
0d6d77d
 
 
e815416
0d6d77d
e815416
 
0d6d77d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e815416
0d6d77d
 
 
 
 
 
 
 
 
 
 
 
 
e815416
 
0d6d77d
 
 
 
 
 
 
e815416
 
0d6d77d
 
e815416
 
0d6d77d
 
e815416
 
0d6d77d
 
e815416
 
 
 
0d6d77d
e815416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d6d77d
e815416
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# 🔧 Install dependencies (uncomment if running locally)
# !pip install gradio pandas requests sentence-transformers

import os
import zipfile
import requests
import pandas as pd
import gradio as gr
from sentence_transformers import SentenceTransformer, util

### STEP 1: Download and unzip the influencer dataset from Hugging Face

# Replace this with your actual dataset ZIP URL
url = "https://huggingface.co/datasets/your-username/influencer-dataset-merged/resolve/main/top_100_influencers.zip"
zip_path = "top_100_influencers.zip"

# Download the zip file if not already present.
# The body is streamed to a temporary ".part" file and only renamed to
# zip_path once the transfer has completed, so an HTTP error page or an
# interrupted download is never cached as if it were a valid archive.
if not os.path.exists(zip_path):
    print("📥 Downloading influencer dataset...")
    partial_path = zip_path + ".part"
    try:
        with requests.get(url, stream=True, timeout=60) as r:
            # Fail loudly on 4xx/5xx instead of saving the error body to disk.
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=65536):
                    f.write(chunk)
        os.replace(partial_path, zip_path)
    finally:
        # Remove a half-written download; after a successful rename the
        # partial file no longer exists and this is a no-op.
        if os.path.exists(partial_path):
            os.remove(partial_path)

# Unzip the file into a folder (skipped when the folder already exists)
unzip_dir = "influencer_data"
if not os.path.exists(unzip_dir):
    print("📦 Unzipping dataset...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(unzip_dir)

### STEP 2: Merge all CSVs into one

print("🔗 Merging influencer files...")
all_dfs = []
# os.listdir() returns entries in arbitrary order; sorting keeps the merged
# row order (and therefore the saved CSV) reproducible between runs.
for filename in sorted(os.listdir(unzip_dir)):
    if filename.endswith(".csv"):
        part_df = pd.read_csv(os.path.join(unzip_dir, filename))
        part_df["Source File"] = filename  # Optional: keep track of file origin
        all_dfs.append(part_df)

# pd.concat([]) raises an opaque "No objects to concatenate"; explain instead.
if not all_dfs:
    raise FileNotFoundError(
        f"No .csv files found directly inside '{unzip_dir}'. "
        "Check the dataset URL and the layout of the downloaded archive."
    )

df = pd.concat(all_dfs, ignore_index=True)

# The cleanup below needs these columns; report clearly if they are absent.
missing_required = [col for col in ("Name", "Niche") if col not in df.columns]
if missing_required:
    raise KeyError(f"Merged dataset is missing required column(s): {missing_required}")

# Basic cleanup
# Ignore "Source File" when de-duplicating: it differs for every file, so
# including it would keep the same influencer row once per source file.
dedupe_columns = [col for col in df.columns if col != "Source File"]
df = df.drop_duplicates(subset=dedupe_columns)
df = df.dropna(subset=["Name", "Niche"])
df = df.fillna("")

# Save combined dataset (optional)
df.to_csv("top_100_influencers_combined.csv", index=False)
print("✅ Combined dataset ready!")

### STEP 3: Build the recommender engine

# Combine fields for semantic embedding
profile_columns = ["Name", "Platform", "Niche", "Country"]
missing_profile_columns = [col for col in profile_columns if col not in df.columns]
if missing_profile_columns:
    raise KeyError(
        f"Dataset is missing column(s) needed for profile text: {missing_profile_columns}"
    )
if df.empty:
    raise ValueError("No influencer rows left after cleanup; nothing to embed.")

# Cast each field to str before joining so a non-text column (for example a
# numeric one) cannot break the concatenation with a TypeError.
df["profile_text"] = df["Name"].astype(str).str.cat(
    [df[col].astype(str) for col in profile_columns[1:]],
    sep=" - ",
)

# Load sentence embedding model
print("🧠 Loading embedding model...")
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Precompute influencer embeddings once at startup; each query is compared
# against this tensor instead of re-encoding every profile.
print("🔢 Encoding influencer profiles...")
influencer_embeddings = model.encode(df["profile_text"].tolist(), convert_to_tensor=True)

### STEP 4: Define similarity search + UI

def recommend_influencers(brand_description, top_k=3):
    """Return the influencers whose profile text best matches a description.

    The description is embedded with the module-level ``model`` and compared
    by cosine similarity against the precomputed ``influencer_embeddings``.

    Args:
        brand_description: Free-text brand or campaign description.
        top_k: Maximum number of matches to return (default 3). It is capped
            at the number of available influencer rows.

    Returns:
        A list of dicts with the keys "Influencer", "Platform", "Niche",
        "Country", "Engagement Rate" and "Followers", best match first. The
        last two fall back to "N/A" when the dataset has no such column.
        An empty list is returned for a blank/None description or a
        non-positive ``top_k``.
    """
    # Nothing meaningful to embed: avoid returning arbitrary matches.
    if top_k <= 0 or not brand_description or not brand_description.strip():
        return []

    # topk() raises if asked for more items than exist, so clamp the request.
    k = min(top_k, len(df))
    if k == 0:
        return []

    query_embedding = model.encode(brand_description, convert_to_tensor=True)
    # A single query yields a 1 x N score matrix; take its only row.
    cosine_scores = util.pytorch_cos_sim(query_embedding, influencer_embeddings)[0]
    top_indices = cosine_scores.topk(k).indices.tolist()

    recommendations = []
    for idx in top_indices:
        # Positional lookup: embeddings were built from df rows in order.
        row = df.iloc[idx]
        recommendations.append({
            "Influencer": row["Name"],
            "Platform": row["Platform"],
            "Niche": row["Niche"],
            "Country": row["Country"],
            "Engagement Rate": row.get("Engagement Rate", "N/A"),
            "Followers": row.get("Followers", "N/A"),
        })
    return recommendations

def format_output(brand_input):
    """Render the recommendations for ``brand_input`` as a Markdown string.

    Each match from ``recommend_influencers`` becomes a numbered level-3
    heading (name and platform) followed by a bullet list of niche, country,
    engagement rate and followers, with a blank line after each entry.
    """
    sections = [
        f"### {rank}. {match['Influencer']} ({match['Platform']})\n"
        f"- Niche: {match['Niche']}\n"
        f"- Country: {match['Country']}\n"
        f"- Engagement Rate: {match['Engagement Rate']}\n"
        f"- Followers: {match['Followers']}\n\n"
        for rank, match in enumerate(recommend_influencers(brand_input), 1)
    ]
    return "".join(sections)

# Gradio UI: one free-text box in, Markdown produced by format_output out.
_brand_input_box = gr.Textbox(
    label="Enter your brand or campaign description",
    placeholder="e.g. Sustainable fashion for Gen Z",
)
_matches_view = gr.Markdown(label="Top 3 Influencer Matches")

# Sample prompts offered as examples in the interface (one input per row).
_example_prompts = [
    ["Tech gadgets for millennial men"],
    ["Skincare brand for Gen Z in the US"],
    ["Luxury travel experiences for couples"],
    ["Eco-friendly fashion accessories"],
]

demo = gr.Interface(
    fn=format_output,
    inputs=_brand_input_box,
    outputs=_matches_view,
    title="InfluMatch: Influencer Recommender",
    description="Describe your brand or campaign and get 3 matching influencer suggestions.",
    examples=_example_prompts,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()