| import streamlit as st |
| import gensim.downloader as api |
| import numpy as np |
| import matplotlib.pyplot as plt |
| from sklearn.manifold import TSNE |
| import pandas as pd |
| import plotly.express as px |
|
|
| |
@st.cache_resource
def load_model():
    """Fetch the pretrained ``word2vec-google-news-300`` vectors via gensim.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse
    the returned object instead of loading it again on each script run.

    Returns:
        Whatever ``gensim.downloader.load`` returns for that model name.
    """
    pretrained_name = "word2vec-google-news-300"
    embeddings = api.load(pretrained_name)
    return embeddings
|
|
# Load the embedding model; load_model() is decorated with st.cache_resource.
model = load_model()

# Page header.
# NOTE(review): the first character of the title looks like a mis-encoded
# emoji; it is left untouched here -- confirm the intended glyph.
st.title("๐ Word Embedding Visualization")
st.write("Enter words to visualize their embeddings using t-SNE.")

# Raw comma-separated text typed by the user (pre-filled with an example).
raw_words = st.text_input("Enter words (comma-separated)", "king, queen, man, woman, dog, cat")

# Trim whitespace, drop empty entries and collapse duplicates while keeping
# the order in which the user typed them. Duplicates would otherwise count
# towards the "at least two words" check and produce identical points.
stripped = (token.strip() for token in raw_words.split(","))
candidates = list(dict.fromkeys(token for token in stripped if token))

# Keep only words the model has a vector for; tell the user about the rest
# instead of dropping them silently.
words = [word for word in candidates if word in model]
skipped = [word for word in candidates if word not in model]
if skipped:
    st.info(f"Ignored words not found in the model vocabulary: {', '.join(skipped)}")
|
|
def _tsne_project(vectors, n_components, preferred_perplexity=5, seed=42):
    """Reduce ``vectors`` to ``n_components`` dimensions with t-SNE.

    Args:
        vectors: 2-D array with one row per sample (at least two rows).
        n_components: Number of output dimensions.
        preferred_perplexity: Perplexity used when there are enough samples;
            it is lowered automatically for small inputs.
        seed: Value passed as ``random_state`` for reproducible layouts.

    Returns:
        Array of shape ``(len(vectors), n_components)`` from
        ``TSNE.fit_transform``.
    """
    n_samples = len(vectors)
    options = {
        "n_components": n_components,
        # scikit-learn requires perplexity to be strictly below n_samples,
        # so a fixed value of 5 fails for inputs of two to five words.
        "perplexity": min(preferred_perplexity, n_samples - 1),
        "random_state": seed,
    }
    if n_samples <= n_components:
        # PCA-based initialisation (the default in recent scikit-learn
        # releases) cannot yield n_components axes from so few samples;
        # random initialisation has no such requirement.
        options["init"] = "random"
    return TSNE(**options).fit_transform(vectors)


if len(words) < 2:
    st.warning("Please enter at least two valid words from the Word2Vec model.")
else:
    # Stack one embedding row per word, in the same order as ``words``.
    vectors = np.array([model[word] for word in words])

    # --- 2-D view rendered with Matplotlib ---
    vectors_2d = _tsne_project(vectors, n_components=2)

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1])

    # Label each point with its word, nudged slightly off the marker.
    for (x_pos, y_pos), word in zip(vectors_2d, words):
        ax.text(x_pos + 0.01, y_pos + 0.01, word, fontsize=12)

    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until closed; release
    # it so figures do not pile up across script reruns.
    plt.close(fig)

    # --- 3-D view rendered with Plotly ---
    vectors_3d = _tsne_project(vectors, n_components=3)

    df = pd.DataFrame(vectors_3d, columns=["x", "y", "z"])
    df["word"] = words

    fig3d = px.scatter_3d(df, x="x", y="y", z="z", text="word", title="3D t-SNE Word Embeddings")
    st.plotly_chart(fig3d)
|
|