| import gradio as gr |
| from transformers import pipeline |
| |
| import numpy as np |
| import pickle |
|
|
| |
| |
| |
|
|
|
|
# Sample job-requirement sentences. Nothing in the visible part of this file
# reads this list.
examples = [
    "High proficiency in Python and AI/ML frameworks, i.e. Pytorch.",
    "Experience with Unreal and/or Unity and/or native IOS/Android 3D development",
]
|
|
|
|
def aggregate_span(results):
    """Merge directly adjacent spans into single entries.

    Walks ``results`` in order. An entry is folded into the span currently
    being built when its ``"start"`` equals that span's ``"end"`` plus one:
    the ``"word"`` values are joined with a single space and ``"end"`` is
    extended. Any other keys keep the values of the first entry of the run.

    Args:
        results: Sequence of dicts, each providing ``"start"``, ``"end"``
            and ``"word"`` keys.

    Returns:
        A new list of merged dicts, in input order. Empty input yields an
        empty list. The dicts in ``results`` are never modified.
    """
    # Nothing to merge; indexing results[0] below would raise on empty input.
    if not results:
        return []

    merged = []
    # Work on shallow copies so extending a span never writes into the
    # caller's dicts.
    current = dict(results[0])

    for result in results[1:]:
        if result["start"] == current["end"] + 1:
            # Exactly one character apart: treat as the same span.
            current["word"] += " " + result["word"]
            current["end"] = result["end"]
        else:
            merged.append(current)
            current = dict(result)

    # Flush the span still being built when the loop ends.
    merged.append(current)
    return merged
|
|
| |
|
|
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
|
|
| |
|
|
| import os |
|
|
def load_skills_from_date(base_folder, date):
    """Collect the unique skills listed in one date's ``.txt`` files.

    Reads every file whose name ends with ``.txt`` directly inside
    ``<base_folder>/<date>`` as UTF-8. Each non-blank line, stripped of
    surrounding whitespace, counts as one skill.

    Args:
        base_folder: Directory that contains one sub-directory per date.
        date: Name of the sub-directory to read.

    Returns:
        A sorted list of the distinct skills found. The list is empty when
        the date directory does not exist or is not a directory.
    """
    date_folder = os.path.join(base_folder, date)
    # isdir() is already False for a missing path, so no separate exists().
    if not os.path.isdir(date_folder):
        return []

    all_skills = set()
    for file_name in os.listdir(date_folder):
        if not file_name.endswith(".txt"):
            continue
        file_path = os.path.join(date_folder, file_name)
        with open(file_path, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            all_skills.update(skill for skill in stripped_lines if skill)

    # Set iteration order for strings varies between interpreter runs;
    # sorting makes the result reproducible.
    return sorted(all_skills)
|
|
def visualize3D(reduced_embeddings, labels, skills, n_clusters, output_folder, date):
    """Build a 3D scatter figure of the embeddings with Plotly Express.

    Columns 0, 1 and 2 of ``reduced_embeddings`` are used as the x, y and z
    coordinates. ``labels`` is passed as the point colour and ``skills`` as
    the point text.

    Args:
        reduced_embeddings: Array indexable as ``[:, k]`` for ``k`` in 0..2.
        labels: Values passed to ``px.scatter_3d`` as ``color``.
        skills: Values passed to ``px.scatter_3d`` as ``text``.
        n_clusters: Cluster count shown in the title.
        output_folder: Accepted but not used by this function.
        date: Date string shown in the title.

    Returns:
        The figure returned by ``px.scatter_3d``.
    """
    x_coords = reduced_embeddings[:, 0]
    y_coords = reduced_embeddings[:, 1]
    z_coords = reduced_embeddings[:, 2]
    plot_title = f"KMeans Clustering with {n_clusters} Clusters ({date})"

    scatter_kwargs = {
        "x": x_coords,
        "y": y_coords,
        "z": z_coords,
        "color": labels,
        "text": skills,
        "title": plot_title,
    }
    return px.scatter_3d(**scatter_kwargs)
|
|
|
|
|
|
| import plotly.express as px |
| import numpy as np |
|
|
# Date tag that selects which files under ./vectorstore are displayed.
specific_date = "03-01-2024"

embeddings = np.load(f"./vectorstore/{specific_date}_embeddings.npy")

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file. Only point this at metadata files produced by a trusted pipeline.
with open(f"./vectorstore/{specific_date}_metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Labels and skill names both come from the stored metadata; the skills are
# not re-read from ./tags. Presumably the metadata entries are in the same
# order as the embedding rows -- TODO confirm against the code that writes
# the vectorstore.
labels, skills = metadata["labels"], metadata["skills"]

fig = visualize3D(
    embeddings,
    labels,
    skills,
    n_clusters=5,
    output_folder="./plots",
    date=specific_date,
)
fig.update_layout(height=900)

with gr.Blocks() as demo:
    gr.Markdown("# 3D Visualization of Skills in ML Job Postings", elem_id="title")
    gr.Plot(fig)

demo.launch(ssr_mode=False)