import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd


# Default input/output locations. Kept as module-level names so anything that
# already refers to them keeps working.
base_folder = "./tags"
output_folder = "./plots"

# Characters that cannot be used in a file name on common platforms: path
# separators, Windows-reserved punctuation and ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def _count_skills(tags_dir):
    """Count skill tags per sub-folder of *tags_dir*.

    Every immediate sub-directory of *tags_dir* is scanned for ``*.txt``
    files; each non-blank line of such a file is treated as one skill tag.

    Args:
        tags_dir: Folder whose sub-directories hold the tag files.

    Returns:
        A dict mapping each sub-directory name to a ``Counter`` of
        ``skill -> number of occurrences`` in that sub-directory.

    Raises:
        FileNotFoundError: If *tags_dir* does not exist (from ``os.listdir``).
    """
    date_skill_counts = {}
    for date_folder in sorted(os.listdir(tags_dir)):
        folder_path = os.path.join(tags_dir, date_folder)
        if not os.path.isdir(folder_path):
            continue  # stray files next to the sub-folders are ignored

        skill_counter = Counter()
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            # Only regular ``.txt`` files carry tags; a directory that happens
            # to be called ``something.txt`` must not be opened.
            if not (file_name.endswith(".txt") and os.path.isfile(file_path)):
                continue
            with open(file_path, "r", encoding="utf-8") as file:
                lines = file.read().splitlines()
            # Strip each line and drop blanks so that empty lines are not
            # counted as a skill and "Python " / "Python" are the same tag.
            stripped = (line.strip() for line in lines)
            skill_counter.update(skill for skill in stripped if skill)

        date_skill_counts[date_folder] = skill_counter
    return date_skill_counts


def _build_frame(date_skill_counts):
    """Turn per-folder skill counters into a folder-by-skill count table.

    Rows are the folder names in sorted (lexicographic) order, columns are
    the skills in sorted order, and missing combinations are filled with 0.
    Sorting the columns keeps the table - and therefore tie-breaking when
    picking the top skills - identical from run to run.
    """
    all_dates = sorted(date_skill_counts)
    all_skills = sorted(
        {skill for counts in date_skill_counts.values() for skill in counts}
    )
    data = {
        skill: [date_skill_counts[date].get(skill, 0) for date in all_dates]
        for skill in all_skills
    }
    return pd.DataFrame(data, index=all_dates)


def _safe_filename(name):
    """Return *name* with characters that are invalid in file names replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name)


def _plot_skill_trend(df, skill, plots_dir, show=True):
    """Plot one skill's counts over the table index and save it as a PNG.

    Args:
        df: Count table as produced by ``_build_frame``.
        skill: Column of *df* to plot.
        plots_dir: Existing folder the PNG is written to.
        show: Whether to call ``plt.show()`` after saving.

    Returns:
        The path of the saved image.
    """
    print(f"Trend of {skill} Over Time")
    fig = plt.figure(figsize=(8, 5))
    plt.plot(df.index, df[skill], marker="o", label=skill)

    plt.title(f"Trend of {skill} Over Time")
    plt.xlabel("Date")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.legend(title="Skill")
    plt.grid(True)
    plt.tight_layout()

    # Skill names such as "CI/CD" contain path separators; sanitize them so
    # the plot lands inside plots_dir instead of failing or escaping it.
    plot_path = os.path.join(plots_dir, f"{_safe_filename(skill)}_trend.png")
    plt.savefig(plot_path, format="png", dpi=300)
    print(f"Saved plot for {skill} at {plot_path}")

    if show:
        plt.show()
    # Release the figure; otherwise figures pile up under non-interactive
    # backends where show() returns immediately.
    plt.close(fig)
    return plot_path


def main(tags_dir=base_folder, plots_dir=output_folder, top_n=3, show=True):
    """Count skills per folder, print the table and plot the most frequent ones.

    Args:
        tags_dir: Folder containing one sub-folder per date with ``.txt`` tag
            files (sub-folder names are presumably dates; they are only
            sorted as strings here).
        plots_dir: Folder the trend plots are written to; created if missing.
        top_n: How many of the most frequent skills (by total count) to plot.
        show: Whether to display each plot interactively after saving it.

    Returns:
        The folder-by-skill count ``DataFrame``.

    Raises:
        FileNotFoundError: If *tags_dir* does not exist.
    """
    os.makedirs(plots_dir, exist_ok=True)

    df = _build_frame(_count_skills(tags_dir))
    print(df)

    # Nothing to rank or plot when no folder or no skill was found.
    if df.empty:
        return df

    total_counts = df.sum(axis=0)
    top_skills = total_counts.nlargest(top_n).index
    for skill in top_skills:
        _plot_skill_trend(df, skill, plots_dir, show=show)
    return df


if __name__ == "__main__":
    main()