Commit ·
9fce90e
0
Parent(s):
clean init
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +35 -0
- README.md +246 -0
- app.py +694 -0
- create_sample_data.py +115 -0
- data_manager.py +396 -0
- dataset/.DS_Store +0 -0
- dataset/label/.DS_Store +0 -0
- dataset/label/Apache ECharts/.DS_Store +0 -0
- dataset/label/Apache ECharts/bar/apply_source_from_webtxt.py +138 -0
- dataset/label/Apache ECharts/bar/chart_0001_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0002_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0003_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0004_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0005_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0006_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0007_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0008_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0009_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0010_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0011_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0012_bar.json +9 -0
- dataset/label/Apache ECharts/bar/chart_0013_bar.json +9 -0
- dataset/label/Apache ECharts/bar/fix_vchart_json.py +102 -0
- dataset/label/Apache ECharts/bar/rename.bat +24 -0
- dataset/label/Apache ECharts/bar/revise.bat +26 -0
- dataset/label/Apache ECharts/bar/run_apply_source.bat +23 -0
- dataset/label/Apache ECharts/bar/web.txt +13 -0
- dataset/label/Apache ECharts/box/apply_source_from_webtxt.py +138 -0
- dataset/label/Apache ECharts/box/chart_0001_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0002_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0003_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0004_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0005_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0006_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0007_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0008_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0009_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0010_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0011_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0012_box.json +9 -0
- dataset/label/Apache ECharts/box/chart_0013_box.json +9 -0
- dataset/label/Apache ECharts/box/fix_vchart_json.py +102 -0
- dataset/label/Apache ECharts/box/rename.bat +24 -0
- dataset/label/Apache ECharts/box/renamelast.bat +17 -0
- dataset/label/Apache ECharts/box/revise.bat +26 -0
- dataset/label/Apache ECharts/box/run_apply_source.bat +23 -0
- dataset/label/Apache ECharts/box/web.txt +13 -0
- dataset/label/Apache ECharts/heatmap/apply_source_from_webtxt.py +138 -0
- dataset/label/Apache ECharts/heatmap/chart_0001_heatmap.json +9 -0
- dataset/label/Apache ECharts/heatmap/chart_0002_heatmap.json +9 -0
.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 📊 图表问答数据集审核系统
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
一个基于 Gradio 的人工审核系统,用于审核图表问答数据集中的问题和答案是否合理正确。
|
| 5 |
+
|
| 6 |
+
## 功能特点
|
| 7 |
+
|
| 8 |
+
- 📊 **图表展示**:支持 HTML 格式的交互式图表展示
|
| 9 |
+
- 📁 **目录导航**:按 Source → Chart Type → Chart → Model 层级浏览
|
| 10 |
+
- ✅ **审核操作**:支持正确、错误、需修改、待定四种状态
|
| 11 |
+
- ✏️ **内容编辑**:可修改问题和答案,添加评论备注
|
| 12 |
+
- 📈 **统计面板**:实时显示审核进度和统计信息
|
| 13 |
+
- 📥 **数据导出**:一键导出审核记录为 JSON 格式
|
| 14 |
+
- ⌨️ **快捷键**:支持键盘导航和状态选择
|
| 15 |
+
|
| 16 |
+
## 目录结构
|
| 17 |
+
|
| 18 |
+
```
|
| 19 |
+
project/
|
| 20 |
+
├── app.py # 主应用
|
| 21 |
+
├── data_manager.py # 数据管理模块
|
| 22 |
+
├── requirements.txt # 依赖列表
|
| 23 |
+
├── README.md # 说明文档
|
| 24 |
+
├── create_sample_data.py # 示例数据生成脚本
|
| 25 |
+
│
|
| 26 |
+
├── dataset/ # 数据集目录(需要自行放置)
|
| 27 |
+
│ ├── web/ # 图表 HTML 文件
|
| 28 |
+
│ │ └── {source}/
|
| 29 |
+
│ │ └── {chart_type}/
|
| 30 |
+
│ │ └── *.html
|
| 31 |
+
│ ├── label/ # 图表标签信息
|
| 32 |
+
│ │ └── {source}/
|
| 33 |
+
│ │ └── {chart_type}/
|
| 34 |
+
│ │ └── *.json
|
| 35 |
+
│ └── question_answer/ # QA 数据
|
| 36 |
+
│ └── {source}/
|
| 37 |
+
│ └── {chart_type}/
|
| 38 |
+
│ └── {model}/
|
| 39 |
+
│ └── *.json
|
| 40 |
+
│
|
| 41 |
+
└── reviews/ # 审核记录(自动创建)
|
| 42 |
+
└── reviews.json
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
## 数据格式
|
| 46 |
+
|
| 47 |
+
### Label JSON 格式
|
| 48 |
+
```json
|
| 49 |
+
{
|
| 50 |
+
"Number": "0001",
|
| 51 |
+
"Type": "bar",
|
| 52 |
+
"Source": "Apache Echarts",
|
| 53 |
+
"Weblink": "https://example.com/chart",
|
| 54 |
+
"Topic": "Weather Statistics by City",
|
| 55 |
+
"Describe": "The chart compares...",
|
| 56 |
+
"Other": ""
|
| 57 |
+
}
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
### QA JSON 格式
|
| 61 |
+
```json
|
| 62 |
+
{
|
| 63 |
+
"id": "chart_0001_bar_q1",
|
| 64 |
+
"chart": "chart_0001_bar",
|
| 65 |
+
"question": "Weather Statistics图表中,三个城市在Showers天气下的总天数是多少?",
|
| 66 |
+
"answer": "203"
|
| 67 |
+
}
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
### 审核记录格式
|
| 71 |
+
```json
|
| 72 |
+
{
|
| 73 |
+
"review_id": "uuid",
|
| 74 |
+
"chart_id": "chart_0001_bar",
|
| 75 |
+
"qa_id": "chart_0001_bar_q1",
|
| 76 |
+
"source": "Apache Echarts",
|
| 77 |
+
"chart_type": "bar",
|
| 78 |
+
"model": "gpt-4",
|
| 79 |
+
"original_question": "...",
|
| 80 |
+
"original_answer": "...",
|
| 81 |
+
"status": "correct",
|
| 82 |
+
"modified_question": "",
|
| 83 |
+
"modified_answer": "",
|
| 84 |
+
"issue_type": "",
|
| 85 |
+
"comment": "",
|
| 86 |
+
"reviewer": "default",
|
| 87 |
+
"review_time": "2024-01-01T00:00:00"
|
| 88 |
+
}
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
## 快速开始
|
| 92 |
+
|
| 93 |
+
### 1. 安装依赖
|
| 94 |
+
|
| 95 |
+
```bash
|
| 96 |
+
pip install -r requirements.txt
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
### 2. 生成示例数据(可选)
|
| 100 |
+
|
| 101 |
+
如果你想先测试系统,可以运行以下命令生成示例数据:
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
python create_sample_data.py
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### 3. 准备数据集
|
| 108 |
+
|
| 109 |
+
将你的数据集放置到 `dataset/` 目录下,按照上述目录结构组织。
|
| 110 |
+
|
| 111 |
+
### 4. 启动应用
|
| 112 |
+
|
| 113 |
+
```bash
|
| 114 |
+
python app.py
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
应用将在 `http://localhost:7860` 启动。
|
| 118 |
+
|
| 119 |
+
## 快捷键
|
| 120 |
+
|
| 121 |
+
| 按键 | 功能 |
|
| 122 |
+
|------|------|
|
| 123 |
+
| ← | 上一个图表 |
|
| 124 |
+
| → | 下一个图表 |
|
| 125 |
+
| 1 | 标记为正确 |
|
| 126 |
+
| 2 | 标记为错误 |
|
| 127 |
+
| 3 | 标记为需修改 |
|
| 128 |
+
| 4 | 标记为待定 |
|
| 129 |
+
|
| 130 |
+
## 部署到 Hugging Face Spaces
|
| 131 |
+
|
| 132 |
+
### 方法 1:通过网页界面上传
|
| 133 |
+
|
| 134 |
+
1. 在 Hugging Face 创建一个新的 Space
|
| 135 |
+
2. 选择 "Gradio" 作为 SDK
|
| 136 |
+
3. 上传所有文件:
|
| 137 |
+
- `app.py`
|
| 138 |
+
- `data_manager.py`
|
| 139 |
+
- `requirements.txt`
|
| 140 |
+
- `README.md`(可选)
|
| 141 |
+
4. 在 Files 标签页创建 `dataset/` 目录结构
|
| 142 |
+
5. 上传你的数据集文件
|
| 143 |
+
|
| 144 |
+
### 方法 2:通过 Git 推送
|
| 145 |
+
|
| 146 |
+
```bash
|
| 147 |
+
# 1. 创建 Space(在 Hugging Face 网站上操作)
|
| 148 |
+
|
| 149 |
+
# 2. 克隆 Space
|
| 150 |
+
git clone https://huggingface.co/spaces/YOUR_USERNAME/YOUR_SPACE_NAME
|
| 151 |
+
cd YOUR_SPACE_NAME
|
| 152 |
+
|
| 153 |
+
# 3. 复制项目文件
|
| 154 |
+
cp /path/to/gradio-review-app/app.py .
|
| 155 |
+
cp /path/to/gradio-review-app/data_manager.py .
|
| 156 |
+
cp /path/to/gradio-review-app/requirements.txt .
|
| 157 |
+
|
| 158 |
+
# 4. 创建数据集目录结构
|
| 159 |
+
mkdir -p dataset/web dataset/label dataset/question_answer
|
| 160 |
+
|
| 161 |
+
# 5. 复制数据集
|
| 162 |
+
# 根据你的数据集结构复制相应文件
|
| 163 |
+
|
| 164 |
+
# 6. 提交并推送
|
| 165 |
+
git add .
|
| 166 |
+
git commit -m "Initial commit with dataset"
|
| 167 |
+
git push
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
### 方法 3:使用 Hugging Face Datasets(推荐大数据集)
|
| 171 |
+
|
| 172 |
+
如果数据集较大,建议将数据集上传为独立的 Dataset,然后在代码中加载:
|
| 173 |
+
|
| 174 |
+
```python
|
| 175 |
+
from datasets import load_dataset
|
| 176 |
+
|
| 177 |
+
# 在 app.py 中添加
|
| 178 |
+
dataset = load_dataset("YOUR_USERNAME/YOUR_DATASET_NAME")
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
## 注意事项
|
| 182 |
+
|
| 183 |
+
### 关于 Hugging Face Spaces
|
| 184 |
+
|
| 185 |
+
- **免费版限制**:免费版 Spaces 有存储限制(约 16GB)和 CPU 时间限制
|
| 186 |
+
- **持久化问题**:免费版 Spaces 重启后文件修改会丢失
|
| 187 |
+
- **解决方案**:
|
| 188 |
+
1. 定期导出审核记录并下载
|
| 189 |
+
2. 使用 Hugging Face Datasets 存储审核结果
|
| 190 |
+
3. 使用外部数据库(如 Supabase、PlanetScale 免费套餐)
|
| 191 |
+
|
| 192 |
+
### 关于审核记录持久化
|
| 193 |
+
|
| 194 |
+
审核记录保存在 `reviews/reviews.json` 文件中。为了防止数据丢失:
|
| 195 |
+
|
| 196 |
+
1. **定期导出**:使用界面上的"导出审核记录"按钮
|
| 197 |
+
2. **云存储同步**:可以配置自动同步到云存储
|
| 198 |
+
3. **外部数据库**:修改 `data_manager.py` 使用数据库存储
|
| 199 |
+
|
| 200 |
+
## 自定义扩展
|
| 201 |
+
|
| 202 |
+
### 添加新的问题类型
|
| 203 |
+
|
| 204 |
+
在 `app.py` 中修改 `issue_type_dropdown` 的 choices:
|
| 205 |
+
|
| 206 |
+
```python
|
| 207 |
+
issue_type_dropdown = gr.Dropdown(
|
| 208 |
+
label="问题类型",
|
| 209 |
+
choices=[
|
| 210 |
+
"问题歧义",
|
| 211 |
+
"答案错误",
|
| 212 |
+
"图表不清晰",
|
| 213 |
+
"问题不合理",
|
| 214 |
+
"答案格式错误",
|
| 215 |
+
"你的新类型", # 添加新类型
|
| 216 |
+
"其他"
|
| 217 |
+
],
|
| 218 |
+
...
|
| 219 |
+
)
|
| 220 |
+
```
|
| 221 |
+
|
| 222 |
+
### 添加新的审核状态
|
| 223 |
+
|
| 224 |
+
在 `app.py` 中修改 `status_radio` 的 choices:
|
| 225 |
+
|
| 226 |
+
```python
|
| 227 |
+
status_radio = gr.Radio(
|
| 228 |
+
label="审核状态",
|
| 229 |
+
choices=[
|
| 230 |
+
("✅ 正确", "correct"),
|
| 231 |
+
("❌ 错误", "incorrect"),
|
| 232 |
+
("✏️ 需修改", "needs_modification"),
|
| 233 |
+
("⏳ 待定", "pending"),
|
| 234 |
+
("🆕 你的新状态", "new_status"), # 添加新状态
|
| 235 |
+
],
|
| 236 |
+
...
|
| 237 |
+
)
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
## 许可证
|
| 241 |
+
|
| 242 |
+
MIT License
|
| 243 |
+
|
| 244 |
+
---
|
| 245 |
+
|
| 246 |
+
如有问题或建议,欢迎提 Issue 或 PR!
|
app.py
ADDED
|
@@ -0,0 +1,694 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
图表问答数据集审核系统 - Gradio 应用
|
| 3 |
+
用于人工审核每个图表对应的问题和答案是否合理正确
|
| 4 |
+
"""
|
| 5 |
+
import gradio as gr
|
| 6 |
+
from data_manager import DataManager, data_manager
|
| 7 |
+
from typing import Dict, List, Optional, Tuple, Any
|
| 8 |
+
import json
|
| 9 |
+
import os
|
| 10 |
+
|
| 11 |
+
# ============== 全局状态 ==============
|
| 12 |
+
|
| 13 |
+
class AppState:
    """Application-wide review state: the current selection in the dataset
    hierarchy plus a navigation cursor over the flat list of chart paths."""

    def __init__(self):
        # Current selection within the Source -> Chart Type -> Chart -> Model hierarchy.
        self.current_source: str = ""
        self.current_chart_type: str = ""
        self.current_chart_id: str = ""
        self.current_model: str = ""
        # Flat list of every (source, chart_type, chart_id, model) combination.
        self.all_paths: List[Dict] = []
        # Index into all_paths; -1 means "nothing selected yet".
        self.current_index: int = -1

        # Populate all_paths up front so navigation works immediately.
        self.refresh_paths()

    def refresh_paths(self):
        """Reload the flat chart-path list from the data manager."""
        self.all_paths = data_manager.get_all_chart_paths()

    def get_current_path(self) -> Optional[Dict]:
        """Return the path dict under the cursor, or None when out of range."""
        in_range = 0 <= self.current_index < len(self.all_paths)
        return self.all_paths[self.current_index] if in_range else None

    def set_position(self, source: str, chart_type: str, chart_id: str, model: str):
        """Record the current selection and sync the cursor to the matching path."""
        self.current_source = source
        self.current_chart_type = chart_type
        self.current_chart_id = chart_id
        self.current_model = model

        # Locate the matching entry; leave the cursor untouched when absent.
        wanted = (source, chart_type, chart_id, model)
        for idx, entry in enumerate(self.all_paths):
            key = (entry['source'], entry['chart_type'],
                   entry['chart_id'], entry['model'])
            if key == wanted:
                self.current_index = idx
                break

    def navigate(self, direction: int) -> bool:
        """
        Move the cursor to the previous or next chart.

        Args:
            direction: 1 for the next chart, -1 for the previous one.

        Returns:
            True when the move stayed within bounds, False otherwise.
        """
        target = self.current_index + direction
        if not (0 <= target < len(self.all_paths)):
            return False

        self.current_index = target
        entry = self.all_paths[target]
        self.current_source = entry['source']
        self.current_chart_type = entry['chart_type']
        self.current_chart_id = entry['chart_id']
        self.current_model = entry['model']
        return True
|
| 72 |
+
|
| 73 |
+
# Module-level singleton shared by every UI callback below.
state = AppState()
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ============== UI 更新函数 ==============
|
| 77 |
+
|
| 78 |
+
def get_dataset_choices() -> Tuple[List[str], List[str], List[str], List[str]]:
    """Return (sources, chart_types, charts, models) dropdown choices,
    narrowed by the source / chart type currently held in ``state``."""
    structure = data_manager.get_dataset_structure()

    sources = list(structure.get('sources', {}))

    chart_types: List[str] = []
    charts: List[str] = []
    models: List[str] = []

    if state.current_source:
        source_entry = structure['sources'].get(state.current_source, {})
        type_map = source_entry.get('chart_types', {})
        chart_types = list(type_map)

        if state.current_chart_type:
            charts = data_manager.get_chart_list(
                state.current_source, state.current_chart_type)
            models = type_map.get(state.current_chart_type, {}).get('models', [])

    return sources, chart_types, charts, models
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def update_chart_type_dropdown(source: str) -> gr.Dropdown:
    """Refresh the chart-type dropdown after the source selection changed."""
    state.current_source = source
    structure = data_manager.get_dataset_structure()

    type_map = structure.get('sources', {}).get(source, {}).get('chart_types', {})
    chart_types = list(type_map)

    # Preselect the first type so dependent dropdowns cascade immediately.
    default = chart_types[0] if chart_types else None
    return gr.Dropdown(choices=chart_types, value=default)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def update_chart_dropdown(source: str, chart_type: str) -> Tuple[gr.Dropdown, gr.Dropdown]:
    """Refresh the chart-id and model dropdowns after the chart type changed."""
    state.current_source = source
    state.current_chart_type = chart_type

    charts = data_manager.get_chart_list(source, chart_type)

    structure = data_manager.get_dataset_structure()
    ct_entry = (
        structure.get('sources', {})
        .get(source, {})
        .get('chart_types', {})
        .get(chart_type, {})
    )
    models = ct_entry.get('models', [])

    chart_dd = gr.Dropdown(choices=charts, value=charts[0] if charts else None)
    model_dd = gr.Dropdown(choices=models, value=models[0] if models else None)
    return chart_dd, model_dd
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def load_chart_data(source: str, chart_type: str, chart_id: str, model: str):
    """
    Load the chart's HTML, label metadata, QA list and existing reviews for
    the selected (source, chart_type, chart_id, model) combination.

    Returns:
        A list of positional values for the bound output components:
        [html_content, label_text, qa_list, status_text, progress_text,
         existing_reviews]
    """
    if not all([source, chart_type, chart_id, model]):
        # Incomplete selection: clear every output. FIX: return a plain dict
        # for the gr.State output slot — the success path below returns the
        # raw dict, and returning gr.State({}) here would store a component
        # instance as the state value instead of the value itself.
        return [
            "",                  # html_display
            "",                  # label_info
            [],                  # qa_data
            "",                  # status_text
            "请在左侧选择图表",   # progress_text
            {}                   # current_qa_reviews
        ]

    # Sync the global navigation cursor to this selection.
    state.set_position(source, chart_type, chart_id, model)

    # Fetch the chart HTML and its label metadata.
    chart_data = data_manager.get_chart_data(source, chart_type, chart_id)
    html_content = chart_data.get('html_content', '')
    label_info = chart_data.get('label_info', {})

    # Render the label metadata as a markdown table.
    if label_info:
        label_text = f"""
### 图表信息

| 属性 | 值 |
|------|-----|
| **编号** | {label_info.get('Number', '-')} |
| **类型** | {label_info.get('Type', '-')} |
| **来源** | {label_info.get('Source', '-')} |
| **主题** | {label_info.get('Topic', '-')} |
| **描述** | {label_info.get('Describe', '-')} |
| **链接** | [查看原图]({label_info.get('Weblink', '#')}) |
"""
    else:
        label_text = "暂无标签信息"

    # QA pairs produced by the selected model for this chart.
    qa_list = data_manager.get_qa_list(source, chart_type, model, chart_id)

    # Index existing review records by qa_id for O(1) lookup in the UI.
    existing_reviews = {
        review['qa_id']: review
        for review in data_manager.get_reviews_by_chart(chart_id, model)
    }

    # Progress indicator over the flat chart-path list.
    progress_text = f"当前: {state.current_index + 1} / {len(state.all_paths)} 个图表"

    # Aggregate review counters for the status bar.
    stats = data_manager.get_review_stats()
    status_text = f"已审核: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"

    return [
        html_content,       # html_display
        label_text,         # label_info
        qa_list,            # qa_data (for state)
        status_text,        # status_text
        progress_text,      # progress_text
        existing_reviews    # current_qa_reviews
    ]
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def navigate_prev():
    """Step the cursor to the previous chart and emit dropdown updates."""
    moved = state.navigate(-1)
    entry = state.get_current_path() if moved else None
    if entry is not None:
        return (
            gr.Dropdown(value=entry['source']),
            gr.Dropdown(value=entry['chart_type']),
            gr.Dropdown(value=entry['chart_id']),
            gr.Dropdown(value=entry['model'])
        )
    # Already at the first chart (or no data): leave the dropdowns unchanged.
    return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()]
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def navigate_next():
    """Step the cursor to the next chart and emit dropdown updates."""
    moved = state.navigate(1)
    entry = state.get_current_path() if moved else None
    if entry is not None:
        return (
            gr.Dropdown(value=entry['source']),
            gr.Dropdown(value=entry['chart_type']),
            gr.Dropdown(value=entry['chart_id']),
            gr.Dropdown(value=entry['model'])
        )
    # Already at the last chart (or no data): leave the dropdowns unchanged.
    return [gr.Dropdown(), gr.Dropdown(), gr.Dropdown(), gr.Dropdown()]
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def save_review_handler(
    qa_id: str,
    chart_id: str,
    source: str,
    chart_type: str,
    model: str,
    original_question: str,
    original_answer: str,
    status: str,
    modified_question: str,
    modified_answer: str,
    issue_type: str,
    comment: str,
    reviewer: str
) -> str:
    """Persist a single review record and report the refreshed totals."""
    record = dict(
        qa_id=qa_id,
        chart_id=chart_id,
        source=source,
        chart_type=chart_type,
        model=model,
        original_question=original_question,
        original_answer=original_answer,
        status=status,
        modified_question=modified_question,
        modified_answer=modified_answer,
        issue_type=issue_type,
        comment=comment,
        reviewer=reviewer,
    )

    data_manager.save_review(record)

    # Surface the updated aggregate counters back to the UI.
    stats = data_manager.get_review_stats()
    return f"✅ 已保存! 总计: {stats['total']} | ✅正确: {stats['correct']} | ❌错误: {stats['incorrect']} | ✏️需修改: {stats['needs_modification']}"
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def export_reviews_handler():
    """Dump all review records to a JSON file and report its location."""
    destination = data_manager.export_reviews("./reviews_export.json")
    return f"✅ 审核记录已导出至: {destination}"
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# ============== 创建 Gradio 界面 ==============
|
| 267 |
+
|
| 268 |
+
def create_ui():
|
| 269 |
+
"""创建 Gradio 界面"""
|
| 270 |
+
|
| 271 |
+
# 自定义 CSS
|
| 272 |
+
custom_css = """
|
| 273 |
+
.chart-container {
|
| 274 |
+
height: 600px;
|
| 275 |
+
overflow: auto;
|
| 276 |
+
border: 1px solid #e0e0e0;
|
| 277 |
+
border-radius: 8px;
|
| 278 |
+
padding: 10px;
|
| 279 |
+
background: #fafafa;
|
| 280 |
+
}
|
| 281 |
+
|
| 282 |
+
.qa-item {
|
| 283 |
+
border: 1px solid #e0e0e0;
|
| 284 |
+
border-radius: 8px;
|
| 285 |
+
padding: 15px;
|
| 286 |
+
margin-bottom: 15px;
|
| 287 |
+
background: white;
|
| 288 |
+
}
|
| 289 |
+
|
| 290 |
+
.qa-question {
|
| 291 |
+
font-weight: bold;
|
| 292 |
+
color: #1a73e8;
|
| 293 |
+
margin-bottom: 8px;
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
.qa-answer {
|
| 297 |
+
color: #0d652d;
|
| 298 |
+
padding: 8px;
|
| 299 |
+
background: #e8f5e9;
|
| 300 |
+
border-radius: 4px;
|
| 301 |
+
margin-bottom: 10px;
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.status-correct { background-color: #d4edda !important; border-color: #c3e6cb !important; }
|
| 305 |
+
.status-incorrect { background-color: #f8d7da !important; border-color: #f5c6cb !important; }
|
| 306 |
+
.status-needs-modification { background-color: #fff3cd !important; border-color: #ffeeba !important; }
|
| 307 |
+
.status-pending { background-color: #e2e3e5 !important; border-color: #d6d8db !important; }
|
| 308 |
+
|
| 309 |
+
.control-panel {
|
| 310 |
+
background: #f8f9fa;
|
| 311 |
+
padding: 15px;
|
| 312 |
+
border-radius: 8px;
|
| 313 |
+
margin-bottom: 10px;
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
.stats-panel {
|
| 317 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 318 |
+
color: white;
|
| 319 |
+
padding: 15px;
|
| 320 |
+
border-radius: 8px;
|
| 321 |
+
margin-bottom: 15px;
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
.nav-button {
|
| 325 |
+
min-width: 120px;
|
| 326 |
+
}
|
| 327 |
+
"""
|
| 328 |
+
|
| 329 |
+
with gr.Blocks(
|
| 330 |
+
title="图表问答数据集审核系统",
|
| 331 |
+
theme=gr.themes.Soft(),
|
| 332 |
+
css=custom_css
|
| 333 |
+
) as app:
|
| 334 |
+
|
| 335 |
+
# 隐藏的状态存储
|
| 336 |
+
current_qa_reviews = gr.State({})
|
| 337 |
+
qa_data_state = gr.State([])
|
| 338 |
+
|
| 339 |
+
# ==================== 标题栏 ====================
|
| 340 |
+
gr.Markdown("""
|
| 341 |
+
# 📊 图表问答数据集审核系统
|
| 342 |
+
|
| 343 |
+
审核每个图表对应的问题和答案是否合理正确。使用 ← → 按钮或键盘方向键切换图表。
|
| 344 |
+
""")
|
| 345 |
+
|
| 346 |
+
# ==================== 顶部状态栏 ====================
|
| 347 |
+
with gr.Row():
|
| 348 |
+
status_text = gr.Textbox(
|
| 349 |
+
label="审核统计",
|
| 350 |
+
value="等待加载数据...",
|
| 351 |
+
interactive=False,
|
| 352 |
+
show_label=False
|
| 353 |
+
)
|
| 354 |
+
progress_text = gr.Textbox(
|
| 355 |
+
label="进度",
|
| 356 |
+
value="请在左侧选择图表",
|
| 357 |
+
interactive=False,
|
| 358 |
+
show_label=False,
|
| 359 |
+
scale=1
|
| 360 |
+
)
|
| 361 |
+
|
| 362 |
+
# ==================== 主内容区 ====================
|
| 363 |
+
with gr.Row():
|
| 364 |
+
# ===== 左侧:导航控制 =====
|
| 365 |
+
with gr.Column(scale=1, min_width=250):
|
| 366 |
+
gr.Markdown("### 📁 数据选择")
|
| 367 |
+
|
| 368 |
+
source_dropdown = gr.Dropdown(
|
| 369 |
+
label="数据来源 (Source)",
|
| 370 |
+
choices=[],
|
| 371 |
+
interactive=True
|
| 372 |
+
)
|
| 373 |
+
|
| 374 |
+
chart_type_dropdown = gr.Dropdown(
|
| 375 |
+
label="图表类型 (Chart Type)",
|
| 376 |
+
choices=[],
|
| 377 |
+
interactive=True
|
| 378 |
+
)
|
| 379 |
+
|
| 380 |
+
chart_dropdown = gr.Dropdown(
|
| 381 |
+
label="图表 ID",
|
| 382 |
+
choices=[],
|
| 383 |
+
interactive=True
|
| 384 |
+
)
|
| 385 |
+
|
| 386 |
+
model_dropdown = gr.Dropdown(
|
| 387 |
+
label="模型 (Model)",
|
| 388 |
+
choices=[],
|
| 389 |
+
interactive=True
|
| 390 |
+
)
|
| 391 |
+
|
| 392 |
+
gr.Markdown("---")
|
| 393 |
+
|
| 394 |
+
# 导航按钮
|
| 395 |
+
with gr.Row():
|
| 396 |
+
prev_btn = gr.Button("⬅️ 上一个", elem_classes=["nav-button"])
|
| 397 |
+
next_btn = gr.Button("➡️ 下一个", elem_classes=["nav-button"])
|
| 398 |
+
|
| 399 |
+
# 导出按钮
|
| 400 |
+
export_btn = gr.Button("📥 导出审核记录", variant="secondary")
|
| 401 |
+
export_result = gr.Textbox(label="", visible=False)
|
| 402 |
+
|
| 403 |
+
# 审核人设置
|
| 404 |
+
reviewer_input = gr.Textbox(
|
| 405 |
+
label="审核人",
|
| 406 |
+
value="default",
|
| 407 |
+
interactive=True
|
| 408 |
+
)
|
| 409 |
+
|
| 410 |
+
# ===== 中间:图表展示 =====
|
| 411 |
+
with gr.Column(scale=2, min_width=400):
|
| 412 |
+
gr.Markdown("### 📈 图表展示")
|
| 413 |
+
|
| 414 |
+
# HTML 图表展示
|
| 415 |
+
html_display = gr.HTML(
|
| 416 |
+
value="<div style='text-align:center;padding:50px;color:#999;'>请选择图表</div>",
|
| 417 |
+
elem_classes=["chart-container"]
|
| 418 |
+
)
|
| 419 |
+
|
| 420 |
+
# ===== 右侧:标签信息和 QA 审核 =====
|
| 421 |
+
with gr.Column(scale=2, min_width=400):
|
| 422 |
+
# 标签信息
|
| 423 |
+
gr.Markdown("### 📝 图表标签")
|
| 424 |
+
label_display = gr.Markdown(
|
| 425 |
+
value="暂无信息",
|
| 426 |
+
elem_classes=["control-panel"]
|
| 427 |
+
)
|
| 428 |
+
|
| 429 |
+
# QA 审核区
|
| 430 |
+
gr.Markdown("### ❓ 问答审核")
|
| 431 |
+
|
| 432 |
+
# 审核面板 (动态生成)
|
| 433 |
+
with gr.Column(visible=True) as review_panel:
|
| 434 |
+
# 当前选中的 QA 信息(隐藏)
|
| 435 |
+
current_qa_id = gr.Textbox(visible=False)
|
| 436 |
+
current_qa_question = gr.Textbox(visible=False)
|
| 437 |
+
current_qa_answer = gr.Textbox(visible=False)
|
| 438 |
+
|
| 439 |
+
# QA 显示
|
| 440 |
+
qa_question_display = gr.Textbox(
|
| 441 |
+
label="问题",
|
| 442 |
+
interactive=False,
|
| 443 |
+
lines=2
|
| 444 |
+
)
|
| 445 |
+
qa_answer_display = gr.Textbox(
|
| 446 |
+
label="答案",
|
| 447 |
+
interactive=False,
|
| 448 |
+
lines=1
|
| 449 |
+
)
|
| 450 |
+
|
| 451 |
+
# QA 选择器
|
| 452 |
+
qa_selector = gr.Radio(
|
| 453 |
+
label="选择要审核的问答对",
|
| 454 |
+
choices=[],
|
| 455 |
+
interactive=True
|
| 456 |
+
)
|
| 457 |
+
|
| 458 |
+
gr.Markdown("---")
|
| 459 |
+
gr.Markdown("#### 审核操作")
|
| 460 |
+
|
| 461 |
+
# 审核状态选择
|
| 462 |
+
status_radio = gr.Radio(
|
| 463 |
+
label="审核状态",
|
| 464 |
+
choices=[
|
| 465 |
+
("✅ 正确", "correct"),
|
| 466 |
+
("❌ 错误", "incorrect"),
|
| 467 |
+
("✏️ 需修改", "needs_modification"),
|
| 468 |
+
("⏳ 待定", "pending")
|
| 469 |
+
],
|
| 470 |
+
value="pending",
|
| 471 |
+
interactive=True
|
| 472 |
+
)
|
| 473 |
+
|
| 474 |
+
# 问题类型(仅在选择错误或需修改时显示)
|
| 475 |
+
issue_type_dropdown = gr.Dropdown(
|
| 476 |
+
label="问题类型",
|
| 477 |
+
choices=[
|
| 478 |
+
"问题歧义",
|
| 479 |
+
"答案错误",
|
| 480 |
+
"图表不清晰",
|
| 481 |
+
"问题不合理",
|
| 482 |
+
"答案格式错误",
|
| 483 |
+
"其他"
|
| 484 |
+
],
|
| 485 |
+
interactive=True,
|
| 486 |
+
visible=True
|
| 487 |
+
)
|
| 488 |
+
|
| 489 |
+
# 修改后的问题和答案
|
| 490 |
+
modified_question = gr.Textbox(
|
| 491 |
+
label="修改后的问题",
|
| 492 |
+
placeholder="如需修改问题,请在此输入...",
|
| 493 |
+
lines=2,
|
| 494 |
+
interactive=True
|
| 495 |
+
)
|
| 496 |
+
|
| 497 |
+
modified_answer = gr.Textbox(
|
| 498 |
+
label="修改后的答案",
|
| 499 |
+
placeholder="如需修改答案,请在此输入...",
|
| 500 |
+
lines=1,
|
| 501 |
+
interactive=True
|
| 502 |
+
)
|
| 503 |
+
|
| 504 |
+
# 评论
|
| 505 |
+
comment_textbox = gr.Textbox(
|
| 506 |
+
label="评论/备注",
|
| 507 |
+
placeholder="请输入审核意见或备注...",
|
| 508 |
+
lines=2,
|
| 509 |
+
interactive=True
|
| 510 |
+
)
|
| 511 |
+
|
| 512 |
+
# 保存按钮
|
| 513 |
+
save_btn = gr.Button("💾 保存审核结果", variant="primary")
|
| 514 |
+
save_result = gr.Textbox(label="", visible=False)
|
| 515 |
+
|
| 516 |
+
# ==================== 事件绑定 ====================
|
| 517 |
+
|
| 518 |
+
# 初始化数据集选择
|
| 519 |
+
def init_dataset():
    """Populate the source dropdown from the dataset directory structure."""
    structure = data_manager.get_dataset_structure()
    sources = list(structure.get('sources', {}).keys())
    default_source = sources[0] if sources else None
    return gr.Dropdown(choices=sources, value=default_source)
|
| 523 |
+
|
| 524 |
+
app.load(
|
| 525 |
+
fn=init_dataset,
|
| 526 |
+
outputs=[source_dropdown]
|
| 527 |
+
)
|
| 528 |
+
|
| 529 |
+
# Source 变化 -> 更新 Chart Type
|
| 530 |
+
source_dropdown.change(
|
| 531 |
+
fn=update_chart_type_dropdown,
|
| 532 |
+
inputs=[source_dropdown],
|
| 533 |
+
outputs=[chart_type_dropdown]
|
| 534 |
+
)
|
| 535 |
+
|
| 536 |
+
# Chart Type 变化 -> 更新 Chart 和 Model
|
| 537 |
+
chart_type_dropdown.change(
|
| 538 |
+
fn=update_chart_dropdown,
|
| 539 |
+
inputs=[source_dropdown, chart_type_dropdown],
|
| 540 |
+
outputs=[chart_dropdown, model_dropdown]
|
| 541 |
+
)
|
| 542 |
+
|
| 543 |
+
# 选择图表 -> 加载数据
|
| 544 |
+
def on_chart_selected(source, chart_type, chart_id, model):
    """Load data for the selected chart and refresh the QA radio choices."""
    results = load_chart_data(source, chart_type, chart_id, model)
    # results[2] is the qa_data slot of load_chart_data's return value.
    qa_list = results[2]
    if qa_list:
        qa_choices = [f"Q{i+1}: {qa.question[:50]}..." for i, qa in enumerate(qa_list)]
    else:
        qa_choices = []
    selected = qa_choices[0] if qa_choices else None
    return results + [gr.Radio(choices=qa_choices, value=selected)]
|
| 550 |
+
|
| 551 |
+
model_dropdown.change(
|
| 552 |
+
fn=on_chart_selected,
|
| 553 |
+
inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown],
|
| 554 |
+
outputs=[
|
| 555 |
+
html_display, label_display, qa_data_state, status_text, progress_text,
|
| 556 |
+
current_qa_reviews, qa_selector
|
| 557 |
+
]
|
| 558 |
+
)
|
| 559 |
+
|
| 560 |
+
chart_dropdown.change(
|
| 561 |
+
fn=on_chart_selected,
|
| 562 |
+
inputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown],
|
| 563 |
+
outputs=[
|
| 564 |
+
html_display, label_display, qa_data_state, status_text, progress_text,
|
| 565 |
+
current_qa_reviews, qa_selector
|
| 566 |
+
]
|
| 567 |
+
)
|
| 568 |
+
|
| 569 |
+
# QA 选择器变化 -> 更新审核面板
|
| 570 |
+
def on_qa_selected(qa_index_str, qa_list, existing_reviews):
    """Fill the review panel for the QA pair picked in the radio selector.

    Args:
        qa_index_str: Radio label like "Q3: <question preview>...", or falsy.
        qa_list: List of QA items for the current chart (objects with
            ``id``/``question``/``answer`` attributes).
        existing_reviews: Mapping of qa_id -> saved review dict.

    Returns:
        Values for: current_qa_id, question display, answer display,
        status radio, issue type, modified question, modified answer, comment.
    """
    # No selection or no QA data: reset the whole panel.
    if not qa_index_str or not qa_list:
        return [""] * 3 + [gr.Radio(value="pending"), "", "", "", ""]

    # Recover the 0-based index from the "Q<n>: ..." label.
    try:
        index = int(qa_index_str.split(":")[0].replace("Q", "")) - 1
        qa = qa_list[index]
    except (ValueError, IndexError):
        # Malformed label or stale index: reset instead of crashing.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt etc.)
        return [""] * 3 + [gr.Radio(value="pending"), "", "", "", ""]

    # Pre-fill from an existing review of this QA pair, if any.
    review = existing_reviews.get(qa.id, {})

    return [
        qa.id,                                           # current_qa_id
        qa.question,                                     # qa_question_display
        qa.answer,                                       # qa_answer_display
        gr.Radio(value=review.get('status', 'pending')), # status_radio
        review.get('issue_type', ''),                    # issue_type_dropdown
        review.get('modified_question', ''),             # modified_question
        review.get('modified_answer', ''),               # modified_answer
        review.get('comment', '')                        # comment_textbox
    ]
|
| 594 |
+
|
| 595 |
+
qa_selector.change(
|
| 596 |
+
fn=on_qa_selected,
|
| 597 |
+
inputs=[qa_selector, qa_data_state, current_qa_reviews],
|
| 598 |
+
outputs=[
|
| 599 |
+
current_qa_id, qa_question_display, qa_answer_display,
|
| 600 |
+
status_radio, issue_type_dropdown, modified_question, modified_answer, comment_textbox
|
| 601 |
+
]
|
| 602 |
+
)
|
| 603 |
+
|
| 604 |
+
# 导航按钮
|
| 605 |
+
prev_btn.click(
|
| 606 |
+
fn=navigate_prev,
|
| 607 |
+
outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
|
| 608 |
+
)
|
| 609 |
+
|
| 610 |
+
next_btn.click(
|
| 611 |
+
fn=navigate_next,
|
| 612 |
+
outputs=[source_dropdown, chart_type_dropdown, chart_dropdown, model_dropdown]
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
+
# 保存审核
|
| 616 |
+
save_btn.click(
|
| 617 |
+
fn=save_review_handler,
|
| 618 |
+
inputs=[
|
| 619 |
+
current_qa_id,
|
| 620 |
+
chart_dropdown,
|
| 621 |
+
source_dropdown,
|
| 622 |
+
chart_type_dropdown,
|
| 623 |
+
model_dropdown,
|
| 624 |
+
qa_question_display,
|
| 625 |
+
qa_answer_display,
|
| 626 |
+
status_radio,
|
| 627 |
+
modified_question,
|
| 628 |
+
modified_answer,
|
| 629 |
+
issue_type_dropdown,
|
| 630 |
+
comment_textbox,
|
| 631 |
+
reviewer_input
|
| 632 |
+
],
|
| 633 |
+
outputs=[save_result]
|
| 634 |
+
).then(
|
| 635 |
+
fn=lambda: gr.Textbox(visible=True),
|
| 636 |
+
outputs=[save_result]
|
| 637 |
+
)
|
| 638 |
+
|
| 639 |
+
# 导出
|
| 640 |
+
export_btn.click(
|
| 641 |
+
fn=export_reviews_handler,
|
| 642 |
+
outputs=[export_result]
|
| 643 |
+
).then(
|
| 644 |
+
fn=lambda: gr.Textbox(visible=True),
|
| 645 |
+
outputs=[export_result]
|
| 646 |
+
)
|
| 647 |
+
|
| 648 |
+
# 键盘快捷键
|
| 649 |
+
app.load(
|
| 650 |
+
fn=lambda: None,
|
| 651 |
+
js="""
|
| 652 |
+
() => {
|
| 653 |
+
document.addEventListener('keydown', (e) => {
|
| 654 |
+
if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA') return;
|
| 655 |
+
|
| 656 |
+
if (e.key === 'ArrowLeft') {
|
| 657 |
+
document.querySelector('button:has(.nav-button)')?.click();
|
| 658 |
+
// 模拟点击上一个按钮
|
| 659 |
+
const prevBtn = Array.from(document.querySelectorAll('button')).find(b => b.textContent.includes('上一个'));
|
| 660 |
+
if (prevBtn) prevBtn.click();
|
| 661 |
+
}
|
| 662 |
+
if (e.key === 'ArrowRight') {
|
| 663 |
+
const nextBtn = Array.from(document.querySelectorAll('button')).find(b => b.textContent.includes('下一个'));
|
| 664 |
+
if (nextBtn) nextBtn.click();
|
| 665 |
+
}
|
| 666 |
+
if (e.key === '1') {
|
| 667 |
+
document.querySelector('input[value="correct"]')?.click();
|
| 668 |
+
}
|
| 669 |
+
if (e.key === '2') {
|
| 670 |
+
document.querySelector('input[value="incorrect"]')?.click();
|
| 671 |
+
}
|
| 672 |
+
if (e.key === '3') {
|
| 673 |
+
document.querySelector('input[value="needs_modification"]')?.click();
|
| 674 |
+
}
|
| 675 |
+
if (e.key === '4') {
|
| 676 |
+
document.querySelector('input[value="pending"]')?.click();
|
| 677 |
+
}
|
| 678 |
+
});
|
| 679 |
+
}
|
| 680 |
+
"""
|
| 681 |
+
)
|
| 682 |
+
|
| 683 |
+
return app
|
| 684 |
+
|
| 685 |
+
|
| 686 |
+
# ============== 主入口 ==============
|
| 687 |
+
|
| 688 |
+
if __name__ == "__main__":
    # Build the review UI and serve it on all interfaces at port 7860.
    demo = create_ui()
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
create_sample_data.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
生成示例数据脚本
|
| 3 |
+
用于测试审核系统
|
| 4 |
+
"""
|
| 5 |
+
import os
|
| 6 |
+
import json
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
def create_sample_dataset(base_path="./dataset"):
    """Create a small synthetic dataset for testing the review system.

    Generates, under ``base_path``:
        web/<source>/<chart_type>/<chart_id>.html          rendered charts
        label/<source>/<chart_type>/<chart_id>.json        chart metadata
        question_answer/<source>/<chart_type>/<model>/*.json  QA pairs

    Args:
        base_path: Root directory to create the dataset under. Added as a
            backward-compatible parameter (defaults to the original hard-coded
            "./dataset") so tests can target a scratch directory.
    """
    base_path = Path(base_path)

    # Sample configuration: data sources, their chart types, and QA models.
    sources = ["Apache_Echarts", "Plotly", "ChartJS"]
    chart_types = {
        "Apache_Echarts": ["bar", "line", "pie"],
        "Plotly": ["scatter", "bar", "heatmap"],
        "ChartJS": ["line", "doughnut", "radar"],
    }
    models = ["gpt-4", "claude-3", "gemini-pro"]

    for source in sources:
        for chart_type in chart_types[source]:
            # Create the per-(source, chart_type) directories.
            web_dir = base_path / "web" / source / chart_type
            label_dir = base_path / "label" / source / chart_type
            web_dir.mkdir(parents=True, exist_ok=True)
            label_dir.mkdir(parents=True, exist_ok=True)

            for model in models:
                qa_dir = base_path / "question_answer" / source / chart_type / model
                qa_dir.mkdir(parents=True, exist_ok=True)

            # Three sample charts per chart type.
            for i in range(1, 4):
                chart_id = f"chart_{str(i).zfill(4)}_{chart_type}"

                # HTML page rendering a random-data ECharts chart.
                html_content = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{chart_id}</title>
    <script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
    <style>
        body {{ margin: 0; padding: 20px; font-family: Arial, sans-serif; }}
        #chart {{ width: 100%; height: 400px; }}
        .title {{ text-align: center; color: #333; margin-bottom: 20px; }}
    </style>
</head>
<body>
    <h2 class="title">示例图表 - {source} - {chart_type} #{i}</h2>
    <div id="chart"></div>
    <script>
        var chart = echarts.init(document.getElementById('chart'));
        var option = {{
            title: {{ text: 'Sample {chart_type.capitalize()} Chart' }},
            tooltip: {{}},
            xAxis: {{ data: ['A', 'B', 'C', 'D', 'E'] }},
            yAxis: {{}},
            series: [{{
                type: '{chart_type}',
                data: [Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100, Math.random() * 100]
            }}]
        }};
        chart.setOption(option);
    </script>
</body>
</html>"""

                with open(web_dir / f"{chart_id}.html", "w", encoding="utf-8") as f:
                    f.write(html_content)

                # Label metadata mirroring the LabelInfo schema.
                label_data = {
                    "Number": str(i).zfill(4),
                    "Type": chart_type,
                    "Source": source,
                    "Weblink": f"https://example.com/{source}/{chart_type}/{i}",
                    "Topic": f"Sample {chart_type} chart #{i}",
                    "Describe": f"This is a sample {chart_type} chart for testing the review system. It demonstrates the visualization capabilities of {source}.",
                    "Other": ""
                }

                with open(label_dir / f"{chart_id}.json", "w", encoding="utf-8") as f:
                    json.dump(label_data, f, ensure_ascii=False, indent=2)

                # Two QA pairs per chart per model; answers vary by (q, model index).
                for j, model in enumerate(models):
                    qa_dir = base_path / "question_answer" / source / chart_type / model

                    for q in range(1, 3):
                        qa_data = {
                            "id": f"{chart_id}_q{q}",
                            "chart": chart_id,
                            "question": f"在图表 {chart_id} 中,第 {q} 个数据点的值是多少?",
                            "answer": f"约为 {int(50 + q * 10 + j * 5)}"
                        }

                        with open(qa_dir / f"{chart_id}_q{q}.json", "w", encoding="utf-8") as f:
                            json.dump(qa_data, f, ensure_ascii=False, indent=2)

    print("✅ 示例数据集创建完成!")
    print(f"📁 数据集位置: {base_path.absolute()}")

    # Summary statistics: 3 charts per (source, chart_type); 2 QA pairs per model.
    total_charts = sum(len(chart_types[s]) * 3 for s in sources)
    total_qa = total_charts * len(models) * 2
    print(f"📊 共创建 {total_charts} 个图表")
    print(f"❓ 共创建 {total_qa} 个问答对")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
if __name__ == "__main__":
    # Running as a script generates the sample dataset under ./dataset.
    create_sample_dataset()
|
data_manager.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
数据管理模块 - 负责加载数据集和管理审核记录
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import json
|
| 6 |
+
import uuid
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from typing import Dict, List, Optional, Any
|
| 9 |
+
from dataclasses import dataclass, asdict
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# ============== 数据类定义 ==============
|
| 14 |
+
|
| 15 |
+
@dataclass
class LabelInfo:
    """Chart label metadata loaded from dataset/label/<source>/<type>/<chart_id>.json.

    Field names mirror the keys of the label JSON files exactly, hence the
    CamelCase naming.
    """
    Number: str    # chart number string, e.g. "0001"
    Type: str      # chart type, e.g. "bar"
    Source: str    # data source / charting library name
    Weblink: str   # URL the chart originates from
    Topic: str     # short topic/title of the chart
    Describe: str  # free-text description of the chart
    Other: str = ""  # optional extra notes
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class QAItem:
    """A single question/answer pair generated by a model for one chart."""
    id: str        # QA identifier, e.g. "<chart_id>_q1"
    chart: str     # id of the chart this QA pair belongs to
    question: str  # question text
    answer: str    # reference answer text
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class ReviewRecord:
    """A reviewer's verdict on one QA pair, persisted in reviews.json."""
    review_id: str           # unique review id (uuid4 string)
    chart_id: str            # chart the reviewed QA pair belongs to
    qa_id: str               # id of the reviewed QA pair
    source: str              # dataset source name
    chart_type: str          # chart type name
    model: str               # model that produced the QA pair
    original_question: str   # question text as generated
    original_answer: str     # answer text as generated
    status: str  # correct, incorrect, needs_modification, pending
    modified_question: str = ""  # reviewer's corrected question, if any
    modified_answer: str = ""    # reviewer's corrected answer, if any
    issue_type: str = ""  # question_ambiguous, answer_wrong, chart_unclear, other
    comment: str = ""            # free-text review comment
    reviewer: str = "default"    # reviewer identity
    review_time: str = ""        # ISO-8601 timestamp set when the review is saved

    def to_dict(self) -> Dict:
        """Return the record as a plain dict (for JSON serialization)."""
        return asdict(self)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# ============== 数据管理器 ==============
|
| 60 |
+
|
| 61 |
+
class DataManager:
    """Loads the chart dataset and manages persisted review records.

    Dataset layout (relative to ``dataset_path``):
        web/<source>/<chart_type>/<chart_id>.html             rendered charts
        label/<source>/<chart_type>/<chart_id>.json           chart metadata
        question_answer/<source>/<chart_type>/<model>/*.json  QA pairs

    Review records are stored as a single JSON list in
    ``<reviews_path>/reviews.json``.
    """

    def __init__(self, dataset_path: str = "./dataset", reviews_path: str = "./reviews"):
        """
        Initialize the data manager.

        Args:
            dataset_path: Root directory of the dataset.
            reviews_path: Directory where review records are saved.
        """
        self.dataset_path = Path(dataset_path)
        self.reviews_path = Path(reviews_path)
        self.web_path = self.dataset_path / "web"
        self.label_path = self.dataset_path / "label"
        self.qa_path = self.dataset_path / "question_answer"
        self.reviews_file = self.reviews_path / "reviews.json"

        # Make sure the review store exists so later reads never fail.
        self.reviews_path.mkdir(parents=True, exist_ok=True)
        if not self.reviews_file.exists():
            self._save_reviews([])

    # ============== Dataset structure ==============

    def get_dataset_structure(self) -> Dict[str, Any]:
        """Return the dataset tree: source -> chart_type -> counts/models.

        Returns:
            ``{"sources": {source: {"chart_types": {chart_type: {
            "chart_count": int, "reviewed_count": int, "models": [str]}}}}}``
        """
        structure: Dict[str, Any] = {"sources": {}}

        if not self.web_path.exists():
            return structure

        # Load reviews ONCE and bucket reviewed chart ids by (source, chart_type).
        # The original called get_all_reviews() inside the inner loop, re-reading
        # reviews.json for every chart type.
        reviewed_by_key: Dict[Any, set] = {}
        for r in self.get_all_reviews():
            key = (r.get('source'), r.get('chart_type'))
            reviewed_by_key.setdefault(key, set()).add(r.get('chart_id'))

        # Walk every source directory.
        for source_name in self._list_dirs(self.web_path):
            source_web_path = self.web_path / source_name
            source_qa_path = self.qa_path / source_name

            structure["sources"][source_name] = {"chart_types": {}}

            # Walk every chart type under this source.
            for chart_type_name in self._list_dirs(source_web_path):
                chart_type_web_path = source_web_path / chart_type_name
                chart_type_qa_path = source_qa_path / chart_type_name

                # Number of charts = number of .html files.
                chart_files = [f for f in self._list_files(chart_type_web_path) if f.endswith('.html')]

                # Models = subdirectories of the QA tree (may not exist).
                models = self._list_dirs(chart_type_qa_path) if chart_type_qa_path.exists() else []

                reviewed = reviewed_by_key.get((source_name, chart_type_name), set())

                structure["sources"][source_name]["chart_types"][chart_type_name] = {
                    "chart_count": len(chart_files),
                    "reviewed_count": len(reviewed),
                    "models": models
                }

        return structure

    def get_chart_list(self, source: str, chart_type: str) -> List[str]:
        """Return all chart ids (html basenames) under source/chart_type."""
        chart_type_path = self.web_path / source / chart_type
        if not chart_type_path.exists():
            return []

        return [f.replace('.html', '') for f in self._list_files(chart_type_path) if f.endswith('.html')]

    def get_all_chart_paths(self) -> List[Dict[str, str]]:
        """Return every chart/model combination, in traversal order (for navigation).

        Returns:
            List of ``{"source", "chart_type", "chart_id", "model"}`` dicts.
        """
        paths = []
        structure = self.get_dataset_structure()

        for source_name, source_data in structure["sources"].items():
            for chart_type_name, chart_type_data in source_data["chart_types"].items():
                chart_ids = self.get_chart_list(source_name, chart_type_name)
                for chart_id in chart_ids:
                    for model in chart_type_data.get("models", []):
                        paths.append({
                            "source": source_name,
                            "chart_type": chart_type_name,
                            "chart_id": chart_id,
                            "model": model
                        })

        return paths

    # ============== Chart data ==============

    def get_chart_data(self, source: str, chart_type: str, chart_id: str) -> Dict[str, Any]:
        """Return HTML content and label metadata for one chart.

        Args:
            source: Data source name.
            chart_type: Chart type name.
            chart_id: Chart identifier (html basename).

        Returns:
            ``{"html_content": str, "html_path": str, "label_info": dict|None}``.
            Missing files degrade to empty content / None rather than raising.
        """
        html_path = self.web_path / source / chart_type / f"{chart_id}.html"
        label_path = self.label_path / source / chart_type / f"{chart_id}.json"

        result: Dict[str, Any] = {
            "html_content": "",
            "html_path": str(html_path) if html_path.exists() else "",
            "label_info": None
        }

        # Read the rendered chart HTML.
        if html_path.exists():
            with open(html_path, 'r', encoding='utf-8') as f:
                result["html_content"] = f.read()

        # Read the label metadata; a broken file should not block viewing the chart.
        if label_path.exists():
            try:
                with open(label_path, 'r', encoding='utf-8') as f:
                    result["label_info"] = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
                print(f"Error reading label file: {e}")

        return result

    # ============== QA data ==============

    def get_qa_list(self, source: str, chart_type: str, model: str, chart_id: str) -> List[QAItem]:
        """Return the QA pairs one model produced for one chart.

        Args:
            source: Data source name.
            chart_type: Chart type name.
            model: Model name (QA subdirectory).
            chart_id: Chart identifier.

        Returns:
            List of QAItem.
        """
        qa_model_path = self.qa_path / source / chart_type / model

        if not qa_model_path.exists():
            return []

        qa_list = []
        for qa_file in self._list_files(qa_model_path):
            if not qa_file.endswith('.json'):
                continue

            try:
                with open(qa_model_path / qa_file, 'r', encoding='utf-8') as f:
                    qa_data = json.load(f)
                # Keep only QA pairs belonging to this chart: match either the
                # explicit "chart" field or the chart id embedded in the QA id.
                if qa_data.get('chart') == chart_id or chart_id in qa_data.get('id', ''):
                    qa_list.append(QAItem(
                        id=qa_data.get('id', ''),
                        chart=qa_data.get('chart', ''),
                        question=qa_data.get('question', ''),
                        answer=qa_data.get('answer', '')
                    ))
            except (OSError, ValueError) as e:
                print(f"Error reading QA file {qa_file}: {e}")

        return qa_list

    def get_all_qa_for_chart(self, source: str, chart_type: str, chart_id: str) -> Dict[str, List[QAItem]]:
        """Return QA data for one chart across all models.

        Returns:
            ``{model_name: [QAItem, ...], ...}`` — models with no matching QA
            pairs are omitted.
        """
        chart_qa_path = self.qa_path / source / chart_type

        if not chart_qa_path.exists():
            return {}

        result = {}
        for model in self._list_dirs(chart_qa_path):
            qa_list = self.get_qa_list(source, chart_type, model, chart_id)
            if qa_list:
                result[model] = qa_list

        return result

    # ============== Review record management ==============

    def _save_reviews(self, reviews: List[Dict]):
        """Persist the full review list to reviews.json."""
        with open(self.reviews_file, 'w', encoding='utf-8') as f:
            json.dump(reviews, f, ensure_ascii=False, indent=2)

    def get_all_reviews(self) -> List[Dict]:
        """Return every stored review record (empty list on missing/corrupt file)."""
        if not self.reviews_file.exists():
            return []

        try:
            with open(self.reviews_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            # Was a bare `except:`; a corrupt or unreadable store now degrades
            # to "no reviews" without swallowing unrelated exceptions.
            return []

    def get_review_by_qa_id(self, qa_id: str) -> Optional[Dict]:
        """Return the review record for a QA id, or None if not reviewed."""
        for r in self.get_all_reviews():
            if r.get('qa_id') == qa_id:
                return r
        return None

    def get_reviews_by_chart(self, chart_id: str, model: Optional[str] = None) -> List[Dict]:
        """Return review records for one chart, optionally filtered by model."""
        result = []
        for r in self.get_all_reviews():
            if r.get('chart_id') == chart_id:
                if model is None or r.get('model') == model:
                    result.append(r)
        return result

    def save_review(self, review_data: Dict) -> Dict:
        """Save (create or update) one review record.

        An existing record with the same qa_id is replaced; otherwise a new
        record is appended. ``review_id`` is generated if absent and
        ``review_time`` is always stamped with the current time.

        Args:
            review_data: Review fields (missing fields default to empty).

        Returns:
            The record as persisted.
        """
        reviews = self.get_all_reviews()

        review = {
            "review_id": review_data.get('review_id') or str(uuid.uuid4()),
            "chart_id": review_data.get('chart_id', ''),
            "qa_id": review_data.get('qa_id', ''),
            "source": review_data.get('source', ''),
            "chart_type": review_data.get('chart_type', ''),
            "model": review_data.get('model', ''),
            "original_question": review_data.get('original_question', ''),
            "original_answer": review_data.get('original_answer', ''),
            "status": review_data.get('status', 'pending'),
            "modified_question": review_data.get('modified_question', ''),
            "modified_answer": review_data.get('modified_answer', ''),
            "issue_type": review_data.get('issue_type', ''),
            "comment": review_data.get('comment', ''),
            "reviewer": review_data.get('reviewer', 'default'),
            "review_time": datetime.now().isoformat()
        }

        # Replace an existing review for this QA pair, or append a new one.
        existing_index = None
        for i, r in enumerate(reviews):
            if r.get('qa_id') == review['qa_id']:
                existing_index = i
                break

        if existing_index is not None:
            reviews[existing_index] = review
        else:
            reviews.append(review)

        self._save_reviews(reviews)
        return review

    def get_review_stats(self) -> Dict[str, int]:
        """Return review counts: total plus one bucket per status value."""
        reviews = self.get_all_reviews()
        return {
            "total": len(reviews),
            "correct": len([r for r in reviews if r.get('status') == 'correct']),
            "incorrect": len([r for r in reviews if r.get('status') == 'incorrect']),
            "needs_modification": len([r for r in reviews if r.get('status') == 'needs_modification']),
            "pending": len([r for r in reviews if r.get('status') == 'pending'])
        }

    def export_reviews(self, output_path: Optional[str] = None) -> str:
        """Export all review records to a JSON file.

        Args:
            output_path: Destination path; a timestamped filename in the
                current directory is generated when None.

        Returns:
            Path of the exported file.
        """
        reviews = self.get_all_reviews()

        if output_path is None:
            output_path = f"./reviews_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(reviews, f, ensure_ascii=False, indent=2)

        return output_path

    # ============== Helpers ==============

    def _list_dirs(self, path: Path) -> List[str]:
        """List subdirectory names of ``path``, skipping dot-directories."""
        if not path.exists():
            return []
        return [d.name for d in path.iterdir() if d.is_dir() and not d.name.startswith('.')]

    def _list_files(self, path: Path) -> List[str]:
        """List file names in ``path``, skipping dot-files (e.g. .DS_Store)."""
        if not path.exists():
            return []
        return [f.name for f in path.iterdir() if f.is_file() and not f.name.startswith('.')]
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
# ============== 全局实例 ==============
|
| 394 |
+
|
| 395 |
+
# Shared module-level DataManager instance using the default
# ./dataset and ./reviews paths.
data_manager = DataManager()
|
dataset/.DS_Store
ADDED
|
Binary file (14.3 kB). View file
|
|
|
dataset/label/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
dataset/label/Apache ECharts/.DS_Store
ADDED
|
Binary file (10.2 kB). View file
|
|
|
dataset/label/Apache ECharts/bar/apply_source_from_webtxt.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import re
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Matches filenames like "chart_0001_bar.json"; group 1 is the numeric index.
FILENAME_RE = re.compile(r"^chart_(\d+)_bar\.json$", re.IGNORECASE)


def extract_num(p: Path) -> int:
    """Return the numeric index embedded in a chart_<num>_bar.json filename.

    Raises:
        ValueError: If the filename does not match FILENAME_RE.
    """
    match = FILENAME_RE.match(p.name)
    if match is None:
        raise ValueError(f"文件名不符合规则: {p.name}")
    # Used as a sort key: compare numerically, not lexicographically.
    return int(match.group(1))
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def read_lines_no_empty(txt_path: Path) -> list[str]:
    """Read ``txt_path`` as UTF-8 text and return its non-blank lines.

    Decodes with "utf-8-sig" so an optional BOM is stripped transparently:
    under plain "utf-8" a BOM does NOT raise UnicodeDecodeError, it decodes
    to "\\ufeff" and (in the original code) leaked into the first line.
    Genuinely undecodable bytes are replaced rather than aborting the run.
    """
    try:
        raw = txt_path.read_text(encoding="utf-8-sig")
    except UnicodeDecodeError:
        raw = txt_path.read_text(encoding="utf-8-sig", errors="replace")

    # splitlines() excludes trailing newlines; filter blank lines defensively
    # (even though the input is expected to contain none).
    return [line for line in raw.splitlines() if line.strip() != ""]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def main() -> int:
    """CLI entry point.

    Pairs the chart_NNNN_bar.json files in a folder (sorted by their numeric
    index) with the lines of web.txt, one-to-one, and writes each line into
    the matching file's "Weblink" field.

    Returns a process exit code: 0 on success, 1 on a data mismatch,
    2 on a bad path argument.
    """
    ap = argparse.ArgumentParser(
        description="按文件名中间数字正序,将 web.txt 每行写入对应 JSON 的 Weblink 字段"
    )
    ap.add_argument("dir", help="包含 json 与 web.txt 的文件夹路径")
    ap.add_argument("--txt", default="web.txt", help="文本文件名/路径(默认 web.txt)")
    ap.add_argument("--dry-run", action="store_true", help="只预览不写入")
    ap.add_argument("--backup", action="store_true", help="写入前备份为 .bak(仅第一次)")
    args = ap.parse_args()

    folder = Path(args.dir)
    if not folder.exists() or not folder.is_dir():
        print(f"目录不存在或不是文件夹:{folder}", file=sys.stderr)
        return 2

    # Resolve --txt relative to the target folder unless an absolute path was given.
    txt_path = Path(args.txt)
    if not txt_path.is_absolute():
        txt_path = folder / txt_path
    if not txt_path.exists():
        print(f"找不到 web.txt:{txt_path}", file=sys.stderr)
        return 2

    # Collect JSON files, keeping only names that strictly match
    # chart_<digits>_bar.json (FILENAME_RE); near-misses are reported later.
    candidates = list(folder.glob("chart_*_bar.json"))
    json_files = []
    bad_names = []
    for p in candidates:
        if FILENAME_RE.match(p.name):
            json_files.append(p)
        else:
            bad_names.append(p.name)

    if not json_files:
        print("未找到符合 chart_0001_bar.json 规则的文件。", file=sys.stderr)
        if candidates:
            print("但找到一些类似文件:", file=sys.stderr)
            for n in sorted([c.name for c in candidates])[:20]:
                print(f" - {n}", file=sys.stderr)
        return 1

    # Sort by the embedded number as an integer, not lexically.
    json_files.sort(key=extract_num)

    # Read web.txt (blank lines filtered, no trailing newlines).
    lines = read_lines_no_empty(txt_path)

    # Refuse to guess a pairing when the counts differ.
    if len(lines) != len(json_files):
        print("行数与 JSON 数量不一致,停止执行:", file=sys.stderr)
        print(f" web.txt 有效行数(已过滤空行): {len(lines)}", file=sys.stderr)
        print(f" json 文件数: {len(json_files)}", file=sys.stderr)
        print("\n前几个 json 文件(排序后):", file=sys.stderr)
        for p in json_files[:10]:
            print(f" - {p.name}", file=sys.stderr)
        return 1

    changed = 0

    # Pair file i with line i (both in sorted/original order) and update Weblink.
    for i, (p, src_value) in enumerate(zip(json_files, lines), start=1):
        try:
            original_text = p.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # BOM-tolerant fallback; undecodable bytes are replaced, not fatal.
            original_text = p.read_text(encoding="utf-8-sig", errors="replace")

        try:
            data = json.loads(original_text)
        except Exception as e:
            print(f"错误(JSON 解析失败):{p.name} -> {e}")
            continue

        if not isinstance(data, dict):
            print(f"跳过(JSON 顶层不是对象):{p.name}")
            continue

        old = data.get("Weblink", None)
        data["Weblink"] = src_value
        will_change = (old != src_value)

        if args.dry_run:
            print(f"[DRY {i:04d}] {p.name}: Weblink {old!r} -> {src_value!r}")
            continue

        # Back up the original text once per file, only when a write will happen.
        if will_change and args.backup:
            bak = p.with_suffix(p.suffix + ".bak")
            if not bak.exists():
                bak.write_text(original_text, encoding="utf-8")

        if will_change:
            p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            changed += 1
            print(f"[OK {i:04d}] {p.name}: Weblink 写入成功")
        else:
            print(f"[NO {i:04d}] {p.name}: Weblink 无变化(已相同)")

    print("\n==== 完成 ====")
    print(f"总文件数: {len(json_files)}")
    print(f"发生写入: {changed}")

    if bad_names:
        print("\n提示:同目录下还有这些文件名不符合严格规则(未处理):")
        for n in sorted(bad_names)[:50]:
            print(f" - {n}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
dataset/label/Apache ECharts/bar/chart_0001_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0001",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-rich-text",
|
| 6 |
+
"Topic": "Weather Statistics by City",
|
| 7 |
+
"Describe": "The chart compares the number of sunny, cloudy, and shower days for three cities, showing variations in weather patterns across locations.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0002_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0002",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-animation-delay",
|
| 6 |
+
"Topic": "Bar Animation Delay Example",
|
| 7 |
+
"Describe": "This chart shows two bar series, bar and bar2, with simulated data for 100 categories labeled A0 to A99, demonstrating animation delay effects in a bar chart.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0003_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0003",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=mix-zoom-on-value",
|
| 6 |
+
"Topic": "Obama budget proposal 2012 comparison",
|
| 7 |
+
"Describe": "This bar chart compares the budget allocations for 2011 and 2012 in million USD, showing differences across categories with a focus on the last 6% of data via zoom.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0004_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0004",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=mix-line-bar",
|
| 6 |
+
"Topic": "weekly evaporation precipitation temperature comparison",
|
| 7 |
+
"Describe": "The chart compares evaporation and precipitation in milliliters with temperature in degrees Celsius across days of the week, showing variations in these weather-related metrics.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0005_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0005",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-breaks-simple",
|
| 6 |
+
"Topic": "Bar Chart with Axis Breaks",
|
| 7 |
+
"Describe": "This bar chart compares four data series (Data A, B, C, D) across days of the week, using axis breaks to handle large value ranges, with Data D showing consistently high values.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0006_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0006",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=polar-endAngle",
|
| 6 |
+
"Topic": "none",
|
| 7 |
+
"Describe": "The chart meaning cannot be reliably inferred from the provided content due to insufficient context and generic labels.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0007_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0007",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-y-category",
|
| 6 |
+
"Topic": "World Population by Country and Year",
|
| 7 |
+
"Describe": "This bar chart compares the population of Brazil, Indonesia, USA, India, China, and the World for 2011 and 2012, showing an overall increase in population across all regions.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0008_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0008",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-polar-label-tangential",
|
| 6 |
+
"Topic": "Tangential Polar Bar Label Position",
|
| 7 |
+
"Describe": "The chart shows a polar bar chart comparing values for categories a, b, c, and d, with values ranging from 1.2 to 3.6, indicating that category d has the highest value.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0009_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0009",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-polar-label-radial",
|
| 6 |
+
"Topic": "Radial Polar Bar Label Position",
|
| 7 |
+
"Describe": "The chart shows a radial polar bar chart with categories a, b, c, and d, comparing values of 2, 1.2, 2.4, and 3.6, where d has the highest value.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0010_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0010",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-negative2",
|
| 6 |
+
"Topic": "Cost values for ten categories",
|
| 7 |
+
"Describe": "This bar chart displays cost values for ten categories labeled one through ten, showing both positive and negative values, with some categories having negative costs.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0011_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0011",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-waterfall",
|
| 6 |
+
"Topic": "Living Expenses in Shenzhen Waterfall Chart",
|
| 7 |
+
"Describe": "This waterfall chart shows the breakdown of living expenses in Shenzhen, comparing categories like rent, utilities, transportation, meals, and other costs, with rent being the largest expense.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0012_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0012",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=bar-tick-align",
|
| 6 |
+
"Topic": "direct traffic by day of week",
|
| 7 |
+
"Describe": "The chart shows direct traffic values for each day of the week, with a peak on Friday and a general increase from Monday to Friday followed by a decline over the weekend.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/chart_0013_bar.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0013",
|
| 3 |
+
"Type": "bar",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=dataset-encode0",
|
| 6 |
+
"Topic": "product sales amount and score",
|
| 7 |
+
"Describe": "This bar chart compares the sales amounts of various beverage and dessert products, with bar colors indicating product scores ranging from high to low.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/bar/fix_vchart_json.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
def process_file(path: Path, pattern: re.Pattern, dry_run: bool, backup: bool) -> tuple[bool, str]:
    """Normalize one JSON label file in place.

    Sets "Number" to the zero-padded digits extracted from the filename,
    "Source" to "Apache Echarts", and "Type" to "bar".

    Args:
        path: the JSON file to normalize.
        pattern: regex whose group(1) yields the digits in the filename.
        dry_run: when True, report what would change but write nothing.
        backup: when True, save the original text as <name>.json.bak before
            the first rewrite.

    Returns:
        (changed, message) — ``changed`` is True when any of the three
        managed fields actually differed from the target values.
    """
    m = pattern.search(path.name)
    if not m:
        return (False, f"跳过(文件名不匹配):{path.name}")

    number_str = m.group(1)  # keep leading zeros, e.g. "0014"

    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Some files may carry a BOM or a legacy encoding; decode leniently.
        text = path.read_text(encoding="utf-8-sig", errors="replace")

    try:
        data = json.loads(text)
    except Exception as e:
        return (False, f"错误(JSON 解析失败):{path.name} -> {e}")

    if not isinstance(data, dict):
        return (False, f"跳过(JSON 顶层不是对象):{path.name}")

    old_number = data.get("Number", None)
    old_source = data.get("Source", None)
    # Bug fix: this used to be assigned back into old_source, clobbering the
    # Source value, so the change check compared the old Type against
    # "Apache Echarts" and rewrote files that were already correct.
    old_type = data.get("Type", None)

    data["Number"] = str(number_str)  # explicitly a string
    data["Source"] = "Apache Echarts"
    data["Type"] = "bar"

    # A difference in any of the three managed fields counts as a change.
    changed = (
        old_number != data["Number"]
        or old_source != data["Source"]
        or old_type != data["Type"]
    )

    if dry_run:
        # Report the real target values (the old message claimed 'Vchart'
        # even though "Apache Echarts" is written).
        return (
            changed,
            f"[DRY] {path.name}: Number {old_number!r} -> {data['Number']!r}, "
            f"Source {old_source!r} -> {data['Source']!r}, "
            f"Type {old_type!r} -> {data['Type']!r}",
        )

    if changed and backup:
        bak = path.with_suffix(path.suffix + ".bak")
        if not bak.exists():  # back up only the first time
            bak.write_text(text, encoding="utf-8")

    if changed:
        path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
        return (True, f"已修改:{path.name} -> Number='{number_str}', Source='Apache Echarts'")
    else:
        return (False, f"无变化:{path.name}(已是目标值)")
|
| 59 |
+
|
| 60 |
+
def iter_json_files(folder: Path, recursive: bool):
    """Yield every *.json path under *folder*, descending into subfolders when *recursive*."""
    finder = folder.rglob if recursive else folder.glob
    yield from finder("*.json")
|
| 65 |
+
|
| 66 |
+
def main() -> int:
    """CLI entry point: normalize every *.json file under the given folder.

    Returns a process exit code: 0 on completion, 2 on a bad path argument.
    """
    # NOTE(review): the description says "Source=Vchart", but process_file
    # actually writes Source="Apache Echarts" — confirm which label is intended.
    ap = argparse.ArgumentParser(description="批量修改 JSON: Number=文件名中的数字字符串; Source=Vchart")
    ap.add_argument("dir", help="要处理的文件夹路径")
    ap.add_argument("--recursive", action="store_true", help="递归处理子文件夹")
    ap.add_argument("--dry-run", action="store_true", help="只打印不写入")
    ap.add_argument("--backup", action="store_true", help="修改前生成 .bak(仅第一次)")
    ap.add_argument("--pattern", default=r"chart_(\d+)_", help=r"从文件名提取数字的正则,默认: chart_(\d+)_")
    args = ap.parse_args()

    folder = Path(args.dir)
    if not folder.exists() or not folder.is_dir():
        print(f"目录不存在或不是文件夹:{folder}", file=sys.stderr)
        return 2

    # Compile the user-supplied filename pattern once, outside the loop.
    pattern = re.compile(args.pattern)

    total = 0
    changed_count = 0
    skipped_or_nochange = 0

    # process_file prints nothing itself; every per-file message is echoed here.
    for p in iter_json_files(folder, args.recursive):
        total += 1
        changed, msg = process_file(p, pattern, args.dry_run, args.backup)
        print(msg)
        if changed:
            changed_count += 1
        else:
            skipped_or_nochange += 1

    print("\n==== 统计 ====")
    print(f"总计扫描: {total}")
    print(f"发生修改: {changed_count}")
    print(f"跳过/无变化/错误: {skipped_or_nochange}")
    return 0

if __name__ == "__main__":
    raise SystemExit(main())
|
dataset/label/Apache ECharts/bar/rename.bat
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
setlocal EnableDelayedExpansion

rem Shift a contiguous run of 13 chart files to new index numbers:
rem chart_0014_*.json .. chart_0026_*.json become chart_0027_*.json .. chart_0039_*.json
set oldStart=14
set newStart=27

for /L %%i in (0,1,12) do (
set /A oldNum=oldStart+%%i
set /A newNum=newStart+%%i

rem Zero-pad both numbers to 4 digits (prefix with zeros, keep last 4 chars)
set oldStr=0000!oldNum!
set oldStr=!oldStr:~-4!

set newStr=0000!newNum!
set newStr=!newStr:~-4!

rem NOTE(review): this script lives in the bar folder, whose files are named
rem chart_NNNN_bar.json, yet the rename targets chart_NNNN_line.json —
rem confirm the _line suffix is intended before running.
echo ren chart_!oldStr!_line.json chart_!newStr!_line.json
ren chart_!oldStr!_line.json chart_!newStr!_line.json
)

echo Done.
pause
|
dataset/label/Apache ECharts/bar/revise.bat
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
chcp 65001 >nul
title 批量修改 JSON(Python + BAT)

:: ===== Configuration =====
:: NOTE(review): DIR is a hard-coded absolute path ("Apache Echarts") that may
:: not match this repository's folder name ("Apache ECharts") — verify before use.
set "DIR=D:\Task_projects\data_collection\label\Apache Echarts\bar"
set "PYTHON=python"
:: =========================

:: cd into this .bat's own directory so fix_vchart_json.py can be found
cd /d "%~dp0"

echo 处理目录:%DIR%
echo.

:: Step 1: dry-run (print only, no writes) to confirm the files match
echo ===== 第一步:预览(dry-run)=====
%PYTHON% "%~dp0fix_vchart_json.py" "%DIR%" --dry-run
echo.

:: Step 2: real run (writes files). Append --backup to keep .bak copies.
echo ===== 第二步:正式执行(写入)=====
%PYTHON% "%~dp0fix_vchart_json.py" "%DIR%"
echo.

pause
|
dataset/label/Apache ECharts/bar/run_apply_source.bat
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
chcp 65001 >nul
title 按 web.txt 批量写入 JSON Source(按中间数字正序)

:: ===== Configuration =====
:: NOTE(review): DIR is a hard-coded absolute path ("Apache Echarts") that may
:: not match this repository's folder name ("Apache ECharts") — verify before use.
set "DIR=D:\Task_projects\data_collection\label\Apache Echarts\bar"
set "PYTHON=python"
:: =========================

:: cd into this .bat's own directory so apply_source_from_webtxt.py can be found
cd /d "%~dp0"

echo 处理目录:%DIR%
echo.

:: Step 1: dry-run (preview only, nothing is written)
echo ===== 第一步:预览(dry-run,不写入)=====
%PYTHON% "%~dp0apply_source_from_webtxt.py" "%DIR%" --dry-run
echo.

:: Step 2: real run (writes the Weblink values into the JSON files)
echo ===== 第二步:正式执行(写入)=====
%PYTHON% "%~dp0apply_source_from_webtxt.py" "%DIR%"
echo.

pause
|
dataset/label/Apache ECharts/bar/web.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-rich-text
|
| 2 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-animation-delay
|
| 3 |
+
https://echarts.apache.org/examples/zh/editor.html?c=mix-zoom-on-value
|
| 4 |
+
https://echarts.apache.org/examples/zh/editor.html?c=mix-line-bar
|
| 5 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-breaks-simple
|
| 6 |
+
https://echarts.apache.org/examples/zh/editor.html?c=polar-endAngle
|
| 7 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-y-category
|
| 8 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-polar-label-tangential
|
| 9 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-polar-label-radial
|
| 10 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-negative2
|
| 11 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-waterfall
|
| 12 |
+
https://echarts.apache.org/examples/zh/editor.html?c=bar-tick-align
|
| 13 |
+
https://echarts.apache.org/examples/zh/editor.html?c=dataset-encode0
|
dataset/label/Apache ECharts/box/apply_source_from_webtxt.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import re
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Strict filename pattern: chart_<digits>_box.json (case-insensitive).
FILENAME_RE = re.compile(r"^chart_(\d+)_box\.json$", re.IGNORECASE)


def extract_num(p: Path) -> int:
    """Return the numeric index embedded in a chart_NNNN_box.json filename.

    Raises ValueError when the name does not match the strict pattern.
    """
    match = FILENAME_RE.match(p.name)
    if match is None:
        raise ValueError(f"文件名不符合规则: {p.name}")
    # Convert to int so files sort numerically, not lexically.
    return int(match.group(1))
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def read_lines_no_empty(txt_path: Path) -> list[str]:
    """Read *txt_path* and return its non-blank lines, newline characters stripped."""
    try:
        text = txt_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Fall back to BOM-tolerant decoding, replacing undecodable bytes.
        text = txt_path.read_text(encoding="utf-8-sig", errors="replace")

    # splitlines() already drops trailing newlines; skip blank lines defensively
    # even though the input is expected to contain none.
    return [ln for ln in text.splitlines() if ln.strip()]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def main() -> int:
    """CLI entry point.

    Pairs the chart_NNNN_box.json files in a folder (sorted by their numeric
    index) with the lines of web.txt, one-to-one, and writes each line into
    the matching file's "Weblink" field.

    Returns a process exit code: 0 on success, 1 on a data mismatch,
    2 on a bad path argument.
    """
    ap = argparse.ArgumentParser(
        description="按文件名中间数字正序,将 web.txt 每行写入对应 JSON 的 Weblink 字段"
    )
    ap.add_argument("dir", help="包含 json 与 web.txt 的文件夹路径")
    ap.add_argument("--txt", default="web.txt", help="文本文件名/路径(默认 web.txt)")
    ap.add_argument("--dry-run", action="store_true", help="只预览不写入")
    ap.add_argument("--backup", action="store_true", help="写入前备份为 .bak(仅第一次)")
    args = ap.parse_args()

    folder = Path(args.dir)
    if not folder.exists() or not folder.is_dir():
        print(f"目录不存在或不是文件夹:{folder}", file=sys.stderr)
        return 2

    # Resolve --txt relative to the target folder unless an absolute path was given.
    txt_path = Path(args.txt)
    if not txt_path.is_absolute():
        txt_path = folder / txt_path
    if not txt_path.exists():
        print(f"找不到 web.txt:{txt_path}", file=sys.stderr)
        return 2

    # Collect JSON files, keeping only names that strictly match
    # chart_<digits>_box.json (FILENAME_RE); near-misses are reported later.
    candidates = list(folder.glob("chart_*_box.json"))
    json_files = []
    bad_names = []
    for p in candidates:
        if FILENAME_RE.match(p.name):
            json_files.append(p)
        else:
            bad_names.append(p.name)

    if not json_files:
        print("未找到符合 chart_0001_box.json 规则的文件。", file=sys.stderr)
        if candidates:
            print("但找到一些类似文件:", file=sys.stderr)
            for n in sorted([c.name for c in candidates])[:20]:
                print(f" - {n}", file=sys.stderr)
        return 1

    # Sort by the embedded number as an integer, not lexically.
    json_files.sort(key=extract_num)

    # Read web.txt (blank lines filtered, no trailing newlines).
    lines = read_lines_no_empty(txt_path)

    # Refuse to guess a pairing when the counts differ.
    if len(lines) != len(json_files):
        print("行数与 JSON 数量不一致,停止执行:", file=sys.stderr)
        print(f" web.txt 有效行数(已过滤空行): {len(lines)}", file=sys.stderr)
        print(f" json 文件数: {len(json_files)}", file=sys.stderr)
        print("\n前几个 json 文件(排序后):", file=sys.stderr)
        for p in json_files[:10]:
            print(f" - {p.name}", file=sys.stderr)
        return 1

    changed = 0

    # Pair file i with line i (both in sorted/original order) and update Weblink.
    for i, (p, src_value) in enumerate(zip(json_files, lines), start=1):
        try:
            original_text = p.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # BOM-tolerant fallback; undecodable bytes are replaced, not fatal.
            original_text = p.read_text(encoding="utf-8-sig", errors="replace")

        try:
            data = json.loads(original_text)
        except Exception as e:
            print(f"错误(JSON 解析失败):{p.name} -> {e}")
            continue

        if not isinstance(data, dict):
            print(f"跳过(JSON 顶层不是对象):{p.name}")
            continue

        old = data.get("Weblink", None)
        data["Weblink"] = src_value
        will_change = (old != src_value)

        if args.dry_run:
            print(f"[DRY {i:04d}] {p.name}: Weblink {old!r} -> {src_value!r}")
            continue

        # Back up the original text once per file, only when a write will happen.
        if will_change and args.backup:
            bak = p.with_suffix(p.suffix + ".bak")
            if not bak.exists():
                bak.write_text(original_text, encoding="utf-8")

        if will_change:
            p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            changed += 1
            print(f"[OK {i:04d}] {p.name}: Weblink 写入成功")
        else:
            print(f"[NO {i:04d}] {p.name}: Weblink 无变化(已相同)")

    print("\n==== 完成 ====")
    print(f"总文件数: {len(json_files)}")
    print(f"发生写入: {changed}")

    if bad_names:
        print("\n提示:同目录下还有这些文件名不符合严格规则(未处理):")
        for n in sorted(bad_names)[:50]:
            print(f" - {n}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
dataset/label/Apache ECharts/box/chart_0001_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0001",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=boxplot-light-velocity2",
|
| 6 |
+
"Topic": "Michelson Morley Experiment speed measurements",
|
| 7 |
+
"Describe": "The chart shows box plots of speed measurements from the Michelson-Morley experiment, with data expressed as km/s minus 299,000, comparing distributions across five experimental runs and highlighting outliers.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0002_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0002",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=boxplot-light-velocity",
|
| 6 |
+
"Topic": "Michelson Morley Experiment Speed Measurements",
|
| 7 |
+
"Describe": "This chart displays box plots of speed measurements from the Michelson-Morley experiment, showing the distribution and outliers of data across five groups, with values adjusted relative to 299,000 km/s.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0003_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0003",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=data-transform-aggregate",
|
| 6 |
+
"Topic": "Income distribution by country since 1950",
|
| 7 |
+
"Describe": "This chart shows the distribution of income across different countries from 1950 onwards, using box plots to display statistical summaries and scatter points for individual yearly data, allowing comparison of income ranges and trends over time.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0004_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0004",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=candlestick-touch",
|
| 6 |
+
"Topic": "stock market index daily candlestick chart",
|
| 7 |
+
"Describe": "The chart displays daily candlestick data for a stock market index, showing open, high, low, and close prices over time, along with volume bars and moving average lines (MA5, MA10, MA20) to indicate trends and trading activity.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0005_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0005",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=candlestick-sh-2015",
|
| 6 |
+
"Topic": "Shanghai Stock Exchange Index Daily Candlestick Chart",
|
| 7 |
+
"Describe": "The chart displays daily candlestick data for the Shanghai Stock Exchange Index from July to December 2015, showing opening, closing, high, and low prices, along with moving averages (MA5, MA10, MA20, MA30) to indicate trends and volatility over time.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0006_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0006",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=candlestick-brush",
|
| 6 |
+
"Topic": "Dow Jones stock index performance",
|
| 7 |
+
"Describe": "The chart shows the Dow-Jones index with candlestick data and moving averages (MA5, MA10, MA20, MA30) over time, along with trading volume, highlighting price trends and volatility.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0007_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0007",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=candlestick-sh",
|
| 6 |
+
"Topic": "Shanghai Stock Exchange Index Daily Candlestick Chart",
|
| 7 |
+
"Describe": "This chart displays the Shanghai Stock Exchange Index daily candlestick data from January to June 2013, showing open, close, high, and low prices with moving averages (MA5, MA10, MA20, MA30) to indicate trends and volatility over time.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0008_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0008",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=custom-ohlc",
|
| 6 |
+
"Topic": "Dow Jones index stock price chart",
|
| 7 |
+
"Describe": "This chart displays the Dow-Jones index stock prices over time, showing open, close, lowest, and highest values for each period, with data zoom features for detailed analysis.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0009_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0009",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://www.makeapie.cn/echarts_content/xmJdv2T4Tc.html",
|
| 6 |
+
"Topic": "brand discount distribution comparison",
|
| 7 |
+
"Describe": "The chart shows box plots comparing discount ranges in Chinese yuan for five brands, with Midea having the highest median and widest spread, while Vatti has the lowest values.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0010_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0010",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://www.makeapie.cn/echarts_content/xr1e-4SNdW.html",
|
| 6 |
+
"Topic": "Shanghai Stock Exchange Index 2015",
|
| 7 |
+
"Describe": "The chart displays daily candlestick data and moving averages for the Shanghai Stock Exchange Index in 2015, showing price fluctuations and trends over time.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0011_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0011",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://www.makeapie.cn/echarts_content/xrkvZcnXeQ.html",
|
| 6 |
+
"Topic": "refrigerator brand price sales comparison",
|
| 7 |
+
"Describe": "The chart compares price ranges, average prices, product counts, and 30-day total sales across various refrigerator brands, showing variations in market performance and pricing strategies.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0012_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0012",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://www.makeapie.cn/echarts_content/xl1cIFgBSa.html",
|
| 6 |
+
"Topic": "hourly activity frequency distribution",
|
| 7 |
+
"Describe": "The chart shows a candlestick and line series comparing activity frequency across eight hourly intervals from 10:00 to 17:00, with the line representing a specific time period and the candlesticks displaying quartiles and limits.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/chart_0013_box.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0013",
|
| 3 |
+
"Type": "box",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://www.makeapie.cn/echarts_content/xSw4eLraIk.html",
|
| 6 |
+
"Topic": "stock price daily candlestick chart",
|
| 7 |
+
"Describe": "The chart displays daily candlestick data for stock prices over seven days in January and February 2013, showing opening, closing, high, and low values with color-coded increases and decreases.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/box/fix_vchart_json.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import re
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
def process_file(path: Path, pattern: re.Pattern, dry_run: bool, backup: bool) -> tuple[bool, str]:
    """Normalize one labeled-chart JSON file in place.

    Sets ``"Number"`` to the digit group captured from the filename
    (leading zeros preserved), ``"Source"`` to ``"Apache Echarts"`` and
    ``"Type"`` to ``"box"``.

    Args:
        path: JSON file to inspect / rewrite.
        pattern: regex whose group(1) extracts the number from the filename.
        dry_run: if True, only report what would change; write nothing.
        backup: if True, write a one-time ``.bak`` copy before modifying.

    Returns:
        ``(changed, message)`` — whether the file content changed (or would
        change, on a dry run) and a human-readable status line.
    """
    m = pattern.search(path.name)
    if not m:
        return (False, f"跳过(文件名不匹配):{path.name}")

    number_str = m.group(1)  # keep leading zeros, e.g. "0014"

    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # Some files may carry a BOM or be saved as ANSI/GBK.
        text = path.read_text(encoding="utf-8-sig", errors="replace")

    try:
        data = json.loads(text)
    except Exception as e:
        return (False, f"错误(JSON 解析失败):{path.name} -> {e}")

    if not isinstance(data, dict):
        return (False, f"跳过(JSON 顶层不是对象):{path.name}")

    old_number = data.get("Number", None)
    old_source = data.get("Source", None)
    # BUG FIX: the original reassigned old_source here, losing the previous
    # "Source" value and leaving "Type" changes undetected below.
    old_type = data.get("Type", None)

    data["Number"] = str(number_str)  # store explicitly as a string
    data["Source"] = "Apache Echarts"
    data["Type"] = "box"

    # Detect whether anything actually changed (now also covers "Type").
    changed = (
        old_number != data["Number"]
        or old_source != data["Source"]
        or old_type != data["Type"]
    )

    if dry_run:
        # Report the real target values (the original message claimed 'Vchart',
        # a leftover from a VChart variant of this script).
        return (changed, f"[DRY] {path.name}: Number {old_number!r} -> {data['Number']!r}, Source {old_source!r} -> {data['Source']!r}, Type {old_type!r} -> {data['Type']!r}")

    if changed and backup:
        bak = path.with_suffix(path.suffix + ".bak")
        if not bak.exists():
            bak.write_text(text, encoding="utf-8")

    if changed:
        path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
        return (True, f"已修改:{path.name} -> Number='{number_str}', Source='{data['Source']}', Type='{data['Type']}'")
    else:
        return (False, f"无变化:{path.name}(已是目标值)")
+
|
| 60 |
+
def iter_json_files(folder: Path, recursive: bool):
    """Yield every ``*.json`` path under *folder* (recursing if requested)."""
    globber = folder.rglob if recursive else folder.glob
    yield from globber("*.json")
| 65 |
+
|
| 66 |
+
def main() -> int:
    """CLI driver: normalize every JSON file in a folder via process_file().

    Returns a process exit code: 0 on success, 2 for a bad directory argument.
    """
    # NOTE(review): the description string still says "Source=Vchart", but
    # process_file() actually writes Source="Apache Echarts" — the text is a
    # runtime string left as-is; confirm and update it with the owner.
    ap = argparse.ArgumentParser(description="批量修改 JSON: Number=文件名中的数字字符串; Source=Vchart")
    ap.add_argument("dir", help="要处理的文件夹路径")
    ap.add_argument("--recursive", action="store_true", help="递归处理子文件夹")
    ap.add_argument("--dry-run", action="store_true", help="只打印不写入")
    ap.add_argument("--backup", action="store_true", help="修改前生成 .bak(仅第一次)")
    ap.add_argument("--pattern", default=r"chart_(\d+)_", help=r"从文件名提取数字的正则,默认: chart_(\d+)_")
    args = ap.parse_args()

    folder = Path(args.dir)
    if not folder.exists() or not folder.is_dir():
        print(f"目录不存在或不是文件夹:{folder}", file=sys.stderr)
        return 2

    # Compile once; each file's name is matched against this pattern.
    pattern = re.compile(args.pattern)

    total = 0
    changed_count = 0
    skipped_or_nochange = 0

    # process_file() reports (changed, message) per file; tally both outcomes.
    for p in iter_json_files(folder, args.recursive):
        total += 1
        changed, msg = process_file(p, pattern, args.dry_run, args.backup)
        print(msg)
        if changed:
            changed_count += 1
        else:
            skipped_or_nochange += 1

    print("\n==== 统计 ====")
    print(f"总计扫描: {total}")
    print(f"发生修改: {changed_count}")
    print(f"跳过/无变化/错误: {skipped_or_nochange}")
    return 0
| 100 |
+
|
| 101 |
+
# Script entry point: exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|
dataset/label/Apache ECharts/box/rename.bat
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
setlocal EnableDelayedExpansion

rem Shift the numeric part of chart_00NN_line.json filenames from
rem oldStart..oldStart+12 to newStart..newStart+12 (13 files total).
rem NOTE(review): this script lives in the box folder but renames
rem *_line.json files -- confirm that is intended before running it here.

rem Starting numbers
set oldStart=14
set newStart=27

for /L %%i in (0,1,12) do (
    set /A oldNum=oldStart+%%i
    set /A newNum=newStart+%%i

    rem Zero-pad to 4 digits
    set oldStr=0000!oldNum!
    set oldStr=!oldStr:~-4!

    set newStr=0000!newNum!
    set newStr=!newStr:~-4!

    echo ren chart_!oldStr!_line.json chart_!newStr!_line.json
    ren chart_!oldStr!_line.json chart_!newStr!_line.json
)

echo Done.
pause
|
dataset/label/Apache ECharts/box/renamelast.bat
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
setlocal EnableExtensions EnableDelayedExpansion

rem Rename chart_*_heatmap.json -> chart_*_box.json in the current directory
rem only (non-recursive); the for mask pre-filters the candidate files.
rem NOTE(review): the original comments described "_bar.html -> _scatter.html";
rem they were stale copy from an earlier variant, not what this code does.
for %%F in (chart_*_heatmap.json) do (
    set "name=%%~nF"
    set "ext=%%~xF"
    rem Replace the _heatmap suffix in the base name with _box.
    set "new=!name:_heatmap=_box!!ext!"
    if /I not "%%F"=="!new!" (
        echo ren "%%F" "!new!"
        ren "%%F" "!new!"
    )
)

echo Done.
endlocal
|
dataset/label/Apache ECharts/box/revise.bat
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
chcp 65001 >nul
title 批量修改 JSON(Python + BAT)

:: ===== Configuration =====
set "DIR=D:\Task_projects\data_collection\label\Apache Echarts\box"
set "PYTHON=python"
:: =========================

:: cd to this script's directory so fix_vchart_json.py can be found.
cd /d "%~dp0"

echo 处理目录:%DIR%
echo.

:: Step 1: dry-run (print only, no writes) to confirm files are matched.
echo ===== 第一步:预览(dry-run)=====
%PYTHON% "%~dp0fix_vchart_json.py" "%DIR%" --dry-run
echo.

:: Step 2: real run (writes). Add --backup here if .bak copies are wanted.
echo ===== 第二步:正式执行(写入)=====
%PYTHON% "%~dp0fix_vchart_json.py" "%DIR%"
echo.

pause
|
dataset/label/Apache ECharts/box/run_apply_source.bat
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
chcp 65001 >nul
title 按 web.txt 批量写入 JSON Source(按中间数字正序)

:: ===== Configuration =====
set "DIR=D:\Task_projects\data_collection\label\Apache Echarts\box"
set "PYTHON=python"
:: =========================

:: cd to this script's directory so apply_source_from_webtxt.py can be found.
cd /d "%~dp0"

echo 处理目录:%DIR%
echo.

:: Step 1: dry-run (preview only, nothing written).
echo ===== 第一步:预览(dry-run,不写入)=====
%PYTHON% "%~dp0apply_source_from_webtxt.py" "%DIR%" --dry-run
echo.

:: Step 2: real run (writes Weblink fields).
echo ===== 第二步:正式执行(写入)=====
%PYTHON% "%~dp0apply_source_from_webtxt.py" "%DIR%"
echo.

pause
|
dataset/label/Apache ECharts/box/web.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
https://echarts.apache.org/examples/zh/editor.html?c=boxplot-light-velocity2
|
| 2 |
+
https://echarts.apache.org/examples/zh/editor.html?c=boxplot-light-velocity
|
| 3 |
+
https://echarts.apache.org/examples/zh/editor.html?c=data-transform-aggregate
|
| 4 |
+
https://echarts.apache.org/examples/zh/editor.html?c=candlestick-touch
|
| 5 |
+
https://echarts.apache.org/examples/zh/editor.html?c=candlestick-sh-2015
|
| 6 |
+
https://echarts.apache.org/examples/zh/editor.html?c=candlestick-brush
|
| 7 |
+
https://echarts.apache.org/examples/zh/editor.html?c=candlestick-sh
|
| 8 |
+
https://echarts.apache.org/examples/zh/editor.html?c=custom-ohlc
|
| 9 |
+
https://www.makeapie.cn/echarts_content/xDm_IPLAhj.html
|
| 10 |
+
https://www.makeapie.cn/echarts_content/xr1e-4SNdW.html
|
| 11 |
+
https://www.makeapie.cn/echarts_content/xrkvZcnXeQ.html
|
| 12 |
+
https://www.makeapie.cn/echarts_content/xl1cIFgBSa.html
|
| 13 |
+
https://www.makeapie.cn/echarts_content/xSw4eLraIk.html
|
dataset/label/Apache ECharts/heatmap/apply_source_from_webtxt.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import re
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Strict filename shape for the files this script manages, e.g.
# chart_0001_heatmap.json (case-insensitive).
FILENAME_RE = re.compile(r"^chart_(\d+)_heatmap\.json$", re.IGNORECASE)


def extract_num(p: Path) -> int:
    """Return the numeric middle part of a chart_NNNN_heatmap.json filename.

    Used as a sort key, so the value is an int (numeric, not lexical, order).
    Raises ValueError when the filename does not match FILENAME_RE.
    """
    match = FILENAME_RE.match(p.name)
    if match is None:
        raise ValueError(f"文件名不符合规则: {p.name}")
    return int(match.group(1))
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def read_lines_no_empty(txt_path: Path) -> list[str]:
    """Read *txt_path* and return its non-blank lines, newline-free.

    Falls back to a BOM-tolerant decode if plain UTF-8 fails. Blank and
    whitespace-only lines are dropped defensively, so the result pairs
    cleanly with the sorted JSON file list.
    """
    try:
        content = txt_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        content = txt_path.read_text(encoding="utf-8-sig", errors="replace")

    return [ln for ln in content.splitlines() if ln.strip()]
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def main() -> int:
    """Pair web.txt lines (in order) with chart_*_heatmap.json files sorted
    by their filename number, writing each line into that file's "Weblink".

    Refuses to write anything when the line count and file count differ,
    since the mapping is purely positional. Returns a process exit code.
    """
    ap = argparse.ArgumentParser(
        description="按文件名中间数字正序,将 web.txt 每行写入对应 JSON 的 Weblink 字段"
    )
    ap.add_argument("dir", help="包含 json 与 web.txt 的文件夹路径")
    ap.add_argument("--txt", default="web.txt", help="文本文件名/路径(默认 web.txt)")
    ap.add_argument("--dry-run", action="store_true", help="只预览不写入")
    ap.add_argument("--backup", action="store_true", help="写入前备份为 .bak(仅第一次)")
    args = ap.parse_args()

    folder = Path(args.dir)
    if not folder.exists() or not folder.is_dir():
        print(f"目录不存在或不是文件夹:{folder}", file=sys.stderr)
        return 2

    # Relative --txt paths are resolved inside the target folder.
    txt_path = Path(args.txt)
    if not txt_path.is_absolute():
        txt_path = folder / txt_path
    if not txt_path.exists():
        print(f"找不到 web.txt:{txt_path}", file=sys.stderr)
        return 2

    # Collect JSON files strictly matching chart_NNNN_heatmap.json.
    # (The original comment said chart_0001_line.json — stale copy; the
    # pattern here is the heatmap one.)
    candidates = list(folder.glob("chart_*_heatmap.json"))
    json_files = []
    bad_names = []
    for p in candidates:
        if FILENAME_RE.match(p.name):
            json_files.append(p)
        else:
            bad_names.append(p.name)

    if not json_files:
        print("未找到符合 chart_0001_heatmap.json 规则的文件。", file=sys.stderr)
        if candidates:
            print("但找到一些类似文件:", file=sys.stderr)
            for n in sorted([c.name for c in candidates])[:20]:
                print(f" - {n}", file=sys.stderr)
        return 1

    # Numeric (not lexical) ascending order by the filename's middle number.
    json_files.sort(key=extract_num)

    # Read web.txt (blank lines filtered; no trailing newlines).
    lines = read_lines_no_empty(txt_path)

    # Positional pairing requires exactly one line per file — abort otherwise.
    if len(lines) != len(json_files):
        print("行数与 JSON 数量不一致,停止执行:", file=sys.stderr)
        print(f" web.txt 有效行数(已过滤空行): {len(lines)}", file=sys.stderr)
        print(f" json 文件数: {len(json_files)}", file=sys.stderr)
        print("\n前几个 json 文件(排序后):", file=sys.stderr)
        for p in json_files[:10]:
            print(f" - {p.name}", file=sys.stderr)
        return 1

    changed = 0

    for i, (p, src_value) in enumerate(zip(json_files, lines), start=1):
        try:
            original_text = p.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            # BOM-tolerant fallback for files not saved as plain UTF-8.
            original_text = p.read_text(encoding="utf-8-sig", errors="replace")

        try:
            data = json.loads(original_text)
        except Exception as e:
            print(f"错误(JSON 解析失败):{p.name} -> {e}")
            continue

        if not isinstance(data, dict):
            print(f"跳过(JSON 顶层不是对象):{p.name}")
            continue

        old = data.get("Weblink", None)
        data["Weblink"] = src_value
        will_change = (old != src_value)

        if args.dry_run:
            print(f"[DRY {i:04d}] {p.name}: Weblink {old!r} -> {src_value!r}")
            continue

        # One-time backup of the pre-edit text, only when a write will happen.
        if will_change and args.backup:
            bak = p.with_suffix(p.suffix + ".bak")
            if not bak.exists():
                bak.write_text(original_text, encoding="utf-8")

        if will_change:
            p.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
            changed += 1
            print(f"[OK {i:04d}] {p.name}: Weblink 写入成功")
        else:
            print(f"[NO {i:04d}] {p.name}: Weblink 无变化(已相同)")

    print("\n==== 完成 ====")
    print(f"总文件数: {len(json_files)}")
    print(f"发生写入: {changed}")

    if bad_names:
        print("\n提示:同目录下还有这些文件名不符合严格规则(未处理):")
        for n in sorted(bad_names)[:50]:
            print(f" - {n}")

    return 0
| 135 |
+
|
| 136 |
+
|
| 137 |
+
# Script entry point: exit with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|
dataset/label/Apache ECharts/heatmap/chart_0001_heatmap.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0001",
|
| 3 |
+
"Type": "heatmap",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=calendar-vertical",
|
| 6 |
+
"Topic": "daily data heatmap calendar years",
|
| 7 |
+
"Describe": "The chart shows a heatmap of daily data values for the years 2015, 2016, and 2017, with color intensity representing values from 0 to 1000, but the data is randomly generated for demonstration purposes.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|
dataset/label/Apache ECharts/heatmap/chart_0002_heatmap.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"Number": "0002",
|
| 3 |
+
"Type": "heatmap",
|
| 4 |
+
"Source": "Apache Echarts",
|
| 5 |
+
"Weblink": "https://echarts.apache.org/examples/zh/editor.html?c=calendar-heatmap",
|
| 6 |
+
"Topic": "daily step count heatmap calendar",
|
| 7 |
+
"Describe": "The chart shows a heatmap of daily step counts for the year 2016, with color intensity representing the number of steps from 0 to 10000.",
|
| 8 |
+
"Other": ""
|
| 9 |
+
}
|