You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

255 lines
9.1 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""
人工验证界面
使用Gradio创建Web界面用于人工验证生成的AIME题目
"""
import json
import os
from typing import List, Dict, Any, Tuple
from datetime import datetime
import gradio as gr
class HumanVerificationUI:
"""人工验证界面"""
def __init__(self, data_path: str):
"""
初始化验证界面
Args:
data_path: 生成数据的JSON文件路径
"""
self.data_path = data_path
self.problems = self._load_problems()
self.current_index = 0
self.verifications = self._load_verifications()
def _load_problems(self) -> List[Dict[str, Any]]:
"""加载题目数据"""
if not os.path.exists(self.data_path):
raise FileNotFoundError(f"数据文件不存在: {self.data_path}")
with open(self.data_path, 'r', encoding='utf-8') as f:
return json.load(f)
def _load_verifications(self) -> Dict[str, Any]:
"""加载已有的验证结果"""
verification_path = self.data_path.replace(".json", "_verifications.json")
if os.path.exists(verification_path):
with open(verification_path, 'r', encoding='utf-8') as f:
return json.load(f)
return {}
def _save_verifications(self):
"""保存验证结果"""
verification_path = self.data_path.replace(".json", "_verifications.json")
with open(verification_path, 'w', encoding='utf-8') as f:
json.dump(self.verifications, f, ensure_ascii=False, indent=2)
def get_current_problem(self) -> Tuple[str, str, str, str, str, str]:
"""获取当前题目信息"""
if not self.problems:
return "无题目", "", "", "", "", "0/0"
problem = self.problems[self.current_index]
problem_id = problem.get("id", "unknown")
# 获取已有的验证信息
verification = self.verifications.get(problem_id, {})
return (
f"题目 {self.current_index + 1}/{len(self.problems)}",
problem.get("problem", ""),
f"答案: {problem.get('answer', 'N/A')}",
problem.get("solution", ""),
f"主题: {problem.get('topic', 'N/A')}",
verification.get("comments", "")
)
def verify_problem(
self,
correctness: int,
clarity: int,
difficulty_match: int,
completeness: int,
status: str,
comments: str
) -> str:
"""
验证当前题目
Args:
correctness: 正确性评分 (1-5)
clarity: 清晰度评分 (1-5)
difficulty_match: 难度匹配评分 (1-5)
completeness: 完整性评分 (1-5)
status: 验证状态 (approved/rejected/needs_revision)
comments: 评论
Returns:
验证结果消息
"""
if not self.problems:
return "❌ 无题目可验证"
problem = self.problems[self.current_index]
problem_id = problem.get("id", "unknown")
# 保存验证结果
self.verifications[problem_id] = {
"problem_id": problem_id,
"scores": {
"correctness": correctness,
"clarity": clarity,
"difficulty_match": difficulty_match,
"completeness": completeness
},
"total_score": (correctness + clarity + difficulty_match + completeness) / 4,
"status": status,
"comments": comments,
"verified_at": datetime.now().isoformat()
}
self._save_verifications()
return f"✅ 题目 {problem_id} 验证完成!\n总分: {self.verifications[problem_id]['total_score']:.2f}/5.0"
def next_problem(self) -> Tuple[str, str, str, str, str, str]:
"""下一个题目"""
if self.current_index < len(self.problems) - 1:
self.current_index += 1
return self.get_current_problem()
def prev_problem(self) -> Tuple[str, str, str, str, str, str]:
"""上一个题目"""
if self.current_index > 0:
self.current_index -= 1
return self.get_current_problem()
def get_statistics(self) -> str:
"""获取验证统计信息"""
if not self.verifications:
return "暂无验证数据"
total = len(self.problems)
verified = len(self.verifications)
approved = sum(1 for v in self.verifications.values() if v["status"] == "approved")
rejected = sum(1 for v in self.verifications.values() if v["status"] == "rejected")
needs_revision = sum(1 for v in self.verifications.values() if v["status"] == "needs_revision")
avg_score = sum(v["total_score"] for v in self.verifications.values()) / verified if verified > 0 else 0
return f"""
📊 验证统计
总题目数: {total}
已验证: {verified} ({verified/total*100:.1f}%)
未验证: {total - verified}
验证结果:
- ✅ 通过: {approved}
- ❌ 拒绝: {rejected}
- 🔄 需修改: {needs_revision}
平均评分: {avg_score:.2f}/5.0
"""
def launch(self, share: bool = False):
"""启动Gradio界面"""
with gr.Blocks(title="AIME题目人工验证") as demo:
gr.Markdown("# 🎯 AIME题目人工验证系统")
gr.Markdown(f"数据文件: `{self.data_path}`")
with gr.Row():
with gr.Column(scale=2):
# 题目显示区域
title = gr.Textbox(label="当前题目", interactive=False)
problem_text = gr.Textbox(label="问题描述", lines=5, interactive=False)
answer_text = gr.Textbox(label="答案", interactive=False)
solution_text = gr.Textbox(label="解答过程", lines=10, interactive=False)
metadata_text = gr.Textbox(label="元数据", interactive=False)
with gr.Column(scale=1):
# 评分区域
gr.Markdown("### 📝 评分 (1-5分)")
correctness_slider = gr.Slider(1, 5, value=3, step=1, label="正确性")
clarity_slider = gr.Slider(1, 5, value=3, step=1, label="清晰度")
difficulty_slider = gr.Slider(1, 5, value=3, step=1, label="难度匹配")
completeness_slider = gr.Slider(1, 5, value=3, step=1, label="完整性")
# 状态选择
gr.Markdown("### ✅ 验证状态")
status_radio = gr.Radio(
choices=["approved", "rejected", "needs_revision"],
value="approved",
label="状态"
)
# 评论
comments_text = gr.Textbox(label="评论", lines=3, placeholder="请输入评论...")
# 验证按钮
verify_btn = gr.Button("✅ 提交验证", variant="primary")
verify_result = gr.Textbox(label="验证结果", interactive=False)
# 导航按钮
with gr.Row():
prev_btn = gr.Button("⬅️ 上一题")
next_btn = gr.Button("下一题 ➡️")
# 统计信息
with gr.Row():
stats_text = gr.Textbox(label="验证统计", lines=10, interactive=False)
refresh_stats_btn = gr.Button("🔄 刷新统计")
# 加载初始题目
demo.load(
fn=self.get_current_problem,
outputs=[title, problem_text, answer_text, solution_text, metadata_text, comments_text]
)
# 绑定事件
verify_btn.click(
fn=self.verify_problem,
inputs=[correctness_slider, clarity_slider, difficulty_slider, completeness_slider, status_radio, comments_text],
outputs=verify_result
)
next_btn.click(
fn=self.next_problem,
outputs=[title, problem_text, answer_text, solution_text, metadata_text, comments_text]
)
prev_btn.click(
fn=self.prev_problem,
outputs=[title, problem_text, answer_text, solution_text, metadata_text, comments_text]
)
refresh_stats_btn.click(
fn=self.get_statistics,
outputs=stats_text
)
demo.launch(share=share, server_name="127.0.0.1", server_port=7860)
if __name__ == "__main__":
import sys
if len(sys.argv) < 2:
print("用法: python human_verification_ui.py <data_path>")
print("示例: python human_verification_ui.py generated_data/aime_generated_20250110_120000.json")
sys.exit(1)
data_path = sys.argv[1]
ui = HumanVerificationUI(data_path)
ui.launch(share=False)