{"id":"d21a1e88-93d5-4e80-b1b3-e70f61475862","shortId":"PVdExm","kind":"skill","title":"Benchmark deep research agents across factual, quality, and process dimensions with MiroEval","tagline":"Score deep research agents on benchmark tasks using factual verification, report-quality scoring, and process evaluation before model or workflow changes ship.","description":"# Benchmark deep research agents across factual, quality, and process dimensions with MiroEval\n\nScore deep research agents on benchmark tasks using factual verification, report-quality scoring, and process evaluation before model or workflow changes ship.\n\n## Prerequisites\n\nPython, uv, model result JSON, required API keys for judge and retrieval services\n\n## Installation\n\nNo source-backed install or usage instructions could be extracted automatically. Review the upstream project before running this skill in a sensitive workflow.\n\n- Source: https://github.com/MiroMindAI/MiroEval\n\n## Documentation\n\n- https://github.com/MiroMindAI/MiroEval\n\n## Source\n\n- [Agent Skill Exchange](https://agentskillexchange.com/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval/)","tags":["benchmark","deep","research","agents","across","factual","quality","and","process","dimensions","with","miroeval"],"capabilities":["skill","source-agentskillexchange","skill-benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval","topic-agent-skills","topic-ai-agents","topic-ai-tools","topic-awesome-list","topic-claude-code","topic-codex","topic-cursor","topic-llm","topic-mcp","topic-npx-skills","topic-openclaw","topic-skills-catalog"],"categories":["skills"],"synonyms":[],"warnings":[],"endpointUrl":"https://skills.sh/agentskillexchange/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval","protocol":"skill","transport":"skills-sh","auth":{"type":"none","details":{"cli":"npx skills add agentskillexchange/skills","source_repo":"https://github.com/agentskillexchange/skills","install_from":"skills.sh"}},"qualityScore":"0.454","qualityRationale":"deterministic score 0.45 from registry signals: · indexed on github topic:agent-skills · 8 github stars · SKILL.md body (812 chars)","verified":false,"liveness":"unknown","lastLivenessCheck":null,"agentReviews":{"count":0,"score_avg":null,"cost_usd_avg":null,"success_rate":null,"latency_p50_ms":null,"narrative_summary":null,"summary_updated_at":null},"enrichmentModel":"deterministic:skill-github:v1","enrichmentVersion":1,"enrichedAt":"2026-05-18T19:09:36.799Z","embedding":null,"createdAt":"2026-05-18T13:15:23.965Z","updatedAt":"2026-05-18T19:09:36.799Z","lastSeenAt":"2026-05-18T19:09:36.799Z","tsv":"'/miromindai/miroeval':113,117 '/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval/)':124 'across':5,40 'agent':4,16,39,51,119 'agentskillexchange.com':123 'agentskillexchange.com/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval/)':122 'api':78 'automat':97 'back':89 'benchmark':1,18,36,53 'chang':34,69 'could':94 'deep':2,14,37,49 'dimens':10,45 'document':114 'evalu':29,64 'exchang':121 'extract':96 'factual':6,21,41,56 'github.com':112,116 'github.com/miromindai/miroeval':111,115 'instal':85,90 'instruct':93 'json':76 'judg':81 'key':79 'miroev':12,47 'model':31,66,74 'prerequisit':71 'process':9,28,44,63 'project':101 'python':72 'qualiti':7,25,42,60 'report':24,59 'report-qu':23,58 'requir':77 'research':3,15,38,50 'result':75 'retriev':83 'review':98 'run':103 'score':13,26,48,61 'sensit':108 'servic':84 'ship':35,70 'skill':105,120 'skill-benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval' 'sourc':88,110,118 'source-agentskillexchange' 'source-back':87 'task':19,54 'topic-agent-skills' 'topic-ai-agents' 'topic-ai-tools' 'topic-awesome-list' 'topic-claude-code' 'topic-codex' 'topic-cursor' 'topic-llm' 'topic-mcp' 'topic-npx-skills' 'topic-openclaw' 'topic-skills-catalog' 'upstream':100 'usag':92 'use':20,55 'uv':73 'verif':22,57 'workflow':33,68,109","prices":[{"id":"e4acc2e2-0451-47eb-b8e5-7dac16910540","listingId":"d21a1e88-93d5-4e80-b1b3-e70f61475862","amountUsd":"0","unit":"free","nativeCurrency":null,"nativeAmount":null,"chain":null,"payTo":null,"paymentMethod":"skill-free","isPrimary":true,"details":{"org":"agentskillexchange","category":"skills","install_from":"skills.sh"},"createdAt":"2026-05-18T13:15:23.965Z"}],"sources":[{"listingId":"d21a1e88-93d5-4e80-b1b3-e70f61475862","source":"github","sourceId":"agentskillexchange/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval","sourceUrl":"https://github.com/agentskillexchange/skills/tree/main/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval","isPrimary":false,"firstSeenAt":"2026-05-18T13:15:23.965Z","lastSeenAt":"2026-05-18T19:09:36.799Z"}],"details":{"listingId":"d21a1e88-93d5-4e80-b1b3-e70f61475862","quickStartSnippet":null,"exampleRequest":null,"exampleResponse":null,"schema":null,"openapiUrl":null,"agentsTxtUrl":null,"citations":[],"useCases":[],"bestFor":[],"notFor":[],"kindDetails":{"org":"agentskillexchange","slug":"benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval","github":{"repo":"agentskillexchange/skills","stars":8,"topics":["agent-skills","ai-agents","ai-tools","awesome-list","claude-code","codex","cursor","llm","mcp","npx-skills","openclaw","skills-catalog"],"license":"mit","html_url":"https://github.com/agentskillexchange/skills","pushed_at":"2026-05-18T19:02:17Z","description":"The open catalog of AI agent skills — 2,000+ security-scanned skills for Claude Code, Cursor, Codex, and more.","skill_md_sha":"b3bacb980b6636dbfe1384b38cba1f964159e4bf","skill_md_path":"skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval/SKILL.md","default_branch":"main","skill_tree_url":"https://github.com/agentskillexchange/skills/tree/main/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval"},"layout":"multi","source":"github","category":"skills","frontmatter":{"name":"Benchmark deep research agents across factual, quality, and process dimensions with MiroEval","description":"Score deep research agents on benchmark tasks using factual verification, report-quality scoring, and process evaluation before model or workflow changes ship."},"skills_sh_url":"https://skills.sh/agentskillexchange/skills/benchmark-deep-research-agents-across-factual-quality-and-process-dimensions-with-miroeval"},"updatedAt":"2026-05-18T19:09:36.799Z"}}