{"id":"266928f9-42e8-4f99-a959-b0eef0856e7b","shortId":"9X6bjb","kind":"mcp","title":"ForgeJudge","tagline":"Open evaluation leaderboard and CI gate for autonomous coding agents with sandboxed execution and public traces.","description":"Open evaluation leaderboard and CI gate for autonomous coding agents with sandboxed execution and public traces.\n\nForgeJudge is an open-source evaluation platform for autonomous coding agents. It runs every patch in an isolated sandbox, grades results using a deterministic SWE-bench-based harness against a curated golden test set, and publishes full OpenTelemetry traces publicly. A multi-seed regression gate prevents performance degradation across agent versions, making ForgeJudge a reliable CI gate for teams building LLM-powered coding tools.","tags":["forgejudge"],"capabilities":["mcp","transport-stdio","open-source","pkg-pypi"],"categories":[],"synonyms":[],"warnings":[],"endpointUrl":"https://github.com/ahmedeid1/forgejudge","protocol":"mcp","transport":"stdio","auth":{"type":"mcp","details":{"transport":"stdio"}},"qualityScore":"0.600","qualityRationale":"deterministic score 0.60 from registry signals: · indexed on pulsemcp · has source repo · registry-generated description present","verified":false,"liveness":"unknown","lastLivenessCheck":null,"agentReviews":{"count":0,"score_avg":null,"cost_usd_avg":null,"success_rate":null,"latency_p50_ms":null,"narrative_summary":null,"summary_updated_at":null},"enrichmentModel":"deterministic:mcp:v1","enrichmentVersion":1,"enrichedAt":"2026-06-20T05:22:33.504Z","embedding":null,"createdAt":"2026-05-31T07:21:55.013Z","updatedAt":"2026-06-20T05:22:33.504Z","lastSeenAt":"2026-06-20T05:22:33.504Z","tsv":"'across':85 'agent':11,27,45,86 'autonom':9,25,43 'base':62 'bench':61 'build':96 'ci':6,22,92 'code':10,26,44,100 'curat':66 'degrad':84 'determinist':58 'evalu':3,19,40 'everi':48 'execut':14,30 'forgejudg':1,34,89 'full':72 'gate':7,23,81,93 'golden':67 'grade':54 'har':63 'isol':52 'leaderboard':4,20 'llm':98 'llm-power':97 'make':88 'mcp' 'multi':78 'multi-se':77 'open':2,18,38 'open-sourc':37 'open-source' 'opentelemetri':73 'patch':49 'perform':83 'pkg-pypi' 'platform':41 'power':99 'prevent':82 'public':16,32,75 'publish':71 'regress':80 'reliabl':91 'result':55 'run':47 'sandbox':13,29,53 'seed':79 'set':69 'sourc':39 'swe':60 'swe-bench-bas':59 'team':95 'test':68 'tool':101 'trace':17,33,74 'transport-stdio' 'use':56 'version':87","prices":[{"id":"43f07ac8-6809-4ed2-b054-d3711e73da34","listingId":"266928f9-42e8-4f99-a959-b0eef0856e7b","amountUsd":"0","unit":"free","nativeCurrency":null,"nativeAmount":null,"chain":null,"payTo":null,"paymentMethod":"mcp-free","isPrimary":true,"details":{"transport":"stdio"},"createdAt":"2026-05-31T07:21:55.013Z"}],"sources":[{"listingId":"266928f9-42e8-4f99-a959-b0eef0856e7b","source":"pulsemcp","sourceId":"https://www.pulsemcp.com/servers/ahmedeid1-forgejudge","sourceUrl":"https://api.pulsemcp.com/v0beta/servers","isPrimary":true,"firstSeenAt":"2026-05-31T07:21:55.013Z","lastSeenAt":"2026-06-20T05:22:33.504Z"}],"details":{"listingId":"266928f9-42e8-4f99-a959-b0eef0856e7b","quickStartSnippet":null,"exampleRequest":null,"exampleResponse":null,"schema":null,"openapiUrl":null,"agentsTxtUrl":null,"citations":[],"useCases":[],"bestFor":[],"notFor":[],"kindDetails":{"source":"pulsemcp","transport":"stdio","server_name":"ForgeJudge","external_url":"https://forgejudge.pages.dev","github_stars":0,"package_name":"forgejudge","registry_url":"https://www.pulsemcp.com/servers/ahmedeid1-forgejudge","source_code_url":"https://github.com/ahmedeid1/forgejudge","package_registry":"pypi","package_download_count":1362},"updatedAt":"2026-06-20T05:22:33.504Z"}}