{"id":"4e11af0b-7b37-4aa2-beb2-870948287b6b","shortId":"DEkvp5","kind":"skill","title":"vLLM High-Throughput LLM Serving Engine with PagedAttention","tagline":"vLLM is a fast and memory-efficient inference and serving engine for large language models. It uses PagedAttention for efficient memory management, supports continuous batching, and provides an OpenAI-compatible API server for production-grade LLM deployment.","description":"# vLLM High-Throughput LLM Serving Engine with PagedAttention\n\nvLLM is a fast and memory-efficient inference and serving engine for large language models. It uses PagedAttention for efficient memory management, supports continuous batching, and provides an OpenAI-compatible API server for production-grade LLM deployment.\n\n## Installation\n\nNo source-backed install or usage instructions could be extracted automatically. Review the upstream project before running this skill in a sensitive workflow.\n\n- Source: https://github.com/vllm-project/vllm\n\n## Source\n\n- [Agent Skill Exchange](https://agentskillexchange.com/skills/vllm-high-throughput-llm-serving/)","tags":["vllm","high","throughput","llm","serving","skills","agentskillexchange","agent-skills","ai-agents","ai-tools","awesome-list","claude-code"],"capabilities":["skill","source-agentskillexchange","skill-vllm-high-throughput-llm-serving","topic-agent-skills","topic-ai-agents","topic-ai-tools","topic-awesome-list","topic-claude-code","topic-codex","topic-cursor","topic-llm","topic-mcp","topic-npx-skills","topic-openclaw","topic-skills-catalog"],"categories":["skills"],"synonyms":[],"warnings":[],"endpointUrl":"https://skills.sh/agentskillexchange/skills/vllm-high-throughput-llm-serving","protocol":"skill","transport":"skills-sh","auth":{"type":"none","details":{"cli":"npx skills add agentskillexchange/skills","source_repo":"https://github.com/agentskillexchange/skills","install_from":"skills.sh"}},"qualityScore":"0.454","qualityRationale":"deterministic score 0.45 from registry signals: · indexed on github topic:agent-skills · 8 github stars · SKILL.md body (658 chars)","verified":false,"liveness":"unknown","lastLivenessCheck":null,"agentReviews":{"count":0,"score_avg":null,"cost_usd_avg":null,"success_rate":null,"latency_p50_ms":null,"narrative_summary":null,"summary_updated_at":null},"enrichmentModel":"deterministic:skill-github:v1","enrichmentVersion":1,"enrichedAt":"2026-05-18T19:13:03.344Z","embedding":null,"createdAt":"2026-05-18T13:20:14.815Z","updatedAt":"2026-05-18T19:13:03.344Z","lastSeenAt":"2026-05-18T19:13:03.344Z","tsv":"'/skills/vllm-high-throughput-llm-serving/)':134 '/vllm-project/vllm':127 'agent':129 'agentskillexchange.com':133 'agentskillexchange.com/skills/vllm-high-throughput-llm-serving/)':132 'api':42,91 'automat':111 'back':103 'batch':35,84 'compat':41,90 'continu':34,83 'could':108 'deploy':49,98 'effici':17,30,66,79 'engin':7,21,56,70 'exchang':131 'extract':110 'fast':13,62 'github.com':126 'github.com/vllm-project/vllm':125 'grade':47,96 'high':3,52 'high-throughput':2,51 'infer':18,67 'instal':99,104 'instruct':107 'languag':24,73 'larg':23,72 'llm':5,48,54,97 'manag':32,81 'memori':16,31,65,80 'memory-effici':15,64 'model':25,74 'openai':40,89 'openai-compat':39,88 'pagedattent':9,28,58,77 'product':46,95 'production-grad':45,94 'project':115 'provid':37,86 'review':112 'run':117 'sensit':122 'serv':6,20,55,69 'server':43,92 'skill':119,130 'skill-vllm-high-throughput-llm-serving' 'sourc':102,124,128 'source-agentskillexchange' 'source-back':101 'support':33,82 'throughput':4,53 'topic-agent-skills' 'topic-ai-agents' 'topic-ai-tools' 'topic-awesome-list' 'topic-claude-code' 'topic-codex' 'topic-cursor' 'topic-llm' 'topic-mcp' 'topic-npx-skills' 'topic-openclaw' 'topic-skills-catalog' 'upstream':114 'usag':106 'use':27,76 'vllm':1,10,50,59 'workflow':123","prices":[{"id":"034a2358-3014-40d6-9a7b-2d411eba8a11","listingId":"4e11af0b-7b37-4aa2-beb2-870948287b6b","amountUsd":"0","unit":"free","nativeCurrency":null,"nativeAmount":null,"chain":null,"payTo":null,"paymentMethod":"skill-free","isPrimary":true,"details":{"org":"agentskillexchange","category":"skills","install_from":"skills.sh"},"createdAt":"2026-05-18T13:20:14.815Z"}],"sources":[{"listingId":"4e11af0b-7b37-4aa2-beb2-870948287b6b","source":"github","sourceId":"agentskillexchange/skills/vllm-high-throughput-llm-serving","sourceUrl":"https://github.com/agentskillexchange/skills/tree/main/skills/vllm-high-throughput-llm-serving","isPrimary":false,"firstSeenAt":"2026-05-18T13:20:14.815Z","lastSeenAt":"2026-05-18T19:13:03.344Z"}],"details":{"listingId":"4e11af0b-7b37-4aa2-beb2-870948287b6b","quickStartSnippet":null,"exampleRequest":null,"exampleResponse":null,"schema":null,"openapiUrl":null,"agentsTxtUrl":null,"citations":[],"useCases":[],"bestFor":[],"notFor":[],"kindDetails":{"org":"agentskillexchange","slug":"vllm-high-throughput-llm-serving","github":{"repo":"agentskillexchange/skills","stars":8,"topics":["agent-skills","ai-agents","ai-tools","awesome-list","claude-code","codex","cursor","llm","mcp","npx-skills","openclaw","skills-catalog"],"license":"mit","html_url":"https://github.com/agentskillexchange/skills","pushed_at":"2026-05-18T19:02:17Z","description":"The open catalog of AI agent skills — 2,000+ security-scanned skills for Claude Code, Cursor, Codex, and more.","skill_md_sha":"7001d9e6872c27d595462eb4304af6b101a91dd0","skill_md_path":"skills/vllm-high-throughput-llm-serving/SKILL.md","default_branch":"main","skill_tree_url":"https://github.com/agentskillexchange/skills/tree/main/skills/vllm-high-throughput-llm-serving"},"layout":"multi","source":"github","category":"skills","frontmatter":{"name":"vLLM High-Throughput LLM Serving Engine with PagedAttention","description":"vLLM is a fast and memory-efficient inference and serving engine for large language models. It uses PagedAttention for efficient memory management, supports continuous batching, and provides an OpenAI-compatible API server for production-grade LLM deployment."},"skills_sh_url":"https://skills.sh/agentskillexchange/skills/vllm-high-throughput-llm-serving"},"updatedAt":"2026-05-18T19:13:03.344Z"}}