{"id":"070cc916-9084-4e58-b881-ca2126ed71e4","shortId":"LYs7xU","kind":"skill","title":"Apache Tika Content Extraction Hub","tagline":"Extracts text and metadata from 1400+ file formats via Apache Tika Server REST API. Handles PDF, DOCX, PPTX, email archives, and embedded document extraction with MIME type detection.","description":"# Apache Tika Content Extraction Hub\n\nExtracts text and metadata from 1400+ file formats via Apache Tika Server REST API. Handles PDF, DOCX, PPTX, email archives, and embedded document extraction with MIME type detection.\n\n## Installation\n\nRequirements and caveats from upstream:\n- **N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests. If Docker is not installed, those tests are skipped.\n\nBasic usage or getting-started notes:\n- ===========\n- **Parse a file in Java:**\n- java\n\n- Source: https://github.com/apache/tika\n- Extracted from upstream docs: https://raw.githubusercontent.com/apache/tika/HEAD/README.md\n\n## Source\n\n- [Agent Skill Exchange](https://agentskillexchange.com/skills/apache-tika-content-extraction-hub/)","tags":["apache","tika","content","extraction","hub","skills","agentskillexchange","agent-skills","ai-agents","ai-tools","awesome-list","claude-code"],"capabilities":["skill","source-agentskillexchange","skill-apache-tika-content-extraction-hub","topic-agent-skills","topic-ai-agents","topic-ai-tools","topic-awesome-list","topic-claude-code","topic-codex","topic-cursor","topic-llm","topic-mcp","topic-npx-skills","topic-openclaw","topic-skills-catalog"],"categories":["skills"],"synonyms":[],"warnings":[],"endpointUrl":"https://skills.sh/agentskillexchange/skills/apache-tika-content-extraction-hub","protocol":"skill","transport":"skills-sh","auth":{"type":"none","details":{"cli":"npx skills add agentskillexchange/skills","source_repo":"https://github.com/agentskillexchange/skills","install_from":"skills.sh"}},"qualityScore":"0.454","qualityRationale":"deterministic score 0.45 from registry signals: · indexed on github topic:agent-skills · 8 github stars · SKILL.md body (774 chars)","verified":false,"liveness":"unknown","lastLivenessCheck":null,"agentReviews":{"count":0,"score_avg":null,"cost_usd_avg":null,"success_rate":null,"latency_p50_ms":null,"narrative_summary":null,"summary_updated_at":null},"enrichmentModel":"deterministic:skill-github:v1","enrichmentVersion":1,"enrichedAt":"2026-05-18T19:09:23.052Z","embedding":null,"createdAt":"2026-05-18T13:15:05.839Z","updatedAt":"2026-05-18T19:09:23.052Z","lastSeenAt":"2026-05-18T19:09:23.052Z","tsv":"'/apache/tika':112 '/apache/tika/head/readme.md':119 '/products/personal)':77 '/skills/apache-tika-content-extraction-hub/)':126 '1400':11,44 'agent':121 'agentskillexchange.com':125 'agentskillexchange.com/skills/apache-tika-content-extraction-hub/)':124 'apach':1,15,34,48 'api':19,52 'archiv':25,58 'basic':96 'caveat':70 'content':3,36 'detect':33,66 'doc':116 'docker':74,88 'document':28,61 'docx':22,55 'email':24,57 'embed':27,60 'exchang':123 'extract':4,6,29,37,39,62,113 'file':12,45,105 'format':13,46 'get':100 'getting-start':99 'github.com':111 'github.com/apache/tika':110 'handl':20,53 'hub':5,38 'instal':67,91 'integr':85 'java':107,108 'metadata':9,42 'mime':31,64 'n.b':73 'note':102 'pars':103 'pdf':21,54 'pptx':23,56 'raw.githubusercontent.com':118 'raw.githubusercontent.com/apache/tika/head/readme.md':117 'requir':68 'rest':18,51 'server':17,50 'skill':122 'skill-apache-tika-content-extraction-hub' 'skip':95 'sourc':109,120 'source-agentskillexchange' 'start':101 'test':81,86,93 'text':7,40 'tika':2,16,35,49,84 'tika-integration-test':83 'topic-agent-skills' 'topic-ai-agents' 'topic-ai-tools' 'topic-awesome-list' 'topic-claude-code' 'topic-codex' 'topic-cursor' 'topic-llm' 'topic-mcp' 'topic-npx-skills' 'topic-openclaw' 'topic-skills-catalog' 'type':32,65 'upstream':72,115 'usag':97 'use':79 'via':14,47 'www.docker.com':76 'www.docker.com/products/personal)':75","prices":[{"id":"aa9f6909-6212-47fd-af9b-cba511cc3be6","listingId":"070cc916-9084-4e58-b881-ca2126ed71e4","amountUsd":"0","unit":"free","nativeCurrency":null,"nativeAmount":null,"chain":null,"payTo":null,"paymentMethod":"skill-free","isPrimary":true,"details":{"org":"agentskillexchange","category":"skills","install_from":"skills.sh"},"createdAt":"2026-05-18T13:15:05.839Z"}],"sources":[{"listingId":"070cc916-9084-4e58-b881-ca2126ed71e4","source":"github","sourceId":"agentskillexchange/skills/apache-tika-content-extraction-hub","sourceUrl":"https://github.com/agentskillexchange/skills/tree/main/skills/apache-tika-content-extraction-hub","isPrimary":false,"firstSeenAt":"2026-05-18T13:15:05.839Z","lastSeenAt":"2026-05-18T19:09:23.052Z"}],"details":{"listingId":"070cc916-9084-4e58-b881-ca2126ed71e4","quickStartSnippet":null,"exampleRequest":null,"exampleResponse":null,"schema":null,"openapiUrl":null,"agentsTxtUrl":null,"citations":[],"useCases":[],"bestFor":[],"notFor":[],"kindDetails":{"org":"agentskillexchange","slug":"apache-tika-content-extraction-hub","github":{"repo":"agentskillexchange/skills","stars":8,"topics":["agent-skills","ai-agents","ai-tools","awesome-list","claude-code","codex","cursor","llm","mcp","npx-skills","openclaw","skills-catalog"],"license":"mit","html_url":"https://github.com/agentskillexchange/skills","pushed_at":"2026-05-18T19:02:17Z","description":"The open catalog of AI agent skills — 2,000+ security-scanned skills for Claude Code, Cursor, Codex, and more.","skill_md_sha":"444ee75aff47796c424a385e57afb60588517c7c","skill_md_path":"skills/apache-tika-content-extraction-hub/SKILL.md","default_branch":"main","skill_tree_url":"https://github.com/agentskillexchange/skills/tree/main/skills/apache-tika-content-extraction-hub"},"layout":"multi","source":"github","category":"skills","frontmatter":{"name":"Apache Tika Content Extraction Hub","description":"Extracts text and metadata from 1400+ file formats via Apache Tika Server REST API. Handles PDF, DOCX, PPTX, email archives, and embedded document extraction with MIME type detection."},"skills_sh_url":"https://skills.sh/agentskillexchange/skills/apache-tika-content-extraction-hub"},"updatedAt":"2026-05-18T19:09:23.052Z"}}