{"id":"017d495a-aaca-4464-99ac-15f70707a818","shortId":"RYm2PQ","kind":"skill","title":"Apache Tika Document Parser","tagline":"Extracts structured text, metadata, and embedded objects from PDFs, Office documents, and 1000+ file formats using the Apache Tika REST API. Outputs clean Markdown or JSON with XMP metadata preservation.","description":"# Apache Tika Document Parser\n\nExtracts structured text, metadata, and embedded objects from PDFs, Office documents, and 1000+ file formats using the Apache Tika REST API. Outputs clean Markdown or JSON with XMP metadata preservation.\n\n## Installation\n\nRequirements and caveats from upstream:\n- **N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests. If Docker is not installed, those tests are skipped.\n\nBasic usage or getting-started notes:\n- ===========\n- **Parse a file in Java:**\n- java\n\n- Source: https://github.com/apache/tika\n- Extracted from upstream docs: https://raw.githubusercontent.com/apache/tika/HEAD/README.md\n\n## Source\n\n- [Agent Skill Exchange](https://agentskillexchange.com/skills/apache-tika-document-parser/)","tags":["apache","tika","document","parser","skills","agentskillexchange","agent-skills","ai-agents","ai-tools","awesome-list","claude-code","codex"],"capabilities":["skill","source-agentskillexchange","skill-apache-tika-document-parser","topic-agent-skills","topic-ai-agents","topic-ai-tools","topic-awesome-list","topic-claude-code","topic-codex","topic-cursor","topic-llm","topic-mcp","topic-npx-skills","topic-openclaw","topic-skills-catalog"],"categories":["skills"],"synonyms":[],"warnings":[],"endpointUrl":"https://skills.sh/agentskillexchange/skills/apache-tika-document-parser","protocol":"skill","transport":"skills-sh","auth":{"type":"none","details":{"cli":"npx skills add agentskillexchange/skills","source_repo":"https://github.com/agentskillexchange/skills","install_from":"skills.sh"}},"qualityScore":"0.454","qualityRationale":"deterministic score 0.45 from registry signals: · indexed on github topic:agent-skills · 8 github stars · SKILL.md body (780 chars)","verified":false,"liveness":"unknown","lastLivenessCheck":null,"agentReviews":{"count":0,"score_avg":null,"cost_usd_avg":null,"success_rate":null,"latency_p50_ms":null,"narrative_summary":null,"summary_updated_at":null},"enrichmentModel":"deterministic:skill-github:v1","enrichmentVersion":1,"enrichedAt":"2026-05-18T19:09:23.338Z","embedding":null,"createdAt":"2026-05-18T13:15:06.209Z","updatedAt":"2026-05-18T19:09:23.338Z","lastSeenAt":"2026-05-18T19:09:23.338Z","tsv":"'/apache/tika':114 '/apache/tika/head/readme.md':121 '/products/personal)':79 '/skills/apache-tika-document-parser/)':128 '1000':17,51 'agent':123 'agentskillexchange.com':127 'agentskillexchange.com/skills/apache-tika-document-parser/)':126 'apach':1,22,35,56 'api':25,59 'basic':98 'caveat':72 'clean':27,61 'doc':118 'docker':76,90 'document':3,15,37,49 'embed':10,44 'exchang':125 'extract':5,39,115 'file':18,52,107 'format':19,53 'get':102 'getting-start':101 'github.com':113 'github.com/apache/tika':112 'instal':69,93 'integr':87 'java':109,110 'json':30,64 'markdown':28,62 'metadata':8,33,42,67 'n.b':75 'note':104 'object':11,45 'offic':14,48 'output':26,60 'pars':105 'parser':4,38 'pdfs':13,47 'preserv':34,68 'raw.githubusercontent.com':120 'raw.githubusercontent.com/apache/tika/head/readme.md':119 'requir':70 'rest':24,58 'skill':124 'skill-apache-tika-document-parser' 'skip':97 'sourc':111,122 'source-agentskillexchange' 'start':103 'structur':6,40 'test':83,88,95 'text':7,41 'tika':2,23,36,57,86 'tika-integration-test':85 'topic-agent-skills' 'topic-ai-agents' 'topic-ai-tools' 'topic-awesome-list' 'topic-claude-code' 'topic-codex' 'topic-cursor' 'topic-llm' 'topic-mcp' 'topic-npx-skills' 'topic-openclaw' 'topic-skills-catalog' 'upstream':74,117 'usag':99 'use':20,54,81 'www.docker.com':78 'www.docker.com/products/personal)':77 'xmp':32,66","prices":[{"id":"bac2c0fa-af42-400e-bc44-556f69fb4ecc","listingId":"017d495a-aaca-4464-99ac-15f70707a818","amountUsd":"0","unit":"free","nativeCurrency":null,"nativeAmount":null,"chain":null,"payTo":null,"paymentMethod":"skill-free","isPrimary":true,"details":{"org":"agentskillexchange","category":"skills","install_from":"skills.sh"},"createdAt":"2026-05-18T13:15:06.209Z"}],"sources":[{"listingId":"017d495a-aaca-4464-99ac-15f70707a818","source":"github","sourceId":"agentskillexchange/skills/apache-tika-document-parser","sourceUrl":"https://github.com/agentskillexchange/skills/tree/main/skills/apache-tika-document-parser","isPrimary":false,"firstSeenAt":"2026-05-18T13:15:06.209Z","lastSeenAt":"2026-05-18T19:09:23.338Z"}],"details":{"listingId":"017d495a-aaca-4464-99ac-15f70707a818","quickStartSnippet":null,"exampleRequest":null,"exampleResponse":null,"schema":null,"openapiUrl":null,"agentsTxtUrl":null,"citations":[],"useCases":[],"bestFor":[],"notFor":[],"kindDetails":{"org":"agentskillexchange","slug":"apache-tika-document-parser","github":{"repo":"agentskillexchange/skills","stars":8,"topics":["agent-skills","ai-agents","ai-tools","awesome-list","claude-code","codex","cursor","llm","mcp","npx-skills","openclaw","skills-catalog"],"license":"mit","html_url":"https://github.com/agentskillexchange/skills","pushed_at":"2026-05-18T19:02:17Z","description":"The open catalog of AI agent skills — 2,000+ security-scanned skills for Claude Code, Cursor, Codex, and more.","skill_md_sha":"fcf3be6fef0ff4110c2291fbb65640c597ab954b","skill_md_path":"skills/apache-tika-document-parser/SKILL.md","default_branch":"main","skill_tree_url":"https://github.com/agentskillexchange/skills/tree/main/skills/apache-tika-document-parser"},"layout":"multi","source":"github","category":"skills","frontmatter":{"name":"Apache Tika Document Parser","description":"Extracts structured text, metadata, and embedded objects from PDFs, Office documents, and 1000+ file formats using the Apache Tika REST API. Outputs clean Markdown or JSON with XMP metadata preservation."},"skills_sh_url":"https://skills.sh/agentskillexchange/skills/apache-tika-document-parser"},"updatedAt":"2026-05-18T19:09:23.338Z"}}