{"id":"017c27ac-a298-433b-835c-f734225bcbb8","shortId":"a3y2DE","kind":"skill","title":"Apache Tika Document Parser Agent","tagline":"Extracts text and metadata from 1000+ file formats using Apache Tika server REST API. Handles PDF OCR via Tesseract integration, Office document parsing, and email archive extraction with MIME detection.","description":"# Apache Tika Document Parser Agent\n\nExtracts text and metadata from 1000+ file formats using Apache Tika server REST API. Handles PDF OCR via Tesseract integration, Office document parsing, and email archive extraction with MIME detection.\n\n## Installation\n\nRequirements and caveats from upstream:\n- **N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests. If Docker is not installed, those tests are skipped.\n\nBasic usage or getting-started notes:\n- ===========\n- **Parse a file in Java:**\n- java\n\n- Source: https://github.com/apache/tika\n- Extracted from upstream docs: https://raw.githubusercontent.com/apache/tika/HEAD/README.md\n\n## Source\n\n- [Agent Skill Exchange](https://agentskillexchange.com/skills/apache-tika-document-parser-agent/)","tags":["apache","tika","document","parser","agent","skills","agentskillexchange","agent-skills","ai-agents","ai-tools","awesome-list","claude-code"],"capabilities":["skill","source-agentskillexchange","skill-apache-tika-document-parser-agent","topic-agent-skills","topic-ai-agents","topic-ai-tools","topic-awesome-list","topic-claude-code","topic-codex","topic-cursor","topic-llm","topic-mcp","topic-npx-skills","topic-openclaw","topic-skills-catalog"],"categories":["skills"],"synonyms":[],"warnings":[],"endpointUrl":"https://skills.sh/agentskillexchange/skills/apache-tika-document-parser-agent","protocol":"skill","transport":"skills-sh","auth":{"type":"none","details":{"cli":"npx skills add agentskillexchange/skills","source_repo":"https://github.com/agentskillexchange/skills","install_from":"skills.sh"}},"qualityScore":"0.454","qualityRationale":"deterministic score 0.45 from registry signals: · indexed on github topic:agent-skills · 8 github stars · SKILL.md body (792 chars)","verified":false,"liveness":"unknown","lastLivenessCheck":null,"agentReviews":{"count":0,"score_avg":null,"cost_usd_avg":null,"success_rate":null,"latency_p50_ms":null,"narrative_summary":null,"summary_updated_at":null},"enrichmentModel":"deterministic:skill-github:v1","enrichmentVersion":1,"enrichedAt":"2026-05-18T19:09:23.240Z","embedding":null,"createdAt":"2026-05-18T13:15:06.081Z","updatedAt":"2026-05-18T19:09:23.240Z","lastSeenAt":"2026-05-18T19:09:23.240Z","tsv":"'/apache/tika':116 '/apache/tika/head/readme.md':123 '/products/personal)':81 '/skills/apache-tika-document-parser-agent/)':130 '1000':11,46 'agent':5,40,125 'agentskillexchange.com':129 'agentskillexchange.com/skills/apache-tika-document-parser-agent/)':128 'apach':1,15,36,50 'api':19,54 'archiv':31,66 'basic':100 'caveat':74 'detect':35,70 'doc':120 'docker':78,92 'document':3,27,38,62 'email':30,65 'exchang':127 'extract':6,32,41,67,117 'file':12,47,109 'format':13,48 'get':104 'getting-start':103 'github.com':115 'github.com/apache/tika':114 'handl':20,55 'instal':71,95 'integr':25,60,89 'java':111,112 'metadata':9,44 'mime':34,69 'n.b':77 'note':106 'ocr':22,57 'offic':26,61 'pars':28,63,107 'parser':4,39 'pdf':21,56 'raw.githubusercontent.com':122 'raw.githubusercontent.com/apache/tika/head/readme.md':121 'requir':72 'rest':18,53 'server':17,52 'skill':126 'skill-apache-tika-document-parser-agent' 'skip':99 'sourc':113,124 'source-agentskillexchange' 'start':105 'tesseract':24,59 'test':85,90,97 'text':7,42 'tika':2,16,37,51,88 'tika-integration-test':87 'topic-agent-skills' 'topic-ai-agents' 'topic-ai-tools' 'topic-awesome-list' 'topic-claude-code' 'topic-codex' 'topic-cursor' 'topic-llm' 'topic-mcp' 'topic-npx-skills' 'topic-openclaw' 'topic-skills-catalog' 'upstream':76,119 'usag':101 'use':14,49,83 'via':23,58 'www.docker.com':80 'www.docker.com/products/personal)':79","prices":[{"id":"7f29ed3d-e4bd-4659-b34d-0b06482bf8ba","listingId":"017c27ac-a298-433b-835c-f734225bcbb8","amountUsd":"0","unit":"free","nativeCurrency":null,"nativeAmount":null,"chain":null,"payTo":null,"paymentMethod":"skill-free","isPrimary":true,"details":{"org":"agentskillexchange","category":"skills","install_from":"skills.sh"},"createdAt":"2026-05-18T13:15:06.081Z"}],"sources":[{"listingId":"017c27ac-a298-433b-835c-f734225bcbb8","source":"github","sourceId":"agentskillexchange/skills/apache-tika-document-parser-agent","sourceUrl":"https://github.com/agentskillexchange/skills/tree/main/skills/apache-tika-document-parser-agent","isPrimary":false,"firstSeenAt":"2026-05-18T13:15:06.081Z","lastSeenAt":"2026-05-18T19:09:23.240Z"}],"details":{"listingId":"017c27ac-a298-433b-835c-f734225bcbb8","quickStartSnippet":null,"exampleRequest":null,"exampleResponse":null,"schema":null,"openapiUrl":null,"agentsTxtUrl":null,"citations":[],"useCases":[],"bestFor":[],"notFor":[],"kindDetails":{"org":"agentskillexchange","slug":"apache-tika-document-parser-agent","github":{"repo":"agentskillexchange/skills","stars":8,"topics":["agent-skills","ai-agents","ai-tools","awesome-list","claude-code","codex","cursor","llm","mcp","npx-skills","openclaw","skills-catalog"],"license":"mit","html_url":"https://github.com/agentskillexchange/skills","pushed_at":"2026-05-18T19:02:17Z","description":"The open catalog of AI agent skills — 2,000+ security-scanned skills for Claude Code, Cursor, Codex, and more.","skill_md_sha":"6ab5c6b01b470fe284e95980147f1321ccb61a0b","skill_md_path":"skills/apache-tika-document-parser-agent/SKILL.md","default_branch":"main","skill_tree_url":"https://github.com/agentskillexchange/skills/tree/main/skills/apache-tika-document-parser-agent"},"layout":"multi","source":"github","category":"skills","frontmatter":{"name":"Apache Tika Document Parser Agent","description":"Extracts text and metadata from 1000+ file formats using Apache Tika server REST API. Handles PDF OCR via Tesseract integration, Office document parsing, and email archive extraction with MIME detection."},"skills_sh_url":"https://skills.sh/agentskillexchange/skills/apache-tika-document-parser-agent"},"updatedAt":"2026-05-18T19:09:23.240Z"}}