# Package expression for pymupdf4llm, built from the `pymupdf4llm`
# subdirectory of the pymupdf/RAG GitHub repository.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  pymupdf,
}:

buildPythonPackage rec {
  pname = "pymupdf4llm";
  version = "0.0.27";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "pymupdf";
    repo = "RAG";
    tag = "v${version}";
    hash = "sha256-rezdDsjNCDetvrX3uvykYuL/y40MZnr0fFMvQY3JRr0=";
  };

  # Build from the `pymupdf4llm` directory inside the fetched source tree
  # rather than from the repository root.
  sourceRoot = "${src.name}/pymupdf4llm";

  build-system = [ setuptools ];

  dependencies = [ pymupdf ];

  # Smoke test: generate a one-page PDF containing a known string, convert it
  # with `pymupdf4llm.to_markdown`, and assert that the result is a string
  # that still contains the text.
  #
  # PyMuPDF is imported under its current module name `pymupdf`; `fitz` is
  # only the legacy alias kept for backwards compatibility.
  checkPhase = ''
    runHook preCheck

    python3 - <<'EOF'
    import pymupdf
    import pymupdf4llm

    doc = pymupdf.open()
    page = doc.new_page()
    page.insert_text((72, 72), "Hello, Nix!")
    doc.save("input.pdf")

    md = pymupdf4llm.to_markdown("input.pdf")
    assert isinstance(md, str), "Returned value is not a string"
    assert "Hello, Nix!" in md, "Returned value does not contain the expected text"
    EOF

    runHook postCheck
  '';

  pythonImportsCheck = [ "pymupdf4llm" ];

  meta = {
    description = "PyMuPDF Utilities for LLM/RAG - converts PDF pages to Markdown format for Retrieval-Augmented Generation";
    homepage = "https://github.com/pymupdf/RAG";
    changelog = "https://github.com/pymupdf/RAG/blob/${src.tag}/CHANGES.md";
    license = lib.licenses.agpl3Only;
    maintainers = with lib.maintainers; [ ryota2357 ];
  };
}