at master 1.6 kB view raw
# Package expression for markitdown, built from the `packages/markitdown`
# subdirectory of the upstream GitHub repository.
#
# All arguments are supplied by the caller (typically `callPackage` from the
# Python package set): the build helper, fetcher, build backend, runtime
# dependencies, the pytest hook, and the updater helper.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  hatchling,
  beautifulsoup4,
  defusedxml,
  ffmpeg-headless,
  magika,
  mammoth,
  markdownify,
  numpy,
  openai,
  openpyxl,
  pandas,
  pathvalidate,
  pdfminer-six,
  puremagic,
  pydub,
  python-pptx,
  requests,
  speechrecognition,
  youtube-transcript-api,
  olefile,
  xlrd,
  lxml,
  pytestCheckHook,
  gitUpdater,
}:

buildPythonPackage rec {
  pname = "markitdown";
  version = "0.1.3";
  pyproject = true;

  # Upstream tags releases as "v<version>".
  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "markitdown";
    tag = "v${version}";
    hash = "sha256-bHnJsv4ln1W0lVbWwLmCzQ15KOGJZ9gF2yx4TDuBqBI=";
  };

  # The Python project is not at the repository root; build from the
  # package's own subdirectory.
  sourceRoot = "${src.name}/packages/markitdown";

  build-system = [ hatchling ];

  dependencies = [
    beautifulsoup4
    defusedxml
    # NOTE(review): ffmpeg-headless is not a Python module; presumably it is
    # here so audio conversion (pydub) can find ffmpeg -- confirm.
    ffmpeg-headless
    lxml
    magika
    mammoth
    markdownify
    numpy
    olefile
    openai
    openpyxl
    pandas
    pathvalidate
    pdfminer-six
    puremagic
    pydub
    python-pptx
    requests
    speechrecognition
    xlrd
    youtube-transcript-api
  ];

  pythonImportsCheck = [ "markitdown" ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # Require network access
    "test_markitdown_remote"
    "test_module_vectors"
    "test_cli_vectors"
    "test_module_misc"
  ];

  # The attribute must be `updateScript` (singular) for update tooling to
  # find it. Tags are "v<version>", so strip the "v" prefix when deriving
  # the new version.
  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = {
    description = "Python tool for converting files and office documents to Markdown";
    homepage = "https://github.com/microsoft/markitdown";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ ];
  };
}