# Page header captured from the repository web view ("at master", 2.7 kB, "view raw"); not part of the Nix expression.
# Nix package expression for PaddleOCR, built from the upstream GitHub
# repository with `buildPythonPackage` in pyproject mode.
#
# Notable deviations from a plain build (all visible below):
#   * a local patch removes imports that need the `imgaug` package,
#   * an exact `==72.1.0` version pin is stripped from pyproject.toml,
#   * all dependency version constraints are relaxed and three dependencies
#     are removed from the package metadata,
#   * the test suite is disabled.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  setuptools,
  setuptools-scm,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv-python,
  openpyxl,
  pdf2docx,
  pillow,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
  paddlex,
  pyyaml,
}:

buildPythonPackage rec {
  pname = "paddleocr";
  version = "3.2.0";
  pyproject = true;

  # The source is fetched by release tag; `hash` must be updated together
  # with `version`.
  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    tag = "v${version}";
    hash = "sha256-lrFwrbDzOYFzZEz+P0roTtQMxeWBCDZuEVviyUzM3M4=";
  };

  patches = [
    # The `ppocr.data.imaug` module re-exports the `IaaAugment` and
    # `CopyPaste` classes. These classes depend on the `imgaug` package, which
    # is unmaintained and has been removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR doesn't use these classes though, so
    # it keeps working even after stripping the `IaaAugment` and `CopyPaste`
    # exports. It probably breaks some of the OCR model creation tooling that
    # PaddleOCR provides, however.
    ./remove-import-imaug.patch
  ];

  # Drop the exact `==72.1.0` version pin from pyproject.toml
  # (presumably the setuptools build requirement -- confirm against upstream).
  # `--replace-fail` makes the build fail if the pin is no longer present.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "==72.1.0" ""
  '';

  build-system = [
    setuptools
    setuptools-scm
  ];

  # trying to relax only pymupdf makes the whole build fail
  pythonRelaxDeps = true;
  # `imgaug` is the package the patch above works around; the reasons for
  # removing the other two are not recorded here.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-contrib-python"
  ];

  dependencies = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv-python
    openpyxl
    pdf2docx
    pillow
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
    paddlex
    pyyaml
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`.
  # But Cudatoolkit fails to install.
  # NOTE: `pkgs` is not an argument of this expression; re-enabling the lines
  # below requires adding `which` and `cudatoolkit` to the function arguments
  # instead of using `with pkgs;`.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  meta = {
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    license = lib.licenses.asl20;
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/${src.tag}";
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}