# Nix package expression for RapidOCR's Python distribution.
#
# The upstream repository keeps the Python sources in a `python/` subdirectory
# and expects a nested `rapidocr/rapidocr` layout when building the wheel, so
# several phases below rearrange the tree before and after the build.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  fetchzip,
  replaceVars,

  setuptools,
  colorlog,
  pyclipper,
  opencv-python,
  omegaconf,
  numpy,
  six,
  shapely,
  pyyaml,
  pillow,
  onnxruntime,
  tqdm,

  pytestCheckHook,
  requests,
}:
let
  version = "3.4.1";

  # Upstream source tree, pinned to the release tag matching `version`.
  src = fetchFromGitHub {
    owner = "RapidAI";
    repo = "RapidOCR";
    tag = "v${version}";
    hash = "sha256-Q8QtjI+5QDv6zQ96aXLyEepHfMh75DR+ZWj/ygVx3o0=";
  };

  # Release archive that ships the model files; it is unpacked without
  # stripping its top-level directory, hence the full path used below.
  modelArchive = fetchzip {
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
    stripRoot = false;
  };

  # Directory inside the unpacked archive that holds the model files.
  bundledModels = "${modelArchive}/required_for_whl_v1.3.0/resources/models";
in
buildPythonPackage {
  pname = "rapidocr";
  inherit version src;
  pyproject = true;

  # Build from the `python/` subdirectory of the checkout.
  sourceRoot = "${src.name}/python";

  # HACK:
  # Upstream organizes its packages in a very unconventional way, so the
  # existing infrastructure has to be coaxed into working with it.
  # The "intended" way of building this package is shown in
  # https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92

  # Upstream's setup.py works out the version by fetching the latest release
  # from PyPI and bumping that number. The Nix build environment has no
  # internet access, so this is patched out and the version is substituted
  # into the patch from this expression instead.
  patches = [
    (replaceVars ./setup-py-override-version-checking.patch {
      inherit version;
    })
  ];

  # Link the pre-fetched model files into the package tree and write a
  # top-level __init__.py that re-exports the public entry points.
  postPatch = ''
    mkdir -p rapidocr/models

    ln -s ${bundledModels}/* rapidocr/models

    echo "from .rapidocr.main import RapidOCR, VisRes" > __init__.py
  '';

  # For the wheel to build correctly, upstream expects the sources to live in
  # rapidocr/rapidocr rather than rapidocr, so nest the directory one level
  # deeper by way of a temporary name.
  preBuild = ''
    mkdir rapidocr_t
    mv rapidocr rapidocr_t
    mv rapidocr_t rapidocr
  '';

  # Undo the nesting performed in preBuild.
  postBuild = ''
    mv rapidocr rapidocr_t
    mv rapidocr_t/* .
  '';

  build-system = [ setuptools ];

  dependencies = [
    colorlog
    numpy
    omegaconf
    onnxruntime
    opencv-python
    pillow
    pyclipper
    pyyaml
    requests
    shapely
    six
    tqdm
  ];

  pythonImportsCheck = [ "rapidocr" ];

  # As of version 2.1.0, 61 of the 70 tests need internet access, and
  # hand-picking the few that still work in a hermetic build environment
  # is no longer practical, so the test suite is disabled altogether.
  doCheck = false;

  meta = {
    # Seemingly related to https://github.com/microsoft/onnxruntime/issues/10038
    # See also: https://github.com/NixOS/nixpkgs/pull/319053#issuecomment-2167713362
    badPlatforms = [ "aarch64-linux" ];
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/${src.tag}";
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.pluiedev ];
    mainProgram = "rapidocr";
  };
}