1{
2 lib,
3 buildPythonPackage,
4 fetchFromGitHub,
5
6 fetchzip,
7 replaceVars,
8
9 setuptools,
10 pyclipper,
11 opencv-python,
12 numpy,
13 six,
14 shapely,
15 pyyaml,
16 pillow,
17 onnxruntime,
18 tqdm,
19
20 pytestCheckHook,
21 requests,
22}:
let
  # Release packaged here; the upstream Git tag is "v" followed by this value.
  version = "1.4.4";

  # Checkout of the upstream RapidOCR repository at that tag.
  src = fetchFromGitHub {
    repo = "RapidOCR";
    owner = "RapidAI";
    hash = "sha256-x0VELDKOffxbV3v0aDFJFuDC4YfsGM548XWgINmRc3M=";
    tag = "v${version}";
  };

  # Zip attached to the upstream v1.1.0 release. stripRoot is disabled, so the
  # archive's own top-level directory is kept in the unpacked output.
  modelsArchive = fetchzip {
    stripRoot = false;
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
  };

  # Path of the models directory inside the unpacked archive.
  models = "${modelsArchive}/required_for_whl_v1.3.0/resources/models";
in
buildPythonPackage {
  pname = "rapidocr-onnxruntime";
  inherit src version;
  pyproject = true;

  # The Python sources live in the "python" subdirectory of the checkout.
  sourceRoot = src.name + "/python";

  build-system = [ setuptools ];

  dependencies = [
    pyclipper
    opencv-python
    numpy
    six
    shapely
    pyyaml
    pillow
    onnxruntime
    tqdm
  ];

  # HACK:
  # Upstream lays its packages out in a very unconventional way, so the hooks
  # below bend the source tree into a shape the standard build infrastructure
  # can handle. The "intended" upstream build procedure is described in
  # https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92

  # Upstream's setup.py works out its version by asking PyPI for the latest
  # published release and bumping that number. The Nix build environment has
  # no Internet access, so this patch removes the lookup and substitutes the
  # version declared for this package instead.
  patches = [
    (replaceVars ./setup-py-override-version-checking.patch { inherit version; })
  ];

  # 1. Promote the onnxruntime-specific setup script to setup.py.
  # 2. Symlink the pre-fetched model files into the package's models directory.
  # 3. Write a top-level __init__.py that re-exports RapidOCR and VisRes.
  postPatch = ''
    mv setup_onnxruntime.py setup.py

    ln -s ${models}/* rapidocr_onnxruntime/models

    echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
  '';

  # For the wheel to build correctly, upstream expects the sources at
  # rapidocr_onnxruntime/rapidocr_onnxruntime rather than rapidocr_onnxruntime,
  # so nest the directory one level deeper (via a temporary name).
  preBuild = ''
    mkdir rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t rapidocr_onnxruntime
  '';

  # Undo the nesting performed in preBuild once the wheel has been built.
  postBuild = ''
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t/* .
  '';

  nativeCheckInputs = [
    pytestCheckHook
    requests
  ];

  pythonImportsCheck = [ "rapidocr_onnxruntime" ];

  # These test files target other backends, not onnxruntime.
  disabledTestPaths = [
    "tests/test_vino.py"
    "tests/test_paddle.py"
  ];

  disabledTests = [
    # Needs Internet access
    "test_long_img"
  ];

  # Upstream renamed rapidocr-onnxruntime to rapidocr as of 2.0.0. However,
  # some packages such as open-webui still require rapidocr-onnxruntime 1.4.4,
  # so automatic updates are disabled for this package.
  # nixpkgs-update: no auto update
  passthru = {
    skipBulkUpdate = true;
  };

  meta = {
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.wrvsrx ];
    mainProgram = "rapidocr_onnxruntime";
    # This seems to be related to https://github.com/microsoft/onnxruntime/issues/10038
    # See also: https://github.com/NixOS/nixpkgs/pull/319053#issuecomment-2167713362
    badPlatforms = [ "aarch64-linux" ];
  };
}