# Package expression for OCRmyPDF, built with buildPythonPackage from the
# upstream GitHub release tag. Installs the `ocrmypdf` program (see
# meta.mainProgram) together with its bash and fish completions.
{
  lib,
  buildPythonPackage,
  deprecation,
  fetchFromGitHub,
  ghostscript_headless,
  hatch-vcs,
  hatchling,
  hypothesis,
  img2pdf,
  jbig2enc,
  packaging,
  pdfminer-six,
  pillow-heif,
  pikepdf,
  pillow,
  pluggy,
  pngquant,
  pytest-xdist,
  pytestCheckHook,
  rich,
  reportlab,
  replaceVars,
  tesseract,
  unpaper,
  installShellFiles,
}:

buildPythonPackage rec {
  pname = "ocrmypdf";
  version = "16.11.0";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "ocrmypdf";
    repo = "OCRmyPDF";
    # `tag` (rather than `rev`) states explicitly that a release tag is fetched.
    tag = "v${version}";
    # The content of .git_archival.txt is substituted upon tarball creation,
    # which creates indeterminism if master no longer points to the tag.
    # See https://github.com/ocrmypdf/OCRmyPDF/issues/841
    postFetch = ''
      rm "$out/.git_archival.txt"
    '';
    hash = "sha256-seylNBl29+QxN+3SbgRUdtTo1JwvW1sODpsz7Gwer3E=";
  };

  patches = [
    # NOTE(review): judging by its name, this patch switches HEIF support over
    # to the pillow-heif dependency listed below; confirm against the patch.
    ./use-pillow-heif.patch
    # paths.patch is a template: replaceVars fills its placeholders with the
    # executables of the external tools listed here.
    (replaceVars ./paths.patch {
      gs = lib.getExe ghostscript_headless;
      jbig2 = lib.getExe jbig2enc;
      pngquant = lib.getExe pngquant;
      tesseract = lib.getExe tesseract;
      unpaper = lib.getExe unpaper;
    })
  ];

  build-system = [
    hatch-vcs
    hatchling
  ];

  # Provides installShellCompletion, used in postInstall.
  nativeBuildInputs = [ installShellFiles ];

  dependencies = [
    deprecation
    img2pdf
    packaging
    pdfminer-six
    pillow-heif
    pikepdf
    pillow
    pluggy
    rich
  ];

  nativeCheckInputs = [
    hypothesis
    pytest-xdist
    pytestCheckHook
    reportlab
  ];

  pythonImportsCheck = [ "ocrmypdf" ];

  disabledTests = [
    # Broken by Python 3.13.4 change
    # https://github.com/python/cpython/commit/8e923f36596370aedfdfb12251447bface41317a
    # https://github.com/ocrmypdf/OCRmyPDF/blob/9f6e5a48ada5df7006a8c68b84e2aeae61943d8b/src/ocrmypdf/_exec/ghostscript.py#L66
    "TestDuplicateFilter"
  ];

  # Install the bash and fish completion scripts shipped in the source tree.
  postInstall = ''
    installShellCompletion --cmd ocrmypdf \
      --bash misc/completion/ocrmypdf.bash \
      --fish misc/completion/ocrmypdf.fish
  '';

  # No top-level `with lib;` here: only the two attribute sets that are
  # actually needed are brought into scope, each limited to its own list.
  meta = {
    homepage = "https://github.com/ocrmypdf/OCRmyPDF";
    description = "Adds an OCR text layer to scanned PDF files, allowing them to be searched";
    license = with lib.licenses; [
      mpl20
      mit
    ];
    maintainers = with lib.maintainers; [
      dotlambda
    ];
    changelog = "https://github.com/ocrmypdf/OCRmyPDF/blob/${src.tag}/docs/release_notes.md";
    mainProgram = "ocrmypdf";
  };
}