···
18
-
enableGhostScript ? true,
24
-
enableGOCR ? false, # Disabled by default due to crashes
26
-
# Tesseract support is currently broken
27
-
# See: https://github.com/NixOS/nixpkgs/issues/368349
28
-
enableTesseract ? false,
30
-
enableLeptonica ? true,
36
-
# k2pdfopt is a pain to package. It requires modified versions of mupdf,
37
-
# leptonica, and tesseract. Instead of shipping patches for these upstream
38
-
# packages, k2pdfopt includes just the modified source files for these
39
-
# packages. The individual files from the {mupdf,leptonica,tesseract}_mod/
40
-
# directories are intended to replace the corresponding source files in the
41
-
# upstream packages, for a particular version of that upstream package.
43
-
# There are a few ways we could approach packaging these modified versions of
44
-
# mupdf, leptonica, and tesseract:
45
-
# 1) Override the upstream source with a new derivation that involves copying
46
-
# the modified source files from k2pdfopt and replacing the corresponding
47
-
# source files in the upstream packages. Since the files are intended for a
48
-
# particular version of the upstream package, this would not allow us to easily
49
-
# use updates to those packages in nixpkgs.
50
-
# 2) Manually produce patches which can be applied against the upstream
51
-
# project, and have the same effect as replacing those files. This is what I
52
-
# believe k2pdfopt should do for us anyway. The benefit of creating and
53
-
# applying patches in this way is that minor updates (esp. security fixes) to
54
-
# upstream packages might still allow these patches to apply successfully.
55
-
# 3) Automatically produce these patches inside a nix derivation. This is the
56
-
# approach taken here, using the "mkPatch" provided below. This has the
57
-
# benefit of easier review and should hopefully be simpler to update in the
61
-
# Create a patch against src based on changes applied in patchCommands
68
-
runCommand "${name}-k2pdfopt.patch" { inherit src; } ''
72
-
new=$sourceRoot-modded
75
-
pushd $new >/dev/null
79
-
diff -Naur $orig $new > $out || true
84
-
k2pdfopt_src = fetchzip {
85
-
url = "http://www.willus.com/${pname}/src/${pname}_v${version}_src.zip";
86
-
hash = "sha256-orQNDXQkkcCtlA8wndss6SiJk4+ImiFCG8XRLEg963k=";
89
-
stdenv.mkDerivation rec {
90
-
inherit pname version;
94
-
./0001-Fix-CMakeLists.patch
97
-
version = "${version}+ds";
98
-
debianRevision = "3.1";
99
-
patch = "0007-k2pdfoptlib-k2ocr.c-conditionally-enable-tesseract-r.patch";
100
-
hash = "sha256-uJ9Gpyq64n/HKqo0hkQ2dnkSLCKNN4DedItPGzHfqR8=";
102
-
(fetchDebianPatch {
104
-
version = "${version}+ds";
105
-
debianRevision = "3.1";
106
-
patch = "0009-willuslib-CMakeLists.txt-conditionally-add-source-fi.patch";
107
-
hash = "sha256-cBSlcuhsw4YgAJtBJkKLW6u8tK5gFwWw7pZEJzVMJDE=";
112
-
substituteInPlace willuslib/bmpdjvu.c \
113
-
--replace "<djvu.h>" "<libdjvu/ddjvuapi.h>"
116
-
nativeBuildInputs = [
124
-
# We use specific versions of these sources below to match the versions
125
-
# used in the k2pdfopt source. Note that this does _not_ need to match the
126
-
# version used elsewhere in nixpkgs, since it is only used to create the
127
-
# patch that can then be applied to the version in nixpkgs.
128
-
mupdf_patch = mkPatch {
131
-
url = "https://mupdf.com/downloads/archive/mupdf-1.23.7-source.tar.gz";
132
-
hash = "sha256-NaVJM/QA6JZnoImkJfHGXNadRiOU/tnAZ558Uu+6pWg=";
135
-
cp ${k2pdfopt_src}/mupdf_mod/{filter-basic,font,stext-device,string}.c ./source/fitz/
136
-
cp ${k2pdfopt_src}/mupdf_mod/pdf-* ./source/pdf/
139
-
# mupdf_patch no longer applies cleanly against mupdf 1.25.0 or later, due to a conflicting
140
-
# hunk (mupdf_conflict) introduced in commit bd8d337939f36f55b96cb6984f5c7bbf2f488ce0 of mupdf.
141
-
# This merge conflict can be resolved as desired by reverting mupdf_conflict, applying mupdf_patch,
142
-
# and finally reapplying mupdf_conflict, with an increased fuzz factor (see mupdf_modded below).
143
-
# TODO: remove workaround with conflicting hunk when mupdf in k2pdfopt is updated to 1.25.0 or later
147
-
name = "mupdf-conflicting-hunk" + (lib.optionalString revert "-reverted") + ".patch";
148
-
url = "https://github.com/ArtifexSoftware/mupdf/commit/bd8d337939f36f55b96cb6984f5c7bbf2f488ce0.patch";
149
-
inherit hash revert;
150
-
includes = [ "source/fitz/stext-device.c" ];
152
-
filterdiff -#6 "$out" > "$tmpfile"
153
-
mv "$tmpfile" "$out"
156
-
mupdf_modded = mupdf.overrideAttrs (
162
-
# The fuzz factor is increased to automatically resolve the merge conflict.
167
-
# Reverting and reapplying the conflicting hunk is necessary, otherwise the result will be faulty.
168
-
patches = patches ++ [
169
-
# revert conflicting hunk
170
-
(mupdf_conflict "sha256-24tl9YBuZBYhb12yY3T0lKsA7NswfK0QcMYhb2IpepA=" true)
171
-
# apply modifications
173
-
# reapply conflicting hunk
174
-
(mupdf_conflict "sha256-bnBV7LyX1w/BXxBFF1bkA8x+/0I9Am33o8GiAeEKHYQ=" false)
176
-
# This function is missing in font.c, see font-win32.c
178
-
echo "void pdf_install_load_system_font_funcs(fz_context *ctx) {}" >> source/fitz/font.c
183
-
leptonica_patch = mkPatch {
184
-
name = "leptonica";
186
-
url = "http://www.leptonica.org/source/leptonica-1.83.0.tar.gz";
187
-
hash = "sha256-IGWR3VjPhO84CDba0TO1jJ0a+SSR9amCXDRqFiBEvP4=";
189
-
patchCommands = "cp -r ${k2pdfopt_src}/leptonica_mod/. ./src/";
191
-
leptonica_modded = leptonica.overrideAttrs (
197
-
patches = patches ++ [ leptonica_patch ];
201
-
tesseract_patch = mkPatch {
202
-
name = "tesseract";
203
-
src = fetchFromGitHub {
204
-
owner = "tesseract-ocr";
205
-
repo = "tesseract";
207
-
hash = "sha256-/aGzwm2+0y8fheOnRi/OJXZy3o0xjY1cCq+B3GTzfos=";
210
-
cp ${k2pdfopt_src}/tesseract_mod/tesseract.* include/tesseract/
211
-
cp ${k2pdfopt_src}/tesseract_mod/tesseract/baseapi.h include/tesseract/
212
-
cp ${k2pdfopt_src}/tesseract_mod/{baseapi,config_auto,tesscapi,tesseract}.* src/api/
213
-
cp ${k2pdfopt_src}/tesseract_mod/tesseract/baseapi.h src/api/
214
-
cp ${k2pdfopt_src}/tesseract_mod/{tesscapi,tessedit,tesseract}.* src/ccmain/
215
-
cp ${k2pdfopt_src}/tesseract_mod/tesseract/baseapi.h src/ccmain/
216
-
cp ${k2pdfopt_src}/tesseract_mod/dotproduct{avx,fma,sse}.* src/arch/
217
-
cp ${k2pdfopt_src}/tesseract_mod/{intsimdmatrixsse,simddetect}.* src/arch/
218
-
cp ${k2pdfopt_src}/tesseract_mod/{errcode,genericvector,mainblk,params,serialis,tessdatamanager,tess_version,tprintf,unicharset}.* src/ccutil/
219
-
cp ${k2pdfopt_src}/tesseract_mod/{input,lstmrecognizer}.* src/lstm/
220
-
cp ${k2pdfopt_src}/tesseract_mod/openclwrapper.* src/opencl/
223
-
tesseract_modded = tesseract5.override {
224
-
tesseractBase = tesseract5.tesseractBase.overrideAttrs (
231
-
pname = "tesseract-k2pdfopt";
232
-
version = tesseract_patch.src.rev;
233
-
src = tesseract_patch.src;
234
-
# opencl-headers were removed from tesseract in Version 5.4
235
-
buildInputs = buildInputs ++ [ opencl-headers ];
236
-
patches = patches ++ [ tesseract_patch ];
237
-
# Additional compilation fixes
239
-
echo libtesseract_la_SOURCES += src/api/tesscapi.cpp >> Makefile.am
240
-
substituteInPlace src/api/tesseract.h \
241
-
--replace "#include <leptonica.h>" "//#include <leptonica.h>"
242
-
substituteInPlace include/tesseract/tesseract.h \
243
-
--replace "#include <leptonica.h>" "//#include <leptonica.h>"
255
-
++ lib.optional enableGSL gsl
256
-
++ lib.optional enableGhostScript ghostscript
257
-
++ lib.optional enableMuPDF mupdf_modded
258
-
++ lib.optional enableDJVU djvulibre
259
-
++ lib.optional enableGOCR gocr
260
-
++ lib.optional enableTesseract tesseract_modded
261
-
++ lib.optional (enableLeptonica || enableTesseract) leptonica_modded;
263
-
dontUseCmakeBuildDir = true;
265
-
cmakeFlags = [ "-DCMAKE_C_FLAGS=-I${src}/include_mod" ];
267
-
NIX_LDFLAGS = "-lpthread";
270
-
install -D -m 755 k2pdfopt $out/bin/k2pdfopt
273
-
preFixup = lib.optionalString enableTesseract ''
274
-
wrapProgram $out/bin/k2pdfopt --set-default TESSDATA_PREFIX ${tesseract5}/share/tessdata
278
-
description = "Optimizes PDF/DJVU files for mobile e-readers (e.g. the Kindle) and smartphones";
279
-
homepage = "http://www.willus.com/k2pdfopt";
280
-
changelog = "https://www.willus.com/k2pdfopt/k2pdfopt_version.txt";
281
-
license = licenses.gpl3;
282
-
platforms = platforms.linux;
283
-
maintainers = with maintainers; [