1commit cfc05af26b571e9ca09e9c709c0fb8934e9e46dd
2Author: Guillaume Girol <symphorien+git@xlumurb.eu>
3Date: Sat Aug 20 17:48:01 2022 +0200
4
5 Fix finding tesseract: use substituted paths for the binary, library and tessdata
6
7diff --git a/src/pyocr/libtesseract/tesseract_raw.py b/src/pyocr/libtesseract/tesseract_raw.py
8index 1edec8c..434a336 100644
9--- a/src/pyocr/libtesseract/tesseract_raw.py
10+++ b/src/pyocr/libtesseract/tesseract_raw.py
11@@ -2,7 +2,6 @@ import ctypes
12 import locale
13 import logging
14 import os
15-import sys
16
17 from ..error import TesseractError
18
19@@ -10,51 +9,16 @@ from ..error import TesseractError
20 logger = logging.getLogger(__name__)
21
22 TESSDATA_PREFIX = os.getenv('TESSDATA_PREFIX', None)
23-libnames = []
24+if TESSDATA_PREFIX is None:
25+ TESSDATA_PREFIX = '@tesseract@/share/tessdata'
26+ os.environ['TESSDATA_PREFIX'] = TESSDATA_PREFIX
27+
28+
29 # 70 is the minimum credible dpi for tesseract and force it to compute an
30 # estimate of the image dpi
31 DPI_DEFAULT = 70
32
33-
34-if getattr(sys, 'frozen', False) and hasattr(sys, '_MEIPASS'):
35- # Pyinstaller integration
36- libnames += [os.path.join(sys._MEIPASS, "libtesseract-4.dll")]
37- libnames += [os.path.join(sys._MEIPASS, "libtesseract-3.dll")]
38- tessdata = os.path.join(sys._MEIPASS, "data")
39- if not os.path.exists(os.path.join(tessdata, "tessdata")):
40- logger.warning(
41- "Running from container, but no tessdata ({}) found !".format(
42- tessdata
43- )
44- )
45- else:
46- TESSDATA_PREFIX = os.path.join(tessdata, "tessdata")
47-
48-
49-if sys.platform[:3] == "win": # pragma: no cover
50- libnames += [
51- # Jflesch> Don't they have the equivalent of LD_LIBRARY_PATH on
52- # Windows ?
53- "../vs2010/DLL_Release/libtesseract302.dll",
54- # prefer the most recent first
55- "libtesseract305.dll",
56- "libtesseract304.dll",
57- "libtesseract303.dll",
58- "libtesseract302.dll",
59- "libtesseract400.dll", # Tesseract 4 is still in alpha stage
60- "libtesseract.dll",
61- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-4.dll",
62- "C:\\Program Files (x86)\\Tesseract-OCR\\libtesseract-3.dll",
63- ]
64-else:
65- libnames += [
66- "libtesseract.so.5",
67- "libtesseract.so.4",
68- "libtesseract.so.3",
69- "libtesseract.5.dylib",
70- "libtesseract.4.dylib",
71- ]
72-
73+libnames = [ "@tesseractLibraryLocation@" ]
74
75 g_libtesseract = None
76
77@@ -367,12 +331,12 @@ def init(lang=None):
78 try:
79 if lang:
80 lang = lang.encode("utf-8")
81- prefix = None
82- if TESSDATA_PREFIX: # pragma: no cover
83- prefix = TESSDATA_PREFIX.encode("utf-8")
84+
85+ prefix = TESSDATA_PREFIX
86+
87 g_libtesseract.TessBaseAPIInit3(
88 ctypes.c_void_p(handle),
89- ctypes.c_char_p(prefix),
90+ ctypes.c_char_p(prefix.encode('utf-8')),
91 ctypes.c_char_p(lang)
92 )
93 g_libtesseract.TessBaseAPISetVariable(
94diff --git a/src/pyocr/tesseract.py b/src/pyocr/tesseract.py
95index 0fe0d20..c1fdd27 100644
96--- a/src/pyocr/tesseract.py
97+++ b/src/pyocr/tesseract.py
98@@ -28,8 +28,7 @@ from .builders import DigitBuilder # backward compatibility
99 from .error import TesseractError # backward compatibility
100 from .util import digits_only
101
102-# CHANGE THIS IF TESSERACT IS NOT IN YOUR PATH, OR IS NAMED DIFFERENTLY
103-TESSERACT_CMD = 'tesseract.exe' if os.name == 'nt' else 'tesseract'
104+TESSERACT_CMD = '@tesseract@/bin/tesseract'
105
106 TESSDATA_EXTENSION = ".traineddata"
107
108diff --git a/tests/test_libtesseract.py b/tests/test_libtesseract.py
109index cc31a50..890c02c 100644
110--- a/tests/test_libtesseract.py
111+++ b/tests/test_libtesseract.py
112@@ -167,7 +167,8 @@ class TestLibTesseractRaw(BaseTest):
113 args = libtess.TessBaseAPIInit3.call_args[0]
114 self.assertEqual(len(args), 3)
115 self.assertEqual(args[0].value, self.handle)
116- self.assertEqual(args[1].value, None)
117+ # TESSDATA_PREFIX always has a value (environment or hardcoded default), so the prefix argument is never None
118+ #self.assertEqual(args[1].value, None)
119 self.assertEqual(args[2].value, lang.encode() if lang else None)
120
121 self.assertEqual(
122@@ -203,7 +204,8 @@ class TestLibTesseractRaw(BaseTest):
123 args = libtess.TessBaseAPIInit3.call_args[0]
124 self.assertEqual(len(args), 3)
125 self.assertEqual(args[0].value, self.handle)
126- self.assertEqual(args[1].value, None)
127+ # TESSDATA_PREFIX always has a value (environment or hardcoded default), so the prefix argument is never None
128+ #self.assertEqual(args[1].value, None)
129 self.assertEqual(args[2].value, lang.encode() if lang else None)
130
131 self.assertEqual(
132diff --git a/tests/test_tesseract.py b/tests/test_tesseract.py
133index 823818f..2ee5fb4 100644
134--- a/tests/test_tesseract.py
135+++ b/tests/test_tesseract.py
136@@ -37,7 +37,7 @@ class TestTesseract(BaseTest):
137 def test_available(self, which):
138 which.return_value = True
139 self.assertTrue(tesseract.is_available())
140- which.assert_called_once_with("tesseract")
141+ which.assert_called_once_with("@tesseract@/bin/tesseract")
142
143 @patch("subprocess.Popen")
144 def test_version_error(self, popen):
145@@ -163,7 +163,7 @@ class TestTesseract(BaseTest):
146 for lang in ("eng", "fra", "jpn", "osd"):
147 self.assertIn(lang, langs)
148 popen.assert_called_once_with(
149- ["tesseract", "--list-langs"],
150+ ["@tesseract@/bin/tesseract", "--list-langs"],
151 startupinfo=None, creationflags=0,
152 stdout=subprocess.PIPE, stderr=subprocess.STDOUT
153 )
154@@ -178,7 +178,7 @@ class TestTesseract(BaseTest):
155 self.assertEqual(te.exception.status, 1)
156 self.assertEqual("unable to get languages", te.exception.message)
157 popen.assert_called_once_with(
158- ["tesseract", "--list-langs"],
159+ ["@tesseract@/bin/tesseract", "--list-langs"],
160 startupinfo=None, creationflags=0,
161 stdout=subprocess.PIPE, stderr=subprocess.STDOUT
162 )
163@@ -255,7 +255,7 @@ class TestTesseract(BaseTest):
164 self.assertEqual(status, 0)
165 self.assertEqual(error, message)
166 popen.assert_called_once_with(
167- ["tesseract", "input.bmp", "output"],
168+ ["@tesseract@/bin/tesseract", "input.bmp", "output"],
169 cwd=tmpdir,
170 startupinfo=None,
171 creationflags=0,
172@@ -278,7 +278,7 @@ class TestTesseract(BaseTest):
173 self.assertEqual(status, 0)
174 self.assertEqual(error, message)
175 popen.assert_called_with(
176- ["tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
177+ ["@tesseract@/bin/tesseract", "input2.bmp", "output2", "-l", "fra", "--psm", "3"],
178 cwd=tmpdir,
179 startupinfo=None,
180 creationflags=0,
181@@ -309,7 +309,7 @@ class TestTesseract(BaseTest):
182 self.assertEqual(result["angle"], 90)
183 self.assertEqual(result["confidence"], 9.30)
184 popen.assert_called_once_with(
185- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
186+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
187 stdin=subprocess.PIPE,
188 shell=False,
189 startupinfo=None,
190@@ -345,7 +345,7 @@ class TestTesseract(BaseTest):
191 self.assertEqual(result["angle"], 90)
192 self.assertEqual(result["confidence"], 9.30)
193 popen.assert_called_once_with(
194- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
195+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
196 stdin=subprocess.PIPE,
197 shell=False,
198 startupinfo=None,
199@@ -378,7 +378,7 @@ class TestTesseract(BaseTest):
200 self.assertEqual(result["angle"], 90)
201 self.assertEqual(result["confidence"], 9.30)
202 popen.assert_called_once_with(
203- ["tesseract", "input.bmp", "stdout",
204+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout",
205 "--psm", "0", "-l", "osd"],
206 stdin=subprocess.PIPE,
207 shell=False,
208@@ -406,7 +406,7 @@ class TestTesseract(BaseTest):
209 with self.assertRaises(tesseract.TesseractError) as te:
210 tesseract.detect_orientation(self.image)
211 popen.assert_called_once_with(
212- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
213+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
214 stdin=subprocess.PIPE,
215 shell=False,
216 startupinfo=None,
217@@ -440,7 +440,7 @@ class TestTesseract(BaseTest):
218 with self.assertRaises(tesseract.TesseractError) as te:
219 tesseract.detect_orientation(self.image)
220 popen.assert_called_once_with(
221- ["tesseract", "input.bmp", "stdout", "--psm", "0"],
222+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "--psm", "0"],
223 stdin=subprocess.PIPE,
224 shell=False,
225 startupinfo=None,
226@@ -474,7 +474,7 @@ class TestTesseract(BaseTest):
227 self.assertEqual(result["angle"], 90)
228 self.assertEqual(result["confidence"], 9.30)
229 popen.assert_called_once_with(
230- ["tesseract", "input.bmp", "stdout", "-psm", "0"],
231+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
232 stdin=subprocess.PIPE,
233 shell=False,
234 startupinfo=None,
235@@ -507,7 +507,7 @@ class TestTesseract(BaseTest):
236 self.assertEqual(result["angle"], 90)
237 self.assertEqual(result["confidence"], 9.30)
238 popen.assert_called_once_with(
239- ["tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
240+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0", "-l", "fra"],
241 stdin=subprocess.PIPE,
242 shell=False,
243 startupinfo=None,
244@@ -534,7 +534,7 @@ class TestTesseract(BaseTest):
245 with self.assertRaises(tesseract.TesseractError) as te:
246 tesseract.detect_orientation(self.image)
247 popen.assert_called_once_with(
248- ["tesseract", "input.bmp", "stdout", "-psm", "0"],
249+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
250 stdin=subprocess.PIPE,
251 shell=False,
252 startupinfo=None,
253@@ -568,7 +568,7 @@ class TestTesseract(BaseTest):
254 with self.assertRaises(tesseract.TesseractError) as te:
255 tesseract.detect_orientation(self.image)
256 popen.assert_called_once_with(
257- ["tesseract", "input.bmp", "stdout", "-psm", "0"],
258+ ["@tesseract@/bin/tesseract", "input.bmp", "stdout", "-psm", "0"],
259 stdin=subprocess.PIPE,
260 shell=False,
261 startupinfo=None,