# Package definition for tabula-py: builds the Python module from the
# chezou/tabula-py GitHub repository and patches its backend to call the
# `jre` executable passed in as an argument.
{
  lib,
  buildPythonPackage,
  distro,
  fetchFromGitHub,
  jpype1,
  jre,
  numpy,
  pandas,
  pytestCheckHook,
  pythonOlder,
  setuptools,
  setuptools-scm,
}:

buildPythonPackage rec {
  pname = "tabula-py";
  version = "2.10.0";
  pyproject = true;

  # Not built for Python interpreters older than 3.9.
  disabled = pythonOlder "3.9";

  # Source is the upstream release tag "v<version>", pinned by hash.
  src = fetchFromGitHub {
    owner = "chezou";
    repo = "tabula-py";
    tag = "v${version}";
    hash = "sha256-PQbwm9ho3XtpmZ7N7ASkrV8gk9Jom+yQKlt2fUa948s=";
  };

  # Swap the quoted "java" literal in tabula/backend.py for the path of the
  # JRE's main executable. `--replace-fail` makes the build fail if the
  # literal is no longer present in that file.
  postPatch = ''
    substituteInPlace tabula/backend.py \
      --replace-fail '"java"' '"${lib.getExe jre}"'
  '';

  build-system = [
    setuptools
    setuptools-scm
  ];

  dependencies = [
    distro
    numpy
    pandas
    jpype1
  ];

  buildInputs = [ jre ];

  # Checks: import the top-level module and run the test suite with pytest.
  pythonImportsCheck = [ "tabula" ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # These tests need network access.
    "test_convert_remote_file"
    "test_read_pdf_with_remote_template"
    "test_read_remote_pdf"
    "test_read_remote_pdf_with_custom_user_agent"
    # Fails with: "Failed: DID NOT RAISE <class 'RuntimeError'>".
    # The purpose of this test is unclear; it is probably tied to jpype,
    # whereas subprocess is used here instead. See:
    # https://github.com/chezou/tabula-py/issues/352#issuecomment-1730791540
    "test_read_pdf_with_silent_true"
  ];

  meta = {
    description = "Module to extract table from PDF into pandas DataFrame";
    homepage = "https://github.com/chezou/tabula-py";
    changelog = "https://github.com/chezou/tabula-py/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ fab ];
  };
}