python3Packages.nltk: add data(Dir) passthru, run tests (#409680)

Yt 4a2c6cc5 aad53c52

Changed files
+98 -38
pkgs
by-name
un
unstructured-api
development
python-modules
aider-chat
nltk
type-infer
+4 -10
pkgs/by-name/un/unstructured-api/package.nix
···
python3,
makeWrapper,
nix-update-script,
-
symlinkJoin,
-
nltk-data,
}:
let
pythonEnv = python3.withPackages (
···
++ unstructured.optional-dependencies.all-docs
);
version = "0.0.82";
-
unstructured_api_nltk_data = symlinkJoin {
-
name = "unstructured_api_nltk_data";
-
-
paths = [
-
nltk-data.punkt
-
nltk-data.averaged-perceptron-tagger
-
];
-
};
+
unstructured_api_nltk_data = python3.pkgs.nltk.dataDir (d: [
+
d.punkt
+
d.averaged-perceptron-tagger
+
]);
in
stdenvNoCC.mkDerivation {
pname = "unstructured-api";
+4 -9
pkgs/development/python-modules/aider-chat/default.nix
···
gitMinimal,
portaudio,
playwright-driver,
-
symlinkJoin,
-
nltk-data,
pythonOlder,
pythonAtLeast,
setuptools-scm,
···
}:
let
-
aider-nltk-data = symlinkJoin {
-
name = "aider-nltk-data";
-
paths = [
-
nltk-data.punkt-tab
-
nltk-data.stopwords
-
];
-
};
+
aider-nltk-data = nltk.dataDir (d: [
+
d.punkt-tab
+
d.stopwords
+
]);
version = "0.83.1";
aider-chat = buildPythonPackage {
+15
pkgs/development/python-modules/nltk/data-dir.nix
···
+
{
+
lib,
+
pkgs,
+
python3Packages,
+
}:
+
lib.makeOverridable (
+
{ ... }@nltkDataPkgs:
+
f:
+
pkgs.symlinkJoin {
+
inherit (python3Packages.nltk) meta;
+
name = "nltk-data-dir";
+
+
paths = f nltkDataPkgs;
+
}
+
) python3Packages.nltk.data
+70 -9
pkgs/development/python-modules/nltk/default.nix
···
{
lib,
+
pkgs,
fetchPypi,
buildPythonPackage,
pythonOlder,
···
joblib,
regex,
tqdm,
+
+
# preInstallCheck
+
nltk,
+
+
# nativeCheckInputs
+
matplotlib,
+
numpy,
+
pyparsing,
+
pytestCheckHook,
+
pytest-mock,
}:
buildPythonPackage rec {
···
hash = "sha256-h9EnvT3kvYmk+BJl5fpZyxsZmydEAXU3D3QX0rx66Gg=";
};
-
propagatedBuildInputs = [
+
dependencies = [
click
joblib
regex
tqdm
];
-
# Tests require some data, the downloading of which is impure. It would
-
# probably make sense to make the data another derivation, but then feeding
-
# that into the tests (given that we need nltk itself to download the data,
-
# unless there's an easy way to download it without nltk's downloader) might
-
# be complicated. For now let's just disable the tests and hope for the
-
# best.
-
doCheck = false;
+
# Point NLTK_DATA at a directory, built with the dataDir passthru, that holds the corpora and models the test suite needs
+
preInstallCheck = ''
+
export NLTK_DATA=${
+
nltk.dataDir (
+
d: with d; [
+
averaged-perceptron-tagger-eng
+
averaged-perceptron-tagger-rus
+
brown
+
cess-cat
+
cess-esp
+
conll2007
+
floresta
+
gutenberg
+
inaugural
+
indian
+
large-grammars
+
nombank-1-0
+
omw-1-4
+
pl196x
+
porter-test
+
ptb
+
punkt-tab
+
rte
+
sinica-treebank
+
stopwords
+
tagsets-json
+
treebank
+
twitter-samples
+
udhr
+
universal-tagset
+
wmt15-eval
+
wordnet
+
wordnet-ic
+
words
+
]
+
)
+
}
+
'';
+
+
nativeCheckInputs = [
+
pytestCheckHook
+
matplotlib
+
numpy
+
pyparsing
+
pytest-mock
+
+
pkgs.which
+
];
+
+
disabledTestPaths = [
+
"nltk/test/unit/test_downloader.py" # Touches network
+
];
pythonImportsCheck = [ "nltk" ];
+
passthru = {
+
data = pkgs.nltk-data;
+
dataDir = pkgs.callPackage ./data-dir.nix { };
+
};
+
meta = with lib; {
description = "Natural Language Processing ToolKit";
mainProgram = "nltk";
homepage = "http://nltk.org/";
license = licenses.asl20;
-
maintainers = [ ];
+
maintainers = [ lib.maintainers.bengsparks ];
};
}
+5 -10
pkgs/development/python-modules/type-infer/default.nix
···
python-dateutil,
scipy,
toml,
-
nltk-data,
-
symlinkJoin,
}:
let
-
testNltkData = symlinkJoin {
-
name = "nltk-test-data";
-
paths = [
-
nltk-data.punkt
-
nltk-data.punkt-tab
-
nltk-data.stopwords
-
];
-
};
+
testNltkData = nltk.dataDir (d: [
+
d.punkt
+
d.punkt-tab
+
d.stopwords
+
]);
version = "0.0.21";
tag = "v${version}";