python3Packages.trafilatura: skip failing tests, cleanup

Upstream issue for the newly skipped failing tests: https://github.com/adbar/trafilatura/issues/805

Changed files
+23 -9
pkgs
development
python-modules
trafilatura
+23 -9
pkgs/development/python-modules/trafilatura/default.nix
···
{
lib,
buildPythonPackage,
+
fetchFromGitHub,
+
+
# build-system
+
setuptools,
+
+
# dependencies
certifi,
charset-normalizer,
courlan,
-
fetchPypi,
htmldate,
justext,
lxml,
-
pytestCheckHook,
-
pythonOlder,
-
setuptools,
urllib3,
+
+
# tests
+
pytestCheckHook,
}:
buildPythonPackage rec {
···
version = "2.0.0";
pyproject = true;
-
disabled = pythonOlder "3.9";
-
-
src = fetchPypi {
-
inherit pname version;
-
hash = "sha256-zrcJSm7Ml+cv6nPH26NnFMXFtXe2Rw5FINyok3BtYkc=";
+
src = fetchFromGitHub {
+
owner = "adbar";
+
repo = "trafilatura";
+
tag = "v${version}";
+
hash = "sha256-Cf1W3JEGSMkVmRZVTXYsXzZK/Nt/aDG890Sf0/0OZAA=";
};
postPatch = ''
···
nativeCheckInputs = [ pytestCheckHook ];
disabledTests = [
+
# TypeError: argument of type 'NoneType' is not iterable
+
# https://github.com/adbar/trafilatura/issues/805
+
"test_external"
+
"test_extract"
+
+
# AttributeError: 'NoneType' object has no attribute 'find'
+
# https://github.com/adbar/trafilatura/issues/805
+
"test_table_processing"
+
# Disable tests that require an internet connection
"test_cli_pipeline"
"test_crawl_page"