# Package expression for extruct, built from the upstream GitHub repository
# with buildPythonPackage in pyproject mode.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  html-text,
  jstyleson,
  lxml,
  mf2py,
  mock,
  pyrdfa3,
  pytestCheckHook,
  pythonOlder,
  rdflib,
  setuptools,
  six,
  w3lib,
}:

buildPythonPackage rec {
  pname = "extruct";
  version = "0.18.0";
  pyproject = true;

  # Refuse to build on interpreters older than Python 3.8.
  disabled = pythonOlder "3.8";

  # Source is fetched from the upstream git tag "v<version>".
  src = fetchFromGitHub {
    owner = "scrapinghub";
    repo = "extruct";
    tag = "v${version}";
    hash = "sha256-hUSlIlWxrsxGLCE8/DAGSqxx9+7TEkynmXrVnXGjDQ8=";
  };

  # Build backend; with `pyproject = true` this belongs in `build-system`
  # rather than the generic `nativeBuildInputs`.
  build-system = [ setuptools ];

  # Runtime Python dependencies (the pyproject-era replacement for
  # `propagatedBuildInputs`).
  dependencies = [
    html-text
    jstyleson
    lxml
    mf2py
    pyrdfa3
    rdflib
    six
    w3lib
  ];

  # Only needed while running the test suite.
  nativeCheckInputs = [
    mock
    pytestCheckHook
  ];

  # Smoke test: the top-level module must be importable after installation.
  pythonImportsCheck = [ "extruct" ];

  disabledTests = [
    # AssertionError: Lists differ
    "test_microformat"
    "test_umicroformat"
  ];

  # `lib` is referenced explicitly instead of `meta = with lib;` so that the
  # whole of `lib` is not brought into scope for every attribute below.
  meta = {
    description = "Extract embedded metadata from HTML markup";
    mainProgram = "extruct";
    homepage = "https://github.com/scrapinghub/extruct";
    changelog = "https://github.com/scrapinghub/extruct/blob/v${version}/HISTORY.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ ambroisie ];
  };
}