at master 4.0 kB view raw
# Nix package expression for Scrapy (Python web crawling / scraping framework).
#
# Takes its dependencies as function arguments and returns the result of
# `buildPythonPackage`. The source is fetched from the upstream GitHub
# repository at the tag matching `version`, and built with hatchling.
{
  lib,
  stdenv,
  botocore,
  buildPythonPackage,
  cryptography,
  cssselect,
  defusedxml,
  fetchFromGitHub,
  glibcLocales,
  hatchling,
  installShellFiles,
  itemadapter,
  itemloaders,
  jmespath,
  lxml,
  packaging,
  parsel,
  pexpect,
  protego,
  pydispatcher,
  pyopenssl,
  pytest-xdist,
  pytestCheckHook,
  pythonOlder,
  queuelib,
  service-identity,
  setuptools,
  sybil,
  testfixtures,
  tldextract,
  twisted,
  uvloop,
  w3lib,
  zope-interface,
}:

buildPythonPackage rec {
  pname = "scrapy";
  version = "2.13.3";
  pyproject = true;

  # Upstream dropped Python 3.8 support in Scrapy 2.12, so this release
  # needs Python 3.9 or newer.
  disabled = pythonOlder "3.9";

  src = fetchFromGitHub {
    owner = "scrapy";
    repo = "scrapy";
    tag = version;
    hash = "sha256-M+Lko0O0xsEPHLghvIGHxIv22XBXaZsujJ2+bjBzGZ4=";
  };

  # Do not enforce the upstream version constraint on defusedxml.
  pythonRelaxDeps = [
    "defusedxml"
  ];

  build-system = [
    hatchling
  ];

  nativeBuildInputs = [
    # Provides installManPage / installShellCompletion used in postInstall.
    installShellFiles
    setuptools
  ];

  # Runtime Python dependencies (pyproject-style counterpart of build-system).
  dependencies = [
    cryptography
    cssselect
    defusedxml
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytest-xdist
    pytestCheckHook
    sybil
    testfixtures
    uvloop
  ];

  # UTF-8 locale for the build and test phases; glibcLocales above supplies it.
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"

    # ConnectionRefusedError: [Errno 111] Connection refused
    "tests/test_feedexport.py::TestFTPFeedStorage::test_append"
    "tests/test_feedexport.py::TestFTPFeedStorage::test_append_active_mode"
    "tests/test_feedexport.py::TestFTPFeedStorage::test_overwrite"
    "tests/test_feedexport.py::TestFTPFeedStorage::test_overwrite_active_mode"

    # This test checks that the *first* deprecation warning is a specific one,
    # but for some reason we get other deprecation warnings appearing first.
    # This isn't a material issue: the deprecation warning is still raised.
    "tests/test_spider_start.py::MainTestCase::test_start_deprecated_super"

    # Don't test the documentation
    "docs"
  ];

  disabledTests = [
    # Requires network access
    "AnonymousFTPTestCase"
    "FTPFeedStorageTest"
    "FeedExportTest"
    "test_custom_asyncio_loop_enabled_true"
    "test_custom_loop_asyncio"
    "test_custom_loop_asyncio_deferred_signal"
    "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
    "test_persist"
    "test_timeout_download_from_spider_nodata_rcvd"
    "test_timeout_download_from_spider_server_hangs"
    "test_unbounded_response"
    "CookiesMiddlewareTest"
    # Test fails on Hydra
    "test_start_requests_laziness"
  ]
  ++ lib.optionals stdenv.hostPlatform.isDarwin [
    "test_xmliter_encoding"
    "test_download"
    "test_reactor_default_twisted_reactor_select"
    "URIParamsSettingTest"
    "URIParamsFeedOptionTest"
    # flaky on darwin-aarch64
    # (test_start_requests_laziness is already disabled on every platform above)
    "test_fixed_delay"
  ];

  # Ship the man page and shell completions from the upstream extras/ directory.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
      --zsh extras/scrapy_zsh_completion \
      --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  __darwinAllowLocalNetworking = true;

  meta = {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${src.tag}/docs/news.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ vinnymeller ];
  };
}