1# keep-sorted start
2- import: (data)/bots/_deny-pathological.yaml
3- import: (data)/bots/aggressive-brazilian-scrapers.yaml
4- import: (data)/clients/x-firefox-ai.yaml
5- import: (data)/common/keep-internet-working.yaml
6- import: (data)/common/rfc-violations.yaml
7- import: (data)/crawlers/_allow-good.yaml
8- import: (data)/meta/ai-block-aggressive.yaml
9# keep-sorted end
# A request whose User-Agent names Firefox, Chrome or Safari and which also
# carries every header checked below gets its weight adjusted by -10.
- name: realistic-browser-catchall
  action: WEIGH
  weight:
    adjust: -10
  expression:
    all:
    - '"User-Agent" in headers'
    - '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )'
    - '"Accept" in headers'
    - '"Sec-Fetch-Dest" in headers'
    - '"Sec-Fetch-Mode" in headers'
    - '"Sec-Fetch-Site" in headers'
    # The presence check stays ahead of the lookup that indexes the header.
    # NOTE(review): presumably conditions are evaluated in order - confirm.
    - '"Accept-Encoding" in headers'
    - '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )'
    - '"Accept-Language" in headers'
# The Upgrade-Insecure-Requests header is typically sent by browsers, but not
# always, so its presence only adjusts the weight by -2.
- name: upgrade-insecure-requests
  action: WEIGH
  weight:
    adjust: -2
  expression: '"Upgrade-Insecure-Requests" in headers'
# Chrome should behave like Chrome: a User-Agent mentioning Chrome that also
# sends the Sec-Ch-Ua, Sec-Ch-Ua-Mobile and Sec-Ch-Ua-Platform headers, with
# "Chromium" appearing in Sec-Ch-Ua, gets its weight adjusted by -5.
- name: chrome-is-proper
  action: WEIGH
  weight:
    adjust: -5
  expression:
    all:
    - 'userAgent.contains("Chrome")'
    # Presence check first, then the lookup that indexes the same header.
    - '"Sec-Ch-Ua" in headers'
    - 'headers["Sec-Ch-Ua"].contains("Chromium")'
    - '"Sec-Ch-Ua-Mobile" in headers'
    - '"Sec-Ch-Ua-Platform" in headers'
# A request that carries no Accept header gets its weight adjusted by +5.
- name: should-have-accept
  action: WEIGH
  weight:
    adjust: 5
  expression: '!("Accept" in headers)'
# Generic catchall rule: a User-Agent matching any of the alternatives in the
# regex below gets its weight adjusted by +10.
- name: generic-browser
  action: WEIGH
  weight:
    adjust: 10
  user_agent_regex: 'Mozilla|Opera|Chrome|Chromium'