diff --git a/.gitignore b/.gitignore index 85d347b..941f743 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Output +gfwlist.txt release-info.md *.pac dist/ diff --git a/README.md b/README.md index 0dfebaa..12bf6e9 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,22 @@ PAC scripts for proxies +## 特点 + +基于 IP 地址白名单设计,位于白名单中的 IP 地址走直连,白名单以外的 IP 地址走代理(暂不支持 IPv6)。 + +另有 GFWList 版本从 [gfwlist/gfwlist](https://github.com/gfwlist/gfwlist) 获取域名及 URL 列表,优先匹配列表中的黑白名单,有效防止 DNS 污染。 + +每周六 12:00 (UTC) 会自动使用 GitHub Actions 运行[生成脚本](build.py)从数据源获取 IP 地址列表并生成 PAC 文件。 + ## 使用 获取方式:[本仓库的 Releases](https://github.com/iBug/pac/releases/latest) -每周六 12:00 (UTC) 会自动使用 GitHub Actions 运行[生成脚本](build.py)从数据源获取 IP 地址列表并生成 PAC 文件。 +- `pac-<name>.txt` 包含从数据源 `<name>` 获取的 IP 地址列表(白名单) +- `pac-gfwlist-<name>.txt` 在 IP 白名单的基础上添加了 GFWList 的匹配 -本代码是为 Shadowsocks Windows 4.1.9 及以上版本设计的,若要在旧版本中使用请手动修改一处字符串,详情见 [shadowsocks-windows#2761](https://github.com/shadowsocks/shadowsocks-windows/issues/2761)。 +本代码是为 Shadowsocks Windows 4.1.9 及以上版本设计的,若要在旧版本中使用(或使用其他代理软件)请手动修改代码第 4 行 `__PROXY__` 为你的代理地址,详情见 [shadowsocks-windows#2761](https://github.com/shadowsocks/shadowsocks-windows/issues/2761)。 ## 贡献 diff --git a/build.py b/build.py index 5092b82..802d7a3 100755 --- a/build.py +++ b/build.py @@ -6,6 +6,8 @@ import ipaddress import requests from requests.exceptions import RequestException, HTTPError +import gfwlist + SOURCES = { 'ipdeny.com': 'http://www.ipdeny.com/ipblocks/data/aggregated/cn-aggregated.zone', @@ -13,6 +15,9 @@ SOURCES = { } OUT_DIR = "dist" +# Stub content to disable GFWList check +GFWLIST_STUB = "var DOMAINS = {};\nvar BLACKPAT = [];\nvar WHITEPAT = [];\n" + def fetch_and_convert(src): response = requests.get(src) @@ -36,6 +41,9 @@ def main(): code = f.read() code = code.replace("@@TIME@@", now.isoformat()[:-7]) + gfwlist_part = gfwlist.generate_pac_partial() + gfwlist_stub = GFWLIST_STUB + os.makedirs(OUT_DIR, mode=0o755, exist_ok=True) for key in SOURCES: print(f"Generating PAC script from
source {key}") @@ -45,10 +53,19 @@ def main(): continue except HTTPError: continue + filename = f"pac-{key}.txt" + filename_gfwlist = f"pac-gfwlist-{key}.txt" with open(os.path.join(OUT_DIR, filename), "w") as f: f.write(code) f.write(data) + f.write("\n") + f.write(gfwlist_stub) + with open(os.path.join(OUT_DIR, filename_gfwlist), "w") as f: + f.write(code) + f.write(data) + f.write("\n") + f.write(gfwlist_part) if __name__ == '__main__': diff --git a/code.js b/code.js index e31ac57..817b525 100644 --- a/code.js +++ b/code.js @@ -1,6 +1,9 @@ // Author: iBug // Time: @@TIME@@ +var proxy = __PROXY__; +var direct = "DIRECT"; + function belongsToSubnet(host, list) { var ip = host.split(".").map(Number); ip = 0x1000000 * ip[0] + 0x10000 * ip[1] + 0x100 * ip[2] + ip[3]; @@ -23,6 +26,40 @@ function belongsToSubnet(host, list) { return (masked ^ list[x][0]) == 0; } +function hasMatchedPattern(text, patterns) { + for (var i = 0; i < patterns.length; i++) { + if (shExpMatch(text, patterns[i])) + return true; + } + return false; +} + +function checkDomainType(host) { + // Check if a domain is blacklisted or whitelisted + var segments = host.split(".").reverse(); + var ptr = DOMAINS; + var type = DOMAINS["@"]; + for (var i = 0; i < segments.length; i++) { + var segment = segments[i]; + ptr = ptr[segment]; + if (ptr === undefined) + break; + if (typeof ptr === "number") + return ptr; + if (ptr["@"] !== undefined) + type = ptr["@"]; + } + return type; +} + +function hasWhitelistedPattern(url) { + return hasMatchedPattern(url, WHITEPAT); +} + +function hasBlacklistedPattern(url) { + return hasMatchedPattern(url, BLACKPAT); +} + function isChina(host) { return belongsToSubnet(host, CHINA); } @@ -31,10 +68,21 @@ function isLan(host) { return belongsToSubnet(host, LAN); } -var proxy = __PROXY__; -var direct = "DIRECT"; - function FindProxyForURL(url, host) { + if (hasWhitelistedPattern(url)) { + return direct; + } + if (hasBlacklistedPattern(url)) { + return proxy; + } + var 
domainType = checkDomainType(host); + if (domainType === 0) { + return proxy; + } else if (domainType === 1) { + return direct; + } + + // Fallback to IP whitelist var remote = dnsResolve(host); if (!remote || remote.indexOf(":") !== -1) { // resolution failed or is IPv6 addr diff --git a/gfwlist.py b/gfwlist.py new file mode 100755 index 0000000..5292b7f --- /dev/null +++ b/gfwlist.py @@ -0,0 +1,99 @@ +#!/usr/bin/python3 + +import os +import base64 +import json +import urllib.parse +import requests + + +GFWLIST_FILE = "gfwlist.txt" +GFWLIST_URL = 'https://raw.githubusercontent.com/gfwlist/gfwlist/master/gfwlist.txt' + + +def get_gfwlist(): + if os.path.isfile(GFWLIST_FILE): + with open(GFWLIST_FILE, "r") as f: + text = f.read() + else: + r = requests.get(GFWLIST_URL) + r.raise_for_status() + text = r.text + return base64.b64decode(text).decode("utf-8").rstrip("\n") + + +def update_domains(domains, host, mode=0): + segments = host.strip(".").split(".")[::-1] + + this = domains + for segment in segments: + if segment not in this: + this[segment] = {} + this = this[segment] + this["@"] = mode + + +def postproc_domains(domains): + # Turn all {"@": 1} into 1 to save some text + keys = list(domains.keys()) + for key in keys: + if key == "@": + continue + obj = domains[key] + if len(obj) == 1 and "@" in obj: + domains[key] = obj["@"] + else: + postproc_domains(obj) + + +def parse_gfwlist(text): + domains = {} + blackpat = [] # blacklisted patterns + whitepat = [] # whitelisted patterns + + for line in text.splitlines()[1:]: + if not line.strip() or line.startswith("!"): + continue # ignore comments and empty lines + + mode = 0 # default to blacklist + if line.startswith("@@"): + mode = 1 # now it's whitelist + line = line[2:] + + if line.startswith("||"): + # domain prefix + update_domains(domains, line[2:], mode) + elif line.startswith("/"): + # regex, can't handle yet + pass + else: + # Keyword pattern + # Single vertical line at either side means string boundary + if 
line.startswith("|"): + line = line[1:] + else: + line = "*" + line + if line.endswith("|"): + line = line[:-1] + else: + line = line + "*" + if mode == 0: + blackpat.append(line) + else: + whitepat.append(line) + postproc_domains(domains) + return domains, blackpat, whitepat + + +def generate_pac_partial(): + gfwlist = get_gfwlist() + domains, blackpat, whitepat = parse_gfwlist(gfwlist) + return "var DOMAINS = {};\n\nvar BLACKPAT = {};\n\nvar WHITEPAT = {};\n".format( + json.dumps(domains, indent=2), + json.dumps(blackpat, indent=2), + json.dumps(whitepat, indent=2), + ) + + +if __name__ == '__main__': + print(generate_pac_partial())