122 lines
4.0 KiB
Python
Executable File
122 lines
4.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import json
|
|
import re
|
|
import urllib.request
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Directory containing this script; data files are resolved relative to it.
BASE = Path(__file__).resolve().parent
# JSON registry file that main() reads and rewrites.
REG_PATH = BASE / 'data' / 'registrars.json'
# Timeout, in seconds, passed to urlopen for every pricing page request.
TIMEOUT = 8

# A price: 1-4 digits, an optional 1-2 digit fraction after "." or ",",
# optional whitespace, then a currency marker.
# Group 1 is the number, group 2 the currency.
MONEY_RE = re.compile(r'(\d{1,4}(?:[\.,]\d{1,2})?)\s*(zł|pln|eur|€|usd|\$)', re.IGNORECASE)

# Substrings searched for (against lower-cased text) to decide whether a
# nearby price is a registration price or a renewal price.
REGISTER_HINTS = ["rejestr", "rejestracja", "new", "first year", "1 rok"]
RENEW_HINTS = ["odnow", "renew", "renewal", "kolejny", "next year", "2 rok"]
|
|
|
|
|
|
def fetch_text(url: str) -> str:
    """Download *url* and return the page as whitespace-normalised plain text.

    The body is decoded with the charset declared in the response headers
    (UTF-8 when none is declared or the label is unknown). ``<script>`` and
    ``<style>`` elements and all remaining tags are replaced by spaces, HTML
    entities are unescaped, and every run of whitespace is collapsed to a
    single space.

    Errors raised by ``urlopen`` (network failures, timeouts, HTTP errors)
    are not caught here and propagate to the caller.
    """
    import html  # local import: not part of the module-level import list

    req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0 DomainHunterBot/1.0"})
    with urllib.request.urlopen(req, timeout=TIMEOUT) as resp:
        raw = resp.read()
        # Pages served in a legacy encoding would lose non-ASCII characters
        # (e.g. the "ł" of "zł") if they were blindly decoded as UTF-8.
        charset = resp.headers.get_content_charset() or 'utf-8'

    try:
        text = raw.decode(charset, errors='ignore')
    except LookupError:
        # The server declared a charset Python does not know.
        text = raw.decode('utf-8', errors='ignore')

    text = re.sub(r'<script[\s\S]*?</script>', ' ', text, flags=re.IGNORECASE)
    text = re.sub(r'<style[\s\S]*?</style>', ' ', text, flags=re.IGNORECASE)
    text = re.sub(r'<[^>]+>', ' ', text)
    # Unescape only after tags are gone so that escaped markup such as
    # "&lt;b&gt;" is kept as text; "&nbsp;" becomes whitespace and is
    # collapsed by the next substitution.
    text = html.unescape(text)
    text = re.sub(r'\s+', ' ', text)
    return text
|
|
|
|
|
|
def to_float(num: str):
    """Convert a decimal string to ``float``, accepting "," as the separator.

    Returns ``None`` when *num* cannot be converted (not a number, empty,
    or not a string at all).
    """
    try:
        return float(num.replace(',', '.'))
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: *num* is not a str;
        # ValueError: the text is not a valid number.
        return None
|
|
|
|
|
|
def _first_price(chunk: str):
    """Return the first amount MONEY_RE finds in *chunk* as a float, or None."""
    hit = MONEY_RE.search(chunk)
    if hit is None:
        return None
    return to_float(hit.group(1))


def find_price(text: str, tld: str, kind: str):
    """Heuristically locate a price for *tld* in plain page *text*.

    *kind* selects the hint list: ``'register'`` uses REGISTER_HINTS, any
    other value uses RENEW_HINTS.

    Two passes are made:

    1. For every occurrence of ``.<tld>`` a window of 240 characters on each
       side is inspected; if it contains a hint, the first price in that
       window is returned.
    2. Otherwise, for every hint occurrence, the first price within 100
       characters before and 200 characters after the hint is returned.

    Returns ``(price, context)`` where *context* is the first 220 characters
    of the lower-cased window, or ``(None, None)`` when nothing is found.
    """
    hints = REGISTER_HINTS if kind == 'register' else RENEW_HINTS
    # Hints are prefixes ("rejestr", "odnow"), so only the left edge is
    # anchored. Without it "new" would also fire inside "renew"/"renewal"
    # and a renewal price would be reported as the registration price.
    hint_patterns = [
        re.compile(r'(?<!\w)' + re.escape(h), re.IGNORECASE) for h in hints
    ]
    # The lookahead keeps ".com" from matching inside ".company" and similar.
    tld_pattern = re.compile(
        re.escape(f'.{tld.lower()}') + r'(?![\w-])', re.IGNORECASE
    )

    # 1) look for a window that contains both the TLD and a hint
    for m in tld_pattern.finditer(text):
        start = max(0, m.start() - 240)
        end = min(len(text), m.end() + 240)
        chunk = text[start:end].lower()
        if any(p.search(chunk) for p in hint_patterns):
            val = _first_price(chunk)
            if val is not None:
                return val, chunk[:220]

    # 2) fallback: a hint with a price nearby, regardless of the TLD
    for pattern in hint_patterns:
        for mh in pattern.finditer(text):
            start = max(0, mh.start() - 100)
            end = min(len(text), mh.end() + 200)
            chunk = text[start:end].lower()
            val = _first_price(chunk)
            if val is not None:
                return val, chunk[:220]

    return None, None
|
|
|
|
|
|
def main():
    """Refresh registrar prices in REG_PATH from the configured pricing pages.

    For every registrar and each of the TLDs ``pl``, ``com`` and ``ai`` that
    has an ``autoPricing[tld]['url']`` entry, the page is fetched and searched
    for a registration and a renewal price. Prices that are found overwrite
    ``pricing[tld]``; the outcome of each check (``ok`` or ``error``) is
    recorded under ``autoMeta[tld]``. The file is then rewritten with updated
    run metadata and a one-line summary is printed.
    """
    data = json.loads(REG_PATH.read_text(encoding='utf-8'))
    now = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')

    updated_count = 0
    # Successfully fetched pages keyed by URL, so a price list shared by
    # several TLDs or registrars is downloaded once per run. Failures are
    # not cached and are retried on the next use of the same URL.
    page_cache = {}

    # "or" also covers an explicit JSON null, which .get()'s default does not.
    for reg in data.get('registrars') or []:
        auto = reg.get('autoPricing') or {}
        for tld in ['pl', 'com', 'ai']:
            rule = auto.get(tld)
            if not rule or not rule.get('url'):
                continue
            url = rule['url']

            text = page_cache.get(url)
            if text is None:
                try:
                    text = fetch_text(url)
                except Exception as e:
                    # Best effort: record the failure and move on so one
                    # unreachable page does not abort the whole run.
                    reg.setdefault('autoMeta', {})[tld] = {
                        'status': 'error',
                        'error': str(e),
                        'checkedAt': now,
                        'url': url,
                    }
                    continue
                page_cache[url] = text

            pricing = reg.setdefault('pricing', {}).setdefault(tld, {'register': None, 'renew': None})
            reg_val, reg_ctx = find_price(text, tld, 'register')
            ren_val, ren_ctx = find_price(text, tld, 'renew')

            # A price that was not found leaves the stored value untouched.
            if reg_val is not None:
                pricing['register'] = reg_val
                updated_count += 1
            if ren_val is not None:
                pricing['renew'] = ren_val
                updated_count += 1

            reg.setdefault('autoMeta', {})[tld] = {
                'status': 'ok',
                'checkedAt': now,
                'url': url,
                'registerFound': reg_val,
                'renewFound': ren_val,
                'registerContext': reg_ctx,
                'renewContext': ren_ctx,
            }

    data['updatedAt'] = now[:10]  # date part (YYYY-MM-DD) of the timestamp
    data['autoLastRunAt'] = now
    data['autoUpdatedFields'] = updated_count
    REG_PATH.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding='utf-8')
    print(f'OK: updated fields={updated_count}')
|
|
|
|
|
|
# Run the update only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|