domainhunter-panel/refresh_domain_data.py
Adrian Miesikowski df0f0515a0
Some checks are pending
CI / lint (push) Waiting to run
Deploy / deploy (push) Waiting to run
feat: add OpenClaw LLM scoring pipeline for all domains and surface in panel
2026-02-17 23:55:48 +01:00

157 lines
5.1 KiB
Python
Executable File

#!/usr/bin/env python3
import json
import sqlite3
import subprocess
from pathlib import Path
BASE = Path(__file__).resolve().parent
ENV_PATH = Path('/home/szmyt/docker/databases/.env')
REG_PATH = BASE / 'data' / 'registrars.json'
DB_PATH = BASE / 'data' / 'domainhunter.db'
def read_env(path: Path):
data = {}
if not path.exists():
return data
for line in path.read_text(encoding='utf-8').splitlines():
line = line.strip()
if not line or line.startswith('#') or '=' not in line:
continue
k, v = line.split('=', 1)
data[k.strip()] = v.strip()
return data
def fetch_latest_from_mongo(user, pwd):
js = r'''
const dbx = db.getSiblingDB("aiagent");
const first = dbx.domain_scans.find().sort({scannedAt:-1}).limit(1).toArray();
if (!first.length) {
print(JSON.stringify({runId:null,scannedAt:null,totalAvailable:0,domains:[]}));
} else {
const runId = first[0].runId;
const scannedAt = first[0].scannedAt;
const docs = dbx.domain_scans.find({runId: runId, available: true}).sort({score:-1}).limit(500).toArray().map(d => ({
domain: d.domain,
tld: d.tld,
score: d.score,
status: d.status,
keywords: d.keywords || []
}));
print(JSON.stringify({runId, scannedAt, totalAvailable: docs.length, domains: docs}));
}
'''
cmd = [
'docker', 'exec', 'mongo', 'mongosh', '--quiet',
'--username', user,
'--password', pwd,
'--authenticationDatabase', 'admin',
'--eval', js,
]
p = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if p.returncode != 0:
raise RuntimeError((p.stderr or p.stdout or 'mongosh failed').strip())
out = p.stdout.strip().splitlines()[-1].strip()
return json.loads(out)
def upsert_metadata(cur, key, value):
cur.execute('INSERT INTO metadata(key,value) VALUES(?,?) ON CONFLICT(key) DO UPDATE SET value=excluded.value', (key, value))
def main():
env = read_env(ENV_PATH)
user = env.get('MONGO_INITDB_ROOT_USERNAME', 'root')
pwd = env.get('MONGO_INITDB_ROOT_PASSWORD', '')
latest = fetch_latest_from_mongo(user, pwd)
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
con = sqlite3.connect(DB_PATH)
cur = con.cursor()
# ensure schema exists
cur.executescript('''
CREATE TABLE IF NOT EXISTS metadata (key TEXT PRIMARY KEY, value TEXT);
CREATE TABLE IF NOT EXISTS domains (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id TEXT,
scanned_at TEXT,
domain TEXT,
tld TEXT,
score INTEGER,
status TEXT,
keywords_json TEXT
);
CREATE INDEX IF NOT EXISTS idx_domains_run ON domains(run_id);
CREATE INDEX IF NOT EXISTS idx_domains_domain ON domains(domain);
CREATE TABLE IF NOT EXISTS registrar_prices (
id INTEGER PRIMARY KEY AUTOINCREMENT,
registrar TEXT,
url TEXT,
tld TEXT,
register_price REAL,
renew_price REAL,
updated_at TEXT,
UNIQUE(registrar, tld)
);
CREATE TABLE IF NOT EXISTS llm_scores (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id TEXT,
domain TEXT,
llm_score REAL,
decision TEXT,
reason TEXT,
updated_at TEXT,
UNIQUE(run_id, domain)
);
''')
run_id = latest.get('runId')
scanned_at = latest.get('scannedAt')
if run_id:
cur.execute('DELETE FROM domains WHERE run_id = ?', (run_id,))
for d in latest.get('domains', []):
cur.execute('''INSERT INTO domains(run_id, scanned_at, domain, tld, score, status, keywords_json)
VALUES(?,?,?,?,?,?,?)''', (
run_id,
scanned_at,
d.get('domain'),
d.get('tld'),
int(d.get('score') or 0),
d.get('status'),
json.dumps(d.get('keywords') or [], ensure_ascii=False),
))
upsert_metadata(cur, 'latest_run_id', run_id or '')
upsert_metadata(cur, 'latest_scanned_at', scanned_at or '')
upsert_metadata(cur, 'latest_total_available', str(latest.get('totalAvailable', 0)))
if REG_PATH.exists():
reg = json.loads(REG_PATH.read_text(encoding='utf-8'))
for r in reg.get('registrars', []):
name = r.get('name')
url = r.get('url')
pricing = r.get('pricing', {})
for tld in ['pl', 'com', 'ai']:
p = pricing.get(tld, {})
cur.execute('''INSERT INTO registrar_prices(registrar,url,tld,register_price,renew_price,updated_at)
VALUES(?,?,?,?,?,?)
ON CONFLICT(registrar,tld) DO UPDATE SET
url=excluded.url,
register_price=excluded.register_price,
renew_price=excluded.renew_price,
updated_at=excluded.updated_at''', (
name, url, tld, p.get('register'), p.get('renew'), reg.get('autoLastRunAt') or reg.get('updatedAt') or ''
))
con.commit()
con.close()
print(f"OK: db refreshed {DB_PATH} | available={latest.get('totalAvailable',0)}")
if __name__ == '__main__':
main()