157 lines
5.1 KiB
Python
Executable File
157 lines
5.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import json
|
|
import sqlite3
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
BASE = Path(__file__).resolve().parent
|
|
ENV_PATH = Path('/home/szmyt/docker/databases/.env')
|
|
REG_PATH = BASE / 'data' / 'registrars.json'
|
|
DB_PATH = BASE / 'data' / 'domainhunter.db'
|
|
|
|
|
|
def read_env(path: Path):
|
|
data = {}
|
|
if not path.exists():
|
|
return data
|
|
for line in path.read_text(encoding='utf-8').splitlines():
|
|
line = line.strip()
|
|
if not line or line.startswith('#') or '=' not in line:
|
|
continue
|
|
k, v = line.split('=', 1)
|
|
data[k.strip()] = v.strip()
|
|
return data
|
|
|
|
|
|
def fetch_latest_from_mongo(user, pwd):
|
|
js = r'''
|
|
const dbx = db.getSiblingDB("aiagent");
|
|
const first = dbx.domain_scans.find().sort({scannedAt:-1}).limit(1).toArray();
|
|
if (!first.length) {
|
|
print(JSON.stringify({runId:null,scannedAt:null,totalAvailable:0,domains:[]}));
|
|
} else {
|
|
const runId = first[0].runId;
|
|
const scannedAt = first[0].scannedAt;
|
|
const docs = dbx.domain_scans.find({runId: runId, available: true}).sort({score:-1}).limit(500).toArray().map(d => ({
|
|
domain: d.domain,
|
|
tld: d.tld,
|
|
score: d.score,
|
|
status: d.status,
|
|
keywords: d.keywords || []
|
|
}));
|
|
print(JSON.stringify({runId, scannedAt, totalAvailable: docs.length, domains: docs}));
|
|
}
|
|
'''
|
|
cmd = [
|
|
'docker', 'exec', 'mongo', 'mongosh', '--quiet',
|
|
'--username', user,
|
|
'--password', pwd,
|
|
'--authenticationDatabase', 'admin',
|
|
'--eval', js,
|
|
]
|
|
p = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
|
if p.returncode != 0:
|
|
raise RuntimeError((p.stderr or p.stdout or 'mongosh failed').strip())
|
|
out = p.stdout.strip().splitlines()[-1].strip()
|
|
return json.loads(out)
|
|
|
|
|
|
def upsert_metadata(cur, key, value):
|
|
cur.execute('INSERT INTO metadata(key,value) VALUES(?,?) ON CONFLICT(key) DO UPDATE SET value=excluded.value', (key, value))
|
|
|
|
|
|
def main():
|
|
env = read_env(ENV_PATH)
|
|
user = env.get('MONGO_INITDB_ROOT_USERNAME', 'root')
|
|
pwd = env.get('MONGO_INITDB_ROOT_PASSWORD', '')
|
|
|
|
latest = fetch_latest_from_mongo(user, pwd)
|
|
|
|
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
con = sqlite3.connect(DB_PATH)
|
|
cur = con.cursor()
|
|
|
|
# ensure schema exists
|
|
cur.executescript('''
|
|
CREATE TABLE IF NOT EXISTS metadata (key TEXT PRIMARY KEY, value TEXT);
|
|
CREATE TABLE IF NOT EXISTS domains (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
run_id TEXT,
|
|
scanned_at TEXT,
|
|
domain TEXT,
|
|
tld TEXT,
|
|
score INTEGER,
|
|
status TEXT,
|
|
keywords_json TEXT
|
|
);
|
|
CREATE INDEX IF NOT EXISTS idx_domains_run ON domains(run_id);
|
|
CREATE INDEX IF NOT EXISTS idx_domains_domain ON domains(domain);
|
|
CREATE TABLE IF NOT EXISTS registrar_prices (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
registrar TEXT,
|
|
url TEXT,
|
|
tld TEXT,
|
|
register_price REAL,
|
|
renew_price REAL,
|
|
updated_at TEXT,
|
|
UNIQUE(registrar, tld)
|
|
);
|
|
CREATE TABLE IF NOT EXISTS llm_scores (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
run_id TEXT,
|
|
domain TEXT,
|
|
llm_score REAL,
|
|
decision TEXT,
|
|
reason TEXT,
|
|
updated_at TEXT,
|
|
UNIQUE(run_id, domain)
|
|
);
|
|
''')
|
|
|
|
run_id = latest.get('runId')
|
|
scanned_at = latest.get('scannedAt')
|
|
|
|
if run_id:
|
|
cur.execute('DELETE FROM domains WHERE run_id = ?', (run_id,))
|
|
for d in latest.get('domains', []):
|
|
cur.execute('''INSERT INTO domains(run_id, scanned_at, domain, tld, score, status, keywords_json)
|
|
VALUES(?,?,?,?,?,?,?)''', (
|
|
run_id,
|
|
scanned_at,
|
|
d.get('domain'),
|
|
d.get('tld'),
|
|
int(d.get('score') or 0),
|
|
d.get('status'),
|
|
json.dumps(d.get('keywords') or [], ensure_ascii=False),
|
|
))
|
|
|
|
upsert_metadata(cur, 'latest_run_id', run_id or '')
|
|
upsert_metadata(cur, 'latest_scanned_at', scanned_at or '')
|
|
upsert_metadata(cur, 'latest_total_available', str(latest.get('totalAvailable', 0)))
|
|
|
|
if REG_PATH.exists():
|
|
reg = json.loads(REG_PATH.read_text(encoding='utf-8'))
|
|
for r in reg.get('registrars', []):
|
|
name = r.get('name')
|
|
url = r.get('url')
|
|
pricing = r.get('pricing', {})
|
|
for tld in ['pl', 'com', 'ai']:
|
|
p = pricing.get(tld, {})
|
|
cur.execute('''INSERT INTO registrar_prices(registrar,url,tld,register_price,renew_price,updated_at)
|
|
VALUES(?,?,?,?,?,?)
|
|
ON CONFLICT(registrar,tld) DO UPDATE SET
|
|
url=excluded.url,
|
|
register_price=excluded.register_price,
|
|
renew_price=excluded.renew_price,
|
|
updated_at=excluded.updated_at''', (
|
|
name, url, tld, p.get('register'), p.get('renew'), reg.get('autoLastRunAt') or reg.get('updatedAt') or ''
|
|
))
|
|
|
|
con.commit()
|
|
con.close()
|
|
print(f"OK: db refreshed {DB_PATH} | available={latest.get('totalAvailable',0)}")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|