#!/bin/bash
#
# Sync ~/webGoggles to a NAS share.
#
# Uses mount -t cifs with sudo (prompts for password if no credentials file).
# Create ~/.smb/ts.nas to avoid the password prompt each time:
#   echo -e "username=schmeeve\npassword=yourpass" > ~/.smb/ts.nas
#   chmod 600 ~/.smb/ts.nas
#
# Post-sync cleanup: deduplicates identical screenshots by content hash,
# and thins screenshots in sessions older than 6 weeks to 1 per 2 minutes
# (based on page-info.json timestamps, not filenames).
#
# Adjust SHARE, SUBPATH, MOUNTPOINT, CREDENTIALS, and SOURCE (plus the
# username hard-coded in the mount options) to match your environment.

# Scan every command-line argument; the first -h/--help prints the usage
# text and ends the script successfully. Any other argument is ignored.
for opt; do
  if [[ "${opt}" == "--help" || "${opt}" == "-h" ]]; then
    cat <<'EOF'
Usage: sync-webgoggles [options]

Sync ~/webGoggles to a NAS share, with deduplication and old-session thinning.

Options:
  -h, --help    Show this help message and exit
EOF
    exit 0
  fi
done

# --- Settings used by the steps below ---
SOURCE="${HOME}/webGoggles"          # local tree that gets uploaded
SHARE="//ts.nas/aura"                # CIFS share handed to mount
SUBPATH="webGoggles"                 # directory under the mountpoint that receives the sync
MOUNTPOINT="${HOME}/mnt/ts.nas/aura" # where the share is mounted locally
CREDENTIALS="${HOME}/.smb/ts.nas"    # optional credentials file for mount
START="$(date +%s)"                  # epoch seconds, used for the final duration report

echo "[sync-webgoggles] Starting at $(date)"

# Bail out early when the source directory is missing.
[[ -d "${SOURCE}" ]] || {
  echo "[sync-webgoggles] ERROR: Source ${SOURCE} does not exist"
  exit 1
}

# 1. Create mountpoint and mount if not already mounted

# _is_mounted DIR
#   Succeeds when the output of `mount` contains an entry mounted on exactly
#   DIR. The match is a fixed string (grep -F) delimited by " on " and a
#   trailing space, so dots in DIR are not treated as regex wildcards and a
#   longer path that merely starts with DIR (e.g. DIR2 or DIR/sub) does not
#   count as a match.
_is_mounted() {
  mount | grep -qF -- " on ${1} "
}

if _is_mounted "${MOUNTPOINT}"; then
  echo "[sync-webgoggles] Already mounted at ${MOUNTPOINT}"
else
  echo "[sync-webgoggles] Mounting ${SHARE} → ${MOUNTPOINT}"
  mkdir -p "${MOUNTPOINT}"
  # Map ownership of everything on the share to the invoking user.
  OPTS="username=schmeeve,uid=$(id -u),gid=$(id -g),forceuid,forcegid,nounix,serverino"
  # Only reference the credentials file when it exists; without it the
  # mount command is left to ask for the password itself.
  if [ -f "${CREDENTIALS}" ]; then
    OPTS="${OPTS},credentials=${CREDENTIALS}"
  fi
  sudo mount -t cifs "${SHARE}" "${MOUNTPOINT}" -o "${OPTS}"
  # Verify through the mount table rather than trusting the exit status alone.
  if ! _is_mounted "${MOUNTPOINT}"; then
    echo "[sync-webgoggles] ERROR: Failed to mount ${SHARE}"
    exit 1
  fi
  echo "[sync-webgoggles] Mounted at ${MOUNTPOINT}"
fi

# 2. Run the pick-most-skin helper that sits next to this script against the
#    local source (intended to find the best screenshot per user). The step
#    is skipped with a warning when the helper is not an executable file.
PICK_SCRIPT="$(dirname "$0")/pick-most-skin"
if [[ ! -x "${PICK_SCRIPT}" ]]; then
  echo "[sync-webgoggles] Warning: ${PICK_SCRIPT} not found, skipping"
else
  echo "[sync-webgoggles] Running ${PICK_SCRIPT} ${SOURCE}"
  "${PICK_SCRIPT}" "${SOURCE}"
fi

# 3. One-way rsync of SOURCE into the share subpath.
#    Files that would be overwritten on the destination are first kept as
#    backups with a "-CONFLICT" suffix; .DS_Store files are never copied.
MP="${MOUNTPOINT}/${SUBPATH}"
mkdir -p "${MP}"
echo "[sync-webgoggles] Syncing ${SOURCE}/ → ${MP}/"
RSYNC_ARGS=(
  -v -r -a -u
  --backup
  --suffix="-CONFLICT"
  --exclude=".DS_Store"
  --progress
  --stats
)
rsync "${RSYNC_ARGS[@]}" "${SOURCE}/" "${MP}/"

# 4. Cleanup: deduplicate and thin old screenshots on destination
#######################################
# Deduplicate screenshots by content and thin old sessions under a directory.
# Arguments:
#   $1 - directory tree to clean (required)
#   $2 - age in days; a session whose earliest page is older than this is
#        thinned (default 42, i.e. 6 weeks)
#   $3 - thinning interval in seconds; one screenshot is kept per interval
#        (default 120, i.e. 2 minutes)
# Outputs:
#   Progress and summary lines on stdout.
# Returns:
#   Exit status of the embedded python3 program.
# NOTE(review): the rsync step in this script uses neither --delete nor an
#   exclude list for removed files, so screenshots deleted here will
#   presumably be copied again by the next sync while they still exist in
#   the source — confirm this is intended.
#######################################
cleanup_destination() {
  local dest="$1"
  local thin_days="${2:-42}"      # default 6 weeks
  local keep_interval="${3:-120}" # default 2 minutes

  echo "[sync-webgoggles] Running cleanup on ${dest}"

  # Hand over the defaulted locals (not the raw positionals) so the defaults
  # above actually take effect when $2/$3 are omitted.
  python3 - "${dest}" "${thin_days}" "${keep_interval}" <<'PYEOF'
import hashlib
import json
import os
import sys
import time
from collections import defaultdict
from datetime import datetime

dest = sys.argv[1]
try:
    thin_days = int(sys.argv[2])
    keep_interval = int(sys.argv[3])
except ValueError:
    sys.exit("[sync-webgoggles cleanup] ERROR: thin_days and keep_interval must be integers")
if keep_interval <= 0:
    # The slot computation below divides by keep_interval.
    sys.exit("[sync-webgoggles cleanup] ERROR: keep_interval must be positive")
cutoff = time.time() - thin_days * 86400

CANONICAL_NAME = 'screenshot.png'
# rsync --backup --suffix=-CONFLICT appends the suffix to the whole file
# name (screenshot.png-CONFLICT); screenshot-CONFLICT.png is still accepted
# for copies that were named that way.
SCREENSHOT_NAMES = (
    CANONICAL_NAME,
    'screenshot-CONFLICT.png',
    'screenshot.png-CONFLICT',
)


def content_hash(path):
    """Return the MD5 hex digest of a file, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()


def keep_priority(path):
    """Sort key: canonical screenshot.png first, then oldest mtime first."""
    try:
        mtime = os.path.getmtime(path)
    except OSError:
        mtime = float('inf')  # unreadable entries sort last
    return (os.path.basename(path) != CANONICAL_NAME, mtime)


# --- Phase 1: Deduplicate by content hash ---
print("[sync-webgoggles cleanup] Deduplicating screenshots...")

# Group candidates by size first so only files that could be equal get hashed.
size_groups = defaultdict(list)
for root, dirs, files in os.walk(dest):
    for name in files:
        if name in SCREENSHOT_NAMES:
            path = os.path.join(root, name)
            try:
                size_groups[os.path.getsize(path)].append(path)
            except OSError:
                pass  # vanished or unreadable: best effort, skip it

dedup_removed = 0
for size, paths in size_groups.items():
    if len(paths) < 2:
        continue
    hash_groups = defaultdict(list)
    for p in paths:
        try:
            hash_groups[content_hash(p)].append(p)
        except OSError:
            pass
    for same_paths in hash_groups.values():
        if len(same_paths) < 2:
            continue
        # Keep one copy per content: a canonical screenshot.png wins over a
        # CONFLICT backup, otherwise the chronologically first file is kept.
        same_paths.sort(key=keep_priority)
        for p in same_paths[1:]:
            try:
                os.remove(p)
                dedup_removed += 1
            except OSError:
                pass

print(f"[sync-webgoggles cleanup] Deduplication removed {dedup_removed} files")

# --- Phase 2: Thin old session screenshots ---
print("[sync-webgoggles cleanup] Thinning old session screenshots...")

# Collect all page dirs that have both page-info.json and screenshot.png,
# grouped by session (.../sessions/<session>/<page>/)
sessions = defaultdict(list)  # session_dir -> [(page_dir, timestamp_epoch)]

for root, dirs, files in os.walk(dest):
    if 'page-info.json' not in files or CANONICAL_NAME not in files:
        continue
    # Only process pages whose grandparent directory is named "sessions"
    session_dir = os.path.dirname(root)
    if os.path.basename(os.path.dirname(session_dir)) != 'sessions':
        continue
    try:
        with open(os.path.join(root, 'page-info.json'), encoding='utf-8') as f:
            info = json.load(f)
        ts_str = info.get('timestamp', '')
        if not ts_str:
            continue
        # fromisoformat() on older Pythons rejects a trailing "Z"
        if ts_str.endswith('Z'):
            ts_str = ts_str[:-1] + '+00:00'
        ts = datetime.fromisoformat(ts_str).timestamp()
    except (AttributeError, TypeError, ValueError, OSError):
        # Malformed JSON, non-object JSON, non-string or unparsable
        # timestamp, unreadable file: leave this page untouched.
        continue
    sessions[session_dir].append((root, ts))

thin_removed = 0
thin_kept = 0
for session_dir, pages in sessions.items():
    pages.sort(key=lambda x: x[1])
    session_start = pages[0][1]
    # Only thin sessions where the first page is older than cutoff
    if session_start > cutoff:
        continue
    # Group pages into keep_interval-sized slots by proximity to ideal keep
    # times: slot N covers [start+N*K - K/2, start+N*K + K/2)
    slots = defaultdict(list)
    for page_dir, ts in pages:
        slot = int((ts - session_start + keep_interval / 2) / keep_interval)
        slots[slot].append((page_dir, ts))
    for slot, slot_pages in sorted(slots.items()):
        ideal = session_start + slot * keep_interval
        # Keep the screenshot closest to the ideal slot center
        best = min(slot_pages, key=lambda x: abs(x[1] - ideal))
        thin_kept += 1
        for page_dir, ts in slot_pages:
            if page_dir != best[0]:
                # Only the screenshot is removed; page-info.json stays.
                try:
                    os.remove(os.path.join(page_dir, CANONICAL_NAME))
                    thin_removed += 1
                except OSError:
                    pass

print(f"[sync-webgoggles cleanup] Thinning kept {thin_kept}, removed {thin_removed} screenshots")
print("[sync-webgoggles cleanup] Done")
PYEOF
}

# Clean the synced tree (thin sessions older than 42 days down to one
# screenshot per 120 seconds), then report the total run time.
cleanup_destination "${MP}" 42 120

NOW="$(date +%s)"
DURATION=$(( NOW - START ))
printf '%s\n' "[sync-webgoggles] Done in ${DURATION}s at $(date)"
