horny jail

This commit is contained in:
2026-05-16 19:11:25 -07:00
parent ce58e6b6ab
commit 43b17b57c2
3 changed files with 325 additions and 2 deletions

View File

@@ -7,8 +7,9 @@
# echo -e "username=schmeeve\npassword=yourpass" > ~/.smb/ts.nas
# chmod 600 ~/.smb/ts.nas
#
# No deletes or overwrites: rsync --backup renames any existing dest file
# with a -CONFLICT suffix before writing the source version.
# Post-sync cleanup: deduplicates identical screenshots by content hash,
# and thins screenshots in sessions older than 6 weeks to 1 per 2 minutes
# (based on page-info.json timestamps, not filenames).
#
# Adjust SHARE, SUBPATH, and SOURCE to match your environment.
@@ -57,5 +58,129 @@ rsync -vrau \
--exclude=".DS_Store" \
--progress --stats
# 3. Cleanup: deduplicate and thin old screenshots on destination
cleanup_destination() {
    # Post-sync cleanup of the destination tree (deletes files!).
    #
    # Arguments:
    #   $1  dest           root directory to clean (required)
    #   $2  thin_days      thin sessions whose first page is older than this
    #                      many days (default: 42, i.e. 6 weeks)
    #   $3  keep_interval  when thinning, keep one screenshot per this many
    #                      seconds (default: 120, i.e. 2 minutes)
    #
    # Phase 1 removes byte-identical screenshot.png / screenshot-CONFLICT.png
    # files anywhere under dest, keeping the copy with the oldest mtime.
    # Phase 2 thins screenshots inside .../sessions/<session>/<page>/ using
    # the "timestamp" field of each page's page-info.json.
    local dest="$1"
    local thin_days="${2:-42}"       # default 6 weeks
    local keep_interval="${3:-120}"  # default 2 minutes
    echo "[sync-webgoggles] Running cleanup on ${dest}"
    # Pass the defaulted locals (not the raw positionals) so that omitted
    # arguments really fall back to the defaults above.
    python3 - "${dest}" "${thin_days}" "${keep_interval}" <<'PYEOF'
import os, json, time, sys, hashlib
from collections import defaultdict
from datetime import datetime

dest = sys.argv[1]
thin_days = int(sys.argv[2])
keep_interval = int(sys.argv[3])
if keep_interval <= 0:
    # A zero interval would divide by zero when assigning slots below.
    sys.exit("[sync-webgoggles cleanup] keep_interval must be a positive number of seconds")
cutoff = time.time() - thin_days * 86400


def warn(msg):
    """Report a non-fatal problem on stderr; cleanup stays best-effort."""
    print(f"[sync-webgoggles cleanup] WARNING: {msg}", file=sys.stderr)


def file_md5(path, chunk_size=1 << 20):
    """Return the MD5 hex digest of a file, read in chunks.

    MD5 is used only as a content fingerprint, not for security.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# --- Phase 1: Deduplicate by content hash ---
print("[sync-webgoggles cleanup] Deduplicating screenshots...")
# Collect all screenshot variants (CONFLICT copies included), bucketed by
# size first so that only files with a same-sized sibling get hashed.
size_groups = defaultdict(list)
for root, dirs, files in os.walk(dest):
    for f in files:
        if f in ('screenshot.png', 'screenshot-CONFLICT.png'):
            path = os.path.join(root, f)
            try:
                size_groups[os.path.getsize(path)].append(path)
            except OSError as exc:
                warn(f"could not stat {path}: {exc}")

dedup_removed = 0
for paths in size_groups.values():
    if len(paths) < 2:
        continue
    # digest -> [(mtime, path)]; mtime is read here, inside the try, so a
    # file that disappears mid-run is skipped instead of aborting the sort.
    hash_groups = defaultdict(list)
    for p in paths:
        try:
            hash_groups[file_md5(p)].append((os.path.getmtime(p), p))
        except OSError as exc:
            warn(f"could not hash {p}: {exc}")
    for same_paths in hash_groups.values():
        if len(same_paths) < 2:
            continue
        # Keep the chronologically first one (oldest mtime; ties broken by
        # path for determinism), delete the rest.
        same_paths.sort()
        for _, p in same_paths[1:]:
            try:
                os.remove(p)
                dedup_removed += 1
            except OSError as exc:
                warn(f"could not remove duplicate {p}: {exc}")
print(f"[sync-webgoggles cleanup] Deduplication removed {dedup_removed} files")

# --- Phase 2: Thin old session screenshots ---
print("[sync-webgoggles cleanup] Thinning old session screenshots...")
# Collect all page dirs that have both page-info.json and screenshot.png,
# grouped by session (site/user/sessions/TS/)
sessions = defaultdict(list)  # session_dir -> [(page_dir, timestamp_epoch)]
for root, dirs, files in os.walk(dest):
    if 'page-info.json' not in files or 'screenshot.png' not in files:
        continue
    # Only process pages inside sessions/ structure
    parent = os.path.dirname(root)
    grandparent = os.path.dirname(parent)
    if os.path.basename(grandparent) != 'sessions':
        continue
    session_dir = parent  # .../sessions/2026-05-17T01-10-23-730Z
    info_path = os.path.join(root, 'page-info.json')
    try:
        with open(info_path, encoding='utf-8') as f:
            info = json.load(f)
        ts_str = info.get('timestamp', '')
        if not ts_str:
            continue
        # Parse ISO-8601, handle Z suffix (fromisoformat rejects a bare "Z"
        # on older Python versions)
        if ts_str.endswith('Z'):
            ts_str = ts_str[:-1] + '+00:00'
        ts = datetime.fromisoformat(ts_str).timestamp()
        sessions[session_dir].append((root, ts))
    except (ValueError, OSError, AttributeError, TypeError):
        # Unreadable/invalid JSON, a non-object document, or a timestamp
        # that is not a parseable string: leave this page untouched.
        pass

thin_removed = 0
thin_kept = 0
for session_dir, pages in sessions.items():
    pages.sort(key=lambda x: x[1])
    # Only thin sessions where the first page is older than cutoff
    if pages[0][1] > cutoff:
        continue
    session_start = pages[0][1]
    # Group pages into keep_interval-second slots by proximity to ideal
    # keep times: slot N covers [start+N*K - K/2, start+N*K + K/2)
    slots = defaultdict(list)
    for page_dir, ts in pages:
        slot = int((ts - session_start + keep_interval / 2) / keep_interval)
        slots[slot].append((page_dir, ts))
    for slot, slot_pages in sorted(slots.items()):
        ideal = session_start + slot * keep_interval
        # Keep the screenshot closest to the ideal slot center
        best = min(slot_pages, key=lambda x: abs(x[1] - ideal))
        thin_kept += 1
        for page_dir, ts in slot_pages:
            if page_dir != best[0]:
                ss_path = os.path.join(page_dir, 'screenshot.png')
                try:
                    os.remove(ss_path)
                    thin_removed += 1
                except OSError as exc:
                    warn(f"could not remove {ss_path}: {exc}")
print(f"[sync-webgoggles cleanup] Thinning kept {thin_kept}, removed {thin_removed} screenshots")
print("[sync-webgoggles cleanup] Done")
PYEOF
}
# Run the post-sync cleanup on ${MP} (defined outside this view; presumably
# the mounted destination -- confirm), thinning sessions older than 42 days
# to one screenshot per 120 seconds.
cleanup_destination "${MP}" 42 120
# Elapsed seconds since START (set outside this view; assumed to hold the
# epoch seconds captured when the script began -- confirm).
DURATION=$(( $(date +%s) - START ))
echo "[sync-webgoggles] Done in ${DURATION}s at $(date)"