horny jail
This commit is contained in:
129
sync-webgoggles
129
sync-webgoggles
@@ -7,8 +7,9 @@
|
||||
# echo -e "username=schmeeve\npassword=yourpass" > ~/.smb/ts.nas
|
||||
# chmod 600 ~/.smb/ts.nas
|
||||
#
|
||||
# The rsync step never deletes or overwrites: rsync --backup renames any existing dest file
|
||||
# with a -CONFLICT suffix before writing the source version.
|
||||
# Post-sync cleanup: deduplicates identical screenshots by content hash,
|
||||
# and thins screenshots in sessions older than 6 weeks to 1 per 2 minutes
|
||||
# (based on page-info.json timestamps, not filenames).
|
||||
#
|
||||
# Adjust SHARE, SUBPATH, and SOURCE to match your environment.
|
||||
|
||||
@@ -57,5 +58,129 @@ rsync -vrau \
|
||||
--exclude=".DS_Store" \
|
||||
--progress --stats
|
||||
|
||||
# 3. Cleanup: deduplicate and thin old screenshots on destination
|
||||
# cleanup_destination DEST [THIN_DAYS] [KEEP_INTERVAL]
#
# Post-sync cleanup of the destination tree, run by an embedded Python script:
#   Phase 1: delete byte-identical screenshot files (screenshot.png and
#            screenshot-CONFLICT.png), keeping the copy with the oldest mtime.
#   Phase 2: for every sessions/<session>/<page>/ directory holding both
#            page-info.json and screenshot.png, and whose session's earliest
#            page is older than THIN_DAYS, keep one screenshot per
#            KEEP_INTERVAL-second slot and delete the others.
#
# Arguments:
#   DEST           root directory to clean (required)
#   THIN_DAYS      minimum session age in days before thinning (default 42)
#   KEEP_INTERVAL  slot width in seconds (default 120)
cleanup_destination() {
    local dest="$1"
    local thin_days="${2:-42}"        # default 6 weeks
    local keep_interval="${3:-120}"   # default 2 minutes

    echo "[sync-webgoggles] Running cleanup on ${dest}"

    # Pass the defaulted locals, not the raw positionals, so that omitted
    # arguments fall back to the defaults instead of reaching Python as ''.
    python3 - "$dest" "$thin_days" "$keep_interval" <<'PYEOF'
import os, json, time, sys, hashlib
from collections import defaultdict
from datetime import datetime

SCREENSHOT_NAMES = ('screenshot.png', 'screenshot-CONFLICT.png')


def file_digest(path, chunk_size=1 << 20):
    """Return the MD5 hex digest of a file, read in chunks (content identity only)."""
    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def dedupe_screenshots(dest):
    """Delete duplicate screenshots under dest; return the number removed."""
    # Group by size first so only files that could be identical get hashed.
    size_groups = defaultdict(list)
    for root, _dirs, files in os.walk(dest):
        for name in files:
            if name in SCREENSHOT_NAMES:
                path = os.path.join(root, name)
                try:
                    size_groups[os.path.getsize(path)].append(path)
                except OSError:
                    pass

    removed = 0
    for paths in size_groups.values():
        if len(paths) < 2:
            continue
        hash_groups = defaultdict(list)
        for path in paths:
            try:
                # Capture mtime here so a file vanishing later cannot
                # raise from inside the sort key.
                entry = (os.path.getmtime(path), path)
                hash_groups[file_digest(path)].append(entry)
            except OSError:
                pass
        for same in hash_groups.values():
            if len(same) < 2:
                continue
            # Keep the chronologically first one, delete the rest.
            # Stable sort on mtime only: ties keep their walk order.
            same.sort(key=lambda entry: entry[0])
            for _mtime, path in same[1:]:
                try:
                    os.remove(path)
                    removed += 1
                except OSError:
                    pass
    return removed


def page_timestamp(info_path):
    """Return the epoch seconds of page-info.json's 'timestamp', or None if unusable."""
    try:
        with open(info_path) as fh:
            info = json.load(fh)
        ts_str = info.get('timestamp', '')
        if not ts_str:
            return None
        # Parse ISO-8601, handle Z suffix
        if ts_str.endswith('Z'):
            ts_str = ts_str[:-1] + '+00:00'
        return datetime.fromisoformat(ts_str).timestamp()
    except (json.JSONDecodeError, KeyError, ValueError, OSError,
            AttributeError, TypeError):
        # AttributeError/TypeError: JSON that is not an object, or a
        # non-string timestamp; skip the page instead of aborting cleanup.
        return None


def collect_sessions(dest):
    """Map session_dir -> [(page_dir, timestamp_epoch)] for pages under sessions/."""
    sessions = defaultdict(list)
    for root, _dirs, files in os.walk(dest):
        if 'page-info.json' not in files or 'screenshot.png' not in files:
            continue
        # Only process pages laid out as .../sessions/<session>/<page>/
        session_dir = os.path.dirname(root)
        if os.path.basename(os.path.dirname(session_dir)) != 'sessions':
            continue
        ts = page_timestamp(os.path.join(root, 'page-info.json'))
        if ts is not None:
            sessions[session_dir].append((root, ts))
    return sessions


def thin_sessions(sessions, cutoff, keep_interval):
    """Thin old sessions to one screenshot per slot; return (kept, removed)."""
    kept = 0
    removed = 0
    for pages in sessions.values():
        pages.sort(key=lambda page: page[1])
        session_start = pages[0][1]
        # Only thin sessions where the first page is older than cutoff
        if session_start > cutoff:
            continue
        # Slot N covers [start + N*K - K/2, start + N*K + K/2)
        slots = defaultdict(list)
        for page_dir, ts in pages:
            slot = int((ts - session_start + keep_interval / 2) / keep_interval)
            slots[slot].append((page_dir, ts))
        for slot, slot_pages in sorted(slots.items()):
            ideal = session_start + slot * keep_interval
            # Keep the screenshot closest to the ideal slot center
            best = min(slot_pages, key=lambda page: abs(page[1] - ideal))
            kept += 1
            for page_dir, _ts in slot_pages:
                if page_dir == best[0]:
                    continue
                try:
                    os.remove(os.path.join(page_dir, 'screenshot.png'))
                    removed += 1
                except OSError:
                    pass
    return kept, removed


dest = sys.argv[1]
thin_days = int(sys.argv[2])
keep_interval = int(sys.argv[3])
cutoff = time.time() - thin_days * 86400

# --- Phase 1: Deduplicate by content hash ---
print("[sync-webgoggles cleanup] Deduplicating screenshots...")
dedup_removed = dedupe_screenshots(dest)
print(f"[sync-webgoggles cleanup] Deduplication removed {dedup_removed} files")

# --- Phase 2: Thin old session screenshots ---
print("[sync-webgoggles cleanup] Thinning old session screenshots...")
if keep_interval > 0:
    thin_kept, thin_removed = thin_sessions(collect_sessions(dest), cutoff, keep_interval)
    print(f"[sync-webgoggles cleanup] Thinning kept {thin_kept}, removed {thin_removed} screenshots")
else:
    # A non-positive interval would divide by zero in the slot computation.
    print("[sync-webgoggles cleanup] Skipping thinning: keep_interval must be > 0",
          file=sys.stderr)
print("[sync-webgoggles cleanup] Done")
PYEOF
}
|
||||
|
||||
# Run the post-sync cleanup on ${MP} with thin_days=42 and keep_interval=120.
# NOTE(review): MP is set outside this view; presumably the mounted share path, confirm.
cleanup_destination "${MP}" 42 120
|
||||
|
||||
# Elapsed time in seconds since START.
# NOTE(review): START is set outside this view; presumably `date +%s` at script start, confirm.
DURATION=$(( $(date +%s) - START ))
|
||||
echo "[sync-webgoggles] Done in ${DURATION}s at $(date)"
|
||||
|
||||
Reference in New Issue
Block a user