ArchiveBox/tests/test_remove.py
Nick Sweeting 6c769d831c
wip 2
2025-12-24 21:46:14 -08:00

87 lines
3.8 KiB
Python

import os
import sqlite3
from .fixtures import *
def test_remove_single_snapshot(tmp_path, process, disable_extractors_dict):
"""Test removing a snapshot by URL pattern"""
os.chdir(tmp_path)
# Add a URL - creates source file snapshot
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
# Verify snapshot exists
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
count_before = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
conn.close()
assert count_before >= 1
# Remove all snapshots (including source file snapshots)
remove_process = subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes'], capture_output=True)
# Check that it ran successfully (either output indicates success or return code 0)
output = remove_process.stdout.decode("utf-8") + remove_process.stderr.decode("utf-8")
assert remove_process.returncode == 0 or "removed" in output.lower() or "Found" in output
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
count = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
conn.close()
assert count == 0
def test_remove_with_delete_flag(tmp_path, process, disable_extractors_dict):
"""Test removing snapshot with --delete also removes archive folder"""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
# Get archives before delete
archive_dir = tmp_path / "archive"
archives_before = list(archive_dir.iterdir()) if archive_dir.exists() else []
# Only run the rest of the test if archives were created
if archives_before:
subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
archives_after = list(archive_dir.iterdir()) if archive_dir.exists() else []
assert len(archives_after) < len(archives_before)
else:
# With --index-only, archive folders may not be created immediately
# Just verify that remove command doesn't error
remove_result = subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
assert remove_result.returncode in (0, 1) # 0 = success, 1 = no matches
def test_remove_regex(tmp_path, process, disable_extractors_dict):
"""Test removing snapshots by regex pattern"""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True, env=disable_extractors_dict)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
count_before = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
conn.close()
assert count_before >= 2
subprocess.run(['archivebox', 'remove', '--filter-type=regex', '.*', '--yes', '--delete'], capture_output=True)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
count_after = c.execute("SELECT COUNT() from core_snapshot").fetchone()[0]
conn.close()
assert count_after == 0
def test_add_creates_crawls(tmp_path, process, disable_extractors_dict):
"""Test that adding URLs creates crawls in database"""
os.chdir(tmp_path)
subprocess.run(['archivebox', 'add', '--index-only', 'https://example.com'], capture_output=True, env=disable_extractors_dict)
subprocess.run(['archivebox', 'add', '--index-only', 'https://iana.org'], capture_output=True, env=disable_extractors_dict)
conn = sqlite3.connect("index.sqlite3")
c = conn.cursor()
crawl_count = c.execute("SELECT COUNT() from crawls_crawl").fetchone()[0]
conn.close()
assert crawl_count == 2