remove Seed model in favor of Crawl as template

Nick Sweeting 2025-12-25 01:52:38 -08:00
parent 28e6c5bb65
commit bb53228ebf
30 changed files with 785 additions and 690 deletions

View File

@ -0,0 +1,113 @@
# Generated by Django 6.0 on 2025-12-25 09:34
import django.utils.timezone
import signal_webhooks.fields
import signal_webhooks.utils
import uuid
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('api', '0001_squashed'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.AlterModelOptions(
name='outboundwebhook',
options={'verbose_name': 'API Outbound Webhook'},
),
migrations.AddField(
model_name='outboundwebhook',
name='created',
field=models.DateTimeField(auto_now_add=True, default=django.utils.timezone.now, help_text='When the webhook was created.', verbose_name='created'),
preserve_default=False,
),
migrations.AddField(
model_name='outboundwebhook',
name='updated',
field=models.DateTimeField(auto_now=True, help_text='When the webhook was last updated.', verbose_name='updated'),
),
migrations.AlterField(
model_name='apitoken',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='apitoken',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='outboundwebhook',
name='auth_token',
field=signal_webhooks.fields.TokenField(blank=True, default='', help_text='Authentication token to use in an Authorization header.', max_length=8000, validators=[signal_webhooks.utils.decode_cipher_key], verbose_name='authentication token'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='outboundwebhook',
name='enabled',
field=models.BooleanField(default=True, help_text='Is this webhook enabled?', verbose_name='enabled'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='endpoint',
field=models.URLField(help_text='Target endpoint for this webhook.', max_length=2047, verbose_name='endpoint'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='headers',
field=models.JSONField(blank=True, default=dict, help_text='Headers to send with the webhook request.', validators=[signal_webhooks.utils.is_dict], verbose_name='headers'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='outboundwebhook',
name='keep_last_response',
field=models.BooleanField(default=False, help_text='Should the webhook keep a log of the latest response it got?', verbose_name='keep last response'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='last_failure',
field=models.DateTimeField(default=None, help_text='When the webhook last failed.', null=True, verbose_name='last failure'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='last_response',
field=models.CharField(blank=True, default='', help_text='Latest response to this webhook.', max_length=8000, verbose_name='last response'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='last_success',
field=models.DateTimeField(default=None, help_text='When the webhook last succeeded.', null=True, verbose_name='last success'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='name',
field=models.CharField(db_index=True, help_text='Webhook name.', max_length=255, unique=True, verbose_name='name'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='ref',
field=models.CharField(db_index=True, help_text='Dot import notation to the model the webhook is for.', max_length=1023, validators=[signal_webhooks.utils.model_from_reference], verbose_name='referenced model'),
),
migrations.AlterField(
model_name='outboundwebhook',
name='signal',
field=models.CharField(choices=[('CREATE', 'Create'), ('UPDATE', 'Update'), ('DELETE', 'Delete'), ('M2M', 'M2M changed'), ('CREATE_OR_UPDATE', 'Create or Update'), ('CREATE_OR_DELETE', 'Create or Delete'), ('CREATE_OR_M2M', 'Create or M2M changed'), ('UPDATE_OR_DELETE', 'Update or Delete'), ('UPDATE_OR_M2M', 'Update or M2M changed'), ('DELETE_OR_M2M', 'Delete or M2M changed'), ('CREATE_UPDATE_OR_DELETE', 'Create, Update or Delete'), ('CREATE_UPDATE_OR_M2M', 'Create, Update or M2M changed'), ('CREATE_DELETE_OR_M2M', 'Create, Delete or M2M changed'), ('UPDATE_DELETE_OR_M2M', 'Update, Delete or M2M changed'), ('CREATE_UPDATE_DELETE_OR_M2M', 'Create, Update or Delete, or M2M changed')], help_text='Signal the webhook fires to.', max_length=255, verbose_name='signal'),
),
migrations.AddConstraint(
model_name='outboundwebhook',
constraint=models.UniqueConstraint(fields=('ref', 'endpoint'), name='prevent_duplicate_hooks_api_outboundwebhook'),
),
]

View File

@ -15,7 +15,7 @@ from ninja.pagination import paginate, PaginationBase
from ninja.errors import HttpError
from core.models import Snapshot, ArchiveResult, Tag
from api.v1_crawls import CrawlSchema, SeedSchema
from api.v1_crawls import CrawlSchema
router = Router(tags=['Core Models'])
@ -271,9 +271,9 @@ def get_tag(request, tag_id: str, with_snapshots: bool = True):
return Tag.objects.get(slug__icontains=tag_id)
@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, SeedSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
@router.get("/any/{id}", response=Union[SnapshotSchema, ArchiveResultSchema, TagSchema, CrawlSchema], url_name="get_any", summary="Get any object by its ID")
def get_any(request, id: str):
"""Get any object by its ID (e.g. snapshot, archiveresult, tag, seed, crawl, etc.)."""
"""Get any object by its ID (e.g. snapshot, archiveresult, tag, crawl, etc.)."""
request.with_snapshots = False
request.with_archiveresults = False
@ -285,14 +285,6 @@ def get_any(request, id: str):
except Exception:
pass
try:
from api.v1_crawls import get_seed
response = get_seed(request, id)
if response:
return redirect(f"/api/v1/{response._meta.app_label}/{response._meta.model_name}/{response.id}?{request.META['QUERY_STRING']}")
except Exception:
pass
try:
from api.v1_crawls import get_crawl
response = get_crawl(request, id)

View File

@ -10,53 +10,13 @@ from django.contrib.auth import get_user_model
from ninja import Router, Schema
from core.models import Snapshot
from crawls.models import Seed, Crawl
from crawls.models import Crawl
from .auth import API_AUTH_METHODS
router = Router(tags=['Crawl Models'], auth=API_AUTH_METHODS)
class SeedSchema(Schema):
TYPE: str = 'crawls.models.Seed'
id: UUID
modified_at: datetime
created_at: datetime
created_by_id: str
created_by_username: str
uri: str
tags_str: str
config: dict
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj):
User = get_user_model()
return User.objects.get(id=obj.created_by_id).username
@router.get("/seeds", response=List[SeedSchema], url_name="get_seeds")
def get_seeds(request):
return Seed.objects.all().distinct()
@router.get("/seed/{seed_id}", response=SeedSchema, url_name="get_seed")
def get_seed(request, seed_id: str):
seed = None
request.with_snapshots = False
request.with_archiveresults = False
try:
seed = Seed.objects.get(Q(id__icontains=seed_id))
except Exception:
pass
return seed
class CrawlSchema(Schema):
TYPE: str = 'crawls.models.Crawl'
@ -66,24 +26,27 @@ class CrawlSchema(Schema):
created_at: datetime
created_by_id: str
created_by_username: str
status: str
retry_at: datetime | None
seed: SeedSchema
urls: str
extractor: str
max_depth: int
tags_str: str
config: dict
# snapshots: List[SnapshotSchema]
@staticmethod
def resolve_created_by_id(obj):
return str(obj.created_by_id)
@staticmethod
def resolve_created_by_username(obj):
User = get_user_model()
return User.objects.get(id=obj.created_by_id).username
@staticmethod
def resolve_snapshots(obj, context):
if context['request'].with_snapshots:

View File

@ -21,6 +21,7 @@ class ArchiveBoxGroup(click.Group):
meta_commands = {
'help': 'archivebox.cli.archivebox_help.main',
'version': 'archivebox.cli.archivebox_version.main',
'mcp': 'archivebox.cli.archivebox_mcp.main',
}
setup_commands = {
'init': 'archivebox.cli.archivebox_init.main',

View File

@ -36,15 +36,14 @@ def add(urls: str | list[str],
created_by_id: int | None=None) -> QuerySet['Snapshot']:
"""Add a new URL or list of URLs to your archive.
The new flow is:
The flow is:
1. Save URLs to sources file
2. Create Seed pointing to the file
3. Create Crawl with max_depth
4. Create root Snapshot pointing to file:// URL (depth=0)
5. Orchestrator runs parser extractors on root snapshot
6. Parser extractors output to urls.jsonl
7. URLs are added to Crawl.urls and child Snapshots are created
8. Repeat until max_depth is reached
2. Create Crawl with URLs and max_depth
3. Orchestrator creates Snapshots from Crawl URLs (depth=0)
4. Orchestrator runs parser extractors on root snapshots
5. Parser extractors output to urls.jsonl
6. URLs are added to Crawl.urls and child Snapshots are created
7. Repeat until max_depth is reached
"""
from rich import print
@ -55,7 +54,7 @@ def add(urls: str | list[str],
# import models once django is set up
from core.models import Snapshot
from crawls.models import Seed, Crawl
from crawls.models import Crawl
from archivebox.base_models.models import get_or_create_system_user_pk
from workers.orchestrator import Orchestrator
@ -66,19 +65,24 @@ def add(urls: str | list[str],
sources_file.parent.mkdir(parents=True, exist_ok=True)
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
# 2. Create a new Seed pointing to the sources file
# 2. Create a new Crawl with inline URLs
cli_args = [*sys.argv]
if cli_args[0].lower().endswith('archivebox'):
cli_args[0] = 'archivebox'
cmd_str = ' '.join(cli_args)
timestamp = timezone.now().strftime("%Y-%m-%d__%H-%M-%S")
seed = Seed.from_file(
sources_file,
# Read URLs directly into crawl
urls_content = sources_file.read_text()
crawl = Crawl.objects.create(
urls=urls_content,
extractor=parser,
max_depth=depth,
tags_str=tag,
label=f'{USER}@{HOSTNAME} $ {cmd_str} [{timestamp}]',
parser=parser,
tag=tag,
created_by=created_by_id,
created_by_id=created_by_id,
config={
'ONLY_NEW': not update,
'INDEX_ONLY': index_only,
@ -88,15 +92,13 @@ def add(urls: str | list[str],
}
)
# 3. Create a new Crawl pointing to the Seed (status=queued)
crawl = Crawl.from_seed(seed, max_depth=depth)
print(f'[green]\\[+] Created Crawl {crawl.id} with max_depth={depth}[/green]')
print(f' [dim]Seed: {seed.uri}[/dim]')
first_url = crawl.get_urls_list()[0] if crawl.get_urls_list() else ''
print(f' [dim]First URL: {first_url}[/dim]')
# 4. The CrawlMachine will create the root Snapshot when started
# Root snapshot URL = file:///path/to/sources/...txt
# Parser extractors will run on it and discover URLs
# 3. The CrawlMachine will create the root Snapshot when started
# If URLs are from a file: first URL = file:///path/to/sources/...txt
# Parser extractors will run on it and discover more URLs
# Those URLs become child Snapshots (depth=1)
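As a hedged sketch, the root-snapshot step described above reduces to a single call on the new model API (the example output path is made up):

    # create_root_snapshot() is defined on Crawl later in this diff; it creates a
    # Snapshot for the first URL in crawl.urls (a file:// URL when the crawl came
    # from a sources file), which the parser extractors then expand.
    root_snapshot = crawl.create_root_snapshot()
    print(root_snapshot.url)   # e.g. file:///data/sources/2025-12-25__12-00-00__cli_add.txt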
if index_only:

View File

@ -76,7 +76,7 @@ def discover_outlinks(
)
from archivebox.base_models.models import get_or_create_system_user_pk
from core.models import Snapshot, ArchiveResult
from crawls.models import Seed, Crawl
from crawls.models import Crawl
from archivebox.config import CONSTANTS
from workers.orchestrator import Orchestrator
@ -117,12 +117,12 @@ def discover_outlinks(
sources_file.parent.mkdir(parents=True, exist_ok=True)
sources_file.write_text('\n'.join(r.get('url', '') for r in new_url_records if r.get('url')))
seed = Seed.from_file(
crawl = Crawl.from_file(
sources_file,
max_depth=depth,
label=f'crawl --depth={depth}',
created_by=created_by_id,
)
crawl = Crawl.from_seed(seed, max_depth=depth)
# Create snapshots for new URLs
for record in new_url_records:

View File

@ -42,27 +42,20 @@ def install(dry_run: bool=False) -> None:
setup_django()
from django.utils import timezone
from crawls.models import Seed, Crawl
from crawls.models import Crawl
from archivebox.base_models.models import get_or_create_system_user_pk
# Create a seed and crawl for dependency detection
# Create a crawl for dependency detection
# Using a minimal crawl that will trigger on_Crawl hooks
created_by_id = get_or_create_system_user_pk()
seed, _created = Seed.objects.get_or_create(
uri='archivebox://install',
crawl, created = Crawl.objects.get_or_create(
urls='archivebox://install',
label='Dependency detection',
created_by_id=created_by_id,
defaults={
'extractor': 'auto',
}
)
crawl, created = Crawl.objects.get_or_create(
seed=seed,
max_depth=0,
created_by_id=created_by_id,
defaults={
'max_depth': 0,
'status': 'queued',
}
)
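Because the removed Seed-based calls are interleaved with the new code above, here is the surviving call reconstructed on its own (a sketch of the net result, as best it can be read from the diff):

    from crawls.models import Crawl
    from archivebox.base_models.models import get_or_create_system_user_pk

    created_by_id = get_or_create_system_user_pk()

    # One placeholder crawl whose on_Crawl hooks run dependency detection;
    # the old two-step Seed + Crawl pair collapses into this single call.
    crawl, created = Crawl.objects.get_or_create(
        urls='archivebox://install',
        label='Dependency detection',
        created_by_id=created_by_id,
        defaults={'extractor': 'auto'},
    )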

View File

@ -92,7 +92,7 @@ def create_snapshots(
)
from archivebox.base_models.models import get_or_create_system_user_pk
from core.models import Snapshot
from crawls.models import Seed, Crawl
from crawls.models import Crawl
from archivebox.config import CONSTANTS
created_by_id = created_by_id or get_or_create_system_user_pk()
@ -108,17 +108,17 @@ def create_snapshots(
# If depth > 0, we need a Crawl to manage recursive discovery
crawl = None
if depth > 0:
# Create a seed for this batch
# Create a crawl for this batch
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__snapshot.txt'
sources_file.parent.mkdir(parents=True, exist_ok=True)
sources_file.write_text('\n'.join(r.get('url', '') for r in records if r.get('url')))
seed = Seed.from_file(
crawl = Crawl.from_file(
sources_file,
max_depth=depth,
label=f'snapshot --depth={depth}',
created_by=created_by_id,
)
crawl = Crawl.from_seed(seed, max_depth=depth)
# Process each record
created_snapshots = []

View File

@ -111,53 +111,27 @@ def version(quiet: bool=False,
machine = Machine.current()
# Get all *_BINARY config values
binary_config_keys = [key for key in config.keys() if key.endswith('_BINARY')]
# Get all installed binaries from the database
all_installed = InstalledBinary.objects.filter(
machine=machine
).exclude(abspath='').exclude(abspath__isnull=True).order_by('name')
if not binary_config_keys:
prnt('', '[grey53]No binary dependencies defined in config.[/grey53]')
if not all_installed.exists():
prnt('', '[grey53]No binaries detected. Run [green]archivebox install[/green] to detect dependencies.[/grey53]')
else:
for key in sorted(set(binary_config_keys)):
# Get the actual binary name/path from config value
# Prioritize Machine.config overrides over base config
bin_value = machine.config.get(key) or config.get(key, '').strip()
if not bin_value:
for installed in all_installed:
# Skip if user specified specific binaries and this isn't one
if binaries and installed.name not in binaries:
continue
# Check if it's a path (has slashes) or just a name
is_path = '/' in str(bin_value)
if is_path:
# It's a full path - match against abspath
bin_name = Path(bin_value).name
# Skip if user specified specific binaries and this isn't one
if binaries and bin_name not in binaries:
continue
# Find InstalledBinary where abspath ends with this path
installed = InstalledBinary.objects.filter(
machine=machine,
abspath__endswith=bin_value,
).exclude(abspath='').exclude(abspath__isnull=True).order_by('-modified_at').first()
else:
# It's just a binary name - match against name
bin_name = bin_value
# Skip if user specified specific binaries and this isn't one
if binaries and bin_name not in binaries:
continue
# Find InstalledBinary by name
installed = InstalledBinary.objects.filter(
machine=machine,
name__iexact=bin_name,
).exclude(abspath='').exclude(abspath__isnull=True).order_by('-modified_at').first()
if installed and installed.is_valid:
if installed.is_valid:
display_path = installed.abspath.replace(str(DATA_DIR), '.').replace(str(Path('~').expanduser()), '~')
version_str = (installed.version or 'unknown')[:15]
provider = (installed.binprovider or 'env')[:8]
prnt('', '[green]√[/green]', '', bin_name.ljust(18), version_str.ljust(16), provider.ljust(8), display_path, overflow='ignore', crop=False)
prnt('', '[green]√[/green]', '', installed.name.ljust(18), version_str.ljust(16), provider.ljust(8), display_path, overflow='ignore', crop=False)
else:
prnt('', '[red]X[/red]', '', bin_name.ljust(18), '[grey53]not installed[/grey53]', overflow='ignore', crop=False)
failures.append(bin_name)
prnt('', '[red]X[/red]', '', installed.name.ljust(18), '[grey53]not installed[/grey53]', overflow='ignore', crop=False)
failures.append(installed.name)
# Show hint if no binaries are installed yet
has_any_installed = InstalledBinary.objects.filter(machine=machine).exclude(abspath='').exists()

View File

@ -96,10 +96,8 @@ class ConstantsDict(Mapping):
# Data dir files
CONFIG_FILENAME: str = 'ArchiveBox.conf'
SQL_INDEX_FILENAME: str = 'index.sqlite3'
QUEUE_DATABASE_FILENAME: str = 'queue.sqlite3'
CONFIG_FILE: Path = DATA_DIR / CONFIG_FILENAME
DATABASE_FILE: Path = DATA_DIR / SQL_INDEX_FILENAME
QUEUE_DATABASE_FILE: Path = DATA_DIR / QUEUE_DATABASE_FILENAME
JSON_INDEX_FILENAME: str = 'index.json'
HTML_INDEX_FILENAME: str = 'index.html'
@ -184,10 +182,10 @@ class ConstantsDict(Mapping):
SQL_INDEX_FILENAME,
f"{SQL_INDEX_FILENAME}-wal",
f"{SQL_INDEX_FILENAME}-shm",
QUEUE_DATABASE_FILENAME,
f"{QUEUE_DATABASE_FILENAME}-wal",
f"{QUEUE_DATABASE_FILENAME}-shm",
"search.sqlite3",
"queue.sqlite3",
"queue.sqlite3-wal",
"queue.sqlite3-shm",
JSON_INDEX_FILENAME,
HTML_INDEX_FILENAME,
ROBOTS_TXT_FILENAME,

View File

@ -56,6 +56,14 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}" 2>/dev/null')
os.system(f'chown {ARCHIVEBOX_USER}:{ARCHIVEBOX_GROUP} "{CONSTANTS.DATA_DIR}"/* 2>/dev/null')
# Suppress the "database access during app initialization" warning
# This warning can be triggered during django.setup() but is safe to ignore
# since we're doing intentional setup operations
import warnings
warnings.filterwarnings('ignore',
message='.*Accessing the database during app initialization.*',
category=RuntimeWarning)
try:
from django.core.management import call_command
@ -87,7 +95,8 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
style='bold red',
))
STDERR.print()
STDERR.print_exception(show_locals=False)
import traceback
traceback.print_exc()
return
from django.conf import settings

View File

@ -224,12 +224,6 @@ def get_data_locations():
"is_valid": os.path.isfile(DATABASE_FILE) and os.access(DATABASE_FILE, os.R_OK) and os.access(DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(DATABASE_FILE.resolve()),
},
"QUEUE_DATABASE": {
"path": CONSTANTS.QUEUE_DATABASE_FILE,
"enabled": True,
"is_valid": os.path.isfile(CONSTANTS.QUEUE_DATABASE_FILE) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.R_OK) and os.access(CONSTANTS.QUEUE_DATABASE_FILE, os.W_OK),
"is_mount": os.path.ismount(CONSTANTS.QUEUE_DATABASE_FILE),
},
"ARCHIVE_DIR": {
"path": ARCHIVE_DIR.resolve(),
"enabled": True,

View File

@ -33,15 +33,18 @@ GLOBAL_CONTEXT = {}
class SnapshotActionForm(ActionForm):
tags = forms.ModelMultipleChoiceField(
label='Edit tags',
queryset=Tag.objects.all(),
required=False,
widget=FilteredSelectMultiple(
'core_tag__name',
False,
),
)
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Define tags field in __init__ to avoid database access during app initialization
self.fields['tags'] = forms.ModelMultipleChoiceField(
label='Edit tags',
queryset=Tag.objects.all(),
required=False,
widget=FilteredSelectMultiple(
'core_tag__name',
False,
),
)
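The underlying pattern in isolation, for reference (a generic sketch, not this project's form; it assumes core.models.Tag as used by the form above):

    from django import forms
    from core.models import Tag

    class LazyTagForm(forms.Form):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # Same rationale as above: build the Tag queryset only when a request
            # instantiates the form, not at import/app-initialization time.
            self.fields['tags'] = forms.ModelMultipleChoiceField(
                queryset=Tag.objects.all(),
                required=False,
            )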
# TODO: allow selecting actions for specific extractors? is this useful?
# extractor = forms.ChoiceField(
@ -165,14 +168,69 @@ class SnapshotAdmin(SearchResultsAdminMixin, ConfigEditorMixin, BaseModelAdmin):
def admin_actions(self, obj):
return format_html(
# URL Hash: <code style="font-size: 10px; user-select: all">{}</code><br/>
'''
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}">Summary page </a> &nbsp; &nbsp;
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/archive/{}/index.html#all">Result files 📑</a> &nbsp; &nbsp;
<a class="btn" style="font-size: 18px; display: inline-block; border-radius: 10px; border: 3px solid #eee; padding: 4px 8px" href="/admin/core/snapshot/?id__exact={}">Admin actions </a>
<div style="display: flex; flex-wrap: wrap; gap: 12px; align-items: center;">
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/archive/{}"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
📄 Summary Page
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/archive/{}/index.html#all"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
📁 Result Files
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #f8fafc; border: 1px solid #e2e8f0; border-radius: 8px; color: #334155; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="{}"
target="_blank"
onmouseover="this.style.background='#f1f5f9'; this.style.borderColor='#cbd5e1';"
onmouseout="this.style.background='#f8fafc'; this.style.borderColor='#e2e8f0';">
🔗 Original URL
</a>
<span style="border-left: 1px solid #e2e8f0; height: 24px; margin: 0 4px;"></span>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #ecfdf5; border: 1px solid #a7f3d0; border-radius: 8px; color: #065f46; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Get missing extractors"
onmouseover="this.style.background='#d1fae5';"
onmouseout="this.style.background='#ecfdf5';">
Get Missing
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; color: #1e40af; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Create a fresh new snapshot of this URL"
onmouseover="this.style.background='#dbeafe';"
onmouseout="this.style.background='#eff6ff';">
🆕 Archive Again
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fffbeb; border: 1px solid #fde68a; border-radius: 8px; color: #92400e; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Re-run all extractors (overwrite existing)"
onmouseover="this.style.background='#fef3c7';"
onmouseout="this.style.background='#fffbeb';">
🔄 Redo All
</a>
<a class="btn" style="display: inline-flex; align-items: center; gap: 6px; padding: 10px 16px; background: #fef2f2; border: 1px solid #fecaca; border-radius: 8px; color: #991b1b; text-decoration: none; font-size: 14px; font-weight: 500; transition: all 0.15s;"
href="/admin/core/snapshot/?id__exact={}"
title="Permanently delete this snapshot"
onmouseover="this.style.background='#fee2e2';"
onmouseout="this.style.background='#fef2f2';">
Delete
</a>
</div>
<p style="margin-top: 12px; font-size: 12px; color: #64748b;">
<b>Tip:</b> Action buttons link to the list view with this snapshot pre-selected. Select it and use the action dropdown to execute.
</p>
''',
obj.timestamp,
obj.timestamp,
obj.url,
obj.pk,
obj.pk,
obj.pk,
obj.pk,
)

View File

@ -0,0 +1,101 @@
# Generated by Django 6.0 on 2025-12-25 09:34
import archivebox.base_models.models
import django.db.models.deletion
import django.utils.timezone
import uuid
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('core', '0025_allow_duplicate_urls_per_crawl'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.RemoveField(
model_name='archiveresult',
name='output_dir',
),
migrations.RemoveField(
model_name='snapshot',
name='output_dir',
),
migrations.AlterField(
model_name='archiveresult',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='archiveresult',
name='created_by',
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='archiveresult_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='archiveresult',
name='extractor',
field=models.CharField(db_index=True, max_length=32),
),
migrations.AlterField(
model_name='archiveresult',
name='id',
field=models.AutoField(editable=False, primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='archiveresult',
name='status',
field=models.CharField(choices=[('queued', 'Queued'), ('started', 'Started'), ('backoff', 'Waiting to retry'), ('succeeded', 'Succeeded'), ('failed', 'Failed'), ('skipped', 'Skipped')], db_index=True, default='queued', max_length=15),
),
migrations.AlterField(
model_name='archiveresult',
name='uuid',
field=models.UUIDField(blank=True, db_index=True, default=uuid.uuid7, null=True, unique=True),
),
migrations.AlterField(
model_name='snapshot',
name='bookmarked_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
migrations.AlterField(
model_name='snapshot',
name='created_by',
field=models.ForeignKey(default=None, on_delete=django.db.models.deletion.CASCADE, related_name='snapshot_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='snapshot',
name='downloaded_at',
field=models.DateTimeField(blank=True, db_index=True, default=None, editable=False, null=True),
),
migrations.AlterField(
model_name='snapshot',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
# migrations.AlterField(
# model_name='snapshot',
# name='tags',
# field=models.ManyToManyField(blank=True, related_name='snapshot_set', through='core.SnapshotTag', through_fields=('snapshot', 'tag'), to='core.tag'),
# ),
migrations.AlterField(
model_name='snapshottag',
name='id',
field=models.AutoField(primary_key=True, serialize=False),
),
migrations.AlterField(
model_name='tag',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, related_name='tag_set', to=settings.AUTH_USER_MODEL),
),
migrations.AlterUniqueTogether(
name='snapshottag',
unique_together={('snapshot', 'tag')},
),
]

View File

@ -59,7 +59,7 @@ INSTALLED_APPS = [
"config", # ArchiveBox config settings (loaded as a plugin, don't need to add it here)
"machine", # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
"workers", # handles starting and managing background workers and processes (orchestrators and actors)
"crawls", # handles Seed, Crawl, and CrawlSchedule models and management
"crawls", # handles Crawl and CrawlSchedule models and management
"personas", # handles Persona and session management
"core", # core django model with Snapshot, ArchiveResult, etc.
"api", # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
@ -194,10 +194,6 @@ DATABASES = {
"NAME": DATABASE_NAME,
**SQLITE_CONNECTION_OPTIONS,
},
"queue": {
"NAME": CONSTANTS.QUEUE_DATABASE_FILE,
**SQLITE_CONNECTION_OPTIONS,
},
# "filestore": {
# "NAME": CONSTANTS.FILESTORE_DATABASE_FILE,
# **SQLITE_CONNECTION_OPTIONS,

View File

@ -2,8 +2,6 @@ __package__ = 'archivebox.core'
import re
import os
import shutil
import tempfile
import logging
@ -11,7 +9,6 @@ import pydantic
import django.template
from archivebox.config import CONSTANTS
from archivebox.misc.logging import IS_TTY
IGNORABLE_URL_PATTERNS = [
@ -79,7 +76,6 @@ SETTINGS_LOGGING = {
"formatters": {
"rich": {
"datefmt": "[%Y-%m-%d %H:%M:%S]",
# "format": "{asctime} {levelname} {module} {name} {message} {username}",
"format": "%(name)s %(message)s",
},
"outbound_webhooks": {
@ -99,26 +95,13 @@ SETTINGS_LOGGING = {
},
},
"handlers": {
# "console": {
# "level": "DEBUG",
# 'formatter': 'simple',
# "class": "logging.StreamHandler",
# 'filters': ['noisyrequestsfilter', 'add_extra_logging_attrs'],
# },
"default": {
"class": "rich.logging.RichHandler",
"formatter": "rich",
"level": "DEBUG",
"markup": False,
"rich_tracebacks": IS_TTY,
"rich_tracebacks": False, # Use standard Python tracebacks (no frame/box)
"filters": ["noisyrequestsfilter"],
"tracebacks_suppress": [
django,
pydantic,
],
"tracebacks_width": shutil.get_terminal_size((100, 10)).columns - 1,
"tracebacks_word_wrap": False,
"tracebacks_show_locals": False,
},
"logfile": {
"level": "INFO",
@ -132,7 +115,7 @@ SETTINGS_LOGGING = {
"outbound_webhooks": {
"class": "rich.logging.RichHandler",
"markup": False,
"rich_tracebacks": True,
"rich_tracebacks": False, # Use standard Python tracebacks (no frame/box)
"formatter": "outbound_webhooks",
},
# "mail_admins": {

View File

@ -15,7 +15,7 @@ from statemachine import State, StateMachine
# from workers.actor import ActorType
from core.models import Snapshot, ArchiveResult
from crawls.models import Crawl, Seed
from crawls.models import Crawl
class SnapshotMachine(StateMachine, strict_states=True):
@ -247,17 +247,14 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
)
self.archiveresult.save(write_indexes=True)
# Increment health stats on ArchiveResult, Snapshot, and optionally Crawl/Seed
# Increment health stats on ArchiveResult, Snapshot, and optionally Crawl
ArchiveResult.objects.filter(pk=self.archiveresult.pk).update(num_uses_succeeded=F('num_uses_succeeded') + 1)
Snapshot.objects.filter(pk=self.archiveresult.snapshot_id).update(num_uses_succeeded=F('num_uses_succeeded') + 1)
# Also update Crawl and Seed health stats if snapshot has a crawl
# Also update Crawl health stats if snapshot has a crawl
snapshot = self.archiveresult.snapshot
if snapshot.crawl_id:
Crawl.objects.filter(pk=snapshot.crawl_id).update(num_uses_succeeded=F('num_uses_succeeded') + 1)
crawl = Crawl.objects.filter(pk=snapshot.crawl_id).values_list('seed_id', flat=True).first()
if crawl:
Seed.objects.filter(pk=crawl).update(num_uses_succeeded=F('num_uses_succeeded') + 1)
@failed.enter
def enter_failed(self):
@ -268,17 +265,14 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
end_ts=timezone.now(),
)
# Increment health stats on ArchiveResult, Snapshot, and optionally Crawl/Seed
# Increment health stats on ArchiveResult, Snapshot, and optionally Crawl
ArchiveResult.objects.filter(pk=self.archiveresult.pk).update(num_uses_failed=F('num_uses_failed') + 1)
Snapshot.objects.filter(pk=self.archiveresult.snapshot_id).update(num_uses_failed=F('num_uses_failed') + 1)
# Also update Crawl and Seed health stats if snapshot has a crawl
# Also update Crawl health stats if snapshot has a crawl
snapshot = self.archiveresult.snapshot
if snapshot.crawl_id:
Crawl.objects.filter(pk=snapshot.crawl_id).update(num_uses_failed=F('num_uses_failed') + 1)
crawl = Crawl.objects.filter(pk=snapshot.crawl_id).values_list('seed_id', flat=True).first()
if crawl:
Seed.objects.filter(pk=crawl).update(num_uses_failed=F('num_uses_failed') + 1)
@skipped.enter
def enter_skipped(self):

View File

@ -33,7 +33,7 @@ from archivebox.search import query_search_index
from core.models import Snapshot
from core.forms import AddLinkForm
from crawls.models import Seed, Crawl
from crawls.models import Crawl
from archivebox.hooks import get_extractors, get_extractor_name
@ -119,7 +119,11 @@ class SnapshotView(View):
if result_file.name in existing_files or result_file.name == 'index.html':
continue
file_size = result_file.stat().st_size or 0
# Skip circular symlinks and other stat() failures
try:
file_size = result_file.stat().st_size or 0
except OSError:
continue
if file_size > min_size_threshold:
archiveresults[result_file.name] = {
@ -471,14 +475,16 @@ class AddView(UserPassesTestMixin, FormView):
sources_file = CONSTANTS.SOURCES_DIR / f'{timezone.now().strftime("%Y-%m-%d__%H-%M-%S")}__web_ui_add_by_user_{self.request.user.pk}.txt'
sources_file.write_text(urls if isinstance(urls, str) else '\n'.join(urls))
# 2. create a new Seed pointing to the sources/2024-11-05__23-59-59__web_ui_add_by_user_<user_pk>.txt
# 2. create a new Crawl with the URLs from the file
timestamp = timezone.now().strftime("%Y-%m-%d__%H-%M-%S")
seed = Seed.from_file(
sources_file,
urls_content = sources_file.read_text()
crawl = Crawl.objects.create(
urls=urls_content,
extractor=parser,
max_depth=depth,
tags_str=tag,
label=f'{self.request.user.username}@{HOSTNAME}{self.request.path} {timestamp}',
parser=parser,
tag=tag,
created_by=self.request.user.pk,
created_by_id=self.request.user.pk,
config={
# 'ONLY_NEW': not update,
# 'INDEX_ONLY': index_only,
@ -486,9 +492,8 @@ class AddView(UserPassesTestMixin, FormView):
'DEPTH': depth,
'EXTRACTORS': extractors or '',
# 'DEFAULT_PERSONA': persona or 'Default',
})
# 3. create a new Crawl pointing to the Seed
crawl = Crawl.from_seed(seed, max_depth=depth)
}
)
# 4. start the Orchestrator & wait until it completes
# ... orchestrator will create the root Snapshot, which creates pending ArchiveResults, which gets run by the ArchiveResultActors ...
@ -569,19 +574,7 @@ def live_progress_view(request):
# Count URLs in the crawl (for when snapshots haven't been created yet)
urls_count = 0
if crawl.urls:
urls_count = len([u for u in crawl.urls.split('\n') if u.strip()])
elif crawl.seed and crawl.seed.uri:
# Try to get URL count from seed
if crawl.seed.uri.startswith('file:///'):
try:
from pathlib import Path
seed_file = Path(crawl.seed.uri.replace('file://', ''))
if seed_file.exists():
urls_count = len([l for l in seed_file.read_text().split('\n') if l.strip() and not l.startswith('#')])
except:
pass
else:
urls_count = 1 # Single URL seed
urls_count = len([u for u in crawl.urls.split('\n') if u.strip() and not u.startswith('#')])
# Calculate crawl progress
crawl_progress = int((completed_snapshots / total_snapshots) * 100) if total_snapshots > 0 else 0
@ -635,8 +628,8 @@ def live_progress_view(request):
})
# Check if crawl can start (for debugging stuck crawls)
can_start = bool(crawl.seed and crawl.seed.uri)
seed_uri = crawl.seed.uri[:60] if crawl.seed and crawl.seed.uri else None
can_start = bool(crawl.urls)
urls_preview = crawl.urls[:60] if crawl.urls else None
# Check if retry_at is in the future (would prevent worker from claiming)
retry_at_future = crawl.retry_at > timezone.now() if crawl.retry_at else False
@ -657,7 +650,7 @@ def live_progress_view(request):
'pending_snapshots': pending_snapshots,
'active_snapshots': active_snapshots_for_crawl,
'can_start': can_start,
'seed_uri': seed_uri,
'urls_preview': urls_preview,
'retry_at_future': retry_at_future,
'seconds_until_retry': seconds_until_retry,
})

View File

@ -17,7 +17,7 @@ from django_object_actions import action
from archivebox.base_models.admin import BaseModelAdmin, ConfigEditorMixin
from core.models import Snapshot
from crawls.models import Seed, Crawl, CrawlSchedule
from crawls.models import Crawl, CrawlSchedule
def render_snapshots_list(snapshots_qs, limit=20):
@ -136,16 +136,16 @@ def render_snapshots_list(snapshots_qs, limit=20):
''')
class SeedAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str', 'crawls', 'num_crawls', 'num_snapshots')
sort_fields = ('id', 'created_at', 'created_by', 'label', 'notes', 'uri', 'extractor', 'tags_str')
search_fields = ('id', 'created_by__username', 'label', 'notes', 'uri', 'extractor', 'tags_str')
class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'urls_preview', 'schedule_str', 'status', 'retry_at', 'num_snapshots')
sort_fields = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'schedule_str', 'status', 'retry_at')
search_fields = ('id', 'created_by__username', 'max_depth', 'label', 'notes', 'schedule_id', 'status', 'urls')
readonly_fields = ('created_at', 'modified_at', 'scheduled_crawls', 'crawls', 'snapshots', 'contents')
readonly_fields = ('created_at', 'modified_at', 'snapshots', 'urls_editor')
fieldsets = (
('Source', {
'fields': ('uri', 'contents'),
('URLs', {
'fields': ('urls_editor',),
'classes': ('card', 'wide'),
}),
('Info', {
@ -153,83 +153,7 @@ class SeedAdmin(ConfigEditorMixin, BaseModelAdmin):
'classes': ('card',),
}),
('Settings', {
'fields': ('extractor', 'config'),
'classes': ('card',),
}),
('Metadata', {
'fields': ('created_by', 'created_at', 'modified_at'),
'classes': ('card',),
}),
('Crawls', {
'fields': ('scheduled_crawls', 'crawls'),
'classes': ('card',),
}),
('Snapshots', {
'fields': ('snapshots',),
'classes': ('card',),
}),
)
list_filter = ('extractor', 'created_by')
ordering = ['-created_at']
list_per_page = 100
actions = ["delete_selected"]
def num_crawls(self, obj):
return obj.crawl_set.count()
def num_snapshots(self, obj):
return obj.snapshot_set.count()
def scheduled_crawls(self, obj):
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(scheduledcrawl.admin_change_url, scheduledcrawl)
for scheduledcrawl in obj.scheduled_crawl_set.all().order_by('-created_at')[:20]
)) or mark_safe('<i>No Scheduled Crawls yet...</i>')
def crawls(self, obj):
return format_html_join('<br/>', ' - <a href="{}">{}</a>', (
(crawl.admin_change_url, crawl)
for crawl in obj.crawl_set.all().order_by('-created_at')[:20]
)) or mark_safe('<i>No Crawls yet...</i>')
def snapshots(self, obj):
return render_snapshots_list(obj.snapshot_set.all())
def contents(self, obj):
source_file = obj.get_file_path()
if source_file:
contents = ""
try:
contents = source_file.read_text().strip()[:14_000]
except Exception as e:
contents = f'Error reading {source_file}: {e}'
return format_html('<b><code>{}</code>:</b><br/><pre>{}</pre>', source_file, contents)
return format_html('See URLs here: <a href="{}">{}</a>', obj.uri, obj.uri)
class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
list_display = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at', 'num_snapshots')
sort_fields = ('id', 'created_at', 'created_by', 'max_depth', 'label', 'notes', 'seed_str', 'schedule_str', 'status', 'retry_at')
search_fields = ('id', 'created_by__username', 'max_depth', 'label', 'notes', 'seed_id', 'schedule_id', 'status', 'seed__uri')
readonly_fields = ('created_at', 'modified_at', 'snapshots', 'seed_urls_editor')
fieldsets = (
('URLs', {
'fields': ('seed_urls_editor',),
'classes': ('card', 'wide'),
}),
('Info', {
'fields': ('label', 'notes'),
'classes': ('card',),
}),
('Settings', {
'fields': ('max_depth', 'config'),
'fields': ('max_depth', 'extractor', 'config'),
'classes': ('card',),
}),
('Status', {
@ -237,7 +161,7 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
'classes': ('card',),
}),
('Relations', {
'fields': ('seed', 'schedule', 'created_by'),
'fields': ('schedule', 'created_by'),
'classes': ('card',),
}),
('Timestamps', {
@ -250,7 +174,7 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
}),
)
list_filter = ('max_depth', 'seed', 'schedule', 'created_by', 'status', 'retry_at')
list_filter = ('max_depth', 'extractor', 'schedule', 'created_by', 'status', 'retry_at')
ordering = ['-created_at', '-retry_at']
list_per_page = 100
actions = ["delete_selected"]
@ -258,23 +182,20 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
@action(label='Recrawl', description='Create a new crawl with the same settings')
def recrawl(self, request, obj):
"""Duplicate this crawl as a new crawl with the same seed and settings."""
"""Duplicate this crawl as a new crawl with the same URLs and settings."""
from django.utils import timezone
from django.shortcuts import redirect
# Validate seed has a URI (required for crawl to start)
if not obj.seed:
messages.error(request, 'Cannot recrawl: original crawl has no seed.')
return redirect('admin:crawls_crawl_change', obj.id)
if not obj.seed.uri:
messages.error(request, 'Cannot recrawl: seed has no URI.')
# Validate URLs (required for crawl to start)
if not obj.urls:
messages.error(request, 'Cannot recrawl: original crawl has no URLs.')
return redirect('admin:crawls_crawl_change', obj.id)
new_crawl = Crawl.objects.create(
seed=obj.seed,
urls=obj.urls,
extractor=obj.extractor,
max_depth=obj.max_depth,
tags_str=obj.tags_str,
config=obj.config,
schedule=obj.schedule,
label=f"{obj.label} (recrawl)" if obj.label else "",
@ -292,43 +213,6 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
return redirect('admin:crawls_crawl_change', new_crawl.id)
def get_urls(self):
urls = super().get_urls()
custom_urls = [
path('<path:object_id>/save_seed_contents/',
self.admin_site.admin_view(self.save_seed_contents_view),
name='crawls_crawl_save_seed_contents'),
]
return custom_urls + urls
def save_seed_contents_view(self, request, object_id):
"""Handle saving seed file contents via AJAX."""
if request.method != 'POST':
return JsonResponse({'success': False, 'error': 'POST required'}, status=405)
try:
crawl = Crawl.objects.get(pk=object_id)
except Crawl.DoesNotExist:
return JsonResponse({'success': False, 'error': 'Crawl not found'}, status=404)
source_file = crawl.seed.get_file_path() if crawl.seed else None
if not source_file:
return JsonResponse({'success': False, 'error': 'Seed is not a local file'}, status=400)
try:
data = json.loads(request.body)
contents = data.get('contents', '')
except json.JSONDecodeError:
return JsonResponse({'success': False, 'error': 'Invalid JSON'}, status=400)
try:
# Ensure parent directory exists
source_file.parent.mkdir(parents=True, exist_ok=True)
source_file.write_text(contents)
return JsonResponse({'success': True, 'message': f'Saved {len(contents)} bytes to {source_file.name}'})
except Exception as e:
return JsonResponse({'success': False, 'error': str(e)}, status=500)
def num_snapshots(self, obj):
return obj.snapshot_set.count()
@ -341,163 +225,68 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
return mark_safe('<i>None</i>')
return format_html('<a href="{}">{}</a>', obj.schedule.admin_change_url, obj.schedule)
@admin.display(description='Seed', ordering='seed')
def seed_str(self, obj):
if not obj.seed:
return mark_safe('<i>None</i>')
return format_html('<a href="{}">{}</a>', obj.seed.admin_change_url, obj.seed)
@admin.display(description='URLs', ordering='urls')
def urls_preview(self, obj):
first_url = obj.get_urls_list()[0] if obj.get_urls_list() else ''
return first_url[:80] + '...' if len(first_url) > 80 else first_url
@admin.display(description='URLs')
def seed_urls_editor(self, obj):
"""Combined editor showing seed URL and file contents."""
widget_id = f'seed_urls_{obj.pk}'
# Get the seed URI (or use urls field if no seed)
seed_uri = ''
if obj.seed and obj.seed.uri:
seed_uri = obj.seed.uri
elif obj.urls:
seed_uri = obj.urls
def urls_editor(self, obj):
"""Editor for crawl URLs."""
widget_id = f'crawl_urls_{obj.pk}'
# Check if it's a local file we can edit
source_file = obj.seed.get_file_path() if obj.seed else None
source_file = obj.get_file_path()
is_file = source_file is not None
contents = ""
file_contents = ""
error = None
if is_file and source_file:
try:
contents = source_file.read_text().strip()
file_contents = source_file.read_text().strip()
except Exception as e:
error = f'Error reading {source_file}: {e}'
# Escape for safe HTML embedding
escaped_uri = seed_uri.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
escaped_contents = (contents or '').replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
escaped_urls = (obj.urls or '').replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
escaped_file_contents = file_contents.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
# Count lines for auto-expand logic
line_count = len(contents.split('\n')) if contents else 0
uri_rows = min(max(1, seed_uri.count('\n') + 1), 3)
line_count = len((obj.urls or '').split('\n'))
file_line_count = len(file_contents.split('\n')) if file_contents else 0
uri_rows = min(max(3, line_count), 10)
html = f'''
<div id="{widget_id}_container" style="max-width: 900px;">
<!-- Seed URL input (auto-expands) -->
<!-- URLs input -->
<div style="margin-bottom: 12px;">
<label style="font-weight: bold; display: block; margin-bottom: 4px;">Seed URL:</label>
<textarea id="{widget_id}_uri"
<label style="font-weight: bold; display: block; margin-bottom: 4px;">URLs (one per line):</label>
<textarea id="{widget_id}_urls"
style="width: 100%; font-family: monospace; font-size: 13px;
padding: 8px; border: 1px solid #ccc; border-radius: 4px;
resize: vertical; min-height: 32px; overflow: hidden;"
resize: vertical;"
rows="{uri_rows}"
placeholder="file:///data/sources/... or https://..."
{"readonly" if not obj.pk else ""}>{escaped_uri}</textarea>
placeholder="https://example.com&#10;https://example2.com&#10;# Comments start with #"
readonly>{escaped_urls}</textarea>
<p style="color: #666; font-size: 12px; margin: 4px 0 0 0;">
{line_count} URL{'s' if line_count != 1 else ''} · URLs are read-only in admin, edit via API or CLI
</p>
</div>
{"" if not is_file else f'''
<!-- File contents editor -->
<!-- File contents preview (if first URL is a file://) -->
<div style="margin-bottom: 8px;">
<label style="font-weight: bold; display: block; margin-bottom: 4px;">
File Contents: <code style="font-weight: normal; color: #666;">{source_file}</code>
File Preview: <code style="font-weight: normal; color: #666;">{source_file}</code>
</label>
{"<div style='color: #dc3545; margin-bottom: 8px;'>" + error + "</div>" if error else ""}
<textarea id="{widget_id}_contents"
style="width: 100%; height: {min(400, max(150, line_count * 18))}px; font-family: monospace; font-size: 12px;
padding: 8px; border: 1px solid #ccc; border-radius: 4px; resize: vertical;"
placeholder="Enter URLs, one per line...">{escaped_contents}</textarea>
</div>
<div style="display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
<button type="button" id="{widget_id}_save_btn"
onclick="saveSeedUrls_{widget_id}()"
style="padding: 8px 20px; background: #417690; color: white; border: none;
border-radius: 4px; cursor: pointer; font-weight: bold;">
Save URLs
</button>
<span id="{widget_id}_line_count" style="color: #666; font-size: 12px;"></span>
<span id="{widget_id}_status" style="color: #666; font-size: 12px;"></span>
<textarea id="{widget_id}_file_preview"
style="width: 100%; height: {min(400, max(150, file_line_count * 18))}px; font-family: monospace; font-size: 12px;
padding: 8px; border: 1px solid #ccc; border-radius: 4px; resize: vertical; background: #f9f9f9;"
readonly>{escaped_file_contents}</textarea>
</div>
'''}
{"" if is_file else f'''
<div style="margin-top: 8px; color: #666;">
<a href="{seed_uri}" target="_blank">{seed_uri}</a>
</div>
'''}
<script>
(function() {{
var uriInput = document.getElementById('{widget_id}_uri');
var contentsInput = document.getElementById('{widget_id}_contents');
var status = document.getElementById('{widget_id}_status');
var lineCount = document.getElementById('{widget_id}_line_count');
var saveBtn = document.getElementById('{widget_id}_save_btn');
// Auto-resize URI input
function autoResizeUri() {{
uriInput.style.height = 'auto';
uriInput.style.height = Math.min(100, uriInput.scrollHeight) + 'px';
}}
uriInput.addEventListener('input', autoResizeUri);
autoResizeUri();
if (contentsInput) {{
function updateLineCount() {{
var lines = contentsInput.value.split('\\n').filter(function(l) {{ return l.trim(); }});
lineCount.textContent = lines.length + ' URLs';
}}
contentsInput.addEventListener('input', function() {{
updateLineCount();
if (status) {{
status.textContent = '(unsaved changes)';
status.style.color = '#c4820e';
}}
}});
updateLineCount();
}}
window.saveSeedUrls_{widget_id} = function() {{
if (!saveBtn) return;
saveBtn.disabled = true;
saveBtn.textContent = 'Saving...';
if (status) status.textContent = '';
fetch(window.location.pathname + 'save_seed_contents/', {{
method: 'POST',
headers: {{
'Content-Type': 'application/json',
'X-CSRFToken': document.querySelector('[name=csrfmiddlewaretoken]').value
}},
body: JSON.stringify({{ contents: contentsInput ? contentsInput.value : '' }})
}})
.then(function(response) {{ return response.json(); }})
.then(function(data) {{
if (data.success) {{
if (status) {{
status.textContent = '' + data.message;
status.style.color = '#28a745';
}}
}} else {{
if (status) {{
status.textContent = '' + data.error;
status.style.color = '#dc3545';
}}
}}
}})
.catch(function(err) {{
if (status) {{
status.textContent = '✗ Error: ' + err;
status.style.color = '#dc3545';
}}
}})
.finally(function() {{
saveBtn.disabled = false;
saveBtn.textContent = 'Save URLs';
}});
}};
}})();
</script>
</div>
'''
return mark_safe(html)
@ -507,7 +296,7 @@ class CrawlAdmin(ConfigEditorMixin, BaseModelAdmin):
class CrawlScheduleAdmin(BaseModelAdmin):
list_display = ('id', 'created_at', 'created_by', 'label', 'notes', 'template_str', 'crawls', 'num_crawls', 'num_snapshots')
sort_fields = ('id', 'created_at', 'created_by', 'label', 'notes', 'template_str')
search_fields = ('id', 'created_by__username', 'label', 'notes', 'schedule_id', 'template_id', 'template__seed__uri')
search_fields = ('id', 'created_by__username', 'label', 'notes', 'schedule_id', 'template_id', 'template__urls')
readonly_fields = ('created_at', 'modified_at', 'crawls', 'snapshots')
@ -561,6 +350,5 @@ class CrawlScheduleAdmin(BaseModelAdmin):
def register_admin(admin_site):
admin_site.register(Seed, SeedAdmin)
admin_site.register(Crawl, CrawlAdmin)
admin_site.register(CrawlSchedule, CrawlScheduleAdmin)

View File

@ -0,0 +1,61 @@
# Generated by Django 6.0 on 2025-12-25 09:34
import archivebox.base_models.models
import django.db.models.deletion
import pathlib
import uuid
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('crawls', '0001_initial'),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.RemoveField(
model_name='crawl',
name='seed',
),
migrations.AddField(
model_name='crawl',
name='extractor',
field=models.CharField(default='auto', help_text='Parser for reading URLs (auto, html, json, rss, etc)', max_length=32),
),
migrations.AlterField(
model_name='crawl',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='crawl',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='crawl',
name='output_dir',
field=models.FilePathField(blank=True, default='', path=pathlib.PurePosixPath('/Users/squash/Local/Code/archiveboxes/archivebox-nue/data/archive')),
),
migrations.AlterField(
model_name='crawl',
name='urls',
field=models.TextField(help_text='Newline-separated list of URLs to crawl'),
),
migrations.AlterField(
model_name='crawlschedule',
name='created_by',
field=models.ForeignKey(default=archivebox.base_models.models.get_or_create_system_user_pk, on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL),
),
migrations.AlterField(
model_name='crawlschedule',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.DeleteModel(
name='Seed',
),
]

View File

@ -20,91 +20,6 @@ if TYPE_CHECKING:
from core.models import Snapshot, ArchiveResult
class Seed(ModelWithOutputDir, ModelWithConfig, ModelWithNotes, ModelWithHealthStats):
id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
created_at = models.DateTimeField(default=timezone.now, db_index=True)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False)
modified_at = models.DateTimeField(auto_now=True)
uri = models.URLField(max_length=2048)
extractor = models.CharField(default='auto', max_length=32)
tags_str = models.CharField(max_length=255, null=False, blank=True, default='')
label = models.CharField(max_length=255, null=False, blank=True, default='')
config = models.JSONField(default=dict)
output_dir = models.FilePathField(path=settings.ARCHIVE_DIR, null=False, blank=True, default='')
notes = models.TextField(blank=True, null=False, default='')
crawl_set: models.Manager['Crawl']
class Meta:
verbose_name = 'Seed'
verbose_name_plural = 'Seeds'
unique_together = (('created_by', 'uri', 'extractor'), ('created_by', 'label'))
def __str__(self):
return f'[{self.id}] {self.uri[:64]}'
def save(self, *args, **kwargs):
is_new = self._state.adding
super().save(*args, **kwargs)
if is_new:
from archivebox.misc.logging_util import log_worker_event
log_worker_event(
worker_type='DB',
event='Created Seed',
indent_level=0,
metadata={
'id': str(self.id),
'uri': str(self.uri)[:64],
'extractor': self.extractor,
'label': self.label or None,
},
)
@classmethod
def from_file(cls, source_file: Path, label: str = '', parser: str = 'auto', tag: str = '', created_by=None, config=None):
# Use absolute path for file:// URLs so extractors can find the files
source_path = str(source_file.resolve())
seed, _ = cls.objects.get_or_create(
label=label or source_file.name, uri=f'file://{source_path}',
created_by_id=getattr(created_by, 'pk', created_by) or get_or_create_system_user_pk(),
extractor=parser, tags_str=tag, config=config or {},
)
return seed
@property
def source_type(self):
return self.uri.split('://', 1)[0].lower()
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_seed', args=[self.id])
def get_file_path(self) -> Path | None:
"""
Get the filesystem path for file:// URIs.
Handles both old format (file:///data/...) and new format (file:///absolute/path).
Returns None if URI is not a file:// URI.
"""
if not self.uri.startswith('file://'):
return None
# Remove file:// prefix
path_str = self.uri.replace('file://', '', 1)
# Handle old format: file:///data/... -> DATA_DIR/...
if path_str.startswith('/data/'):
return CONSTANTS.DATA_DIR / path_str.replace('/data/', '', 1)
# Handle new format: file:///absolute/path
return Path(path_str)
@property
def snapshot_set(self) -> QuerySet['Snapshot']:
from core.models import Snapshot
return Snapshot.objects.filter(crawl_id__in=self.crawl_set.values_list('pk', flat=True))
class CrawlSchedule(ModelWithSerializers, ModelWithNotes, ModelWithHealthStats):
id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
created_at = models.DateTimeField(default=timezone.now, db_index=True)
@ -124,14 +39,15 @@ class CrawlSchedule(ModelWithSerializers, ModelWithNotes, ModelWithHealthStats):
verbose_name_plural = 'Scheduled Crawls'
def __str__(self) -> str:
return f'[{self.id}] {self.template.seed.uri[:64] if self.template and self.template.seed else ""} @ {self.schedule}'
urls_preview = self.template.urls[:64] if self.template and self.template.urls else ""
return f'[{self.id}] {urls_preview} @ {self.schedule}'
@property
def api_url(self) -> str:
return reverse_lazy('api-1:get_any', args=[self.id])
def save(self, *args, **kwargs):
self.label = self.label or (self.template.seed.label if self.template and self.template.seed else '')
self.label = self.label or (self.template.label if self.template else '')
super().save(*args, **kwargs)
if self.template:
self.template.schedule = self
@ -144,8 +60,8 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=get_or_create_system_user_pk, null=False)
modified_at = models.DateTimeField(auto_now=True)
seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False)
urls = models.TextField(blank=True, null=False, default='')
urls = models.TextField(blank=False, null=False, help_text='Newline-separated list of URLs to crawl')
extractor = models.CharField(default='auto', max_length=32, help_text='Parser for reading URLs (auto, html, json, rss, etc)')
config = models.JSONField(default=dict)
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
tags_str = models.CharField(max_length=1024, blank=True, null=False, default='')
@ -171,31 +87,40 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
verbose_name_plural = 'Crawls'
def __str__(self):
return f'[{self.id}] {self.seed.uri[:64] if self.seed else ""}'
first_url = self.get_urls_list()[0] if self.get_urls_list() else ''
return f'[{self.id}] {first_url[:64]}'
def save(self, *args, **kwargs):
is_new = self._state.adding
super().save(*args, **kwargs)
if is_new:
from archivebox.misc.logging_util import log_worker_event
first_url = self.get_urls_list()[0] if self.get_urls_list() else ''
log_worker_event(
worker_type='DB',
event='Created Crawl',
indent_level=1,
metadata={
'id': str(self.id),
'seed_uri': str(self.seed.uri)[:64] if self.seed else None,
'first_url': first_url[:64],
'max_depth': self.max_depth,
'status': self.status,
},
)
@classmethod
def from_seed(cls, seed: Seed, max_depth: int = 0, persona: str = 'Default', tags_str: str = '', config=None, created_by=None):
crawl, _ = cls.objects.get_or_create(
seed=seed, max_depth=max_depth, tags_str=tags_str or seed.tags_str,
config=seed.config or config or {},
created_by_id=getattr(created_by, 'pk', created_by) or seed.created_by_id,
def from_file(cls, source_file: Path, max_depth: int = 0, label: str = '', extractor: str = 'auto',
tags_str: str = '', config=None, created_by=None):
"""Create a crawl from a file containing URLs."""
urls_content = source_file.read_text()
crawl = cls.objects.create(
urls=urls_content,
extractor=extractor,
max_depth=max_depth,
tags_str=tags_str,
label=label or source_file.name,
config=config or {},
created_by_id=getattr(created_by, 'pk', created_by) or get_or_create_system_user_pk(),
)
return crawl
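Note: a minimal usage sketch of the new file-based entrypoint; the file path, label, and tags are hypothetical, and the import path for Crawl is assumed:
from pathlib import Path
from crawls.models import Crawl  # module path assumed

crawl = Crawl.from_file(
    Path('/data/sources/bookmarks.txt'),  # hypothetical newline-separated URL list
    max_depth=1,
    label='bookmarks-import',
    tags_str='imported',
)
print(crawl.get_urls_list()[:3])  # comments and blank lines are already filtered out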
@ -203,14 +128,47 @@ class Crawl(ModelWithOutputDir, ModelWithConfig, ModelWithHealthStats, ModelWith
def api_url(self) -> str:
return reverse_lazy('api-1:get_crawl', args=[self.id])
def get_urls_list(self) -> list[str]:
"""Get list of URLs from urls field, filtering out comments and empty lines."""
if not self.urls:
return []
return [
url.strip()
for url in self.urls.split('\n')
if url.strip() and not url.strip().startswith('#')
]
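For reference, a small sketch of the filtering this method performs (values illustrative):
crawl.urls = 'https://example.com/a\n# a comment\n\n  https://example.com/b  \n'
crawl.get_urls_list()  # -> ['https://example.com/a', 'https://example.com/b']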
def get_file_path(self) -> Path | None:
"""
Get filesystem path if this crawl references a local file.
Checks if the first URL is a file:// URI.
"""
urls = self.get_urls_list()
if not urls:
return None
first_url = urls[0]
if not first_url.startswith('file://'):
return None
# Remove file:// prefix
path_str = first_url.replace('file://', '', 1)
return Path(path_str)
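A quick sketch of the expected mapping (paths are hypothetical):
crawl.urls = 'file:///home/user/bookmarks.html'
crawl.get_file_path()  # -> Path('/home/user/bookmarks.html')
crawl.urls = 'https://example.com'
crawl.get_file_path()  # -> None, not a file:// URI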
def create_root_snapshot(self) -> 'Snapshot':
from core.models import Snapshot
first_url = self.get_urls_list()[0] if self.get_urls_list() else None
if not first_url:
raise ValueError(f'Crawl {self.id} has no URLs to create root snapshot from')
try:
return Snapshot.objects.get(crawl=self, url=self.seed.uri)
return Snapshot.objects.get(crawl=self, url=first_url)
except Snapshot.DoesNotExist:
pass
root_snapshot, _ = Snapshot.objects.update_or_create(
crawl=self, url=self.seed.uri,
crawl=self, url=first_url,
defaults={
'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(),

View File

@ -42,11 +42,12 @@ class CrawlMachine(StateMachine, strict_states=True):
return self.__repr__()
def can_start(self) -> bool:
if not self.crawl.seed:
print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: no seed[/red]')
if not self.crawl.urls:
print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: no URLs[/red]')
return False
if not self.crawl.seed.uri:
print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: seed has no URI[/red]')
urls_list = self.crawl.get_urls_list()
if not urls_list:
print(f'[red]⚠️ Crawl {self.crawl.id} cannot start: no valid URLs in urls field[/red]')
return False
return True
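A small sketch of why both checks are needed (the CrawlMachine(crawl) constructor call is shown purely for illustration):
crawl.urls = '# comments only\n\n'
CrawlMachine(crawl).can_start()  # -> False: the field is non-empty, but no usable URLs survive filtering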
@ -121,13 +122,14 @@ class CrawlMachine(StateMachine, strict_states=True):
output_dir.mkdir(parents=True, exist_ok=True)
# Run all on_Crawl hooks
first_url = self.crawl.get_urls_list()[0] if self.crawl.get_urls_list() else ''
results = run_hooks(
event_name='Crawl',
output_dir=output_dir,
timeout=60,
config_objects=[self.crawl, self.crawl.seed] if self.crawl.seed else [self.crawl],
config_objects=[self.crawl],
crawl_id=str(self.crawl.id),
seed_uri=self.crawl.seed.uri if self.crawl.seed else '',
seed_uri=first_url,
)
# Process hook results - parse JSONL output and create DB objects

View File

@ -1,2 +0,0 @@
> /Users/squash/Local/Code/archiveboxes/archivebox-nue/archivebox/cli/archivebox_init.py --force; TS=2025-12-25__08:03:12 VERSION=0.9.0rc1 IN_DOCKER=False IS_TTY=False

View File

@ -0,0 +1,65 @@
# Generated by Django 6.0 on 2025-12-25 09:34
import django.db.models.deletion
import uuid
from django.db import migrations, models
class Migration(migrations.Migration):
dependencies = [
('machine', '0001_squashed'),
]
operations = [
migrations.AlterField(
model_name='dependency',
name='bin_name',
field=models.CharField(db_index=True, help_text='Binary executable name (e.g., wget, yt-dlp, chromium)', max_length=63, unique=True),
),
migrations.AlterField(
model_name='dependency',
name='bin_providers',
field=models.CharField(default='*', help_text='Comma-separated list of allowed providers: apt,brew,pip,npm,gem,nix,custom or * for any', max_length=127),
),
migrations.AlterField(
model_name='dependency',
name='config',
field=models.JSONField(blank=True, default=dict, help_text='JSON map of env var config to use during install'),
),
migrations.AlterField(
model_name='dependency',
name='custom_cmds',
field=models.JSONField(blank=True, default=dict, help_text="JSON map of provider -> custom install command (e.g., {'apt': 'apt install -y wget'})"),
),
migrations.AlterField(
model_name='dependency',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='installedbinary',
name='dependency',
field=models.ForeignKey(blank=True, help_text='The Dependency this binary satisfies', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='installedbinary_set', to='machine.dependency'),
),
migrations.AlterField(
model_name='installedbinary',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='machine',
name='config',
field=models.JSONField(blank=True, default=dict, help_text='Machine-specific config overrides (e.g., resolved binary paths like WGET_BINARY)'),
),
migrations.AlterField(
model_name='machine',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
migrations.AlterField(
model_name='networkinterface',
name='id',
field=models.UUIDField(default=uuid.uuid7, editable=False, primary_key=True, serialize=False, unique=True),
),
]

View File

@ -27,10 +27,9 @@ TYPE_SNAPSHOT = 'Snapshot'
TYPE_ARCHIVERESULT = 'ArchiveResult'
TYPE_TAG = 'Tag'
TYPE_CRAWL = 'Crawl'
TYPE_SEED = 'Seed'
TYPE_INSTALLEDBINARY = 'InstalledBinary'
VALID_TYPES = {TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_TAG, TYPE_CRAWL, TYPE_SEED, TYPE_INSTALLEDBINARY}
VALID_TYPES = {TYPE_SNAPSHOT, TYPE_ARCHIVERESULT, TYPE_TAG, TYPE_CRAWL, TYPE_INSTALLEDBINARY}
def parse_line(line: str) -> Optional[Dict[str, Any]]:
@ -206,7 +205,8 @@ def crawl_to_jsonl(crawl) -> Dict[str, Any]:
return {
'type': TYPE_CRAWL,
'id': str(crawl.id),
'seed_id': str(crawl.seed_id),
'urls': crawl.urls,
'extractor': crawl.extractor,
'status': crawl.status,
'max_depth': crawl.max_depth,
'created_at': crawl.created_at.isoformat() if crawl.created_at else None,
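For reference, a Crawl record emitted by crawl_to_jsonl() now looks roughly like this (values illustrative; fields beyond those visible in this hunk may also be present):
{"type": "Crawl", "id": "0193c2f4-...", "urls": "https://example.com\nhttps://example.org", "extractor": "auto", "status": "queued", "max_depth": 0, "created_at": "2025-12-25T09:34:00+00:00"}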

View File

@ -13,9 +13,11 @@ from rich.console import Console
from rich.highlighter import Highlighter
# SETUP RICH CONSOLE / TTY detection / COLOR / PROGRESS BARS
CONSOLE = Console()
STDERR = Console(stderr=True)
IS_TTY = CONSOLE.is_interactive
# Disable wrapping - use soft_wrap=True and large width so text flows naturally
# Colors are preserved, just no hard line breaks inserted
CONSOLE = Console(width=32768, soft_wrap=True, force_terminal=True)
STDERR = Console(stderr=True, width=32768, soft_wrap=True, force_terminal=True)
IS_TTY = sys.stdout.isatty()
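A minimal sketch of the behaviour this change is after (standalone example, not part of the diff):
from rich.console import Console

hard = Console(width=80)  # inserts hard line breaks at column 80
soft = Console(width=32768, soft_wrap=True, force_terminal=True)
soft.print('[green]' + 'x' * 500 + '[/green]')  # emitted as one long colored line; the terminal or log viewer wraps it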
class RainbowHighlighter(Highlighter):
def highlight(self, text):

View File

@ -603,21 +603,17 @@ def log_worker_event(
# Build final message
error_str = f' {type(error).__name__}: {error}' if error else ''
# Build colored message - worker_label needs to be inside color tags
# But first we need to format the color tags separately from the worker label
from archivebox.misc.logging import CONSOLE
from rich.text import Text
# Create a Rich Text object for proper formatting
text = Text()
text.append(indent) # Indentation
# Append worker label and event with color
text.append(indent)
text.append(f'{worker_label} {event}{error_str}', style=color)
# Append metadata without color (add separator if metadata exists)
if metadata_str:
text.append(f' | {metadata_str}')
CONSOLE.print(text)
CONSOLE.print(text, soft_wrap=True)
@enforce_types

View File

@ -1,7 +1,5 @@
__package__ = 'archivebox'
import sys
import shutil
import django
import pydantic
@ -20,14 +18,10 @@ timezone.utc = datetime.timezone.utc
# DjangoSignalWebhooksConfig.verbose_name = 'API'
# Install rich for pretty tracebacks in console logs
# https://rich.readthedocs.io/en/stable/traceback.html#traceback-handler
from rich.traceback import install # noqa
TERM_WIDTH = (shutil.get_terminal_size((200, 10)).columns - 1) if sys.stdout.isatty() else 200
# os.environ.setdefault('COLUMNS', str(TERM_WIDTH))
install(show_locals=True, word_wrap=False, locals_max_length=10, locals_hide_dunder=True, suppress=[django, pydantic], extra_lines=2, width=TERM_WIDTH)
# Rich traceback handler disabled - it adds frames/boxes that wrap weirdly in log files
# Standard Python tracebacks are used instead (full width, no frames)
# from rich.traceback import install
# install(show_locals=True, word_wrap=False, ...)
# Hide site-packages/sonic/client.py:115: SyntaxWarning

View File

@ -552,21 +552,21 @@
if (crawl.status === 'queued' && !crawl.can_start) {
warningHtml = `
<div style="padding: 8px 14px; background: rgba(248, 81, 73, 0.1); border-top: 1px solid #f85149; color: #f85149; font-size: 11px;">
⚠️ Crawl cannot start: ${crawl.seed_uri ? 'unknown error' : 'no seed URI'}
⚠️ Crawl cannot start: ${crawl.urls_preview ? 'unknown error' : 'no URLs'}
</div>
`;
} else if (crawl.status === 'queued' && crawl.retry_at_future) {
// Queued but retry_at is in future (was claimed by worker, will retry)
warningHtml = `
<div style="padding: 8px 14px; background: rgba(88, 166, 255, 0.1); border-top: 1px solid #58a6ff; color: #58a6ff; font-size: 11px;">
🔄 Retrying in ${crawl.seconds_until_retry}s...${crawl.seed_uri ? ` (${crawl.seed_uri})` : ''}
🔄 Retrying in ${crawl.seconds_until_retry}s...${crawl.urls_preview ? ` (${crawl.urls_preview})` : ''}
</div>
`;
} else if (crawl.status === 'queued' && crawl.total_snapshots === 0) {
// Queued and waiting to be picked up by worker
warningHtml = `
<div style="padding: 8px 14px; background: rgba(210, 153, 34, 0.1); border-top: 1px solid #d29922; color: #d29922; font-size: 11px;">
⏳ Waiting for worker to pick up...${crawl.seed_uri ? ` (${crawl.seed_uri})` : ''}
⏳ Waiting for worker to pick up...${crawl.urls_preview ? ` (${crawl.urls_preview})` : ''}
</div>
`;
}
@ -577,8 +577,8 @@
metaText += ` | ${crawl.total_snapshots} snapshots`;
} else if (crawl.urls_count > 0) {
metaText += ` | ${crawl.urls_count} URLs`;
} else if (crawl.seed_uri) {
metaText += ` | ${crawl.seed_uri.substring(0, 40)}${crawl.seed_uri.length > 40 ? '...' : ''}`;
} else if (crawl.urls_preview) {
metaText += ` | ${crawl.urls_preview.substring(0, 40)}${crawl.urls_preview.length > 40 ? '...' : ''}`;
}
return `

View File

@ -26,6 +26,9 @@ CONFIG_FILE_NAME = "supervisord.conf"
PID_FILE_NAME = "supervisord.pid"
WORKERS_DIR_NAME = "workers"
# Global reference to supervisord process for cleanup
_supervisord_proc = None
ORCHESTRATOR_WORKER = {
"name": "worker_orchestrator",
"command": "archivebox manage orchestrator", # runs forever by default
@ -78,7 +81,7 @@ def create_supervisord_config():
config_content = f"""
[supervisord]
nodaemon = true
environment = IS_SUPERVISORD_PARENT="true"
environment = IS_SUPERVISORD_PARENT="true",COLUMNS="200"
pidfile = {PID_FILE}
logfile = {LOG_FILE}
childlogdir = {CONSTANTS.LOGS_DIR}
@ -143,11 +146,27 @@ def get_existing_supervisord_process():
return None
def stop_existing_supervisord_process():
global _supervisord_proc
SOCK_FILE = get_sock_file()
PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
try:
# if pid file exists, load PID int
# First try to stop via the global proc reference
if _supervisord_proc and _supervisord_proc.poll() is None:
try:
print(f"[🦸‍♂️] Stopping supervisord process (pid={_supervisord_proc.pid})...")
_supervisord_proc.terminate()
try:
_supervisord_proc.wait(timeout=5)
except subprocess.TimeoutExpired:
_supervisord_proc.kill()
_supervisord_proc.wait(timeout=2)
except (BaseException, BrokenPipeError, IOError, KeyboardInterrupt):
pass
_supervisord_proc = None
return
# Fallback: if pid file exists, load PID int and kill that process
try:
pid = int(PID_FILE.read_text())
except (FileNotFoundError, ValueError):
@ -156,8 +175,25 @@ def stop_existing_supervisord_process():
try:
print(f"[🦸‍♂️] Stopping supervisord process (pid={pid})...")
proc = psutil.Process(pid)
# Kill the entire process group to ensure all children are stopped
children = proc.children(recursive=True)
proc.terminate()
# Also terminate all children
for child in children:
try:
child.terminate()
except psutil.NoSuchProcess:
pass
proc.wait(timeout=5)
# Kill any remaining children
for child in children:
try:
if child.is_running():
child.kill()
except psutil.NoSuchProcess:
pass
except psutil.NoSuchProcess:
pass
except (BaseException, BrokenPipeError, IOError, KeyboardInterrupt):
pass
finally:
@ -174,7 +210,7 @@ def start_new_supervisord_process(daemonize=False):
LOG_FILE = CONSTANTS.LOGS_DIR / LOG_FILE_NAME
CONFIG_FILE = SOCK_FILE.parent / CONFIG_FILE_NAME
PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
print(f"[🦸‍♂️] Supervisord starting{' in background' if daemonize else ''}...")
pretty_log_path = pretty_path(LOG_FILE)
print(f" > Writing supervisord logs to: {pretty_log_path}")
@ -182,50 +218,54 @@ def start_new_supervisord_process(daemonize=False):
print(f' > Using supervisord config file: {pretty_path(CONFIG_FILE)}')
print(f" > Using supervisord UNIX socket: {pretty_path(SOCK_FILE)}")
print()
# clear out existing stale state files
shutil.rmtree(WORKERS_DIR, ignore_errors=True)
PID_FILE.unlink(missing_ok=True)
get_sock_file().unlink(missing_ok=True)
CONFIG_FILE.unlink(missing_ok=True)
# create the supervisord config file
create_supervisord_config()
# Start supervisord
# panel = Panel(f"Starting supervisord with config: {SUPERVISORD_CONFIG_FILE}")
# with Live(panel, refresh_per_second=1) as live:
subprocess.Popen(
f"supervisord --configuration={CONFIG_FILE}",
stdin=None,
shell=True,
start_new_session=daemonize,
)
# Open log file for supervisord output
LOG_FILE.parent.mkdir(parents=True, exist_ok=True)
log_handle = open(LOG_FILE, 'a')
def exit_signal_handler(signum, frame):
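# signum 2 == SIGINT (Ctrl+C); 13 == SIGPIPE, which is handled below but not logged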
if signum == 2:
STDERR.print("\n[🛑] Got Ctrl+C. Terminating child processes...")
elif signum != 13:
STDERR.print(f"\n[🦸‍♂️] Supervisord got stop signal ({signal.strsignal(signum)}). Terminating child processes...")
stop_existing_supervisord_process()
raise SystemExit(0)
if daemonize:
# Start supervisord in background (daemon mode)
subprocess.Popen(
f"supervisord --configuration={CONFIG_FILE}",
stdin=None,
stdout=log_handle,
stderr=log_handle,
shell=True,
start_new_session=True,
)
time.sleep(2)
return get_existing_supervisord_process()
else:
# Start supervisord in FOREGROUND - this will block until supervisord exits
# supervisord with nodaemon=true will run in foreground and handle signals properly
# When supervisord gets SIGINT/SIGTERM, it will stop all child processes before exiting
proc = subprocess.Popen(
f"supervisord --configuration={CONFIG_FILE}",
stdin=None,
stdout=log_handle,
stderr=log_handle,
shell=True,
start_new_session=False, # Keep in same process group so signals propagate
)
# Monitor for termination signals and clean up child processes
if not daemonize:
try:
signal.signal(signal.SIGINT, exit_signal_handler)
signal.signal(signal.SIGHUP, exit_signal_handler)
signal.signal(signal.SIGPIPE, exit_signal_handler)
signal.signal(signal.SIGTERM, exit_signal_handler)
except Exception:
# signal handlers only work in main thread
pass
# otherwise supervisord will continue in background even if the parent proc ends (aka daemon mode)
# Store the process so we can wait on it later
global _supervisord_proc
_supervisord_proc = proc
time.sleep(2)
# Wait a bit for supervisord to start up
time.sleep(2)
return get_existing_supervisord_process()
return get_existing_supervisord_process()
def get_or_create_supervisord_process(daemonize=False):
SOCK_FILE = get_sock_file()
@ -353,9 +393,15 @@ def tail_worker_logs(log_path: str):
pass
def tail_multiple_worker_logs(log_files: list[str], follow=True):
"""Tail multiple log files simultaneously, interleaving their output."""
import select
def tail_multiple_worker_logs(log_files: list[str], follow=True, proc=None):
"""Tail multiple log files simultaneously, interleaving their output.
Args:
log_files: List of log file paths to tail
follow: Whether to keep following (True) or just read existing content (False)
proc: Optional subprocess.Popen object - stop tailing when this process exits
"""
import re
from pathlib import Path
# Convert relative paths to absolute paths
@ -377,48 +423,53 @@ def tail_multiple_worker_logs(log_files: list[str], follow=True):
for log_path in log_paths:
try:
f = open(log_path, 'r')
# Seek to end of file if following
if follow:
f.seek(0, 2) # Seek to end
file_handles.append((log_path.name, f))
# Don't seek to end - show recent content so user sees something
# Go to end minus 4KB to show some recent logs
f.seek(0, 2) # Go to end first
file_size = f.tell()
if file_size > 4096:
f.seek(file_size - 4096)
f.readline() # Skip partial line
else:
f.seek(0) # Small file, read from start
file_handles.append((log_path, f))
print(f" [tailing {log_path.name}]")
except Exception as e:
print(f"[yellow]Warning: Could not open {log_path}: {e}[/yellow]")
sys.stderr.write(f"Warning: Could not open {log_path}: {e}\n")
if not file_handles:
print("[red]No log files could be opened[/red]")
sys.stderr.write("No log files could be opened\n")
return
# Print which logs we're tailing
log_names = [name for name, _ in file_handles]
print(f"[dim]Tailing: {', '.join(log_names)}[/dim]")
print()
try:
while follow:
# Read available lines from all files
for log_name, f in file_handles:
line = f.readline()
if line:
# Colorize based on log source
if 'orchestrator' in log_name.lower():
color = 'cyan'
elif 'daphne' in log_name.lower():
color = 'green'
else:
color = 'white'
# Check if the monitored process has exited
if proc is not None and proc.poll() is not None:
print(f"\n[server process exited with code {proc.returncode}]")
break
had_output = False
# Read ALL available lines from all files (not just one per iteration)
for log_path, f in file_handles:
while True:
line = f.readline()
if not line:
break # No more lines available in this file
had_output = True
# Strip ANSI codes if present (supervisord usually strips them already, but just in case)
import re
line_clean = re.sub(r'\x1b\[[0-9;]*m', '', line.rstrip())
if line_clean:
print(f'[{color}][{log_name}][/{color}] {line_clean}')
print(line_clean)
# Small sleep to avoid busy-waiting
time.sleep(0.1)
# Small sleep to avoid busy-waiting (only when no output)
if not had_output:
time.sleep(0.05)
except (KeyboardInterrupt, BrokenPipeError, IOError):
print("\n[yellow][i] Stopped tailing logs[/i][/yellow]")
pass # Let the caller handle the cleanup message
except SystemExit:
pass
finally:
@ -451,6 +502,8 @@ def watch_worker(supervisor, daemon_name, interval=5):
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
global _supervisord_proc
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
bg_workers = [
@ -466,36 +519,50 @@ def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
if not daemonize:
try:
watch_worker(supervisor, "worker_daphne")
# Tail worker logs while supervisord runs
sys.stdout.write('Tailing worker logs (Ctrl+C to stop)...\n\n')
sys.stdout.flush()
tail_multiple_worker_logs(
log_files=['logs/worker_daphne.log', 'logs/worker_orchestrator.log'],
follow=True,
proc=_supervisord_proc, # Stop tailing when supervisord exits
)
except (KeyboardInterrupt, BrokenPipeError, IOError):
STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
except SystemExit:
pass
except BaseException as e:
STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping web server gracefully...")
raise
STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping gracefully...")
finally:
stop_worker(supervisor, "worker_daphne")
# Ensure supervisord and all children are stopped
stop_existing_supervisord_process()
time.sleep(0.5)
def start_cli_workers(watch=False):
global _supervisord_proc
supervisor = get_or_create_supervisord_process(daemonize=False)
start_worker(supervisor, ORCHESTRATOR_WORKER)
if watch:
try:
watch_worker(supervisor, ORCHESTRATOR_WORKER['name'])
# Block on supervisord process - it will handle signals and stop children
if _supervisord_proc:
_supervisord_proc.wait()
else:
# Fallback to watching worker if no proc reference
watch_worker(supervisor, ORCHESTRATOR_WORKER['name'])
except (KeyboardInterrupt, BrokenPipeError, IOError):
STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
except SystemExit:
pass
except BaseException as e:
STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping orchestrator gracefully...")
raise
STDERR.print(f"\n[🛑] Got {e.__class__.__name__} exception, stopping gracefully...")
finally:
stop_worker(supervisor, ORCHESTRATOR_WORKER['name'])
# Ensure supervisord and all children are stopped
stop_existing_supervisord_process()
time.sleep(0.5)
return [ORCHESTRATOR_WORKER]