mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-12-28 06:34:15 +00:00
- Create uuid_compat.py module that provides uuid7 for Python <3.14 using uuid_extensions package, and native uuid.uuid7 for Python 3.14+ - Update all model files and migrations to use archivebox.uuid_compat - Add uuid7 conditional dependency in pyproject.toml for Python <3.14 - Update requires-python to >=3.13 (from >=3.14) - Update GitHub workflows, lock_pkgs.sh to use Python 3.13 - Update tool configs (ruff, pyright, uv) for Python 3.13 This enables running ArchiveBox on Python 3.13 while maintaining forward compatibility with Python 3.14's native uuid7 support.
149 lines
4.9 KiB
Python
Executable File
149 lines
4.9 KiB
Python
Executable File
"""Base models using UUIDv7 for all id fields."""
|
|
|
|
# Explicitly declare the package this module belongs to; Python consults
# __package__ when resolving relative imports.
__package__ = 'archivebox.base_models'
|
|
|
|
import io
|
|
import csv
|
|
import json
|
|
from uuid import UUID
|
|
from archivebox.uuid_compat import uuid7
|
|
from typing import Any, Iterable, ClassVar
|
|
from pathlib import Path
|
|
|
|
from django.contrib import admin
|
|
from django.db import models
|
|
from django.utils import timezone
|
|
from django.contrib.auth import get_user_model
|
|
from django.urls import reverse_lazy
|
|
from django.conf import settings
|
|
|
|
from django_stubs_ext.db.models import TypedModelMeta
|
|
|
|
from archivebox import DATA_DIR
|
|
from archivebox.misc.util import to_json
|
|
from archivebox.misc.hashing import get_dir_info
|
|
|
|
|
|
def get_or_create_system_user_pk(username='system'):
    """Return the primary key of the user that system operations are attributed to.

    When exactly one superuser exists, that account's pk is returned.
    Otherwise the user named ``username`` is fetched, or created as a staff
    superuser with an empty email and the password field set to ``'!'``.
    """
    user_model = get_user_model()
    superusers = user_model.objects.filter(is_superuser=True)

    # A lone superuser is used for all system operations.
    if superusers.count() == 1:
        return superusers.values_list('pk', flat=True)[0]

    # Zero or several superusers: fall back to the dedicated system account.
    creation_defaults = {'is_staff': True, 'is_superuser': True, 'email': '', 'password': '!'}
    system_user, _created = user_model.objects.get_or_create(
        username=username,
        defaults=creation_defaults,
    )
    return system_user.pk
|
|
|
|
|
|
class AutoDateTimeField(models.DateTimeField):
    """DateTimeField that stamps itself with the current time (legacy compatibility).

    The value is set to ``timezone.now()`` whenever the row is being inserted,
    and on later saves only if the attribute is currently empty/falsy.
    """

    def pre_save(self, model_instance, add):
        """Return the value to persist, assigning a fresh timestamp when needed."""
        needs_timestamp = add or not getattr(model_instance, self.attname)
        if not needs_timestamp:
            # A value is already present on an existing row: defer to the parent field.
            return super().pre_save(model_instance, add)

        now = timezone.now()
        setattr(model_instance, self.attname, now)
        return now
|
|
|
|
|
|
class ModelWithUUID(models.Model):
    """Abstract base model with a UUIDv7 primary key and creation/modification bookkeeping."""

    # Primary key generated by uuid7 when no id is supplied.
    id = models.UUIDField(primary_key=True, default=uuid7, editable=False, unique=True)
    # Defaults to the current time; indexed.
    created_at = models.DateTimeField(default=timezone.now, db_index=True)
    # auto_now=True: refreshed by Django each time the row is saved.
    modified_at = models.DateTimeField(auto_now=True)
    # NOTE(review): default=None combined with null=False provides no usable
    # default here; presumably callers must always set created_by - confirm.
    created_by = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        default=None,
        null=False,
        db_index=True,
    )

    class Meta(TypedModelMeta):
        abstract = True

    def __str__(self):
        class_name = self.__class__.__name__
        return f'[{self.id}] {class_name}'

    @property
    def admin_change_url(self) -> str:
        """Path of this object's change page under /admin/."""
        opts = self._meta
        return f"/admin/{opts.app_label}/{opts.model_name}/{self.pk}/change/"

    @property
    def api_url(self) -> str:
        """Lazily-reversed URL of the 'api-1:get_any' route for this object's id."""
        route_args = [self.id]
        return reverse_lazy('api-1:get_any', args=route_args)

    @property
    def api_docs_url(self) -> str:
        """Anchor into the /api/v1/docs page built from the app label and db table name."""
        opts = self._meta
        app_label = opts.app_label
        return f'/api/v1/docs#/{app_label.title()}%20Models/api_v1_{app_label}_get_{opts.db_table}'

    def as_json(self, keys: Iterable[str] = ()) -> dict:
        """Return a dict of the requested attributes, skipping any the instance lacks.

        When ``keys`` is empty, the id, timestamps and creator id are used.
        """
        default_keys = ('id', 'created_at', 'modified_at', 'created_by_id')
        selected = {}
        for key in (keys or default_keys):
            if hasattr(self, key):
                selected[key] = getattr(self, key)
        return selected
|
|
|
|
|
|
class ModelWithSerializers(ModelWithUUID):
    """Abstract model adding CSV-row and JSONL-row serializers on top of ``as_json()``."""

    class Meta(TypedModelMeta):
        abstract = True

    def as_csv_row(self, keys: Iterable[str] = (), separator: str = ',') -> str:
        """Return one CSV record (including the csv module's line terminator).

        Args:
            keys: attribute names to emit, in order; defaults to the keys of ``as_json()``.
            separator: field delimiter handed to ``csv.writer``.

        Missing attributes are written as empty strings; every value is passed through ``str()``.
        """
        buffer = io.StringIO()
        columns = keys or self.as_json().keys()
        csv.writer(buffer, delimiter=separator).writerow(str(getattr(self, key, '')) for key in columns)
        return buffer.getvalue()

    def as_jsonl_row(self, keys: Iterable[str] = (), **json_kwargs) -> str:
        """Return the selected attributes as a single-line JSON object with sorted keys.

        Args:
            keys: attribute names to emit; defaults to the keys of ``as_json()``.
            **json_kwargs: extra keyword arguments forwarded to ``json.dumps``.

        Missing attributes are emitted as empty strings.
        """
        # Imported locally so this method brings its own dependency into scope.
        from django.core.serializers.json import DjangoJSONEncoder

        # The default columns hold a UUID and datetimes, which the stock JSON
        # encoder rejects with TypeError. Fall back to Django's encoder unless
        # the caller chose one; a caller-supplied ``default=`` still wins
        # because JSONEncoder installs it over the class's own default().
        json_kwargs.setdefault('cls', DjangoJSONEncoder)

        columns = keys or self.as_json().keys()
        row = {key: getattr(self, key, '') for key in columns}
        return json.dumps(row, sort_keys=True, indent=None, **json_kwargs)
|
|
|
|
|
|
class ModelWithNotes(models.Model):
    """Abstract mixin contributing a free-form ``notes`` text field."""

    class Meta:
        abstract = True

    # May be left blank; defaults to '' and is declared non-nullable.
    notes = models.TextField(
        blank=True,
        null=False,
        default='',
    )
|
|
|
|
|
|
class ModelWithHealthStats(models.Model):
    """Abstract mixin holding failure/success counters and a derived health score."""

    class Meta:
        abstract = True

    # Both counters start at zero.
    num_uses_failed = models.PositiveIntegerField(default=0)
    num_uses_succeeded = models.PositiveIntegerField(default=0)

    @property
    def health(self) -> int:
        """Share of uses that succeeded, as a rounded percentage.

        The denominator is clamped to at least 1, so an object with no
        recorded uses reports 0 instead of dividing by zero.
        """
        recorded_uses = self.num_uses_failed + self.num_uses_succeeded
        total = max(recorded_uses, 1)
        success_ratio = self.num_uses_succeeded / total
        return round(success_ratio * 100)
|
|
|
|
|
|
class ModelWithConfig(models.Model):
    """Abstract mixin contributing a JSON ``config`` field."""

    class Meta:
        abstract = True

    # ``dict`` is passed as a callable default; the field is non-nullable,
    # may not be left blank, and is editable.
    config = models.JSONField(
        default=dict,
        null=False,
        blank=False,
        editable=True,
    )
|
|
|
|
|
|
class ModelWithOutputDir(ModelWithSerializers):
    """Abstract model whose instances own a directory under DATA_DIR containing an index.json."""

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """Save the row, then ensure the output directory exists and rewrite its JSON index."""
        super().save(*args, **kwargs)
        output_dir = self.OUTPUT_DIR
        output_dir.mkdir(parents=True, exist_ok=True)
        self.save_json_index()

    @property
    def output_dir_parent(self) -> str:
        """Name of the parent folder: the model name with an 's' appended."""
        model_name = self._meta.model_name
        return f'{model_name}s'

    @property
    def output_dir_name(self) -> str:
        """Name of this object's own folder: its id as a string."""
        return str(self.id)

    @property
    def output_dir_str(self) -> str:
        """Parent folder and object folder joined with '/'."""
        parent = self.output_dir_parent
        name = self.output_dir_name
        return f'{parent}/{name}'

    @property
    def OUTPUT_DIR(self) -> Path:
        """Path of this object's output directory, located under DATA_DIR."""
        relative_dir = self.output_dir_str
        return DATA_DIR / relative_dir

    def save_json_index(self):
        """Write ``to_json(self.as_json())`` to index.json inside OUTPUT_DIR."""
        index_path = self.OUTPUT_DIR / 'index.json'
        payload = to_json(self.as_json())
        index_path.write_text(payload)
|