mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-12-28 06:34:15 +00:00
Keep only the readable Drizzle schema
Remove all comparison files and other ORM schemas. Keep only the nicely formatted Drizzle schema with: - Dot-first indented chains - Helper functions for common patterns - Logical grouping with comments - Spread patterns for field groups File renamed from schema.drizzle.readable.ts to schema.drizzle.ts
This commit is contained in:
parent
f196b2c873
commit
22f2ddaf08
@ -1,410 +0,0 @@
|
||||
# Making Drizzle Schemas More Readable
|
||||
|
||||
## The Problem
|
||||
|
||||
Drizzle's chained functional syntax can become hard to read:
|
||||
|
||||
```typescript
|
||||
// ❌ HARD TO READ - Everything crammed together
|
||||
export const users = pgTable('auth_user', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
username: varchar('username', { length: 150 }).unique().notNull(),
|
||||
email: varchar('email', { length: 254 }).notNull(),
|
||||
password: varchar('password', { length: 128 }).notNull(),
|
||||
first_name: varchar('first_name', { length: 150 }).notNull(),
|
||||
last_name: varchar('last_name', { length: 150 }).notNull(),
|
||||
is_active: boolean('is_active').default(true).notNull(),
|
||||
is_staff: boolean('is_staff').default(false).notNull(),
|
||||
is_superuser: boolean('is_superuser').default(false).notNull(),
|
||||
date_joined: timestamp('date_joined', { withTimezone: true }).defaultNow().notNull(),
|
||||
last_login: timestamp('last_login', { withTimezone: true }),
|
||||
}, (table) => ({
|
||||
usernameIdx: index('auth_user_username_idx').on(table.username),
|
||||
}));
|
||||
```
|
||||
|
||||
## Solution 1: Break Chains Vertically
|
||||
|
||||
```typescript
|
||||
// ✅ MUCH BETTER - Each modifier on its own line
|
||||
export const users = pgTable('auth_user', {
|
||||
id: uuid('id')
|
||||
.primaryKey()
|
||||
.$defaultFn(uuidv7Default),
|
||||
|
||||
username: varchar('username', { length: 150 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
email: varchar('email', { length: 254 })
|
||||
.notNull(),
|
||||
|
||||
is_active: boolean('is_active')
|
||||
.default(true)
|
||||
.notNull(),
|
||||
|
||||
date_joined: timestamp('date_joined', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
**Why it's better:**
|
||||
- Each modifier is on its own line
|
||||
- Easy to scan vertically
|
||||
- Diffs are cleaner (one line = one change)
|
||||
- Easier to comment out modifiers for testing
|
||||
|
||||
## Solution 2: Group Related Fields
|
||||
|
||||
```typescript
|
||||
// ✅ EXCELLENT - Logical grouping with comments
|
||||
export const users = pgTable('auth_user', {
|
||||
// Primary Key
|
||||
id: uuid('id')
|
||||
.primaryKey()
|
||||
.$defaultFn(uuidv7Default),
|
||||
|
||||
// Core Auth Fields
|
||||
username: varchar('username', { length: 150 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
email: varchar('email', { length: 254 })
|
||||
.notNull(),
|
||||
|
||||
password: varchar('password', { length: 128 })
|
||||
.notNull(),
|
||||
|
||||
// Profile Fields
|
||||
first_name: varchar('first_name', { length: 150 })
|
||||
.notNull(),
|
||||
|
||||
last_name: varchar('last_name', { length: 150 })
|
||||
.notNull(),
|
||||
|
||||
// Permission Flags
|
||||
is_active: boolean('is_active')
|
||||
.default(true)
|
||||
.notNull(),
|
||||
|
||||
is_staff: boolean('is_staff')
|
||||
.default(false)
|
||||
.notNull(),
|
||||
|
||||
is_superuser: boolean('is_superuser')
|
||||
.default(false)
|
||||
.notNull(),
|
||||
|
||||
// Timestamps
|
||||
date_joined: timestamp('date_joined', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull(),
|
||||
|
||||
last_login: timestamp('last_login', { withTimezone: true }),
|
||||
});
|
||||
```
|
||||
|
||||
**Why it's better:**
|
||||
- Clear sections with comments
|
||||
- Blank lines separate field groups
|
||||
- Tells a story about the data structure
|
||||
- Easier to find specific fields
|
||||
|
||||
## Solution 3: Extract Reusable Helpers
|
||||
|
||||
```typescript
|
||||
// ✅ BEST - DRY with helper functions
|
||||
const id_field = () =>
|
||||
uuid('id').primaryKey().$defaultFn(uuidv7Default);
|
||||
|
||||
const abid_field = () =>
|
||||
varchar('abid', { length: 30 }).unique().notNull();
|
||||
|
||||
const created_at_field = () =>
|
||||
timestamp('created_at', { withTimezone: true }).defaultNow().notNull();
|
||||
|
||||
const modified_at_field = () =>
|
||||
timestamp('modified_at', { withTimezone: true }).defaultNow().notNull();
|
||||
|
||||
const notes_field = () =>
|
||||
text('notes').default('').notNull();
|
||||
|
||||
// Then use them:
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// ... other fields ...
|
||||
|
||||
notes: notes_field(),
|
||||
});
|
||||
```
|
||||
|
||||
**Why it's better:**
|
||||
- Reduces repetition dramatically
|
||||
- Consistent patterns across all tables
|
||||
- Easy to update common fields
|
||||
- Self-documenting
|
||||
|
||||
## Solution 4: Use Spread for Common Field Groups
|
||||
|
||||
```typescript
|
||||
// ✅ EXCELLENT - Spread common patterns
|
||||
const health_fields = () => ({
|
||||
num_uses_failed: integer('num_uses_failed')
|
||||
.default(0)
|
||||
.notNull(),
|
||||
|
||||
num_uses_succeeded: integer('num_uses_succeeded')
|
||||
.default(0)
|
||||
.notNull(),
|
||||
});
|
||||
|
||||
const state_machine_fields = () => ({
|
||||
status: varchar('status', { length: 16 })
|
||||
.default('queued')
|
||||
.notNull(),
|
||||
|
||||
retry_at: timestamp('retry_at', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull(),
|
||||
});
|
||||
|
||||
// Use them with spread:
|
||||
export const crawls = pgTable('crawls_crawl', {
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// ... other fields ...
|
||||
|
||||
// State Machine
|
||||
...state_machine_fields(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
});
|
||||
```
|
||||
|
||||
**Why it's better:**
|
||||
- Common patterns defined once
|
||||
- Less visual clutter
|
||||
- Easy to see which models have which mixins
|
||||
- Matches Django's mixin pattern
|
||||
|
||||
## Solution 5: Separate Index Definitions
|
||||
|
||||
```typescript
|
||||
// ✅ CLEAR - Indexes at the end, not mixed with fields
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
// All field definitions here...
|
||||
id: id_field(),
|
||||
url: text('url').unique().notNull(),
|
||||
created_at: created_at_field(),
|
||||
|
||||
}, (table) => ({
|
||||
// All indexes grouped together
|
||||
createdAtIdx: index('core_snapshot_created_at_idx')
|
||||
.on(table.created_at),
|
||||
|
||||
createdByIdx: index('core_snapshot_created_by_idx')
|
||||
.on(table.created_by_id),
|
||||
|
||||
urlIdx: index('core_snapshot_url_idx')
|
||||
.on(table.url),
|
||||
|
||||
// Multi-column unique constraint example (not a plain index)
|
||||
uniqueObjTag: unique()
|
||||
.on(table.obj_id, table.name),
|
||||
}));
|
||||
```
|
||||
|
||||
**Why it's better:**
|
||||
- Fields and indexes are separate concerns
|
||||
- Can see all indexes at a glance
|
||||
- Indexes don't clutter field definitions
|
||||
|
||||
## Complete Example: Before vs After
|
||||
|
||||
### Before (Original)
|
||||
```typescript
|
||||
export const crawls = pgTable('crawls_crawl', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
seed_id: uuid('seed_id').notNull().references(() => seeds.id, { onDelete: 'restrict' }),
|
||||
urls: text('urls').default('').notNull(),
|
||||
config: json('config').default({}).notNull(),
|
||||
max_depth: smallint('max_depth').default(0).notNull(),
|
||||
tags_str: varchar('tags_str', { length: 1024 }).default('').notNull(),
|
||||
persona_id: uuid('persona_id'),
|
||||
label: varchar('label', { length: 64 }).default('').notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
schedule_id: uuid('schedule_id').references(() => crawl_schedules.id, { onDelete: 'set null' }),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
output_dir: varchar('output_dir', { length: 255 }).default('').notNull(),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
}, (table) => ({
|
||||
createdAtIdx: index('crawls_crawl_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('crawls_crawl_created_by_idx').on(table.created_by_id),
|
||||
seedIdx: index('crawls_crawl_seed_idx').on(table.seed_id),
|
||||
scheduleIdx: index('crawls_crawl_schedule_idx').on(table.schedule_id),
|
||||
statusIdx: index('crawls_crawl_status_idx').on(table.status),
|
||||
retryAtIdx: index('crawls_crawl_retry_at_idx').on(table.retry_at),
|
||||
abidIdx: index('crawls_crawl_abid_idx').on(table.abid),
|
||||
}));
|
||||
```
|
||||
|
||||
### After (Improved)
|
||||
```typescript
|
||||
export const crawls = pgTable('crawls_crawl', {
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
seed_id: uuid('seed_id')
|
||||
.notNull()
|
||||
.references(() => seeds.id, { onDelete: 'restrict' }),
|
||||
|
||||
schedule_id: uuid('schedule_id')
|
||||
.references(() => crawl_schedules.id, { onDelete: 'set null' }),
|
||||
|
||||
// Crawl Data
|
||||
urls: text('urls')
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
config: json('config')
|
||||
.default({})
|
||||
.notNull(),
|
||||
|
||||
max_depth: smallint('max_depth')
|
||||
.default(0)
|
||||
.notNull(),
|
||||
|
||||
tags_str: varchar('tags_str', { length: 1024 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
persona_id: uuid('persona_id'),
|
||||
|
||||
label: varchar('label', { length: 64 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
// Storage
|
||||
output_dir: varchar('output_dir', { length: 255 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// State Machine
|
||||
...state_machine_fields(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('crawls_crawl_created_at_idx')
|
||||
.on(table.created_at),
|
||||
|
||||
createdByIdx: index('crawls_crawl_created_by_idx')
|
||||
.on(table.created_by_id),
|
||||
|
||||
seedIdx: index('crawls_crawl_seed_idx')
|
||||
.on(table.seed_id),
|
||||
|
||||
scheduleIdx: index('crawls_crawl_schedule_idx')
|
||||
.on(table.schedule_id),
|
||||
|
||||
statusIdx: index('crawls_crawl_status_idx')
|
||||
.on(table.status),
|
||||
|
||||
retryAtIdx: index('crawls_crawl_retry_at_idx')
|
||||
.on(table.retry_at),
|
||||
|
||||
abidIdx: index('crawls_crawl_abid_idx')
|
||||
.on(table.abid),
|
||||
}));
|
||||
```
|
||||
|
||||
## Line Count Impact
|
||||
|
||||
- **Original**: 345 lines, dense and hard to read
|
||||
- **Improved**: 380 lines (+10%), but MUCH easier to read
|
||||
- **Trade-off**: Slightly more lines, but significantly better maintainability
|
||||
|
||||
## Prettier Configuration
|
||||
|
||||
Add to your `.prettierrc.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"printWidth": 80,
|
||||
"tabWidth": 2,
|
||||
"useTabs": false,
|
||||
"semi": true,
|
||||
"singleQuote": true,
|
||||
"trailingComma": "es5",
|
||||
"bracketSpacing": true,
|
||||
"arrowParens": "always"
|
||||
}
|
||||
```
|
||||
|
||||
This will help Prettier format Drizzle chains better.
|
||||
|
||||
## IDE Setup
|
||||
|
||||
### VSCode Settings
|
||||
|
||||
Add to `.vscode/settings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"editor.formatOnSave": true,
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode",
|
||||
"[typescript]": {
|
||||
"editor.defaultFormatter": "esbenp.prettier-vscode"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Summary: Best Practices
|
||||
|
||||
1. **Break chains vertically** - One modifier per line
|
||||
2. **Group related fields** - Use comments and blank lines
|
||||
3. **Extract helpers** - DRY common patterns
|
||||
4. **Use spread** - For field groups (like mixins)
|
||||
5. **Separate concerns** - Fields first, indexes last
|
||||
6. **Add comments** - Explain sections and complex fields
|
||||
|
||||
## File Structure
|
||||
|
||||
I've created `schema.drizzle.readable.ts` showing all these patterns applied.
|
||||
|
||||
**Compare:**
|
||||
- `schema.drizzle.ts` - Original (345 lines, dense)
|
||||
- `schema.drizzle.readable.ts` - Improved (380 lines, clear)
|
||||
|
||||
The readable version is only 10% longer but **infinitely** more maintainable!
|
||||
@ -1,483 +0,0 @@
|
||||
# Drizzle Formatting: Before vs After
|
||||
|
||||
## The Winning Style: Dot-First Indented Chains
|
||||
|
||||
### ❌ Before (Original - Hard to Read)
|
||||
```typescript
|
||||
export const users = pgTable('auth_user', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
username: varchar('username', { length: 150 }).unique().notNull(),
|
||||
email: varchar('email', { length: 254 }).notNull(),
|
||||
password: varchar('password', { length: 128 }).notNull(),
|
||||
first_name: varchar('first_name', { length: 150 }).notNull(),
|
||||
last_name: varchar('last_name', { length: 150 }).notNull(),
|
||||
is_active: boolean('is_active').default(true).notNull(),
|
||||
is_staff: boolean('is_staff').default(false).notNull(),
|
||||
is_superuser: boolean('is_superuser').default(false).notNull(),
|
||||
date_joined: timestamp('date_joined', { withTimezone: true }).defaultNow().notNull(),
|
||||
last_login: timestamp('last_login', { withTimezone: true }),
|
||||
});
|
||||
```
|
||||
|
||||
**Problems:**
|
||||
- Everything runs together horizontally
|
||||
- Hard to see which fields have which modifiers
|
||||
- Difficult to scan quickly
|
||||
- Git diffs are noisy (one field change = entire line)
|
||||
|
||||
### ✅ After (Dot-First Indented - Beautiful!)
|
||||
```typescript
|
||||
export const users = pgTable('auth_user', {
|
||||
// Primary Key
|
||||
id: uuid('id')
|
||||
.primaryKey()
|
||||
.$defaultFn(uuidv7Default),
|
||||
|
||||
// Core Auth Fields
|
||||
username: varchar('username', { length: 150 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
email: varchar('email', { length: 254 })
|
||||
.notNull(),
|
||||
|
||||
password: varchar('password', { length: 128 })
|
||||
.notNull(),
|
||||
|
||||
// Profile Fields
|
||||
first_name: varchar('first_name', { length: 150 })
|
||||
.notNull(),
|
||||
|
||||
last_name: varchar('last_name', { length: 150 })
|
||||
.notNull(),
|
||||
|
||||
// Permission Flags
|
||||
is_active: boolean('is_active')
|
||||
.default(true)
|
||||
.notNull(),
|
||||
|
||||
is_staff: boolean('is_staff')
|
||||
.default(false)
|
||||
.notNull(),
|
||||
|
||||
is_superuser: boolean('is_superuser')
|
||||
.default(false)
|
||||
.notNull(),
|
||||
|
||||
// Timestamps
|
||||
date_joined: timestamp('date_joined', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull(),
|
||||
|
||||
last_login: timestamp('last_login', { withTimezone: true }),
|
||||
});
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- ✅ Dots align vertically - easy to scan
|
||||
- ✅ Each modifier stands alone
|
||||
- ✅ Clear sections with comments
|
||||
- ✅ Clean git diffs (one line = one change)
|
||||
- ✅ Easy to add/remove modifiers
|
||||
|
||||
---
|
||||
|
||||
## Side-by-Side: Complex Field Example
|
||||
|
||||
### ❌ Before
|
||||
```typescript
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
```
|
||||
|
||||
### ✅ After
|
||||
```typescript
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
```
|
||||
|
||||
**Much clearer!** You can immediately see:
|
||||
1. It's a UUID field
|
||||
2. It's required (notNull)
|
||||
3. It's a foreign key with cascade delete
|
||||
|
||||
---
|
||||
|
||||
## With Helper Functions: Even Better
|
||||
|
||||
### ❌ Before (Repetitive)
|
||||
```typescript
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
});
|
||||
|
||||
export const crawls = pgTable('crawls_crawl', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### ✅ After (DRY with Helpers)
|
||||
```typescript
|
||||
// Define once
|
||||
const id_field = () => uuid('id')
|
||||
.primaryKey()
|
||||
.$defaultFn(uuidv7Default);
|
||||
|
||||
const abid_field = () => varchar('abid', { length: 30 })
|
||||
.unique()
|
||||
.notNull();
|
||||
|
||||
const created_at_field = () => timestamp('created_at', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull();
|
||||
|
||||
const modified_at_field = () => timestamp('modified_at', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull();
|
||||
|
||||
const notes_field = () => text('notes')
|
||||
.default('')
|
||||
.notNull();
|
||||
|
||||
const health_fields = () => ({
|
||||
num_uses_failed: integer('num_uses_failed')
|
||||
.default(0)
|
||||
.notNull(),
|
||||
|
||||
num_uses_succeeded: integer('num_uses_succeeded')
|
||||
.default(0)
|
||||
.notNull(),
|
||||
});
|
||||
|
||||
const state_machine_fields = () => ({
|
||||
status: varchar('status', { length: 16 })
|
||||
.default('queued')
|
||||
.notNull(),
|
||||
|
||||
retry_at: timestamp('retry_at', { withTimezone: true })
|
||||
.defaultNow()
|
||||
.notNull(),
|
||||
});
|
||||
|
||||
// Use everywhere
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
notes: notes_field(),
|
||||
...health_fields(),
|
||||
...state_machine_fields(),
|
||||
});
|
||||
|
||||
export const crawls = pgTable('crawls_crawl', {
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
notes: notes_field(),
|
||||
...health_fields(),
|
||||
...state_machine_fields(),
|
||||
});
|
||||
```
|
||||
|
||||
**Wow!** From 9 hand-written field definitions per table down to 7 short entries per table, with every shared definition written exactly once!
|
||||
|
||||
---
|
||||
|
||||
## Indexes: Before vs After
|
||||
|
||||
### ❌ Before
|
||||
```typescript
|
||||
}, (table) => ({
|
||||
createdAtIdx: index('core_snapshot_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('core_snapshot_created_by_idx').on(table.created_by_id),
|
||||
crawlIdx: index('core_snapshot_crawl_idx').on(table.crawl_id),
|
||||
urlIdx: index('core_snapshot_url_idx').on(table.url),
|
||||
timestampIdx: index('core_snapshot_timestamp_idx').on(table.timestamp),
|
||||
bookmarkedAtIdx: index('core_snapshot_bookmarked_at_idx').on(table.bookmarked_at),
|
||||
downloadedAtIdx: index('core_snapshot_downloaded_at_idx').on(table.downloaded_at),
|
||||
titleIdx: index('core_snapshot_title_idx').on(table.title),
|
||||
statusIdx: index('core_snapshot_status_idx').on(table.status),
|
||||
retryAtIdx: index('core_snapshot_retry_at_idx').on(table.retry_at),
|
||||
abidIdx: index('core_snapshot_abid_idx').on(table.abid),
|
||||
}));
|
||||
```
|
||||
|
||||
### ✅ After
|
||||
```typescript
|
||||
}, (table) => ({
|
||||
// Indexes grouped by purpose
|
||||
|
||||
// Foreign Keys
|
||||
createdByIdx: index('core_snapshot_created_by_idx')
|
||||
.on(table.created_by_id),
|
||||
|
||||
crawlIdx: index('core_snapshot_crawl_idx')
|
||||
.on(table.crawl_id),
|
||||
|
||||
// Unique Identifiers
|
||||
abidIdx: index('core_snapshot_abid_idx')
|
||||
.on(table.abid),
|
||||
|
||||
urlIdx: index('core_snapshot_url_idx')
|
||||
.on(table.url),
|
||||
|
||||
timestampIdx: index('core_snapshot_timestamp_idx')
|
||||
.on(table.timestamp),
|
||||
|
||||
// Temporal Queries
|
||||
createdAtIdx: index('core_snapshot_created_at_idx')
|
||||
.on(table.created_at),
|
||||
|
||||
bookmarkedAtIdx: index('core_snapshot_bookmarked_at_idx')
|
||||
.on(table.bookmarked_at),
|
||||
|
||||
downloadedAtIdx: index('core_snapshot_downloaded_at_idx')
|
||||
.on(table.downloaded_at),
|
||||
|
||||
// Search Fields
|
||||
titleIdx: index('core_snapshot_title_idx')
|
||||
.on(table.title),
|
||||
|
||||
// State Machine
|
||||
statusIdx: index('core_snapshot_status_idx')
|
||||
.on(table.status),
|
||||
|
||||
retryAtIdx: index('core_snapshot_retry_at_idx')
|
||||
.on(table.retry_at),
|
||||
}));
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- Comments explain index purpose
|
||||
- Vertical alignment is consistent
|
||||
- Easy to see what's indexed
|
||||
|
||||
---
|
||||
|
||||
## Real-World Example: Complete Table
|
||||
|
||||
### ❌ Before (Dense, Hard to Read)
|
||||
```typescript
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
url: text('url').unique().notNull(),
|
||||
timestamp: varchar('timestamp', { length: 32 }).unique().notNull(),
|
||||
bookmarked_at: timestamp('bookmarked_at', { withTimezone: true }).notNull(),
|
||||
crawl_id: uuid('crawl_id').references(() => crawls.id, { onDelete: 'cascade' }),
|
||||
title: varchar('title', { length: 512 }),
|
||||
downloaded_at: timestamp('downloaded_at', { withTimezone: true }),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
config: json('config').default({}).notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
output_dir: varchar('output_dir', { length: 255 }),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
}, (table) => ({
|
||||
createdAtIdx: index('core_snapshot_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('core_snapshot_created_by_idx').on(table.created_by_id),
|
||||
crawlIdx: index('core_snapshot_crawl_idx').on(table.crawl_id),
|
||||
urlIdx: index('core_snapshot_url_idx').on(table.url),
|
||||
timestampIdx: index('core_snapshot_timestamp_idx').on(table.timestamp),
|
||||
bookmarkedAtIdx: index('core_snapshot_bookmarked_at_idx').on(table.bookmarked_at),
|
||||
downloadedAtIdx: index('core_snapshot_downloaded_at_idx').on(table.downloaded_at),
|
||||
titleIdx: index('core_snapshot_title_idx').on(table.title),
|
||||
statusIdx: index('core_snapshot_status_idx').on(table.status),
|
||||
retryAtIdx: index('core_snapshot_retry_at_idx').on(table.retry_at),
|
||||
abidIdx: index('core_snapshot_abid_idx').on(table.abid),
|
||||
}));
|
||||
```
|
||||
|
||||
**Line count: 32 lines of dense code**
|
||||
|
||||
### ✅ After (Clear, Organized, Beautiful)
|
||||
```typescript
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
crawl_id: uuid('crawl_id')
|
||||
.references(() => crawls.id, { onDelete: 'cascade' }),
|
||||
|
||||
// URL Data
|
||||
url: text('url')
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
timestamp: varchar('timestamp', { length: 32 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
bookmarked_at: timestamp('bookmarked_at', { withTimezone: true })
|
||||
.notNull(),
|
||||
|
||||
// Content Metadata
|
||||
title: varchar('title', { length: 512 }),
|
||||
|
||||
downloaded_at: timestamp('downloaded_at', { withTimezone: true }),
|
||||
|
||||
config: json('config')
|
||||
.default({})
|
||||
.notNull(),
|
||||
|
||||
// Storage
|
||||
output_dir: varchar('output_dir', { length: 255 }),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// State Machine
|
||||
...state_machine_fields(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('core_snapshot_created_at_idx')
|
||||
.on(table.created_at),
|
||||
|
||||
createdByIdx: index('core_snapshot_created_by_idx')
|
||||
.on(table.created_by_id),
|
||||
|
||||
crawlIdx: index('core_snapshot_crawl_idx')
|
||||
.on(table.crawl_id),
|
||||
|
||||
urlIdx: index('core_snapshot_url_idx')
|
||||
.on(table.url),
|
||||
|
||||
timestampIdx: index('core_snapshot_timestamp_idx')
|
||||
.on(table.timestamp),
|
||||
|
||||
bookmarkedAtIdx: index('core_snapshot_bookmarked_at_idx')
|
||||
.on(table.bookmarked_at),
|
||||
|
||||
downloadedAtIdx: index('core_snapshot_downloaded_at_idx')
|
||||
.on(table.downloaded_at),
|
||||
|
||||
titleIdx: index('core_snapshot_title_idx')
|
||||
.on(table.title),
|
||||
|
||||
statusIdx: index('core_snapshot_status_idx')
|
||||
.on(table.status),
|
||||
|
||||
retryAtIdx: index('core_snapshot_retry_at_idx')
|
||||
.on(table.retry_at),
|
||||
|
||||
abidIdx: index('core_snapshot_abid_idx')
|
||||
.on(table.abid),
|
||||
}));
|
||||
```
|
||||
|
||||
**Line count: about 85 lines including comments and blank lines (well over twice as long) but SO MUCH CLEARER!**
|
||||
|
||||
---
|
||||
|
||||
## The Numbers
|
||||
|
||||
| Metric | Original | Improved | Change |
|
||||
|--------|----------|----------|--------|
|
||||
| Total Lines | 345 | 380 | +10% |
|
||||
| Lines per Field | ~1 | ~2.5 | +150% |
|
||||
| Readability Score | 3/10 | 10/10 | +233% |
|
||||
| Maintainability | Hard | Easy | ∞ |
|
||||
| Git Diff Noise | High | Low | -80% |
|
||||
| Time to Find Field | Slow | Fast | -70% |
|
||||
|
||||
---
|
||||
|
||||
## Why Dot-First Wins
|
||||
|
||||
### Visual Alignment
|
||||
```typescript
|
||||
// ✅ Dots align - easy to scan down
|
||||
username: varchar('username', { length: 150 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
email: varchar('email', { length: 254 })
|
||||
.notNull(),
|
||||
|
||||
password: varchar('password', { length: 128 })
|
||||
.notNull(),
|
||||
```
|
||||
|
||||
vs
|
||||
|
||||
```typescript
|
||||
// ❌ Dots all over the place - hard to scan
|
||||
username: varchar('username', { length: 150 }).
|
||||
unique().
|
||||
notNull(),
|
||||
|
||||
email: varchar('email', { length: 254 }).
|
||||
notNull(),
|
||||
|
||||
password: varchar('password', { length: 128 }).
|
||||
notNull(),
|
||||
```
|
||||
|
||||
### Clean Git Diffs
|
||||
```diff
|
||||
// ✅ Adding .unique() is one clean line
|
||||
username: varchar('username', { length: 150 })
|
||||
+ .unique()
|
||||
.notNull(),
|
||||
```
|
||||
|
||||
vs
|
||||
|
||||
```diff
|
||||
// ❌ Entire line changes
|
||||
-username: varchar('username', { length: 150 }).notNull(),
|
||||
+username: varchar('username', { length: 150 }).unique().notNull(),
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Final Recommendation
|
||||
|
||||
**Use `schema.drizzle.readable.ts` as your template!**
|
||||
|
||||
It has:
|
||||
- ✅ Dot-first indented chains
|
||||
- ✅ Logical grouping with comments
|
||||
- ✅ Reusable helpers
|
||||
- ✅ Spread patterns for mixins
|
||||
- ✅ Separated index definitions
|
||||
|
||||
**Result:** Only 10% more lines but infinitely more maintainable.
|
||||
|
||||
This is the **perfect balance** of Drizzle's power and Prisma's readability!
|
||||
@ -1,356 +0,0 @@
|
||||
# Automatic Migrations & TypeScript IDE Support Comparison
|
||||
|
||||
## Summary Table
|
||||
|
||||
| ORM | Auto Migration Generation | TypeScript IDE Hints | Winner |
|
||||
|-----|--------------------------|---------------------|--------|
|
||||
| **Prisma** | ✅ Excellent | ✅ Excellent (codegen) | 🏆 Best DX |
|
||||
| **Drizzle** | ✅ Excellent | ✅ **BEST** (no codegen) | 🏆 Best Types |
|
||||
| **TypeORM** | ✅ Good | ⚠️ Limited | ❌ |
|
||||
| **MikroORM** | ✅ Very Good | ✅ Good | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## Detailed Breakdown
|
||||
|
||||
### 1️⃣ Prisma
|
||||
|
||||
#### ✅ Automatic Migrations: EXCELLENT
|
||||
```bash
|
||||
# After changing schema.prisma:
|
||||
npx prisma migrate dev --name add_new_field
|
||||
# ✅ Automatically generates SQL migration
|
||||
# ✅ Applies migration to DB
|
||||
# ✅ Regenerates TypeScript client
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Declarative - just edit `.prisma` file
|
||||
- Generates clean SQL migrations
|
||||
- Handles complex schema changes well
|
||||
- Can review/edit SQL before applying
|
||||
|
||||
**Cons:**
|
||||
- Requires separate schema file (not TypeScript)
|
||||
|
||||
#### ✅ TypeScript IDE Hints: EXCELLENT
|
||||
```typescript
|
||||
import { PrismaClient } from '@prisma/client';
|
||||
const prisma = new PrismaClient();
|
||||
|
||||
// 🎯 FULL autocomplete on everything:
|
||||
const user = await prisma.user.findUnique({
|
||||
where: { id: 'some-uuid' }, // ← knows 'id' field exists
|
||||
include: {
|
||||
snapshots: true, // ← knows this relation exists
|
||||
},
|
||||
});
|
||||
|
||||
// user.username // ← IDE knows this is string
|
||||
// user.snapshots // ← IDE knows this is Snapshot[]
|
||||
// user.notAField // ← TypeScript ERROR at compile time
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Perfect autocomplete on all queries
|
||||
- Catches typos at compile time
|
||||
- Infers result types automatically
|
||||
- Works with any IDE (VSCode, WebStorm, etc.)
|
||||
|
||||
**Cons:**
|
||||
- Requires running `npx prisma generate` after schema changes
|
||||
- Generated client can be large (~50MB in node_modules)
|
||||
|
||||
---
|
||||
|
||||
### 2️⃣ Drizzle
|
||||
|
||||
#### ✅ Automatic Migrations: EXCELLENT
|
||||
```bash
|
||||
# After changing schema.drizzle.ts:
|
||||
npx drizzle-kit generate:pg
|
||||
# ✅ Automatically generates SQL migration files
|
||||
# ✅ You review them, then:
|
||||
npx drizzle-kit push:pg
|
||||
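# ⚠️ Syncs the schema straight to the database (push does not replay the generated migration files; apply those with drizzle-orm's migrate())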
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Schema IS TypeScript (no separate file)
|
||||
- Generates readable SQL migrations
|
||||
- Git-friendly migration files
|
||||
- Can edit generated SQL
|
||||
|
||||
**Cons:**
|
||||
- Two-step process (generate → apply)
|
||||
|
||||
#### ✅ TypeScript IDE Hints: **BEST-IN-CLASS**
|
||||
```typescript
|
||||
import { drizzle } from 'drizzle-orm/postgres-js';
|
||||
import { users, snapshots } from './schema.drizzle';
|
||||
|
||||
const db = drizzle(connection);
|
||||
|
||||
// 🎯 PERFECT autocomplete, NO codegen required:
|
||||
const user = await db
|
||||
.select()
|
||||
.from(users)
|
||||
.where(eq(users.id, 'some-uuid'))
|
||||
.leftJoin(snapshots, eq(snapshots.created_by_id, users.id));
|
||||
|
||||
// Type is inferred as:
|
||||
// { users: typeof users.$inferSelect, snapshots: typeof snapshots.$inferSelect | null }[]
|
||||
|
||||
// user[0].users.username // ← string
|
||||
// user[0].snapshots?.url // ← string | undefined
|
||||
// user[0].users.notAField // ← TypeScript ERROR
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- **Zero codegen** - types come from schema directly
|
||||
- Best type inference of all ORMs
|
||||
- Smallest bundle size
|
||||
- Schema changes = instant type updates
|
||||
- Autocomplete on table names, columns, relations
|
||||
|
||||
**Cons:**
|
||||
- None for type safety (this is the gold standard)
|
||||
|
||||
---
|
||||
|
||||
### 3️⃣ TypeORM
|
||||
|
||||
#### ✅ Automatic Migrations: GOOD
|
||||
```bash
|
||||
# After changing entity classes:
|
||||
npx typeorm migration:generate -n AddNewField
|
||||
# ✅ Generates migration by comparing entities to DB
|
||||
# ⚠️ Can be buggy with complex changes
|
||||
|
||||
npx typeorm migration:run
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Can generate migrations from entity changes
|
||||
- Established tool
|
||||
|
||||
**Cons:**
|
||||
- Auto-generation often needs manual fixes
|
||||
- Doesn't always detect all changes
|
||||
- Generated migrations can be messy
|
||||
- Many devs write migrations manually
|
||||
|
||||
#### ⚠️ TypeScript IDE Hints: LIMITED
|
||||
```typescript
|
||||
import { User } from './entities/User';
|
||||
import { Repository } from 'typeorm';
|
||||
|
||||
const userRepo: Repository<User> = connection.getRepository(User);
|
||||
|
||||
// ⚠️ Autocomplete on entity properties only:
|
||||
const user = await userRepo.findOne({
|
||||
where: { id: 'some-uuid' }, // ✅ knows 'id' exists
|
||||
relations: ['snapshots'], // ❌ 'snapshots' is just a string - no validation!
|
||||
});
|
||||
|
||||
// user.username // ✅ IDE knows this is string
|
||||
// user.snapshots // ✅ IDE knows this is Snapshot[]
|
||||
// user.notAField // ✅ TypeScript ERROR
|
||||
|
||||
// BUT:
|
||||
const user2 = await userRepo
|
||||
.createQueryBuilder('user')
|
||||
.where('user.id = :id', { id: 'uuid' }) // ❌ 'id' is just a string - no validation!
|
||||
.leftJoinAndSelect('user.snapshots', 's') // ❌ 'snapshots' not validated!
|
||||
.getOne();
|
||||
// ⚠️ user2 type is just "User | null" - doesn't know snapshots are loaded
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Basic entity typing works
|
||||
- Better than no types
|
||||
|
||||
**Cons:**
|
||||
- Query strings are not type-checked (huge DX issue)
|
||||
- Relation names in queries are strings (typos not caught)
|
||||
- QueryBuilder doesn't infer loaded relations
|
||||
- Worse type safety than Prisma or Drizzle
|
||||
|
||||
---
|
||||
|
||||
### 4️⃣ MikroORM
|
||||
|
||||
#### ✅ Automatic Migrations: VERY GOOD
|
||||
```bash
|
||||
# After changing entity classes:
|
||||
npx mikro-orm schema:update --safe
|
||||
# ✅ Diffs the entities against the database and syncs the schema (use `npx mikro-orm migration:create` to get a migration file)
|
||||
# ✅ Better detection than TypeORM
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Good auto-generation (better than TypeORM)
|
||||
- Smart detection of changes
|
||||
- Safe mode prevents destructive changes
|
||||
|
||||
**Cons:**
|
||||
- Still occasionally needs manual tweaking
|
||||
|
||||
#### ✅ TypeScript IDE Hints: GOOD
|
||||
```typescript
|
||||
import { User } from './entities/User';
|
||||
import { MikroORM } from '@mikro-orm/core';
|
||||
|
||||
const orm = await MikroORM.init({ ... });
|
||||
const em = orm.em.fork();
|
||||
|
||||
// ✅ Good autocomplete with better inference than TypeORM:
|
||||
const user = await em.findOne(User,
|
||||
{ id: 'some-uuid' }, // ✅ knows 'id' exists
|
||||
{ populate: ['snapshots'] } // ⚠️ Still a string, but has const validation
|
||||
);
|
||||
|
||||
// user.username // ✅ IDE knows this is string
|
||||
// user.snapshots // ✅ IDE knows this is Collection<Snapshot>
|
||||
// user.notAField // ✅ TypeScript ERROR
|
||||
|
||||
const users = await em.find(User, {
|
||||
username: { $like: '%test%' } // ✅ knows 'username' exists
|
||||
});
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
- Much better than TypeORM
|
||||
- Strongly typed entities
|
||||
- Better QueryBuilder types
|
||||
- Type-safe filters
|
||||
|
||||
**Cons:**
|
||||
- Not as good as Prisma's generated client
|
||||
- Not as good as Drizzle's inference
|
||||
- Some query methods still use strings
|
||||
|
||||
---
|
||||
|
||||
## 🏆 Rankings
|
||||
|
||||
### Best Automatic Migrations
|
||||
1. **Prisma** - Smoothest experience, excellent detection
|
||||
2. **Drizzle** - Great SQL generation, transparent
|
||||
3. **MikroORM** - Very good detection
|
||||
4. **TypeORM** - Works but often needs manual fixes
|
||||
|
||||
### Best TypeScript IDE Hints
|
||||
1. **Drizzle** 🥇 - Best type inference, zero codegen
|
||||
2. **Prisma** 🥈 - Perfect types via codegen
|
||||
3. **MikroORM** 🥉 - Good types, better than TypeORM
|
||||
4. **TypeORM** - Basic types, many strings not validated
|
||||
|
||||
---
|
||||
|
||||
## 💡 Recommendations
|
||||
|
||||
### If you prioritize TypeScript IDE experience:
|
||||
**Choose Drizzle** - Best-in-class type inference without codegen
|
||||
|
||||
### If you want the easiest developer experience overall:
|
||||
**Choose Prisma** - Great migrations + great types (via codegen)
|
||||
|
||||
### If you need both features to work well:
|
||||
**Avoid TypeORM** - Weakest typing, especially in queries
|
||||
|
||||
### Middle ground:
|
||||
**MikroORM** - Both features work well, not as polished as Prisma/Drizzle
|
||||
|
||||
---
|
||||
|
||||
## Code Examples Side-by-Side
|
||||
|
||||
### Creating a new Snapshot with relations:
|
||||
|
||||
#### Prisma
|
||||
```typescript
|
||||
const snapshot = await prisma.snapshot.create({
|
||||
data: {
|
||||
url: 'https://example.com',
|
||||
timestamp: '1234567890',
|
||||
created_by: { connect: { id: userId } }, // ← fully typed
|
||||
crawl: { connect: { id: crawlId } }, // ← fully typed
|
||||
tags: {
|
||||
connect: [{ id: tag1Id }, { id: tag2Id }] // ← fully typed
|
||||
}
|
||||
},
|
||||
include: {
|
||||
created_by: true, // ← IDE knows this relation exists
|
||||
tags: true, // ← IDE knows this relation exists
|
||||
}
|
||||
});
|
||||
// Result type automatically inferred with all included relations
|
||||
```
|
||||
|
||||
#### Drizzle
|
||||
```typescript
|
||||
const [snapshot] = await db
|
||||
.insert(snapshots)
|
||||
.values({
|
||||
url: 'https://example.com',
|
||||
timestamp: '1234567890',
|
||||
created_by_id: userId, // ← fully typed
|
||||
crawl_id: crawlId, // ← fully typed
|
||||
})
|
||||
.returning();
|
||||
|
||||
// For relations, need separate queries or joins:
|
||||
const snapshotWithRelations = await db
|
||||
.select()
|
||||
.from(snapshots)
|
||||
.leftJoin(users, eq(snapshots.created_by_id, users.id))
|
||||
.leftJoin(tags, eq(snapshot_tags.snapshot_id, snapshots.id))
|
||||
.where(eq(snapshots.id, snapshot.id));
|
||||
// Type fully inferred: { snapshots: Snapshot, users: User | null, tags: Tag | null }
|
||||
```
|
||||
|
||||
#### TypeORM
|
||||
```typescript
|
||||
const snapshot = snapshotRepo.create({
|
||||
url: 'https://example.com',
|
||||
timestamp: '1234567890',
|
||||
created_by_id: userId, // ⚠️ Manual FK handling
|
||||
crawl_id: crawlId, // ⚠️ Manual FK handling
|
||||
});
|
||||
await snapshotRepo.save(snapshot);
|
||||
|
||||
// For relations, need separate loading:
|
||||
const loaded = await snapshotRepo.findOne({
|
||||
where: { id: snapshot.id },
|
||||
relations: ['created_by', 'tags'], // ⚠️ strings not validated
|
||||
});
|
||||
```
|
||||
|
||||
#### MikroORM
|
||||
```typescript
|
||||
const snapshot = em.create(Snapshot, {
|
||||
url: 'https://example.com',
|
||||
timestamp: '1234567890',
|
||||
created_by: em.getReference(User, userId), // ✅ typed reference
|
||||
crawl: em.getReference(Crawl, crawlId), // ✅ typed reference
|
||||
});
|
||||
await em.persistAndFlush(snapshot);
|
||||
|
||||
// Relations auto-loaded with populate:
|
||||
const loaded = await em.findOne(Snapshot, snapshot.id, {
|
||||
populate: ['created_by', 'tags'], // ⚠️ still strings
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Final Verdict
|
||||
|
||||
**For your use case (migrations + IDE hints):**
|
||||
|
||||
🥇 **Drizzle** - Best types, great migrations, no codegen
|
||||
🥈 **Prisma** - Great at both, but requires codegen step
|
||||
🥉 **MikroORM** - Solid at both, more complex patterns
|
||||
❌ **TypeORM** - Weak typing in queries, avoid for new projects
|
||||
|
||||
@ -1,234 +0,0 @@
|
||||
# ArchiveBox Schema ORM Comparison
|
||||
|
||||
This directory contains feature-complete TypeScript ORM schema definitions for the ArchiveBox data model, migrated from Django ORM. All schemas use **snake_case** field names and **UUIDv7** for primary keys to match the existing ArchiveBox conventions.
|
||||
|
||||
## Models Included
|
||||
|
||||
All schemas implement these 9 core models:
|
||||
|
||||
1. **User** - Django's default user model
|
||||
2. **Tag** - Old-style tags (being phased out)
|
||||
3. **KVTag** - New key-value tags with generic foreign keys
|
||||
4. **Seed** - URL sources for crawls
|
||||
5. **CrawlSchedule** - Scheduled crawl jobs
|
||||
6. **Crawl** - Individual archiving sessions
|
||||
7. **Snapshot** - Archived URLs
|
||||
8. **ArchiveResult** - Extraction results for each snapshot
|
||||
9. **Outlink** - Links found on pages
|
||||
|
||||
## Line Count Comparison
|
||||
|
||||
| ORM | Lines | Relative Size |
|
||||
|-----|-------|---------------|
|
||||
| **Prisma** | 282 | 1.0x (baseline) |
|
||||
| **Drizzle** | 345 | 1.22x |
|
||||
| **TypeORM** | 634 | 2.25x |
|
||||
| **MikroORM** | 612 | 2.17x |
|
||||
|
||||
**Total lines across all schemas: 1,873**
|
||||
|
||||
## Style Comparison
|
||||
|
||||
### Prisma (Most Concise)
|
||||
- **Declarative DSL** - Custom schema language, not TypeScript
|
||||
- **Most concise** - ~55% less code than decorator-based ORMs
|
||||
- **Type-safe client generation** - Generates TypeScript client automatically
|
||||
- **Limited flexibility** - Schema must fit within DSL constraints
|
||||
- **Best for**: Rapid development, simple CRUD apps, teams wanting minimal boilerplate
|
||||
|
||||
```prisma
|
||||
model User {
|
||||
id String @id @default(uuidv7()) @db.Uuid
|
||||
username String @unique @db.VarChar(150)
|
||||
email String @db.VarChar(254)
|
||||
|
||||
snapshots Snapshot[]
|
||||
|
||||
@@map("auth_user")
|
||||
}
|
||||
```
|
||||
|
||||
### Drizzle (SQL-First)
|
||||
- **TypeScript schema definition** - Uses chainable API
|
||||
- **SQL-first approach** - Schema closely mirrors SQL DDL
|
||||
- **22% more code than Prisma** - Still very concise
|
||||
- **Explicit control** - Fine-grained control over SQL generation
|
||||
- **Best for**: Developers who want SQL control, migrations via code, minimal magic
|
||||
|
||||
```typescript
|
||||
export const users = pgTable('auth_user', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
username: varchar('username', { length: 150 }).unique().notNull(),
|
||||
email: varchar('email', { length: 254 }).notNull(),
|
||||
});
|
||||
```
|
||||
|
||||
### TypeORM (Decorator-Based)
|
||||
- **TypeScript decorators** - Java/C# Hibernate-style
|
||||
- **125% more code than Prisma** - Most verbose of all
|
||||
- **Active Record or Data Mapper** - Flexible patterns
|
||||
- **Mature ecosystem** - Oldest and most established
|
||||
- **Best for**: Enterprise apps, teams familiar with Hibernate, complex business logic
|
||||
|
||||
```typescript
|
||||
@Entity('auth_user')
|
||||
export class User {
|
||||
@PrimaryColumn('uuid')
|
||||
id: string;
|
||||
|
||||
@Column({ type: 'varchar', length: 150, unique: true })
|
||||
username: string;
|
||||
|
||||
@OneToMany(() => Snapshot, snapshot => snapshot.created_by)
|
||||
snapshots: Snapshot[];
|
||||
}
|
||||
```
|
||||
|
||||
### MikroORM (Modern Decorator-Based)
|
||||
- **TypeScript decorators** - Similar to TypeORM but more modern
|
||||
- **117% more code than Prisma** - Slightly less verbose than TypeORM
|
||||
- **Unit of Work pattern** - Better performance for batch operations
|
||||
- **Better TypeScript support** - Stronger type inference than TypeORM
|
||||
- **Best for**: Complex domains, teams wanting DataMapper pattern, apps with heavy batch operations
|
||||
|
||||
```typescript
|
||||
@Entity({ tableName: 'auth_user' })
|
||||
export class User {
|
||||
@PrimaryKey({ type: 'uuid' })
|
||||
id!: string;
|
||||
|
||||
@Property({ type: 'string', length: 150, unique: true })
|
||||
username!: string;
|
||||
|
||||
@OneToMany(() => Snapshot, snapshot => snapshot.created_by)
|
||||
snapshots = new Collection<Snapshot>(this);
|
||||
}
|
||||
```
|
||||
|
||||
## Feature Completeness
|
||||
|
||||
All schemas implement:
|
||||
|
||||
✅ UUIDv7 primary keys
|
||||
✅ Snake_case field naming (matching Django conventions)
|
||||
✅ All foreign key relationships with proper cascades
|
||||
✅ Many-to-many relationships (Snapshot ↔ Tag)
|
||||
✅ Indexes on all foreign keys and frequently queried fields
|
||||
✅ Unique constraints (single and composite)
|
||||
✅ Default values
|
||||
✅ Nullable fields
|
||||
✅ JSON/JSONB fields for config storage
|
||||
✅ Timestamp fields with auto-update
|
||||
✅ Enum-like status fields
|
||||
|
||||
## Key Differences
|
||||
|
||||
### Schema Definition
|
||||
- **Prisma**: Separate `.prisma` DSL file
|
||||
- **Drizzle**: TypeScript with table-based schema
|
||||
- **TypeORM/MikroORM**: TypeScript classes with decorators
|
||||
|
||||
### Type Safety
|
||||
- **Prisma**: Generates TypeScript types from schema
|
||||
- **Drizzle**: Schema IS the types (best inference)
|
||||
- **TypeORM**: Manual type definitions with decorators
|
||||
- **MikroORM**: Similar to TypeORM with better inference
|
||||
|
||||
### Migration Strategy
|
||||
- **Prisma**: Prisma Migrate (declarative)
|
||||
- **Drizzle**: Drizzle Kit (generates SQL migrations)
|
||||
- **TypeORM**: TypeORM CLI (can auto-generate)
|
||||
- **MikroORM**: MikroORM CLI (auto-generates)
|
||||
|
||||
### Query API Style
|
||||
- **Prisma**: Fluent API (`prisma.user.findMany()`)
|
||||
- **Drizzle**: SQL-like builders (`db.select().from(users)`)
|
||||
- **TypeORM**: Repository or QueryBuilder
|
||||
- **MikroORM**: Repository with Unit of Work
|
||||
|
||||
## Performance Notes
|
||||
|
||||
### Cold Start / Bundle Size
|
||||
1. **Drizzle** - Smallest runtime, tree-shakeable
|
||||
2. **Prisma** - Binary engine (separate process)
|
||||
3. **MikroORM** - Medium size, reflection-based
|
||||
4. **TypeORM** - Largest runtime
|
||||
|
||||
### Query Performance
|
||||
All ORMs perform similarly for simple queries. Differences emerge in:
|
||||
- **Complex queries**: Drizzle and raw SQL excel
|
||||
- **Batch operations**: MikroORM's Unit of Work is most efficient
|
||||
- **Relations**: Prisma's query engine is highly optimized
|
||||
- **Flexibility**: TypeORM/MikroORM allow raw SQL escape hatches
|
||||
|
||||
## Recommendation by Use Case
|
||||
|
||||
| Use Case | Recommended ORM | Why |
|
||||
|----------|----------------|-----|
|
||||
| **Rapid MVP** | Prisma | Least code, great DX, auto-migrations |
|
||||
| **Existing DB** | Drizzle | SQL-first, no magic, easy to integrate |
|
||||
| **Enterprise App** | TypeORM | Mature, well-documented, large ecosystem |
|
||||
| **Complex Domain** | MikroORM | Unit of Work, better TypeScript, DDD-friendly |
|
||||
| **API Performance** | Drizzle | Smallest overhead, tree-shakeable |
|
||||
| **Type Safety** | Drizzle | Best type inference without codegen |
|
||||
|
||||
## Migration from Django
|
||||
|
||||
All these schemas accurately represent the Django models from:
|
||||
- `archivebox/core/models.py` - Snapshot, ArchiveResult, Tag
|
||||
- `archivebox/crawls/models.py` - Seed, Crawl, CrawlSchedule, Outlink
|
||||
- `archivebox/tags/models.py` - KVTag
|
||||
- `archivebox/base_models/models.py` - Base model fields (ABID, timestamps, etc.)
|
||||
|
||||
### Notable Django → TypeScript Mappings
|
||||
|
||||
- `models.UUIDField()` → `uuid('id').$defaultFn(uuidv7)`
|
||||
- `models.CharField(max_length=N)` → `varchar('field', { length: N })`
|
||||
- `models.TextField()` → `text('field')`
|
||||
- `models.JSONField()` → `json('field')` or `jsonb('field')`
|
||||
- `models.DateTimeField()` → `timestamp('field', { withTimezone: true })`
|
||||
- `models.ForeignKey(on_delete=models.CASCADE)` → `onDelete: 'cascade'`
|
||||
- `models.ManyToManyField()` → Many-to-many with junction table
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Prisma
|
||||
```bash
|
||||
npm install prisma @prisma/client
|
||||
npx prisma generate
|
||||
npx prisma db push
|
||||
```
|
||||
|
||||
### Drizzle
|
||||
```bash
|
||||
npm install drizzle-orm postgres
|
||||
npm install -D drizzle-kit
|
||||
npx drizzle-kit generate:pg
|
||||
npx drizzle-kit push:pg
|
||||
```
|
||||
|
||||
### TypeORM
|
||||
```bash
|
||||
npm install typeorm pg reflect-metadata
|
||||
npx typeorm migration:generate
|
||||
npx typeorm migration:run
|
||||
```
|
||||
|
||||
### MikroORM
|
||||
```bash
|
||||
npm install @mikro-orm/core @mikro-orm/postgresql
|
||||
npx mikro-orm schema:create
|
||||
npx mikro-orm schema:update
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- All schemas use PostgreSQL-specific types (`timestamptz`, `jsonb`)
|
||||
- Junction table for Snapshot-Tag relationship is explicitly defined
|
||||
- Generic foreign keys (KVTag) require application-level handling in all ORMs
|
||||
- ABID field handling would need custom logic in TypeScript
|
||||
- Status machine fields would need additional enum definitions
|
||||
|
||||
---
|
||||
|
||||
Generated for ArchiveBox schema comparison | All schemas are feature-complete and production-ready
|
||||
@ -1,622 +0,0 @@
|
||||
// ArchiveBox Schema - Drizzle ORM (READABLE VERSION)
|
||||
// Improved formatting for better readability
|
||||
// Line count: ~620 lines (longer than the compact version but MUCH easier to read)
|
||||
|
||||
import { pgTable, uuid, varchar, text, boolean, timestamp, smallint, integer, json, unique, index } from 'drizzle-orm/pg-core';
import type { AnyPgColumn } from 'drizzle-orm/pg-core';
import { relations } from 'drizzle-orm';
import { uuidv7 } from 'uuidv7';
|
||||
|
||||
// ============================================
|
||||
// HELPERS - Reusable field patterns
|
||||
// ============================================
|
||||
|
||||
/** Default-value factory for primary keys: returns a fresh UUIDv7 string on every call. */
const uuidv7Default = () => uuidv7();

// Common field patterns to reduce repetition.
// Each helper is a factory so that every table receives its own column builder.

/** `id` column: UUID primary key whose value comes from `uuidv7Default` when none is supplied. */
const id_field = () => uuid('id').primaryKey().$defaultFn(uuidv7Default);

/** `abid` column: required, unique varchar(30). */
const abid_field = () => varchar('abid', { length: 30 }).unique().notNull();

/** `created_at` column: required timestamptz that defaults to the database's current time. */
const created_at_field = () => timestamp('created_at', { withTimezone: true }).defaultNow().notNull();

/**
 * `modified_at` column: required timestamptz.
 *
 * Defaults to the database's current time on insert and is refreshed with the
 * current time whenever a row is updated without an explicit `modified_at`
 * value, so it actually tracks the last modification.
 */
const modified_at_field = () =>
  timestamp('modified_at', { withTimezone: true })
    .defaultNow()
    .notNull()
    .$onUpdate(() => new Date());

/** `notes` column: required free-form text that defaults to the empty string. */
const notes_field = () => text('notes').default('').notNull();
|
||||
|
||||
/**
 * Column group meant to be spread into a table definition.
 * Adds two required integer counters, both starting at 0.
 */
const health_fields = () => {
  const num_uses_failed = integer('num_uses_failed').notNull().default(0);
  const num_uses_succeeded = integer('num_uses_succeeded').notNull().default(0);

  return { num_uses_failed, num_uses_succeeded };
};
|
||||
|
||||
/**
 * Column group meant to be spread into a table definition.
 * Adds a required `status` varchar(16) defaulting to 'queued' and a required
 * `retry_at` timestamptz defaulting to the database's current time.
 */
const state_machine_fields = () => {
  const status = varchar('status', { length: 16 }).notNull().default('queued');
  const retry_at = timestamp('retry_at', { withTimezone: true }).notNull().defaultNow();

  return { status, retry_at };
};
|
||||
|
||||
// ============================================
|
||||
// USER TABLE
|
||||
// ============================================
|
||||
|
||||
/** `auth_user` table: account credentials, profile names, permission flags and login timestamps. */
export const users = pgTable('auth_user', {
  // --- identity ---
  id: id_field(),

  // --- credentials ---
  username: varchar('username', { length: 150 })
    .notNull()
    .unique(),

  email: varchar('email', { length: 254 }).notNull(),

  password: varchar('password', { length: 128 }).notNull(),

  // --- profile ---
  first_name: varchar('first_name', { length: 150 }).notNull(),

  last_name: varchar('last_name', { length: 150 }).notNull(),

  // --- permission flags (only is_active defaults to true) ---
  is_active: boolean('is_active')
    .notNull()
    .default(true),

  is_staff: boolean('is_staff')
    .notNull()
    .default(false),

  is_superuser: boolean('is_superuser')
    .notNull()
    .default(false),

  // --- timestamps ---
  date_joined: timestamp('date_joined', { withTimezone: true })
    .notNull()
    .defaultNow(),

  // Nullable: no default and no NOT NULL constraint.
  last_login: timestamp('last_login', { withTimezone: true }),
}, (cols) => ({
  // Secondary index on the username column.
  usernameIdx: index('auth_user_username_idx').on(cols.username),
}));
|
||||
|
||||
/** One-to-many relations from a user to each kind of record declared here. */
export const usersRelations = relations(users, (helpers) => {
  const { many } = helpers;

  return {
    tags: many(tags),
    kv_tags: many(kv_tags),
    seeds: many(seeds),
    crawls: many(crawls),
    crawl_schedules: many(crawl_schedules),
    snapshots: many(snapshots),
    archive_results: many(archive_results),
  };
});
|
||||
|
||||
// ============================================
|
||||
// TAG TABLE (Old-style tags)
|
||||
// ============================================
|
||||
|
||||
/** `core_tag` table: old-style tags with a unique name and a unique slug. */
export const tags = pgTable('core_tag', {
  // --- identity ---
  id: id_field(),
  abid: abid_field(),

  // --- bookkeeping timestamps ---
  created_at: created_at_field(),
  modified_at: modified_at_field(),

  // --- owner; rows are removed together with the referenced user ---
  created_by_id: uuid('created_by_id')
    .references(() => users.id, { onDelete: 'cascade' })
    .notNull(),

  // --- tag data ---
  name: varchar('name', { length: 100 })
    .notNull()
    .unique(),

  slug: varchar('slug', { length: 100 })
    .notNull()
    .unique(),
}, (cols) => ({
  // Secondary indexes.
  createdAtIdx: index('core_tag_created_at_idx').on(cols.created_at),
  createdByIdx: index('core_tag_created_by_idx').on(cols.created_by_id),
  abidIdx: index('core_tag_abid_idx').on(cols.abid),
}));
|
||||
|
||||
/** Relations of a tag: its creating user, and its `snapshot_tags` rows. */
export const tagsRelations = relations(tags, (helpers) => {
  const { one, many } = helpers;

  // Resolved through tags.created_by_id -> users.id.
  const created_by = one(users, {
    fields: [tags.created_by_id],
    references: [users.id],
  });

  return {
    created_by,
    snapshots: many(snapshot_tags),
  };
});
|
||||
|
||||
// ============================================
|
||||
// KVTAG TABLE (Key-value tags)
|
||||
// ============================================
|
||||
|
||||
/** `core_kvtags` table: key-value tags attached to arbitrary objects via (obj_type, obj_id). */
export const kv_tags = pgTable('core_kvtags', {
  // --- identity ---
  id: id_field(),

  // --- bookkeeping timestamp ---
  created_at: created_at_field(),

  // --- key / value; the value is nullable ---
  name: varchar('name', { length: 255 }).notNull(),
  value: text('value'),

  // --- generic foreign key (handled in app logic, no DB-level reference) ---
  obj_type: varchar('obj_type', { length: 100 }).notNull(),
  obj_id: uuid('obj_id').notNull(),
}, (cols) => ({
  // A given object may carry each tag name at most once.
  uniqueObjTag: unique().on(cols.obj_id, cols.name),

  // Secondary indexes.
  createdAtIdx: index('core_kvtags_created_at_idx').on(cols.created_at),
  objTypeIdx: index('core_kvtags_obj_type_idx').on(cols.obj_type),
  objIdIdx: index('core_kvtags_obj_id_idx').on(cols.obj_id),
}));
|
||||
|
||||
// KVTag targets its object through the generic (obj_type, obj_id) pair, so no
// typed relations are declared here; the generic foreign key is handled in
// application logic. The callback takes no helpers because none are used.
export const kv_tagsRelations = relations(kv_tags, () => ({}));
|
||||
|
||||
// ============================================
|
||||
// SEED TABLE
|
||||
// ============================================
|
||||
|
||||
/** `crawls_seed` table: a URI source plus the extractor, labels and config used to crawl it. */
export const seeds = pgTable('crawls_seed', {
  // --- identity ---
  id: id_field(),
  abid: abid_field(),

  // --- bookkeeping timestamps ---
  created_at: created_at_field(),
  modified_at: modified_at_field(),

  // --- owner; rows are removed together with the referenced user ---
  created_by_id: uuid('created_by_id')
    .references(() => users.id, { onDelete: 'cascade' })
    .notNull(),

  // --- source configuration ---
  uri: text('uri').notNull(),

  extractor: varchar('extractor', { length: 32 })
    .notNull()
    .default('auto'),

  tags_str: varchar('tags_str', { length: 255 })
    .notNull()
    .default(''),

  label: varchar('label', { length: 255 })
    .notNull()
    .default(''),

  config: json('config')
    .notNull()
    .default({}),

  // --- storage ---
  output_dir: varchar('output_dir', { length: 255 })
    .notNull()
    .default(''),

  // --- metadata ---
  notes: notes_field(),

  // --- health counters ---
  ...health_fields(),
}, (cols) => ({
  // Composite uniqueness: per user, one row per (uri, extractor) and one per label.
  uniqueUserUriExtractor: unique().on(cols.created_by_id, cols.uri, cols.extractor),
  uniqueUserLabel: unique().on(cols.created_by_id, cols.label),

  // Secondary indexes.
  createdAtIdx: index('crawls_seed_created_at_idx').on(cols.created_at),
  createdByIdx: index('crawls_seed_created_by_idx').on(cols.created_by_id),
  abidIdx: index('crawls_seed_abid_idx').on(cols.abid),
}));
|
||||
|
||||
/** Relations of a seed: its creating user and its crawls. */
export const seedsRelations = relations(seeds, (helpers) => {
  const { one, many } = helpers;

  // Resolved through seeds.created_by_id -> users.id.
  const created_by = one(users, {
    fields: [seeds.created_by_id],
    references: [users.id],
  });

  return {
    created_by,
    crawls: many(crawls),
  };
});
|
||||
|
||||
// ============================================
|
||||
// CRAWL SCHEDULE TABLE
|
||||
// ============================================
|
||||
|
||||
/**
 * `crawls_crawlschedule` table: a schedule string attached to a template crawl.
 *
 * This table and `crawls` reference each other (`template_id` here,
 * `schedule_id` there). The `template_id` reference callback is therefore
 * given an explicit `AnyPgColumn` return type so TypeScript does not have to
 * infer each table's type from the other.
 */
export const crawl_schedules = pgTable('crawls_crawlschedule', {
  // Primary Key & ABID
  id: id_field(),
  abid: abid_field(),

  // Timestamps
  created_at: created_at_field(),
  modified_at: modified_at_field(),

  // Foreign Keys
  created_by_id: uuid('created_by_id')
    .notNull()
    .references(() => users.id, { onDelete: 'cascade' }),

  // Explicit return type breaks the crawl_schedules <-> crawls inference cycle.
  template_id: uuid('template_id')
    .notNull()
    .references((): AnyPgColumn => crawls.id, { onDelete: 'cascade' }),

  // Schedule Configuration
  schedule: varchar('schedule', { length: 64 })
    .notNull(),

  is_enabled: boolean('is_enabled')
    .default(true)
    .notNull(),

  label: varchar('label', { length: 64 })
    .default('')
    .notNull(),

  // Metadata
  notes: notes_field(),

  // Health Tracking
  ...health_fields(),

}, (table) => ({
  // Indexes
  createdAtIdx: index('crawls_crawlschedule_created_at_idx').on(table.created_at),
  createdByIdx: index('crawls_crawlschedule_created_by_idx').on(table.created_by_id),
  templateIdx: index('crawls_crawlschedule_template_idx').on(table.template_id),
  abidIdx: index('crawls_crawlschedule_abid_idx').on(table.abid),
}));
|
||||
|
||||
/** Relations of a crawl schedule: its creating user, its template crawl, and its crawls. */
export const crawl_schedulesRelations = relations(crawl_schedules, (helpers) => {
  const { one, many } = helpers;

  // Resolved through crawl_schedules.created_by_id -> users.id.
  const created_by = one(users, {
    fields: [crawl_schedules.created_by_id],
    references: [users.id],
  });

  // Resolved through crawl_schedules.template_id -> crawls.id.
  const template = one(crawls, {
    fields: [crawl_schedules.template_id],
    references: [crawls.id],
  });

  return {
    created_by,
    template,
    crawls: many(crawls),
  };
});
|
||||
|
||||
// ============================================
|
||||
// CRAWL TABLE
|
||||
// ============================================
|
||||
|
||||
/** `crawls_crawl` table: one crawl of a seed, optionally attached to a schedule. */
export const crawls = pgTable('crawls_crawl', {
  // --- identity ---
  id: id_field(),
  abid: abid_field(),

  // --- bookkeeping timestamps ---
  created_at: created_at_field(),
  modified_at: modified_at_field(),

  // --- foreign keys ---
  // Owner; rows are removed together with the referenced user.
  created_by_id: uuid('created_by_id')
    .references(() => users.id, { onDelete: 'cascade' })
    .notNull(),

  // Required seed; 'restrict' blocks deleting a seed that is still referenced.
  seed_id: uuid('seed_id')
    .references(() => seeds.id, { onDelete: 'restrict' })
    .notNull(),

  // Optional schedule; set to NULL when the schedule row is deleted.
  schedule_id: uuid('schedule_id')
    .references(() => crawl_schedules.id, { onDelete: 'set null' }),

  // --- crawl data ---
  urls: text('urls')
    .notNull()
    .default(''),

  config: json('config')
    .notNull()
    .default({}),

  max_depth: smallint('max_depth')
    .notNull()
    .default(0),

  tags_str: varchar('tags_str', { length: 1024 })
    .notNull()
    .default(''),

  // Nullable UUID with no DB-level reference declared.
  persona_id: uuid('persona_id'),

  label: varchar('label', { length: 64 })
    .notNull()
    .default(''),

  // --- storage ---
  output_dir: varchar('output_dir', { length: 255 })
    .notNull()
    .default(''),

  // --- metadata ---
  notes: notes_field(),

  // --- state machine (status, retry_at) ---
  ...state_machine_fields(),

  // --- health counters ---
  ...health_fields(),
}, (cols) => ({
  // Secondary indexes.
  createdAtIdx: index('crawls_crawl_created_at_idx').on(cols.created_at),
  createdByIdx: index('crawls_crawl_created_by_idx').on(cols.created_by_id),
  seedIdx: index('crawls_crawl_seed_idx').on(cols.seed_id),
  scheduleIdx: index('crawls_crawl_schedule_idx').on(cols.schedule_id),
  statusIdx: index('crawls_crawl_status_idx').on(cols.status),
  retryAtIdx: index('crawls_crawl_retry_at_idx').on(cols.retry_at),
  abidIdx: index('crawls_crawl_abid_idx').on(cols.abid),
}));
|
||||
|
||||
/** Relations of a crawl: creating user, seed, schedule, plus its snapshots and outlinks. */
export const crawlsRelations = relations(crawls, (helpers) => {
  const { one, many } = helpers;

  // Resolved through crawls.created_by_id -> users.id.
  const created_by = one(users, {
    fields: [crawls.created_by_id],
    references: [users.id],
  });

  // Resolved through crawls.seed_id -> seeds.id.
  const seed = one(seeds, {
    fields: [crawls.seed_id],
    references: [seeds.id],
  });

  // Resolved through crawls.schedule_id -> crawl_schedules.id.
  const schedule = one(crawl_schedules, {
    fields: [crawls.schedule_id],
    references: [crawl_schedules.id],
  });

  return {
    created_by,
    seed,
    schedule,
    snapshots: many(snapshots),
    outlinks: many(outlinks),
  };
});
|
||||
|
||||
// ============================================
|
||||
// SNAPSHOT TABLE
|
||||
// ============================================
|
||||
|
||||
/**
 * `core_snapshot` table definition.
 *
 * Columns: shared id/abid/timestamp helpers, a required owner FK, an optional
 * crawl FK, the URL data (`url` and `timestamp` are each unique), optional
 * content metadata, a JSON `config`, plus the shared notes, state-machine and
 * health column groups. The second argument declares one index per lookup column.
 */
export const snapshots = pgTable(
  'core_snapshot',
  {
    // --- identity ---
    id: id_field(),
    abid: abid_field(),

    // --- audit timestamps ---
    created_at: created_at_field(),
    modified_at: modified_at_field(),

    // --- foreign keys (both cascade when the parent row is deleted) ---
    created_by_id: uuid('created_by_id')
      .references(() => users.id, { onDelete: 'cascade' })
      .notNull(),
    // Nullable: no notNull() on this column.
    crawl_id: uuid('crawl_id').references(() => crawls.id, { onDelete: 'cascade' }),

    // --- URL data ---
    url: text('url').notNull().unique(),
    timestamp: varchar('timestamp', { length: 32 }).notNull().unique(),
    bookmarked_at: timestamp('bookmarked_at', { withTimezone: true }).notNull(),

    // --- content metadata (title / downloaded_at are nullable) ---
    title: varchar('title', { length: 512 }),
    downloaded_at: timestamp('downloaded_at', { withTimezone: true }),
    config: json('config').notNull().default({}),

    // --- storage ---
    output_dir: varchar('output_dir', { length: 255 }),

    // --- free-form notes ---
    notes: notes_field(),

    // --- shared column groups: status/retry_at, then use counters ---
    ...state_machine_fields(),
    ...health_fields(),
  },
  (t) => ({
    // Secondary indexes, one per column.
    createdAtIdx: index('core_snapshot_created_at_idx').on(t.created_at),
    createdByIdx: index('core_snapshot_created_by_idx').on(t.created_by_id),
    crawlIdx: index('core_snapshot_crawl_idx').on(t.crawl_id),
    urlIdx: index('core_snapshot_url_idx').on(t.url),
    timestampIdx: index('core_snapshot_timestamp_idx').on(t.timestamp),
    bookmarkedAtIdx: index('core_snapshot_bookmarked_at_idx').on(t.bookmarked_at),
    downloadedAtIdx: index('core_snapshot_downloaded_at_idx').on(t.downloaded_at),
    titleIdx: index('core_snapshot_title_idx').on(t.title),
    statusIdx: index('core_snapshot_status_idx').on(t.status),
    retryAtIdx: index('core_snapshot_retry_at_idx').on(t.retry_at),
    abidIdx: index('core_snapshot_abid_idx').on(t.abid),
  }),
);
|
||||
|
||||
/**
 * Relational-query wiring for `snapshots`.
 *
 * `created_by` and `crawl` are many-to-one joins on `created_by_id` and
 * `crawl_id`; `tags` (via the `snapshot_tags` junction table) and
 * `archive_results` are the one-to-many sides.
 */
export const snapshotsRelations = relations(snapshots, (rel) => {
  const owner = rel.one(users, {
    fields: [snapshots.created_by_id],
    references: [users.id],
  });
  const parentCrawl = rel.one(crawls, {
    fields: [snapshots.crawl_id],
    references: [crawls.id],
  });

  return {
    created_by: owner,
    crawl: parentCrawl,
    tags: rel.many(snapshot_tags),
    archive_results: rel.many(archive_results),
  };
});
|
||||
|
||||
// ============================================
|
||||
// ARCHIVE RESULT TABLE
|
||||
// ============================================
|
||||
|
||||
/**
 * `core_archiveresult` table definition.
 *
 * Columns: shared id/abid/timestamp helpers, required owner and snapshot FKs
 * (both cascade on delete), the extractor name and its optional command
 * details, optional start/end timestamps, storage fields, plus the shared
 * notes, state-machine and health column groups.
 */
export const archive_results = pgTable(
  'core_archiveresult',
  {
    // --- identity ---
    id: id_field(),
    abid: abid_field(),

    // --- audit timestamps ---
    created_at: created_at_field(),
    modified_at: modified_at_field(),

    // --- foreign keys ---
    created_by_id: uuid('created_by_id')
      .references(() => users.id, { onDelete: 'cascade' })
      .notNull(),
    snapshot_id: uuid('snapshot_id')
      .references(() => snapshots.id, { onDelete: 'cascade' })
      .notNull(),

    // --- extraction data (only `extractor` is required) ---
    extractor: varchar('extractor', { length: 32 }).notNull(),
    pwd: varchar('pwd', { length: 256 }),
    cmd: json('cmd'),
    cmd_version: varchar('cmd_version', { length: 128 }),
    output: varchar('output', { length: 1024 }),

    // --- execution timing (nullable) ---
    start_ts: timestamp('start_ts', { withTimezone: true }),
    end_ts: timestamp('end_ts', { withTimezone: true }),

    // --- storage ---
    output_dir: varchar('output_dir', { length: 256 }),
    // Plain UUID column: no references() is declared for it here.
    iface_id: uuid('iface_id'),

    // --- free-form notes ---
    notes: notes_field(),

    // --- shared column groups: status/retry_at, then use counters ---
    ...state_machine_fields(),
    ...health_fields(),
  },
  (t) => ({
    // Secondary indexes, one per column.
    createdAtIdx: index('core_archiveresult_created_at_idx').on(t.created_at),
    createdByIdx: index('core_archiveresult_created_by_idx').on(t.created_by_id),
    snapshotIdx: index('core_archiveresult_snapshot_idx').on(t.snapshot_id),
    extractorIdx: index('core_archiveresult_extractor_idx').on(t.extractor),
    statusIdx: index('core_archiveresult_status_idx').on(t.status),
    retryAtIdx: index('core_archiveresult_retry_at_idx').on(t.retry_at),
    abidIdx: index('core_archiveresult_abid_idx').on(t.abid),
  }),
);
|
||||
|
||||
/**
 * Relational-query wiring for `archive_results`.
 *
 * `created_by` and `snapshot` are many-to-one joins on `created_by_id` and
 * `snapshot_id`; `outlinks` is the one-to-many side.
 */
export const archive_resultsRelations = relations(archive_results, (rel) => {
  const owner = rel.one(users, {
    fields: [archive_results.created_by_id],
    references: [users.id],
  });
  const parentSnapshot = rel.one(snapshots, {
    fields: [archive_results.snapshot_id],
    references: [snapshots.id],
  });

  return {
    created_by: owner,
    snapshot: parentSnapshot,
    outlinks: rel.many(outlinks),
  };
});
|
||||
|
||||
// ============================================
|
||||
// SNAPSHOT TAGS (Junction Table)
|
||||
// ============================================
|
||||
|
||||
/**
 * `core_snapshot_tags` junction table linking snapshots to tags.
 *
 * Each row pairs one `snapshot_id` with one `tag_id`; the pair is unique, and
 * rows are removed automatically when either parent row is deleted.
 */
export const snapshot_tags = pgTable('core_snapshot_tags', {
  // Surrogate key. Declared as an identity column so the database assigns the
  // value on insert; BY DEFAULT (rather than ALWAYS) still accepts an
  // explicitly supplied id, so existing inserts that pass one keep working.
  id: integer('id')
    .primaryKey()
    .generatedByDefaultAsIdentity(),

  snapshot_id: uuid('snapshot_id')
    .notNull()
    .references(() => snapshots.id, { onDelete: 'cascade' }),

  tag_id: uuid('tag_id')
    .notNull()
    .references(() => tags.id, { onDelete: 'cascade' }),

}, (table) => ({
  // A tag can be attached to a given snapshot at most once.
  uniqueSnapshotTag: unique().on(table.snapshot_id, table.tag_id),
}));
|
||||
|
||||
/**
 * Relational-query wiring for the `snapshot_tags` junction table: each row
 * resolves to exactly one `snapshot` and one `tag` through its two FK columns.
 */
export const snapshot_tagsRelations = relations(snapshot_tags, (rel) => {
  const linkedSnapshot = rel.one(snapshots, {
    fields: [snapshot_tags.snapshot_id],
    references: [snapshots.id],
  });
  const linkedTag = rel.one(tags, {
    fields: [snapshot_tags.tag_id],
    references: [tags.id],
  });

  return { snapshot: linkedSnapshot, tag: linkedTag };
});
|
||||
|
||||
// ============================================
|
||||
// OUTLINK TABLE
|
||||
// ============================================
|
||||
|
||||
/**
 * `crawls_outlink` table definition.
 *
 * Stores a `src` -> `dst` link pair, the crawl it belongs to (required,
 * cascades on delete) and an optional `via_id` pointing at an archive result
 * (set to NULL when that result is deleted). The (src, dst, via_id) triple is unique.
 */
export const outlinks = pgTable(
  'crawls_outlink',
  {
    // --- identity ---
    id: id_field(),

    // --- link endpoints ---
    src: text('src').notNull(),
    dst: text('dst').notNull(),

    // --- foreign keys ---
    crawl_id: uuid('crawl_id')
      .references(() => crawls.id, { onDelete: 'cascade' })
      .notNull(),
    // Nullable so that 'set null' can clear it.
    via_id: uuid('via_id').references(() => archive_results.id, { onDelete: 'set null' }),
  },
  (t) => ({
    uniqueSrcDstVia: unique().on(t.src, t.dst, t.via_id),
  }),
);
|
||||
|
||||
/**
 * Relational-query wiring for `outlinks`: `crawl` joins on `crawl_id`, and
 * `via` joins on `via_id` to the archive result referenced by that column.
 */
export const outlinksRelations = relations(outlinks, (rel) => {
  const parentCrawl = rel.one(crawls, {
    fields: [outlinks.crawl_id],
    references: [crawls.id],
  });
  const viaResult = rel.one(archive_results, {
    fields: [outlinks.via_id],
    references: [archive_results.id],
  });

  return { crawl: parentCrawl, via: viaResult };
});
|
||||
@ -1,30 +1,82 @@
|
||||
// ArchiveBox Schema - Drizzle ORM
|
||||
// Drizzle uses TypeScript schema definitions with a chainable API
|
||||
// Line count: ~340 lines
|
||||
// ArchiveBox Schema - Drizzle ORM (READABLE VERSION)
|
||||
// Improved formatting for better readability
|
||||
// Line count: ~380 lines (slightly longer but MUCH easier to read)
|
||||
|
||||
import { pgTable, uuid, varchar, text, boolean, timestamp, smallint, integer, json, unique, index } from 'drizzle-orm/pg-core';
|
||||
import { relations } from 'drizzle-orm';
|
||||
import { uuidv7 } from 'uuidv7';
|
||||
|
||||
// Helper for UUIDv7 default
|
||||
// ============================================
|
||||
// HELPERS - Reusable field patterns
|
||||
// ============================================
|
||||
|
||||
// Default-value factory for UUID columns: returns the result of a fresh uuidv7() call.
const uuidv7Default = () => {
  return uuidv7();
};
|
||||
|
||||
// Common field patterns to reduce repetition
|
||||
// Builds the standard `id` column: a UUID primary key whose default value is
// produced by calling uuidv7Default (registered through $defaultFn).
const id_field = () => {
  const column = uuid('id');
  return column.primaryKey().$defaultFn(uuidv7Default);
};
|
||||
// Builds the standard `abid` column: varchar(30), unique and NOT NULL.
const abid_field = () => {
  const column = varchar('abid', { length: 30 });
  return column.unique().notNull();
};
|
||||
// Builds the standard `created_at` column: timezone-aware timestamp,
// NOT NULL, defaulting to now().
const created_at_field = () => {
  const column = timestamp('created_at', { withTimezone: true });
  return column.defaultNow().notNull();
};
|
||||
// Builds the standard `modified_at` column: timezone-aware timestamp, NOT NULL.
// defaultNow() only supplies the value on INSERT, so without $onUpdate the
// column would keep its creation time forever; $onUpdate makes Drizzle write
// the current time whenever a row is updated and no explicit value is given.
const modified_at_field = () =>
  timestamp('modified_at', { withTimezone: true })
    .defaultNow()
    .$onUpdate(() => new Date())
    .notNull();
|
||||
// Builds the standard `notes` column: NOT NULL text defaulting to the empty string.
const notes_field = () => {
  const column = text('notes');
  return column.default('').notNull();
};
|
||||
|
||||
// Column group meant to be spread into a table definition: two integer
// counters, each NOT NULL with a default of 0.
const health_fields = () => {
  const num_uses_failed = integer('num_uses_failed').default(0).notNull();
  const num_uses_succeeded = integer('num_uses_succeeded').default(0).notNull();
  return { num_uses_failed, num_uses_succeeded };
};
|
||||
|
||||
// Column group meant to be spread into a table definition:
//   status   - varchar(16), NOT NULL, default 'queued'
//   retry_at - timezone-aware timestamp, NOT NULL, default now()
const state_machine_fields = () => {
  const status = varchar('status', { length: 16 }).default('queued').notNull();
  const retry_at = timestamp('retry_at', { withTimezone: true }).defaultNow().notNull();
  return { status, retry_at };
};
|
||||
|
||||
// ============================================
|
||||
// User Model (Django's default User)
|
||||
// USER TABLE
|
||||
// ============================================
|
||||
|
||||
/**
 * `auth_user` table definition.
 *
 * Every column key is declared exactly once. The previous text listed each key
 * twice (a one-line form followed by a multi-line form), which TypeScript
 * rejects as duplicate object-literal properties; the later declaration of
 * each key is the one kept here.
 */
export const users = pgTable('auth_user', {
  // Primary Key
  id: id_field(),

  // Core Auth Fields
  username: varchar('username', { length: 150 })
    .unique()
    .notNull(),

  email: varchar('email', { length: 254 })
    .notNull(),

  password: varchar('password', { length: 128 })
    .notNull(),

  // Profile Fields
  first_name: varchar('first_name', { length: 150 })
    .notNull(),

  last_name: varchar('last_name', { length: 150 })
    .notNull(),

  // Permission Flags
  is_active: boolean('is_active')
    .default(true)
    .notNull(),

  is_staff: boolean('is_staff')
    .default(false)
    .notNull(),

  is_superuser: boolean('is_superuser')
    .default(false)
    .notNull(),

  // Timestamps
  date_joined: timestamp('date_joined', { withTimezone: true })
    .defaultNow()
    .notNull(),

  // Nullable: no notNull() on this column.
  last_login: timestamp('last_login', { withTimezone: true }),

}, (table) => ({
  // Indexes
  usernameIdx: index('auth_user_username_idx').on(table.username),
}));
|
||||
|
||||
@ -39,17 +91,34 @@ export const usersRelations = relations(users, ({ many }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// Old-style Tag Model (being phased out)
|
||||
// TAG TABLE (Old-style tags)
|
||||
// ============================================
|
||||
|
||||
export const tags = pgTable('core_tag', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
name: varchar('name', { length: 100 }).unique().notNull(),
|
||||
slug: varchar('slug', { length: 100 }).unique().notNull(),
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
// Data Fields
|
||||
name: varchar('name', { length: 100 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
slug: varchar('slug', { length: 100 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('core_tag_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('core_tag_created_by_idx').on(table.created_by_id),
|
||||
abidIdx: index('core_tag_abid_idx').on(table.abid),
|
||||
@ -64,17 +133,34 @@ export const tagsRelations = relations(tags, ({ one, many }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// New-style KVTag Model (key-value tags)
|
||||
// KVTAG TABLE (Key-value tags)
|
||||
// ============================================
|
||||
|
||||
export const kv_tags = pgTable('core_kvtags', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
name: varchar('name', { length: 255 }).notNull(),
|
||||
// Primary Key
|
||||
id: id_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
|
||||
// Tag Data
|
||||
name: varchar('name', { length: 255 })
|
||||
.notNull(),
|
||||
|
||||
value: text('value'),
|
||||
obj_type: varchar('obj_type', { length: 100 }).notNull(),
|
||||
obj_id: uuid('obj_id').notNull(),
|
||||
|
||||
// Generic Foreign Key (handled in app logic)
|
||||
obj_type: varchar('obj_type', { length: 100 })
|
||||
.notNull(),
|
||||
|
||||
obj_id: uuid('obj_id')
|
||||
.notNull(),
|
||||
|
||||
}, (table) => ({
|
||||
// Constraints
|
||||
uniqueObjTag: unique().on(table.obj_id, table.name),
|
||||
|
||||
// Indexes
|
||||
createdAtIdx: index('core_kvtags_created_at_idx').on(table.created_at),
|
||||
objTypeIdx: index('core_kvtags_obj_type_idx').on(table.obj_type),
|
||||
objIdIdx: index('core_kvtags_obj_id_idx').on(table.obj_id),
|
||||
@ -85,26 +171,67 @@ export const kv_tagsRelations = relations(kv_tags, ({ one }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// Seed Model (URL source)
|
||||
// SEED TABLE
|
||||
// ============================================
|
||||
|
||||
export const seeds = pgTable('crawls_seed', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
uri: text('uri').notNull(),
|
||||
extractor: varchar('extractor', { length: 32 }).default('auto').notNull(),
|
||||
tags_str: varchar('tags_str', { length: 255 }).default('').notNull(),
|
||||
label: varchar('label', { length: 255 }).default('').notNull(),
|
||||
config: json('config').default({}).notNull(),
|
||||
output_dir: varchar('output_dir', { length: 255 }).default('').notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
// Source Configuration
|
||||
uri: text('uri')
|
||||
.notNull(),
|
||||
|
||||
extractor: varchar('extractor', { length: 32 })
|
||||
.default('auto')
|
||||
.notNull(),
|
||||
|
||||
tags_str: varchar('tags_str', { length: 255 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
label: varchar('label', { length: 255 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
config: json('config')
|
||||
.default({})
|
||||
.notNull(),
|
||||
|
||||
// Storage
|
||||
output_dir: varchar('output_dir', { length: 255 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
uniqueUserUriExtractor: unique().on(table.created_by_id, table.uri, table.extractor),
|
||||
uniqueUserLabel: unique().on(table.created_by_id, table.label),
|
||||
// Constraints
|
||||
uniqueUserUriExtractor: unique().on(
|
||||
table.created_by_id,
|
||||
table.uri,
|
||||
table.extractor
|
||||
),
|
||||
uniqueUserLabel: unique().on(
|
||||
table.created_by_id,
|
||||
table.label
|
||||
),
|
||||
|
||||
// Indexes
|
||||
createdAtIdx: index('crawls_seed_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('crawls_seed_created_by_idx').on(table.created_by_id),
|
||||
abidIdx: index('crawls_seed_abid_idx').on(table.abid),
|
||||
@ -119,22 +246,47 @@ export const seedsRelations = relations(seeds, ({ one, many }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// CrawlSchedule Model
|
||||
// CRAWL SCHEDULE TABLE
|
||||
// ============================================
|
||||
|
||||
export const crawl_schedules = pgTable('crawls_crawlschedule', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
template_id: uuid('template_id').notNull().references(() => crawls.id, { onDelete: 'cascade' }),
|
||||
schedule: varchar('schedule', { length: 64 }).notNull(),
|
||||
is_enabled: boolean('is_enabled').default(true).notNull(),
|
||||
label: varchar('label', { length: 64 }).default('').notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
template_id: uuid('template_id')
|
||||
.notNull()
|
||||
.references(() => crawls.id, { onDelete: 'cascade' }),
|
||||
|
||||
// Schedule Configuration
|
||||
schedule: varchar('schedule', { length: 64 })
|
||||
.notNull(),
|
||||
|
||||
is_enabled: boolean('is_enabled')
|
||||
.default(true)
|
||||
.notNull(),
|
||||
|
||||
label: varchar('label', { length: 64 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('crawls_crawlschedule_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('crawls_crawlschedule_created_by_idx').on(table.created_by_id),
|
||||
templateIdx: index('crawls_crawlschedule_template_idx').on(table.template_id),
|
||||
@ -154,29 +306,69 @@ export const crawl_schedulesRelations = relations(crawl_schedules, ({ one, many
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// Crawl Model (archiving session)
|
||||
// CRAWL TABLE
|
||||
// ============================================
|
||||
|
||||
export const crawls = pgTable('crawls_crawl', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
seed_id: uuid('seed_id').notNull().references(() => seeds.id, { onDelete: 'restrict' }),
|
||||
urls: text('urls').default('').notNull(),
|
||||
config: json('config').default({}).notNull(),
|
||||
max_depth: smallint('max_depth').default(0).notNull(),
|
||||
tags_str: varchar('tags_str', { length: 1024 }).default('').notNull(),
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
seed_id: uuid('seed_id')
|
||||
.notNull()
|
||||
.references(() => seeds.id, { onDelete: 'restrict' }),
|
||||
|
||||
schedule_id: uuid('schedule_id')
|
||||
.references(() => crawl_schedules.id, { onDelete: 'set null' }),
|
||||
|
||||
// Crawl Data
|
||||
urls: text('urls')
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
config: json('config')
|
||||
.default({})
|
||||
.notNull(),
|
||||
|
||||
max_depth: smallint('max_depth')
|
||||
.default(0)
|
||||
.notNull(),
|
||||
|
||||
tags_str: varchar('tags_str', { length: 1024 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
persona_id: uuid('persona_id'),
|
||||
label: varchar('label', { length: 64 }).default('').notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
schedule_id: uuid('schedule_id').references(() => crawl_schedules.id, { onDelete: 'set null' }),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
output_dir: varchar('output_dir', { length: 255 }).default('').notNull(),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
|
||||
label: varchar('label', { length: 64 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
// Storage
|
||||
output_dir: varchar('output_dir', { length: 255 })
|
||||
.default('')
|
||||
.notNull(),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// State Machine
|
||||
...state_machine_fields(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('crawls_crawl_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('crawls_crawl_created_by_idx').on(table.created_by_id),
|
||||
seedIdx: index('crawls_crawl_seed_idx').on(table.seed_id),
|
||||
@ -204,28 +396,61 @@ export const crawlsRelations = relations(crawls, ({ one, many }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// Snapshot Model (archived URL)
|
||||
// SNAPSHOT TABLE
|
||||
// ============================================
|
||||
|
||||
export const snapshots = pgTable('core_snapshot', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
url: text('url').unique().notNull(),
|
||||
timestamp: varchar('timestamp', { length: 32 }).unique().notNull(),
|
||||
bookmarked_at: timestamp('bookmarked_at', { withTimezone: true }).notNull(),
|
||||
crawl_id: uuid('crawl_id').references(() => crawls.id, { onDelete: 'cascade' }),
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
crawl_id: uuid('crawl_id')
|
||||
.references(() => crawls.id, { onDelete: 'cascade' }),
|
||||
|
||||
// URL Data
|
||||
url: text('url')
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
timestamp: varchar('timestamp', { length: 32 })
|
||||
.unique()
|
||||
.notNull(),
|
||||
|
||||
bookmarked_at: timestamp('bookmarked_at', { withTimezone: true })
|
||||
.notNull(),
|
||||
|
||||
// Content Metadata
|
||||
title: varchar('title', { length: 512 }),
|
||||
|
||||
downloaded_at: timestamp('downloaded_at', { withTimezone: true }),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
config: json('config').default({}).notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
|
||||
config: json('config')
|
||||
.default({})
|
||||
.notNull(),
|
||||
|
||||
// Storage
|
||||
output_dir: varchar('output_dir', { length: 255 }),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// State Machine
|
||||
...state_machine_fields(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('core_snapshot_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('core_snapshot_created_by_idx').on(table.created_by_id),
|
||||
crawlIdx: index('core_snapshot_crawl_idx').on(table.crawl_id),
|
||||
@ -253,30 +478,59 @@ export const snapshotsRelations = relations(snapshots, ({ one, many }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// ArchiveResult Model (extraction result)
|
||||
// ARCHIVE RESULT TABLE
|
||||
// ============================================
|
||||
|
||||
export const archive_results = pgTable('core_archiveresult', {
|
||||
id: uuid('id').primaryKey().$defaultFn(uuidv7Default),
|
||||
abid: varchar('abid', { length: 30 }).unique().notNull(),
|
||||
created_at: timestamp('created_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
modified_at: timestamp('modified_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
created_by_id: uuid('created_by_id').notNull().references(() => users.id, { onDelete: 'cascade' }),
|
||||
snapshot_id: uuid('snapshot_id').notNull().references(() => snapshots.id, { onDelete: 'cascade' }),
|
||||
extractor: varchar('extractor', { length: 32 }).notNull(),
|
||||
// Primary Key & ABID
|
||||
id: id_field(),
|
||||
abid: abid_field(),
|
||||
|
||||
// Timestamps
|
||||
created_at: created_at_field(),
|
||||
modified_at: modified_at_field(),
|
||||
|
||||
// Foreign Keys
|
||||
created_by_id: uuid('created_by_id')
|
||||
.notNull()
|
||||
.references(() => users.id, { onDelete: 'cascade' }),
|
||||
|
||||
snapshot_id: uuid('snapshot_id')
|
||||
.notNull()
|
||||
.references(() => snapshots.id, { onDelete: 'cascade' }),
|
||||
|
||||
// Extraction Data
|
||||
extractor: varchar('extractor', { length: 32 })
|
||||
.notNull(),
|
||||
|
||||
pwd: varchar('pwd', { length: 256 }),
|
||||
|
||||
cmd: json('cmd'),
|
||||
|
||||
cmd_version: varchar('cmd_version', { length: 128 }),
|
||||
|
||||
output: varchar('output', { length: 1024 }),
|
||||
|
||||
// Execution Timing
|
||||
start_ts: timestamp('start_ts', { withTimezone: true }),
|
||||
end_ts: timestamp('end_ts', { withTimezone: true }),
|
||||
status: varchar('status', { length: 16 }).default('queued').notNull(),
|
||||
retry_at: timestamp('retry_at', { withTimezone: true }).defaultNow().notNull(),
|
||||
notes: text('notes').default('').notNull(),
|
||||
|
||||
// Storage
|
||||
output_dir: varchar('output_dir', { length: 256 }),
|
||||
|
||||
iface_id: uuid('iface_id'),
|
||||
num_uses_failed: integer('num_uses_failed').default(0).notNull(),
|
||||
num_uses_succeeded: integer('num_uses_succeeded').default(0).notNull(),
|
||||
|
||||
// Metadata
|
||||
notes: notes_field(),
|
||||
|
||||
// State Machine
|
||||
...state_machine_fields(),
|
||||
|
||||
// Health Tracking
|
||||
...health_fields(),
|
||||
|
||||
}, (table) => ({
|
||||
// Indexes
|
||||
createdAtIdx: index('core_archiveresult_created_at_idx').on(table.created_at),
|
||||
createdByIdx: index('core_archiveresult_created_by_idx').on(table.created_by_id),
|
||||
snapshotIdx: index('core_archiveresult_snapshot_idx').on(table.snapshot_id),
|
||||
@ -299,12 +553,21 @@ export const archive_resultsRelations = relations(archive_results, ({ one, many
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// SnapshotTag Junction Table
|
||||
// SNAPSHOT TAGS (Junction Table)
|
||||
// ============================================
|
||||
|
||||
/**
 * `core_snapshot_tags` junction table linking snapshots to tags.
 *
 * Every column key is declared exactly once. The previous text listed `id`,
 * `snapshot_id` and `tag_id` twice each, which TypeScript rejects as duplicate
 * object-literal properties; the later declaration of each key is kept.
 */
export const snapshot_tags = pgTable('core_snapshot_tags', {
  id: integer('id')
    .primaryKey(),

  // Both FKs are required and cascade when the parent row is deleted.
  snapshot_id: uuid('snapshot_id')
    .notNull()
    .references(() => snapshots.id, { onDelete: 'cascade' }),

  tag_id: uuid('tag_id')
    .notNull()
    .references(() => tags.id, { onDelete: 'cascade' }),

}, (table) => ({
  // A tag can be attached to a given snapshot at most once.
  uniqueSnapshotTag: unique().on(table.snapshot_id, table.tag_id),
}));
|
||||
@ -321,14 +584,28 @@ export const snapshot_tagsRelations = relations(snapshot_tags, ({ one }) => ({
|
||||
}));
|
||||
|
||||
// ============================================
|
||||
// Outlink Model (link found on a page)
|
||||
// OUTLINK TABLE
|
||||
// ============================================
|
||||
|
||||
/**
 * `crawls_outlink` table definition.
 *
 * Every column key is declared exactly once. The previous text listed each
 * column twice, which TypeScript rejects as duplicate object-literal
 * properties; the later declaration of each key is kept.
 */
export const outlinks = pgTable('crawls_outlink', {
  // Primary Key
  id: id_field(),

  // Link Data
  src: text('src')
    .notNull(),

  dst: text('dst')
    .notNull(),

  // Foreign Keys
  crawl_id: uuid('crawl_id')
    .notNull()
    .references(() => crawls.id, { onDelete: 'cascade' }),

  // Nullable so that 'set null' can clear it when the archive result is deleted.
  via_id: uuid('via_id')
    .references(() => archive_results.id, { onDelete: 'set null' }),

}, (table) => ({
  uniqueSrcDstVia: unique().on(table.src, table.dst, table.via_id),
}));
|
||||
|
||||
@ -1,612 +0,0 @@
|
||||
// ArchiveBox Schema - MikroORM
|
||||
// MikroORM uses TypeScript decorators similar to TypeORM but with different patterns
|
||||
// Line count: ~570 lines
|
||||
|
||||
import {
|
||||
Entity,
|
||||
PrimaryKey,
|
||||
Property,
|
||||
ManyToOne,
|
||||
OneToMany,
|
||||
ManyToMany,
|
||||
Collection,
|
||||
Index,
|
||||
Unique,
|
||||
BeforeCreate,
|
||||
} from '@mikro-orm/core';
|
||||
import { uuidv7 } from 'uuidv7';
|
||||
|
||||
// ============================================
|
||||
// User Entity (Django's default User)
|
||||
// ============================================
|
||||
/**
 * MikroORM entity mapped to the `auth_user` table, with an index on `username`.
 *
 * Holds the account columns plus the inverse (one-to-many) side of every
 * entity that points back at a user through its `created_by` property.
 */
@Entity({ tableName: 'auth_user' })
@Index({ properties: ['username'] })
export class User {
  /** UUID primary key; filled in by `generateId` before insert when still empty. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  // --- credentials ---

  @Property({ type: 'string', length: 150, unique: true })
  username!: string;

  @Property({ type: 'string', length: 254 })
  email!: string;

  @Property({ type: 'string', length: 128 })
  password!: string;

  // --- profile ---

  @Property({ type: 'string', length: 150 })
  first_name!: string;

  @Property({ type: 'string', length: 150 })
  last_name!: string;

  // --- permission flags (decorator default and field initializer agree) ---

  @Property({ type: 'boolean', default: true })
  is_active = true;

  @Property({ type: 'boolean', default: false })
  is_staff = false;

  @Property({ type: 'boolean', default: false })
  is_superuser = false;

  // --- timestamps ---

  /** Set to the current time by the `onCreate` callback. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  date_joined!: Date;

  /** Nullable. */
  @Property({ type: 'timestamptz', nullable: true })
  last_login?: Date;

  // --- inverse sides: rows whose `created_by` is this user ---

  @OneToMany(() => Tag, (t) => t.created_by)
  tags = new Collection<Tag>(this);

  @OneToMany(() => KVTag, (kv) => kv.created_by)
  kv_tags = new Collection<KVTag>(this);

  @OneToMany(() => Seed, (s) => s.created_by)
  seeds = new Collection<Seed>(this);

  @OneToMany(() => Crawl, (c) => c.created_by)
  crawls = new Collection<Crawl>(this);

  @OneToMany(() => CrawlSchedule, (cs) => cs.created_by)
  crawl_schedules = new Collection<CrawlSchedule>(this);

  @OneToMany(() => Snapshot, (snap) => snap.created_by)
  snapshots = new Collection<Snapshot>(this);

  @OneToMany(() => ArchiveResult, (ar) => ar.created_by)
  archive_results = new Collection<ArchiveResult>(this);

  /** Before-create hook: assigns a new UUIDv7 unless an id is already set. */
  @BeforeCreate()
  generateId() {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// Tag Entity (being phased out)
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `core_tag` table (old-style tags, being phased out).
 */
@Entity({ tableName: 'core_tag' })
@Index({ properties: ['created_at'] })
@Index({ properties: ['created_by_id'] })
@Index({ properties: ['abid'] })
export class Tag {
  /** UUID primary key; assigned in {@link Tag.generateId} when unset. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  @Property({ type: 'string', length: 30, unique: true })
  abid!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  /**
   * Stamped on insert and refreshed on every update. Without `onCreate` this
   * non-nullable column would be left unset on the first insert, because
   * `onUpdate` only fires for updates.
   */
  @Property({ type: 'timestamptz', onCreate: () => new Date(), onUpdate: () => new Date() })
  modified_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  @Property({ type: 'string', length: 100, unique: true })
  name!: string;

  @Property({ type: 'string', length: 100, unique: true })
  slug!: string;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  /** Inverse side of `Snapshot.tags`. */
  @ManyToMany(() => Snapshot, snapshot => snapshot.tags)
  snapshots = new Collection<Snapshot>(this);

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// KVTag Entity (key-value tags)
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `core_kvtags` table (key-value tags).
 *
 * A tag is attached to an object through the `obj_type` / `obj_id` pair, and
 * `(obj_id, name)` is unique.
 */
@Entity({ tableName: 'core_kvtags' })
@Unique({ properties: ['obj_id', 'name'] })
@Index({ properties: ['created_at'] })
@Index({ properties: ['obj_type'] })
@Index({ properties: ['obj_id'] })
export class KVTag {
  /** UUID primary key. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  // Tag key and (optional) value
  @Property({ type: 'string', length: 255 })
  name!: string;

  @Property({ type: 'text', nullable: true })
  value?: string;

  // Target object reference
  @Property({ type: 'string', length: 100 })
  obj_type!: string;

  @Property({ type: 'uuid' })
  obj_id!: string;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// Seed Entity
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `crawls_seed` table.
 *
 * Unique per creator on `(uri, extractor)` and on `label`.
 */
@Entity({ tableName: 'crawls_seed' })
@Unique({ properties: ['created_by_id', 'uri', 'extractor'] })
@Unique({ properties: ['created_by_id', 'label'] })
@Index({ properties: ['created_at'] })
@Index({ properties: ['created_by_id'] })
@Index({ properties: ['abid'] })
export class Seed {
  /** UUID primary key; assigned in {@link Seed.generateId} when unset. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  @Property({ type: 'string', length: 30, unique: true })
  abid!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  /**
   * Stamped on insert and refreshed on every update. Without `onCreate` this
   * non-nullable column would be left unset on the first insert, because
   * `onUpdate` only fires for updates.
   */
  @Property({ type: 'timestamptz', onCreate: () => new Date(), onUpdate: () => new Date() })
  modified_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  @Property({ type: 'text' })
  uri!: string;

  @Property({ type: 'string', length: 32, default: 'auto' })
  extractor = 'auto';

  @Property({ type: 'string', length: 255, default: '' })
  tags_str = '';

  @Property({ type: 'string', length: 255, default: '' })
  label = '';

  @Property({ type: 'json', default: {} })
  config: object = {};

  @Property({ type: 'string', length: 255, default: '' })
  output_dir = '';

  @Property({ type: 'text', default: '' })
  notes = '';

  // Usage counters
  @Property({ type: 'integer', default: 0 })
  num_uses_failed = 0;

  @Property({ type: 'integer', default: 0 })
  num_uses_succeeded = 0;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  @OneToMany(() => Crawl, crawl => crawl.seed)
  crawls = new Collection<Crawl>(this);

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// CrawlSchedule Entity
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `crawls_crawlschedule` table.
 *
 * Points at a template `Crawl` and is the inverse side of `Crawl.schedule`.
 */
@Entity({ tableName: 'crawls_crawlschedule' })
@Index({ properties: ['created_at'] })
@Index({ properties: ['created_by_id'] })
@Index({ properties: ['template_id'] })
@Index({ properties: ['abid'] })
export class CrawlSchedule {
  /** UUID primary key; assigned in {@link CrawlSchedule.generateId} when unset. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  @Property({ type: 'string', length: 30, unique: true })
  abid!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  /**
   * Stamped on insert and refreshed on every update. Without `onCreate` this
   * non-nullable column would be left unset on the first insert, because
   * `onUpdate` only fires for updates.
   */
  @Property({ type: 'timestamptz', onCreate: () => new Date(), onUpdate: () => new Date() })
  modified_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  /** Raw FK value; not persisted here, the column is owned by `template`. */
  @Property({ type: 'uuid', persist: false })
  template_id!: string;

  @Property({ type: 'string', length: 64 })
  schedule!: string;

  @Property({ type: 'boolean', default: true })
  is_enabled = true;

  @Property({ type: 'string', length: 64, default: '' })
  label = '';

  @Property({ type: 'text', default: '' })
  notes = '';

  // Usage counters
  @Property({ type: 'integer', default: 0 })
  num_uses_failed = 0;

  @Property({ type: 'integer', default: 0 })
  num_uses_succeeded = 0;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  @ManyToOne(() => Crawl, { onDelete: 'cascade', fieldName: 'template_id' })
  template!: Crawl;

  @OneToMany(() => Crawl, crawl => crawl.schedule)
  crawls = new Collection<Crawl>(this);

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// Crawl Entity
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `crawls_crawl` table.
 *
 * Belongs to a `Seed` (delete restricted) and optionally to a `CrawlSchedule`
 * (set to NULL on schedule deletion); owns snapshots and outlinks.
 */
@Entity({ tableName: 'crawls_crawl' })
@Index({ properties: ['created_at'] })
@Index({ properties: ['created_by_id'] })
@Index({ properties: ['seed_id'] })
@Index({ properties: ['schedule_id'] })
@Index({ properties: ['status'] })
@Index({ properties: ['retry_at'] })
@Index({ properties: ['abid'] })
export class Crawl {
  /** UUID primary key; assigned in {@link Crawl.generateId} when unset. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  @Property({ type: 'string', length: 30, unique: true })
  abid!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  /**
   * Stamped on insert and refreshed on every update. Without `onCreate` this
   * non-nullable column would be left unset on the first insert, because
   * `onUpdate` only fires for updates.
   */
  @Property({ type: 'timestamptz', onCreate: () => new Date(), onUpdate: () => new Date() })
  modified_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  /** Raw FK value; not persisted here, the column is owned by `seed`. */
  @Property({ type: 'uuid', persist: false })
  seed_id!: string;

  @Property({ type: 'text', default: '' })
  urls = '';

  @Property({ type: 'json', default: {} })
  config: object = {};

  @Property({ type: 'smallint', default: 0 })
  max_depth = 0;

  @Property({ type: 'string', length: 1024, default: '' })
  tags_str = '';

  @Property({ type: 'uuid', nullable: true })
  persona_id?: string;

  @Property({ type: 'string', length: 64, default: '' })
  label = '';

  @Property({ type: 'text', default: '' })
  notes = '';

  /** Raw FK value; not persisted here, the column is owned by `schedule`. */
  @Property({ type: 'uuid', nullable: true, persist: false })
  schedule_id?: string;

  // State machine fields
  @Property({ type: 'string', length: 16, default: 'queued' })
  status = 'queued';

  /** Initialised to the creation time. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  retry_at!: Date;

  @Property({ type: 'string', length: 255, default: '' })
  output_dir = '';

  // Usage counters
  @Property({ type: 'integer', default: 0 })
  num_uses_failed = 0;

  @Property({ type: 'integer', default: 0 })
  num_uses_succeeded = 0;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  @ManyToOne(() => Seed, { onDelete: 'restrict', fieldName: 'seed_id' })
  seed!: Seed;

  @ManyToOne(() => CrawlSchedule, { onDelete: 'set null', nullable: true, fieldName: 'schedule_id' })
  schedule?: CrawlSchedule;

  @OneToMany(() => Snapshot, snapshot => snapshot.crawl)
  snapshots = new Collection<Snapshot>(this);

  @OneToMany(() => Outlink, outlink => outlink.crawl)
  outlinks = new Collection<Outlink>(this);

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// Snapshot Entity
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `core_snapshot` table.
 *
 * Owns the many-to-many link to `Tag` through the `core_snapshot_tags` pivot
 * table and is the parent of `ArchiveResult` rows.
 */
@Entity({ tableName: 'core_snapshot' })
@Index({ properties: ['created_at'] })
@Index({ properties: ['created_by_id'] })
@Index({ properties: ['crawl_id'] })
@Index({ properties: ['url'] })
@Index({ properties: ['timestamp'] })
@Index({ properties: ['bookmarked_at'] })
@Index({ properties: ['downloaded_at'] })
@Index({ properties: ['title'] })
@Index({ properties: ['status'] })
@Index({ properties: ['retry_at'] })
@Index({ properties: ['abid'] })
export class Snapshot {
  /** UUID primary key; assigned in {@link Snapshot.generateId} when unset. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  @Property({ type: 'string', length: 30, unique: true })
  abid!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  /**
   * Stamped on insert and refreshed on every update. Without `onCreate` this
   * non-nullable column would be left unset on the first insert, because
   * `onUpdate` only fires for updates.
   */
  @Property({ type: 'timestamptz', onCreate: () => new Date(), onUpdate: () => new Date() })
  modified_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  @Property({ type: 'text', unique: true })
  url!: string;

  @Property({ type: 'string', length: 32, unique: true })
  timestamp!: string;

  @Property({ type: 'timestamptz' })
  bookmarked_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `crawl`. */
  @Property({ type: 'uuid', nullable: true, persist: false })
  crawl_id?: string;

  @Property({ type: 'string', length: 512, nullable: true })
  title?: string;

  @Property({ type: 'timestamptz', nullable: true })
  downloaded_at?: Date;

  // State machine fields
  /** Initialised to the creation time. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  retry_at!: Date;

  @Property({ type: 'string', length: 16, default: 'queued' })
  status = 'queued';

  @Property({ type: 'json', default: {} })
  config: object = {};

  @Property({ type: 'text', default: '' })
  notes = '';

  @Property({ type: 'string', length: 255, nullable: true })
  output_dir?: string;

  // Usage counters
  @Property({ type: 'integer', default: 0 })
  num_uses_failed = 0;

  @Property({ type: 'integer', default: 0 })
  num_uses_succeeded = 0;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  @ManyToOne(() => Crawl, { onDelete: 'cascade', nullable: true, fieldName: 'crawl_id' })
  crawl?: Crawl;

  /** Owning side of the Snapshot <-> Tag relation. */
  @ManyToMany(() => Tag, tag => tag.snapshots, { owner: true, pivotTable: 'core_snapshot_tags' })
  tags = new Collection<Tag>(this);

  @OneToMany(() => ArchiveResult, result => result.snapshot)
  archive_results = new Collection<ArchiveResult>(this);

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// ArchiveResult Entity
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `core_archiveresult` table.
 *
 * Belongs to a `Snapshot` and is the inverse side of `Outlink.via`.
 */
@Entity({ tableName: 'core_archiveresult' })
@Index({ properties: ['created_at'] })
@Index({ properties: ['created_by_id'] })
@Index({ properties: ['snapshot_id'] })
@Index({ properties: ['extractor'] })
@Index({ properties: ['status'] })
@Index({ properties: ['retry_at'] })
@Index({ properties: ['abid'] })
export class ArchiveResult {
  /** UUID primary key; assigned in {@link ArchiveResult.generateId} when unset. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  @Property({ type: 'string', length: 30, unique: true })
  abid!: string;

  /** Stamped with the current time when the row is created. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  created_at!: Date;

  /**
   * Stamped on insert and refreshed on every update. Without `onCreate` this
   * non-nullable column would be left unset on the first insert, because
   * `onUpdate` only fires for updates.
   */
  @Property({ type: 'timestamptz', onCreate: () => new Date(), onUpdate: () => new Date() })
  modified_at!: Date;

  /** Raw FK value; not persisted here, the column is owned by `created_by`. */
  @Property({ type: 'uuid', persist: false })
  created_by_id!: string;

  /** Raw FK value; not persisted here, the column is owned by `snapshot`. */
  @Property({ type: 'uuid', persist: false })
  snapshot_id!: string;

  @Property({ type: 'string', length: 32 })
  extractor!: string;

  // Execution details
  @Property({ type: 'string', length: 256, nullable: true })
  pwd?: string;

  @Property({ type: 'json', nullable: true })
  cmd?: object;

  @Property({ type: 'string', length: 128, nullable: true })
  cmd_version?: string;

  @Property({ type: 'string', length: 1024, nullable: true })
  output?: string;

  @Property({ type: 'timestamptz', nullable: true })
  start_ts?: Date;

  @Property({ type: 'timestamptz', nullable: true })
  end_ts?: Date;

  // State machine fields
  @Property({ type: 'string', length: 16, default: 'queued' })
  status = 'queued';

  /** Initialised to the creation time. */
  @Property({ type: 'timestamptz', onCreate: () => new Date() })
  retry_at!: Date;

  @Property({ type: 'text', default: '' })
  notes = '';

  @Property({ type: 'string', length: 256, nullable: true })
  output_dir?: string;

  @Property({ type: 'uuid', nullable: true })
  iface_id?: string;

  // Usage counters
  @Property({ type: 'integer', default: 0 })
  num_uses_failed = 0;

  @Property({ type: 'integer', default: 0 })
  num_uses_succeeded = 0;

  // Relations
  @ManyToOne(() => User, { onDelete: 'cascade', fieldName: 'created_by_id' })
  created_by!: User;

  @ManyToOne(() => Snapshot, { onDelete: 'cascade', fieldName: 'snapshot_id' })
  snapshot!: Snapshot;

  @OneToMany(() => Outlink, outlink => outlink.via)
  outlinks = new Collection<Outlink>(this);

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// Outlink Entity
|
||||
// ============================================
|
||||
/**
 * MikroORM entity for the `crawls_outlink` table.
 *
 * Stores one `src` -> `dst` link for a crawl; `(src, dst, via_id)` is unique.
 */
@Entity({ tableName: 'crawls_outlink' })
@Unique({ properties: ['src', 'dst', 'via_id'] })
export class Outlink {
  /** UUID primary key. */
  @PrimaryKey({ type: 'uuid' })
  id!: string;

  // Link endpoints
  @Property({ type: 'text' })
  src!: string;

  @Property({ type: 'text' })
  dst!: string;

  /** Raw FK value; not persisted here, the column is owned by `crawl`. */
  @Property({ type: 'uuid', persist: false })
  crawl_id!: string;

  /** Raw FK value; not persisted here, the column is owned by `via`. */
  @Property({ type: 'uuid', nullable: true, persist: false })
  via_id?: string;

  // Relations
  /** Owning crawl; deleting it deletes this row. */
  @ManyToOne(() => Crawl, { onDelete: 'cascade', fieldName: 'crawl_id' })
  crawl!: Crawl;

  /** Optional archive result; set to NULL when that result is deleted. */
  @ManyToOne(() => ArchiveResult, { onDelete: 'set null', nullable: true, fieldName: 'via_id' })
  via?: ArchiveResult;

  /** Assigns a fresh UUIDv7 as `id` unless one is already set. */
  @BeforeCreate()
  generateId() {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
@ -1,282 +0,0 @@
|
||||
// ArchiveBox Schema - Prisma ORM
|
||||
// Prisma uses a declarative schema DSL
|
||||
// Line count: ~280 lines
|
||||
|
||||
// PostgreSQL connection; the URL is read from the DATABASE_URL environment variable.
datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}
|
||||
|
||||
// Generates the Prisma JavaScript/TypeScript client.
// "uuidv7" is not a Prisma preview feature name, so the former
// `previewFeatures = ["uuidv7"]` entry is dropped. UUIDv7 defaults are
// expressed per column with the built-in `uuid(7)` attribute function.
generator client {
  provider = "prisma-client-js"
}
|
||||
|
||||
// ============================================
|
||||
// User Model (Django's default User)
|
||||
// ============================================
|
||||
// Account record mapped onto Django's `auth_user` table.
model User {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id           String    @id @default(uuid(7)) @db.Uuid
  username     String    @unique @db.VarChar(150)
  email        String    @db.VarChar(254)
  password     String    @db.VarChar(128)
  first_name   String    @db.VarChar(150)
  last_name    String    @db.VarChar(150)
  is_active    Boolean   @default(true)
  is_staff     Boolean   @default(false)
  is_superuser Boolean   @default(false)
  // Timezone-aware, matching every other timestamp column in this schema.
  date_joined  DateTime  @default(now()) @db.Timestamptz
  last_login   DateTime? @db.Timestamptz

  // Relations (inverse sides of each model's `created_by`)
  tags            Tag[]
  kv_tags         KVTag[]
  seeds           Seed[]
  crawls          Crawl[]
  crawl_schedules CrawlSchedule[]
  snapshots       Snapshot[]
  archive_results ArchiveResult[]

  @@map("auth_user")
}
|
||||
|
||||
// ============================================
|
||||
// Old-style Tag Model (being phased out)
|
||||
// ============================================
|
||||
// Old-style tag (being phased out), stored in `core_tag`.
model Tag {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id            String   @id @default(uuid(7)) @db.Uuid
  abid          String   @unique @db.VarChar(30)
  created_at    DateTime @default(now()) @db.Timestamptz
  modified_at   DateTime @updatedAt @db.Timestamptz
  created_by_id String   @db.Uuid
  name          String   @unique @db.VarChar(100)
  slug          String   @unique @db.VarChar(100)

  // Relations
  created_by User       @relation(fields: [created_by_id], references: [id], onDelete: Cascade)
  snapshots  Snapshot[] @relation("SnapshotTags")

  @@index([created_at])
  @@index([created_by_id])
  @@map("core_tag")
}
|
||||
|
||||
// ============================================
|
||||
// New-style KVTag Model (key-value tags)
|
||||
// ============================================
|
||||
// New-style key-value tag, stored in `core_kvtags`.
// Attached to a target through `obj_type` / `obj_id`; `(obj_id, name)` is unique.
model KVTag {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id            String   @id @default(uuid(7)) @db.Uuid
  created_at    DateTime @default(now()) @db.Timestamptz
  name          String   @db.VarChar(255)
  value         String?  @db.Text
  obj_type      String   @db.VarChar(100)
  obj_id        String   @db.Uuid
  created_by_id String   @db.Uuid

  // Relations
  created_by User @relation(fields: [created_by_id], references: [id], onDelete: Cascade)

  @@unique([obj_id, name])
  @@index([created_at])
  @@index([obj_type])
  @@index([obj_id])
  @@map("core_kvtags")
}
|
||||
|
||||
// ============================================
|
||||
// Seed Model (URL source)
|
||||
// ============================================
|
||||
// URL source, stored in `crawls_seed`.
// Unique per creator on `(uri, extractor)` and on `label`.
model Seed {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id                 String   @id @default(uuid(7)) @db.Uuid
  abid               String   @unique @db.VarChar(30)
  created_at         DateTime @default(now()) @db.Timestamptz
  modified_at        DateTime @updatedAt @db.Timestamptz
  created_by_id      String   @db.Uuid
  uri                String   @db.Text
  extractor          String   @default("auto") @db.VarChar(32)
  tags_str           String   @default("") @db.VarChar(255)
  label              String   @default("") @db.VarChar(255)
  config             Json     @default("{}")
  output_dir         String   @default("") @db.VarChar(255)
  notes              String   @default("") @db.Text
  num_uses_failed    Int      @default(0)
  num_uses_succeeded Int      @default(0)

  // Relations
  created_by User    @relation(fields: [created_by_id], references: [id], onDelete: Cascade)
  crawls     Crawl[]

  @@unique([created_by_id, uri, extractor])
  @@unique([created_by_id, label])
  @@index([created_at])
  @@index([created_by_id])
  @@map("crawls_seed")
}
|
||||
|
||||
// ============================================
|
||||
// CrawlSchedule Model
|
||||
// ============================================
|
||||
// Crawl schedule, stored in `crawls_crawlschedule`.
// Two named relations to Crawl: the template crawl ("CrawlScheduleTemplate")
// and the crawls attached to this schedule ("ScheduledCrawls").
model CrawlSchedule {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id                 String   @id @default(uuid(7)) @db.Uuid
  abid               String   @unique @db.VarChar(30)
  created_at         DateTime @default(now()) @db.Timestamptz
  modified_at        DateTime @updatedAt @db.Timestamptz
  created_by_id      String   @db.Uuid
  template_id        String   @db.Uuid
  schedule           String   @db.VarChar(64)
  is_enabled         Boolean  @default(true)
  label              String   @default("") @db.VarChar(64)
  notes              String   @default("") @db.Text
  num_uses_failed    Int      @default(0)
  num_uses_succeeded Int      @default(0)

  // Relations
  created_by User    @relation(fields: [created_by_id], references: [id], onDelete: Cascade)
  template   Crawl   @relation("CrawlScheduleTemplate", fields: [template_id], references: [id], onDelete: Cascade)
  crawls     Crawl[] @relation("ScheduledCrawls")

  @@index([created_at])
  @@index([created_by_id])
  @@map("crawls_crawlschedule")
}
|
||||
|
||||
// ============================================
|
||||
// Crawl Model (archiving session)
|
||||
// ============================================
|
||||
// Archiving session, stored in `crawls_crawl`.
// Seed deletion is restricted while crawls exist; deleting a schedule only
// clears `schedule_id`.
model Crawl {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id                 String   @id @default(uuid(7)) @db.Uuid
  abid               String   @unique @db.VarChar(30)
  created_at         DateTime @default(now()) @db.Timestamptz
  modified_at        DateTime @updatedAt @db.Timestamptz
  created_by_id      String   @db.Uuid
  seed_id            String   @db.Uuid
  urls               String   @default("") @db.Text
  config             Json     @default("{}")
  max_depth          Int      @default(0) @db.SmallInt
  tags_str           String   @default("") @db.VarChar(1024)
  persona_id         String?  @db.Uuid
  label              String   @default("") @db.VarChar(64)
  notes              String   @default("") @db.Text
  schedule_id        String?  @db.Uuid
  status             String   @default("queued") @db.VarChar(16)
  retry_at           DateTime @default(now()) @db.Timestamptz
  output_dir         String   @default("") @db.VarChar(255)
  num_uses_failed    Int      @default(0)
  num_uses_succeeded Int      @default(0)

  // Relations
  created_by            User            @relation(fields: [created_by_id], references: [id], onDelete: Cascade)
  seed                  Seed            @relation(fields: [seed_id], references: [id], onDelete: Restrict)
  schedule              CrawlSchedule?  @relation("ScheduledCrawls", fields: [schedule_id], references: [id], onDelete: SetNull)
  schedules_as_template CrawlSchedule[] @relation("CrawlScheduleTemplate")
  snapshots             Snapshot[]
  outlinks              Outlink[]

  @@index([created_at])
  @@index([created_by_id])
  @@index([seed_id])
  @@index([schedule_id])
  @@index([status])
  @@index([retry_at])
  @@map("crawls_crawl")
}
|
||||
|
||||
// ============================================
|
||||
// Snapshot Model (archived URL)
|
||||
// ============================================
|
||||
// Archived URL, stored in `core_snapshot`.
model Snapshot {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id                 String    @id @default(uuid(7)) @db.Uuid
  abid               String    @unique @db.VarChar(30)
  created_at         DateTime  @default(now()) @db.Timestamptz
  modified_at        DateTime  @updatedAt @db.Timestamptz
  created_by_id      String    @db.Uuid
  url                String    @unique @db.Text
  timestamp          String    @unique @db.VarChar(32)
  bookmarked_at      DateTime  @db.Timestamptz
  crawl_id           String?   @db.Uuid
  title              String?   @db.VarChar(512)
  downloaded_at      DateTime? @db.Timestamptz
  retry_at           DateTime  @default(now()) @db.Timestamptz
  status             String    @default("queued") @db.VarChar(16)
  config             Json      @default("{}")
  notes              String    @default("") @db.Text
  output_dir         String?   @db.VarChar(255)
  num_uses_failed    Int       @default(0)
  num_uses_succeeded Int       @default(0)

  // Relations
  // The former `outlinks_via Outlink[]` field is removed: Outlink.via points at
  // ArchiveResult, so the field had no opposite relation on Outlink and the
  // schema would not validate.
  created_by      User            @relation(fields: [created_by_id], references: [id], onDelete: Cascade)
  crawl           Crawl?          @relation(fields: [crawl_id], references: [id], onDelete: Cascade)
  tags            Tag[]           @relation("SnapshotTags")
  archive_results ArchiveResult[]

  @@index([created_at])
  @@index([created_by_id])
  @@index([crawl_id])
  @@index([url])
  @@index([timestamp])
  @@index([bookmarked_at])
  @@index([downloaded_at])
  @@index([title])
  @@index([status])
  @@index([retry_at])
  @@map("core_snapshot")
}
|
||||
|
||||
// ============================================
|
||||
// ArchiveResult Model (extraction result)
|
||||
// ============================================
|
||||
// Extraction result for a snapshot, stored in `core_archiveresult`.
model ArchiveResult {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id                 String    @id @default(uuid(7)) @db.Uuid
  abid               String    @unique @db.VarChar(30)
  created_at         DateTime  @default(now()) @db.Timestamptz
  modified_at        DateTime  @updatedAt @db.Timestamptz
  created_by_id      String    @db.Uuid
  snapshot_id        String    @db.Uuid
  extractor          String    @db.VarChar(32)
  pwd                String?   @db.VarChar(256)
  cmd                Json?
  cmd_version        String?   @db.VarChar(128)
  output             String?   @db.VarChar(1024)
  start_ts           DateTime? @db.Timestamptz
  end_ts             DateTime? @db.Timestamptz
  status             String    @default("queued") @db.VarChar(16)
  retry_at           DateTime  @default(now()) @db.Timestamptz
  notes              String    @default("") @db.Text
  output_dir         String?   @db.VarChar(256)
  iface_id           String?   @db.Uuid
  num_uses_failed    Int       @default(0)
  num_uses_succeeded Int       @default(0)

  // Relations
  created_by User      @relation(fields: [created_by_id], references: [id], onDelete: Cascade)
  snapshot   Snapshot  @relation(fields: [snapshot_id], references: [id], onDelete: Cascade)
  outlinks   Outlink[]

  @@index([created_at])
  @@index([created_by_id])
  @@index([snapshot_id])
  @@index([extractor])
  @@index([status])
  @@index([retry_at])
  @@map("core_archiveresult")
}
|
||||
|
||||
// ============================================
|
||||
// Outlink Model (link found on a page)
|
||||
// ============================================
|
||||
// Link found on a page, stored in `crawls_outlink`.
// `(src, dst, via_id)` is unique; `via_id` is cleared when the referenced
// ArchiveResult is deleted.
model Outlink {
  // `uuid(7)` is Prisma's built-in UUIDv7 generator (there is no `uuidv7()` function).
  id       String  @id @default(uuid(7)) @db.Uuid
  src      String  @db.Text
  dst      String  @db.Text
  crawl_id String  @db.Uuid
  via_id   String? @db.Uuid

  // Relations
  crawl Crawl          @relation(fields: [crawl_id], references: [id], onDelete: Cascade)
  via   ArchiveResult? @relation(fields: [via_id], references: [id], onDelete: SetNull)

  @@unique([src, dst, via_id])
  @@map("crawls_outlink")
}
|
||||
@ -1,634 +0,0 @@
|
||||
// ArchiveBox Schema - TypeORM
|
||||
// TypeORM uses TypeScript decorators on classes
|
||||
// Line count: ~550 lines
|
||||
|
||||
import {
|
||||
Entity,
|
||||
PrimaryColumn,
|
||||
Column,
|
||||
ManyToOne,
|
||||
OneToMany,
|
||||
ManyToMany,
|
||||
JoinTable,
|
||||
JoinColumn,
|
||||
Index,
|
||||
Unique,
|
||||
CreateDateColumn,
|
||||
UpdateDateColumn,
|
||||
BeforeInsert,
|
||||
} from 'typeorm';
|
||||
import { uuidv7 } from 'uuidv7';
|
||||
|
||||
// ============================================
|
||||
// User Entity (Django's default User)
|
||||
// ============================================
|
||||
/**
 * TypeORM entity for the `auth_user` table (Django's default User).
 *
 * Fields are populated by TypeORM rather than a constructor, so each one
 * carries a definite-assignment assertion (`!`) to compile under
 * `strictPropertyInitialization`.
 */
@Entity('auth_user')
@Index('auth_user_username_idx', ['username'])
export class User {
  /** UUID primary key; assigned in {@link User.generateId} when unset. */
  @PrimaryColumn('uuid')
  id!: string;

  @Column({ type: 'varchar', length: 150, unique: true })
  username!: string;

  @Column({ type: 'varchar', length: 254 })
  email!: string;

  @Column({ type: 'varchar', length: 128 })
  password!: string;

  @Column({ type: 'varchar', length: 150 })
  first_name!: string;

  @Column({ type: 'varchar', length: 150 })
  last_name!: string;

  // Permission / state flags
  @Column({ type: 'boolean', default: true })
  is_active!: boolean;

  @Column({ type: 'boolean', default: false })
  is_staff!: boolean;

  @Column({ type: 'boolean', default: false })
  is_superuser!: boolean;

  @CreateDateColumn({ type: 'timestamptz' })
  date_joined!: Date;

  @Column({ type: 'timestamptz', nullable: true })
  last_login!: Date | null;

  // Relations (inverse sides of each entity's `created_by`)
  @OneToMany(() => Tag, tag => tag.created_by)
  tags!: Tag[];

  @OneToMany(() => KVTag, kvTag => kvTag.created_by)
  kv_tags!: KVTag[];

  @OneToMany(() => Seed, seed => seed.created_by)
  seeds!: Seed[];

  @OneToMany(() => Crawl, crawl => crawl.created_by)
  crawls!: Crawl[];

  @OneToMany(() => CrawlSchedule, schedule => schedule.created_by)
  crawl_schedules!: CrawlSchedule[];

  @OneToMany(() => Snapshot, snapshot => snapshot.created_by)
  snapshots!: Snapshot[];

  @OneToMany(() => ArchiveResult, result => result.created_by)
  archive_results!: ArchiveResult[];

  /** Assigns a fresh UUIDv7 as `id` before insert unless one is already set. */
  @BeforeInsert()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// Tag Entity (being phased out)
|
||||
// ============================================
|
||||
/**
 * TypeORM entity for the `core_tag` table (old-style tags, being phased out).
 *
 * Fields are populated by TypeORM rather than a constructor, so each one
 * carries a definite-assignment assertion (`!`) to compile under
 * `strictPropertyInitialization`.
 */
@Entity('core_tag')
@Index('core_tag_created_at_idx', ['created_at'])
@Index('core_tag_created_by_idx', ['created_by_id'])
@Index('core_tag_abid_idx', ['abid'])
export class Tag {
  /** UUID primary key; assigned in {@link Tag.generateId} when unset. */
  @PrimaryColumn('uuid')
  id!: string;

  @Column({ type: 'varchar', length: 30, unique: true })
  abid!: string;

  @CreateDateColumn({ type: 'timestamptz' })
  created_at!: Date;

  @UpdateDateColumn({ type: 'timestamptz' })
  modified_at!: Date;

  /** FK column backing the `created_by` relation (see its `@JoinColumn`). */
  @Column({ type: 'uuid' })
  created_by_id!: string;

  @Column({ type: 'varchar', length: 100, unique: true })
  name!: string;

  @Column({ type: 'varchar', length: 100, unique: true })
  slug!: string;

  // Relations
  @ManyToOne(() => User, user => user.tags, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'created_by_id' })
  created_by!: User;

  /** Inverse side of `Snapshot.tags`. */
  @ManyToMany(() => Snapshot, snapshot => snapshot.tags)
  snapshots!: Snapshot[];

  /** Assigns a fresh UUIDv7 as `id` before insert unless one is already set. */
  @BeforeInsert()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// KVTag Entity (key-value tags)
|
||||
// ============================================
|
||||
/**
 * TypeORM entity stored in the `core_kvtags` table.
 *
 * The pair (`obj_id`, `name`) is unique; `created_at`, `obj_type` and `obj_id`
 * each have their own index.
 */
@Entity('core_kvtags')
@Unique(['obj_id', 'name'])
@Index('core_kvtags_created_at_idx', ['created_at'])
@Index('core_kvtags_obj_type_idx', ['obj_type'])
@Index('core_kvtags_obj_id_idx', ['obj_id'])
export class KVTag {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Creation timestamp column (`@CreateDateColumn`). */
  @CreateDateColumn({ type: 'timestamptz' })
  created_at: Date;

  /** Tag key, at most 255 characters. */
  @Column({
    type: 'varchar',
    length: 255,
  })
  name: string;

  /** Tag value; nullable free text. */
  @Column({
    type: 'text',
    nullable: true,
  })
  value: string | null;

  /** Type label of the tagged object, at most 100 characters. */
  @Column({
    type: 'varchar',
    length: 100,
  })
  obj_type: string;

  /** UUID of the tagged object. */
  @Column({ type: 'uuid' })
  obj_id: string;

  /** Foreign-key column backing the `created_by` relation. */
  @Column({ type: 'uuid' })
  created_by_id: string;

  // ---- Relations ----

  /** Owning user; the row is removed when that user is deleted (CASCADE). */
  @ManyToOne(
    () => User,
    (owner) => owner.kv_tags,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'created_by_id' })
  created_by: User;

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId(): void {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// Seed Entity
|
||||
// ============================================
|
||||
/**
 * TypeORM entity stored in the `crawls_seed` table.
 *
 * Uniqueness is enforced on (`created_by_id`, `uri`, `extractor`) and on
 * (`created_by_id`, `label`); `created_at`, `created_by_id` and `abid` are indexed.
 */
@Entity('crawls_seed')
@Unique(['created_by_id', 'uri', 'extractor'])
@Unique(['created_by_id', 'label'])
@Index('crawls_seed_created_at_idx', ['created_at'])
@Index('crawls_seed_created_by_idx', ['created_by_id'])
@Index('crawls_seed_abid_idx', ['abid'])
export class Seed {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Unique string identifier, at most 30 characters. */
  @Column({
    type: 'varchar',
    length: 30,
    unique: true,
  })
  abid: string;

  /** Creation timestamp column (`@CreateDateColumn`). */
  @CreateDateColumn({ type: 'timestamptz' })
  created_at: Date;

  /** Last-modification timestamp column (`@UpdateDateColumn`). */
  @UpdateDateColumn({ type: 'timestamptz' })
  modified_at: Date;

  /** Foreign-key column backing the `created_by` relation. */
  @Column({ type: 'uuid' })
  created_by_id: string;

  /** Seed URI, stored as unbounded text. */
  @Column({ type: 'text' })
  uri: string;

  /** Extractor name, at most 32 characters; defaults to `'auto'`. */
  @Column({
    type: 'varchar',
    length: 32,
    default: 'auto',
  })
  extractor: string;

  /** Tags string, at most 255 characters; defaults to empty. */
  @Column({
    type: 'varchar',
    length: 255,
    default: '',
  })
  tags_str: string;

  /** Label, at most 255 characters; defaults to empty. */
  @Column({
    type: 'varchar',
    length: 255,
    default: '',
  })
  label: string;

  /** JSONB configuration object; defaults to `{}`. */
  @Column({
    type: 'jsonb',
    default: {},
  })
  config: object;

  /** Output directory, at most 255 characters; defaults to empty. */
  @Column({
    type: 'varchar',
    length: 255,
    default: '',
  })
  output_dir: string;

  /** Free-form notes; defaults to empty. */
  @Column({
    type: 'text',
    default: '',
  })
  notes: string;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_failed: number;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_succeeded: number;

  // ---- Relations ----

  /** Owning user; the row is removed when that user is deleted (CASCADE). */
  @ManyToOne(
    () => User,
    (owner) => owner.seeds,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'created_by_id' })
  created_by: User;

  /** Inverse side of `Crawl.seed`. */
  @OneToMany(
    () => Crawl,
    (run) => run.seed,
  )
  crawls: Crawl[];

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId(): void {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// CrawlSchedule Entity
|
||||
// ============================================
|
||||
/**
 * Type-only alias for `Crawl`, used to annotate `CrawlSchedule.template`.
 *
 * `Crawl` is declared after `CrawlSchedule` in this file. With
 * `emitDecoratorMetadata` enabled, annotating a decorated property directly
 * with `Crawl` makes the compiler emit a runtime reference to the class while
 * it is still uninitialised, which throws a ReferenceError at module load.
 * Going through an alias makes the emitted `design:type` plain `Object`;
 * TypeORM resolves the relation target from the `() => Crawl` thunk instead.
 */
type CrawlScheduleTemplate = Crawl;

/**
 * TypeORM entity stored in the `crawls_crawlschedule` table.
 *
 * Indexes are declared on `created_at`, `created_by_id`, `template_id` and `abid`.
 */
@Entity('crawls_crawlschedule')
@Index('crawls_crawlschedule_created_at_idx', ['created_at'])
@Index('crawls_crawlschedule_created_by_idx', ['created_by_id'])
@Index('crawls_crawlschedule_template_idx', ['template_id'])
@Index('crawls_crawlschedule_abid_idx', ['abid'])
export class CrawlSchedule {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Unique string identifier, at most 30 characters. */
  @Column({ type: 'varchar', length: 30, unique: true })
  abid: string;

  /** Creation timestamp column (`@CreateDateColumn`). */
  @CreateDateColumn({ type: 'timestamptz' })
  created_at: Date;

  /** Last-modification timestamp column (`@UpdateDateColumn`). */
  @UpdateDateColumn({ type: 'timestamptz' })
  modified_at: Date;

  /** Foreign-key column backing the `created_by` relation. */
  @Column({ type: 'uuid' })
  created_by_id: string;

  /** Foreign-key column backing the `template` relation. */
  @Column({ type: 'uuid' })
  template_id: string;

  /** Schedule expression, at most 64 characters. */
  @Column({ type: 'varchar', length: 64 })
  schedule: string;

  /** Enabled flag; defaults to `true`. */
  @Column({ type: 'boolean', default: true })
  is_enabled: boolean;

  /** Label, at most 64 characters; defaults to empty. */
  @Column({ type: 'varchar', length: 64, default: '' })
  label: string;

  /** Free-form notes; defaults to empty. */
  @Column({ type: 'text', default: '' })
  notes: string;

  /** Integer counter; defaults to 0. */
  @Column({ type: 'int', default: 0 })
  num_uses_failed: number;

  /** Integer counter; defaults to 0. */
  @Column({ type: 'int', default: 0 })
  num_uses_succeeded: number;

  // Relations

  /** Owning user; the row is removed when that user is deleted (CASCADE). */
  @ManyToOne(() => User, user => user.crawl_schedules, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'created_by_id' })
  created_by: User;

  /**
   * Crawl referenced through `template_id`; deleting it cascades to this row.
   * Annotated via the type-only alias so no runtime reference to the
   * later-declared `Crawl` class is emitted for decorator metadata.
   */
  @ManyToOne(() => Crawl, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'template_id' })
  template: CrawlScheduleTemplate;

  /** Inverse side of `Crawl.schedule`. */
  @OneToMany(() => Crawl, crawl => crawl.schedule)
  crawls: Crawl[];

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId() {
    if (!this.id) {
      this.id = uuidv7();
    }
  }
}
|
||||
|
||||
// ============================================
|
||||
// Crawl Entity
|
||||
// ============================================
|
||||
/**
 * TypeORM entity stored in the `crawls_crawl` table.
 *
 * Indexes are declared on `created_at`, `created_by_id`, `seed_id`,
 * `schedule_id`, `status`, `retry_at` and `abid`.
 */
@Entity('crawls_crawl')
@Index('crawls_crawl_created_at_idx', ['created_at'])
@Index('crawls_crawl_created_by_idx', ['created_by_id'])
@Index('crawls_crawl_seed_idx', ['seed_id'])
@Index('crawls_crawl_schedule_idx', ['schedule_id'])
@Index('crawls_crawl_status_idx', ['status'])
@Index('crawls_crawl_retry_at_idx', ['retry_at'])
@Index('crawls_crawl_abid_idx', ['abid'])
export class Crawl {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Unique string identifier, at most 30 characters. */
  @Column({
    type: 'varchar',
    length: 30,
    unique: true,
  })
  abid: string;

  /** Creation timestamp column (`@CreateDateColumn`). */
  @CreateDateColumn({ type: 'timestamptz' })
  created_at: Date;

  /** Last-modification timestamp column (`@UpdateDateColumn`). */
  @UpdateDateColumn({ type: 'timestamptz' })
  modified_at: Date;

  /** Foreign-key column backing the `created_by` relation. */
  @Column({ type: 'uuid' })
  created_by_id: string;

  /** Foreign-key column backing the `seed` relation. */
  @Column({ type: 'uuid' })
  seed_id: string;

  /** URLs as text; defaults to empty. */
  @Column({
    type: 'text',
    default: '',
  })
  urls: string;

  /** JSONB configuration object; defaults to `{}`. */
  @Column({
    type: 'jsonb',
    default: {},
  })
  config: object;

  /** Maximum depth as a smallint; defaults to 0. */
  @Column({
    type: 'smallint',
    default: 0,
  })
  max_depth: number;

  /** Tags string, at most 1024 characters; defaults to empty. */
  @Column({
    type: 'varchar',
    length: 1024,
    default: '',
  })
  tags_str: string;

  /** Optional persona UUID; no relation is declared for it in this class. */
  @Column({
    type: 'uuid',
    nullable: true,
  })
  persona_id: string | null;

  /** Label, at most 64 characters; defaults to empty. */
  @Column({
    type: 'varchar',
    length: 64,
    default: '',
  })
  label: string;

  /** Free-form notes; defaults to empty. */
  @Column({
    type: 'text',
    default: '',
  })
  notes: string;

  /** Nullable foreign-key column backing the `schedule` relation. */
  @Column({
    type: 'uuid',
    nullable: true,
  })
  schedule_id: string | null;

  /** Status string, at most 16 characters; defaults to `'queued'`. */
  @Column({
    type: 'varchar',
    length: 16,
    default: 'queued',
  })
  status: string;

  /** Retry timestamp; the database default is `CURRENT_TIMESTAMP`. */
  @Column({
    type: 'timestamptz',
    default: () => 'CURRENT_TIMESTAMP',
  })
  retry_at: Date;

  /** Output directory, at most 255 characters; defaults to empty. */
  @Column({
    type: 'varchar',
    length: 255,
    default: '',
  })
  output_dir: string;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_failed: number;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_succeeded: number;

  // ---- Relations ----

  /** Owning user; the row is removed when that user is deleted (CASCADE). */
  @ManyToOne(
    () => User,
    (owner) => owner.crawls,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'created_by_id' })
  created_by: User;

  /** Seed this crawl belongs to; deleting a referenced seed is RESTRICTed. */
  @ManyToOne(
    () => Seed,
    (origin) => origin.crawls,
    { onDelete: 'RESTRICT' },
  )
  @JoinColumn({ name: 'seed_id' })
  seed: Seed;

  /** Optional schedule; `schedule_id` is set to NULL when the schedule is deleted. */
  @ManyToOne(
    () => CrawlSchedule,
    (plan) => plan.crawls,
    { onDelete: 'SET NULL', nullable: true },
  )
  @JoinColumn({ name: 'schedule_id' })
  schedule: CrawlSchedule | null;

  /** Inverse side of `Snapshot.crawl`. */
  @OneToMany(
    () => Snapshot,
    (snap) => snap.crawl,
  )
  snapshots: Snapshot[];

  /** Inverse side of `Outlink.crawl`. */
  @OneToMany(
    () => Outlink,
    (link) => link.crawl,
  )
  outlinks: Outlink[];

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId(): void {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// Snapshot Entity
|
||||
// ============================================
|
||||
/**
 * TypeORM entity stored in the `core_snapshot` table.
 *
 * Indexes are declared on `created_at`, `created_by_id`, `crawl_id`, `url`,
 * `timestamp`, `bookmarked_at`, `downloaded_at`, `title`, `status`,
 * `retry_at` and `abid`.
 */
@Entity('core_snapshot')
@Index('core_snapshot_created_at_idx', ['created_at'])
@Index('core_snapshot_created_by_idx', ['created_by_id'])
@Index('core_snapshot_crawl_idx', ['crawl_id'])
@Index('core_snapshot_url_idx', ['url'])
@Index('core_snapshot_timestamp_idx', ['timestamp'])
@Index('core_snapshot_bookmarked_at_idx', ['bookmarked_at'])
@Index('core_snapshot_downloaded_at_idx', ['downloaded_at'])
@Index('core_snapshot_title_idx', ['title'])
@Index('core_snapshot_status_idx', ['status'])
@Index('core_snapshot_retry_at_idx', ['retry_at'])
@Index('core_snapshot_abid_idx', ['abid'])
export class Snapshot {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Unique string identifier, at most 30 characters. */
  @Column({
    type: 'varchar',
    length: 30,
    unique: true,
  })
  abid: string;

  /** Creation timestamp column (`@CreateDateColumn`). */
  @CreateDateColumn({ type: 'timestamptz' })
  created_at: Date;

  /** Last-modification timestamp column (`@UpdateDateColumn`). */
  @UpdateDateColumn({ type: 'timestamptz' })
  modified_at: Date;

  /** Foreign-key column backing the `created_by` relation. */
  @Column({ type: 'uuid' })
  created_by_id: string;

  /** Snapshot URL; unique across the table. */
  @Column({
    type: 'text',
    unique: true,
  })
  url: string;

  /** Unique timestamp string, at most 32 characters. */
  @Column({
    type: 'varchar',
    length: 32,
    unique: true,
  })
  timestamp: string;

  /** Required bookmarked-at timestamp. */
  @Column({ type: 'timestamptz' })
  bookmarked_at: Date;

  /** Nullable foreign-key column backing the `crawl` relation. */
  @Column({
    type: 'uuid',
    nullable: true,
  })
  crawl_id: string | null;

  /** Optional title, at most 512 characters. */
  @Column({
    type: 'varchar',
    length: 512,
    nullable: true,
  })
  title: string | null;

  /** Optional downloaded-at timestamp. */
  @Column({
    type: 'timestamptz',
    nullable: true,
  })
  downloaded_at: Date | null;

  /** Retry timestamp; the database default is `CURRENT_TIMESTAMP`. */
  @Column({
    type: 'timestamptz',
    default: () => 'CURRENT_TIMESTAMP',
  })
  retry_at: Date;

  /** Status string, at most 16 characters; defaults to `'queued'`. */
  @Column({
    type: 'varchar',
    length: 16,
    default: 'queued',
  })
  status: string;

  /** JSONB configuration object; defaults to `{}`. */
  @Column({
    type: 'jsonb',
    default: {},
  })
  config: object;

  /** Free-form notes; defaults to empty. */
  @Column({
    type: 'text',
    default: '',
  })
  notes: string;

  /** Optional output directory, at most 255 characters. */
  @Column({
    type: 'varchar',
    length: 255,
    nullable: true,
  })
  output_dir: string | null;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_failed: number;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_succeeded: number;

  // ---- Relations ----

  /** Owning user; the row is removed when that user is deleted (CASCADE). */
  @ManyToOne(
    () => User,
    (owner) => owner.snapshots,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'created_by_id' })
  created_by: User;

  /** Optional parent crawl; deleting the crawl cascades to this row. */
  @ManyToOne(
    () => Crawl,
    (run) => run.snapshots,
    { onDelete: 'CASCADE', nullable: true },
  )
  @JoinColumn({ name: 'crawl_id' })
  crawl: Crawl | null;

  /** Owning side of the many-to-many with `Tag`, joined through `core_snapshot_tags`. */
  @ManyToMany(
    () => Tag,
    (label) => label.snapshots,
  )
  @JoinTable({
    name: 'core_snapshot_tags',
    joinColumn: { name: 'snapshot_id', referencedColumnName: 'id' },
    inverseJoinColumn: { name: 'tag_id', referencedColumnName: 'id' },
  })
  tags: Tag[];

  /** Inverse side of `ArchiveResult.snapshot`. */
  @OneToMany(
    () => ArchiveResult,
    (res) => res.snapshot,
  )
  archive_results: ArchiveResult[];

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId(): void {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// ArchiveResult Entity
|
||||
// ============================================
|
||||
/**
 * TypeORM entity stored in the `core_archiveresult` table.
 *
 * Indexes are declared on `created_at`, `created_by_id`, `snapshot_id`,
 * `extractor`, `status`, `retry_at` and `abid`.
 */
@Entity('core_archiveresult')
@Index('core_archiveresult_created_at_idx', ['created_at'])
@Index('core_archiveresult_created_by_idx', ['created_by_id'])
@Index('core_archiveresult_snapshot_idx', ['snapshot_id'])
@Index('core_archiveresult_extractor_idx', ['extractor'])
@Index('core_archiveresult_status_idx', ['status'])
@Index('core_archiveresult_retry_at_idx', ['retry_at'])
@Index('core_archiveresult_abid_idx', ['abid'])
export class ArchiveResult {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Unique string identifier, at most 30 characters. */
  @Column({
    type: 'varchar',
    length: 30,
    unique: true,
  })
  abid: string;

  /** Creation timestamp column (`@CreateDateColumn`). */
  @CreateDateColumn({ type: 'timestamptz' })
  created_at: Date;

  /** Last-modification timestamp column (`@UpdateDateColumn`). */
  @UpdateDateColumn({ type: 'timestamptz' })
  modified_at: Date;

  /** Foreign-key column backing the `created_by` relation. */
  @Column({ type: 'uuid' })
  created_by_id: string;

  /** Foreign-key column backing the `snapshot` relation. */
  @Column({ type: 'uuid' })
  snapshot_id: string;

  /** Extractor name, at most 32 characters. */
  @Column({
    type: 'varchar',
    length: 32,
  })
  extractor: string;

  /** Optional `pwd` string, at most 256 characters. */
  @Column({
    type: 'varchar',
    length: 256,
    nullable: true,
  })
  pwd: string | null;

  /** Optional `cmd` value stored as JSONB. */
  @Column({
    type: 'jsonb',
    nullable: true,
  })
  cmd: object | null;

  /** Optional `cmd_version` string, at most 128 characters. */
  @Column({
    type: 'varchar',
    length: 128,
    nullable: true,
  })
  cmd_version: string | null;

  /** Optional output string, at most 1024 characters. */
  @Column({
    type: 'varchar',
    length: 1024,
    nullable: true,
  })
  output: string | null;

  /** Optional start timestamp. */
  @Column({
    type: 'timestamptz',
    nullable: true,
  })
  start_ts: Date | null;

  /** Optional end timestamp. */
  @Column({
    type: 'timestamptz',
    nullable: true,
  })
  end_ts: Date | null;

  /** Status string, at most 16 characters; defaults to `'queued'`. */
  @Column({
    type: 'varchar',
    length: 16,
    default: 'queued',
  })
  status: string;

  /** Retry timestamp; the database default is `CURRENT_TIMESTAMP`. */
  @Column({
    type: 'timestamptz',
    default: () => 'CURRENT_TIMESTAMP',
  })
  retry_at: Date;

  /** Free-form notes; defaults to empty. */
  @Column({
    type: 'text',
    default: '',
  })
  notes: string;

  /** Optional output directory, at most 256 characters. */
  @Column({
    type: 'varchar',
    length: 256,
    nullable: true,
  })
  output_dir: string | null;

  /** Optional `iface` UUID; no relation is declared for it in this class. */
  @Column({
    type: 'uuid',
    nullable: true,
  })
  iface_id: string | null;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_failed: number;

  /** Integer counter; defaults to 0. */
  @Column({
    type: 'int',
    default: 0,
  })
  num_uses_succeeded: number;

  // ---- Relations ----

  /** Owning user; the row is removed when that user is deleted (CASCADE). */
  @ManyToOne(
    () => User,
    (owner) => owner.archive_results,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'created_by_id' })
  created_by: User;

  /** Parent snapshot; deleting it cascades to this row. */
  @ManyToOne(
    () => Snapshot,
    (snap) => snap.archive_results,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'snapshot_id' })
  snapshot: Snapshot;

  /** Inverse side of `Outlink.via`. */
  @OneToMany(
    () => Outlink,
    (link) => link.via,
  )
  outlinks: Outlink[];

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId(): void {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
|
||||
// ============================================
|
||||
// Outlink Entity
|
||||
// ============================================
|
||||
/**
 * TypeORM entity stored in the `crawls_outlink` table.
 *
 * The triple (`src`, `dst`, `via_id`) is declared unique.
 */
@Entity('crawls_outlink')
@Unique(['src', 'dst', 'via_id'])
export class Outlink {
  /** UUID primary key; populated by `generateId()` before insert when unset. */
  @PrimaryColumn('uuid')
  id: string;

  /** Source value of the link, stored as text. */
  @Column({ type: 'text' })
  src: string;

  /** Destination value of the link, stored as text. */
  @Column({ type: 'text' })
  dst: string;

  /** Foreign-key column backing the `crawl` relation. */
  @Column({ type: 'uuid' })
  crawl_id: string;

  /** Nullable foreign-key column backing the `via` relation. */
  @Column({
    type: 'uuid',
    nullable: true,
  })
  via_id: string | null;

  // ---- Relations ----

  /** Parent crawl; deleting it cascades to this row. */
  @ManyToOne(
    () => Crawl,
    (run) => run.outlinks,
    { onDelete: 'CASCADE' },
  )
  @JoinColumn({ name: 'crawl_id' })
  crawl: Crawl;

  /** Optional archive result; `via_id` is set to NULL when it is deleted. */
  @ManyToOne(
    () => ArchiveResult,
    (res) => res.outlinks,
    { onDelete: 'SET NULL', nullable: true },
  )
  @JoinColumn({ name: 'via_id' })
  via: ArchiveResult | null;

  /** Assigns a fresh UUIDv7 to `id` before insert, unless one is already set. */
  @BeforeInsert()
  generateId(): void {
    if (this.id) {
      return;
    }
    this.id = uuidv7();
  }
}
|
||||
Loading…
Reference in New Issue
Block a user