Extension-based content type classification (#146)

This commit is contained in:
mgdigital 2024-02-19 17:27:21 +00:00 committed by GitHub
parent efdde16dd1
commit f8d02aac5e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 239 additions and 67 deletions

View File

@ -43,8 +43,8 @@ select
size,
-- map the RARBG category to a valid bitmagnet content type:
case
when cat like 'ebooks%' then 'book'
when cat like 'games%' then 'game'
when cat like 'ebooks%' then 'ebook'
when cat like 'games%' then 'software'
when cat like 'movies%' then 'movie'
when cat like 'tv%' then 'tv_show'
when cat like 'music%' then 'music'
@ -84,6 +84,7 @@ select
cat not like '%_720' and
cat not like '%_SD' and
cat not like 'software%' and
cat not like 'games%' and
-- I won't judge you if you disable the following line;
-- bear in mind there is a *lot* of this in the RARBG backup
cat != 'xxx' and

View File

@ -2,9 +2,10 @@ enum ContentType {
movie
tv_show
music
ebook
audiobook
game
software
book
xxx
}

View File

@ -2,6 +2,7 @@ package classifierfx
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/extension"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/videofx"
"go.uber.org/fx"
)
@ -11,6 +12,7 @@ func New() fx.Option {
"classifier",
fx.Provide(
classifier.New,
extension.New,
),
videofx.New(),
)

View File

@ -0,0 +1,63 @@
package extension
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
// extensionClassifier is a stateless sub-classifier that derives a content
// type from file extensions. It carries no fields, so the zero value is ready
// to use.
type extensionClassifier struct{}

// Key returns the identifier of this sub-classifier, "extension".
func (extensionClassifier) Key() string {
	const key = "extension"
	return key
}

// Priority returns the fixed priority value of this sub-classifier. How the
// value is interpreted relative to other sub-classifiers is decided by the
// caller, not here.
func (extensionClassifier) Priority() int {
	const priority = 10
	return priority
}
// Classify infers a content type for t from the extensions of its files.
//
// It returns classifier.ErrNoMatch when:
//   - the torrent already carries a hint,
//   - its files status is FilesStatusNoInfo or FilesStatusOverThreshold,
//   - it is a single-file torrent whose extension is absent or unrecognised, or
//   - no recognised content type outweighs the unrecognised files.
//
// For multi-file torrents the sizes of all files are summed per content type
// and the type with the largest total wins, provided that total is strictly
// greater than the total attributed to unrecognised files. When two content
// types have the same total, the one whose name sorts first is chosen so the
// result does not depend on Go's unspecified map iteration order.
func (c extensionClassifier) Classify(_ context.Context, t model.Torrent) (classifier.Classification, error) {
	// An existing hint takes precedence, and without usable file information
	// there is nothing to inspect.
	if !t.Hint.IsNil() || t.FilesStatus == model.FilesStatusNoInfo || t.FilesStatus == model.FilesStatusOverThreshold {
		return classifier.Classification{}, classifier.ErrNoMatch
	}

	// Single-file torrents are classified from the torrent-level extension.
	if t.FilesStatus == model.FilesStatusSingle {
		if t.Extension.Valid {
			if ct := model.ContentTypeFromExtension(t.Extension.String); ct.Valid {
				return classifier.Classification{ContentType: ct}, nil
			}
		}
		return classifier.Classification{}, classifier.ErrNoMatch
	}

	// Multi-file torrents: accumulate bytes per recognised content type and
	// bytes belonging to files that could not be recognised.
	var unknownSize uint64
	sizeByType := make(map[model.ContentType]uint64)
	for _, f := range t.Files {
		if f.Size == 0 {
			// A zero-size file adds one unit to the unknown tally instead of
			// nothing, so it still weighs (slightly) against a match.
			unknownSize++
			continue
		}
		if ct := model.ContentTypeFromExtension(f.Extension.String); ct.Valid {
			sizeByType[ct.ContentType] += f.Size
		} else {
			unknownSize += f.Size
		}
	}

	// Select the dominant content type. Ties on size are broken by content
	// type name to keep the outcome deterministic across runs.
	var maxSize uint64
	var maxType model.ContentType
	for ct, size := range sizeByType {
		if size > maxSize || (size == maxSize && ct < maxType) {
			maxSize = size
			maxType = ct
		}
	}

	if maxSize > 0 && maxSize > unknownSize {
		return classifier.Classification{
			ContentType: model.NewNullContentType(maxType),
		}, nil
	}
	return classifier.Classification{}, classifier.ErrNoMatch
}

View File

@ -0,0 +1,20 @@
package extension
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"go.uber.org/fx"
)
// Result is the fx output struct returned by New. Embedding fx.Out marks its
// fields as values provided to the fx container.
type Result struct {
fx.Out
// Classifier is the lazily constructed sub-classifier; the struct tag
// contributes it to the "content_classifiers" value group.
Classifier lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
// New returns the fx Result for this package: a lazy value that yields an
// extensionClassifier as a classifier.SubClassifier. The constructor passed to
// lazy.New never returns an error.
func New() Result {
	build := func() (classifier.SubClassifier, error) {
		return extensionClassifier{}, nil
	}
	return Result{Classifier: lazy.New(build)}
}

View File

@ -1568,9 +1568,10 @@ var sources = []*ast.Source{
movie
tv_show
music
ebook
audiobook
game
software
book
xxx
}

View File

@ -1,7 +1,7 @@
package model
// ContentType represents the type of content
// ENUM(movie, tv_show, music, game, software, book, xxx)
// ENUM(movie, tv_show, music, ebook, audiobook, game, software, xxx)
type ContentType string
func (c ContentType) Label() string {
@ -15,3 +15,42 @@ func (c ContentType) IsNil() bool {
// IsVideo reports whether c is one of the video content types: movie, tv_show
// or xxx.
func (c ContentType) IsVideo() bool {
	switch c {
	case ContentTypeMovie, ContentTypeTvShow, ContentTypeXxx:
		return true
	default:
		return false
	}
}
// extensionToContentTypeMap associates a file extension with the content type
// it implies. Keys are written in lower case and without a leading dot;
// ContentTypeFromExtension looks them up by exact match.
var extensionToContentTypeMap = map[string]ContentType{
// Audiobook formats.
"m4b": ContentTypeAudiobook,
// E-book, comic archive and document formats.
"epub": ContentTypeEbook,
"mobi": ContentTypeEbook,
"azw": ContentTypeEbook,
"azw3": ContentTypeEbook,
"pdf": ContentTypeEbook,
"cbr": ContentTypeEbook,
"cbz": ContentTypeEbook,
"cb7": ContentTypeEbook,
"cbt": ContentTypeEbook,
"cba": ContentTypeEbook,
"chm": ContentTypeEbook,
"doc": ContentTypeEbook,
"docx": ContentTypeEbook,
"odt": ContentTypeEbook,
"rtf": ContentTypeEbook,
"djvu": ContentTypeEbook,
// Executables, installers, packages and libraries.
"exe": ContentTypeSoftware,
"dmg": ContentTypeSoftware,
"app": ContentTypeSoftware,
"apk": ContentTypeSoftware,
"deb": ContentTypeSoftware,
"rpm": ContentTypeSoftware,
"jar": ContentTypeSoftware,
"dll": ContentTypeSoftware,
"lua": ContentTypeSoftware,
"package": ContentTypeSoftware,
"pkg": ContentTypeSoftware,
}
// ContentTypeFromExtension looks ext up in extensionToContentTypeMap and
// returns the associated content type wrapped in a NullContentType. The lookup
// is an exact string match; an extension that is not in the map yields the
// zero NullContentType.
func ContentTypeFromExtension(ext string) NullContentType {
	if contentType, found := extensionToContentTypeMap[ext]; found {
		return NewNullContentType(contentType)
	}
	return NullContentType{}
}

View File

@ -16,13 +16,14 @@ import (
)
const (
ContentTypeMovie ContentType = "movie"
ContentTypeTvShow ContentType = "tv_show"
ContentTypeMusic ContentType = "music"
ContentTypeGame ContentType = "game"
ContentTypeSoftware ContentType = "software"
ContentTypeBook ContentType = "book"
ContentTypeXxx ContentType = "xxx"
ContentTypeMovie ContentType = "movie"
ContentTypeTvShow ContentType = "tv_show"
ContentTypeMusic ContentType = "music"
ContentTypeEbook ContentType = "ebook"
ContentTypeAudiobook ContentType = "audiobook"
ContentTypeGame ContentType = "game"
ContentTypeSoftware ContentType = "software"
ContentTypeXxx ContentType = "xxx"
)
var ErrInvalidContentType = fmt.Errorf("not a valid ContentType, try [%s]", strings.Join(_ContentTypeNames, ", "))
@ -31,9 +32,10 @@ var _ContentTypeNames = []string{
string(ContentTypeMovie),
string(ContentTypeTvShow),
string(ContentTypeMusic),
string(ContentTypeEbook),
string(ContentTypeAudiobook),
string(ContentTypeGame),
string(ContentTypeSoftware),
string(ContentTypeBook),
string(ContentTypeXxx),
}
@ -50,9 +52,10 @@ func ContentTypeValues() []ContentType {
ContentTypeMovie,
ContentTypeTvShow,
ContentTypeMusic,
ContentTypeEbook,
ContentTypeAudiobook,
ContentTypeGame,
ContentTypeSoftware,
ContentTypeBook,
ContentTypeXxx,
}
}
@ -70,13 +73,14 @@ func (x ContentType) IsValid() bool {
}
var _ContentTypeValue = map[string]ContentType{
"movie": ContentTypeMovie,
"tv_show": ContentTypeTvShow,
"music": ContentTypeMusic,
"game": ContentTypeGame,
"software": ContentTypeSoftware,
"book": ContentTypeBook,
"xxx": ContentTypeXxx,
"movie": ContentTypeMovie,
"tv_show": ContentTypeTvShow,
"music": ContentTypeMusic,
"ebook": ContentTypeEbook,
"audiobook": ContentTypeAudiobook,
"game": ContentTypeGame,
"software": ContentTypeSoftware,
"xxx": ContentTypeXxx,
}
// ParseContentType attempts to convert a string to a ContentType.

View File

@ -46,7 +46,7 @@ func (a adapter) searchRequestOptions(r torznab.SearchRequest) ([]query.Option,
case torznab.FunctionMusic:
options = append(options, query.Where(search.ContentTypeCriteria(model.ContentTypeMusic)))
case torznab.FunctionBook:
options = append(options, query.Where(search.ContentTypeCriteria(model.ContentTypeBook)))
options = append(options, query.Where(search.ContentTypeCriteria(model.ContentTypeEbook)))
default:
return nil, torznab.Error{
Code: 202,
@ -103,7 +103,7 @@ func (a adapter) searchRequestOptions(r torznab.SearchRequest) ([]query.Option,
}
} else if torznab.CategoryBooks.Has(cat) {
if r.Type != torznab.FunctionBook {
catCriteria = append(catCriteria, search.ContentTypeCriteria(model.ContentTypeBook))
catCriteria = append(catCriteria, search.ContentTypeCriteria(model.ContentTypeEbook))
}
}
if len(catCriteria) > 0 {
@ -175,8 +175,10 @@ func (a adapter) transformSearchResult(req torznab.SearchRequest, res search.Tor
categoryId = torznab.CategoryTV.ID
case model.ContentTypeMusic:
categoryId = torznab.CategoryAudio.ID
case model.ContentTypeBook:
case model.ContentTypeEbook:
categoryId = torznab.CategoryBooks.ID
case model.ContentTypeAudiobook:
categoryId = torznab.CategoryAudioAudiobook.ID
case model.ContentTypeSoftware:
categoryId = torznab.CategoryPC.ID
case model.ContentTypeGame:

View File

@ -52,6 +52,16 @@ var categoriesMap = map[int]Category{
3000: {
ID: 3000,
Name: "Audio",
Subcat: []Subcategory{
{
ID: 3030,
Name: "Audio/Audiobook",
},
},
},
3030: {
ID: 3030,
Name: "Audio/Audiobook",
Subcat: []Subcategory{
},
},
@ -138,22 +148,23 @@ var categoriesMap = map[int]Category{
}
var (
CategoryMovies = categoriesMap[2000]
CategoryMoviesSD = categoriesMap[2030]
CategoryMoviesHD = categoriesMap[2040]
CategoryMoviesUHD = categoriesMap[2045]
CategoryMovies3D = categoriesMap[2060]
CategoryAudio = categoriesMap[3000]
CategoryPC = categoriesMap[4000]
CategoryPCGames = categoriesMap[4050]
CategoryTV = categoriesMap[5000]
CategoryTVSD = categoriesMap[5030]
CategoryTVHD = categoriesMap[5040]
CategoryTVUHD = categoriesMap[5045]
CategoryXXX = categoriesMap[6000]
CategoryXXXOther = categoriesMap[6070]
CategoryBooks = categoriesMap[7000]
CategoryOther = categoriesMap[8000]
CategoryMovies = categoriesMap[2000]
CategoryMoviesSD = categoriesMap[2030]
CategoryMoviesHD = categoriesMap[2040]
CategoryMoviesUHD = categoriesMap[2045]
CategoryMovies3D = categoriesMap[2060]
CategoryAudio = categoriesMap[3000]
CategoryAudioAudiobook = categoriesMap[3030]
CategoryPC = categoriesMap[4000]
CategoryPCGames = categoriesMap[4050]
CategoryTV = categoriesMap[5000]
CategoryTVSD = categoriesMap[5030]
CategoryTVHD = categoriesMap[5040]
CategoryTVUHD = categoriesMap[5045]
CategoryXXX = categoriesMap[6000]
CategoryXXXOther = categoriesMap[6070]
CategoryBooks = categoriesMap[7000]
CategoryOther = categoriesMap[8000]
)
var TopLevelCategories = []Category{

View File

@ -18,7 +18,7 @@ id,name,supported
3000,Audio,1
3010,Audio/MP3,0
3020,Audio/Video,0
3030,Audio/Audiobook,0
3030,Audio/Audiobook,1
3040,Audio/Lossless,0
4000,PC,1
4010,PC/0day,0

1 id name supported
18 3000 Audio 1
19 3010 Audio/MP3 0
20 3020 Audio/Video 0
21 3030 Audio/Audiobook 0 1
22 3040 Audio/Lossless 0
23 4000 PC 1
24 4010 PC/0day 0

View File

@ -0,0 +1,15 @@
-- +goose Up
-- +goose StatementBegin
-- Rename the 'book' content type to 'ebook' in both tables that store it.
update torrent_contents set content_type = 'ebook' where content_type = 'book';
update torrent_hints set content_type = 'ebook' where content_type = 'book';
-- +goose StatementEnd
-- +goose Down
-- +goose StatementBegin
-- Revert the rename: 'ebook' rows become 'book' again.
-- NOTE(review): rows with content_type = 'audiobook' are not touched by this
-- down step; confirm whether the pre-migration application accepts that value.
update torrent_contents set content_type = 'book' where content_type = 'ebook';
update torrent_hints set content_type = 'book' where content_type = 'ebook';
-- +goose StatementEnd

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -70,7 +70,8 @@ export type ContentCollection = {
};
export type ContentType =
| 'book'
| 'audiobook'
| 'ebook'
| 'game'
| 'movie'
| 'music'

View File

@ -125,6 +125,7 @@ export class TorrentContentSearchEngine
public totalCount$ = this.totalCountSubject.asObservable();
public contentTypes = contentTypes;
public availableContentTypes = new Set<string>();
constructor(
private graphQLService: GraphQLService,
@ -186,15 +187,19 @@ export class TorrentContentSearchEngine
count: result.totalCount,
isEstimate: result.totalCountIsEstimate,
});
this.overallTotalCountSubject.next(
(result.aggregations.contentType ?? []).reduce(
(acc, next) => ({
count: acc.count + next.count,
isEstimate: acc.isEstimate || next.isEstimate,
}),
emptyBudgetedCount,
),
);
let overallTotalCount = 0;
let overallIsEstimate = false;
for (const ct of result.aggregations.contentType ?? []) {
overallTotalCount += ct.count;
overallIsEstimate = overallIsEstimate || ct.isEstimate;
if (ct.value) {
this.availableContentTypes.add(ct.value);
}
}
this.overallTotalCountSubject.next({
count: overallTotalCount,
isEstimate: overallIsEstimate,
});
}
});
}
@ -284,11 +289,16 @@ const contentTypes: Record<generated.ContentType | "null", ContentTypeInfo> = {
plural: "Music",
icon: "music_note",
},
book: {
singular: "Book",
plural: "Books",
ebook: {
singular: "E-Book",
plural: "E-Books",
icon: "auto_stories",
},
audiobook: {
singular: "Audiobook",
plural: "Audiobooks",
icon: "mic",
},
software: {
singular: "Software",
plural: "Software",

View File

@ -21,16 +21,18 @@
t of search.contentTypes | keyvalue: originalOrder;
track t.key
) {
<mat-radio-button [value]="t.key">
<mat-icon>{{ t.value.icon }}</mat-icon>
{{ t.value.plural }}
@if (search.contentTypeCount(t.key) | async; as agg) {
<small
>{{ agg.isEstimate ? "~" : ""
}}{{ agg.count | number }}</small
>
}
</mat-radio-button>
@if (search.availableContentTypes.has(t.key)) {
<mat-radio-button [value]="t.key">
<mat-icon>{{ t.value.icon }}</mat-icon>
{{ t.value.plural }}
@if (search.contentTypeCount(t.key) | async; as agg) {
<small
>{{ agg.isEstimate ? "~" : ""
}}{{ agg.count | number }}</small
>
}
</mat-radio-button>
}
}
</mat-radio-group>
</section>