Classifier enhancements (#158)

This commit is contained in:
mgdigital 2024-02-21 17:02:03 +00:00 committed by GitHub
parent ce5d913bd8
commit ab0405196e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 148 additions and 273 deletions

View File

@ -3,6 +3,7 @@ enum ContentType {
tv_show
music
ebook
comic
audiobook
game
software

View File

@ -47,6 +47,14 @@ func (a *ContentAttributes) ApplyHint(h model.TorrentHint) {
}
}
// InferVideoAttributes fills in the video-related attributes (resolution,
// source, codec, release group, 3D flag and modifier) by running the model
// package's inference helpers over input. Existing values of these fields
// are overwritten.
func (a *ContentAttributes) InferVideoAttributes(input string) {
	resolution := model.InferVideoResolution(input)
	source := model.InferVideoSource(input)
	codec, group := model.InferVideoCodecAndReleaseGroup(input)
	threeD := model.InferVideo3d(input)
	modifier := model.InferVideoModifier(input)

	a.VideoResolution = resolution
	a.VideoSource = source
	a.VideoCodec = codec
	a.ReleaseGroup = group
	a.Video3d = threeD
	a.VideoModifier = modifier
}
func (c *Classification) ApplyHint(h model.TorrentHint) {
c.ContentType = h.NullContentType()
c.ContentAttributes.ApplyHint(h)

View File

@ -19,5 +19,9 @@ func (c FallbackClassifier) Priority() int {
// Classify builds a classification from the torrent's hint alone and, when
// the torrent is known to contain video files, additionally infers video
// attributes from the torrent name. It never returns an error.
func (c FallbackClassifier) Classify(_ context.Context, t model.Torrent) (Classification, error) {
	var result Classification
	result.ApplyHint(t.Hint)
	// Only infer video attributes when the file-type check is both
	// conclusive (Valid) and positive (Bool).
	if video := t.HasFileType(model.FileTypeVideo); video.Valid && video.Bool {
		result.InferVideoAttributes(t.Name)
	}
	return result, nil
}

View File

@ -4,22 +4,14 @@ import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
"regexp"
)
func NewKeywordsClassifier(contentType model.ContentType, words []string, priority int) classifier.SubClassifier {
return keywordsClassifier{
contentType: contentType,
priority: priority,
regex: regex.NewRegexFromNames(words...),
}
}
type keywordsClassifier struct {
contentType model.ContentType
priority int
regex *regexp.Regexp
contentType model.ContentType
priority int
regex *regexp.Regexp
requiredFileTypes []model.FileType
}
func (c keywordsClassifier) Key() string {
@ -30,14 +22,25 @@ func (c keywordsClassifier) Priority() int {
return c.priority
}
func (c keywordsClassifier) Classify(_ context.Context, torrent model.Torrent) (classifier.Classification, error) {
if !c.regex.MatchString(torrent.Name) {
func (c keywordsClassifier) Classify(_ context.Context, t model.Torrent) (classifier.Classification, error) {
if !t.Hint.IsNil() || !c.regex.MatchString(t.Name) {
return classifier.Classification{}, classifier.ErrNoMatch
}
return classifier.Classification{
if len(c.requiredFileTypes) > 0 {
hasRequiredFileTypes := t.HasFileType(c.requiredFileTypes...)
if hasRequiredFileTypes.Valid && !hasRequiredFileTypes.Bool {
return classifier.Classification{}, classifier.ErrNoMatch
}
}
cl := classifier.Classification{
ContentType: model.NullContentType{
Valid: true,
ContentType: c.contentType,
},
}, nil
}
hasVideo := t.HasFileType(model.FileTypeVideo)
if hasVideo.Valid && hasVideo.Bool {
cl.InferVideoAttributes(t.Name)
}
return cl, nil
}

View File

@ -4,22 +4,50 @@ import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
"go.uber.org/fx"
)
type Result struct {
fx.Out
Xxx lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Music lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Audiobook lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Ebook lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Xxx lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New() Result {
return Result{
Audiobook: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeAudiobook,
regex: regex.NewRegexFromNames(audiobookWords...),
priority: 20,
requiredFileTypes: []model.FileType{model.FileTypeAudio},
}, nil
}),
Music: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeMusic,
regex: regex.NewRegexFromNames(musicWords...),
priority: 21,
requiredFileTypes: []model.FileType{model.FileTypeAudio},
}, nil
}),
// Ebook classifies torrents whose names match the e-book keywords as
// ContentTypeEbook; requiredFileTypes restricts matches to document files.
Ebook: lazy.New(func() (classifier.SubClassifier, error) {
	return keywordsClassifier{
		contentType:       model.ContentTypeEbook,
		regex:             regex.NewRegexFromNames(ebookWords...),
		priority:          22,
		requiredFileTypes: []model.FileType{model.FileTypeDocument},
	}, nil
}),
Xxx: lazy.New(func() (classifier.SubClassifier, error) {
return NewKeywordsClassifier(
model.ContentTypeXxx,
xxxWords,
20,
), nil
return keywordsClassifier{
contentType: model.ContentTypeXxx,
regex: regex.NewRegexFromNames(xxxWords...),
priority: 23,
}, nil
}),
}
}

View File

@ -1,7 +1,53 @@
package keywords
// musicWords lists the names that are compiled into the music keywords
// classifier's regex. Each name appears exactly once.
var musicWords = []string{
	"discography",
	"music",
	"album",
	"va",
	"various",
	"compilation",
	"ep",
	"lp",
	"single",
	"vinyl",
	"classical",
	"disco",
	"folk",
	"hits",
	"house",
	"indie",
	"jazz",
	"metal",
	"pop",
	"reggae",
	"rock",
	"trance",
}
// audiobookWords lists the names that are compiled into the audiobook
// keywords classifier's regex.
var audiobookWords = []string{
	"audiobook", "audiobooks",
	"book", "books",
	"abridged", "unabridged",
	"narrated",
}
// ebookWords lists the names that are compiled into the e-book keywords
// classifier's regex.
var ebookWords = []string{
	"book", "books",
	"ebook", "ebooks",
	"abridged", "unabridged",
}
// xxxWords lists the names that are compiled into the xxx keywords
// classifier's regex.
var xxxWords = []string{"xxx", "porn", "porno", "sex"}

View File

@ -59,7 +59,7 @@ var titleEpisodesRegex = rex.New(
),
).MustCompile()
var multiRegex = regex.NewRegexFromNames("multi")
var multiRegex = regex.NewRegexFromNames("multi", "dual")
var separatorToken = rex.Chars.Runes(" ._")
@ -168,16 +168,11 @@ func ParseContent(hintCt model.NullContentType, input string) (model.ContentType
if ct != model.ContentTypeTvShow {
episodes = nil
}
vc, rg := model.InferVideoCodecAndReleaseGroup(rest)
return ct, title, year, classifier.ContentAttributes{
Episodes: episodes,
Languages: model.InferLanguages(rest),
LanguageMulti: multiRegex.MatchString(rest),
VideoResolution: model.InferVideoResolution(rest),
VideoSource: model.InferVideoSource(rest),
VideoCodec: vc,
Video3d: model.InferVideo3d(rest),
VideoModifier: model.InferVideoModifier(rest),
ReleaseGroup: rg,
}, nil
attrs := classifier.ContentAttributes{
Episodes: episodes,
Languages: model.InferLanguages(rest),
LanguageMulti: multiRegex.MatchString(rest),
}
attrs.InferVideoAttributes(rest)
return ct, title, year, attrs, nil
}

View File

@ -1569,6 +1569,7 @@ var sources = []*ast.Source{
tv_show
music
ebook
comic
audiobook
game
software

View File

@ -1,7 +1,7 @@
package model
// ContentType represents the type of content
// ENUM(movie, tv_show, music, ebook, audiobook, game, software, xxx)
// ENUM(movie, tv_show, music, ebook, comic, audiobook, game, software, xxx)
type ContentType string
func (c ContentType) Label() string {
@ -16,24 +16,28 @@ func (c ContentType) IsVideo() bool {
return c == ContentTypeMovie || c == ContentTypeTvShow || c == ContentTypeXxx
}
// A map of file extensions to associated content types.
// The map includes only extensions where the content type can be very reliably inferred.
var extensionToContentTypeMap = map[string]ContentType{
"m4b": ContentTypeAudiobook,
"cb7": ContentTypeComic,
"cba": ContentTypeComic,
"cbr": ContentTypeComic,
"cbt": ContentTypeComic,
"cbz": ContentTypeComic,
"epub": ContentTypeEbook,
"mobi": ContentTypeEbook,
"azw": ContentTypeEbook,
"azw3": ContentTypeEbook,
"pdf": ContentTypeEbook,
"cbr": ContentTypeEbook,
"cbz": ContentTypeEbook,
"cb7": ContentTypeEbook,
"cbt": ContentTypeEbook,
"cba": ContentTypeEbook,
"chm": ContentTypeEbook,
"doc": ContentTypeEbook,
"docx": ContentTypeEbook,
"odt": ContentTypeEbook,
"rtf": ContentTypeEbook,
"djvu": ContentTypeEbook,
"ape": ContentTypeMusic,
"flac": ContentTypeMusic,
"exe": ContentTypeSoftware,
"dmg": ContentTypeSoftware,
"app": ContentTypeSoftware,

View File

@ -20,6 +20,7 @@ const (
ContentTypeTvShow ContentType = "tv_show"
ContentTypeMusic ContentType = "music"
ContentTypeEbook ContentType = "ebook"
ContentTypeComic ContentType = "comic"
ContentTypeAudiobook ContentType = "audiobook"
ContentTypeGame ContentType = "game"
ContentTypeSoftware ContentType = "software"
@ -33,6 +34,7 @@ var _ContentTypeNames = []string{
string(ContentTypeTvShow),
string(ContentTypeMusic),
string(ContentTypeEbook),
string(ContentTypeComic),
string(ContentTypeAudiobook),
string(ContentTypeGame),
string(ContentTypeSoftware),
@ -53,6 +55,7 @@ func ContentTypeValues() []ContentType {
ContentTypeTvShow,
ContentTypeMusic,
ContentTypeEbook,
ContentTypeComic,
ContentTypeAudiobook,
ContentTypeGame,
ContentTypeSoftware,
@ -77,6 +80,7 @@ var _ContentTypeValue = map[string]ContentType{
"tv_show": ContentTypeTvShow,
"music": ContentTypeMusic,
"ebook": ContentTypeEbook,
"comic": ContentTypeComic,
"audiobook": ContentTypeAudiobook,
"game": ContentTypeGame,
"software": ContentTypeSoftware,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -34,7 +34,10 @@ import { AppErrorsService } from "./app-errors.service";
})
export class AppModule {
constructor(iconRegistry: MatIconRegistry, domSanitizer: DomSanitizer) {
iconRegistry.setDefaultFontSetClass("material-icons-outlined");
iconRegistry.setDefaultFontSetClass(
"material-icons-outlined",
"material-symbols-outlined",
);
iconRegistry.addSvgIcon(
"magnet",
domSanitizer.bypassSecurityTrustResourceUrl("assets/magnet.svg"),

View File

@ -71,6 +71,7 @@ export type ContentCollection = {
export type ContentType =
| 'audiobook'
| 'comic'
| 'ebook'
| 'game'
| 'movie'

View File

@ -292,6 +292,11 @@ const contentTypes: Record<generated.ContentType | "null", ContentTypeInfo> = {
plural: "E-Books",
icon: "auto_stories",
},
comic: {
singular: "Comic",
plural: "Comics",
icon: "comic_bubble",
},
audiobook: {
singular: "Audiobook",
plural: "Audiobooks",

View File

@ -12,7 +12,7 @@
rel="stylesheet"
/>
<link
href="https://fonts.googleapis.com/icon?family=Material+Icons|Material+Icons+Outlined"
href="https://fonts.googleapis.com/icon?family=Material+Icons|Material+Icons+Outlined|Material+Symbols+Outlined"
rel="stylesheet"
/>
</head>