remove dots from series and title if not near digits

This commit is contained in:
AJ Slater 2024-02-20 13:33:48 -08:00
parent da825abda7
commit 3304ba76d6
3 changed files with 11 additions and 5 deletions

View File

@@ -6,6 +6,7 @@ from re import Pattern
from typing import Any from typing import Any
from comicfn2dict.regex import ( from comicfn2dict.regex import (
NON_NUMBER_DOT_RE,
EXTRA_SPACES_RE, EXTRA_SPACES_RE,
ISSUE_ANYWHERE_RE, ISSUE_ANYWHERE_RE,
ISSUE_COUNT_RE, ISSUE_COUNT_RE,
@@ -133,6 +134,8 @@ class ComicFilenameParser:
unused_tokens.append(token) unused_tokens.append(token)
continue continue
value = self._grouping_operators_strip(value) value = self._grouping_operators_strip(value)
value = NON_NUMBER_DOT_RE.sub(r"\1 \2", value)
self.metadata[key] = value self.metadata[key] = value
remaining_key_index += 1 remaining_key_index += 1
else: else:
@@ -165,6 +168,7 @@ class ComicFilenameParser:
self._log_progress("INITIAL") self._log_progress("INITIAL")
self._parse_ext() self._parse_ext()
self._clean_dividers() self._clean_dividers()
self._log_progress("CLEANED")
# Parse paren tokens # Parse paren tokens
self._parse_item(ISSUE_COUNT_RE) self._parse_item(ISSUE_COUNT_RE)

View File

@@ -73,3 +73,5 @@ ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b")
# LONG STRINGS # LONG STRINGS
REMAINING_GROUP_RE = re_compile(r"^[^\()].*[^\)]") REMAINING_GROUP_RE = re_compile(r"^[^\()].*[^\)]")
NON_NUMBER_DOT_RE = re_compile(r"(\D)\.(\D)")

View File

@@ -309,6 +309,11 @@ FNS.update(
"series": "Star Wars - War of the Bounty Hunters - IG-88", "series": "Star Wars - War of the Bounty Hunters - IG-88",
"year": "2021", "year": "2021",
}, },
"Free Comic Book Day - Avengers.Hulk (2021).cbz": {
"ext": "cbz",
"series": "Free Comic Book Day - Avengers Hulk",
"year": "2021",
},
} }
) )
LATER = { LATER = {
@@ -333,11 +338,6 @@ LATER = {
# Not examined yet. # Not examined yet.
FNS.update( FNS.update(
{ {
"Free Comic Book Day - Avengers.Hulk (2021).cbz": {
"ext": "cbz",
"series": "Free Comic Book Day - Avengers Hulk",
"year": "2021",
},
# CT assumes the volume is also the issue number if it can't find an issue number # CT assumes the volume is also the issue number if it can't find an issue number
"Avengers By Brian Michael Bendis volume 03 (2013).cbz": { "Avengers By Brian Michael Bendis volume 03 (2013).cbz": {
"ext": "cbz", "ext": "cbz",