break up parse method and sort methods

This commit is contained in:
AJ Slater 2024-02-23 18:15:53 -08:00
parent b57899d954
commit 4466fa6723

View File

@ -57,6 +57,16 @@ class ComicFilenameParser:
self._path_indexes[value] = index self._path_indexes[value] = index
return self._path_indexes[value] return self._path_indexes[value]
def _log(self, label):
if not self._debug:
return
print_log_header(label)
combined = {}
for key in self.metadata:
combined[key] = (self.metadata.get(key), self.path_index(key))
print(" " + self._unparsed_path)
print(" " + pformat(combined))
def _parse_ext(self): def _parse_ext(self):
"""Pop the extension from the pathname.""" """Pop the extension from the pathname."""
path = Path(self._unparsed_path) path = Path(self._unparsed_path)
@ -78,6 +88,7 @@ class ComicFilenameParser:
replacement, count = pair replacement, count = pair
data = regex.sub(replacement, data, count=count).strip() data = regex.sub(replacement, data, count=count).strip()
self._unparsed_path = data.strip() self._unparsed_path = data.strip()
self._log("After Clean Path")
def _parse_items_update_metadata( def _parse_items_update_metadata(
self, matches: Match, exclude: str, require_all: bool, first_only: bool self, matches: Match, exclude: str, require_all: bool, first_only: bool
@ -131,6 +142,20 @@ class ComicFilenameParser:
if pop: if pop:
self._parse_items_pop_tokens(regex, first_only) self._parse_items_pop_tokens(regex, first_only)
def _parse_issue(self):
"""Parse Issue."""
self._parse_items(ISSUE_NUMBER_RE)
if "issue" not in self.metadata:
self._parse_items(ISSUE_WITH_COUNT_RE)
self._log("After Issue")
def _parse_volume(self):
"""Parse Volume."""
self._parse_items(VOLUME_RE)
if "volume" not in self.metadata:
self._parse_items(VOLUME_WITH_COUNT_RE)
self._log("After Volume")
def _alpha_month_to_numeric(self): def _alpha_month_to_numeric(self):
"""Translate alpha_month to numeric month.""" """Translate alpha_month to numeric month."""
if alpha_month := self.metadata.pop("alpha_month", ""): if alpha_month := self.metadata.pop("alpha_month", ""):
@ -165,6 +190,58 @@ class ComicFilenameParser:
self._parse_items(YEAR_TOKEN_RE) self._parse_items(YEAR_TOKEN_RE)
if self.metadata.get("year", "") != volume: if self.metadata.get("year", "") != volume:
self.metadata["volume"] = volume self.metadata["volume"] = volume
self._log("After Date")
def _parse_format_and_scan_info(self):
# Format & Scan Info
#
self._parse_items(
ORIGINAL_FORMAT_SCAN_INFO_RE,
require_all=True,
)
if "original_format" not in self.metadata:
self._parse_items(
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
)
self._parse_items(SCAN_INFO_SECONDARY_RE)
if (
scan_info_secondary := self.metadata.pop("secondary_scan_info", "")
) and "scan_info" not in self.metadata:
self.metadata["scan_info"] = scan_info_secondary # type: ignore
self._log("After original_format & scan_info")
def _parse_ends_of_remaining_tokens(self):
# Volume left on the end of string tokens
if "volume" not in self.metadata:
self._parse_items(BOOK_VOLUME_RE)
self._log("After original_format & scan_info")
# Years left on the end of string tokens
year_end_matched = False
if "year" not in self.metadata:
self._parse_items(YEAR_END_RE, pop=False)
year_end_matched = "year" in self.metadata
self._log("After Year on end of token")
# Issue left on the end of string tokens
if "issue" not in self.metadata and not year_end_matched:
exclude: str = self.metadata.get("year", "") # type: ignore
self._parse_items(ISSUE_END_RE, exclude=exclude)
if "issue" not in self.metadata:
self._parse_items(ISSUE_BEGIN_RE)
self._log("After Issue on ends of tokens")
def _parse_publisher(self):
"""Parse Publisher."""
# Pop single tokens so they don't end up titles.
self._parse_items(PUBLISHER_UNAMBIGUOUS_TOKEN_RE, first_only=True)
if "publisher" not in self.metadata:
self._parse_items(PUBLISHER_AMBIGUOUS_TOKEN_RE, first_only=True)
if "publisher" not in self.metadata:
self._parse_items(PUBLISHER_UNAMBIGUOUS_RE, pop=False, first_only=True)
if "publisher" not in self.metadata:
self._parse_items(PUBLISHER_AMBIGUOUS_RE, pop=False, first_only=True)
self._log("After publisher")
def _is_title_in_position(self, value): def _is_title_in_position(self, value):
"""Does the title come after series and one other token if they exist.""" """Does the title come after series and one other token if they exist."""
@ -193,7 +270,7 @@ class ComicFilenameParser:
value = value.strip("'").strip() value = value.strip("'").strip()
return value.strip('"').strip() return value.strip('"').strip()
def _assign_remaining_groups(self): def _parse_series_and_title(self):
"""Assign series and title.""" """Assign series and title."""
if not self._unparsed_path: if not self._unparsed_path:
return return
@ -221,6 +298,7 @@ class ComicFilenameParser:
unused_tokens.append(token) unused_tokens.append(token)
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else "" self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
self._log("After Series & Title")
def _add_remainders(self): def _add_remainders(self):
"""Add Remainders.""" """Add Remainders."""
@ -232,101 +310,20 @@ class ComicFilenameParser:
if remainders: if remainders:
self.metadata["remainders"] = tuple(remainders) self.metadata["remainders"] = tuple(remainders)
def _log(self, label):
if not self._debug:
return
print_log_header(label)
combined = {}
for key in self.metadata:
combined[key] = (self.metadata.get(key), self.path_index(key))
print(" " + self._unparsed_path)
print(" " + pformat(combined))
def parse(self) -> dict[str, Any]: def parse(self) -> dict[str, Any]:
"""Parse the filename with a hierarchy of regexes.""" """Parse the filename with a hierarchy of regexes."""
# Init
#
self._log("Init") self._log("Init")
self._parse_ext() self._parse_ext()
self._clean_dividers() self._clean_dividers()
self._log("After Clean Path") self._parse_issue()
self._parse_volume()
# Issue
#
self._parse_items(ISSUE_NUMBER_RE)
if "issue" not in self.metadata:
self._parse_items(ISSUE_WITH_COUNT_RE)
# self._parse_items(ISSUE_COUNT_RE)
self._log("After Issue")
# Volume
#
self._parse_items(VOLUME_RE)
if "volume" not in self.metadata:
self._parse_items(VOLUME_WITH_COUNT_RE)
self._log("After Volume")
# Date
#
self._parse_dates() self._parse_dates()
self._log("After Date") self._parse_format_and_scan_info()
self._parse_ends_of_remaining_tokens()
# Format & Scan Info self._parse_publisher()
# self._parse_series_and_title()
self._parse_items(
ORIGINAL_FORMAT_SCAN_INFO_RE,
require_all=True,
)
if "original_format" not in self.metadata:
self._parse_items(
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
)
self._parse_items(SCAN_INFO_SECONDARY_RE)
if (
scan_info_secondary := self.metadata.pop("secondary_scan_info", "")
) and "scan_info" not in self.metadata:
self.metadata["scan_info"] = scan_info_secondary # type: ignore
self._log("After original_format & scan_info")
# Series and Title
#
# Volume left on the end of string tokens
if "volume" not in self.metadata:
self._parse_items(BOOK_VOLUME_RE)
self._log("After original_format & scan_info")
# Years left on the end of string tokens
year_end_matched = False
if "year" not in self.metadata:
self._parse_items(YEAR_END_RE, pop=False)
year_end_matched = "year" in self.metadata
self._log("After Year on end of token")
# Issue left on the end of string tokens
if "issue" not in self.metadata and not year_end_matched:
exclude: str = self.metadata.get("year", "") # type: ignore
self._parse_items(ISSUE_END_RE, exclude=exclude)
if "issue" not in self.metadata:
self._parse_items(ISSUE_BEGIN_RE)
self._log("After Issue on ends of tokens")
# Publisher
#
# Pop single tokens so they don't end up titles.
self._parse_items(PUBLISHER_UNAMBIGUOUS_TOKEN_RE, first_only=True)
if "publisher" not in self.metadata:
self._parse_items(PUBLISHER_AMBIGUOUS_TOKEN_RE, first_only=True)
if "publisher" not in self.metadata:
self._parse_items(PUBLISHER_UNAMBIGUOUS_RE, pop=False, first_only=True)
if "publisher" not in self.metadata:
self._parse_items(PUBLISHER_AMBIGUOUS_RE, pop=False, first_only=True)
self._log("After publisher")
self._assign_remaining_groups()
self._log("After Series & Title")
# Copy volume into issue if it's all we have. # Copy volume into issue if it's all we have.
#
if "issue" not in self.metadata and "volume" in self.metadata: if "issue" not in self.metadata and "volume" in self.metadata:
self.metadata["issue"] = self.metadata["volume"] self.metadata["issue"] = self.metadata["volume"]
self._log("After issue can be volume") self._log("After issue can be volume")