break up parse method and sort methods
This commit is contained in:
parent
b57899d954
commit
4466fa6723
@ -57,6 +57,16 @@ class ComicFilenameParser:
|
|||||||
self._path_indexes[value] = index
|
self._path_indexes[value] = index
|
||||||
return self._path_indexes[value]
|
return self._path_indexes[value]
|
||||||
|
|
||||||
|
def _log(self, label):
|
||||||
|
if not self._debug:
|
||||||
|
return
|
||||||
|
print_log_header(label)
|
||||||
|
combined = {}
|
||||||
|
for key in self.metadata:
|
||||||
|
combined[key] = (self.metadata.get(key), self.path_index(key))
|
||||||
|
print(" " + self._unparsed_path)
|
||||||
|
print(" " + pformat(combined))
|
||||||
|
|
||||||
def _parse_ext(self):
|
def _parse_ext(self):
|
||||||
"""Pop the extension from the pathname."""
|
"""Pop the extension from the pathname."""
|
||||||
path = Path(self._unparsed_path)
|
path = Path(self._unparsed_path)
|
||||||
@ -78,6 +88,7 @@ class ComicFilenameParser:
|
|||||||
replacement, count = pair
|
replacement, count = pair
|
||||||
data = regex.sub(replacement, data, count=count).strip()
|
data = regex.sub(replacement, data, count=count).strip()
|
||||||
self._unparsed_path = data.strip()
|
self._unparsed_path = data.strip()
|
||||||
|
self._log("After Clean Path")
|
||||||
|
|
||||||
def _parse_items_update_metadata(
|
def _parse_items_update_metadata(
|
||||||
self, matches: Match, exclude: str, require_all: bool, first_only: bool
|
self, matches: Match, exclude: str, require_all: bool, first_only: bool
|
||||||
@ -131,6 +142,20 @@ class ComicFilenameParser:
|
|||||||
if pop:
|
if pop:
|
||||||
self._parse_items_pop_tokens(regex, first_only)
|
self._parse_items_pop_tokens(regex, first_only)
|
||||||
|
|
||||||
|
def _parse_issue(self):
|
||||||
|
"""Parse Issue."""
|
||||||
|
self._parse_items(ISSUE_NUMBER_RE)
|
||||||
|
if "issue" not in self.metadata:
|
||||||
|
self._parse_items(ISSUE_WITH_COUNT_RE)
|
||||||
|
self._log("After Issue")
|
||||||
|
|
||||||
|
def _parse_volume(self):
|
||||||
|
"""Parse Volume."""
|
||||||
|
self._parse_items(VOLUME_RE)
|
||||||
|
if "volume" not in self.metadata:
|
||||||
|
self._parse_items(VOLUME_WITH_COUNT_RE)
|
||||||
|
self._log("After Volume")
|
||||||
|
|
||||||
def _alpha_month_to_numeric(self):
|
def _alpha_month_to_numeric(self):
|
||||||
"""Translate alpha_month to numeric month."""
|
"""Translate alpha_month to numeric month."""
|
||||||
if alpha_month := self.metadata.pop("alpha_month", ""):
|
if alpha_month := self.metadata.pop("alpha_month", ""):
|
||||||
@ -165,6 +190,58 @@ class ComicFilenameParser:
|
|||||||
self._parse_items(YEAR_TOKEN_RE)
|
self._parse_items(YEAR_TOKEN_RE)
|
||||||
if self.metadata.get("year", "") != volume:
|
if self.metadata.get("year", "") != volume:
|
||||||
self.metadata["volume"] = volume
|
self.metadata["volume"] = volume
|
||||||
|
self._log("After Date")
|
||||||
|
|
||||||
|
def _parse_format_and_scan_info(self):
|
||||||
|
# Format & Scan Info
|
||||||
|
#
|
||||||
|
self._parse_items(
|
||||||
|
ORIGINAL_FORMAT_SCAN_INFO_RE,
|
||||||
|
require_all=True,
|
||||||
|
)
|
||||||
|
if "original_format" not in self.metadata:
|
||||||
|
self._parse_items(
|
||||||
|
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
|
||||||
|
)
|
||||||
|
self._parse_items(SCAN_INFO_SECONDARY_RE)
|
||||||
|
if (
|
||||||
|
scan_info_secondary := self.metadata.pop("secondary_scan_info", "")
|
||||||
|
) and "scan_info" not in self.metadata:
|
||||||
|
self.metadata["scan_info"] = scan_info_secondary # type: ignore
|
||||||
|
self._log("After original_format & scan_info")
|
||||||
|
|
||||||
|
def _parse_ends_of_remaining_tokens(self):
|
||||||
|
# Volume left on the end of string tokens
|
||||||
|
if "volume" not in self.metadata:
|
||||||
|
self._parse_items(BOOK_VOLUME_RE)
|
||||||
|
self._log("After original_format & scan_info")
|
||||||
|
|
||||||
|
# Years left on the end of string tokens
|
||||||
|
year_end_matched = False
|
||||||
|
if "year" not in self.metadata:
|
||||||
|
self._parse_items(YEAR_END_RE, pop=False)
|
||||||
|
year_end_matched = "year" in self.metadata
|
||||||
|
self._log("After Year on end of token")
|
||||||
|
|
||||||
|
# Issue left on the end of string tokens
|
||||||
|
if "issue" not in self.metadata and not year_end_matched:
|
||||||
|
exclude: str = self.metadata.get("year", "") # type: ignore
|
||||||
|
self._parse_items(ISSUE_END_RE, exclude=exclude)
|
||||||
|
if "issue" not in self.metadata:
|
||||||
|
self._parse_items(ISSUE_BEGIN_RE)
|
||||||
|
self._log("After Issue on ends of tokens")
|
||||||
|
|
||||||
|
def _parse_publisher(self):
|
||||||
|
"""Parse Publisher."""
|
||||||
|
# Pop single tokens so they don't end up titles.
|
||||||
|
self._parse_items(PUBLISHER_UNAMBIGUOUS_TOKEN_RE, first_only=True)
|
||||||
|
if "publisher" not in self.metadata:
|
||||||
|
self._parse_items(PUBLISHER_AMBIGUOUS_TOKEN_RE, first_only=True)
|
||||||
|
if "publisher" not in self.metadata:
|
||||||
|
self._parse_items(PUBLISHER_UNAMBIGUOUS_RE, pop=False, first_only=True)
|
||||||
|
if "publisher" not in self.metadata:
|
||||||
|
self._parse_items(PUBLISHER_AMBIGUOUS_RE, pop=False, first_only=True)
|
||||||
|
self._log("After publisher")
|
||||||
|
|
||||||
def _is_title_in_position(self, value):
|
def _is_title_in_position(self, value):
|
||||||
"""Does the title come after series and one other token if they exist."""
|
"""Does the title come after series and one other token if they exist."""
|
||||||
@ -193,7 +270,7 @@ class ComicFilenameParser:
|
|||||||
value = value.strip("'").strip()
|
value = value.strip("'").strip()
|
||||||
return value.strip('"').strip()
|
return value.strip('"').strip()
|
||||||
|
|
||||||
def _assign_remaining_groups(self):
|
def _parse_series_and_title(self):
|
||||||
"""Assign series and title."""
|
"""Assign series and title."""
|
||||||
if not self._unparsed_path:
|
if not self._unparsed_path:
|
||||||
return
|
return
|
||||||
@ -221,6 +298,7 @@ class ComicFilenameParser:
|
|||||||
unused_tokens.append(token)
|
unused_tokens.append(token)
|
||||||
|
|
||||||
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
|
self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
|
||||||
|
self._log("After Series & Title")
|
||||||
|
|
||||||
def _add_remainders(self):
|
def _add_remainders(self):
|
||||||
"""Add Remainders."""
|
"""Add Remainders."""
|
||||||
@ -232,101 +310,20 @@ class ComicFilenameParser:
|
|||||||
if remainders:
|
if remainders:
|
||||||
self.metadata["remainders"] = tuple(remainders)
|
self.metadata["remainders"] = tuple(remainders)
|
||||||
|
|
||||||
def _log(self, label):
|
|
||||||
if not self._debug:
|
|
||||||
return
|
|
||||||
print_log_header(label)
|
|
||||||
combined = {}
|
|
||||||
for key in self.metadata:
|
|
||||||
combined[key] = (self.metadata.get(key), self.path_index(key))
|
|
||||||
print(" " + self._unparsed_path)
|
|
||||||
print(" " + pformat(combined))
|
|
||||||
|
|
||||||
def parse(self) -> dict[str, Any]:
|
def parse(self) -> dict[str, Any]:
|
||||||
"""Parse the filename with a hierarchy of regexes."""
|
"""Parse the filename with a hierarchy of regexes."""
|
||||||
# Init
|
|
||||||
#
|
|
||||||
self._log("Init")
|
self._log("Init")
|
||||||
self._parse_ext()
|
self._parse_ext()
|
||||||
self._clean_dividers()
|
self._clean_dividers()
|
||||||
self._log("After Clean Path")
|
self._parse_issue()
|
||||||
|
self._parse_volume()
|
||||||
# Issue
|
|
||||||
#
|
|
||||||
self._parse_items(ISSUE_NUMBER_RE)
|
|
||||||
if "issue" not in self.metadata:
|
|
||||||
self._parse_items(ISSUE_WITH_COUNT_RE)
|
|
||||||
# self._parse_items(ISSUE_COUNT_RE)
|
|
||||||
self._log("After Issue")
|
|
||||||
|
|
||||||
# Volume
|
|
||||||
#
|
|
||||||
self._parse_items(VOLUME_RE)
|
|
||||||
if "volume" not in self.metadata:
|
|
||||||
self._parse_items(VOLUME_WITH_COUNT_RE)
|
|
||||||
self._log("After Volume")
|
|
||||||
|
|
||||||
# Date
|
|
||||||
#
|
|
||||||
self._parse_dates()
|
self._parse_dates()
|
||||||
self._log("After Date")
|
self._parse_format_and_scan_info()
|
||||||
|
self._parse_ends_of_remaining_tokens()
|
||||||
# Format & Scan Info
|
self._parse_publisher()
|
||||||
#
|
self._parse_series_and_title()
|
||||||
self._parse_items(
|
|
||||||
ORIGINAL_FORMAT_SCAN_INFO_RE,
|
|
||||||
require_all=True,
|
|
||||||
)
|
|
||||||
if "original_format" not in self.metadata:
|
|
||||||
self._parse_items(
|
|
||||||
ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
|
|
||||||
)
|
|
||||||
self._parse_items(SCAN_INFO_SECONDARY_RE)
|
|
||||||
if (
|
|
||||||
scan_info_secondary := self.metadata.pop("secondary_scan_info", "")
|
|
||||||
) and "scan_info" not in self.metadata:
|
|
||||||
self.metadata["scan_info"] = scan_info_secondary # type: ignore
|
|
||||||
self._log("After original_format & scan_info")
|
|
||||||
|
|
||||||
# Series and Title
|
|
||||||
#
|
|
||||||
# Volume left on the end of string tokens
|
|
||||||
if "volume" not in self.metadata:
|
|
||||||
self._parse_items(BOOK_VOLUME_RE)
|
|
||||||
self._log("After original_format & scan_info")
|
|
||||||
|
|
||||||
# Years left on the end of string tokens
|
|
||||||
year_end_matched = False
|
|
||||||
if "year" not in self.metadata:
|
|
||||||
self._parse_items(YEAR_END_RE, pop=False)
|
|
||||||
year_end_matched = "year" in self.metadata
|
|
||||||
self._log("After Year on end of token")
|
|
||||||
|
|
||||||
# Issue left on the end of string tokens
|
|
||||||
if "issue" not in self.metadata and not year_end_matched:
|
|
||||||
exclude: str = self.metadata.get("year", "") # type: ignore
|
|
||||||
self._parse_items(ISSUE_END_RE, exclude=exclude)
|
|
||||||
if "issue" not in self.metadata:
|
|
||||||
self._parse_items(ISSUE_BEGIN_RE)
|
|
||||||
self._log("After Issue on ends of tokens")
|
|
||||||
|
|
||||||
# Publisher
|
|
||||||
#
|
|
||||||
# Pop single tokens so they don't end up titles.
|
|
||||||
self._parse_items(PUBLISHER_UNAMBIGUOUS_TOKEN_RE, first_only=True)
|
|
||||||
if "publisher" not in self.metadata:
|
|
||||||
self._parse_items(PUBLISHER_AMBIGUOUS_TOKEN_RE, first_only=True)
|
|
||||||
if "publisher" not in self.metadata:
|
|
||||||
self._parse_items(PUBLISHER_UNAMBIGUOUS_RE, pop=False, first_only=True)
|
|
||||||
if "publisher" not in self.metadata:
|
|
||||||
self._parse_items(PUBLISHER_AMBIGUOUS_RE, pop=False, first_only=True)
|
|
||||||
self._log("After publisher")
|
|
||||||
|
|
||||||
self._assign_remaining_groups()
|
|
||||||
self._log("After Series & Title")
|
|
||||||
|
|
||||||
# Copy volume into issue if it's all we have.
|
# Copy volume into issue if it's all we have.
|
||||||
#
|
|
||||||
if "issue" not in self.metadata and "volume" in self.metadata:
|
if "issue" not in self.metadata and "volume" in self.metadata:
|
||||||
self.metadata["issue"] = self.metadata["volume"]
|
self.metadata["issue"] = self.metadata["volume"]
|
||||||
self._log("After issue can be volume")
|
self._log("After issue can be volume")
|
||||||
|
Loading…
Reference in New Issue
Block a user