make parser a class. use delimiters in a string instead of the data_list

AJ Slater 2024-02-20 00:21:54 -08:00
parent 71dd1d3972
commit 664f54cecb
8 changed files with 246 additions and 292 deletions
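
In short: the comicfn2dict() function becomes the ComicFilenameParser class, and dict2comicfn() becomes serialize(). A minimal sketch of the new call sites, based on the import and CLI changes below (the filename is one of the test fixtures):

    from comicfn2dict import ComicFilenameParser, serialize

    # before: metadata = comicfn2dict("Jeremy John 003 (2007) (digital) (Minutemen-Faessla).cbz")
    parser = ComicFilenameParser(
        "Jeremy John 003 (2007) (digital) (Minutemen-Faessla).cbz",
        verbose=0,  # > 0 prints intermediate parsing steps
    )
    metadata = parser.parse()  # {"series": ..., "issue": ..., "year": ..., "ext": "cbz", ...}
    filename = serialize(metadata)  # round-trip back to a basename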

View File

@@ -1,3 +1,3 @@
 """Comic Filename to Dict parser and unparser."""
-from .parse import comicfn2dict  # noqa: F401
+from .parse import ComicFilenameParser  # noqa: F401
-from .unparse import dict2comicfn  # noqa: F401
+from .unparse import serialize  # noqa: F401

View File

@@ -3,8 +3,7 @@
 from argparse import ArgumentParser
 from pathlib import Path
 from pprint import pprint
-from comicfn2dict.parse import comicfn2dict
+from comicfn2dict.parse import ComicFilenameParser
 
 
 def main():
@@ -12,9 +11,16 @@ def main():
     description = "Comic book archive read/write tool."
     parser = ArgumentParser(description=description)
     parser.add_argument("path", help="Path of comic filename to parse", type=Path)
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=0,
+        action="count",
+        help="Display intermediate parsing steps. Good for debugging.",
+    )
     args = parser.parse_args()
     name = args.path.name
-    metadata = comicfn2dict(name)
+    metadata = ComicFilenameParser(name, verbose=args.verbose).parse()
     pprint(metadata)  # noqa:T203
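
Note: -v uses argparse's count action, so verbosity stacks and defaults to 0; ComicFilenameParser only checks verbose > 0 to enable its debug logging. Standard argparse behavior, illustrated here without the positional path argument:

    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("-v", "--verbose", default=0, action="count")
    print(parser.parse_args([]).verbose)  # 0 -> parser stays quiet
    print(parser.parse_args(["-vv"]).verbose)  # 2 -> debug logging enabled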

View File

@@ -1,3 +1,3 @@
 """API import source."""
-from comicfn2dict.parse import comicfn2dict  # noqa: F401
+from comicfn2dict.parse import ComicFilenameParser  # noqa: F401
 from comicfn2dict.unparse import dict2comicfn  # noqa: F401

View File

@@ -1,22 +1,21 @@
 """Parse comic book archive names using the simple 'parse' parser."""
 from pprint import pprint
+from copy import copy
 from pathlib import Path
-from re import Match, Pattern
+from re import Pattern
 from typing import Any
 
 from comicfn2dict.regex import (
     EXTRA_SPACES_RE,
     ISSUE_ANYWHERE_RE,
-    ISSUE_BEGIN_RE,
     ISSUE_COUNT_RE,
-    ISSUE_END_RE,
     ISSUE_NUMBER_RE,
-    ISSUE_TOKEN_RE,
+    ISSUE_BEGIN_RE,
+    ISSUE_END_RE,
     NON_SPACE_DIVIDER_RE,
-    ORIGINAL_FORMAT_RE,
+    ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
     ORIGINAL_FORMAT_SCAN_INFO_RE,
     REMAINING_GROUP_RE,
-    SCAN_INFO_RE,
     VOLUME_RE,
     YEAR_BEGIN_RE,
    YEAR_END_RE,
@@ -24,270 +23,195 @@ from comicfn2dict.regex import (
 )
 
 _REMAINING_GROUP_KEYS = ("series", "title")
+_TITLE_PRECEDING_KEYS = ("issue", "year", "volume")
+_TOKEN_DELIMETER = "/"
 
 
-def _parse_ext(name: str | Path, metadata: dict) -> str:
-    """Pop the extension from the pathname."""
-    if isinstance(name, str):
-        name = name.strip()
-    path = Path(name)
-    suffix = path.suffix
-    data = path.name.removesuffix(suffix)
-    ext = suffix.lstrip(".")
-    if ext:
-        metadata["ext"] = ext
-    return data
-
-
-def _clean_dividers(data: str) -> str:
-    """Replace non space dividers and clean extra spaces out of string."""
-    data = NON_SPACE_DIVIDER_RE.sub(" ", data)
-    return EXTRA_SPACES_RE.sub(" ", data)
-
-
-def _get_data_list(path: str | Path, metadata: dict) -> list[str]:
-    """Prepare data list from a path or string."""
-    data = _parse_ext(path, metadata)
-    data = _clean_dividers(data)
-    return [data]
-
-
-def _grouping_operators_strip(value: str) -> str:
-    """Strip spaces and parens."""
-    value = value.strip()
-    value = value.strip("()").strip()
-    value = value.strip("-").strip()
-    value = value.strip("'").strip('"').strip()
-    return value
-
-
-def _splicey_dicey(
-    data_list: list[str], index: int, match: Match, match_group: int | str = 0
-) -> str:
-    """Replace a string token from a list with two strings and the value removed.
-
-    And return the value.
-    """
-    value = match.group(match_group)
-    data = data_list.pop(index)
-    data_ends = []
-    if data_before := data[: match.start()].strip():
-        data_ends.append(data_before)
-    if data_after := data[match.end() :].strip():
-        data_ends.append(data_after)
-    data_list[index:index] = data_ends
-    return _grouping_operators_strip(value)
-
-
-def _match_original_format_and_scan_info(
-    match: Match, metadata: dict[str, Any], data_list: list[str], index: int
-) -> None:
-    """Match (ORIGINAL_FORMAT-SCAN_INFO)."""
-    original_format = match.group("original_format")
-    try:
-        scan_info = match.group("scan_info")
-    except IndexError:
-        scan_info = None
-    metadata["original_format"] = _grouping_operators_strip(original_format)
-    match_group = 1
-    if scan_info:
-        metadata["scan_info"] = _grouping_operators_strip(scan_info)
-        match_group = 0
-    _splicey_dicey(data_list, index, match, match_group=match_group)
-
-
-def _parse_original_format_and_scan_info(data_list: list[str], metadata: dict) -> int:
-    """Parse (ORIGINAL_FORMAT-SCAN_INFO)."""
-    index = 0
-    match = None
-    for data in data_list:
-        match = ORIGINAL_FORMAT_SCAN_INFO_RE.search(data)
-        if match:
-            _match_original_format_and_scan_info(match, metadata, data_list, index)
-            break
-        index += 1
-    else:
-        index = 0
-    return index
-
-
-def _pop_value_from_token(
-    data_list: list,
-    metadata: dict,
-    regex: Pattern,
-    key: str,
-    index: int = 0,
-) -> str:
-    """Search token for value, splice and assign to metadata."""
-    data = data_list[index]
-    match = regex.search(data)
-    if match:
-        value = _splicey_dicey(data_list, index, match, key)
-        metadata[key] = value
-    else:
-        value = ""
-    return value
-
-
-def _parse_item(
-    data_list: list[str],
-    metadata: dict,
-    regex: Pattern,
-    key: str,
-    start_index: int = 0,
-    path: str = "",
-) -> int:
-    """Parse a value from the data list into metadata and alter the data list."""
-    path_index = -1
-    index = start_index
-    dl_len = end_index = len(data_list)
-    if index >= end_index:
-        index = 0
-    while index < end_index:
-        value = _pop_value_from_token(data_list, metadata, regex, key, index)
-        if value:
-            if "key" == "issue":
-                path_index = path.find(value)
-            break
-        index += 1
-        if index > dl_len and start_index > 0:
-            index = 0
-            end_index = start_index
-    return path_index
-
-
-def _pop_issue_from_text_fields(
-    data_list: list[str], metadata: dict, index: int
-) -> str:
-    """Search issue from ends of text fields."""
-    if "issue" not in metadata:
-        _pop_value_from_token(data_list, metadata, ISSUE_END_RE, "issue", index=index)
-    if "issue" not in metadata:
-        _pop_value_from_token(data_list, metadata, ISSUE_BEGIN_RE, "issue", index=index)
-    return data_list.pop(index)
-
-
-TITLE_PRECEDING_KEYS = ("issue", "year", "volume")
-
-
-def _is_title_in_position(path, value, metadata):
-    """Does the title come after series and one other token if they exist."""
-    # TODO this could be faster if indexes could be grabbed for these tokens
-    # when they are extracted.
-    title_index = path.find(value)
-    # Does a series come first.
-    series = metadata.get("series")
-    if not series:
-        return False
-    series_index = path.find(series)
-    if title_index < series_index:
-        return False
-    # If other tokens exist then they much precede the title.
-    title_ok = False
-    other_tokens_exist = False
-    for preceding_key in TITLE_PRECEDING_KEYS:
-        preceding_value = metadata.get(preceding_key)
-        if not preceding_value:
-            continue
-        other_tokens_exist = True
-        preceding_index = path.find(preceding_value)
-        if title_index > preceding_index:
-            title_ok = True
-            break
-    return title_ok or not other_tokens_exist
-
-
-def _assign_remaining_groups(data_list: list[str], metadata: dict, path: str):
-    """Assign series and title."""
-    index = 0
-    for key in _REMAINING_GROUP_KEYS:
-        try:
-            data = data_list[index]
-        except (IndexError, TypeError):
-            break
-        match = REMAINING_GROUP_RE.search(data) if data else None
-        if match:
-            value = _pop_issue_from_text_fields(data_list, metadata, index)
-            if key == "title" and not _is_title_in_position(path, value, metadata):
-                continue
-            value = _grouping_operators_strip(value)
-            if value:
-                metadata[key] = value
-        else:
-            index += 1
-
-
-def _pickup_issue(remainders: list[str], metadata: dict) -> None:
-    """Get issue from remaining tokens or anywhere in a pinch."""
-    if "issue" in metadata:
-        return
-    _parse_item(remainders, metadata, ISSUE_TOKEN_RE, "issue")
-    if "issue" in metadata:
-        return
-    _parse_item(remainders, metadata, ISSUE_ANYWHERE_RE, "issue")
-
-
-def _log_progress(label, metadata, data_list):
-    print(label + ":")
-    pprint(metadata)
-    pprint(data_list)
-
-
-def comicfn2dict(path: str | Path) -> dict[str, Any]:
-    """Parse the filename with a hierarchy of regexes."""
-    metadata = {}
-    data_list = _get_data_list(path, metadata)
-    _log_progress("INITIAL", metadata, data_list)
-    # Parse paren tokens
-    _parse_item(data_list, metadata, ISSUE_COUNT_RE, "issue_count")
-    _parse_item(data_list, metadata, YEAR_TOKEN_RE, "year")
-    of_index = _parse_original_format_and_scan_info(data_list, metadata)
-    if "original_format" not in metadata:
-        of_index = _parse_item(
-            data_list, metadata, ORIGINAL_FORMAT_RE, "original_format"
-        )
-    if "scan_info" not in metadata:
-        # Start searching for scan_info after original format.
-        _parse_item(
-            data_list,
-            metadata,
-            SCAN_INFO_RE,
-            "scan_info",
-            start_index=of_index + 1,
-        )
-    _log_progress("AFTER PAREN TOKENS", metadata, data_list)
-    # Parse regular tokens
-    _parse_item(data_list, metadata, VOLUME_RE, "volume")
-    _parse_item(data_list, metadata, ISSUE_NUMBER_RE, "issue", path=str(path))
-    _log_progress("AFTER REGULAR TOKENS", metadata, data_list)
-    # Pickup year if not gotten.
-    if "year" not in metadata:
-        _parse_item(data_list, metadata, YEAR_BEGIN_RE, "year")
-    if "year" not in metadata:
-        _parse_item(data_list, metadata, YEAR_END_RE, "year")
-    _log_progress("AFTER YEAR PICKUP", metadata, data_list)
-    # Pickup issue if it's a standalone token
-    if "issue" not in metadata:
-        _parse_item(data_list, metadata, ISSUE_TOKEN_RE, "issue")
-    _log_progress("AFTER ISSUE PICKUP", metadata, data_list)
-    # Series and Title. Also looks for issue.
-    _assign_remaining_groups(data_list, metadata, str(path))
-    _log_progress("AFTER SERIES AND TITLE", metadata, data_list)
-    # Final try for issue number.
-    _pickup_issue(data_list, metadata)
-    _log_progress("AFTER ISSUE PICKUP", metadata, data_list)
-    # Add Remainders
-    if data_list:
-        metadata["remainders"] = tuple(data_list)
-    return metadata
+class ComicFilenameParser:
+    @staticmethod
+    def _clean_dividers(data: str) -> str:
+        """Replace non space dividers and clean extra spaces out of string."""
+        data = NON_SPACE_DIVIDER_RE.sub(" ", data)
+        return EXTRA_SPACES_RE.sub(" ", data).strip()
+
+    def _parse_ext(self):
+        """Pop the extension from the pathname."""
+        path = Path(self._unparsed_path)
+        suffix = path.suffix
+        if not suffix:
+            return
+        self.path_indexes["ext"] = self.path.rfind(suffix)
+        data = path.name.removesuffix(suffix)
+        ext = suffix.lstrip(".")
+        self.metadata["ext"] = ext
+        self._unparsed_path = data
+
+    def _grouping_operators_strip(self, value: str) -> str:
+        """Strip spaces and parens."""
+        value = value.strip()
+        value = value.strip("()").strip()
+        value = value.strip("-").strip()
+        value = value.strip("'").strip('"').strip()
+        return value
+
+    def _parse_item(
+        self,
+        regex: Pattern,
+        require_all: bool = False,
+    ) -> None:
+        """Parse a value from the data list into metadata and alter the data list."""
+        matches = regex.search(self._unparsed_path)
+        if not matches:
+            return
+        matched_metadata = {}
+        matched_path_indexes = {}
+        for key, value in matches.groupdict().items():
+            if not value:
+                if require_all:
+                    return
+                continue
+            matched_path_indexes[key] = self.path.find(value)
+            # TODO idk if strip is necceesary here
+            matched_metadata[key] = self._grouping_operators_strip(value)
+        self.metadata.update(matched_metadata)
+        self.path_indexes.update(matched_path_indexes)
+        marked_str = regex.sub(_TOKEN_DELIMETER, self._unparsed_path)
+        parts = []
+        for part in marked_str.split(_TOKEN_DELIMETER):
+            if token := part.strip():
+                parts.append(token)
+        self._unparsed_path = _TOKEN_DELIMETER.join(parts)
+
+    def _is_title_in_position(self, value):
+        """Does the title come after series and one other token if they exist."""
+        title_index = self.path.find(value)
+        # Does a series come first.
+        if title_index < self.path_indexes.get("series", -1):
+            return False
+        # If other tokens exist then they much precede the title.
+        title_ok = False
+        other_tokens_exist = False
+        for preceding_key in _TITLE_PRECEDING_KEYS:
+            other_tokens_exist = True
+            if title_index > self.path_indexes.get(preceding_key, -1):
+                title_ok = True
+                break
+        return title_ok or not other_tokens_exist
+
+    def _assign_remaining_groups(self):
+        """Assign series and title."""
+        if not self._unparsed_path:
+            return
+        # TODO fix REMAINING GROUP_RE to use token delim
+        tokens = self._unparsed_path.split(_TOKEN_DELIMETER)
+        # ASSIGN GROUPS
+        remaining_key_index = 0
+        unused_tokens = []
+        while tokens and remaining_key_index < len(_REMAINING_GROUP_KEYS):
+            key = _REMAINING_GROUP_KEYS[remaining_key_index]
+            token = tokens.pop(0)
+            match = REMAINING_GROUP_RE.search(token)
+            if match:
+                value = match.group()
+                if key == "title" and not self._is_title_in_position(value):
+                    unused_tokens.append(token)
+                    continue
+                value = self._grouping_operators_strip(value)
+                self.metadata[key] = value
+                self.path_indexes[key] = self.path.find(value)
+                remaining_key_index += 1
+            else:
+                unused_tokens.append(token)
+        self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
+
+    def _add_remainders(self):
+        """Add Remainders."""
+        remainders = []
+        for token in self._unparsed_path.split(_TOKEN_DELIMETER):
+            if remainder := token.strip():
+                remainders.append(remainder)
+        if remainders:
+            self.metadata["remainders"] = tuple(remainders)
+
+    def _log_progress(self, label):
+        if not self._debug:
+            return
+        print(label + ":")
+        combined = {}
+        for key in self.metadata:
+            combined[key] = (self.metadata.get(key), self.path_indexes.get(key))
+        pprint(combined)
+        print(self._unparsed_path)
+
+    def parse(self) -> dict[str, Any]:
+        """Parse the filename with a hierarchy of regexes."""
+        self._unparsed_path = self._clean_dividers(self._unparsed_path)
+        self._log_progress("INITIAL")
+        self._parse_ext()
+        # Parse paren tokens
+        self._parse_item(ISSUE_COUNT_RE)
+        self._parse_item(YEAR_TOKEN_RE)
+        self._parse_item(
+            ORIGINAL_FORMAT_SCAN_INFO_RE,
+            require_all=True,
+        )
+        if "original_format" not in self.metadata:
+            self._parse_item(
+                ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
+            )
+        self._log_progress("AFTER PAREN TOKENS")
+        # Parse regular tokens
+        self._parse_item(VOLUME_RE)
+        self._parse_item(ISSUE_NUMBER_RE)
+        self._log_progress("AFTER REGULAR TOKENS")
+        # Pickup year if not gotten.
+        if "year" not in self.metadata:
+            self._parse_item(YEAR_BEGIN_RE)
+        if "year" not in self.metadata:
+            self._parse_item(YEAR_END_RE)
+        self._log_progress("AFTER YEAR PICKUP")
+        # Pickup issue if it's a standalone token
+        if "issue" not in self.metadata:
+            self._parse_item(ISSUE_END_RE)
+        if "issue" not in self.metadata:
+            self._parse_item(ISSUE_BEGIN_RE)
+        self._log_progress("AFTER ISSUE PICKUP")
+        # Series and Title. Also looks for issue.
+        self._assign_remaining_groups()
+        self._log_progress("AFTER SERIES AND TITLE")
+        # Final try for issue number.
+        if "issue" not in self.metadata:
+            # TODO is this useful?
+            self._parse_item(ISSUE_ANYWHERE_RE)
+        self._log_progress("AFTER ISSUE PICKUP")
+        self._add_remainders()
+        return self.metadata
+
+    def __init__(self, path: str | Path, verbose: int = 0):
+        """Initialize."""
+        self._debug: bool = verbose > 0
+        self.metadata: dict[str, str | tuple[str, ...]] = {}
+        self.path_indexes: dict[str, int] = {}
+        # munge path
+        if isinstance(path, str):
+            path = path.strip()
+        p_path = Path(path)
+        self.path = str(p_path.name).strip()
+        self._unparsed_path = copy(self.path)
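
The core of this rewrite: instead of splitting the filename into a data_list and splicing tokens with _splicey_dicey(), _parse_item() now substitutes _TOKEN_DELIMETER ("/") over each span a regex consumes, so the working state stays in one _unparsed_path string and path_indexes can record where each value sat in the original name. A toy illustration of that marking step (the year pattern here is a stand-in, not the module's YEAR_TOKEN_RE):

    import re

    _TOKEN_DELIMETER = "/"
    year_re = re.compile(r"\((?P<year>\d{4})\)")  # stand-in pattern

    unparsed = "Jeremy John 003 (2007) (digital)"
    marked = year_re.sub(_TOKEN_DELIMETER, unparsed)  # "Jeremy John 003 / (digital)"
    parts = [p.strip() for p in marked.split(_TOKEN_DELIMETER) if p.strip()]
    unparsed = _TOKEN_DELIMETER.join(parts)  # "Jeremy John 003/(digital)"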

View File

@@ -51,24 +51,27 @@ YEAR_BEGIN_RE = re_compile(r"^" + _YEAR_RE_EXP + r"\b")
 YEAR_END_RE = re_compile(r"\b" + _YEAR_RE_EXP + r"$")
 
 _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
 _ORIGINAL_FORMAT_RE_EXP = r"(?P<original_format>" + _OF_PATTERNS + r")"
-ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
 _SCAN_INFO_RE_EXP = r"(?P<scan_info>[^()]+?)"
-SCAN_INFO_RE = re_compile(_SCAN_INFO_RE_EXP, parenthify=True)
 _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
-    _ORIGINAL_FORMAT_RE_EXP + r"(?:-" + _SCAN_INFO_RE_EXP + r")?"
+    _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP  # + r")?"
 )
 ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
     _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
 )
+ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile(
+    r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
+)
 
 # REGULAR TOKENS
 VOLUME_RE = re_compile(r"((?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+))")
 _ISSUE_NUMBER_RE_EXP = r"(?P<issue>[\w½]+\.?\d*\w*)"
 ISSUE_NUMBER_RE = re_compile(r"(#" + _ISSUE_NUMBER_RE_EXP + r")")
 _ISSUE_RE_EXP = r"(?P<issue>[\d½]+\.?\d*\w*)"
-ISSUE_TOKEN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")$")
-ISSUE_END_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")$")
-ISSUE_BEGIN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")\b")
+ISSUE_END_RE = re_compile(r"([\/\s]" + _ISSUE_RE_EXP + r"(\/|$))")
+ISSUE_BEGIN_RE = re_compile(r"((^|\/)" + _ISSUE_RE_EXP + r"[\/|\s])")
+# TODO is this used?
 ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b")
 
 # LONG STRINGS
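
The new ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE covers the case where the original format and the scan info sit in two separate paren groups, which the combined pattern above cannot reach. A rough sketch with plain re, assuming "digital" is one of the ORIGINAL_FORMAT_PATTERNS and bypassing the module's re_compile()/parenthify helper:

    import re

    _of = r"(?P<original_format>digital)"  # stand-in for _ORIGINAL_FORMAT_RE_EXP
    _scan = r"(?P<scan_info>[^()]+?)"
    separate_re = re.compile(r"\(" + _of + r"\).*\(" + _scan + r"\)")

    m = separate_re.search("Jeremy John 001 (2007) (digital) (Minutemen-Faessla)")
    print(m.groupdict())  # {'original_format': 'digital', 'scan_info': 'Minutemen-Faessla'}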

View File

@@ -28,22 +28,27 @@ _FILENAME_FORMAT_TAGS: tuple[tuple[str, str | Callable], ...] = (
 _EMPTY_VALUES: tuple[None, str] = (None, "")
 
 
-def dict2comicfn(md: Mapping, ext: bool = True) -> str | None:
+def _tokenize_tag(md: Mapping, tag: str, fmt: str | Callable) -> str:
+    val = md.get(tag)
+    if val in _EMPTY_VALUES:
+        return ""
+    final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
+    token = final_fmt.format(val).strip()
+    return token
+
+
+def serialize(md: Mapping, ext: bool = True) -> str:
     """Get our preferred basename from a metadata dict."""
     if not md:
-        return None
+        return ""
     tokens = []
     for tag, fmt in _FILENAME_FORMAT_TAGS:
-        val = md.get(tag)
-        if val in _EMPTY_VALUES:
-            continue
-        final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
-        token = final_fmt.format(val).strip()
-        if token:
+        if token := _tokenize_tag(md, tag, fmt):
             tokens.append(token)
     fn = " ".join(tokens)
     if remainders := md.get("remainders"):
         remainder = " ".join(remainders)
+        # TODO oh this is the - delineated remainder :(
         fn += f" - {remainder}"
     if ext:
         fn += "." + md.get("ext", "cbz")

View File

@@ -136,8 +136,7 @@ FNS = {
         "year": "2006",
         "ext": "cbz",
         "scan_info": "Minutemen-Faessla",
-        # "original_format": "digital",
-        "remainders": ("(digital",),
+        "original_format": "digital",
     },
     "Jeremy John 003 (2007) (4 covers) (digital) (Minutemen-Faessla).cbz": {
         "series": "Jeremy John",
@@ -243,6 +242,7 @@ FNS = {
 FNS.update(  # Newly fixed.
     {
+        # BIG Change. title after token. more stripping.
        "'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz": {
             "ext": "cbz",
             "issue": "022",
@@ -252,6 +252,7 @@ FNS.update(  # Newly fixed.
             "year": "2024",
         },
         # Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543
+        # word characters now allowed to lead issue numbers only if preceded by a # marker
         "batman #B01 title.cbz": {
             "ext": "cbz",
             "issue": "B01",
@@ -261,32 +262,47 @@ FNS.update(  # Newly fixed.
     }
 )
 
+WONFIX = {
+    # Leading issue number is usually an alternate sequence number
+    # WONTFIX: Series names may begin with numerals.
+    "52 action comics #2024.cbz": {
+        "ext": "cbz",
+        "issue": "2024",
+        "series": "action comics",
+        "alternate": "52",
+    },
+    # Only the issue number. CT ensures that the series always has a value if possible
+    # I don't think making the series the same as the number is valuable.
+    "#52.cbz": {
+        "ext": "cbz",
+        "issue": "52",
+        "series": "52",
+    },
+}
+
+LATER = {
+    # 4 digit issue number
+    # should this be an issue number if year DONE?.
+    "action comics 1024.cbz": {
+        "ext": "cbz",
+        "issue": "1024",
+        "series": "action comics",
+    },
+}
+
 FNS.update(
     {
-        # Leading issue number is usually an alternate sequence number
-        "52 action comics #2024.cbz": {
-            "ext": "cbz",
-            "issue": "2024",
-            "series": "action comics",
-            "alternate": "52",
-        },
-        # 4 digit issue number
-        "action comics 1024.cbz": {
-            "ext": "cbz",
-            "issue": "1024",
-            "series": "action comics",
-        },
-        # Only the issue number. CT ensures that the series always has a value if possible
-        "#52.cbz": {
-            "ext": "cbz",
-            "issue": "52",
-            "series": "52",
-        },
         # CT treats double-underscore the same as double-dash
+        # BUG: should be title right now.
+        # FEATURE: double dash should be a token delimiter?
         "Monster_Island_v1_#2__repaired__c2c.cbz": {
             "ext": "cbz",
             "issue": "2",
             "series": "Monster Island",
             "volume": "1",
+            "remainders": ("repaired c2c",),
         },
         # I'm not sure there's a right way to parse this. This might also be a madeup filename I don't remember
         "Super Strange Yarns (1957) #92 (1969).cbz": {
             "ext": "cbz",
             "issue": "92",

View File

@@ -5,7 +5,7 @@ from types import MappingProxyType
 import pytest
 from deepdiff.diff import DeepDiff
 
-from comicfn2dict import comicfn2dict
+from comicfn2dict import ComicFilenameParser
 from tests.comic_filenames import FNS
 
 ALL_FIELDS = frozenset({"series", "volume", "issue", "issue_count", "year", "ext"})
@@ -16,7 +16,7 @@ FIELD_SCHEMA = MappingProxyType({key: None for key in ALL_FIELDS})
 def test_parse_filename(item):
     """Test filename parsing."""
     fn, defined_fields = item
-    md = comicfn2dict(fn)
+    md = ComicFilenameParser(fn, verbose=1).parse()
     diff = DeepDiff(defined_fields, md, ignore_order=True)
     print(fn)
     pprint(defined_fields)