make parser a class. use delimiters in a string instead of the data_list

AJ Slater 2024-02-20 00:21:54 -08:00
parent 71dd1d3972
commit 664f54cecb
8 changed files with 246 additions and 292 deletions
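In short, the functional API becomes a class-based one: parsing moves from a comicfn2dict() function to a ComicFilenameParser class, and unparsing from dict2comicfn() to serialize(). A rough usage sketch of the change (the example filename is invented, not taken from this commit):

    from comicfn2dict import ComicFilenameParser, serialize

    # Before: metadata = comicfn2dict("Jeremy John 003 (2007) (digital).cbz")
    # After: parsing is a class; verbose > 0 prints each intermediate stage.
    metadata = ComicFilenameParser("Jeremy John 003 (2007) (digital).cbz", verbose=0).parse()

    # Unparsing: dict2comicfn() becomes serialize().
    filename = serialize(metadata)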

View File

@@ -1,3 +1,3 @@
 """Comic Filename to Dict parser and unparser."""
-from .parse import comicfn2dict  # noqa: F401
-from .unparse import dict2comicfn  # noqa: F401
+from .parse import ComicFilenameParser  # noqa: F401
+from .unparse import serialize  # noqa: F401

View File

@@ -3,8 +3,7 @@
 from argparse import ArgumentParser
 from pathlib import Path
 from pprint import pprint

-from comicfn2dict.parse import comicfn2dict
+from comicfn2dict.parse import ComicFilenameParser


 def main():
@@ -12,9 +11,16 @@ def main():
     description = "Comic book archive read/write tool."
     parser = ArgumentParser(description=description)
     parser.add_argument("path", help="Path of comic filename to parse", type=Path)
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        default=0,
+        action="count",
+        help="Display intermediate parsing steps. Good for debugging.",
+    )
     args = parser.parse_args()
     name = args.path.name
-    metadata = comicfn2dict(name)
+    metadata = ComicFilenameParser(name, verbose=args.verbose).parse()
     pprint(metadata)  # noqa:T203

View File

@@ -1,3 +1,3 @@
 """API import source."""
-from comicfn2dict.parse import comicfn2dict  # noqa: F401
+from comicfn2dict.parse import ComicFilenameParser  # noqa: F401
 from comicfn2dict.unparse import dict2comicfn  # noqa: F401

View File

@@ -1,22 +1,21 @@
 """Parse comic book archive names using the simple 'parse' parser."""
 from pprint import pprint
+from copy import copy
 from pathlib import Path
-from re import Match, Pattern
+from re import Pattern
 from typing import Any

 from comicfn2dict.regex import (
     EXTRA_SPACES_RE,
     ISSUE_ANYWHERE_RE,
-    ISSUE_BEGIN_RE,
     ISSUE_COUNT_RE,
-    ISSUE_END_RE,
     ISSUE_NUMBER_RE,
-    ISSUE_TOKEN_RE,
+    ISSUE_BEGIN_RE,
+    ISSUE_END_RE,
     NON_SPACE_DIVIDER_RE,
-    ORIGINAL_FORMAT_RE,
+    ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
     ORIGINAL_FORMAT_SCAN_INFO_RE,
     REMAINING_GROUP_RE,
-    SCAN_INFO_RE,
     VOLUME_RE,
     YEAR_BEGIN_RE,
     YEAR_END_RE,
@@ -24,35 +23,31 @@ from comicfn2dict.regex import (
 )

 _REMAINING_GROUP_KEYS = ("series", "title")
+_TITLE_PRECEDING_KEYS = ("issue", "year", "volume")
+_TOKEN_DELIMETER = "/"


-def _parse_ext(name: str | Path, metadata: dict) -> str:
-    """Pop the extension from the pathname."""
-    if isinstance(name, str):
-        name = name.strip()
-    path = Path(name)
-    suffix = path.suffix
-    data = path.name.removesuffix(suffix)
-    ext = suffix.lstrip(".")
-    if ext:
-        metadata["ext"] = ext
-    return data
-
-
-def _clean_dividers(data: str) -> str:
-    """Replace non space dividers and clean extra spaces out of string."""
-    data = NON_SPACE_DIVIDER_RE.sub(" ", data)
-    return EXTRA_SPACES_RE.sub(" ", data)
-
-
-def _get_data_list(path: str | Path, metadata: dict) -> list[str]:
-    """Prepare data list from a path or string."""
-    data = _parse_ext(path, metadata)
-    data = _clean_dividers(data)
-    return [data]
-
-
-def _grouping_operators_strip(value: str) -> str:
+class ComicFilenameParser:
+    @staticmethod
+    def _clean_dividers(data: str) -> str:
+        """Replace non space dividers and clean extra spaces out of string."""
+        data = NON_SPACE_DIVIDER_RE.sub(" ", data)
+        return EXTRA_SPACES_RE.sub(" ", data).strip()
+
+    def _parse_ext(self):
+        """Pop the extension from the pathname."""
+        path = Path(self._unparsed_path)
+        suffix = path.suffix
+        if not suffix:
+            return
+
+        self.path_indexes["ext"] = self.path.rfind(suffix)
+        data = path.name.removesuffix(suffix)
+        ext = suffix.lstrip(".")
+        self.metadata["ext"] = ext
+        self._unparsed_path = data
+
+    def _grouping_operators_strip(self, value: str) -> str:
         """Strip spaces and parens."""
         value = value.strip()
         value = value.strip("()").strip()
@@ -60,234 +55,163 @@ def _grouping_operators_strip(value: str) -> str:
         value = value.strip("'").strip('"').strip()
         return value

-def _splicey_dicey(
-    data_list: list[str], index: int, match: Match, match_group: int | str = 0
-) -> str:
-    """Replace a string token from a list with two strings and the value removed.
-
-    And return the value.
-    """
-    value = match.group(match_group)
-    data = data_list.pop(index)
-    data_ends = []
-    if data_before := data[: match.start()].strip():
-        data_ends.append(data_before)
-    if data_after := data[match.end() :].strip():
-        data_ends.append(data_after)
-    data_list[index:index] = data_ends
-    return _grouping_operators_strip(value)
-
-
-def _match_original_format_and_scan_info(
-    match: Match, metadata: dict[str, Any], data_list: list[str], index: int
-) -> None:
-    """Match (ORIGINAL_FORMAT-SCAN_INFO)."""
-    original_format = match.group("original_format")
-    try:
-        scan_info = match.group("scan_info")
-    except IndexError:
-        scan_info = None
-    metadata["original_format"] = _grouping_operators_strip(original_format)
-    match_group = 1
-    if scan_info:
-        metadata["scan_info"] = _grouping_operators_strip(scan_info)
-        match_group = 0
-    _splicey_dicey(data_list, index, match, match_group=match_group)
-
-
-def _parse_original_format_and_scan_info(data_list: list[str], metadata: dict) -> int:
-    """Parse (ORIGINAL_FORMAT-SCAN_INFO)."""
-    index = 0
-    match = None
-    for data in data_list:
-        match = ORIGINAL_FORMAT_SCAN_INFO_RE.search(data)
-        if match:
-            _match_original_format_and_scan_info(match, metadata, data_list, index)
-            break
-        index += 1
-    else:
-        index = 0
-    return index
-
-
-def _pop_value_from_token(
-    data_list: list,
-    metadata: dict,
-    regex: Pattern,
-    key: str,
-    index: int = 0,
-) -> str:
-    """Search token for value, splice and assign to metadata."""
-    data = data_list[index]
-    match = regex.search(data)
-    if match:
-        value = _splicey_dicey(data_list, index, match, key)
-        metadata[key] = value
-    else:
-        value = ""
-    return value
-
-
-def _parse_item(
-    data_list: list[str],
-    metadata: dict,
-    regex: Pattern,
-    key: str,
-    start_index: int = 0,
-    path: str = "",
-) -> int:
-    """Parse a value from the data list into metadata and alter the data list."""
-    path_index = -1
-    index = start_index
-    dl_len = end_index = len(data_list)
-    if index >= end_index:
-        index = 0
-    while index < end_index:
-        value = _pop_value_from_token(data_list, metadata, regex, key, index)
-        if value:
-            if "key" == "issue":
-                path_index = path.find(value)
-            break
-        index += 1
-    if index > dl_len and start_index > 0:
-        index = 0
-        end_index = start_index
-    return path_index
-
-
-def _pop_issue_from_text_fields(
-    data_list: list[str], metadata: dict, index: int
-) -> str:
-    """Search issue from ends of text fields."""
-    if "issue" not in metadata:
-        _pop_value_from_token(data_list, metadata, ISSUE_END_RE, "issue", index=index)
-    if "issue" not in metadata:
-        _pop_value_from_token(data_list, metadata, ISSUE_BEGIN_RE, "issue", index=index)
-    return data_list.pop(index)
-
-
-TITLE_PRECEDING_KEYS = ("issue", "year", "volume")
-
-
-def _is_title_in_position(path, value, metadata):
-    """Does the title come after series and one other token if they exist."""
-    # TODO this could be faster if indexes could be grabbed for these tokens
-    # when they are extracted.
-    title_index = path.find(value)
-    # Does a series come first.
-    series = metadata.get("series")
-    if not series:
-        return False
-    series_index = path.find(series)
-    if title_index < series_index:
-        return False
-    # If other tokens exist then they much precede the title.
-    title_ok = False
-    other_tokens_exist = False
-    for preceding_key in TITLE_PRECEDING_KEYS:
-        preceding_value = metadata.get(preceding_key)
-        if not preceding_value:
-            continue
-        other_tokens_exist = True
-        preceding_index = path.find(preceding_value)
-        if title_index > preceding_index:
-            title_ok = True
-            break
-    return title_ok or not other_tokens_exist
-
-
-def _assign_remaining_groups(data_list: list[str], metadata: dict, path: str):
-    """Assign series and title."""
-    index = 0
-    for key in _REMAINING_GROUP_KEYS:
-        try:
-            data = data_list[index]
-        except (IndexError, TypeError):
-            break
-        match = REMAINING_GROUP_RE.search(data) if data else None
-        if match:
-            value = _pop_issue_from_text_fields(data_list, metadata, index)
-            if key == "title" and not _is_title_in_position(path, value, metadata):
-                continue
-            value = _grouping_operators_strip(value)
-            if value:
-                metadata[key] = value
-        else:
-            index += 1
-
-
-def _pickup_issue(remainders: list[str], metadata: dict) -> None:
-    """Get issue from remaining tokens or anywhere in a pinch."""
-    if "issue" in metadata:
-        return
-    _parse_item(remainders, metadata, ISSUE_TOKEN_RE, "issue")
-    if "issue" in metadata:
-        return
-    _parse_item(remainders, metadata, ISSUE_ANYWHERE_RE, "issue")
-
-
-def _log_progress(label, metadata, data_list):
-    print(label + ":")
-    pprint(metadata)
-    pprint(data_list)
-
-
-def comicfn2dict(path: str | Path) -> dict[str, Any]:
-    """Parse the filename with a hierarchy of regexes."""
-    metadata = {}
-    data_list = _get_data_list(path, metadata)
-    _log_progress("INITIAL", metadata, data_list)
-
-    # Parse paren tokens
-    _parse_item(data_list, metadata, ISSUE_COUNT_RE, "issue_count")
-    _parse_item(data_list, metadata, YEAR_TOKEN_RE, "year")
-    of_index = _parse_original_format_and_scan_info(data_list, metadata)
-    if "original_format" not in metadata:
-        of_index = _parse_item(
-            data_list, metadata, ORIGINAL_FORMAT_RE, "original_format"
-        )
-    if "scan_info" not in metadata:
-        # Start searching for scan_info after original format.
-        _parse_item(
-            data_list,
-            metadata,
-            SCAN_INFO_RE,
-            "scan_info",
-            start_index=of_index + 1,
-        )
-    _log_progress("AFTER PAREN TOKENS", metadata, data_list)
-
-    # Parse regular tokens
-    _parse_item(data_list, metadata, VOLUME_RE, "volume")
-    _parse_item(data_list, metadata, ISSUE_NUMBER_RE, "issue", path=str(path))
-    _log_progress("AFTER REGULAR TOKENS", metadata, data_list)
-
-    # Pickup year if not gotten.
-    if "year" not in metadata:
-        _parse_item(data_list, metadata, YEAR_BEGIN_RE, "year")
-    if "year" not in metadata:
-        _parse_item(data_list, metadata, YEAR_END_RE, "year")
-    _log_progress("AFTER YEAR PICKUP", metadata, data_list)
-
-    # Pickup issue if it's a standalone token
-    if "issue" not in metadata:
-        _parse_item(data_list, metadata, ISSUE_TOKEN_RE, "issue")
-    _log_progress("AFTER ISSUE PICKUP", metadata, data_list)
-
-    # Series and Title. Also looks for issue.
-    _assign_remaining_groups(data_list, metadata, str(path))
-    _log_progress("AFTER SERIES AND TITLE", metadata, data_list)
-
-    # Final try for issue number.
-    _pickup_issue(data_list, metadata)
-    _log_progress("AFTER ISSUE PICKUP", metadata, data_list)
-
-    # Add Remainders
-    if data_list:
-        metadata["remainders"] = tuple(data_list)
-
-    return metadata
+    def _parse_item(
+        self,
+        regex: Pattern,
+        require_all: bool = False,
+    ) -> None:
+        """Parse a value from the data list into metadata and alter the data list."""
+        matches = regex.search(self._unparsed_path)
+        if not matches:
+            return
+        matched_metadata = {}
+        matched_path_indexes = {}
+        for key, value in matches.groupdict().items():
+            if not value:
+                if require_all:
+                    return
+                continue
+            matched_path_indexes[key] = self.path.find(value)
+            # TODO idk if strip is necceesary here
+            matched_metadata[key] = self._grouping_operators_strip(value)
+        self.metadata.update(matched_metadata)
+        self.path_indexes.update(matched_path_indexes)
+
+        marked_str = regex.sub(_TOKEN_DELIMETER, self._unparsed_path)
+        parts = []
+        for part in marked_str.split(_TOKEN_DELIMETER):
+            if token := part.strip():
+                parts.append(token)
+        self._unparsed_path = _TOKEN_DELIMETER.join(parts)
+
+    def _is_title_in_position(self, value):
+        """Does the title come after series and one other token if they exist."""
+        title_index = self.path.find(value)
+
+        # Does a series come first.
+        if title_index < self.path_indexes.get("series", -1):
+            return False
+
+        # If other tokens exist then they much precede the title.
+        title_ok = False
+        other_tokens_exist = False
+        for preceding_key in _TITLE_PRECEDING_KEYS:
+            other_tokens_exist = True
+            if title_index > self.path_indexes.get(preceding_key, -1):
+                title_ok = True
+                break
+        return title_ok or not other_tokens_exist
+
+    def _assign_remaining_groups(self):
+        """Assign series and title."""
+        if not self._unparsed_path:
+            return
+
+        # TODO fix REMAINING GROUP_RE to use token delim
+        tokens = self._unparsed_path.split(_TOKEN_DELIMETER)
+
+        # ASSIGN GROUPS
+        remaining_key_index = 0
+        unused_tokens = []
+        while tokens and remaining_key_index < len(_REMAINING_GROUP_KEYS):
+            key = _REMAINING_GROUP_KEYS[remaining_key_index]
+            token = tokens.pop(0)
+            match = REMAINING_GROUP_RE.search(token)
+            if match:
+                value = match.group()
+                if key == "title" and not self._is_title_in_position(value):
+                    unused_tokens.append(token)
+                    continue
+                value = self._grouping_operators_strip(value)
+                self.metadata[key] = value
+                self.path_indexes[key] = self.path.find(value)
+                remaining_key_index += 1
+            else:
+                unused_tokens.append(token)
+
+        self._unparsed_path = " ".join(unused_tokens) if unused_tokens else ""
+
+    def _add_remainders(self):
+        """Add Remainders."""
+        remainders = []
+        for token in self._unparsed_path.split(_TOKEN_DELIMETER):
+            if remainder := token.strip():
+                remainders.append(remainder)
+
+        if remainders:
+            self.metadata["remainders"] = tuple(remainders)
+
+    def _log_progress(self, label):
+        if not self._debug:
+            return
+        print(label + ":")
+        combined = {}
+        for key in self.metadata:
+            combined[key] = (self.metadata.get(key), self.path_indexes.get(key))
+        pprint(combined)
+        print(self._unparsed_path)
+
+    def parse(self) -> dict[str, Any]:
+        """Parse the filename with a hierarchy of regexes."""
+        self._unparsed_path = self._clean_dividers(self._unparsed_path)
+        self._log_progress("INITIAL")
+        self._parse_ext()
+
+        # Parse paren tokens
+        self._parse_item(ISSUE_COUNT_RE)
+        self._parse_item(YEAR_TOKEN_RE)
+        self._parse_item(
+            ORIGINAL_FORMAT_SCAN_INFO_RE,
+            require_all=True,
+        )
+        if "original_format" not in self.metadata:
+            self._parse_item(
+                ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE,
+            )
+        self._log_progress("AFTER PAREN TOKENS")
+
+        # Parse regular tokens
+        self._parse_item(VOLUME_RE)
+        self._parse_item(ISSUE_NUMBER_RE)
+        self._log_progress("AFTER REGULAR TOKENS")
+
+        # Pickup year if not gotten.
+        if "year" not in self.metadata:
+            self._parse_item(YEAR_BEGIN_RE)
+        if "year" not in self.metadata:
+            self._parse_item(YEAR_END_RE)
+        self._log_progress("AFTER YEAR PICKUP")
+
+        # Pickup issue if it's a standalone token
+        if "issue" not in self.metadata:
+            self._parse_item(ISSUE_END_RE)
+        if "issue" not in self.metadata:
+            self._parse_item(ISSUE_BEGIN_RE)
+        self._log_progress("AFTER ISSUE PICKUP")
+
+        # Series and Title. Also looks for issue.
+        self._assign_remaining_groups()
+        self._log_progress("AFTER SERIES AND TITLE")
+
+        # Final try for issue number.
+        if "issue" not in self.metadata:
+            # TODO is this useful?
+            self._parse_item(ISSUE_ANYWHERE_RE)
+        self._log_progress("AFTER ISSUE PICKUP")
+
+        self._add_remainders()
+
+        return self.metadata
+
+    def __init__(self, path: str | Path, verbose: int = 0):
+        """Initialize."""
+        self._debug: bool = verbose > 0
+        self.metadata: dict[str, str | tuple[str, ...]] = {}
+        self.path_indexes: dict[str, int] = {}
+        # munge path
+        if isinstance(path, str):
+            path = path.strip()
+        p_path = Path(path)
+        self.path = str(p_path.name).strip()
+        self._unparsed_path = copy(self.path)
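The core idea of the new _parse_item() above: instead of splicing values out of a data_list, each regex records its named groups into metadata and then marks what it consumed by substituting the "/" token delimiter into a single working string, so later passes see token boundaries. A minimal standalone sketch of that technique, using plain re and a simplified stand-in pattern (not the module's actual YEAR_TOKEN_RE):

    import re

    TOKEN_DELIMITER = "/"
    unparsed = "Jeremy John 003 (2007) (digital)"

    # Hypothetical stand-in for one of the module's parenthesized token patterns.
    year_re = re.compile(r"\((?P<year>\d{4})\)")

    metadata = {}
    match = year_re.search(unparsed)
    if match:
        # Keep only the named groups that actually matched.
        metadata.update({k: v for k, v in match.groupdict().items() if v})
        # Mark the consumed span with the delimiter and drop empty fragments,
        # mirroring ComicFilenameParser._parse_item above.
        marked = year_re.sub(TOKEN_DELIMITER, unparsed)
        unparsed = TOKEN_DELIMITER.join(
            part.strip() for part in marked.split(TOKEN_DELIMITER) if part.strip()
        )

    print(metadata)  # {'year': '2007'}
    print(unparsed)  # Jeremy John 003/(digital)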

View File

@@ -51,24 +51,27 @@ YEAR_BEGIN_RE = re_compile(r"^" + _YEAR_RE_EXP + r"\b")
 YEAR_END_RE = re_compile(r"\b" + _YEAR_RE_EXP + r"$")

 _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS)
 _ORIGINAL_FORMAT_RE_EXP = r"(?P<original_format>" + _OF_PATTERNS + r")"
-ORIGINAL_FORMAT_RE = re_compile(_ORIGINAL_FORMAT_RE_EXP, parenthify=True)
 _SCAN_INFO_RE_EXP = r"(?P<scan_info>[^()]+?)"
-SCAN_INFO_RE = re_compile(_SCAN_INFO_RE_EXP, parenthify=True)
 _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = (
-    _ORIGINAL_FORMAT_RE_EXP + r"(?:-" + _SCAN_INFO_RE_EXP + r")?"
+    _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP  # + r")?"
 )
 ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile(
     _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP, parenthify=True
 )
+ORIGINAL_FORMAT_SCAN_INFO_SEPARATE_RE = re_compile(
+    r"\(" + _ORIGINAL_FORMAT_RE_EXP + r"\).*\(" + _SCAN_INFO_RE_EXP + r"\)"
+)

 # REGULAR TOKENS
 VOLUME_RE = re_compile(r"((?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+))")
 _ISSUE_NUMBER_RE_EXP = r"(?P<issue>[\w½]+\.?\d*\w*)"
 ISSUE_NUMBER_RE = re_compile(r"(#" + _ISSUE_NUMBER_RE_EXP + r")")
 _ISSUE_RE_EXP = r"(?P<issue>[\d½]+\.?\d*\w*)"
-ISSUE_TOKEN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")$")
-ISSUE_END_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")$")
-ISSUE_BEGIN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")\b")
+ISSUE_END_RE = re_compile(r"([\/\s]" + _ISSUE_RE_EXP + r"(\/|$))")
+ISSUE_BEGIN_RE = re_compile(r"((^|\/)" + _ISSUE_RE_EXP + r"[\/|\s])")
+# TODO is this used?
 ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b")

 # LONG STRINGS
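Because the parser now leaves "/" markers in its working string, the reworked ISSUE_END_RE and ISSUE_BEGIN_RE anchor on the delimiter (or whitespace) rather than on bare word boundaries. A small illustration with a plain-re stand-in (the project's re_compile helper wraps patterns with its own options, e.g. the parenthify flag seen above):

    import re

    _ISSUE_RE_EXP = r"(?P<issue>[\d½]+\.?\d*\w*)"
    ISSUE_END_RE = re.compile(r"([\/\s]" + _ISSUE_RE_EXP + r"(\/|$))")

    # By this stage the working string uses "/" as a token delimiter,
    # so an issue number at the end of a token is anchored against it.
    match = ISSUE_END_RE.search("Jeremy John/003")
    print(match.group("issue") if match else None)  # 003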

View File

@@ -28,22 +28,27 @@ _FILENAME_FORMAT_TAGS: tuple[tuple[str, str | Callable], ...] = (
 _EMPTY_VALUES: tuple[None, str] = (None, "")


-def dict2comicfn(md: Mapping, ext: bool = True) -> str | None:
-    """Get our preferred basename from a metadata dict."""
-    if not md:
-        return None
-    tokens = []
-    for tag, fmt in _FILENAME_FORMAT_TAGS:
-        val = md.get(tag)
-        if val in _EMPTY_VALUES:
-            continue
-        final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
-        token = final_fmt.format(val).strip()
-        if token:
-            tokens.append(token)
+def _tokenize_tag(md: Mapping, tag: str, fmt: str | Callable) -> str:
+    val = md.get(tag)
+    if val in _EMPTY_VALUES:
+        return ""
+    final_fmt = fmt(val) if isinstance(fmt, Callable) else fmt
+    token = final_fmt.format(val).strip()
+    return token
+
+
+def serialize(md: Mapping, ext: bool = True) -> str:
+    """Get our preferred basename from a metadata dict."""
+    if not md:
+        return ""
+    tokens = []
+    for tag, fmt in _FILENAME_FORMAT_TAGS:
+        if token := _tokenize_tag(md, tag, fmt):
+            tokens.append(token)
     fn = " ".join(tokens)
     if remainders := md.get("remainders"):
         remainder = " ".join(remainders)
+        # TODO oh this is the - delineated remainder :(
         fn += f" - {remainder}"
     if ext:
         fn += "." + md.get("ext", "cbz")

View File

@@ -136,8 +136,7 @@ FNS = {
         "year": "2006",
         "ext": "cbz",
         "scan_info": "Minutemen-Faessla",
-        # "original_format": "digital",
-        "remainders": ("(digital",),
+        "original_format": "digital",
     },
     "Jeremy John 003 (2007) (4 covers) (digital) (Minutemen-Faessla).cbz": {
         "series": "Jeremy John",
@@ -243,6 +242,7 @@ FNS = {
 FNS.update(  # Newly fixed.
     {
+        # BIG Change. title after token. more stripping.
         "'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz": {
             "ext": "cbz",
             "issue": "022",
@@ -252,6 +252,7 @@ FNS.update(  # Newly fixed.
             "year": "2024",
         },
         # Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543
+        # word characters now allowed to lead issue numbers only if preceded by a # marker
         "batman #B01 title.cbz": {
             "ext": "cbz",
             "issue": "B01",
@@ -261,32 +262,47 @@ FNS.update(  # Newly fixed.
     }
 )

-FNS.update(
-    {
+WONFIX = {
     # Leading issue number is usually an alternate sequence number
+    # WONTFIX: Series names may begin with numerals.
     "52 action comics #2024.cbz": {
         "ext": "cbz",
         "issue": "2024",
         "series": "action comics",
         "alternate": "52",
-    },  # 4 digit issue number
-    "action comics 1024.cbz": {
-        "ext": "cbz",
-        "issue": "1024",
-        "series": "action comics",
-    },  # Only the issue number. CT ensures that the series always has a value if possible
+    },
+    # Only the issue number. CT ensures that the series always has a value if possible
+    # I don't think making the series the same as the number is valuable.
     "#52.cbz": {
         "ext": "cbz",
         "issue": "52",
         "series": "52",
-    },  # CT treats double-underscore the same as double-dash
+    },
+}
+
+LATER = {
+    # 4 digit issue number
+    # should this be an issue number if year DONE?.
+    "action comics 1024.cbz": {
+        "ext": "cbz",
+        "issue": "1024",
+        "series": "action comics",
+    },
+}
+
+FNS.update(
+    {
+        # CT treats double-underscore the same as double-dash
+        # BUG: should be title right now.
+        # FEATURE: double dash should be a token delimiter?
         "Monster_Island_v1_#2__repaired__c2c.cbz": {
             "ext": "cbz",
             "issue": "2",
             "series": "Monster Island",
             "volume": "1",
-        },  # I'm not sure there's a right way to parse this. This might also be a madeup filename I don't remember
+            "remainders": ("repaired c2c",),
+        },
+        # I'm not sure there's a right way to parse this. This might also be a madeup filename I don't remember
         "Super Strange Yarns (1957) #92 (1969).cbz": {
             "ext": "cbz",
             "issue": "92",

View File

@@ -5,7 +5,7 @@ from types import MappingProxyType
 import pytest
 from deepdiff.diff import DeepDiff

-from comicfn2dict import comicfn2dict
+from comicfn2dict import ComicFilenameParser
 from tests.comic_filenames import FNS

 ALL_FIELDS = frozenset({"series", "volume", "issue", "issue_count", "year", "ext"})
@@ -16,7 +16,7 @@ FIELD_SCHEMA = MappingProxyType({key: None for key in ALL_FIELDS})
 def test_parse_filename(item):
     """Test filename parsing."""
     fn, defined_fields = item
-    md = comicfn2dict(fn)
+    md = ComicFilenameParser(fn, verbose=1).parse()
     diff = DeepDiff(defined_fields, md, ignore_order=True)
     print(fn)
     pprint(defined_fields)