titles after tokens

parent 65e17236df
commit 3ce61254dc
NEWS.md
@@ -1,5 +1,10 @@
 # 📰 comicfn2dict News
 
+## v0.2.0
+
+- Titles are now parsed only if they occur after the series token AND after
+  either issue, year or volume.
+
 ## v0.1.4
 
 - Require Python 3.10
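In practice the new rule keeps dash-separated phrases in the series unless an issue, year, or volume token already separates them. A minimal check, assuming the package's top-level comicfn2dict export, with filenames and expected values taken from the test fixtures changed in this commit:

    from comicfn2dict import comicfn2dict

    # No issue, year, or volume token occurs between "Bardude" and the
    # dashed phrase, so the phrase is no longer split out as a title.
    md = comicfn2dict("Bardude - The Last Thing I Remember.cbz")
    assert md["series"] == "Bardude - The Last Thing I Remember"
    assert "title" not in md

    # Same for "Drunkguy": the trailing issue "01" comes after the
    # candidate phrase, not before it, so everything stays in the series.
    md = comicfn2dict("Drunkguy - The Man Without Fear - 01.cbz")
    assert md["series"] == "Drunkguy - The Man Without Fear"
    assert md["issue"] == "01"
    assert "title" not in md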
comicfn2dict/parse.py
@@ -1,10 +1,10 @@
 """Parse comic book archive names using the simple 'parse' parser."""
+from pprint import pprint
 from pathlib import Path
 from re import Match, Pattern
 from typing import Any
 
 from comicfn2dict.regex import (
-    DASH_SPLIT_RE,
     EXTRA_SPACES_RE,
     ISSUE_ANYWHERE_RE,
     ISSUE_BEGIN_RE,
@@ -26,9 +26,13 @@ from comicfn2dict.regex import (
 _REMAINING_GROUP_KEYS = ("series", "title")
 
 
-def _parse_ext(name: str, suffix: str, metadata: dict) -> str:
+def _parse_ext(name: str | Path, metadata: dict) -> str:
     """Pop the extension from the pathname."""
-    data = name.removesuffix(suffix)
+    if isinstance(name, str):
+        name = name.strip()
+    path = Path(name)
+    suffix = path.suffix
+    data = path.name.removesuffix(suffix)
     ext = suffix.lstrip(".")
     if ext:
         metadata["ext"] = ext
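The reworked _parse_ext accepts either a string or a Path and derives the suffix itself; a sketch calling the private helper directly, with behavior read off the hunk above:

    from comicfn2dict.parse import _parse_ext  # private helper

    metadata: dict = {}
    # Leading/trailing whitespace is stripped and the extension is popped
    # into metadata; the stem is returned for further tokenizing.
    stem = _parse_ext(" Monster_Island_v1_#2.cbz ", metadata)
    assert stem == "Monster_Island_v1_#2"
    assert metadata["ext"] == "cbz"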
@@ -43,17 +47,18 @@ def _clean_dividers(data: str) -> str:
 
 def _get_data_list(path: str | Path, metadata: dict) -> list[str]:
     """Prepare data list from a path or string."""
-    if isinstance(path, str):
-        path = path.strip()
-    path = Path(path)
-    data = _parse_ext(path.name, path.suffix, metadata)
+    data = _parse_ext(path, metadata)
     data = _clean_dividers(data)
-    return DASH_SPLIT_RE.split(data)
+    return [data]
 
 
-def _paren_strip(value: str) -> str:
+def _grouping_operators_strip(value: str) -> str:
     """Strip spaces and parens."""
-    return value.strip().strip("()").strip()
+    value = value.strip()
+    value = value.strip("()").strip()
+    value = value.strip("-").strip()
+    value = value.strip("'").strip('"').strip()
+    return value
 
 
 def _splicey_dicey(
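_grouping_operators_strip broadens the old _paren_strip: besides parens it now trims stray dashes and quotes left at a token's edges after splicing. Illustrative values (a sketch calling the private helper):

    from comicfn2dict.parse import _grouping_operators_strip  # private helper

    assert _grouping_operators_strip(" (2021) ") == "2021"
    assert _grouping_operators_strip("- Deep Target ") == "Deep Target"
    assert _grouping_operators_strip("'89'") == "89"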
@@ -71,7 +76,7 @@ def _splicey_dicey(
     if data_after := data[match.end() :].strip():
         data_ends.append(data_after)
     data_list[index:index] = data_ends
-    return _paren_strip(value)
+    return _grouping_operators_strip(value)
 
 
 def _match_original_format_and_scan_info(
@@ -83,10 +88,10 @@ def _match_original_format_and_scan_info(
         scan_info = match.group("scan_info")
     except IndexError:
         scan_info = None
-    metadata["original_format"] = _paren_strip(original_format)
+    metadata["original_format"] = _grouping_operators_strip(original_format)
     match_group = 1
     if scan_info:
-        metadata["scan_info"] = _paren_strip(scan_info)
+        metadata["scan_info"] = _grouping_operators_strip(scan_info)
         match_group = 0
     _splicey_dicey(data_list, index, match, match_group=match_group)
 
@@ -112,14 +117,16 @@ def _pop_value_from_token(
     regex: Pattern,
     key: str,
     index: int = 0,
-) -> Match:
+) -> str:
     """Search token for value, splice and assign to metadata."""
     data = data_list[index]
     match = regex.search(data)
     if match:
         value = _splicey_dicey(data_list, index, match, key)
         metadata[key] = value
-    return match
+    else:
+        value = ""
+    return value
 
 
 def _parse_item(
@@ -128,21 +135,25 @@ def _parse_item(
     regex: Pattern,
     key: str,
     start_index: int = 0,
+    path: str = "",
 ) -> int:
     """Parse a value from the data list into metadata and alter the data list."""
+    path_index = -1
     index = start_index
     dl_len = end_index = len(data_list)
     if index >= end_index:
         index = 0
     while index < end_index:
-        match = _pop_value_from_token(data_list, metadata, regex, key, index)
-        if match:
+        value = _pop_value_from_token(data_list, metadata, regex, key, index)
+        if value:
+            if key == "issue":
+                path_index = path.find(value)
             break
         index += 1
     if index > dl_len and start_index > 0:
         index = 0
         end_index = start_index
-    return index
+    return path_index
 
 
 def _pop_issue_from_text_fields(
@@ -156,7 +167,39 @@ def _pop_issue_from_text_fields(
     return data_list.pop(index)
 
 
-def _assign_remaining_groups(data_list: list[str], metadata: dict):
+TITLE_PRECEDING_KEYS = ("issue", "year", "volume")
+
+
+def _is_title_in_position(path, value, metadata):
+    """Does the title come after series and one other token if they exist."""
+    # TODO this could be faster if indexes could be grabbed for these tokens
+    # when they are extracted.
+    title_index = path.find(value)
+
+    # Does a series come first.
+    series = metadata.get("series")
+    if not series:
+        return False
+    series_index = path.find(series)
+    if title_index < series_index:
+        return False
+
+    # If other tokens exist then they must precede the title.
+    title_ok = False
+    other_tokens_exist = False
+    for preceding_key in TITLE_PRECEDING_KEYS:
+        preceding_value = metadata.get(preceding_key)
+        if not preceding_value:
+            continue
+        other_tokens_exist = True
+        preceding_index = path.find(preceding_value)
+        if title_index > preceding_index:
+            title_ok = True
+            break
+    return title_ok or not other_tokens_exist
+
+
+def _assign_remaining_groups(data_list: list[str], metadata: dict, path: str):
     """Assign series and title."""
     index = 0
     for key in _REMAINING_GROUP_KEYS:
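A worked example of the position check, a sketch with made-up path and metadata values that calls the private helper directly:

    from comicfn2dict.parse import _is_title_in_position  # private helper

    metadata = {"series": "Series Name", "issue": "01", "year": "2021"}

    # The candidate starts after the series and after the issue and year
    # tokens, so it is accepted as a title.
    assert _is_title_in_position(
        "Series Name #01 (2021) A Title.cbz", "A Title", metadata
    )

    # Moved before the issue and year tokens, the same candidate is
    # rejected: other tokens exist and none of them precedes it.
    assert not _is_title_in_position(
        "Series Name A Title #01 (2021).cbz", "A Title", metadata
    )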
@@ -167,7 +210,9 @@ def _assign_remaining_groups(data_list: list[str], metadata: dict):
         match = REMAINING_GROUP_RE.search(data) if data else None
         if match:
             value = _pop_issue_from_text_fields(data_list, metadata, index)
-            value = _paren_strip(value)
+            if key == "title" and not _is_title_in_position(path, value, metadata):
+                continue
+            value = _grouping_operators_strip(value)
             if value:
                 metadata[key] = value
         else:
@@ -184,10 +229,17 @@ def _pickup_issue(remainders: list[str], metadata: dict) -> None:
         _parse_item(remainders, metadata, ISSUE_ANYWHERE_RE, "issue")
 
 
+def _log_progress(label, metadata, data_list):
+    print(label + ":")
+    pprint(metadata)
+    pprint(data_list)
+
+
 def comicfn2dict(path: str | Path) -> dict[str, Any]:
     """Parse the filename with a hierarchy of regexes."""
     metadata = {}
     data_list = _get_data_list(path, metadata)
+    _log_progress("INITIAL", metadata, data_list)
 
     # Parse paren tokens
     _parse_item(data_list, metadata, ISSUE_COUNT_RE, "issue_count")
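_log_progress is plain print/pprint tracing, so a parse now echoes the metadata dict and the shrinking token list after each stage. A sketch of the first stage's output for one fixture (later stages abbreviated):

    from comicfn2dict import comicfn2dict

    comicfn2dict("Bardude - The Last Thing I Remember.cbz")
    # stdout:
    # INITIAL:
    # {'ext': 'cbz'}
    # ['Bardude - The Last Thing I Remember']
    # AFTER PAREN TOKENS:
    # ...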
@@ -206,26 +258,33 @@ def comicfn2dict(path: str | Path) -> dict[str, Any]:
         "scan_info",
         start_index=of_index + 1,
     )
+    _log_progress("AFTER PAREN TOKENS", metadata, data_list)
 
     # Parse regular tokens
     _parse_item(data_list, metadata, VOLUME_RE, "volume")
-    _parse_item(data_list, metadata, ISSUE_NUMBER_RE, "issue")
+    _parse_item(data_list, metadata, ISSUE_NUMBER_RE, "issue", path=str(path))
+    _log_progress("AFTER REGULAR TOKENS", metadata, data_list)
 
     # Pickup year if not gotten.
     if "year" not in metadata:
         _parse_item(data_list, metadata, YEAR_BEGIN_RE, "year")
     if "year" not in metadata:
         _parse_item(data_list, metadata, YEAR_END_RE, "year")
+    _log_progress("AFTER YEAR PICKUP", metadata, data_list)
 
     # Pickup issue if it's a standalone token
     if "issue" not in metadata:
         _parse_item(data_list, metadata, ISSUE_TOKEN_RE, "issue")
 
+    _log_progress("AFTER ISSUE PICKUP", metadata, data_list)
+
     # Series and Title. Also looks for issue.
-    _assign_remaining_groups(data_list, metadata)
+    _assign_remaining_groups(data_list, metadata, str(path))
+    _log_progress("AFTER SERIES AND TITLE", metadata, data_list)
 
     # Final try for issue number.
     _pickup_issue(data_list, metadata)
+    _log_progress("AFTER ISSUE PICKUP", metadata, data_list)
 
     # Add Remainders
     if data_list:
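End to end, the reordered pipeline reproduces the "Newly fixed" fixture added below; a sketch assuming the package's top-level export:

    from comicfn2dict import comicfn2dict

    md = comicfn2dict(
        "'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz"
    )
    # Expected values per the fixture added in this commit.
    assert md["series"] == "Batman - Superman - World's Finest"
    assert md["issue"] == "022"
    assert md["year"] == "2024"
    assert md["scan_info"] == "Webrip"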
comicfn2dict/regex.py
@@ -72,4 +72,4 @@ ISSUE_BEGIN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")\b")
 ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b")
 
 # LONG STRINGS
-REMAINING_GROUP_RE = re_compile(r"^[\w].*[^\)]")
+REMAINING_GROUP_RE = re_compile(r"^[^\()].*[^\)]")
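The tightened REMAINING_GROUP_RE anchor matters for the new fixture whose filename begins with an apostrophe: ^[\w] rejected any token not starting with a word character, while ^[^\()] only rejects a leading paren. A standalone sketch of the difference:

    from re import compile as re_compile

    OLD_RE = re_compile(r"^[\w].*[^\)]")
    NEW_RE = re_compile(r"^[^\()].*[^\)]")

    assert not OLD_RE.search("'Batman - Superman - World's Finest")
    assert NEW_RE.search("'Batman - Superman - World's Finest")
    # Paren-led tokens are still excluded from series/title candidates.
    assert not NEW_RE.search("(digital)")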
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "comicfn2dict"
-version = "0.1.4"
+version = "0.2.0"
 description = "Parse common comic filenames and return a dict of metadata attributes. Includes a cli."
 license = "GPL-3.0-only"
 authors = ["AJ Slater <aj@slater.net>"]
tests (FNS fixture table)
@@ -80,13 +80,11 @@ FNS = {
         "original_format": "digital",
     },
     "Bardude - The Last Thing I Remember.cbz": {
-        "series": "Bardude",
-        "title": "The Last Thing I Remember",
+        "series": "Bardude - The Last Thing I Remember",
         "ext": "cbz",
     },
     "Drunkguy - The Man Without Fear - 01.cbz": {
-        "series": "Drunkguy",
-        "title": "The Man Without Fear",
+        "series": "Drunkguy - The Man Without Fear",
         "issue": "01",
         "ext": "cbz",
     },
|
|||||||
"scan_info": "Zone-Empire",
|
"scan_info": "Zone-Empire",
|
||||||
"title": "Last Bullet",
|
"title": "Last Bullet",
|
||||||
},
|
},
|
||||||
"Jeremy John - A Big Long Title (2017) (digital-Minutement).cbz": {
|
"Jeremy John - Not A Title (2017) (digital-Minutement).cbz": {
|
||||||
"series": "Jeremy John",
|
"series": "Jeremy John - Not A Title",
|
||||||
"title": "A Big Long Title",
|
|
||||||
"year": "2017",
|
"year": "2017",
|
||||||
"ext": "cbz",
|
"ext": "cbz",
|
||||||
"original_format": "digital",
|
"original_format": "digital",
|
||||||
@ -243,3 +240,167 @@ FNS = {
|
|||||||
"ext": "cbz",
|
"ext": "cbz",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FNS.update( # Newly fixed.
|
||||||
|
{
|
||||||
|
"'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "022",
|
||||||
|
"remainders": ("(The Last Kryptonian-DCP)",),
|
||||||
|
"scan_info": "Webrip",
|
||||||
|
"series": "Batman - Superman - World's Finest",
|
||||||
|
"year": "2024",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
FNS.update(
|
||||||
|
{
|
||||||
|
# Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543
|
||||||
|
"batman #B01 title.cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "B01",
|
||||||
|
"series": "batman",
|
||||||
|
"title": "title",
|
||||||
|
}, # Leading issue number is usually an alternate sequence number
|
||||||
|
"52 action comics #2024.cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "2024",
|
||||||
|
"series": "action comics",
|
||||||
|
"alternate": "52",
|
||||||
|
}, # 4 digit issue number
|
||||||
|
"action comics 1024.cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "1024",
|
||||||
|
"series": "action comics",
|
||||||
|
}, # Only the issue number. CT ensures that the series always has a value if possible
|
||||||
|
"#52.cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "52",
|
||||||
|
"series": "52",
|
||||||
|
}, # CT treats double-underscore the same as double-dash
|
||||||
|
"Monster_Island_v1_#2__repaired__c2c.cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "2",
|
||||||
|
"series": "Monster Island",
|
||||||
|
"volume": "1",
|
||||||
|
}, # I'm not sure there's a right way to parse this. This might also be a madeup filename I don't remember
|
||||||
|
"Super Strange Yarns (1957) #92 (1969).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "92",
|
||||||
|
"series": "Super Strange Yarns",
|
||||||
|
"volume": "1957",
|
||||||
|
"year": "1969",
|
||||||
|
}, # Extra - in the series
|
||||||
|
" X-Men-V1-#067.cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "067",
|
||||||
|
"series": "X-Men",
|
||||||
|
"volume": "1",
|
||||||
|
}, # CT only separates this into a title if the '-' is attached to the previous word eg 'aquaman- Green Arrow'. @bpepple opened a ticket for this https://github.com/ajslater/comicfn2dict/issues/1 already
|
||||||
|
"Aquaman - Green Arrow - Deep Target #01 (of 07) (2021).cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "01",
|
||||||
|
"series": "Aquaman - Green Arrow - Deep Target",
|
||||||
|
"year": "2021",
|
||||||
|
"issue_count": "7",
|
||||||
|
},
|
||||||
|
"Batman_-_Superman_#020_(2021).cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "020",
|
||||||
|
"series": "Batman - Superman",
|
||||||
|
"year": "2021",
|
||||||
|
},
|
||||||
|
"Free Comic Book Day - Avengers.Hulk (2021).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"series": "Free Comic Book Day - Avengers Hulk",
|
||||||
|
"year": "2021",
|
||||||
|
}, # CT assumes the volume is also the issue number if it can't find an issue number
|
||||||
|
"Avengers By Brian Michael Bendis volume 03 (2013).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "3",
|
||||||
|
"series": "Avengers By Brian Michael Bendis",
|
||||||
|
"volume": "03",
|
||||||
|
"year": "2013",
|
||||||
|
}, # Publishers like to re-print some of their annuals using this format for the year
|
||||||
|
"Batman '89 (2021) .cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"series": "Batman '89",
|
||||||
|
"year": "2021",
|
||||||
|
}, # CT has extra processing to re-attach the year in this case
|
||||||
|
"Blade Runner Free Comic Book Day 2021 (2021).cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"series": "Blade Runner Free Comic Book Day 2021",
|
||||||
|
"year": "2021",
|
||||||
|
}, # CT treats book like 'v' but also adds it as the title (matches ComicVine for this particular series)
|
||||||
|
"Bloodshot Book 03 (2020).cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "03",
|
||||||
|
"series": "Bloodshot",
|
||||||
|
"title": "Book 03",
|
||||||
|
"volume": "03",
|
||||||
|
"year": "2020",
|
||||||
|
}, # CT checks for the following '(of 06)' after the '03' and marks it as the volume
|
||||||
|
"Elephantmen 2259 #008 - Simple Truth 03 (of 06) (2021).cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "008",
|
||||||
|
"series": "Elephantmen 2259",
|
||||||
|
"title": "Simple Truth",
|
||||||
|
"volume": "03",
|
||||||
|
"year": "2021",
|
||||||
|
"volume_count": "06",
|
||||||
|
}, # CT catches the year
|
||||||
|
"Marvel Previews #002 (January 2022).cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "002",
|
||||||
|
"series": "Marvel Previews",
|
||||||
|
"year": "2022",
|
||||||
|
}, # c2c aka "cover to cover" is fairly common and CT moves it to scan_info/remainder
|
||||||
|
"Marvel Two In One V1 #090 c2c.cbr": {
|
||||||
|
"ext": "cbr",
|
||||||
|
"issue": "090",
|
||||||
|
"series": "Marvel Two In One",
|
||||||
|
"publisher": "Marvel",
|
||||||
|
"volume": "1",
|
||||||
|
}, # This made the parser in CT much more complicated. It's understandable that this isn't parsed on the first few iterations of this project
|
||||||
|
"Star Wars - War of the Bounty Hunters - IG-88 (2021).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||||
|
"year": "2021",
|
||||||
|
}, # The addition of the '#1' turns this into the same as 'Aquaman - Green Arrow - Deep Target' above
|
||||||
|
"Star Wars - War of the Bounty Hunters - IG-88 #1 (2021).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "1",
|
||||||
|
"series": "Star Wars - War of the Bounty Hunters - IG-88",
|
||||||
|
"year": "2021",
|
||||||
|
}, # CT treats '[]' as equivalent to '()', catches DC as a publisher and 'Sep-Oct 1951' as dates and removes them. CT doesn't catch the digital though so that could be better but I blame whoever made this atrocious filename
|
||||||
|
"Wonder Woman #49 DC Sep-Oct 1951 digital [downsized, lightened, 4 missing story pages restored] (Shadowcat-Empire).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "49",
|
||||||
|
"series": "Wonder Woman",
|
||||||
|
"title": "digital",
|
||||||
|
"publisher": "DC",
|
||||||
|
"year": "1951",
|
||||||
|
}, # CT notices that this is a full date, CT doesn't actually return the month or day though just removes it
|
||||||
|
"X-Men, 2021-08-04 (#02).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "02",
|
||||||
|
"series": "X-Men",
|
||||||
|
"year": "2021",
|
||||||
|
}, # CT treats ':' the same as '-' but here the ':' is attached to 'Now' which CT sees as a title separation
|
||||||
|
"Cory Doctorow's Futuristic Tales of the Here and Now: Anda's Game #001 (2007).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "001",
|
||||||
|
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
|
||||||
|
"title": "Anda's Game",
|
||||||
|
"year": "2007",
|
||||||
|
}, # This is a contrived test case. I've never seen this I just wanted to handle it with my parser
|
||||||
|
"Cory Doctorow's Futuristic Tales of the Here and Now #0.0.1 (2007).cbz": {
|
||||||
|
"ext": "cbz",
|
||||||
|
"issue": "0.1",
|
||||||
|
"series": "Cory Doctorow's Futuristic Tales of the Here and Now",
|
||||||
|
"year": "2007",
|
||||||
|
"issue_count": "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
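The FNS mapping is a table of filename to expected-metadata cases; a minimal sketch of how such a fixture table is typically driven (the fixture module name here is hypothetical):

    import pytest

    from comicfn2dict import comicfn2dict
    from comic_filenames import FNS  # hypothetical fixture module name

    @pytest.mark.parametrize(("filename", "expected"), tuple(FNS.items()))
    def test_parse_filename(filename, expected):
        assert comicfn2dict(filename) == expected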