diff --git a/comicfn2dict/regex.py b/comicfn2dict/regex.py index 43ae9ea..f3f1362 100644 --- a/comicfn2dict/regex.py +++ b/comicfn2dict/regex.py @@ -35,7 +35,7 @@ ORIGINAL_FORMAT_PATTERNS = ( r"Sketch", r"TPB", r"Trade[-\s]Paper[-\s]?Back", - r"Web([-\s]?Comic)?", + r"Web([-\s]?(Comic|Rip))?", ) @@ -51,7 +51,7 @@ YEAR_BEGIN_RE = re_compile(r"^" + _YEAR_RE_EXP + r"\b") YEAR_END_RE = re_compile(r"\b" + _YEAR_RE_EXP + r"$") _OF_PATTERNS = r"|".join(ORIGINAL_FORMAT_PATTERNS) _ORIGINAL_FORMAT_RE_EXP = r"(?P" + _OF_PATTERNS + r")" -_SCAN_INFO_RE_EXP = r"(?P[^()]+?)" +_SCAN_INFO_RE_EXP = r"(?P[^()]*)" _ORIGINAL_FORMAT_SCAN_INFO_RE_EXP = ( _ORIGINAL_FORMAT_RE_EXP + r"\s*[\(:-]" + _SCAN_INFO_RE_EXP # + r")?" ) @@ -70,8 +70,6 @@ _ISSUE_RE_EXP = r"(?P[\d½]+\.?\d*\w*)" ISSUE_END_RE = re_compile(r"([\/\s]" + _ISSUE_RE_EXP + r"(\/|$))") ISSUE_BEGIN_RE = re_compile(r"((^|\/)" + _ISSUE_RE_EXP + r"[\/|\s])") - -# TODO is this used? ISSUE_ANYWHERE_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")\b") # LONG STRINGS diff --git a/tests/comic_filenames.py b/tests/comic_filenames.py index b5bd175..61d38c0 100644 --- a/tests/comic_filenames.py +++ b/tests/comic_filenames.py @@ -246,9 +246,9 @@ FNS.update( # Newly fixed. "'Batman - Superman - World's Finest 022 (2024) (Webrip) (The Last Kryptonian-DCP).cbz": { "ext": "cbz", "issue": "022", - "remainders": ("(The Last Kryptonian-DCP)",), - "scan_info": "Webrip", + "original_format": "Webrip", "series": "Batman - Superman - World's Finest", + "scan_info": "The Last Kryptonian-DCP", "year": "2024", }, # Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543 @@ -259,6 +259,13 @@ FNS.update( # Newly fixed. "series": "batman", "title": "title", }, + "Monster_Island_v1_#2__repaired__c2c.cbz": { + "ext": "cbz", + "issue": "2", + "series": "Monster Island", + "volume": "1", + "remainders": ("repaired c2c",), + }, } ) @@ -292,16 +299,6 @@ LATER = { FNS.update( { - # CT treats double-underscore the same as double-dash - # BUG: should be title right now. - # FEATURE: double dash should be a token delimiter? - "Monster_Island_v1_#2__repaired__c2c.cbz": { - "ext": "cbz", - "issue": "2", - "series": "Monster Island", - "volume": "1", - "remainders": ("repaired c2c",), - }, # I'm not sure there's a right way to parse this. This might also be a madeup filename I don't remember "Super Strange Yarns (1957) #92 (1969).cbz": { "ext": "cbz",