diff --git a/NEWS.md b/NEWS.md index 525bf58..dcabf89 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,8 @@ - Titles are now parsed only if they occur after the series token AND after either issue, year or volume. +- Issue numbers that start with a '#' character may contain alphabetical + characters. ## v0.1.4 diff --git a/comicfn2dict/regex.py b/comicfn2dict/regex.py index d49273d..73fdc45 100644 --- a/comicfn2dict/regex.py +++ b/comicfn2dict/regex.py @@ -41,7 +41,6 @@ ORIGINAL_FORMAT_PATTERNS = ( # CLEAN NON_SPACE_DIVIDER_RE = re_compile(r"[_\+]") -DASH_SPLIT_RE = re_compile(r"\s-\s") EXTRA_SPACES_RE = re_compile(r"\s\s+") # PAREN GROUPS @@ -64,8 +63,9 @@ ORIGINAL_FORMAT_SCAN_INFO_RE = re_compile( # REGULAR TOKENS VOLUME_RE = re_compile(r"((?:v(?:ol(?:ume)?)?\.?)\s*(?P<volume>\d+))") +_ISSUE_NUMBER_RE_EXP = r"(?P<issue>[\w½]+\.?\d*\w*)" +ISSUE_NUMBER_RE = re_compile(r"(#" + _ISSUE_NUMBER_RE_EXP + r")") _ISSUE_RE_EXP = r"(?P<issue>[\d½]+\.?\d*\w*)" -ISSUE_NUMBER_RE = re_compile(r"(#" + _ISSUE_RE_EXP + r")") ISSUE_TOKEN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")$") ISSUE_END_RE = re_compile(r"\b(" + _ISSUE_RE_EXP + r")$") ISSUE_BEGIN_RE = re_compile(r"^(" + _ISSUE_RE_EXP + r")\b") diff --git a/tests/comic_filenames.py b/tests/comic_filenames.py index 3700d65..67e8ac5 100644 --- a/tests/comic_filenames.py +++ b/tests/comic_filenames.py @@ -251,18 +251,20 @@ FNS.update( # Newly fixed. "series": "Batman - Superman - World's Finest", "year": "2024", }, - } -) - -FNS.update( - { # Issue number starting with a letter requested in https://github.com/comictagger/comictagger/issues/543 "batman #B01 title.cbz": { "ext": "cbz", "issue": "B01", "series": "batman", "title": "title", - }, # Leading issue number is usually an alternate sequence number + }, + } +) + + +FNS.update( + { + # Leading issue number is usually an alternate sequence number "52 action comics #2024.cbz": { "ext": "cbz", "issue": "2024",