2015-02-21 18:30:32 -08:00
|
|
|
"""Functions for parsing comic info from filename
|
2015-02-16 04:27:21 -08:00
|
|
|
|
|
|
|
This should probably be re-written, but, well, it mostly works!
|
|
|
|
"""
|
2023-02-16 17:23:13 -08:00
|
|
|
# Copyright 2012-2014 ComicTagger Authors
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-21 18:30:32 -08:00
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-21 18:30:32 -08:00
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-21 18:30:32 -08:00
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2022-06-02 18:32:16 -07:00
|
|
|
#
|
2015-02-16 04:27:21 -08:00
|
|
|
# Some portions of this code were modified from pyComicMetaThis project
|
|
|
|
# http://code.google.com/p/pycomicmetathis/
|
2022-06-02 18:32:16 -07:00
|
|
|
from __future__ import annotations
|
2015-02-16 04:27:21 -08:00
|
|
|
|
2023-10-23 21:57:23 -07:00
|
|
|
import functools
|
2023-10-26 20:51:53 -07:00
|
|
|
import itertools
|
2022-04-04 18:59:26 -07:00
|
|
|
import logging
|
2015-02-16 04:27:21 -08:00
|
|
|
import os
|
Code cleanup
Remove no longer used google scripts
Remove convenience files from comictaggerlib and import comicapi directly
Add type-hints to facilitate auto-complete tools
Make PyQt5 code more compatible with PyQt6
Implement automatic tooling
isort and black for code formatting
Line length has been set to 120
flake8 for code standards with exceptions:
E203 - Whitespace before ':' - format compatibility with black
E501 - Line too long - flake8 line limit cannot be set
E722 - Do not use bare except - fixing bare except statements is a
lot of overhead and there are already
many in the codebase
These changes, along with some manual fixes, create much more readable code.
See examples below:
diff --git a/comicapi/comet.py b/comicapi/comet.py
index d1741c5..52dc195 100644
--- a/comicapi/comet.py
+++ b/comicapi/comet.py
@@ -166,7 +166,2 @@ class CoMet:
- if credit['role'].lower() in set(self.editor_synonyms):
- ET.SubElement(
- root,
- 'editor').text = "{0}".format(
- credit['person'])
@@ -174,2 +169,4 @@ class CoMet:
self.indent(root)
+ if credit["role"].lower() in set(self.editor_synonyms):
+ ET.SubElement(root, "editor").text = str(credit["person"])
diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py
index 4338176..9219f01 100644
--- a/comictaggerlib/autotagmatchwindow.py
+++ b/comictaggerlib/autotagmatchwindow.py
@@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
self.skipButton, QtWidgets.QDialogButtonBox.ActionRole)
- self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText(
- "Accept and Write Tags")
+ self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags")
diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 688907d..dbd0c2e 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results):
if opts.raw:
- print((
- "{0}".format(
- str(
- ca.readRawCIX(),
- errors='ignore'))))
+ print(ca.read_raw_cix())
else:
2022-04-01 16:50:46 -07:00
|
|
|
import re
|
2022-04-29 16:37:44 -07:00
|
|
|
from operator import itemgetter
|
2022-08-10 16:46:00 -07:00
|
|
|
from re import Match
|
|
|
|
from typing import Callable, TypedDict
|
2018-09-19 13:05:39 -07:00
|
|
|
from urllib.parse import unquote
|
2015-02-16 04:27:21 -08:00
|
|
|
|
2022-04-29 16:37:44 -07:00
|
|
|
from text2digits import text2digits
|
|
|
|
|
|
|
|
from comicapi import filenamelexer, issuestring
|
|
|
|
|
2022-10-04 16:15:55 -07:00
|
|
|
logger = logging.getLogger(__name__)

# Two text2digits converters that differ only in the add_ordinal_ending flag:
# t2d is built with the flag off, t2do with it on.
t2d, t2do = (text2digits.Text2Digits(add_ordinal_ending=keep_ordinal) for keep_ordinal in (False, True))
|
|
|
|
|
2022-10-04 16:15:55 -07:00
|
|
|
# Separator patterns, pre-compiled once at import time. The "no dashes" set
# treats both "-" and "_" as separators; the "allow dashes" set only "_".
# Both sets also match runs of spaces.
placeholders_no_dashes = list(map(re.compile, (r"[-_]", r" +")))
placeholders_allow_dashes = list(map(re.compile, (r"[_]", r" +")))
|
2022-04-04 18:59:26 -07:00
|
|
|
|
2015-02-21 18:30:32 -08:00
|
|
|
|
2015-02-16 04:27:21 -08:00
|
|
|
class FileNameParser:
|
2022-05-22 19:59:31 -07:00
|
|
|
# Regex fragment for a volume marker: "v", "vol" or "volume", then an optional
# "." and at most one whitespace character. get_issue_number appends r"(\d+)"
# and searches with re.IGNORECASE to capture the volume number.
volume_regex = r"v(?:|ol|olume)\.?\s?"
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def __init__(self) -> None:
|
Code cleanup
Remove no longer used google scripts
Remove convenience files from comicataggerlib and import comicapi directly
Add type-hints to facilitate auto-complete tools
Make PyQt5 code more compatible with PyQt6
Implement automatic tooling
isort and black for code formatting
Line length has been set to 120
flake8 for code standards with exceptions:
E203 - Whitespace before ':' - format compatiblity with black
E501 - Line too long - flake8 line limit cannot be set
E722 - Do not use bare except - fixing bare except statements is a
lot of overhead and there are already
many in the codebase
These changes, along with some manual fixes creates much more readable code.
See examples below:
diff --git a/comicapi/comet.py b/comicapi/comet.py
index d1741c5..52dc195 100644
--- a/comicapi/comet.py
+++ b/comicapi/comet.py
@@ -166,7 +166,2 @@ class CoMet:
- if credit['role'].lower() in set(self.editor_synonyms):
- ET.SubElement(
- root,
- 'editor').text = "{0}".format(
- credit['person'])
@@ -174,2 +169,4 @@ class CoMet:
self.indent(root)
+ if credit["role"].lower() in set(self.editor_synonyms):
+ ET.SubElement(root, "editor").text = str(credit["person"])
diff --git a/comictaggerlib/autotagmatchwindow.py b/comictaggerlib/autotagmatchwindow.py
index 4338176..9219f01 100644
--- a/comictaggerlib/autotagmatchwindow.py
+++ b/comictaggerlib/autotagmatchwindow.py
@@ -63,4 +63,3 @@ class AutoTagMatchWindow(QtWidgets.QDialog):
self.skipButton, QtWidgets.QDialogButtonBox.ActionRole)
- self.buttonBox.button(QtWidgets.QDialogButtonBox.Ok).setText(
- "Accept and Write Tags")
+ self.buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText("Accept and Write Tags")
diff --git a/comictaggerlib/cli.py b/comictaggerlib/cli.py
index 688907d..dbd0c2e 100644
--- a/comictaggerlib/cli.py
+++ b/comictaggerlib/cli.py
@@ -293,7 +293,3 @@ def process_file_cli(filename, opts, settings, match_results):
if opts.raw:
- print((
- "{0}".format(
- str(
- ca.readRawCIX(),
- errors='ignore'))))
+ print(ca.read_raw_cix())
else:
2022-04-01 16:50:46 -07:00
|
|
|
self.series = ""
|
|
|
|
self.volume = ""
|
|
|
|
self.year = ""
|
|
|
|
self.issue_count = ""
|
|
|
|
self.remainder = ""
|
|
|
|
self.issue = ""
|
2015-02-16 04:27:21 -08:00
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def repl(self, m: Match[str]) -> str:
    """Return a run of spaces exactly as long as the text matched by *m*.

    Used as the replacement callback for ``re.sub`` so that blanked-out text
    keeps its length and the remaining characters keep their positions.
    """
    begin, finish = m.span()
    return " " * (finish - begin)
|
2015-02-16 07:19:38 -08:00
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def fix_spaces(self, string: str, remove_dashes: bool = True) -> str:
    """Blank out name separators in *string* without changing its length.

    With ``remove_dashes`` true the ``placeholders_no_dashes`` patterns are
    applied (dashes and underscores become spaces); otherwise the
    ``placeholders_allow_dashes`` patterns are applied (underscores only).
    Each match is replaced via ``self.repl``, i.e. by the same number of
    spaces, so character positions are preserved.
    """
    patterns = placeholders_no_dashes if remove_dashes else placeholders_allow_dashes
    cleaned = string
    for pattern in patterns:
        cleaned = pattern.sub(self.repl, cleaned)
    return cleaned
|
2015-02-16 07:19:38 -08:00
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def get_issue_count(self, filename: str, issue_end: int) -> str:
    """Return the total issue count from an "of NN" phrase after the issue number.

    Only the part of *filename* from index *issue_end* onwards is examined.
    The digits are returned with leading zeros stripped; an empty string is
    returned when no such phrase is present.
    """
    # Look only at what follows the issue number, with name separators
    # replaced by spaces.
    tail = self.fix_spaces(filename[issue_end:])

    of_phrase = re.search(r"(?:\s\(?of\s)(\d+)(?: |\))", tail, re.IGNORECASE)
    if of_phrase is None:
        return ""
    return of_phrase.group(1).lstrip("0")
|
2015-02-16 07:19:38 -08:00
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def get_issue_number(self, filename: str) -> tuple[str, int, int]:
    """Returns a tuple of issue number string, and start and end indexes in the filename

    (The indexes will be used to split the string up for further parsing)

    Every cleanup step replaces characters with an equal number of spaces, so
    the returned indexes are valid positions in the filename that was passed
    in. As a side effect ``self.volume`` is set when a volume marker followed
    by digits is found.

    Returns ``("", 0, 0)`` when no issue number can be identified, including
    the case where nothing but whitespace is left after the cleanup.
    """
    # first, look for multiple "--", this means it's formatted differently
    # from most:
    if "--" in filename:
        # the pattern seems to be that anything to left of the first "--"
        # is the series name followed by issue
        filename = re.sub(r"--.*", self.repl, filename)

    elif "__" in filename and not re.search(r"\[__\d+__]", filename):
        # the pattern seems to be that anything to left of the first "__"
        # is the series name followed by issue
        filename = re.sub(r"__.*", self.repl, filename)

    filename = filename.replace("+", " ")

    # replace parenthetical phrases with spaces
    filename = re.sub(r"\(.*?\)", self.repl, filename)
    filename = re.sub(r"\[.*?]", self.repl, filename)

    # replace any name separators with spaces
    filename = self.fix_spaces(filename)

    # remove any "of NN" phrase with spaces (problem: this could break on
    # some titles)
    filename = re.sub(r"of \d+", self.repl, filename)

    # we should now have a cleaned up filename version with all the words in
    # the same positions as original filename

    # search for volume number
    match = re.search(self.volume_regex + r"(\d+)", filename, re.IGNORECASE)
    if match:
        self.volume = match.group(1)

    # make a list of each word and its position
    word_list = [(m.group(0), m.start(), m.end()) for m in re.finditer(r"\S+", filename)]

    # Nothing survived the cleanup (e.g. "(2015)" or "--extras"): there is no
    # word to inspect, so report "no issue number" instead of indexing into
    # an empty list.
    if not word_list:
        return "", 0, 0

    # remove the first word, since it shouldn't be the issue number
    if len(word_list) > 1:
        word_list = word_list[1:]
    else:
        # only one word? Check to see if there is a digit, if so use it as the issue number and the series
        word, word_start, word_end = word_list[0]
        if any(char.isnumeric() for char in word):
            return word.removeprefix("#"), word_start, word_end

    # Now try to search for the likely issue number word in the list

    # first look for a word with "#" followed by digits with optional suffix
    # this is almost certainly the issue number
    candidate = next(
        (w for w in reversed(word_list) if re.match(r"#-?((\d*\.\d+|\d+)(\w*))", w[0])),
        None,
    )

    # same as above but w/o a '#', and only look at the last word in the
    # list
    if candidate is None and re.match(r"-?((\d*\.\d+|\d+)(\w*))", word_list[-1][0]):
        candidate = word_list[-1]

    # now try to look for a # followed by any characters
    if candidate is None:
        candidate = next((w for w in reversed(word_list) if re.match(r"#\S+", w[0])), None)

    if candidate is None:
        return "", 0, 0

    issue, start, end = candidate
    # the "#" marker is not part of the issue number itself
    return issue.removeprefix("#"), start, end
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def get_series_name(self, filename: str, issue_start: int) -> tuple[str, str]:
    """Derive the series name and volume from the part of the filename before the issue.

    Args:
        filename: The filename to examine.
        issue_start: Index in ``filename`` at which the issue number begins;
            ``0`` is treated as "no issue number was found".

    Returns:
        A ``(series, volume)`` tuple of stripped strings. ``volume`` is ``""``
        when neither a volume marker nor a trailing ``(YEAR)`` was found.
    """
    # Use the issue number string index to split the filename string
    filename = filename[:issue_start] if issue_start != 0 else filename.lstrip("#")

    # in case there is no issue number, remove some obvious stuff:
    # the pattern seems to be that anything to left of the first "--" (or,
    # when there is no "--", the first "__") is the series name followed by issue
    for separator, tail_pattern in (("--", r"--.*"), ("__", r"__.*")):
        if separator in filename:
            filename = re.sub(tail_pattern, self.repl, filename)
            # NOTE(review): the original carries a "# never happens" remark at
            # this point; what it refers to is not evident from this method.
            break

    series = self.fix_spaces(filename.replace("+", " "), remove_dashes=False)
    volume = ""

    # save the last word now, before parenthetical phrases are stripped below
    words = series.split()
    last_word = words[-1] if words else ""

    # remove any parenthetical phrases
    series = re.sub(r"\(.*?\)", "", series)

    # search for volume number
    volume_match = re.search(r"(.+)" + self.volume_regex + r"(\d+)", series, re.IGNORECASE)
    if volume_match:
        series, volume = volume_match.group(1), volume_match.group(2)
        # a volume already stored on the instance wins over the one matched here
        if self.volume:
            volume = self.volume

    # if a volume wasn't found, see if the last word is a year in parentheses
    # since that's a common way to designate the volume
    if volume == "":
        # match either (YEAR), (YEAR-), or (YEAR-YEAR2)
        year_match = re.search(r"(\()(\d{4})(-(\d{4}|)|)(\))", last_word)
        if year_match:
            volume = year_match.group(2)

    series = series.strip()

    # if we don't have an issue number (issue_start==0), look
    # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might
    # be removed to help search online
    if issue_start == 0:
        one_shot_words = ["tpb", "os", "one-shot", "ogn", "gn"]
        words = series.split()
        if words and words[-1].casefold() in one_shot_words:
            series = series.rpartition(" ")[0]

    if volume:
        # drop a dangling volume marker left at the end of the series name
        series = re.sub(r"\s+v(|ol|olume)$", "", series)

    return series.strip().strip("-_.").strip(), volume.strip()
|
2015-02-16 07:19:38 -08:00
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def get_year(self, filename: str, issue_end: int) -> str:
    """Return the four-digit year found after the issue number, or ``""``.

    Only ``filename[issue_end:]`` is searched. A year is recognised when
    exactly four digits are wrapped in parentheses (``(2014)``) or in double
    dashes (``--2014--``); the first such occurrence wins.
    """
    # look for four digit number with "(" ")" or "--" around it
    found = re.search(r"(\(\d{4}\))|(--\d{4}--)", filename[issue_end:])
    if found is None:
        return ""
    # remove non-digits
    return re.sub(r"\D", "", found.group())
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def get_remainder(self, filename: str, year: str, count: str, volume: str, issue_end: int) -> str:
    """Make a guess at where the non-interesting stuff begins.

    Args:
        filename: The filename to examine.
        year: Year already extracted from the filename, or ``""``.
        count: Issue count already extracted from the filename, or ``""``.
        volume: Volume already extracted from the filename, or ``""``.
        issue_end: Index just past the issue number; only consulted when the
            filename contains neither ``"--"`` nor ``"__"``.

    Returns:
        The leftover text with the volume marker, the year and the
        ``"of <count>"`` phrase removed (first occurrence each), stripped of
        surrounding whitespace. ``""`` when nothing is left.
    """
    remainder = ""

    # everything after the first "--" (or "__") is taken as the remainder
    if "--" in filename:
        remainder = filename.partition("--")[2]
    elif "__" in filename:
        remainder = filename.partition("__")[2]
    elif issue_end != 0:
        remainder = filename[issue_end:]

    remainder = self.fix_spaces(remainder, remove_dashes=False)
    if volume != "":
        # escape the volume so it is always matched literally inside the pattern
        remainder = re.sub(
            r"(?i)(.+)((?:v(?:|ol|olume))\.?\s?)" + re.escape(volume), "", remainder, count=1
        )
    if year != "":
        remainder = remainder.replace(year, "", 1)
    if count != "":
        remainder = remainder.replace("of " + count, "", 1)

    # the removals above can leave empty parentheses and doubled spaces behind
    remainder = remainder.replace("()", "")
    remainder = remainder.replace("  ", " ")  # cleans some whitespace mess

    return remainder.strip()
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def parse_filename(self, filename: str) -> None:
    """Parse ``filename`` and store the extracted fields on the instance.

    Sets ``self.issue``, ``self.series``, ``self.volume``, ``self.year``,
    ``self.issue_count`` and ``self.remainder``. Nothing is returned.
    """
    # remove the path and the extension
    filename = os.path.splitext(os.path.basename(filename))[0]

    # url decode, just in case
    filename = unquote(filename)

    # sometimes archives get messed up names from too many decodes
    # often url encodings will break and leave "_28" and "_29" in place
    # of "(" and ")" see if there are a number of these, and replace them
    if filename.count("_28") > 1 and filename.count("_29") > 1:
        filename = filename.replace("_28", "(").replace("_29", ")")

    self.issue, issue_start, issue_end = self.get_issue_number(filename)
    self.series, self.volume = self.get_series_name(filename, issue_start)

    # with no issue number, the volume doubles as the issue
    if self.issue == "":
        self.issue = self.volume

    # provides proper value when the filename doesn't have a issue number
    if issue_end == 0:
        issue_end = len(self.series)

    self.year = self.get_year(filename, issue_end)
    self.issue_count = self.get_issue_count(filename, issue_end)
    self.remainder = self.get_remainder(filename, self.year, self.issue_count, self.volume, issue_end)

    # normalise whichever numbers were found through IssueString
    if self.volume != "":
        self.volume = issuestring.IssueString(self.volume).as_string()
    if self.issue != "":
        self.issue = issuestring.IssueString(self.issue).as_string()
|
2022-04-29 16:37:44 -07:00
|
|
|
|
|
|
|
|
2023-10-23 21:08:55 -07:00
|
|
|
class FilenameInfo(TypedDict):
    """Typed dictionary holding the fields extracted from a comic filename.

    ``Parser.__init__`` creates one of these with every ``str`` field set to
    ``""`` and every ``bool`` field set to ``False``.
    """

    # free-text fields
    alternate: str
    # boolean flags
    annual: bool
    archive: str
    c2c: bool
    fcbd: bool
    # issue / publication details, all kept as strings
    issue: str
    issue_count: str
    publisher: str
    remainder: str
    series: str
    title: str
    volume: str
    volume_count: str
    year: str
    format: str
|
|
|
|
|
|
|
|
|
|
|
|
# Single-letter code -> description of the edition it stands for.
# NOTE(review): presumably consulted when Parser is created with
# protofolius_issue_number_scheme=True; the lookup itself is not visible here.
# NOTE(review): "entrtainment" and "familiy" look like misspellings, but the
# values are runtime strings, so they are left untouched; confirm before fixing.
protofolius_issue_number_scheme = {
    "B": "biography/best of",
    "C": "compact edition",
    "E": "entrtainment/puzzle edition",
    "F": "familiy book edition",
    "J": "jubileum (anniversary) edition",
    "P": "pocket edition",
    "N": "newly brought out/restyled edition",
    "O": "old editions (or oblong format)",
    "S": "special edition",
    "X": "X-rated edition",
}
|
2022-04-29 16:37:44 -07:00
|
|
|
|
|
|
|
|
|
|
|
# Module-level EOF item, built once with -1 and an empty string.
# NOTE(review): presumably handed out by Parser when it runs past the end of
# the lexed items, and -1 / "" are presumably its position and value - confirm.
eof = filenamelexer.Item(filenamelexer.ItemType.EOF, -1, "")
|
|
|
|
|
|
|
|
|
2022-06-23 22:59:09 -07:00
|
|
|
# Extracted and mutilated from https://github.com/lordwelch/wsfmt
|
2022-07-18 12:17:13 -07:00
|
|
|
# Which was extracted and mutilated from https://github.com/golang/go/tree/master/src/text/template/parse
|
2022-04-29 16:37:44 -07:00
|
|
|
class Parser:
    """Cursor and accumulated state for the filename state machine.

    The parser walks ``input`` (the lexer output) with a cursor ``pos`` and
    repeatedly calls the current state function until one returns ``None``.
    Everything that is recognised along the way ends up in ``filename_info``.
    """

    def __init__(
        self,
        lexer_result: list[filenamelexer.Item],
        first_is_alt: bool = False,
        remove_c2c: bool = False,
        remove_fcbd: bool = False,
        remove_publisher: bool = False,
        protofolius_issue_number_scheme: bool = False,
    ) -> None:
        # Options supplied by the caller
        self.first_is_alt = first_is_alt
        self.remove_c2c = remove_c2c
        self.remove_fcbd = remove_fcbd
        self.remove_publisher = remove_publisher
        self.protofolius_issue_number_scheme = protofolius_issue_number_scheme

        # Item types that must not show up in the remainder
        self.remove_from_remainder = []
        if remove_c2c:
            self.remove_from_remainder.append(filenamelexer.ItemType.C2C)
        if remove_fcbd:
            self.remove_from_remainder.append(filenamelexer.ItemType.FCBD)

        # State machine and cursor
        self.state: Callable[[Parser], Callable | None] | None = None  # type: ignore[type-arg]
        self.input = lexer_result
        self.pos = -1

        self.firstItem = True
        self.skip = False
        self.alt = False

        # Result, everything starts out empty
        self.filename_info = FilenameInfo(
            alternate="",
            annual=False,
            archive="",
            c2c=False,
            fcbd=False,
            issue="",
            issue_count="",
            publisher="",
            remainder="",
            series="",
            title="",
            volume="",
            volume_count="",
            year="",
            format="",
        )

        # Bracket nesting depth
        self.in_something = 0  # In some sort of brackets {}[]()
        self.in_brace = 0  # In {}
        self.in_s_brace = 0  # In []
        self.in_paren = 0  # In ()

        # Items collected while parsing
        self.year_candidates: list[tuple[bool, bool, filenamelexer.Item]] = []
        self.series: list[list[filenamelexer.Item]] = []
        self.series_parts: list[filenamelexer.Item] = []
        self.title_parts: list[filenamelexer.Item] = []
        self.used_items: list[filenamelexer.Item] = []
        self.irrelevant: list[filenamelexer.Item] = []
        self.operator_rejected: list[filenamelexer.Item] = []
        self.publisher_removed: list[filenamelexer.Item] = []

        # An explicitly marked issue number (IssueNumber item) is located up front;
        # when there are several the index of the last one is kept
        marked_at = [
            index for index, token in enumerate(lexer_result) if token.typ == filenamelexer.ItemType.IssueNumber
        ]
        self.issue_number_at = marked_at[-1] if marked_at else None
        self.issue_number_marked = bool(marked_at)
        self.issue_number_passed = False

    # Get returns the next Item in the input.
    def get(self) -> filenamelexer.Item:
        # The cursor always advances, even when it runs off the end
        self.pos += 1
        if self.pos >= len(self.input):
            return eof
        return self.input[self.pos]

    # Peek returns but does not consume the next Item in the input.
    def peek(self, length: int = 1) -> filenamelexer.Item:
        target = self.pos + length
        return self.input[target] if target < len(self.input) else eof

    # Peek_back returns but does not step back the previous Item in the input.
    def peek_back(self, length: int = 1) -> filenamelexer.Item:
        target = self.pos - length
        return eof if target < 0 else self.input[target]

    # Backup steps back one Item.
    def backup(self) -> None:
        self.pos -= 1

    def run(self) -> None:
        """Run state functions, starting with ``parse``, until one returns None."""
        self.state = parse
        while (step := self.state) is not None:
            self.state = step(self)
|
|
|
|
|
|
|
|
|
2022-11-22 16:51:26 -08:00
|
|
|
def parse(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ignore[type-arg]
    """Main state: consume one item and decide which state handles it.

    Returns the next state function: ``parse_finish`` at end of input,
    ``parse_issue_number``, ``parse_info_specifier``, a ``functools.partial`` of
    ``parse_series``, or ``parse`` itself when the item needs no dedicated state.
    """
    item: filenamelexer.Item = p.get()
    # We're done, time to do final processing
    if item.typ == filenamelexer.ItemType.EOF:
        return parse_finish

    # Need to figure out if this is the issue number
    if item.typ == filenamelexer.ItemType.Number:
        likely_year = False
        likely_issue_number = True
        if p.firstItem and p.first_is_alt:
            p.alt = True
            p.firstItem = False
            return parse_issue_number

        # Issue number is not 4 digits e.g. a year
        # If this is still used in 7978 years, something is terribly wrong
        if len(item.val.lstrip("0")) < 4:
            # Assume that operators indicate a non-issue number e.g. IG-88 or 88-IG
            if filenamelexer.ItemType.Operator not in (p.peek().typ, p.peek_back().typ):
                # It is common to use '89 to refer to an annual reprint from 1989
                if item.val[0] != "'":
                    # An issue number starting with # Was not found and no previous number was found
                    if p.issue_number_at is None:
                        # Series has already been started/parsed,
                        # filters out leading alternate numbers
                        if len(p.series) > 0:
                            return parse_issue_number
            else:
                p.operator_rejected.append(item)
                # operator rejected used later to add back to the series/title
        # It is more likely to be a year if it is inside parentheses.
        if p.in_something > 0:
            likely_year = len(item.val.lstrip("0")) == 4
            likely_issue_number = not likely_year

        # If numbers are directly followed by text it most likely isn't a year e.g. 2048px
        if p.peek().typ == filenamelexer.ItemType.Text:
            likely_year = False
            likely_issue_number = p.in_something == 0

        # Is either a full year '2001' or a short year "'89"
        if len(item.val.lstrip("0")) == 4 or item.val[0] == "'":
            series = " ".join([x.val for x in (p.series[-1] if p.series else [])])
            if p.series and series.casefold().endswith("free comic book day"):
                likely_issue_number = False

            # Look for a full date as in 2022-04-22
            if p.peek().typ in [
                filenamelexer.ItemType.Symbol,
                filenamelexer.ItemType.Operator,
                filenamelexer.ItemType.Dot,
            ]:
                op = [p.get()]
                if p.peek().typ == filenamelexer.ItemType.Number:
                    month = p.get()
                    if p.peek().typ in [
                        filenamelexer.ItemType.Symbol,
                        filenamelexer.ItemType.Operator,
                        filenamelexer.ItemType.Dot,
                    ]:
                        op.append(p.get())
                        if p.peek().typ == filenamelexer.ItemType.Number:
                            likely_issue_number = False
                            day = p.get()
                            fulldate = [month, day, item]
                            p.used_items.extend(op)
                            p.used_items.extend(fulldate)
                        else:
                            # Un-consume the second separator, the month and the first separator
                            p.backup()
                            p.backup()
                            p.backup()
                            # TODO never happens
                    else:
                        # Un-consume the month and the first separator
                        p.backup()
                        p.backup()
                        # TODO never happens
                else:
                    # Un-consume the first separator
                    p.backup()
                    # TODO never happens

            likely_issue_number = likely_issue_number and item.val[0] != "'"
            p.year_candidates.append((likely_year, likely_issue_number, item))
            if p.in_something == 0:
                # Append to series in case it is a part of the title, but only if we're not inside parentheses
                if not p.series:
                    p.series.append([])
                p.series[-1].append(item)

                # We would use i=item but we want to force a split after year candidates
                return functools.partial(parse_series, i=None)
        # Ensures that IG-88 gets added back to the series/title
        else:
            if p.in_something == 0:
                to_series = (
                    filenamelexer.ItemType.IssueNumber,
                    filenamelexer.ItemType.Number,
                    filenamelexer.ItemType.Operator,
                )
                if (
                    p.peek().typ in to_series
                    or (p.peek().typ == filenamelexer.ItemType.Space and p.peek(2).typ in to_series)
                    or p.peek_back().typ in to_series
                    or (p.peek_back().typ == filenamelexer.ItemType.Space and p.peek_back(2).typ in to_series)
                ):
                    # We're not in something and the next or previous type is an operator or number,
                    # add it to the series
                    return functools.partial(parse_series, i=item)

    # Number with a leading hash e.g. #003
    elif item.typ == filenamelexer.ItemType.IssueNumber:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
        p.issue_number_passed = True
        return parse_issue_number

    # Matches FCBD. Not added to p.used_items so it will show in "remainder"
    elif item.typ == filenamelexer.ItemType.FCBD:
        p.filename_info["fcbd"] = True

    # Matches c2c. Not added to p.used_items so it will show in "remainder"
    elif item.typ == filenamelexer.ItemType.C2C:
        p.filename_info["c2c"] = True

    # Matches the extension if it is known to be an archive format e.g. cbt,cbz,zip,rar
    elif item.typ == filenamelexer.ItemType.ArchiveType:
        p.filename_info["archive"] = item.val.casefold()
        if p.peek_back().typ == filenamelexer.ItemType.Dot:
            p.used_items.append(p.peek_back())
        p.used_items.append(item)

    # Allows removing DC from 'Wonder Woman 49 DC Sep-Oct 1951'
    # dependent on publisher being in a static list in the lexer
    elif item.typ == filenamelexer.ItemType.Publisher:
        p.filename_info["publisher"] = item.val
        p.used_items.append(item)
        if p.firstItem:
            p.firstItem = False
            # A leading publisher goes to the series without being recorded in publisher_removed
            if p.in_something == 0:
                return functools.partial(parse_series, i=item)
        p.publisher_removed.append(item)
        if p.in_something == 0:
            return functools.partial(parse_series, i=item)

    # Attempts to identify the type e.g. annual
    elif item.typ == filenamelexer.ItemType.ComicType:
        series_append = True

        if p.peek().typ == filenamelexer.ItemType.Space:
            p.get()

        if p.peek().typ == filenamelexer.ItemType.Number or (
            p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
        ):
            number = p.get()
            # Mark volume info. Text will be added to the title/series later
            if item.val.casefold() in ["tpb"]:
                # p.title_parts.extend([item, number])
                p.filename_info["volume"] = t2do.convert(number.val)
                p.filename_info["issue"] = t2do.convert(number.val)

                p.used_items.append(item)
                series_append = False

            # Annuals usually mean the year
            elif item.val.casefold() in ["annual"]:
                p.filename_info["annual"] = True
                num = t2d.convert(number.val)
                if num.isnumeric() and len(num) == 4:
                    p.year_candidates.append((True, False, number))
                else:
                    # Not a year, leave the number for the next state
                    p.backup()

        elif item.val.casefold() in ["annual"]:
            p.filename_info["annual"] = True

        # If we don't have a reason to exclude it from the series go back to parsing the series immediately
        if series_append:
            p.used_items.append(item)
            if p.firstItem:
                p.firstItem = False
            return functools.partial(parse_series, i=item)

    # We found text, it's probably the title or series
    elif item.typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Honorific]:
        # Unset first item
        if p.firstItem:
            p.firstItem = False
        if p.in_something == 0 and not p.skip:
            # Hand the text itself back so parse_series reads it as its first item
            p.backup()
            return functools.partial(parse_series, i=None)

    # Usually the word 'of' eg 1 (of 6)
    elif item.typ == filenamelexer.ItemType.InfoSpecifier:
        if p.firstItem:
            p.firstItem = False
        return parse_info_specifier

    # Operator is a symbol that acts as some sort of separator eg - : ;
    elif item.typ == filenamelexer.ItemType.Operator:
        if p.in_something == 0:
            p.irrelevant.append(item)

    # Filter out Month and day names in filename
    elif item.typ == filenamelexer.ItemType.Calendar:
        # Month and day are currently irrelevant if they are inside parentheses e.g. (January 2002)
        if p.in_something > 0:
            p.irrelevant.append(item)

        # assume Sep-Oct is not useful in the series/title
        elif p.peek().typ in [filenamelexer.ItemType.Symbol, filenamelexer.ItemType.Operator]:
            p.get()
            if p.peek().typ == filenamelexer.ItemType.Calendar:
                # first calendar word, the separator just consumed, second calendar word
                p.irrelevant.extend([item, p.input[p.pos], p.get()])
            else:
                p.backup()
                if p.firstItem:
                    p.firstItem = False
                return functools.partial(parse_series, i=item)
        # This is text that just happens to also be a month/day
        else:
            # NOTE(review): the item consumed here is not recorded anywhere - confirm that skipping it is intended
            p.get()
            if p.firstItem:
                p.firstItem = False
            return functools.partial(parse_series, i=item)

    # Specifically '__' or '--', no further title/series parsing is done to keep compatibility with wiki
    elif item.typ == filenamelexer.ItemType.Skip:
        p.skip = True

    # Keeping track of parentheses depth
    elif item.typ == filenamelexer.ItemType.LeftParen:
        p.in_paren += 1
        p.in_something += 1
    elif item.typ == filenamelexer.ItemType.LeftBrace:
        p.in_brace += 1
        p.in_something += 1
    elif item.typ == filenamelexer.ItemType.LeftSBrace:
        p.in_s_brace += 1
        p.in_something += 1

    elif item.typ == filenamelexer.ItemType.RightParen:
        p.in_paren -= 1
        p.in_something -= 1
    elif item.typ == filenamelexer.ItemType.RightBrace:
        p.in_brace -= 1
        p.in_something -= 1
    elif item.typ == filenamelexer.ItemType.RightSBrace:
        p.in_s_brace -= 1
        p.in_something -= 1

    # Unset first item
    if p.firstItem:
        p.firstItem = False

    # Brace management, I don't like negative numbers.
    # A closing bracket without a matching opening one is ignored: the depth it removed is
    # given back and the per-bracket counter is reset. Without the reset the same correction
    # would be applied again for every later item that reaches this point.
    if p.in_paren < 0:
        p.in_something += p.in_paren * -1
        p.in_paren = 0
    if p.in_brace < 0:
        p.in_something += p.in_brace * -1
        p.in_brace = 0
    if p.in_s_brace < 0:
        p.in_something += p.in_s_brace * -1
        p.in_s_brace = 0

    return parse
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: What about more esoteric numbers???
|
2022-11-22 16:51:26 -08:00
|
|
|
def parse_issue_number(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ignore[type-arg]
    """Record the item at the cursor as the issue number or as an alternate number.

    The number becomes an alternate when an issue number is already known or when
    ``p.alt`` is set; multiple alternates are joined with ",". A following
    ".<text or number>" is appended to whichever field was just written.
    Always returns ``parse`` and clears ``p.alt``.
    """
    item = p.input[p.pos]

    # Remember which field receives this number so a decimal part goes to the same field
    is_alternate = bool(p.filename_info["issue"]) or p.alt

    if p.filename_info["issue"]:
        # We already have an issue number, collect every further number as an alternate
        if p.filename_info["alternate"]:
            p.filename_info["alternate"] += "," + item.val
        else:
            p.filename_info["alternate"] = item.val
    else:
        if p.alt:
            p.filename_info["alternate"] = item.val
        else:
            p.filename_info["issue"] = item.val
            p.issue_number_at = item.pos
        p.used_items.append(item)

    if p.peek().typ == filenamelexer.ItemType.Dot:
        p.used_items.append(p.get())  # Add the Dot to used items
        if p.peek().typ in [filenamelexer.ItemType.Text, filenamelexer.ItemType.Number]:
            item = p.get()
            if is_alternate:
                p.filename_info["alternate"] += "." + item.val
            else:
                p.filename_info["issue"] += "." + item.val
            p.used_items.append(item)
        else:
            p.backup()  # We don't use the Dot so don't consume it
            p.used_items.pop()  # we also don't add it to used items

    p.alt = False
    return parse
|
|
|
|
|
|
|
|
|
2023-10-26 20:51:53 -07:00
|
|
|
# i=None is a split in the series
|
|
|
|
def parse_series(p: Parser, i: filenamelexer.Item | None) -> Callable[[Parser], Callable | None] | None:
    """Collect one section of the series/title and append it to ``p.series``.

    When ``i`` is given it is added to the section; unless an issue number has
    been marked/passed the previous section is re-opened and continued.
    When ``i`` is None a new section is started. Items are consumed until
    something that cannot belong to the section is found. Always returns ``parse``.
    """
    current = []
    prev_space = False

    # True once the explicitly marked issue number has been passed, or when an
    # unmarked issue number has already been found
    issue_marked_or_passed = (
        p.issue_number_marked and p.issue_number_passed or p.issue_number_at is not None and not p.issue_number_marked
    )

    if i:
        if not issue_marked_or_passed:
            if p.series:
                # Continue the previous section instead of starting a new one
                current = p.series.pop()
        current.append(i)
    else:
        # If we are splitting we don't want to start with these
        while p.peek().typ in [
            filenamelexer.ItemType.Space,
            filenamelexer.ItemType.Operator,
            filenamelexer.ItemType.Symbol,
        ]:
            p.irrelevant.append(p.get())

    # Skip is only true if we have come across '--' or '__'
    while not p.skip:
        item = p.get()

        # Spaces are evil
        if item.typ == filenamelexer.ItemType.Space:
            prev_space = True
            continue
        if item.typ in [
            filenamelexer.ItemType.Text,
            filenamelexer.ItemType.Symbol,
            filenamelexer.ItemType.Publisher,
            filenamelexer.ItemType.Honorific,
        ]:
            current.append(item)
            if p.peek().typ == filenamelexer.ItemType.Dot:
                dot = p.get()
                # Keep the dot after an honorific, or after text/publisher when a space follows it
                if item.typ == filenamelexer.ItemType.Honorific or (
                    p.peek().typ == filenamelexer.ItemType.Space
                    and item.typ in (filenamelexer.ItemType.Text, filenamelexer.ItemType.Publisher)
                ):
                    current.append(dot)
                else:
                    p.backup()
            if item.typ == filenamelexer.ItemType.Publisher:
                p.filename_info["publisher"] = item.val

        # Handle Volume
        elif item.typ == filenamelexer.ItemType.InfoSpecifier:
            # Exception for 'of'
            if item.val.casefold() == "of":
                current.append(item)
            else:
                # This specifically lets 'X-Men-V1-067' parse correctly as Series: X-Men Volume: 1 Issue: 67
                while len(current) > 0 and current[-1].typ not in [
                    filenamelexer.ItemType.Text,
                    filenamelexer.ItemType.Symbol,
                ]:
                    p.irrelevant.append(current.pop())
                p.backup()
                break

        elif item.typ == filenamelexer.ItemType.Operator:
            peek = p.peek()
            # ': ' separates the title from the series, only the last section is considered the title
            if not prev_space and peek.typ in [filenamelexer.ItemType.Space]:
                current.append(item)
                break
            else:
                # Force space around '-' makes 'batman - superman' stay otherwise we get 'batman-superman'
                if prev_space and peek.typ in [filenamelexer.ItemType.Space]:
                    item.val = " " + item.val + " "
                current.append(item)

        # Stop processing series/title if a skip item is found
        elif item.typ == filenamelexer.ItemType.Skip:
            p.backup()
            break

        elif item.typ == filenamelexer.ItemType.Number:
            # Special case for the word 'book'
            if current and current[-1].val.casefold() == "book":
                # Mark the volume
                p.filename_info["volume"] = t2do.convert(item.val)

                # Add this section to the series EG [['bloodshot', 'book']]
                p.series.append(current)
                # Pop the last item and break to end this section EG [['bloodshot'], ['book', '3']]
                current = [current.pop(), item]
                break

            count = get_number(p, p.pos + 1)
            # this is an issue or volume number eg '1 of 2'
            if count is not None:
                p.backup()
                break

            if p.peek().typ == filenamelexer.ItemType.Space:
                p.get()
                # We have 2 numbers, add the first to the series and then go back to parse
                if p.peek().typ in [filenamelexer.ItemType.Number, filenamelexer.ItemType.IssueNumber]:
                    current.append(item)
                    break

                # the issue number has been marked and passed, keep it as a part of the series
                if issue_marked_or_passed:
                    # We already have an issue number, this should be a part of the series
                    current.append(item)
                else:
                    # We have 1 number break here, it's possible it's the issue
                    p.backup()  # Whitespace
                    p.backup()  # The number
                    break

            # We have 1 number break here, it's possible it's the issue
            else:
                # the issue number has been marked or passed, keep it as a part of the series
                if issue_marked_or_passed:
                    # We already have an issue number, this should be a part of the series
                    current.append(item)
                else:
                    p.backup()  # The number
                    break

        else:
            # Ensure 'ms. marvel' parses 'ms.' correctly
            if item.typ == filenamelexer.ItemType.Dot:
                if p.peek_back().typ == filenamelexer.ItemType.Honorific:
                    current.append(item)
                # Dot before a number, or after a single-character text item
                elif (
                    p.peek().typ == filenamelexer.ItemType.Number
                    or p.peek_back().typ == filenamelexer.ItemType.Text
                    and len(p.peek_back().val) == 1
                ):
                    current.append(item)
                    item.no_space = True
                # Allows avengers.hulk to parse correctly
                elif p.peek().typ in (filenamelexer.ItemType.Text,):
                    # Marks the dot as used so that the remainder is clean
                    p.used_items.append(item)
            else:
                # Anything else ends the section and is left for parse to handle
                p.backup()
                break

        prev_space = False

    p.series.append(current)
    return parse
|
|
|
|
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def resolve_year(p: Parser) -> None:
    """Choose the year (and possibly the issue and volume) from ``p.year_candidates``.

    Each candidate is a ``(likely_year, likely_issue_number, item)`` tuple.
    Chosen items are added to ``p.used_items`` and removed from the series and
    title parts. Does nothing when there are no candidates.
    """
    candidates = p.year_candidates
    if not candidates:
        return

    def discard(token: filenamelexer.Item) -> None:
        # Drop the token from the series and the title, wherever it is present
        for parts in (p.series_parts, p.title_parts):
            if token in parts:
                parts.remove(token)

    # Sort by likely_year boolean, unlikely years end up first
    candidates.sort(key=itemgetter(0))

    if not p.filename_info["issue"]:
        # Without an issue number the least year-like candidate may serve as one
        first = candidates.pop(0)
        if first[1]:
            issue_item = first[2]
            p.filename_info["issue"] = issue_item.val
            p.used_items.append(issue_item)
            discard(issue_item)
            if not candidates:
                return
        else:
            candidates.insert(0, first)

    # Take the last year e.g. (2007) 2099 (2008) becomes 2099 2007 2008 and takes 2008
    year_item = candidates.pop()[2]
    p.filename_info["year"] = year_item.val
    p.used_items.append(year_item)

    # (2008) Title (2009) is many times used to denote the series year if we don't have a volume we use it
    if not p.filename_info["volume"] and candidates and candidates[-1][0]:
        volume_item = candidates[-1][2]
        if volume_item not in p.series_parts and volume_item not in p.title_parts:
            candidates.pop()
            p.filename_info["volume"] = volume_item.val
            p.used_items.append(volume_item)
            # note: this never removes anything, the item was just checked to be absent
            discard(volume_item)

    # Remove year from series and title
    discard(year_item)
|
2022-04-29 16:37:44 -07:00
|
|
|
|
|
|
|
|
2022-06-09 12:31:57 -07:00
|
|
|
def resolve_issue(p: Parser) -> None:
    """Settle the final issue and volume values in ``p.filename_info``.

    Falls back to a trailing number of the series, then to the alternate number
    and the volume when no issue number was found, normalises the issue and
    volume strings and, when enabled, looks up the protofolius edition format.
    """
    info = p.filename_info

    # If we don't have an issue try to find it in the series
    if not info["issue"] and p.series_parts and p.series_parts[-1].typ == filenamelexer.ItemType.Number:
        candidate = p.series_parts.pop()

        # A year is probably part of the series e.g. Spider-Man 2099, and a number that was
        # rejected because of an operator that is still there belongs to it as well e.g. 'IG-88'
        belongs_to_series = candidate in [entry[2] for entry in p.year_candidates] or (
            candidate in p.operator_rejected
            and p.series_parts
            and p.series_parts[-1].typ == filenamelexer.ItemType.Operator
        )
        if belongs_to_series:
            p.series_parts.append(candidate)
        else:
            # We have no reason to not use this number as the issue number.
            # Specifically happens when parsing 'X-Men-V1-067.cbr'
            info["issue"] = candidate.val
            p.used_items.append(candidate)
            p.issue_number_at = candidate.pos

    if info["issue"]:
        without_hash = info["issue"].lstrip("#")
        info["issue"] = issuestring.IssueString(without_hash).as_string()

    if info["volume"]:
        info["volume"] = info["volume"].lstrip("#").lstrip("0")

    if not info["issue"]:
        # We have an alternate move it to the issue
        if info["alternate"]:
            info["issue"], info["alternate"] = info["alternate"], ""

        # A volume takes the place of the issue number
        if info["volume"]:
            info["issue"] = info["volume"]

    # Protofolius scheme: a known letter followed by a digit selects the edition format
    issue = info["issue"]
    if issue and p.protofolius_issue_number_scheme and len(issue) > 1 and issue[0].isalpha():
        prefix = issue[0].upper()
        if prefix in protofolius_issue_number_scheme and issue[1].isnumeric():
            info["format"] = protofolius_issue_number_scheme[prefix]
|
|
|
|
|
2022-04-29 16:37:44 -07:00
|
|
|
|
2023-10-26 20:51:53 -07:00
|
|
|
def split_series(items: list[list[filenamelexer.Item]]) -> tuple[list[filenamelexer.Item], list[filenamelexer.Item]]:
    """Split the collected sections into a flat series list and a flat title list.

    With a single section everything is the series. With several sections the
    last one is always title; earlier sections are series until a section does
    not end in an operator, after which the remaining sections are title too.
    A space is appended to the value of every trailing operator, and a trailing
    operator at the very end of the series is dropped.
    """
    series_sections: list[list[filenamelexer.Item]] = []
    title_sections: list[list[filenamelexer.Item]] = []

    if len(items) > 1:
        # We probably have a title
        target = series_sections
        last_index = len(items) - 1
        for index, section in enumerate(items):
            # Switch to title if we are on the last part
            if index == last_index:
                target = title_sections
            if not section:
                continue
            target.append(section)
            tail = section[-1]
            if tail.typ == filenamelexer.ItemType.Operator:
                # Ensures that when there are multiple separators that they display properly
                tail.val += " "
            else:
                # We don't have an operator separating the parts, it's probably an issue number
                target = title_sections
    elif items:
        series_sections.extend(items)

    series: list[filenamelexer.Item] = [token for section in series_sections for token in section]
    title: list[filenamelexer.Item] = [token for section in title_sections for token in section]
    if series and series[-1].typ == filenamelexer.ItemType.Operator:
        series.pop()
    return series, title
|
|
|
|
|
|
|
|
|
2022-11-22 16:51:26 -08:00
|
|
|
def parse_finish(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ignore[type-arg]
    """Final parser state: assemble series, title and remainder into ``p.filename_info``.

    Steps, in order:

    1. Mark every item collected in ``p.series`` as used.
    2. Split ``p.series`` into ``p.series_parts`` / ``p.title_parts`` and store
       preliminary ``series`` / ``title`` strings.
    3. Run ``resolve_year`` and ``resolve_issue``.
    4. If ``p.remove_publisher`` is set, drop the items listed in
       ``p.publisher_removed`` from both part lists.
    5. Store the final ``series`` (falling back to the issue value when no
       series parts remain), set ``fcbd`` when the series contains
       "free comic book", and store the final ``title``.
    6. Mark items whose type is in ``p.remove_from_remainder`` as irrelevant
       and compute ``remainder``.

    Returns:
        Always ``None``.
    """
    p.used_items.extend(itertools.chain.from_iterable(p.series))

    p.series_parts, p.title_parts = split_series(p.series)
    p.filename_info["series"] = join_title(p.series_parts)
    p.filename_info["title"] = join_title(p.title_parts)

    resolve_year(p)
    resolve_issue(p)

    # Remove publishers, currently only marvel and dc are defined,
    # this is an option specifically because this can drastically screw up parsing
    if p.remove_publisher:
        for publisher_item in p.publisher_removed:
            for parts in (p.series_parts, p.title_parts):
                if publisher_item in parts:
                    parts.remove(publisher_item)

    if not p.series_parts:
        # Nothing left for the series: fall back to the issue value (or an empty string)
        p.filename_info["series"] = p.filename_info.get("issue", "")
    else:
        p.filename_info["series"] = join_title(p.series_parts)
        p.used_items.extend(p.series_parts)

    if "free comic book" in p.filename_info["series"].casefold():
        p.filename_info["fcbd"] = True

    p.filename_info["title"] = join_title(p.title_parts)
    p.used_items.extend(p.title_parts)

    droppable = [entry for entry in p.input if entry.typ in p.remove_from_remainder]
    p.irrelevant.extend(droppable)

    p.filename_info["remainder"] = get_remainder(p)

    return None
|
2022-04-29 16:37:44 -07:00
|
|
|
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def get_remainder(p: Parser) -> str:
    """Build the leftover text from the items that were neither used nor irrelevant.

    Items of ``p.input`` that appear in ``p.irrelevant`` or ``p.used_items``
    are dropped first. The rest is concatenated with these rules:

    * A Space/Skip item becomes a single space, but only when it is not the
      first or last remaining item, the previous item is not a space or an
      opening bracket, and the next item is not a closing bracket.
    * A closing bracket directly after an opening bracket is dropped and the
      opening bracket(s) already emitted are stripped from the end.
    * Any other item contributes its ``val`` verbatim.

    Finally, any remaining empty bracket pairs are removed and the result is
    stripped of surrounding whitespace.
    """
    item_type = filenamelexer.ItemType
    # Hoisted out of the loop: these groups never change between iterations
    spacing = (item_type.Space, item_type.Skip)
    openers = (item_type.LeftBrace, item_type.LeftParen, item_type.LeftSBrace)
    closers = (item_type.RightBrace, item_type.RightParen, item_type.RightSBrace)
    no_space_after = (item_type.Space, *openers)

    remainder = ""

    # Remove used items and irrelevant items e.g. the series and useless operators
    inp = [x for x in p.input if x not in p.irrelevant and x not in p.used_items]
    for i, item in enumerate(inp):
        # No double space or space next to parentheses
        if item.typ in spacing:
            if (
                i > 0
                and inp[i - 1].typ not in no_space_after
                and i + 1 < len(inp)
                and inp[i + 1].typ not in closers
            ):
                remainder += " "

        # Strip off useless opening parenthesis.
        # (Space items never reach this branch; they are handled above.)
        elif item.typ in closers and i > 0 and inp[i - 1].typ in openers:
            remainder = remainder.rstrip("[{(")

        # Add the next item
        else:
            remainder += item.val

    # Remove empty parentheses
    remainder = re.sub(r"[\[{(]+[]})]+", "", remainder)
    return remainder.strip()
|
|
|
|
|
|
|
|
|
2022-11-22 16:51:26 -08:00
|
|
|
def parse_info_specifier(p: Parser) -> Callable[[Parser], Callable | None] | None:  # type: ignore[type-arg]
    """Parser state for an info specifier ('volume', 'vol', 'vol.', 'v' or 'of') at ``p.pos``.

    An optional space after the specifier is consumed. If the next item is a
    Number, or Text that ``t2d`` converts to something numeric, it is consumed
    as the specifier's number:

    * volume keywords: the converted number is stored as ``volume``.
    * 'of': the number preceding the specifier is looked up with
      ``get_number_rev``. When ``p.in_something > 0`` the count is stored as
      ``issue_count`` (no issue number known yet, or the preceding number is
      the issue number) or as ``volume``/``volume_count`` (the preceding number
      is neither the issue number nor part of the series/title). Otherwise the
      parser position is reset to the preceding number and parsing continues
      in ``parse_series`` with that number.

    Returns:
        ``parse`` in every case except the reset described above, which
        returns ``functools.partial(parse_series, i=<preceding number>)``.
    """
    specifier = p.input[p.pos]
    specifier_index = p.pos

    if p.peek().typ == filenamelexer.ItemType.Space:
        p.get()

    # Handles 'volume 3' and 'volume three'
    followed_by_number = p.peek().typ == filenamelexer.ItemType.Number or (
        p.peek().typ == filenamelexer.ItemType.Text and t2d.convert(p.peek().val).isnumeric()
    )
    if not followed_by_number:
        return parse

    number = p.get()
    keyword = specifier.val.casefold()

    if keyword in ("volume", "vol", "vol.", "v"):
        p.filename_info["volume"] = t2do.convert(number.val)
        p.used_items.append(specifier)
        p.used_items.append(number)
        return parse

    # 'of' is only special if it is inside a parenthesis.
    if keyword != "of":
        return parse

    preceding = get_number_rev(p, specifier_index)
    if preceding is None:
        return parse

    if p.in_something > 0:
        if p.issue_number_at is None or p.issue_number_at == preceding.pos:
            # Either no issue number is known yet
            # (TODO: Figure out what to do here if it ever happens)
            # or this is definitely the issue number
            p.filename_info["issue_count"] = str(int(t2do.convert(number.val)))
            p.used_items.append(specifier)
            p.used_items.append(number)
        elif preceding not in p.series_parts and preceding not in p.title_parts:
            # This is not for the issue number it is not in either the issue or the title,
            # assume it is the volume number and count
            p.filename_info["volume"] = preceding.val
            p.filename_info["volume_count"] = str(int(t2do.convert(number.val)))
            p.used_items.append(preceding)
            p.used_items.append(specifier)
            p.used_items.append(number)
        # else: TODO: Figure out what to do here if it ever happens
        return parse

    # Resets back to '1' in 'The Wrath of Foobar-Man, Part 1 of 2'
    # we then go to parse_series it adds the preceding number (the '1') and then continues parsing at of
    p.pos = [position for position, candidate in enumerate(p.input) if candidate == preceding][0]

    if not p.in_something:
        return functools.partial(parse_series, i=preceding)
    return parse
|
|
|
|
|
|
|
|
|
|
|
|
# Gets 03 in '03 of 6'
|
2023-10-11 17:03:07 -07:00
|
|
|
def get_number_rev(p: Parser, index: int) -> filenamelexer.Item | None:
    """Search backward from ``index`` for the number a count belongs to.

    Used for things like '03 (of 6)' or '008 title 03 (of 6)', where ``index``
    is the position of the 'of'. Walking backward over ``p.input[:index]``,
    opening brackets and spaces are skipped; the first other item decides the
    result.

    Returns:
        That item when it is a Number or IssueNumber, otherwise ``None``
        (also ``None`` when nothing but skippable items precede ``index``).
    """
    item_type = filenamelexer.ItemType
    # We don't care about these types, we are looking to see if there is a number
    # that is possibly different from the issue number for this count
    skippable = (item_type.LeftParen, item_type.LeftBrace, item_type.LeftSBrace, item_type.Space)
    numeric = (item_type.Number, item_type.IssueNumber)

    for candidate in reversed(p.input[:index]):
        if candidate.typ in skippable:
            continue
        # Either we got our number, or this is not an ignorable type and we
        # give up looking for the number this count belongs to
        return candidate if candidate.typ in numeric else None

    return None
|
|
|
|
|
|
|
|
|
|
|
|
# Gets 6 in '03 of 6'
|
|
|
|
def get_number(p: Parser, index: int) -> filenamelexer.Item | None:
    """Search forward from ``index`` for the count that follows an 'of'.

    Used for things like '03 (of 6)' or '008 title 03 (of 6)'. Walking forward
    over ``p.input[index:]``, opening brackets and spaces are skipped and an
    'of' item is noted. The first other item decides the result.

    The 'of' comparison is case-insensitive so that it agrees with
    ``parse_info_specifier``, which matches the specifier with ``casefold()``.

    Returns:
        The first Number or IssueNumber item found after an 'of', otherwise
        ``None`` (a number seen before any 'of', or any other non-ignorable
        item, ends the search).
    """
    item_type = filenamelexer.ItemType
    # We don't care about these types, we are looking to see if there is a number
    # that is possibly different from the issue number for this count
    skippable = (item_type.LeftParen, item_type.LeftBrace, item_type.LeftSBrace, item_type.Space)
    numeric = (item_type.Number, item_type.IssueNumber)

    of_found = False
    for candidate in p.input[index:]:
        if candidate.typ in skippable:
            continue
        if candidate.val.casefold() == "of":
            of_found = True
            continue
        if of_found and candidate.typ in numeric:
            # We got our number, time to leave
            return candidate
        # This is not a usable number and not an ignorable type,
        # give up looking for the number this count belongs to
        break

    return None
|
|
|
|
|
|
|
|
|
2022-05-17 13:57:04 -07:00
|
|
|
def join_title(lst: list[filenamelexer.Item]) -> str:
    """Join lexer items into a display string, inserting spaces where appropriate.

    Each item's ``val`` is emitted in order, except a comma in the final
    position, which is dropped. A single space is added after an item unless:

    * the item is an operator,
    * the item is the last one,
    * the item is an Honorific or Text directly followed by a Dot,
    * the item has ``no_space`` set, or
    * the next item is an Operator or Symbol other than ``&``.
    """
    item_type = filenamelexer.ItemType
    last_index = len(lst) - 1
    pieces = []

    for position, item in enumerate(lst):
        # We ignore commas on the end
        if position == last_index and item.val == ",":
            continue

        pieces.append(item.val)

        # No space after operators, and no trailing space
        if item.typ == item_type.Operator or position == last_index:
            continue

        following = lst[position + 1]

        # No space after honorifics (or plain text) with a dot
        if item.typ in (item_type.Honorific, item_type.Text) and following.typ == item_type.Dot:
            continue
        if item.no_space:
            continue
        # No space if the next item is an operator or symbol, except if it is an ampersand
        if following.typ in [item_type.Operator, item_type.Symbol] and following.val != "&":
            continue

        pieces.append(" ")

    return "".join(pieces)
|
|
|
|
|
|
|
|
|
|
|
|
def Parse(
    lexer_result: list[filenamelexer.Item],
    first_is_alt: bool = False,
    remove_c2c: bool = False,
    remove_fcbd: bool = False,
    remove_publisher: bool = False,
    protofolius_issue_number_scheme: bool = False,
) -> Parser:
    """Construct a ``Parser`` for ``lexer_result``, run it, and return it.

    All keyword options are forwarded unchanged to the ``Parser`` constructor.
    The returned parser has already had ``run()`` called on it.
    """
    options = {
        "first_is_alt": first_is_alt,
        "remove_c2c": remove_c2c,
        "remove_fcbd": remove_fcbd,
        "remove_publisher": remove_publisher,
        "protofolius_issue_number_scheme": protofolius_issue_number_scheme,
    }
    parser = Parser(lexer_result=lexer_result, **options)
    parser.run()
    return parser
|