diff --git a/comictalker/talker_utils.py b/comictalker/talker_utils.py index ebc30e1..02bf2c9 100644 --- a/comictalker/talker_utils.py +++ b/comictalker/talker_utils.py @@ -43,25 +43,11 @@ def cleanup_html(string: str | None, remove_html_tables: bool = False) -> str: soup = BeautifulSoup(string, "html.parser") tables = soup.findAll("table") - # remove all newlines first - string = string.replace("\n", "") - # put in our own - string = string.replace("
", "\n") - string = string.replace("", "\n") - string = string.replace("

", "\n\n") - string = string.replace("

", "*") - string = string.replace("

", "*\n") - string = string.replace("

", "*") - string = string.replace("

", "*\n") - string = string.replace("

", "*") - string = string.replace("

", "*\n") - string = string.replace("

", "*") - string = string.replace("

", "*\n") - string = string.replace("
", "*") - string = string.replace("
", "*\n") - string = string.replace("
", "*") - string = string.replace("
", "*\n") + string = re.sub(r"
|", "\n", string, flags=re.IGNORECASE) + string = re.sub(r"

", "\n\n", string, flags=re.IGNORECASE) + string = re.sub(r"", "*", string, flags=re.IGNORECASE) + string = re.sub(r"", "*\n", string, flags=re.IGNORECASE) # remove the tables p = re.compile(r".*?") @@ -77,6 +63,7 @@ def cleanup_html(string: str | None, remove_html_tables: bool = False) -> str: newstring = newstring.replace(" ", " ") newstring = newstring.replace("&", "&") + newstring = newstring.replace("'", "'") newstring = newstring.strip()