From 987f3fc564c89683648345552410f7122cb77ad1 Mon Sep 17 00:00:00 2001
From: Mizaki
Date: Mon, 13 Nov 2023 01:41:26 +0000
Subject: [PATCH] cleanup_html improvements
---
comictalker/talker_utils.py | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)
diff --git a/comictalker/talker_utils.py b/comictalker/talker_utils.py
index ebc30e1..02bf2c9 100644
--- a/comictalker/talker_utils.py
+++ b/comictalker/talker_utils.py
@@ -43,25 +43,11 @@ def cleanup_html(string: str | None, remove_html_tables: bool = False) -> str:
soup = BeautifulSoup(string, "html.parser")
tables = soup.findAll("table")
- # remove all newlines first
- string = string.replace("\n", "")
-
# put in our own
- string = string.replace("<br>", "\n")
- string = string.replace("</li>", "\n")
- string = string.replace("</p>", "\n\n")
- string = string.replace("<h1>", "*")
- string = string.replace("</h1>", "*\n")
- string = string.replace("<h2>", "*")
- string = string.replace("</h2>", "*\n")
- string = string.replace("<h3>", "*")
- string = string.replace("</h3>", "*\n")
- string = string.replace("<h4>", "*")
- string = string.replace("</h4>", "*\n")
- string = string.replace("<h5>", "*")
- string = string.replace("</h5>", "*\n")
- string = string.replace("<h6>", "*")
- string = string.replace("</h6>", "*\n")
+ string = re.sub(r"<br>|</li>", "\n", string, flags=re.IGNORECASE)
+ string = re.sub(r"</p>", "\n\n", string, flags=re.IGNORECASE)
+ string = re.sub(r"<h[1-6]>", "*", string, flags=re.IGNORECASE)
+ string = re.sub(r"</h[1-6]>", "*\n", string, flags=re.IGNORECASE)
# remove the tables
p = re.compile(r"")
@@ -77,6 +63,7 @@ def cleanup_html(string: str | None, remove_html_tables: bool = False) -> str:
newstring = newstring.replace("&nbsp;", " ")
newstring = newstring.replace("&amp;", "&")
+ newstring = newstring.replace("&#039;", "'")
newstring = newstring.strip()