From c0db1e52aeffabf1ad12e81c6271e9bfc009177e Mon Sep 17 00:00:00 2001 From: Timmy Welch Date: Tue, 12 Mar 2024 18:20:12 -0700 Subject: [PATCH] Make cleanup_html produce text that is more compliant with markdown --- comictalker/talker_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/comictalker/talker_utils.py b/comictalker/talker_utils.py index 8453f97..233d948 100644 --- a/comictalker/talker_utils.py +++ b/comictalker/talker_utils.py @@ -47,9 +47,10 @@ def cleanup_html(string: str | None, remove_html_tables: bool = False) -> str: # put in our own string = re.sub(r"
|", "\n", string, flags=re.IGNORECASE) + string = re.sub(r"
  • ", "* ", string, flags=re.IGNORECASE) string = re.sub(r"

    ", "\n\n", string, flags=re.IGNORECASE) - string = re.sub(r"", "*", string, flags=re.IGNORECASE) - string = re.sub(r"", "*\n", string, flags=re.IGNORECASE) + string = re.sub(r"", lambda m: '#'*int(m.group(1))+' ', string, flags=re.IGNORECASE) + string = re.sub(r"", "\n", string, flags=re.IGNORECASE) # remove the tables p = re.compile(r".*?")