"""Generic sources utils to format API data and the like.
"""
# Copyright 2012-2014 Anthony Beville
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import logging
import re

from bs4 import BeautifulSoup

from comicapi import utils

logger = logging.getLogger(__name__)


def parse_date_str(date_str: str) -> tuple[int | None, int | None, int | None]:
    """Parse a "YYYY-MM-DD"-style date string into (day, month, year), with None for any missing part."""
    day = None
    month = None
    year = None
    if date_str:
        parts = date_str.split("-")
        year = utils.xlate(parts[0], True)
        if len(parts) > 1:
            month = utils.xlate(parts[1], True)
            if len(parts) > 2:
                day = utils.xlate(parts[2], True)
    return day, month, year
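

# Illustrative examples of the parser above (not from upstream docs; the values
# follow directly from the split-on-"-" logic):
#
#   parse_date_str("2012-07-15")  # -> (15, 7, 2012)
#   parse_date_str("2012-07")     # -> (None, 7, 2012)
#   parse_date_str("")            # -> (None, None, None)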


def cleanup_html(string: str, remove_html_tables: bool) -> str:
    """Strip HTML out of a description string, optionally dropping any HTML tables entirely."""
    if string is None:
        return ""

    # find any tables
    soup = BeautifulSoup(string, "html.parser")
    tables = soup.findAll("table")

    # remove all newlines first
    string = string.replace("\n", "")

    # put in our own
    string = string.replace("<br>", "\n")
    string = string.replace("</li>", "\n")
    string = string.replace("</p>", "\n\n")
string = string.replace("", "*")
string = string.replace("
", "*\n")
string = string.replace("", "*")
string = string.replace("
", "*\n")
string = string.replace("", "*")
string = string.replace("
", "*\n")
string = string.replace("", "*")
string = string.replace("
", "*\n")
string = string.replace("", "*")
string = string.replace("
", "*\n")
string = string.replace("", "*")
string = string.replace("
", "*\n")
# remove the tables
p = re.compile(r"")
if remove_html_tables:
string = p.sub("", string)
string = string.replace("*List of covers and their creators:*", "")
else:
string = p.sub("{}", string)
# now strip all other tags
p = re.compile(r"<[^<]*?>")
newstring = p.sub("", string)
newstring = newstring.replace(" ", " ")
newstring = newstring.replace("&", "&")
newstring = newstring.strip()

    if not remove_html_tables:
        # now rebuild the tables into text from BSoup
        try:
            table_strings = []
            for table in tables:
                rows = []
                hdrs = []
                col_widths = []
                for hdr in table.findAll("th"):
                    item = hdr.string.strip()
                    hdrs.append(item)
                    col_widths.append(len(item))
                rows.append(hdrs)

                for row in table.findAll("tr"):
                    cols = []
                    col = row.findAll("td")
                    i = 0
                    for c in col:
                        item = c.string.strip()
                        cols.append(item)
                        if len(item) > col_widths[i]:
                            col_widths[i] = len(item)
                        i += 1
                    if len(cols) != 0:
                        rows.append(cols)

                # now we have the data, make it into text
                fmtstr = ""
                for w in col_widths:
                    fmtstr += f" {{:{w + 1}}}|"
                width = sum(col_widths) + len(col_widths) * 2
                table_text = ""
                counter = 0
                for row in rows:
                    table_text += fmtstr.format(*row) + "\n"
                    if counter == 0 and len(hdrs) != 0:
                        table_text += "-" * width + "\n"
                    counter += 1

                table_strings.append(table_text)

            newstring = newstring.format(*table_strings)
        except Exception:
            # we caught an error rebuilding the table.
            # just bail and remove the formatting
            logger.exception("table parse error")
            newstring = newstring.replace("{}", "")

    return newstring
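

# Illustrative calls (behaviour inferred from the replacements above, not from
# upstream documentation):
#
#   cleanup_html("Foo<br>Bar", remove_html_tables=True)          # -> "Foo\nBar"
#   cleanup_html("<h1>Title</h1>Body", remove_html_tables=True)  # -> "*Title*\nBody"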