From 2594a857832c16cac24076e4823f01482603fafb Mon Sep 17 00:00:00 2001 From: "beville@gmail.com" Date: Tue, 20 Nov 2012 19:19:33 +0000 Subject: [PATCH] better stripping of markup tags from CV text git-svn-id: http://comictagger.googlecode.com/svn/trunk@70 6c5673fe-1810-88d6-992b-cd32ca31540c --- comicvinetalker.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/comicvinetalker.py b/comicvinetalker.py index 08003d5..770c16a 100644 --- a/comicvinetalker.py +++ b/comicvinetalker.py @@ -240,14 +240,24 @@ class ComicVineTalker(QObject): return metadata def cleanup_html( self, string): - p = re.compile(r'<[^<]*?>') + + # remove all newlines first + string = string.replace("\n", "") + + #put in our own + string = string.replace("<br>", "\n") + string = string.replace("</p>", "\n\n") + string = string.replace("<h4>", "*") + string = string.replace("</h4>", "*\n") + + # now strip all other tags + p = re.compile(r'<[^<]*?>') + newstring = p.sub('',string) - newstring = p.sub('',string) - - newstring = newstring.replace('&nbsp;',' ') - newstring = newstring.replace('&amp;','&') - - return newstring + newstring = newstring.replace('&nbsp;',' ') + newstring = newstring.replace('&amp;','&') + + return newstring def fetchIssueDate( self, issue_id ): image_url, thumb_url, month,year = self.fetchIssueSelectDetails( issue_id )