From 2594a857832c16cac24076e4823f01482603fafb Mon Sep 17 00:00:00 2001
From: "beville@gmail.com"
Date: Tue, 20 Nov 2012 19:19:33 +0000
Subject: [PATCH] better stripping of markup tags from CV text
git-svn-id: http://comictagger.googlecode.com/svn/trunk@70 6c5673fe-1810-88d6-992b-cd32ca31540c
---
comicvinetalker.py | 24 +++++++++++++++++-------
1 file changed, 17 insertions(+), 7 deletions(-)
diff --git a/comicvinetalker.py b/comicvinetalker.py
index 08003d5..770c16a 100644
--- a/comicvinetalker.py
+++ b/comicvinetalker.py
@@ -240,14 +240,24 @@ class ComicVineTalker(QObject):
return metadata
def cleanup_html( self, string):
- p = re.compile(r'<[^<]*?>')
+
+ # remove all newlines first
+ string = string.replace("\n", "")
+
+ #put in our own
+ string = string.replace("<br>", "\n")
+ string = string.replace("</p>", "\n\n")
+ string = string.replace("<h4>", "*")
+ string = string.replace("</h4>", "*\n")
+
+ # now strip all other tags
+ p = re.compile(r'<[^<]*?>')
+ newstring = p.sub('',string)
- newstring = p.sub('',string)
-
- newstring = newstring.replace('&nbsp;',' ')
- newstring = newstring.replace('&amp;','&')
-
- return newstring
+ newstring = newstring.replace('&nbsp;',' ')
+ newstring = newstring.replace('&amp;','&')
+
+ return newstring
def fetchIssueDate( self, issue_id ):
image_url, thumb_url, month,year = self.fetchIssueSelectDetails( issue_id )