2012-11-06 12:56:30 -08:00
|
|
|
"""
|
|
|
|
A python class to manage communication with Comic Vine's REST API
|
|
|
|
"""
|
|
|
|
|
|
|
|
"""
|
2014-03-23 10:30:23 -07:00
|
|
|
Copyright 2012-2014 Anthony Beville
|
2012-11-06 12:56:30 -08:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
2012-11-02 13:54:17 -07:00
|
|
|
import json
|
|
|
|
from pprint import pprint
|
|
|
|
import urllib2, urllib
|
|
|
|
import math
|
|
|
|
import re
|
2013-04-20 23:51:43 -07:00
|
|
|
import time
|
2013-01-20 17:45:51 -08:00
|
|
|
import datetime
|
|
|
|
import ctversion
|
2013-01-24 22:17:45 -08:00
|
|
|
import sys
|
2013-02-01 22:03:58 -08:00
|
|
|
from bs4 import BeautifulSoup
|
2012-11-02 13:54:17 -07:00
|
|
|
|
2012-11-27 10:00:27 -08:00
|
|
|
try:
    from PyQt4.QtNetwork import QNetworkAccessManager, QNetworkRequest
    from PyQt4.QtCore import QUrl, pyqtSignal, QObject, QByteArray
except ImportError:
    # No Qt, so define a few dummy QObjects to help us compile
    class QObject(object):
        """Stand-in for PyQt4's QObject; accepts and ignores any arguments."""

        def __init__(self, *args):
            pass

    class pyqtSignal(object):
        """Stand-in for PyQt4's pyqtSignal whose emit() does nothing."""

        def __init__(self, *args):
            pass

        def emit(self, *args):
            # The signals declared in this file are emitted with both two
            # and three payload values, so accept any number of them
            # (the old fixed signature raised TypeError for three).
            pass
|
2012-11-12 16:12:43 -08:00
|
|
|
|
2012-11-02 13:54:17 -07:00
|
|
|
import utils
|
2012-11-08 20:02:14 -08:00
|
|
|
from settings import ComicTaggerSettings
|
|
|
|
from comicvinecacher import ComicVineCacher
|
2012-11-02 13:54:17 -07:00
|
|
|
from genericmetadata import GenericMetadata
|
2012-12-03 17:16:58 -08:00
|
|
|
from issuestring import IssueString
|
2012-11-02 13:54:17 -07:00
|
|
|
|
2013-03-28 12:04:30 -07:00
|
|
|
class CVTypeID:
    """Type-ID prefixes used in this file when building Comic Vine resource URLs."""

    # goes in front of an issue id:   /issue/<Issue>-<id>/
    Issue = "4000"
    # goes in front of a volume id:   /volume/<Volume>-<id>/
    Volume = "4050"
|
|
|
|
|
2012-11-28 12:15:20 -08:00
|
|
|
class ComicVineTalkerException(Exception):
    """Raised by the talker when a request to Comic Vine cannot be completed."""
|
2012-11-12 16:12:43 -08:00
|
|
|
|
|
|
|
class ComicVineTalker(QObject):
|
2012-11-02 13:54:17 -07:00
|
|
|
|
2013-04-12 10:53:35 -07:00
|
|
|
logo_url = "http://static.comicvine.com/bundles/comicvinesite/images/logo.png"
|
|
|
|
|
2012-11-18 19:55:40 -08:00
|
|
|
def __init__(self, api_key=""):
    """Set up the talker.

    api_key -- Comic Vine API key to use.  When empty (the default), the
               key that is registered to comictagger is used.
    """
    QObject.__init__(self)

    self.api_base_url = "http://www.comicvine.com/api"

    # Honor a caller-supplied key (the argument used to be silently
    # ignored); otherwise fall back to the key registered to comictagger.
    self.api_key = api_key or '27431e6787042105bd3e47e169a624521f89f3a4'

    # optional callable that receives log text; see writeLog()
    self.log_func = None
|
|
|
|
|
|
|
|
def setLogFunc(self, log_func):
    """Install the callable that writeLog() passes its text to.

    log_func -- callable taking one text argument, or None to have
                writeLog() print to stderr instead.
    """
    self.log_func = log_func
|
|
|
|
|
|
|
|
def writeLog(self, text):
    """Hand text to the installed log function, or to stderr when none is set."""
    logger = self.log_func
    if logger is not None:
        logger(text)
        return

    # No log function installed: write the text followed by a newline,
    # which is what the print statement used here before produced.
    sys.stderr.write(text)
    sys.stderr.write("\n")
|
2012-11-06 12:29:18 -08:00
|
|
|
|
2013-04-02 13:47:18 -07:00
|
|
|
def parseDateStr(self, date_str):
    """Split a dash-separated date string such as "2013-04-02" into its parts.

    Returns a (day, month, year) tuple of strings.  Parts that are missing
    from date_str come back as None, and a date_str of None gives
    (None, None, None).
    """
    if date_str is None:
        return None, None, None

    # pad so that a bare "YYYY" or "YYYY-MM" still yields three fields
    fields = date_str.split('-') + [None, None]
    year, month, day = fields[:3]
    return day, month, year
|
|
|
|
|
2012-11-06 12:29:18 -08:00
|
|
|
def testKey(self):
    """Ask Comic Vine whether self.api_key is accepted.

    Returns False when the server answers with status code 100, and True
    for any other status code.  Errors from urllib2 or from JSON decoding
    are not caught here.
    """
    test_url = self.api_base_url + "/issue/1/?api_key=" + self.api_key + "&format=json&field_list=name"

    resp = urllib2.urlopen(test_url)
    try:
        content = resp.read()
    finally:
        # release the connection whether or not the read succeeded
        resp.close()

    cv_response = json.loads(content)

    # Bogus request, but if the key is wrong, you get error 100: "Invalid API Key"
    return cv_response['status_code'] != 100
|
|
|
|
|
2012-11-28 12:15:20 -08:00
|
|
|
def getUrlContent(self, url):
    """Download url and return the response body.

    A response with HTTP status 500 is retried, for at most three attempts
    with a one second pause after each failure.  Any other HTTP error ends
    the attempts immediately.

    Raises ComicVineTalkerException("Error on Comic Vine server") when the
    attempts end on an HTTP error, and
    ComicVineTalkerException("Network Error!") for any other failure.
    """
    for tries in range(3):
        try:
            resp = urllib2.urlopen(url)
            try:
                return resp.read()
            finally:
                # release the connection even if the read fails
                resp.close()

        except urllib2.HTTPError as e:
            status = e.getcode()
            if status == 500:
                self.writeLog("Try #{0}: ".format(tries + 1))
                time.sleep(1)
            self.writeLog(str(e) + "\n")

            if status != 500:
                # only a 500 is worth another attempt
                break

        except Exception as e:
            self.writeLog(str(e) + "\n")
            raise ComicVineTalkerException("Network Error!")

    raise ComicVineTalkerException("Error on Comic Vine server")
|
2012-11-02 13:54:17 -07:00
|
|
|
|
2012-11-12 16:12:43 -08:00
|
|
|
def searchForSeries(self, series_name, callback=None, refresh_cache=False):
    """Search Comic Vine for volumes matching series_name.

    series_name   -- name to search for; articles are removed and the name
                     is lower-cased and stripped before it is used
    callback      -- optional callable invoked as
                     callback(results_so_far, total_results) after every
                     page; when None, progress is reported via writeLog()
    refresh_cache -- when True, skip the lookup of cached search results

    Returns the list of result records (also stored in the cache), or None
    when Comic Vine answers any page with a failure status.
    """
    # remove cruft from the search string
    series_name = utils.removearticles(series_name).lower().strip()

    # before we search online, look in our cache, since we might have
    # done this same search recently
    cvc = ComicVineCacher()
    if not refresh_cache:
        cached_search_results = cvc.get_search_results(series_name)
        if len(cached_search_results) > 0:
            return cached_search_results

    # We need to make the series name into an "AND"ed query:
    # "a b c" becomes "a AND b AND c"
    query_string = " AND ".join(series_name.split())
    query_string = urllib.quote_plus(query_string.encode("utf-8"))

    search_url = self.api_base_url + "/search/?api_key=" + self.api_key + "&format=json&resources=volume&query=" + query_string + "&field_list=name,id,start_year,publisher,image,description,count_of_issues"

    content = self.getUrlContent(search_url + "&page=1")
    cv_response = json.loads(content)

    if cv_response['status_code'] != 1:
        self.writeLog("Comic Vine query failed with error: [{0}]. \n".format(cv_response['error']))
        return None

    search_results = list()

    # see http://api.comicvine.com/documentation/#handling_responses
    current_result_count = cv_response['number_of_page_results']
    total_result_count = cv_response['number_of_total_results']

    if callback is None:
        self.writeLog("Found {0} of {1} results\n".format(current_result_count, total_result_count))
    search_results.extend(cv_response['results'])
    page = 1

    if callback is not None:
        callback(current_result_count, total_result_count)

    # see if we need to keep asking for more pages...
    while current_result_count < total_result_count:
        if callback is None:
            self.writeLog("getting another page of results {0} of {1}...\n".format(current_result_count, total_result_count))
        page += 1

        content = self.getUrlContent(search_url + "&page=" + str(page))
        cv_response = json.loads(content)

        if cv_response['status_code'] != 1:
            self.writeLog("Comic Vine query failed with error: [{0}]. \n".format(cv_response['error']))
            return None

        page_result_count = cv_response['number_of_page_results']
        if page_result_count == 0:
            # The server has nothing more to give even though the total
            # says otherwise; without this check the loop would never end.
            break

        search_results.extend(cv_response['results'])
        current_result_count += page_result_count

        if callback is not None:
            callback(current_result_count, total_result_count)

    # cache these search results
    cvc.add_search_results(series_name, search_results)

    return search_results
|
|
|
|
|
|
|
|
def fetchVolumeData(self, series_id):
    """Return the Comic Vine volume record for series_id.

    The local cache is consulted first.  On a miss the record is requested
    from Comic Vine and then added to the cache.  Returns None (after
    writing a message to stderr) when Comic Vine answers with a failure
    status.
    """
    cache = ComicVineCacher()

    # maybe we already have this info
    volume_info = cache.get_volume_info(series_id)
    if volume_info is None:
        request_url = "".join([
            self.api_base_url,
            "/volume/",
            CVTypeID.Volume,
            "-",
            str(series_id),
            "/?api_key=",
            self.api_key,
            "&field_list=name,id,start_year,publisher,count_of_issues&format=json",
        ])

        reply = json.loads(self.getUrlContent(request_url))
        if reply['status_code'] != 1:
            sys.stderr.write("Comic Vine query failed with error: [{0}]. ".format(reply['error']) + "\n")
            return None

        volume_info = reply['results']
        cache.add_volume_info(volume_info)

    return volume_info
|
2013-03-29 23:25:04 -07:00
|
|
|
|
|
|
|
def fetchIssuesByVolume(self, series_id):
    """Return the list of issue records that belong to volume series_id.

    The local cache is consulted first.  On a miss, all result pages are
    requested from Comic Vine, records without an image are patched by
    repairUrls(), and the list is added to the cache.

    Returns None (after logging via writeLog()) when Comic Vine answers
    any page with a failure status.
    """
    # before we search online, look in our cache, since we might already
    # have this info
    cvc = ComicVineCacher()
    cached_volume_issues_result = cvc.get_volume_issues_info(series_id)
    if cached_volume_issues_result is not None:
        return cached_volume_issues_result

    issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + "&filter=volume:" + str(series_id) + "&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"
    content = self.getUrlContent(issues_url)
    cv_response = json.loads(content)

    if cv_response['status_code'] != 1:
        # reported the same way as failures on the later pages
        self.writeLog("Comic Vine query failed with error: [{0}]. \n".format(cv_response['error']))
        return None

    current_result_count = cv_response['number_of_page_results']
    total_result_count = cv_response['number_of_total_results']
    volume_issues_result = cv_response['results']

    # see if we need to keep asking for more pages...
    while current_result_count < total_result_count:
        # the number of records received so far is the offset of the next page
        content = self.getUrlContent(issues_url + "&offset=" + str(current_result_count))
        cv_response = json.loads(content)

        if cv_response['status_code'] != 1:
            self.writeLog("Comic Vine query failed with error: [{0}]. \n".format(cv_response['error']))
            return None

        page_result_count = cv_response['number_of_page_results']
        if page_result_count == 0:
            # The server has nothing more to give even though the total
            # says otherwise; without this check the loop would never end.
            break

        volume_issues_result.extend(cv_response['results'])
        current_result_count += page_result_count

    self.repairUrls(volume_issues_result)

    cvc.add_volume_issues_info(series_id, volume_issues_result)

    return volume_issues_result
|
2013-04-02 12:58:23 -07:00
|
|
|
|
|
|
|
|
|
|
|
def fetchIssuesByVolumeIssueNumAndYear(self, volume_id_list, issue_number, year):
    """Return issue records matching an issue number across several volumes.

    volume_id_list -- ids of the volumes to look in
    issue_number   -- issue number to match
    year           -- when it consists of digits only, results are also
                      filtered on a cover date between January 1st of
                      that year and January 1st of the next; otherwise
                      no date filter is applied

    All result pages are requested and records without an image are
    patched by repairUrls().  Nothing is cached here.

    Returns None (after logging via writeLog()) when Comic Vine answers
    any page with a failure status.
    """
    # every id is followed by "|", giving e.g. "volume:12|34|"
    volume_filter = "volume:" + "".join(str(vid) + "|" for vid in volume_id_list)

    year_filter = ""
    if year is not None and str(year).isdigit():
        year_filter = ",cover_date:{0}-1-1|{1}-1-1".format(year, int(year) + 1)

    issue_number = urllib.quote_plus(unicode(issue_number).encode("utf-8"))

    filter_args = "&filter=" + volume_filter + year_filter + ",issue_number:" + issue_number

    issues_url = self.api_base_url + "/issues/" + "?api_key=" + self.api_key + filter_args + "&field_list=id,volume,issue_number,name,image,cover_date,site_detail_url,description&format=json"

    content = self.getUrlContent(issues_url)
    cv_response = json.loads(content)

    if cv_response['status_code'] != 1:
        # reported the same way as failures on the later pages
        self.writeLog("Comic Vine query failed with error: [{0}]. \n".format(cv_response['error']))
        return None

    current_result_count = cv_response['number_of_page_results']
    total_result_count = cv_response['number_of_total_results']
    filtered_issues_result = cv_response['results']

    # see if we need to keep asking for more pages...
    while current_result_count < total_result_count:
        # the number of records received so far is the offset of the next page
        content = self.getUrlContent(issues_url + "&offset=" + str(current_result_count))
        cv_response = json.loads(content)

        if cv_response['status_code'] != 1:
            self.writeLog("Comic Vine query failed with error: [{0}]. \n".format(cv_response['error']))
            return None

        page_result_count = cv_response['number_of_page_results']
        if page_result_count == 0:
            # The server has nothing more to give even though the total
            # says otherwise; without this check the loop would never end.
            break

        filtered_issues_result.extend(cv_response['results'])
        current_result_count += page_result_count

    self.repairUrls(filtered_issues_result)

    return filtered_issues_result
|
|
|
|
|
|
|
|
|
|
|
|
|
2012-12-17 10:44:33 -08:00
|
|
|
def fetchIssueData(self, series_id, issue_number, settings):
    """Fetch one issue of a volume and map it to generic metadata.

    series_id    -- Comic Vine id of the volume
    issue_number -- number of the wanted issue; compared case-insensitively
                    after both sides are rendered by IssueString
    settings     -- passed through to mapCVDataToMetadata()

    Returns what mapCVDataToMetadata() produces, or None when the volume
    or its issue list could not be fetched, when no issue of the volume
    has that number, or when Comic Vine answers the issue request with a
    failure status.
    """
    volume_results = self.fetchVolumeData(series_id)
    issues_list_results = self.fetchIssuesByVolume(series_id)

    if volume_results is None or issues_list_results is None:
        # One of the lookups failed (and has already reported it); going
        # on would only crash on the missing data.
        return None

    # When IssueString cannot render the requested number (asString()
    # gives None), look for issue 1 instead.  Done once, up front.
    if IssueString(issue_number).asString() is None:
        issue_number = 1
    wanted = IssueString(issue_number).asString().lower()

    record = None
    for candidate in issues_list_results:
        candidate_number = IssueString(candidate['issue_number']).asString()
        # a record whose number cannot be rendered can never match
        if candidate_number is not None and candidate_number.lower() == wanted:
            record = candidate
            break

    if record is None:
        return None

    issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id']) + "/?api_key=" + self.api_key + "&format=json"

    content = self.getUrlContent(issue_url)
    cv_response = json.loads(content)
    if cv_response['status_code'] != 1:
        sys.stderr.write("Comic Vine query failed with error: [{0}]. ".format(cv_response['error']) + "\n")
        return None
    issue_results = cv_response['results']

    # now, map the comicvine data to generic metadata
    return self.mapCVDataToMetadata(volume_results, issue_results, settings)
|
|
|
|
|
|
|
|
def fetchIssueDataByIssueID(self, issue_id, settings):
    """Fetch an issue by its Comic Vine id and map it to generic metadata.

    The issue record is requested from Comic Vine, its volume record is
    obtained through fetchVolumeData(), and both are handed to
    mapCVDataToMetadata() together with settings.  The resulting metadata
    object has isEmpty set to False before it is returned.

    Returns None (after writing a message to stderr) when Comic Vine
    answers the issue request with a failure status.
    """
    request_url = "".join([
        self.api_base_url,
        "/issue/",
        CVTypeID.Issue,
        "-",
        str(issue_id),
        "/?api_key=",
        self.api_key,
        "&format=json",
    ])

    reply = json.loads(self.getUrlContent(request_url))
    if reply['status_code'] != 1:
        sys.stderr.write("Comic Vine query failed with error: [{0}]. ".format(reply['error']) + "\n")
        return None

    issue_info = reply['results']
    volume_info = self.fetchVolumeData(issue_info['volume']['id'])

    # turn the two comicvine records into generic metadata
    metadata = self.mapCVDataToMetadata(volume_info, issue_info, settings)
    metadata.isEmpty = False
    return metadata
|
|
|
|
|
|
|
|
def mapCVDataToMetadata(self, volume_results, issue_results, settings):
    """Build a GenericMetadata object from Comic Vine volume and issue records.

    volume_results -- volume record; 'publisher' is read, and 'start_year'
                      when settings.use_series_start_as_volume is set
    issue_results  -- issue record; supplies series name, issue number,
                      title, cover date, description, web link and the
                      person/character/team/location/story-arc credits
    settings       -- only use_series_start_as_volume is read here

    Returns the populated GenericMetadata object.
    """
    metadata = GenericMetadata()

    metadata.series = issue_results['volume']['name']

    num_s = IssueString(issue_results['issue_number']).asString()
    metadata.issue = num_s
    metadata.title = issue_results['name']

    # The publisher entry can be missing; subscripting it blindly would
    # raise TypeError, so in that case leave the publisher unset.
    publisher = volume_results['publisher']
    if publisher is not None:
        metadata.publisher = publisher['name']

    metadata.day, metadata.month, metadata.year = self.parseDateStr(issue_results['cover_date'])

    metadata.comments = self.cleanup_html(issue_results['description'])
    if settings.use_series_start_as_volume:
        metadata.volume = volume_results['start_year']

    metadata.notes = "Tagged with ComicTagger {0} using info from Comic Vine on {1}. [Issue ID {2}]".format(
        ctversion.version,
        datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        issue_results['id'])

    metadata.webLink = issue_results['site_detail_url']

    # a person's 'role' may hold several comma-separated roles; each one
    # becomes its own credit
    for person in issue_results['person_credits']:
        if 'role' in person:
            for role in person['role'].split(','):
                # can we determine 'primary' from CV??
                metadata.addCredit(person['name'], role.title().strip(), False)

    character_list = [character['name'] for character in issue_results['character_credits']]
    metadata.characters = utils.listToString(character_list)

    team_list = [team['name'] for team in issue_results['team_credits']]
    metadata.teams = utils.listToString(team_list)

    location_list = [location['name'] for location in issue_results['location_credits']]
    metadata.locations = utils.listToString(location_list)

    # unlike the lists above, the story arc is only set when there is one
    arc_list = [arc['name'] for arc in issue_results['story_arc_credits']]
    if len(arc_list) > 0:
        metadata.storyArc = utils.listToString(arc_list)

    return metadata
|
|
|
|
def cleanup_html(self, string):
    """Convert an HTML description into plain text.

    <br>, </p> and <h4> are turned into line breaks / "*" markers, all
    other tags are stripped, and the &nbsp; and &amp; entities are
    decoded.  Each <table> is re-rendered as fixed-width text columns; if
    that fails for any reason, the tables are dropped from the result.

    Returns "" when string is None.
    """
    if string is None:
        return ""

    # find any tables
    soup = BeautifulSoup(string)
    tables = soup.findAll('table')

    # remove all newlines first
    string = string.replace("\n", "")

    # put in our own
    string = string.replace("<br>", "\n")
    string = string.replace("</p>", "\n\n")
    string = string.replace("<h4>", "*")
    string = string.replace("</h4>", "*\n")

    # remove the tables, leaving a "{}" placeholder where each one was
    p = re.compile(r'<table[^<]*?>.*?<\/table>')
    string = p.sub('{}', string)

    # now strip all other tags
    p = re.compile(r'<[^<]*?>')
    newstring = p.sub('', string)

    # decode the two entities we care about
    newstring = newstring.replace('&nbsp;', ' ')
    newstring = newstring.replace('&amp;', '&')

    newstring = newstring.strip()

    # now rebuild the tables into text from BSoup
    try:
        table_strings = []
        for table in tables:
            hdrs = []
            col_widths = []
            for hdr in table.findAll('th'):
                item = hdr.string.strip()
                hdrs.append(item)
                col_widths.append(len(item))
            rows = [hdrs]

            for row in table.findAll('tr'):
                cols = []
                for i, c in enumerate(row.findAll('td')):
                    item = c.string.strip()
                    cols.append(item)
                    # widen the column if this cell is the longest so far
                    if len(item) > col_widths[i]:
                        col_widths[i] = len(item)
                if len(cols) != 0:
                    rows.append(cols)

            # now we have the data, make it into text
            fmtstr = "".join(" {{:{}}}|".format(w + 1) for w in col_widths)
            width = sum(col_widths) + len(col_widths) * 2

            lines = []
            for counter, row in enumerate(rows):
                lines.append(fmtstr.format(*row) + "\n")
                if counter == 0 and len(hdrs) != 0:
                    # rule under the header row
                    lines.append("-" * width + "\n")
            table_strings.append("".join(lines))

        # drop each rendered table into its placeholder
        newstring = newstring.format(*table_strings)
    except Exception:
        # we caught an error rebuilding the table.
        # just bail and remove the formatting.  (The result of replace()
        # has to be assigned back; strings are immutable.)
        newstring = newstring.replace("{}", "")

    return newstring
|
2012-11-02 13:54:17 -07:00
|
|
|
|
2012-11-16 19:02:04 -08:00
|
|
|
def fetchIssueDate(self, issue_id):
    """Return the (month, year) of the cover date of issue issue_id."""
    cover_date = self.fetchIssueSelectDetails(issue_id)['cover_date']
    # parseDateStr() gives (day, month, year); the day is not wanted here
    _, month, year = self.parseDateStr(cover_date)
    return month, year
|
2013-04-02 13:47:18 -07:00
|
|
|
|
2012-11-09 18:18:41 -08:00
|
|
|
def fetchIssueCoverURLs(self, issue_id):
    """Return the (image_url, thumb_image_url) pair for issue issue_id."""
    issue_details = self.fetchIssueSelectDetails(issue_id)
    full_size_url = issue_details['image_url']
    thumbnail_url = issue_details['thumb_image_url']
    return full_size_url, thumbnail_url
|
|
|
|
|
|
|
|
def fetchIssuePageURL(self, issue_id):
    """Return the 'site_detail_url' entry of the details for issue issue_id."""
    return self.fetchIssueSelectDetails(issue_id)['site_detail_url']
|
|
|
|
|
2012-11-16 19:02:04 -08:00
|
|
|
def fetchIssueSelectDetails(self, issue_id):
    """Return selected details of issue issue_id as a dict.

    The dict has the keys 'image_url', 'thumb_image_url', 'cover_date' and
    'site_detail_url'.  Cached details are returned when their 'image_url'
    is set; otherwise the details are requested from Comic Vine and added
    to the cache.

    When Comic Vine answers with a failure status, a message is written
    to stderr and all four values are None.  When the issue has no image,
    both image URLs are set to the logo URL (as repairUrls() does) and
    the result is not cached.
    """
    cached_details = self.fetchCachedIssueSelectDetails(issue_id)
    if cached_details['image_url'] is not None:
        return cached_details

    issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id) + "/?api_key=" + self.api_key + "&format=json&field_list=image,cover_date,site_detail_url"

    content = self.getUrlContent(issue_url)

    details = dict()
    details['image_url'] = None
    details['thumb_image_url'] = None
    details['cover_date'] = None
    details['site_detail_url'] = None

    cv_response = json.loads(content)
    if cv_response['status_code'] != 1:
        sys.stderr.write("Comic Vine query failed with error: [{0}]. ".format(cv_response['error']) + "\n")
        return details

    results = cv_response['results']
    image = results['image']
    if image is None:
        # No image came back for this issue; subscripting None would
        # raise TypeError, so substitute the logo instead.
        details['image_url'] = self.logo_url
        details['thumb_image_url'] = self.logo_url
    else:
        details['image_url'] = image['super_url']
        details['thumb_image_url'] = image['thumb_url']
    details['cover_date'] = results['cover_date']
    details['site_detail_url'] = results['site_detail_url']

    # only real image URLs are worth caching, not the logo stand-in
    if image is not None and details['image_url'] is not None:
        self.cacheIssueSelectDetails(issue_id,
                                     details['image_url'],
                                     details['thumb_image_url'],
                                     details['cover_date'],
                                     details['site_detail_url'])
    return details
|
2012-11-12 16:12:43 -08:00
|
|
|
|
2012-11-16 19:02:04 -08:00
|
|
|
def fetchCachedIssueSelectDetails(self, issue_id):
    """Return what the local cache holds as select details for issue_id.

    This is the lookup done before going online; the result is whatever
    ComicVineCacher.get_issue_select_details() gives back.
    """
    cache = ComicVineCacher()
    cached_details = cache.get_issue_select_details(issue_id)
    return cached_details
|
2012-11-12 16:12:43 -08:00
|
|
|
|
2013-03-29 16:09:41 -07:00
|
|
|
def cacheIssueSelectDetails(self, issue_id, image_url, thumb_url, cover_date, page_url):
    """Store the select details of issue_id in the local cache."""
    ComicVineCacher().add_issue_select_details(
        issue_id, image_url, thumb_url, cover_date, page_url)
|
2013-02-02 10:41:06 -08:00
|
|
|
|
2013-02-03 21:09:48 -08:00
|
|
|
|
2013-04-02 15:37:28 -07:00
|
|
|
def fetchAlternateCoverURLs(self, issue_id, issue_page_url):
    """Return the list of alternate cover image URLs for issue issue_id.

    issue_id       -- id used as the cache key
    issue_page_url -- URL of the issue's Comic Vine web page, which is
                      downloaded and scraped when the cache has no list

    A freshly scraped list is added to the cache.  Errors from urllib2
    are not caught here.
    """
    url_list = self.fetchCachedAlternateCoverURLs(issue_id)
    if url_list is not None:
        return url_list

    # scrape the CV issue page URL to get the alternate cover URLs
    resp = urllib2.urlopen(issue_page_url)
    try:
        content = resp.read()
    finally:
        # release the connection whether or not the read succeeded
        resp.close()
    alt_cover_url_list = self.parseOutAltCoverUrls(content)

    # cache this alt cover URL list
    self.cacheAlternateCoverURLs(issue_id, alt_cover_url_list)

    return alt_cover_url_list
|
|
|
|
|
|
|
|
def parseOutAltCoverUrls(self, page_html):
    """Scrape the alternate cover image URLs out of an issue page.

    Relies on knowledge of the page layout: covers are the <div> elements
    carrying both the 'imgboxart' and the 'issue-cover' class.  The first
    such div is skipped; the 'src' of the <img> inside each of the others
    is collected.

    Returns the list of URLs, in page order.
    """
    soup = BeautifulSoup(page_html)

    def is_cover(div):
        # divs without a class attribute can never qualify
        classes = div.get('class', ())
        return 'imgboxart' in classes and 'issue-cover' in classes

    cover_divs = [div for div in soup.find_all('div') if is_cover(div)]

    # everything after the first cover counts as an alternate
    return [div.img['src'] for div in cover_divs[1:]]
|
2013-02-03 21:09:48 -08:00
|
|
|
|
|
|
|
def fetchCachedAlternateCoverURLs(self, issue_id):
    """Return the alternate cover URL list the local cache holds for issue_id.

    This is the lookup done before going online.  The result is whatever
    ComicVineCacher.get_alt_covers() gives back; callers in this file
    treat None as "nothing cached".
    """
    return ComicVineCacher().get_alt_covers(issue_id)
|
|
|
|
|
|
|
|
def cacheAlternateCoverURLs(self, issue_id, url_list):
    """Store the alternate cover URL list of issue_id in the local cache."""
    ComicVineCacher().add_alt_covers(issue_id, url_list)
|
|
|
|
|
2013-02-01 22:03:58 -08:00
|
|
|
#---------------------------------------------------------------------------
|
2012-11-12 16:12:43 -08:00
|
|
|
urlFetchComplete = pyqtSignal( str , str, int)
|
2012-11-02 13:54:17 -07:00
|
|
|
|
2012-11-12 16:12:43 -08:00
|
|
|
def asyncFetchIssueCoverURLs(self, issue_id):
    """Start fetching the cover URLs of issue_id; the result arrives via urlFetchComplete.

    When the cache already holds an image URL, urlFetchComplete is
    emitted right away with (image_url, thumb_image_url, issue_id).
    Otherwise a network request is started and
    asyncFetchIssueCoverURLComplete() is connected to handle the reply.

    The id is kept in self.issue_id for the completion handler.
    """
    self.issue_id = issue_id

    cached = self.fetchCachedIssueSelectDetails(issue_id)
    if cached['image_url'] is None:
        # nothing cached, so go and ask Comic Vine
        request_url = "".join([
            self.api_base_url,
            "/issue/",
            CVTypeID.Issue,
            "-",
            str(issue_id),
            "/?api_key=",
            self.api_key,
            "&format=json&field_list=image,cover_date,site_detail_url",
        ])
        self.nam = QNetworkAccessManager()
        self.nam.finished.connect(self.asyncFetchIssueCoverURLComplete)
        self.nam.get(QNetworkRequest(QUrl(request_url)))
    else:
        self.urlFetchComplete.emit(cached['image_url'], cached['thumb_image_url'], self.issue_id)
|
|
|
|
|
|
|
|
def asyncFetchIssueCoverURLComplete(self, reply):
    """Handle the network reply started by asyncFetchIssueCoverURLs().

    reply -- object whose readAll() yields the JSON response body

    On success the details are added to the cache and urlFetchComplete is
    emitted with (image_url, thumb_url, self.issue_id).  When the body is
    not valid JSON or Comic Vine reports a failure status, a message is
    written to stderr and nothing is emitted.  When the issue has no
    image, the logo URL is emitted instead and nothing is cached.
    """
    # read in the response
    data = reply.readAll()

    try:
        cv_response = json.loads(str(data))
    except Exception:
        # (not a bare except: that would also swallow KeyboardInterrupt)
        sys.stderr.write("Comic Vine query failed to get JSON data" + "\n")
        sys.stderr.write(str(data) + "\n")
        return

    if cv_response['status_code'] != 1:
        sys.stderr.write("Comic Vine query failed with error: [{0}]. ".format(cv_response['error']) + "\n")
        return

    results = cv_response['results']
    image = results['image']
    if image is None:
        # No image came back for this issue; subscripting None would
        # raise TypeError, so substitute the logo as repairUrls() does.
        image_url = self.logo_url
        thumb_url = self.logo_url
    else:
        image_url = image['super_url']
        thumb_url = image['thumb_url']
    cover_date = results['cover_date']
    page_url = results['site_detail_url']

    # only real image URLs are worth caching, not the logo stand-in
    if image is not None:
        self.cacheIssueSelectDetails(self.issue_id, image_url, thumb_url, cover_date, page_url)

    self.urlFetchComplete.emit(image_url, thumb_url, self.issue_id)
|
|
|
|
|
2013-02-03 21:09:48 -08:00
|
|
|
altUrlListFetchComplete = pyqtSignal( list, int)
|
|
|
|
|
|
|
|
def asyncFetchAlternateCoverURLs(self, issue_id, issue_page_url):
    """Start fetching the alternate cover URLs of issue_id.

    The result arrives via altUrlListFetchComplete.  This async version
    requires the issue page url to be provided!

    When the cache already holds a list, altUrlListFetchComplete is
    emitted right away with (url_list, int(issue_id)).  Otherwise the
    issue page is requested and asyncFetchAlternateCoverURLsComplete() is
    connected to handle the reply.

    The id is kept in self.issue_id for the completion handler.
    """
    self.issue_id = issue_id

    cached_urls = self.fetchCachedAlternateCoverURLs(issue_id)
    if cached_urls is None:
        # nothing cached, so download the issue page
        self.nam = QNetworkAccessManager()
        self.nam.finished.connect(self.asyncFetchAlternateCoverURLsComplete)
        self.nam.get(QNetworkRequest(QUrl(str(issue_page_url))))
    else:
        self.altUrlListFetchComplete.emit(cached_urls, int(self.issue_id))
|
|
|
|
|
|
|
|
|
|
|
|
def asyncFetchAlternateCoverURLsComplete(self, reply):
    """Handle the network reply started by asyncFetchAlternateCoverURLs().

    The page HTML read from reply is scraped by parseOutAltCoverUrls(),
    the resulting list is added to the cache under self.issue_id, and
    altUrlListFetchComplete is emitted with (list, int(self.issue_id)).
    """
    page_html = str(reply.readAll())
    cover_urls = self.parseOutAltCoverUrls(page_html)

    # remember the list so the page need not be scraped again
    self.cacheAlternateCoverURLs(self.issue_id, cover_urls)

    self.altUrlListFetchComplete.emit(cover_urls, int(self.issue_id))
|
2012-11-12 16:12:43 -08:00
|
|
|
|
2013-04-12 10:53:35 -07:00
|
|
|
def repairUrls(self, issue_list):
    """Make sure every record in issue_list has URLs in its image fields.

    Records whose 'image' entry is None get a dict with 'super_url' and
    'thumb_url' both set to ComicVineTalker.logo_url.  The list is
    modified in place; nothing is returned.
    """
    for issue in issue_list:
        if issue['image'] is not None:
            continue
        placeholder = dict()
        placeholder['super_url'] = ComicVineTalker.logo_url
        placeholder['thumb_url'] = ComicVineTalker.logo_url
        issue['image'] = placeholder
|
|
|
|
|