"""A python class to manage communication with Comic Vine's REST API"""

# Copyright 2012-2014 Anthony Beville

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import requests
import re
import time
import datetime
import sys
import ssl
import unicodedata
#from pprint import pprint
#import math

from bs4 import BeautifulSoup

try:
    from PyQt5.QtNetwork import QNetworkAccessManager, QNetworkRequest
    from PyQt5.QtCore import QUrl, pyqtSignal, QObject, QByteArray
except ImportError:
    # No Qt, so define a few dummy QObjects to help us compile
    class QObject():
        def __init__(self, *args):
            pass

    class pyqtSignal():
        def __init__(self, *args):
            pass

        def emit(a, b, c):
            pass

from . import ctversion
from . import utils
from .comicvinecacher import ComicVineCacher
from .genericmetadata import GenericMetadata
from .issuestring import IssueString

#from settings import ComicTaggerSettings


class CVTypeID:
    Volume = "4050"
    Issue = "4000"


class ComicVineTalkerException(Exception):
    Unknown = -1
    Network = -2
    InvalidKey = 100
    RateLimit = 107

    def __init__(self, code=-1, desc=""):
        self.desc = desc
        self.code = code

    def __str__(self):
        if (self.code == ComicVineTalkerException.Unknown or
                self.code == ComicVineTalkerException.Network):
            return self.desc
        else:
            return "CV error #{0}: [{1}]. \n".format(self.code, self.desc)


class ComicVineTalker(QObject):
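    """Interface to Comic Vine's REST API used by ComicTagger.

    Illustrative usage sketch:

        talker = ComicVineTalker()
        volumes = talker.searchForSeries("some series name")
        md = talker.fetchIssueData(volumes[0]['id'], "1", settings)

    where ``settings`` stands in for the ComicTagger settings object, which
    must provide ``remove_html_tables`` and ``use_series_start_as_volume``.
    """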

    logo_url = "http://static.comicvine.com/bundles/comicvinesite/images/logo.png"
    api_key = ""

    @staticmethod
    def getRateLimitMessage():
        if ComicVineTalker.api_key == "":
            return "Comic Vine rate limit exceeded.  You should configure your own Comic Vine API key."
        else:
            return "Comic Vine rate limit exceeded.  Please wait a bit."

    def __init__(self):
        QObject.__init__(self)

        self.api_base_url = "https://comicvine.gamespot.com/api"
        self.wait_for_rate_limit = False

        # key that is registered to comictagger
        default_api_key = '27431e6787042105bd3e47e169a624521f89f3a4'

        if ComicVineTalker.api_key == "":
            self.api_key = default_api_key
        else:
            self.api_key = ComicVineTalker.api_key

        self.log_func = None

    def setLogFunc(self, log_func):
        self.log_func = log_func

    def writeLog(self, text):
        if self.log_func is None:
            # sys.stdout.write(text.encode(errors='replace'))
            # sys.stdout.flush()
            print(text, file=sys.stderr)
        else:
            self.log_func(text)

    def parseDateStr(self, date_str):
        day = None
        month = None
        year = None
        if date_str is not None:
            parts = date_str.split('-')
            year = utils.xlate(parts[0], True)
            if len(parts) > 1:
                month = utils.xlate(parts[1], True)
                if len(parts) > 2:
                    day = utils.xlate(parts[2], True)
        return day, month, year
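
    # Illustrative examples:
    #   parseDateStr("2010-07-01") -> (1, 7, 2010)      # (day, month, year)
    #   parseDateStr("2010-07")    -> (None, 7, 2010)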

    def testKey(self, key):
        try:
            test_url = self.api_base_url + "/issue/1/?api_key=" + key + "&format=json&field_list=name"
            cv_response = requests.get(test_url, headers={'user-agent': 'comictagger/' + ctversion.version}).json()

            # Bogus request, but if the key is wrong, you get error 100: "Invalid
            # API Key"
            return cv_response['status_code'] != 100
        except:
            return False
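
    # Illustrative usage sketch:
    #   if not ComicVineTalker().testKey(candidate_key):
    #       # Comic Vine rejected the key (error 100); ask the user for a new one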

    """
    Get the content from the CV server.  If we're in "wait mode" and the
    status code is a rate limit error, sleep for a bit and retry.
    """

    def getCVContent(self, url, params):
        total_time_waited = 0
        limit_wait_time = 1
        counter = 0
        wait_times = [1, 2, 3, 4]
        while True:
            cv_response = self.getUrlContent(url, params)
            if self.wait_for_rate_limit and cv_response[
                    'status_code'] == ComicVineTalkerException.RateLimit:
                self.writeLog(
                    "Rate limit encountered.  Waiting for {0} minutes\n".format(limit_wait_time))
                time.sleep(limit_wait_time * 60)
                total_time_waited += limit_wait_time
                limit_wait_time = wait_times[counter]
                if counter < 3:
                    counter += 1
                # don't wait much more than 20 minutes
                if total_time_waited < 20:
                    continue
            if cv_response['status_code'] != 1:
                self.writeLog(
                    "Comic Vine query failed with error #{0}: [{1}].\n".format(
                        cv_response['status_code'],
                        cv_response['error']))
                raise ComicVineTalkerException(
                    cv_response['status_code'], cv_response['error'])
            else:
                # it's all good
                break
        return cv_response
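
    # With wait_for_rate_limit enabled, a rate-limited request is retried after
    # sleeping 1, 1, 2, 3, then 4 minutes at a time; the loop stops retrying
    # once roughly 20 minutes of total waiting has accumulated.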

    def getUrlContent(self, url, params):
        # connect to server:
        #  if there is a 500 error, try a few more times before giving up
        #  any other error, just bail
        #print("---", url)
        for tries in range(3):
            try:
                resp = requests.get(url, params=params, headers={'user-agent': 'comictagger/' + ctversion.version})
                if resp.status_code == 200:
                    return resp.json()
                if resp.status_code == 500:
                    self.writeLog("Try #{0}: ".format(tries + 1))
                    time.sleep(1)
                    self.writeLog(str(resp.status_code) + "\n")
                else:
                    break
            except requests.exceptions.RequestException as e:
                self.writeLog(str(e) + "\n")
                raise ComicVineTalkerException(
                    ComicVineTalkerException.Network, "Network Error!")

        raise ComicVineTalkerException(
            ComicVineTalkerException.Unknown, "Error on Comic Vine server")

    def literalSearchForSeries(self, series_name, callback=None):
        # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
        search_series_name = unicodedata.normalize('NFKD', series_name).encode('ascii', 'ignore').decode('ascii')

        params = {
            'api_key': self.api_key,
            'format': 'json',
            'resources': 'volume',
            'query': search_series_name,
            'field_list': 'volume,name,id,start_year,publisher,image,description,count_of_issues',
            'page': 1,
            'limit': 100,
        }

        cv_response = self.getCVContent(self.api_base_url + "/search", params)

        search_results = list()

        # see http://api.comicvine.com/documentation/#handling_responses
        limit = cv_response['limit']
        current_result_count = cv_response['number_of_page_results']
        total_result_count = cv_response['number_of_total_results']

        # 8 Dec 2018 - Comic Vine changed query results again. Terms are now
        # ORed together, and we get thousands of results.  Good news is the
        # results are sorted by relevance, so we can be smart about halting
        # the search.
        # 1. Don't fetch more than some sane amount of pages.
        max_results = 50
        total_result_count = min(total_result_count, max_results)

        if callback is None:
            self.writeLog(
                "Found {0} of {1} results\n".format(
                    cv_response['number_of_page_results'],
                    cv_response['number_of_total_results']))
        search_results.extend(cv_response['results'])
        page = 1

        if callback is not None:
            callback(current_result_count, total_result_count)

        # see if we need to keep asking for more pages...
        while current_result_count < total_result_count:
            if callback is None:
                self.writeLog(
                    "getting another page of results {0} of {1}...\n".format(
                        current_result_count,
                        total_result_count))
            page += 1

            params['page'] = page
            cv_response = self.getCVContent(self.api_base_url + "/search", params)

            search_results.extend(cv_response['results'])
            current_result_count += cv_response['number_of_page_results']
            if callback is not None:
                callback(current_result_count, total_result_count)

        return search_results
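
    # Unlike searchForSeries() below, this "literal" variant keeps punctuation
    # and articles in the query, caps the fetch at 50 results, and does not
    # cache or post-filter the results against the individual search terms.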

    def searchForSeries(self, series_name, callback=None, refresh_cache=False):

        # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
        search_series_name = unicodedata.normalize('NFKD', series_name).encode('ascii', 'ignore').decode('ascii')
        # comicvine ignores punctuation and accents
        search_series_name = re.sub(r'[^A-Za-z0-9]+', ' ', search_series_name)
        # remove extra space and articles and all lower case
        search_series_name = utils.removearticles(search_series_name).lower().strip()

        # before we search online, look in our cache, since we might have
        # done this same search recently
        cvc = ComicVineCacher()
        if not refresh_cache:
            cached_search_results = cvc.get_search_results(series_name)

            if len(cached_search_results) > 0:
                return cached_search_results

        params = {
            'api_key': self.api_key,
            'format': 'json',
            'resources': 'volume',
            'query': search_series_name,
            'field_list': 'volume,name,id,start_year,publisher,image,description,count_of_issues',
            'page': 1,
            'limit': 100,
        }

        cv_response = self.getCVContent(self.api_base_url + "/search", params)

        search_results = list()

        # see http://api.comicvine.com/documentation/#handling_responses
        limit = cv_response['limit']
        current_result_count = cv_response['number_of_page_results']
        total_result_count = cv_response['number_of_total_results']

        # 8 Dec 2018 - Comic Vine changed query results again. Terms are now
        # ORed together, and we get thousands of results.  Good news is the
        # results are sorted by relevance, so we can be smart about halting
        # the search.
        # 1. Don't fetch more than some sane amount of pages.
        max_results = 500

        # 2. Halt when not all of our search terms are present in a result
        # 3. Halt when the results contain more (plus threshold) words than
        #    our search
        result_word_count_max = len(search_series_name.split()) + 3

        total_result_count = min(total_result_count, max_results)

        if callback is None:
            self.writeLog(
                "Found {0} of {1} results\n".format(
                    cv_response['number_of_page_results'],
                    cv_response['number_of_total_results']))
        search_results.extend(cv_response['results'])
        page = 1

        if callback is not None:
            callback(current_result_count, total_result_count)

        # see if we need to keep asking for more pages...
        stop_searching = False
        while (current_result_count < total_result_count):

            last_result = search_results[-1]['name']

            # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
            last_result = unicodedata.normalize('NFKD', last_result).encode('ascii', 'ignore').decode('ascii')
            # comicvine ignores punctuation and accents
            last_result = re.sub(r'[^A-Za-z0-9]+', ' ', last_result)
            # remove extra space and articles and all lower case
            last_result = utils.removearticles(last_result).lower().strip()

            # See if the last result's name has all of the search terms.
            # If not, break out of this loop; we're done.
            for term in search_series_name.split():
                if term not in last_result.lower():
                    #print("Term '{}' not in last result. Halting search result fetching".format(term))
                    stop_searching = True
                    break

            # Also, stop searching when the word count of the last result is too much longer
            # than our search terms list
            if len(last_result.split()) > result_word_count_max:
                print("Last result '{}' is too long: max word count: {}; Search terms {}. Halting search result fetching".format(last_result, result_word_count_max, search_series_name.split()), file=sys.stderr)
                stop_searching = True

            if stop_searching:
                break

            if callback is None:
                self.writeLog(
                    "getting another page of results {0} of {1}...\n".format(
                        current_result_count,
                        total_result_count))
            page += 1

            params['page'] = page
            cv_response = self.getCVContent(self.api_base_url + "/search", params)

            search_results.extend(cv_response['results'])
            current_result_count += cv_response['number_of_page_results']
            if callback is not None:
                callback(current_result_count, total_result_count)

        # Remove any search results that don't contain all the search terms
        # (iterate backwards for easy removal)
        for i in range(len(search_results) - 1, -1, -1):
            record = search_results[i]
            for term in search_series_name.split():
                # normalize unicode and convert to ascii. Does not work for everything eg ½ to 1⁄2 not 1/2
                recordName = unicodedata.normalize('NFKD', record['name']).encode('ascii', 'ignore').decode('ascii')
                # comicvine ignores punctuation and accents
                recordName = re.sub(r'[^A-Za-z0-9]+', ' ', recordName)
                # remove extra space and articles and all lower case
                recordName = utils.removearticles(recordName).lower().strip()
                if term not in recordName:
                    del search_results[i]
                    break

        # for record in search_results:
            #print(u"{0}: {1} ({2})".format(record['id'], record['name'], record['start_year']))
            # print(record)
            #record['count_of_issues'] = record['count_of_isssues']
        #print(u"{0}: {1} ({2})".format(search_results['results'][0]['id'], search_results['results'][0]['name'], search_results['results'][0]['start_year']))

        # cache these search results
        cvc.add_search_results(series_name, search_results)

        return search_results
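
    # Illustrative usage sketch ("Watchmen" is a hypothetical query):
    #   talker = ComicVineTalker()
    #   for record in talker.searchForSeries("Watchmen"):
    #       print(record['id'], record['name'], record['start_year'])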

    def fetchVolumeData(self, series_id):

        # before we search online, look in our cache, since we might already
        # have this info
        cvc = ComicVineCacher()
        cached_volume_result = cvc.get_volume_info(series_id)

        if cached_volume_result is not None:
            return cached_volume_result

        volume_url = self.api_base_url + "/volume/" + CVTypeID.Volume + "-" + str(series_id)

        params = {
            'api_key': self.api_key,
            'format': 'json',
            'field_list': 'name,id,start_year,publisher,count_of_issues'
        }
        cv_response = self.getCVContent(volume_url, params)

        volume_results = cv_response['results']

        cvc.add_volume_info(volume_results)

        return volume_results

    def fetchIssuesByVolume(self, series_id):

        # before we search online, look in our cache, since we might already
        # have this info
        cvc = ComicVineCacher()
        cached_volume_issues_result = cvc.get_volume_issues_info(series_id)

        if cached_volume_issues_result is not None:
            return cached_volume_issues_result

        params = {
            'api_key': self.api_key,
            'filter': 'volume:' + str(series_id),
            'format': 'json',
            'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description'
        }
        cv_response = self.getCVContent(self.api_base_url + "/issues/", params)

        #------------------------------------
        limit = cv_response['limit']
        current_result_count = cv_response['number_of_page_results']
        total_result_count = cv_response['number_of_total_results']
        #print("total_result_count", total_result_count)

        #print("Found {0} of {1} results".format(cv_response['number_of_page_results'], cv_response['number_of_total_results']))
        volume_issues_result = cv_response['results']
        page = 1
        offset = 0

        # see if we need to keep asking for more pages...
        while (current_result_count < total_result_count):
            #print("getting another page of issue results {0} of {1}...".format(current_result_count, total_result_count))
            page += 1
            offset += cv_response['number_of_page_results']

            params['offset'] = offset
            cv_response = self.getCVContent(self.api_base_url + "/issues/", params)

            volume_issues_result.extend(cv_response['results'])
            current_result_count += cv_response['number_of_page_results']

        self.repairUrls(volume_issues_result)

        cvc.add_volume_issues_info(series_id, volume_issues_result)

        return volume_issues_result

    def fetchIssuesByVolumeIssueNumAndYear(self, volume_id_list, issue_number, year):
        volume_filter = ""
        for vid in volume_id_list:
            volume_filter += str(vid) + "|"
        filter = "volume:{},issue_number:{}".format(volume_filter, issue_number)

        intYear = utils.xlate(year, True)
        if intYear is not None:
            filter += ",cover_date:{}-1-1|{}-1-1".format(intYear, intYear + 1)

        params = {
            'api_key': self.api_key,
            'format': 'json',
            'field_list': 'id,volume,issue_number,name,image,cover_date,site_detail_url,description',
            'filter': filter
        }

        cv_response = self.getCVContent(self.api_base_url + "/issues", params)

        #------------------------------------
        limit = cv_response['limit']
        current_result_count = cv_response['number_of_page_results']
        total_result_count = cv_response['number_of_total_results']
        #print("total_result_count", total_result_count)

        #print("Found {0} of {1} results\n".format(cv_response['number_of_page_results'], cv_response['number_of_total_results']))
        filtered_issues_result = cv_response['results']
        page = 1
        offset = 0

        # see if we need to keep asking for more pages...
        while (current_result_count < total_result_count):
            #print("getting another page of issue results {0} of {1}...\n".format(current_result_count, total_result_count))
            page += 1
            offset += cv_response['number_of_page_results']

            params['offset'] = offset
            cv_response = self.getCVContent(self.api_base_url + "/issues/", params)

            filtered_issues_result.extend(cv_response['results'])
            current_result_count += cv_response['number_of_page_results']

        self.repairUrls(filtered_issues_result)

        return filtered_issues_result
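
    # Illustrative example: for a hypothetical volume_id_list=[1234, 5678],
    # issue_number="2" and year=1999, the filter string built above becomes:
    #   "volume:1234|5678|,issue_number:2,cover_date:1999-1-1|2000-1-1"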

    def fetchIssueData(self, series_id, issue_number, settings):

        volume_results = self.fetchVolumeData(series_id)
        issues_list_results = self.fetchIssuesByVolume(series_id)

        found = False
        for record in issues_list_results:
            if IssueString(issue_number).asString() is None:
                issue_number = 1
            if IssueString(record['issue_number']).asString().lower() == IssueString(
                    issue_number).asString().lower():
                found = True
                break

        if (found):
            issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(record['id'])
            params = {
                'api_key': self.api_key,
                'format': 'json'
            }
            cv_response = self.getCVContent(issue_url, params)
            issue_results = cv_response['results']
        else:
            return None

        # Now, map the Comic Vine data to generic metadata
        return self.mapCVDataToMetadata(
            volume_results, issue_results, settings)

    def fetchIssueDataByIssueID(self, issue_id, settings):

        issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
        params = {
            'api_key': self.api_key,
            'format': 'json'
        }
        cv_response = self.getCVContent(issue_url, params)

        issue_results = cv_response['results']

        volume_results = self.fetchVolumeData(issue_results['volume']['id'])

        # Now, map the Comic Vine data to generic metadata
        md = self.mapCVDataToMetadata(volume_results, issue_results, settings)
        md.isEmpty = False
        return md

    def mapCVDataToMetadata(self, volume_results, issue_results, settings):

        # Now, map the Comic Vine data to generic metadata
        metadata = GenericMetadata()

        metadata.series = utils.xlate(issue_results['volume']['name'])
        metadata.issue = IssueString(issue_results['issue_number']).asString()
        metadata.title = utils.xlate(issue_results['name'])

        if volume_results['publisher'] is not None:
            metadata.publisher = utils.xlate(volume_results['publisher']['name'])
        metadata.day, metadata.month, metadata.year = self.parseDateStr(issue_results['cover_date'])

        metadata.seriesYear = utils.xlate(volume_results['start_year'])
        metadata.issueCount = utils.xlate(volume_results['count_of_issues'])
        metadata.comments = self.cleanup_html(
            issue_results['description'], settings.remove_html_tables)
        if settings.use_series_start_as_volume:
            metadata.volume = utils.xlate(volume_results['start_year'])

        metadata.notes = "Tagged with ComicTagger {0} using info from Comic Vine on {1}.  [Issue ID {2}]".format(
            ctversion.version,
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            issue_results['id'])
        #metadata.notes += issue_results['site_detail_url']

        metadata.webLink = issue_results['site_detail_url']

        person_credits = issue_results['person_credits']

        for person in person_credits:
            if 'role' in person:
                roles = person['role'].split(',')
                for role in roles:
                    # can we determine 'primary' from CV??
                    metadata.addCredit(
                        person['name'], role.title().strip(), False)

        character_credits = issue_results['character_credits']

        character_list = list()
        for character in character_credits:
            character_list.append(character['name'])
        metadata.characters = utils.listToString(character_list)

        team_credits = issue_results['team_credits']

        team_list = list()
        for team in team_credits:
            team_list.append(team['name'])
        metadata.teams = utils.listToString(team_list)

        location_credits = issue_results['location_credits']

        location_list = list()
        for location in location_credits:
            location_list.append(location['name'])
        metadata.locations = utils.listToString(location_list)

        story_arc_credits = issue_results['story_arc_credits']

        arc_list = []
        for arc in story_arc_credits:
            arc_list.append(arc['name'])
        if len(arc_list) > 0:
            metadata.storyArc = utils.listToString(arc_list)

        return metadata

    def cleanup_html(self, string, remove_html_tables):
        """
        converter = html2text.HTML2Text()
        #converter.emphasis_mark = '*'
        #converter.ignore_links = True
        converter.body_width = 0

        print(html2text.html2text(string))
        return string
        #return converter.handle(string)
        """

        if string is None:
            return ""

        # find any tables
        soup = BeautifulSoup(string, "html.parser")
        tables = soup.findAll('table')

        # remove all newlines first
        string = string.replace("\n", "")

        # put in our own
        string = string.replace("<br>", "\n")
        string = string.replace("</p>", "\n\n")
        string = string.replace("<h4>", "*")
        string = string.replace("</h4>", "*\n")

        # remove the tables
        p = re.compile(r'<table[^<]*?>.*?<\/table>')
        if remove_html_tables:
            string = p.sub('', string)
            string = string.replace("*List of covers and their creators:*", "")
        else:
            string = p.sub('{}', string)

        # now strip all other tags
        p = re.compile(r'<[^<]*?>')
        newstring = p.sub('', string)

        newstring = newstring.replace('&nbsp;', ' ')
        newstring = newstring.replace('&amp;', '&')

        newstring = newstring.strip()

        if not remove_html_tables:
            # now rebuild the tables into text from BSoup
            try:
                table_strings = []
                for table in tables:
                    rows = []
                    hdrs = []
                    col_widths = []
                    for hdr in table.findAll('th'):
                        item = hdr.string.strip()
                        hdrs.append(item)
                        col_widths.append(len(item))
                    rows.append(hdrs)

                    for row in table.findAll('tr'):
                        cols = []
                        col = row.findAll('td')
                        i = 0
                        for c in col:
                            item = c.string.strip()
                            cols.append(item)
                            if len(item) > col_widths[i]:
                                col_widths[i] = len(item)
                            i += 1
                        if len(cols) != 0:
                            rows.append(cols)

                    # now we have the data, make it into text
                    fmtstr = ""
                    for w in col_widths:
                        fmtstr += " {{:{}}}|".format(w + 1)
                    width = sum(col_widths) + len(col_widths) * 2
                    print("width=", width)
                    table_text = ""
                    counter = 0
                    for row in rows:
                        table_text += fmtstr.format(*row) + "\n"
                        if counter == 0 and len(hdrs) != 0:
                            table_text += "-" * width + "\n"
                        counter += 1

                    table_strings.append(table_text)

                newstring = newstring.format(*table_strings)
            except:
                # we caught an error rebuilding the table.
                # just bail and remove the formatting
                print("table parse error")
                newstring.replace("{}", "")

        return newstring
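
    # Illustrative example:
    #   cleanup_html("<p>Hello <b>world</b></p>", True)  ->  "Hello world"
    # Tags are stripped, </p> becomes a paragraph break, and any tables are
    # either dropped or rebuilt as plain text, depending on remove_html_tables.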

    def fetchIssueDate(self, issue_id):
        details = self.fetchIssueSelectDetails(issue_id)
        day, month, year = self.parseDateStr(details['cover_date'])
        return month, year

    def fetchIssueCoverURLs(self, issue_id):
        details = self.fetchIssueSelectDetails(issue_id)
        return details['image_url'], details['thumb_image_url']

    def fetchIssuePageURL(self, issue_id):
        details = self.fetchIssueSelectDetails(issue_id)
        return details['site_detail_url']

    def fetchIssueSelectDetails(self, issue_id):

        #cached_image_url,cached_thumb_url,cached_month,cached_year = self.fetchCachedIssueSelectDetails(issue_id)
        cached_details = self.fetchCachedIssueSelectDetails(issue_id)
        if cached_details['image_url'] is not None:
            return cached_details

        issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + str(issue_id)
        params = {
            'api_key': self.api_key,
            'format': 'json',
            'field_list': 'image,cover_date,site_detail_url'
        }

        cv_response = self.getCVContent(issue_url, params)

        details = dict()
        details['image_url'] = None
        details['thumb_image_url'] = None
        details['cover_date'] = None
        details['site_detail_url'] = None

        details['image_url'] = cv_response['results']['image']['super_url']
        details['thumb_image_url'] = cv_response[
            'results']['image']['thumb_url']
        details['cover_date'] = cv_response['results']['cover_date']
        details['site_detail_url'] = cv_response['results']['site_detail_url']

        if details['image_url'] is not None:
            self.cacheIssueSelectDetails(issue_id,
                                         details['image_url'],
                                         details['thumb_image_url'],
                                         details['cover_date'],
                                         details['site_detail_url'])
        # print(details['site_detail_url'])

        return details

    def fetchCachedIssueSelectDetails(self, issue_id):

        # before we search online, look in our cache, since we might already
        # have this info
        cvc = ComicVineCacher()
        return cvc.get_issue_select_details(issue_id)

    def cacheIssueSelectDetails(
            self, issue_id, image_url, thumb_url, cover_date, page_url):
        cvc = ComicVineCacher()
        cvc.add_issue_select_details(
            issue_id, image_url, thumb_url, cover_date, page_url)

    def fetchAlternateCoverURLs(self, issue_id, issue_page_url):
        url_list = self.fetchCachedAlternateCoverURLs(issue_id)
        if url_list is not None:
            return url_list

        # scrape the CV issue page URL to get the alternate cover URLs
        content = requests.get(issue_page_url, headers={'user-agent': 'comictagger/' + ctversion.version}).text
        alt_cover_url_list = self.parseOutAltCoverUrls(content)

        # cache this alt cover URL list
        self.cacheAlternateCoverURLs(issue_id, alt_cover_url_list)

        return alt_cover_url_list

    def parseOutAltCoverUrls(self, page_html):
        soup = BeautifulSoup(page_html, "html.parser")

        alt_cover_url_list = []

        # Using knowledge of the layout of the Comic Vine issue page here:
        # look for the divs that are in the classes 'imgboxart' and
        # 'issue-cover'
        div_list = soup.find_all('div')
        covers_found = 0
        for d in div_list:
            if 'class' in d.attrs:
                c = d['class']
                if ('imgboxart' in c and
                        'issue-cover' in c and
                        d.img['src'].startswith("http")
                    ):
                    covers_found += 1
                    if covers_found != 1:
                        alt_cover_url_list.append(d.img['src'])

        return alt_cover_url_list
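
    # Illustrative example of the markup this scraper looks for on the
    # Comic Vine issue page:
    #   <div class="imgboxart issue-cover"><img src="https://..."/></div>
    # The first matching div is the primary cover and is skipped; the rest are
    # collected as alternate cover URLs.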

    def fetchCachedAlternateCoverURLs(self, issue_id):

        # before we search online, look in our cache, since we might already
        # have this info
        cvc = ComicVineCacher()
        url_list = cvc.get_alt_covers(issue_id)
        if url_list is not None:
            return url_list
        else:
            return None

    def cacheAlternateCoverURLs(self, issue_id, url_list):
        cvc = ComicVineCacher()
        cvc.add_alt_covers(issue_id, url_list)

    #-------------------------------------------------------------------------
    urlFetchComplete = pyqtSignal(str, str, int)

    def asyncFetchIssueCoverURLs(self, issue_id):

        self.issue_id = issue_id
        details = self.fetchCachedIssueSelectDetails(issue_id)
        if details['image_url'] is not None:
            self.urlFetchComplete.emit(
                details['image_url'],
                details['thumb_image_url'],
                self.issue_id)
            return

        issue_url = self.api_base_url + "/issue/" + CVTypeID.Issue + "-" + \
            str(issue_id) + "/?api_key=" + self.api_key + \
            "&format=json&field_list=image,cover_date,site_detail_url"
        self.nam = QNetworkAccessManager()
        self.nam.finished.connect(self.asyncFetchIssueCoverURLComplete)
        self.nam.get(QNetworkRequest(QUrl(issue_url)))

    def asyncFetchIssueCoverURLComplete(self, reply):

        # read in the response
        data = reply.readAll()

        try:
            cv_response = json.loads(bytes(data))
        except Exception as e:
            print("Comic Vine query failed to get JSON data", file=sys.stderr)
            print(str(data), file=sys.stderr)
            return

        if cv_response['status_code'] != 1:
            print("Comic Vine query failed with error: [{0}].".format(
                cv_response['error']), file=sys.stderr)
            return

        image_url = cv_response['results']['image']['super_url']
        thumb_url = cv_response['results']['image']['thumb_url']
        cover_date = cv_response['results']['cover_date']
        page_url = cv_response['results']['site_detail_url']

        self.cacheIssueSelectDetails(
            self.issue_id, image_url, thumb_url, cover_date, page_url)

        self.urlFetchComplete.emit(image_url, thumb_url, self.issue_id)
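
    # Illustrative usage sketch: a Qt caller connects the signal before
    # starting the async fetch, e.g.
    #   talker.urlFetchComplete.connect(on_cover_urls_ready)
    #   talker.asyncFetchIssueCoverURLs(issue_id)
    # where on_cover_urls_ready(image_url, thumb_url, issue_id) is a
    # hypothetical slot defined by the caller.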

    altUrlListFetchComplete = pyqtSignal(list, int)

    def asyncFetchAlternateCoverURLs(self, issue_id, issue_page_url):
        # This async version requires the issue page url to be provided!
        self.issue_id = issue_id
        url_list = self.fetchCachedAlternateCoverURLs(issue_id)
        if url_list is not None:
            self.altUrlListFetchComplete.emit(url_list, int(self.issue_id))
            return

        self.nam = QNetworkAccessManager()
        self.nam.finished.connect(self.asyncFetchAlternateCoverURLsComplete)
        self.nam.get(QNetworkRequest(QUrl(str(issue_page_url))))

    def asyncFetchAlternateCoverURLsComplete(self, reply):
        # read in the response
        html = str(reply.readAll())
        alt_cover_url_list = self.parseOutAltCoverUrls(html)

        # cache this alt cover URL list
        self.cacheAlternateCoverURLs(self.issue_id, alt_cover_url_list)

        self.altUrlListFetchComplete.emit(
            alt_cover_url_list, int(self.issue_id))

    def repairUrls(self, issue_list):
        # make sure there are URLs for the image fields
        for issue in issue_list:
            if issue['image'] is None:
                issue['image'] = dict()
                issue['image']['super_url'] = ComicVineTalker.logo_url
                issue['image']['thumb_url'] = ComicVineTalker.logo_url