"""
A python class to automatically identify a comic archive
"""

"""
Copyright 2012  Anthony Beville

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys
import StringIO
try:
	from PIL import Image
	pil_available = True
except ImportError:
	try:
		import Image
		pil_available = True
	except ImportError:
		pil_available = False

from settings import ComicTaggerSettings
from comicvinecacher import ComicVineCacher
from genericmetadata import GenericMetadata
from comicvinetalker import ComicVineTalker, ComicVineTalkerException
from imagehasher import ImageHasher
from imagefetcher import ImageFetcher, ImageFetcherException
from issuestring import IssueString

import utils 

class IssueIdentifierNetworkError(Exception):
	pass
class IssueIdentifierCancelled(Exception):
	pass

class IssueIdentifier:
	
	ResultNoMatches                         = 0
	ResultFoundMatchButBadCoverScore        = 1
	ResultFoundMatchButNotFirstPage         = 2
	ResultMultipleMatchesWithBadImageScores = 3
	ResultOneGoodMatch                      = 4
	ResultMultipleGoodMatches               = 5	
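
	# search() stores one of the codes above in self.search_result to
	# summarize the outcome of the identification attempt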

	def __init__(self, comic_archive, settings ):
		self.comic_archive = comic_archive
		# image hashing algorithm selector; see calculateHash()
		self.image_hasher = 1
		
		self.onlyUseAdditionalMetaData = False

		# a decent hamming score, good enough to call it a match
		self.min_score_thresh = 16

		# for alternate covers, be more stringent, since we're a bit more scattershot in comparisons
		self.min_alternate_score_thresh = 12
		
		# the min distance a hamming score must be to separate itself from closest neighbor
		self.min_score_distance = 4

		# a very strong hamming score, almost certainly the same image
		self.strong_score_thresh = 8
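
		# for example: with a (typically 64-bit) average hash, a hamming
		# score of 8 means only 8 of 64 bits differ; 0 means identical hashes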
		
		# used to eliminate series names that are too long based on our search string
		self.length_delta_thresh = settings.id_length_delta_thresh

		# used to eliminate unlikely publishers
		self.publisher_blacklist = [ s.strip().lower() for s in settings.id_publisher_blacklist.split(',') ]
		
		self.additional_metadata = GenericMetadata()
		self.output_function = IssueIdentifier.defaultWriteOutput
		self.callback = None
		self.coverUrlCallback = None
		self.search_result = self.ResultNoMatches
		self.cover_page_index = 0
		self.cancel = False
		
	def setScoreMinThreshold( self, thresh ):
		self.min_score_thresh = thresh

	def setScoreMinDistance( self, distance ):
		self.min_score_distance = distance
		
	def setAdditionalMetadata( self, md ):
		self.additional_metadata = md

	def setNameLengthDeltaThreshold( self, delta ):
		self.length_delta_thresh = delta

	def setPublisherBlackList( self, blacklist ):
		self.publisher_blacklist = blacklist

	def setHasherAlgorithm( self, algo ):
		self.image_hasher = algo

	def setOutputFunction( self, func ):
		self.output_function = func

	def calculateHash( self, image_data ):
		# the algorithm selector may be stored as an int or a string,
		# depending on its source, so compare its string form
		if str(self.image_hasher) == '3':
			return ImageHasher( data=image_data ).dct_average_hash()
		elif str(self.image_hasher) == '2':
			return ImageHasher( data=image_data ).average_hash2()
		else:
			return ImageHasher( data=image_data ).average_hash()
	
	def getAspectRatio( self, image_data ):
		try:
			im = Image.open(StringIO.StringIO(image_data))
			w,h = im.size
			return float(h)/float(w)
		except Exception:
			# if the image can't be parsed, assume a typical comic cover ratio
			return 1.5
		
	def cropCover( self, image_data ):

		im = Image.open(StringIO.StringIO(image_data))
		w,h = im.size

		try:
			# keep the right half; on a two-page spread, that's the front cover
			cropped_im = im.crop( (int(w/2), 0, w, h) )
		except Exception as e:
			sys.exc_clear()
			print "cropCover() error:", e
			return None

		output = StringIO.StringIO()
		cropped_im.save(output, format="JPEG")
		cropped_image_data = output.getvalue()
		output.close()
		
		return cropped_image_data

		
	def setProgressCallback( self, cb_func ):
		self.callback = cb_func

	def setCoverURLCallback( self, cb_func ):
		self.coverUrlCallback = cb_func
		
	def getSearchKeys( self ):
	
		ca = self.comic_archive
		search_keys = dict()
		search_keys['series'] = None
		search_keys['issue_number'] = None
		search_keys['month'] = None
		search_keys['year'] = None
		
		if ca is None:
			# return the empty key set, so callers can still subscript it safely
			return search_keys

		if self.onlyUseAdditionalMetaData:
			search_keys['series'] = self.additional_metadata.series
			search_keys['issue_number'] = self.additional_metadata.issue
			search_keys['year'] = self.additional_metadata.year
			search_keys['month'] = self.additional_metadata.month
			return search_keys

		# see if the archive has any useful metadata for searching with
		if ca.hasCIX():
			internal_metadata = ca.readCIX()
		elif ca.hasCBI():
			internal_metadata = ca.readCBI()
		else:
			# neither tag style is present; start from an empty metadata object
			internal_metadata = GenericMetadata()

		# try to get some metadata from filename
		md_from_filename = ca.metadataFromFilename()

		# preference order:
		#   1. Additional metadata (supplied by the caller)
		#   2. Internal metadata (from the archive itself)
		#   3. Filename metadata
		
		if self.additional_metadata.series is not None:
			search_keys['series'] = self.additional_metadata.series
		elif internal_metadata.series is not None:
			search_keys['series'] = internal_metadata.series
		else:
			search_keys['series'] = md_from_filename.series

		if self.additional_metadata.issue is not None:
			search_keys['issue_number'] = self.additional_metadata.issue
		elif internal_metadata.issue is not None:
			search_keys['issue_number'] = internal_metadata.issue
		else:
			search_keys['issue_number'] = md_from_filename.issue
			
		if self.additional_metadata.year is not None:
			search_keys['year'] = self.additional_metadata.year
		elif internal_metadata.year is not None:
			search_keys['year'] = internal_metadata.year
		else:
			search_keys['year'] = md_from_filename.year

		if self.additional_metadata.month is not None:
			search_keys['month'] = self.additional_metadata.month
		elif internal_metadata.month is not None:
			search_keys['month'] = internal_metadata.month
		else:
			search_keys['month'] = md_from_filename.month
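		# at this point, search_keys might look like, e.g.:
		#    {'series': u'Spider-Man', 'issue_number': u'1', 'month': 5, 'year': 1990}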
			
		return search_keys

	@staticmethod
	def defaultWriteOutput( text ):
		sys.stdout.write( text )
		sys.stdout.flush()
		
	def log_msg( self, msg , newline=True ):
		self.output_function(msg)
		if newline:
			self.output_function("\n")
	
	def getIssueCoverMatchScore( self, comicVine, issue_id, localCoverHashList, useRemoteAlternates = False , useLog=True):

		# localCoverHashList is a list of pre-calculated hashes for the local cover(s).
		# useRemoteAlternates indicates whether to also compare against alternate covers from ComicVine.
		# Returns a dict with 'score', 'url', and 'hash' for the best-scoring remote cover.
		
		# first get the primary cover image
		primary_img_url, primary_thumb_url = comicVine.fetchIssueCoverURLs( issue_id )
				
		try:
			url_image_data = ImageFetcher().fetch(primary_thumb_url, blocking=True)
		except ImageFetcherException:
			self.log_msg( "Network issue while fetching cover image from ComicVine.  Aborting...")
			raise IssueIdentifierNetworkError

		if self.cancel:
			raise IssueIdentifierCancelled
			
		# alert the GUI, if needed
		if self.coverUrlCallback is not None:
			self.coverUrlCallback( url_image_data )

		remote_cover_list = []
		item = dict()
		item['url'] = primary_img_url

		item['hash'] = self.calculateHash( url_image_data )
		remote_cover_list.append( item )

		if self.cancel:
			raise IssueIdentifierCancelled
		
		if useRemoteAlternates:
			alt_img_url_list = comicVine.fetchAlternateCoverURLs( issue_id )
			
			for alt_url in alt_img_url_list:
				try:
					alt_url_image_data = ImageFetcher().fetch(alt_url, blocking=True)
				except ImageFetcherException:
					self.log_msg( "Network issue while fetching alt. cover image from ComicVine.  Aborting...")
					raise IssueIdentifierNetworkError

				if self.cancel:
					raise IssueIdentifierCancelled
					
				# alert the GUI, if needed
				if self.coverUrlCallback is not None:
					self.coverUrlCallback( alt_url_image_data )

				item = dict()
				item['url'] = alt_url
				item['hash'] = self.calculateHash( alt_url_image_data )
				remote_cover_list.append( item )
				
				if self.cancel:
					raise IssueIdentifierCancelled
				
		if useLog and useRemoteAlternates:
			self.log_msg( "[{0} alt. covers]".format(len(remote_cover_list)-1), False )
		if useLog:
			self.log_msg( "[ ", False )
			
		score_list = []
		done = False
		for local_cover_hash in localCoverHashList:
			for remote_cover_item in remote_cover_list:
				score = ImageHasher.hamming_distance(local_cover_hash, remote_cover_item['hash'] )
				score_item = dict()
				score_item['score'] = score
				score_item['url'] = remote_cover_item['url']
				score_item['hash'] = remote_cover_item['hash']
				score_list.append( score_item )
				if useLog:
					self.log_msg( "{0} ".format(score), False )
				
				if score <= self.strong_score_thresh:
					# this score is so good that we can stop now; we surely have a winner
					done = True
					break
			if done:
				break
				
		if useLog:
			self.log_msg( " ]", False )

		best_score_item = min(score_list, key=lambda x:x['score'])

		return best_score_item	

	"""
	def validate( self, issue_id ):
		# create hash list
		score = self.getIssueMatchScore( issue_id, hash_list, useRemoteAlternates = True )
		if score < 20:
			return True
		else:
			return False
	"""
	
	def search( self ):
	
		ca = self.comic_archive
		self.match_list = []
		self.cancel = False
		self.search_result = self.ResultNoMatches
		
		if not pil_available:
			self.log_msg( "Python Imaging Library (PIL) is not available and is needed for issue identification." )
			return self.match_list
			
		if not ca.seemsToBeAComicArchive():
			self.log_msg( "Sorry, but "+ opts.filename + "  is not a comic archive!")
			return self.match_list
		
		cover_image_data = ca.getPage( self.cover_page_index )
		cover_hash = self.calculateHash( cover_image_data )

		# check the aspect ratio:
		# if it's wider than it is high, it's probably a two-page spread;
		# if so, crop it and calculate a second hash
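		# (for instance, a 2560x1600 spread has a height/width ratio of 0.625,
		# so its 1280x1600 right half, the actual front cover, is hashed as well)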
		narrow_cover_hash = None
		aspect_ratio = self.getAspectRatio( cover_image_data )
		if aspect_ratio < 1.0:
			right_side_image_data = self.cropCover( cover_image_data )
			if right_side_image_data is not None:
				narrow_cover_hash = self.calculateHash( right_side_image_data )
				
		#self.log_msg( "Cover hash = {0:016x}".format(cover_hash) )

		keys = self.getSearchKeys()
		
		# we need, at minimum, a series and issue number
		if keys['series'] is None or keys['issue_number'] is None:
			self.log_msg("Not enough info for a search!")
			return []
		
		
		self.log_msg( "Going to search for:" )
		self.log_msg( "\tSeries: " + keys['series'] )
		self.log_msg( "\tIssue : " + keys['issue_number']  )
		if keys['year'] is not None:
			self.log_msg( "\tYear :  " + str(keys['year']) )
		if keys['month'] is not None:
			self.log_msg( "\tMonth : " + str(keys['month']) )
		
		#self.log_msg("Publisher Blacklist: " + str(self.publisher_blacklist))
		
		comicVine = ComicVineTalker( )
		comicVine.setLogFunc( self.output_function )

		self.log_msg( u"Searching for  {0} #{1} ...".format( keys['series'], keys['issue_number']) )
		try:
			cv_search_results = comicVine.searchForSeries( keys['series'] )
		except ComicVineTalkerException:
			self.log_msg( "Network issue while searching for series.  Aborting...")
			return []
		
		#self.log_msg( "Found " + str(len(cv_search_results)) + " initial results" )
		if self.cancel:
			return []
		
		series_second_round_list = []
		
		#self.log_msg( "Removing results with too long names, banned publishers, or future start dates" )
		for item in cv_search_results:
			length_approved = False
			publisher_approved = True
			date_approved = True
			
			# remove any series that starts after the issue year
			if keys['year'] is not None and str(keys['year']).isdigit():
				if int(keys['year']) < item['start_year']:
					date_approved = False
					
			# assume that our search name is close to the actual name, within, say, 5 characters
			shortened_key =       utils.removearticles(keys['series'])
			shortened_item_name = utils.removearticles(item['name'])
			if len( shortened_item_name ) <  ( len( shortened_key ) + self.length_delta_thresh) :
				length_approved = True
				
			# remove any series from publishers on the blacklist
			if item['publisher'] is not None:
				publisher = item['publisher']['name']
				if publisher is not None and publisher.lower() in self.publisher_blacklist:
					publisher_approved = False

			if length_approved and publisher_approved and date_approved:
				series_second_round_list.append(item)
		
		# if we don't think it's issue "1" (or "0"), remove any series that are one-shots
		if keys['issue_number'] not in [ '1', '0' ]:
			#self.log_msg( "Removing one-shots" )
			series_second_round_list[:] = [x for x in series_second_round_list if x['count_of_issues'] != 1]

		self.log_msg( "Searching in " + str(len(series_second_round_list)) +" series" )
		
		if self.callback is not None:
			self.callback( 0, len(series_second_round_list))
			
		# now sort the list by name length
		series_second_round_list.sort(key=lambda x: len(x['name']), reverse=False)
		
		# Now we've got a list of series that we can dig into, looking for a matching issue number
		counter = 0
		shortlist = []
		for series in series_second_round_list:
			if self.callback is not None:
				self.callback( counter, len(series_second_round_list)*3)
				counter += 1
				
			self.log_msg( u"Fetching info for  ID: {0} {1} ({2}) ...".format(
			               series['id'], 
			               series['name'], 
			               series['start_year']), newline=True )

			try:
				cv_series_results = comicVine.fetchVolumeData( series['id'] )
			except ComicVineTalkerException:
				self.log_msg( "Network issue while searching for series details.  Aborting...")
				return []
			
			issue_list = cv_series_results['issues']
			for issue in issue_list:
				num_s = IssueString(issue['issue_number']).asString()
				
				# look for a matching issue number
				if num_s == keys['issue_number']:

					# now, if we have an issue year key given, reject this one if not a match
					month, year = comicVine.fetchIssueDate( issue['id'] )					
					if keys['year'] is not None:
						if unicode(keys['year']) != unicode(year):
							break
				
					# found a matching issue number!  add it to short list
					shortlist.append( (series, cv_series_results, issue) )
					
		if keys['year'] is None:
			self.log_msg( "Found {0} series that have an issue #{1}".format(len(shortlist), keys['issue_number']) )
		else:
			self.log_msg( "Found {0} series that have an issue #{1} from {2}".format(len(shortlist), keys['issue_number'], keys['year'] ))
		
			
		# now we have a shortlist of volumes with the desired issue number
		# Do first round of cover matching
		counter = len(shortlist)
		for series, cv_series_results, issue in  shortlist:		
			if self.callback is not None:
				self.callback( counter, len(shortlist)*3)
				counter += 1
			
			self.log_msg( u"Examining covers for  ID: {0} {1} ({2}) ...".format(
			               series['id'], 
			               series['name'], 
			               series['start_year']), newline=False )
			
			# get this issue's cover date for the match record
			month, year = comicVine.fetchIssueDate( issue['id'] )

			# Now check the cover match against the primary image
			hash_list = [ cover_hash ]
			if narrow_cover_hash is not None:
				hash_list.append(narrow_cover_hash)
			try:
				score_item = self.getIssueCoverMatchScore( comicVine, issue['id'], hash_list, useRemoteAlternates = False )
			except (IssueIdentifierNetworkError, IssueIdentifierCancelled):
				self.match_list = []
				return self.match_list

			match = dict()
			match['series'] = u"{0} ({1})".format(series['name'], series['start_year'])
			match['distance'] = score_item['score']
			match['issue_number'] = keys['issue_number']
			match['url_image_hash'] = score_item['hash']
			match['issue_title'] = issue['name']
			match['img_url'] = score_item['url']
			match['issue_id'] = issue['id']
			match['volume_id'] = series['id']
			match['month'] = month
			match['year'] = year
			match['publisher'] = None
			if series['publisher'] is not None:
				match['publisher'] = series['publisher']['name']
				
			self.match_list.append(match)

			self.log_msg( " --> {0}".format(match['distance']), newline=False )

			self.log_msg( "" )
		
		if len(self.match_list) == 0:
			self.log_msg( ":-(  no matches!" )
			self.search_result = self.ResultNoMatches
			return self.match_list


		# sort list by image match scores
		self.match_list.sort(key=lambda k: k['distance'])		
		
		scores = [ i['distance'] for i in self.match_list ]

		self.log_msg( "Compared to covers in {0} issue(s):".format(len(self.match_list)), newline=False)
		self.log_msg( str(scores))

		def print_match(item):
			self.log_msg( u"-----> {0} #{1} {2} ({3}/{4}) -- score: {5}".format(
									item['series'], 
									item['issue_number'], 
									item['issue_title'],
									item['month'],
									item['year'],
									item['distance']) )
		
		best_score = self.match_list[0]['distance']

		if best_score >= self.min_score_thresh:
			# we have 1 or more low-confidence matches (all bad cover scores)
			# look at a few more pages in the archive, and also alternate covers online
			self.log_msg( "Very weak scores for the cover.  Analyzing alternate pages and covers..." )
			hash_list = [ cover_hash ]
			if narrow_cover_hash is not None:
				hash_list.append(narrow_cover_hash)
			for i in range( 1, min(3, ca.getNumberOfPages())):
				image_data = ca.getPage(i)
				page_hash = self.calculateHash( image_data )
				hash_list.append( page_hash )
				
			second_match_list = []
			counter = 2*len(self.match_list)
			for m in self.match_list:
				if self.callback is not None:
					self.callback( counter, len(self.match_list)*3)
					counter += 1
				self.log_msg( u"Examining alternate covers for ID: {0} {1} ...".format(
							   m['volume_id'], 
							   m['series']), newline=False )
				try:
					score_item = self.getIssueCoverMatchScore( comicVine, m['issue_id'], hash_list, useRemoteAlternates = True )
				except (IssueIdentifierNetworkError, IssueIdentifierCancelled):
					self.match_list = []
					return self.match_list
				self.log_msg("--->{0}".format(score_item['score']))
				self.log_msg( "" )

				if score_item['score'] < self.min_alternate_score_thresh:
					second_match_list.append(m)
					m['distance'] = score_item['score']
					
			if len( second_match_list ) == 0:
				if len( self.match_list) == 1:
					self.log_msg( "No matching pages in the issue." )
					self.log_msg( u"--------------------------------------------------")
					print_match(self.match_list[0])
					self.log_msg( u"--------------------------------------------------")
					self.search_result = self.ResultFoundMatchButBadCoverScore
				else:
					self.log_msg( u"--------------------------------------------------")
					self.log_msg( u"Multiple bad cover matches!  Need to use other info..." )
					self.log_msg( u"--------------------------------------------------")
					self.search_result = self.ResultMultipleMatchesWithBadImageScores
				return self.match_list
			else:
				# We did good, found something!
				self.log_msg( "Success in secondary/alternate cover matching!" )
				
				self.match_list = second_match_list
				# sort new list by image match scores
				self.match_list.sort(key=lambda k: k['distance'])		
				best_score = self.match_list[0]['distance']
				self.log_msg("[Second round cover matching: best score = {0}]".format(best_score))
				# now drop down into the rest of the processing
				
		if self.callback is not None:
			self.callback( 99, 100)
		
		# now pare down the list: remove any item scoring more than
		# min_score_distance worse than the best score
		for item in reversed(self.match_list):
			if item['distance'] > best_score + self.min_score_distance:
				self.match_list.remove(item)
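		# (e.g. with a best score of 4 and min_score_distance of 4, any match
		# with a distance above 8 has just been removed)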

		if len(self.match_list) == 1:
			self.log_msg( u"--------------------------------------------------")
			print_match(self.match_list[0])
			self.log_msg( u"--------------------------------------------------")
			self.search_result = self.ResultOneGoodMatch
			
		elif len(self.match_list) == 0:
			self.log_msg( u"--------------------------------------------------")
			self.log_msg( "No matches found :(" )
			self.log_msg( u"--------------------------------------------------")
			self.search_result = self.ResultNoMatches
		else:
			self.log_msg( "More than one likley candiate." )
			self.search_result = self.ResultMultipleGoodMatches
			self.log_msg( u"--------------------------------------------------")
			for item in self.match_list:
				print_match(item)
			self.log_msg( u"--------------------------------------------------")

		return self.match_list
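

if __name__ == "__main__":
	# A minimal usage sketch.  The ComicArchive import and its one-argument
	# constructor are assumptions based on the rest of this project, and the
	# command-line handling here is illustrative only.
	from comicarchive import ComicArchive

	if len(sys.argv) < 2:
		print "usage: issueidentifier.py [comic_archive_file]"
		sys.exit(-1)

	settings = ComicTaggerSettings()
	ca = ComicArchive( sys.argv[1] )
	ii = IssueIdentifier( ca, settings )
	matches = ii.search()
	if ii.search_result == IssueIdentifier.ResultOneGoodMatch:
		m = matches[0]
		print u"Matched: {0} #{1} [issue id {2}]".format( m['series'], m['issue_number'], m['issue_id'] )
	else:
		print "No single confident match (result code: {0})".format( ii.search_result )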