"""
A python class to automatically identify a comic archive
"""

"""
Copyright 2012  Anthony Beville

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import sys
import math
import urllib2, urllib

from settings import ComicTaggerSettings
from comicvinecacher import ComicVineCacher
from genericmetadata import GenericMetadata
from comicvinetalker import ComicVineTalker
from imagehasher import ImageHasher
from imagefetcher import  ImageFetcher

import utils 

class IssueIdentifier:
	"""Try to work out which Comic Vine issue a comic archive contains.

	The identifier gathers a series name and issue number (from caller
	supplied metadata, metadata embedded in the archive, or the file name),
	looks the series up through ComicVineTalker, and compares a hash of the
	archive's cover page with a hash of each candidate issue's cover image.

	Progress text is emitted through a replaceable output function (see
	setOutputFunction); by default it goes to stdout.
	"""
	
	# Result codes.  They are not referenced anywhere inside this class.
	ResultNoMatches                         = 0
	ResultFoundMatchButBadCoverScore        = 1
	ResultFoundMatchButNotFirstPage         = 2
	ResultMultipleMatchesWithBadImageScores = 3
	ResultOneGoodMatch                      = 4
	ResultMultipleGoodMatches               = 5	

	def __init__(self, comic_archive, cv_api_key ):
		"""Create an identifier for one archive.

		comic_archive -- the archive object to identify
		cv_api_key    -- API key handed to ComicVineTalker when searching
		"""
		self.comic_archive = comic_archive
		# hashing algorithm id; see calculateHash() for the accepted values
		self.image_hasher = 1
		# a best cover distance above this is treated as a weak match
		self.min_score_thresh = 22
		# matches further than this from the best distance are discarded
		self.min_score_distance = 2
		# a page distance at or below this counts as a "great" match
		self.strong_score_thresh = 8
		self.additional_metadata = GenericMetadata()
		self.cv_api_key = cv_api_key
		self.output_function = IssueIdentifier.defaultWriteOutput
		self.callback = None
		# search() resets both of these on every run; they are initialised
		# here so the attributes exist before the first search.
		self.match_list = []
		self.cancel = False
	
	def setScoreMinThreshold( self, thresh ):
		"""Set the distance above which a cover match is considered weak."""
		self.min_score_thresh = thresh

	def setScoreMinDistance( self, distance ):
		"""Set how far from the best distance a match may be and still be kept."""
		self.min_score_distance = distance
		
	def setAdditionalMetadata( self, md ):
		"""Set caller supplied metadata; it takes precedence in getSearchKeys()."""
		self.additional_metadata = md

	def setHasherAlgorithm( self, algo ):
		"""Select the hashing algorithm id used by calculateHash()."""
		self.image_hasher = algo

	def setOutputFunction( self, func ):
		"""Set the callable that receives every piece of progress text."""
		self.output_function = func
	
	def calculateHash( self, image_data ):
		"""Return the hash of image_data using the selected algorithm.

		An algorithm id of 3 selects dct_average_hash, 2 selects
		average_hash2, and anything else selects average_hash.  The id is
		compared as text so that the int 2 and the string '2' behave alike
		(the default id is an int, the comparisons are against strings).
		"""
		algo = str( self.image_hasher )
		hasher = ImageHasher( data=image_data )
		if algo == '3':
			return hasher.dct_average_hash()
		if algo == '2':
			return hasher.average_hash2()
		return hasher.average_hash()
	
	def setProgressCallback( self, cb_func ):
		"""Set a callable invoked as cb_func(current, total) during search()."""
		self.callback = cb_func

	@staticmethod
	def _firstSet( attr, sources ):
		"""Return attr from the first source where it is not None, else None."""
		for md in sources:
			value = getattr( md, attr )
			if value is not None:
				return value
		return None
		
	def getSearchKeys( self ):
		"""Collect the values used to search for the issue.

		Returns a dict with the keys 'series', 'issue_number', 'month' and
		'year'.  Every value is None when there is no archive; otherwise
		each value is taken from the first source that provides it.
		"""
		search_keys = {
			'series': None,
			'issue_number': None,
			'month': None,
			'year': None,
		}

		ca = self.comic_archive
		if ca is None:
			# return the empty key set rather than None so that callers can
			# index the result unconditionally
			return search_keys
		
		# see if the archive has any useful meta data for searching with.
		# The original code called readCBI() whether or not hasCBI() was
		# true, so the two non-CIX branches are merged here.
		if ca.hasCIX():
			internal_metadata = ca.readCIX()
		else:
			internal_metadata = ca.readCBI()

		# try to get some metadata from filename
		md_from_filename = ca.metadataFromFilename()

		# preference order:
		#   1. Additional metadata
		#   2. Internal metadata
		#   3. Filename metadata
		sources = ( self.additional_metadata, internal_metadata, md_from_filename )

		search_keys['series'] = self._firstSet( 'series', sources )
		search_keys['issue_number'] = self._firstSet( 'issueNumber', sources )
		search_keys['year'] = self._firstSet( 'publicationYear', sources )
		search_keys['month'] = self._firstSet( 'publicationMonth', sources )
			
		return search_keys

	@staticmethod
	def defaultWriteOutput( text ):
		"""Default output function: write text to stdout and flush."""
		sys.stdout.write(text)
		sys.stdout.flush()
		
	def log_msg( self, msg , newline=True ):
		"""Send msg to the output function, followed by a newline unless disabled."""
		self.output_function(msg)
		if newline:
			self.output_function("\n")

	@staticmethod
	def _normalizeIssueNumber( raw ):
		"""Format an issue number such as "2.00" as "2" (and "2.50" as "2.5").

		Values that cannot be read as a finite number are returned
		unchanged, so one odd issue number does not abort the search.
		"""
		try:
			num_f = float( raw )
			if math.floor( num_f ) == num_f:
				return str( int( num_f ) )
		except (TypeError, ValueError, OverflowError):
			return raw
		return str( num_f )

	@staticmethod
	def _shortlistSeries( cv_search_results, keys ):
		"""Filter series search results down to plausible candidates."""
		# assume that our search name is close to the actual name: drop any
		# series whose name is 5 or more characters longer than the search
		max_len = len( keys['series'] ) + 5
		shortlist = [ item for item in cv_search_results
		              if len( utils.removearticles(item['name']) ) < max_len ]

		# if we don't think it's an issue number 1, remove any series' that are one-shots
		if keys['issue_number'] != '1':
			shortlist = [ x for x in shortlist if x['count_of_issues'] != 1 ]

		return shortlist

	def _logMatch( self, item ):
		"""Log a one line summary of a single match."""
		self.log_msg( u"-----> {0} #{1} {2} -- score: {3}".format(
								item['series'], 
								item['issue_number'], 
								item['issue_title'],
								item['distance']) )

	def _scanPagesForCover( self, ca, target_hash ):
		"""Compare every page of the archive against target_hash and log the outcome.

		Scanning stops at the first page whose distance is at or below the
		strong threshold.  Page numbers in the messages are one-based.
		"""
		found = False
		for i in range( ca.getNumberOfPages() ):
			page_hash = self.calculateHash( ca.getPage(i) )
			distance = ImageHasher.hamming_distance( page_hash, target_hash )
			if distance <= self.strong_score_thresh:
				self.log_msg( "Found a great match d={0} on page {1}!".format(distance, i+1) )
				found = True
				break
			elif distance < self.min_score_thresh:
				self.log_msg( "Found a good match d={0} on page {1}".format(distance, i+1) )
				found = True
			self.log_msg( ".", newline=False )
		self.log_msg( "" )
		if not found:
			self.log_msg( "No matching pages in the issue.  Bummer" )
	
	def search( self ):
		"""Search for issues matching the archive and return the candidates.

		Returns a list of match dicts sorted by ascending 'distance' (the
		hamming distance between the archive cover hash and the candidate's
		cover hash).  Each dict has the keys 'series', 'distance',
		'issue_number', 'url_image_hash', 'issue_title', 'img_url' (which
		holds the thumbnail URL), 'issue_id' and 'volume_id'.  The same list
		is stored in self.match_list.

		An empty list is returned when the archive is not a comic archive,
		when no series or issue number is known, when nothing matches, or
		when self.cancel is found to be true part way through.  Nothing in
		this class sets self.cancel to True; presumably outside code (for
		example the progress callback) does.
		"""
		ca = self.comic_archive
		self.match_list = []
		self.cancel = False

		if not ca.seemsToBeAComicArchive():
			# The original message used an undefined name here.  The archive
			# is assumed to expose its file name as a string `path`
			# attribute -- TODO confirm; a generic phrase is used otherwise.
			name = getattr( ca, 'path', None )
			if not name:
				name = "this file"
			self.log_msg( "Sorry, but "+ name + "  is not a comic archive!")
			return []
		
		cover_image_data = ca.getCoverPage()
		cover_hash = self.calculateHash( cover_image_data )

		keys = self.getSearchKeys()
		
		# we need, at minimum, a series and issue number
		if keys['series'] is None or keys['issue_number'] is None:
			self.log_msg("Not enough info for a search!")
			return []
		
		comicVine = ComicVineTalker( self.cv_api_key )

		self.log_msg( "Searching for  {0} #{1} ...".format( keys['series'], keys['issue_number']) )

		keys['series'] = utils.removearticles( keys['series'] )
		
		cv_search_results = comicVine.searchForSeries( keys['series'] )
		
		if self.cancel:
			return []
		
		series_shortlist = self._shortlistSeries( cv_search_results, keys )

		self.log_msg( "Searching in " + str(len(series_shortlist)) +" series" )
		
		if self.callback is not None:
			self.callback( 0, len(series_shortlist))
		
		# now sort the list by name length
		series_shortlist.sort(key=lambda x: len(x['name']))
		
		# Now we've got a list of series that we can dig into, 
		# and look for matching issue number, date, and cover image
		total = len( series_shortlist )
		for counter, series in enumerate( series_shortlist, 1 ):
			if self.callback is not None:
				self.callback( counter, total )
				
			# unicode templates, as in _logMatch: on Python 2 a byte-string
			# template fails on a unicode name with non-ASCII characters
			self.log_msg( u"Fetching info for  ID: {0} {1} ({2}) ...".format(
			               series['id'], 
			               series['name'], 
			               series['start_year']), newline=False )
			
			cv_series_results = comicVine.fetchVolumeData( series['id'] )
			for issue in cv_series_results['issues']:
				
				# format the issue number string nicely, since it's usually something like "2.00"
				num_s = self._normalizeIssueNumber( issue['issue_number'] )

				# look for a matching issue number
				if num_s != keys['issue_number']:
					continue

				# found a matching issue number!  now get the issue data 
				img_url, thumb_url = comicVine.fetchIssueCoverURLs( issue['id'] )
				url_image_data = ImageFetcher().fetch(thumb_url, blocking=True)

				if self.cancel:
					self.match_list = []
					return self.match_list

				url_image_hash = self.calculateHash( url_image_data )
				
				match = {
					'series': u"{0} ({1})".format(series['name'], series['start_year']),
					'distance': ImageHasher.hamming_distance(cover_hash, url_image_hash),
					'issue_number': num_s,
					'url_image_hash': url_image_hash,
					'issue_title': issue['name'],
					'img_url': thumb_url,
					'issue_id': issue['id'],
					'volume_id': series['id'],
				}
				self.match_list.append(match)

				self.log_msg( " --> {0}".format(match['distance']), newline=False )
				
				# only the first matching issue of each series is used
				break
			self.log_msg( "" )

		if not self.match_list:
			self.log_msg( ":-(  no matches!" )
			return self.match_list

		# sort list by image match scores
		self.match_list.sort(key=lambda k: k['distance'])		
		
		distances = [ m['distance'] for m in self.match_list ]

		self.log_msg( "Compared {0} covers".format(len(self.match_list)), newline=False)
		self.log_msg( str(distances))

		best_score = self.match_list[0]['distance']

		if len(self.match_list) == 1:
			if best_score > self.min_score_thresh:
				self.log_msg( "!!!! Very weak score for the cover.  Maybe it's not the cover?" )
				self.log_msg( "Comparing other pages now..." )
				self._scanPagesForCover( ca, self.match_list[0]['url_image_hash'] )

			self._logMatch(self.match_list[0])
			return self.match_list

		if best_score > self.min_score_thresh:
			self.log_msg( "No good image matches!  Need to use other info..." )
			return self.match_list

		# now pare down list, remove any item more than specified distant from the top scores.
		# Slice assignment keeps the same list object without removing
		# items from a list that is being iterated.
		cutoff = best_score + self.min_score_distance
		self.match_list[:] = [ m for m in self.match_list if m['distance'] <= cutoff ]

		if len(self.match_list) == 1:
			self._logMatch(self.match_list[0])
		elif len(self.match_list) == 0:
			self.log_msg( "No matches found :(" )
		else:
			self.log_msg( "" )
			self.log_msg( "More than one likley candiate.  Maybe a lexical comparison??" )
			for item in self.match_list:
				self._logMatch(item)

		return self.match_list