From 00202cc8657308307c5c5bc8915a95cc12cb9e21 Mon Sep 17 00:00:00 2001 From: beville Date: Thu, 14 Feb 2013 06:36:28 +0000 Subject: [PATCH] more scripts git-svn-id: http://comictagger.googlecode.com/svn/trunk@499 6c5673fe-1810-88d6-992b-cd32ca31540c --- scripts/find_dupes.py | 82 ++++++++++++++++++++++++ scripts/inventory.py | 39 ++++++------ scripts/make_links.py | 81 ++++++++++++++++++++++++ scripts/remove_ads.py | 129 ++++++++++++++++++++++++++++++++++++++ scripts/validate_cover.py | 57 +++++++++++++++++ 5 files changed, 370 insertions(+), 18 deletions(-) create mode 100755 scripts/find_dupes.py create mode 100755 scripts/make_links.py create mode 100755 scripts/remove_ads.py create mode 100755 scripts/validate_cover.py diff --git a/scripts/find_dupes.py b/scripts/find_dupes.py new file mode 100755 index 0000000..7734e6d --- /dev/null +++ b/scripts/find_dupes.py @@ -0,0 +1,82 @@ +#!/usr/bin/python +""" +find all duplicate comics +""" + +import sys + +from comictaggerlib.comicarchive import * +from comictaggerlib.settings import * +from comictaggerlib.issuestring import * +import comictaggerlib.utils + + +def main(): + utils.fix_output_encoding() + settings = ComicTaggerSettings() + + style = MetaDataStyle.CIX + + if len(sys.argv) < 2: + print "usage: {0} comic_folder ".format(sys.argv[0]) + return + + filelist = utils.get_recursive_filelist( sys.argv[1:] ) + + #first find all comics with metadata + print "reading in all comics..." + comic_list = [] + max_name_len = 2 + for filename in filelist: + ca = ComicArchive(filename, settings ) + if ca.seemsToBeAComicArchive() and ca.hasMetadata( style ): + fmt_str = u"{{0:{0}}}".format(max_name_len) + print fmt_str.format( filename ) + "\r", + sys.stdout.flush() + comic_list.append((filename, ca.readMetadata( style ))) + max_name_len = max ( max_name_len, len(filename)) + + print fmt_str.format( "" ) + "\r", + print "Found {0} tagged comics.".format( len(comic_list)) + + #sort the list by series+issue+year, to put all the dupes together + def makeKey(x): + return "<" + unicode(x[1].series) + u" #" + unicode( x[1].issue ) + u" - " + unicode( x[1].year ) + ">" + comic_list.sort(key=makeKey, reverse=False) + + # look for duplicate blocks + dupe_set_list = list() + dupe_set = list() + prev_key = "" + for filename, md in comic_list: + print fmt_str.format( filename ) + "\r", + sys.stdout.flush() + + new_key = makeKey((filename, md)) + + #if the new key same as the last, add to to dupe set + if new_key == prev_key: + dupe_set.append(filename) + + #else we're on a new potential block + else: + # only add if the dupe list has 2 or more + if len (dupe_set) > 1: + dupe_set_list.append( dupe_set ) + dupe_set = list() + dupe_set.append(filename) + + prev_key = new_key + + print fmt_str.format( "" ) + "\r", + print "Found {0} duplicate sets".format( len(dupe_set_list)) + + for dupe_set in dupe_set_list: + ca = ComicArchive(dupe_set[0], settings ) + md = ca.readMetadata( style ) + print "{0} #{1} ({2})".format( md.series, md.issue, md.year ) + for filename in dupe_set: + print "------------->{0}".format( filename ) + +if __name__ == '__main__': + main() diff --git a/scripts/inventory.py b/scripts/inventory.py index a3054fd..4e76dc7 100755 --- a/scripts/inventory.py +++ b/scripts/inventory.py @@ -1,41 +1,46 @@ #!/usr/bin/python """ -An experiment with comictaggerlib +An example script using the comictagger library """ import sys import os -import platform -import locale -import codecs - -sys.path.append("..") from comictaggerlib.comicarchive import * from comictaggerlib.settings import * from comictaggerlib.issuestring import * import comictaggerlib.utils - def main(): - utils.fix_output_encoding() settings = ComicTaggerSettings() + + style = MetaDataStyle.CIX + + if len(sys.argv) < 2: + print "usage: {0} comic_folder ".format(sys.argv[0]) + return filelist = utils.get_recursive_filelist( sys.argv[1:] ) - - #first read in CIX metadata from all files + + #first read in metadata from all files metadata_list = [] max_name_len = 2 for filename in filelist: ca = ComicArchive(filename, settings ) - metadata_list.append((filename, ca.readCIX())) + #make a list of paired filenames and metadata objects + metadata_list.append((filename, ca.readMetadata( style ))) fmt_str = u"{{0:{0}}}".format(max_name_len) print fmt_str.format( filename ) + "\r", sys.stdout.flush() + max_name_len = max ( max_name_len, len(filename)) + print fmt_str.format( "" ) + "\r", - + print "-----------------------------------------------" + print "Found {0} comics with {1} tags".format( len(metadata_list), MetaDataStyle.name[style]) + print "-----------------------------------------------" + # now, figure out column widths w0 = 4 w1 = 4 @@ -47,18 +52,16 @@ def main(): w0 += 2 # build a format string - fmt_str = "{0:" + str(w0) + "} {1:" + str(w1) + "} #{2:6} ({3})" + fmt_str = u"{0:" + str(w0) + "} {1:" + str(w1) + "} #{2:6} ({3})" # now sort the list by series, and then issue metadata_list.sort(key=lambda x: IssueString(x[1].issue).asString(3), reverse=False) - metadata_list.sort(key=lambda x: str(x[1].series).lower()+str(x[1].year), reverse=False) - #metadata_list.sort(key=lambda x: x[1].series, reverse=False) + metadata_list.sort(key=lambda x: unicode(x[1].series).lower()+str(x[1].year), reverse=False) # now print - for filename,md in metadata_list: + for filename, md in metadata_list: if not md.isEmpty: - print fmt_str.format(os.path.split(filename)[1]+":", md.series, md.issue, md.year) - + print fmt_str.format(os.path.split(filename)[1]+":", md.series, md.issue, md.year), md.title if __name__ == '__main__': main() diff --git a/scripts/make_links.py b/scripts/make_links.py new file mode 100755 index 0000000..bd8dc34 --- /dev/null +++ b/scripts/make_links.py @@ -0,0 +1,81 @@ +#!/usr/bin/python +""" +find all duplicate comics +""" + +import sys +import os + +from comictaggerlib.comicarchive import * +from comictaggerlib.settings import * +from comictaggerlib.issuestring import * +import comictaggerlib.utils + +def make_folder( folder ): + if not os.path.exists( folder ): + try: + os.makedirs(folder) + except Exception as e: + print "{0} Can't make {1} -- quitting".format(e, folder) + quit() + +def make_link( source, link ): + if not os.path.exists( link ): + os.symlink( source , link ) + +def main(): + utils.fix_output_encoding() + settings = ComicTaggerSettings() + + style = MetaDataStyle.CBI + + if len(sys.argv) < 3: + print "usage: {0} comic_root link_root".format(sys.argv[0]) + return + + comic_root = sys.argv[1] + link_root = sys.argv[2] + + print "root is : ", comic_root + filelist = utils.get_recursive_filelist( [ comic_root ] ) + make_folder( link_root ) + + #first find all comics with metadata + print "reading in all comics..." + comic_list = [] + max_name_len = 2 + for filename in filelist: + ca = ComicArchive(filename, settings ) + if ca.seemsToBeAComicArchive() and ca.hasMetadata( style ): + + comic_list.append((filename, ca.readMetadata( style ))) + + fmt_str = u"{{0:{0}}}".format(max_name_len) + print fmt_str.format( filename ) + "\r", + sys.stdout.flush() + max_name_len = max ( max_name_len, len(filename)) + + print fmt_str.format( "" ) + print "Found {0} tagged comics.".format( len(comic_list)) + + # walk through the comic list and add subdirs and links for each one + for filename, md in comic_list: + print fmt_str.format( filename ) + "\r", + sys.stdout.flush() + + #do date organizing: + if md.month is not None: + month_str = "{0:02d}".format(int(md.month)) + else: + month_str = "00" + date_folder = os.path.join(link_root, "date", str(md.year), month_str) + make_folder( date_folder ) + make_link( filename, os.path.join(date_folder, os.path.basename(filename)) ) + + #do publisher/series organizing: + series_folder = os.path.join(link_root, "series", str(md.publisher), str(md.series)) + make_folder( series_folder ) + make_link( filename, os.path.join(series_folder, os.path.basename(filename)) ) + +if __name__ == '__main__': + main() diff --git a/scripts/remove_ads.py b/scripts/remove_ads.py new file mode 100755 index 0000000..e789a05 --- /dev/null +++ b/scripts/remove_ads.py @@ -0,0 +1,129 @@ +#!/usr/bin/python +""" +Create new comic archives from old one, removing pages marked as ads +and deleted. Walks recursivly through the given folders. Originals +are kept in a subfolder at the level of the original +""" + +import sys +import os +import tempfile +import zipfile +import shutil + + +from comictaggerlib.comicarchive import * +from comictaggerlib.settings import * +import comictagger.utils + +subfolder_name = "PRE_AD_REMOVAL" +unwanted_types = [ 'Deleted', 'Advertisment' ] + + +def main(): + utils.fix_output_encoding() + settings = ComicTaggerSettings() + + style = MetaDataStyle.CIX + + filelist = utils.get_recursive_filelist( sys.argv[1:] ) + + #first read in CIX metadata from all files, make a list of candidates + modify_list = [] + for filename in filelist: + + ca = ComicArchive(filename, settings ) + if (ca.isZip or ca.isRar()) and ca.hasMetadata( style ): + md = ca.readMetadata( style ) + if len(md.pages) != 0: + for p in md.pages: + if p.has_key('Type') and p['Type'] in unwanted_types: + #This one has pages to remove. add to list! + modify_list.append((filename, md)) + break + + #now actually process those files + for filename,md in modify_list: + ca = ComicArchive(filename, settings ) + curr_folder = os.path.dirname( filename ) + curr_subfolder = os.path.join( curr_folder, subfolder_name ) + + #skip any of our generated subfolders... + if os.path.basename(curr_folder) == subfolder_name: + continue + sys.stdout.write("Removing unwanted pages from " + filename) + + # verify that we can write to current folder + if not os.access(filename, os.W_OK): + print "Can't move: {0}: skipped!".format(filename) + continue + if not os.path.exists( curr_subfolder ) and not os.access(curr_folder, os.W_OK): + print "Can't create subfolder here: {0}: skipped!".format(filename) + continue + if not os.path.exists( curr_subfolder ): + os.mkdir( curr_subfolder ) + if not os.access(curr_subfolder, os.W_OK): + print "Can't write to the subfolder here: {0}: skipped!".format(filename) + continue + + # generate a new file with temp name + tmp_fd, tmp_name = tempfile.mkstemp( dir=os.path.dirname(filename) ) + os.close( tmp_fd ) + + try: + zout = zipfile.ZipFile (tmp_name, 'w') + + # now read in all the pages from the old one, except the ones we want to skip + new_num = 0 + new_pages = list() + for p in md.pages: + if p.has_key('Type') and p['Type'] in unwanted_types: + + continue + else: + pageNum = int(p['Image']) + name = ca.getPageName( pageNum ) + buffer = ca.getPage( pageNum ) + sys.stdout.write('.') + sys.stdout.flush() + + #Generate a new name for the page file + ext = os.path.splitext(name)[1] + new_name = "page{0:04d}{1}".format(new_num,ext) + zout.writestr(new_name, buffer) + + # create new page entry + new_p = dict() + new_p['Image'] = str(new_num) + if p.has_key('Type'): + new_p['Type'] = p['Type'] + new_pages.append(new_p) + new_num += 1 + + #preserve the old comment + zout.comment = ca.archiver.getArchiveComment() + + except Exception as e: + print "Failure creating new archive: {0}!".format(filename) + print e, sys.exc_info()[0] + zout.close() + os.unlink( tmp_name ) + else: + zout.close() + + # Success! Now move the files + shutil.move( filename, curr_subfolder ) + os.rename( tmp_name, filename ) + # TODO: We might have converted a rar to a zip, and should probably change + # the extension, as needed. + + print "Done!".format(filename) + + # Create a new archive object for the new file, and write the old CIX data, with new page info + ca = ComicArchive( filename, settings ) + md.pages = new_pages + ca.writeMetadata( style ) + + +if __name__ == '__main__': + main() diff --git a/scripts/validate_cover.py b/scripts/validate_cover.py new file mode 100755 index 0000000..e9020f6 --- /dev/null +++ b/scripts/validate_cover.py @@ -0,0 +1,57 @@ +#!/usr/bin/python +""" +test archive cover against comicvine for a given issue ID +""" +import sys +sys.path.append("..") +import os + +import comictaggerlib.utils +from comictaggerlib.settings import * +from comictaggerlib.comicarchive import * +from comictaggerlib.issueidentifier import * +from comictaggerlib.comicvinetalker import * + +def main(): + + utils.fix_output_encoding() + settings = ComicTaggerSettings() + + if len(sys.argv) < 3: + print "usage: {0} comicfile issueid".format(sys.argv[0]) + return + + filename = sys.argv[1] + issue_id = sys.argv[2] + + if not os.path.exists(filename): + print opts.filename + ": not found!" + return + + ca = ComicArchive(filename, settings ) + if not ca.seemsToBeAComicArchive(): + print "Sorry, but "+ opts.filename + " is not a comic archive!" + return + + ii = IssueIdentifier( ca, settings ) + + # calculate the hashes of the first two pages + cover_image_data = ca.getPage( 0 ) + cover_hash0 = ii.calculateHash( cover_image_data ) + cover_image_data = ca.getPage( 1 ) + cover_hash1 = ii.calculateHash( cover_image_data ) + hash_list = [ cover_hash0, cover_hash1 ] + + comicVine = ComicVineTalker( ) + result = ii.getIssueCoverMatchScore( comicVine, issue_id, hash_list, useRemoteAlternates=True, useLog=False) + + print "Best cover match score is :", result['score'] + if result['score'] < ii.min_alternate_score_thresh: + print "Looks like a match!" + else: + print "Bad score, maybe not a match?" + print result['url'] + + +if __name__ == '__main__': + main()