more scripts

git-svn-id: http://comictagger.googlecode.com/svn/trunk@499 6c5673fe-1810-88d6-992b-cd32ca31540c
This commit is contained in:
beville 2013-02-14 06:36:28 +00:00
parent 235524b06d
commit 00202cc865
5 changed files with 370 additions and 18 deletions

82
scripts/find_dupes.py Executable file
View File

@ -0,0 +1,82 @@
#!/usr/bin/python
"""
find all duplicate comics
"""
import sys
from comictaggerlib.comicarchive import *
from comictaggerlib.settings import *
from comictaggerlib.issuestring import *
import comictaggerlib.utils
def main():
utils.fix_output_encoding()
settings = ComicTaggerSettings()
style = MetaDataStyle.CIX
if len(sys.argv) < 2:
print "usage: {0} comic_folder ".format(sys.argv[0])
return
filelist = utils.get_recursive_filelist( sys.argv[1:] )
#first find all comics with metadata
print "reading in all comics..."
comic_list = []
max_name_len = 2
for filename in filelist:
ca = ComicArchive(filename, settings )
if ca.seemsToBeAComicArchive() and ca.hasMetadata( style ):
fmt_str = u"{{0:{0}}}".format(max_name_len)
print fmt_str.format( filename ) + "\r",
sys.stdout.flush()
comic_list.append((filename, ca.readMetadata( style )))
max_name_len = max ( max_name_len, len(filename))
print fmt_str.format( "" ) + "\r",
print "Found {0} tagged comics.".format( len(comic_list))
#sort the list by series+issue+year, to put all the dupes together
def makeKey(x):
return "<" + unicode(x[1].series) + u" #" + unicode( x[1].issue ) + u" - " + unicode( x[1].year ) + ">"
comic_list.sort(key=makeKey, reverse=False)
# look for duplicate blocks
dupe_set_list = list()
dupe_set = list()
prev_key = ""
for filename, md in comic_list:
print fmt_str.format( filename ) + "\r",
sys.stdout.flush()
new_key = makeKey((filename, md))
#if the new key same as the last, add to to dupe set
if new_key == prev_key:
dupe_set.append(filename)
#else we're on a new potential block
else:
# only add if the dupe list has 2 or more
if len (dupe_set) > 1:
dupe_set_list.append( dupe_set )
dupe_set = list()
dupe_set.append(filename)
prev_key = new_key
print fmt_str.format( "" ) + "\r",
print "Found {0} duplicate sets".format( len(dupe_set_list))
for dupe_set in dupe_set_list:
ca = ComicArchive(dupe_set[0], settings )
md = ca.readMetadata( style )
print "{0} #{1} ({2})".format( md.series, md.issue, md.year )
for filename in dupe_set:
print "------------->{0}".format( filename )
if __name__ == '__main__':
main()

View File

@ -1,41 +1,46 @@
#!/usr/bin/python
"""
An experiment with comictaggerlib
An example script using the comictagger library
"""
import sys
import os
import platform
import locale
import codecs
sys.path.append("..")
from comictaggerlib.comicarchive import *
from comictaggerlib.settings import *
from comictaggerlib.issuestring import *
import comictaggerlib.utils
def main():
utils.fix_output_encoding()
settings = ComicTaggerSettings()
style = MetaDataStyle.CIX
if len(sys.argv) < 2:
print "usage: {0} comic_folder ".format(sys.argv[0])
return
filelist = utils.get_recursive_filelist( sys.argv[1:] )
#first read in CIX metadata from all files
#first read in metadata from all files
metadata_list = []
max_name_len = 2
for filename in filelist:
ca = ComicArchive(filename, settings )
metadata_list.append((filename, ca.readCIX()))
#make a list of paired filenames and metadata objects
metadata_list.append((filename, ca.readMetadata( style )))
fmt_str = u"{{0:{0}}}".format(max_name_len)
print fmt_str.format( filename ) + "\r",
sys.stdout.flush()
max_name_len = max ( max_name_len, len(filename))
print fmt_str.format( "" ) + "\r",
print "-----------------------------------------------"
print "Found {0} comics with {1} tags".format( len(metadata_list), MetaDataStyle.name[style])
print "-----------------------------------------------"
# now, figure out column widths
w0 = 4
w1 = 4
@ -47,18 +52,16 @@ def main():
w0 += 2
# build a format string
fmt_str = "{0:" + str(w0) + "} {1:" + str(w1) + "} #{2:6} ({3})"
fmt_str = u"{0:" + str(w0) + "} {1:" + str(w1) + "} #{2:6} ({3})"
# now sort the list by series, and then issue
metadata_list.sort(key=lambda x: IssueString(x[1].issue).asString(3), reverse=False)
metadata_list.sort(key=lambda x: str(x[1].series).lower()+str(x[1].year), reverse=False)
#metadata_list.sort(key=lambda x: x[1].series, reverse=False)
metadata_list.sort(key=lambda x: unicode(x[1].series).lower()+str(x[1].year), reverse=False)
# now print
for filename,md in metadata_list:
for filename, md in metadata_list:
if not md.isEmpty:
print fmt_str.format(os.path.split(filename)[1]+":", md.series, md.issue, md.year)
print fmt_str.format(os.path.split(filename)[1]+":", md.series, md.issue, md.year), md.title
if __name__ == '__main__':
main()

81
scripts/make_links.py Executable file
View File

@ -0,0 +1,81 @@
#!/usr/bin/python
"""
Build a tree of symlinks to tagged comics under a link root, organized
by date (year/month) and by publisher/series, using each archive's
embedded metadata.  (The previous docstring, "find all duplicate
comics", was copied from find_dupes.py and did not describe this script.)
"""
import sys
import os
from comictaggerlib.comicarchive import *
from comictaggerlib.settings import *
from comictaggerlib.issuestring import *
import comictaggerlib.utils
def make_folder( folder ):
    """Create *folder* (with any missing parents) if it does not exist.

    On failure the error is printed and the whole script exits, since
    nothing useful can be linked without the target folder.
    """
    if not os.path.exists( folder ):
        try:
            os.makedirs(folder)
        except Exception as e:
            print("{0} Can't make {1} -- quitting".format(e, folder))
            # sys.exit(1) instead of the interactive-only quit() helper,
            # and a nonzero status so callers can detect the failure.
            sys.exit(1)
def make_link( source, link ):
    """Create a symlink at *link* pointing to *source*.

    A no-op when something already exists at *link*, so re-running the
    script never clobbers or duplicates earlier links.
    """
    if os.path.exists( link ):
        return
    os.symlink( source, link )
def main():
utils.fix_output_encoding()
settings = ComicTaggerSettings()
style = MetaDataStyle.CBI
if len(sys.argv) < 3:
print "usage: {0} comic_root link_root".format(sys.argv[0])
return
comic_root = sys.argv[1]
link_root = sys.argv[2]
print "root is : ", comic_root
filelist = utils.get_recursive_filelist( [ comic_root ] )
make_folder( link_root )
#first find all comics with metadata
print "reading in all comics..."
comic_list = []
max_name_len = 2
for filename in filelist:
ca = ComicArchive(filename, settings )
if ca.seemsToBeAComicArchive() and ca.hasMetadata( style ):
comic_list.append((filename, ca.readMetadata( style )))
fmt_str = u"{{0:{0}}}".format(max_name_len)
print fmt_str.format( filename ) + "\r",
sys.stdout.flush()
max_name_len = max ( max_name_len, len(filename))
print fmt_str.format( "" )
print "Found {0} tagged comics.".format( len(comic_list))
# walk through the comic list and add subdirs and links for each one
for filename, md in comic_list:
print fmt_str.format( filename ) + "\r",
sys.stdout.flush()
#do date organizing:
if md.month is not None:
month_str = "{0:02d}".format(int(md.month))
else:
month_str = "00"
date_folder = os.path.join(link_root, "date", str(md.year), month_str)
make_folder( date_folder )
make_link( filename, os.path.join(date_folder, os.path.basename(filename)) )
#do publisher/series organizing:
series_folder = os.path.join(link_root, "series", str(md.publisher), str(md.series))
make_folder( series_folder )
make_link( filename, os.path.join(series_folder, os.path.basename(filename)) )
if __name__ == '__main__':
main()

129
scripts/remove_ads.py Executable file
View File

@ -0,0 +1,129 @@
#!/usr/bin/python
"""
Create new comic archives from old ones, removing pages marked as ads
or deleted. Walks recursively through the given folders. Originals
are kept in a subfolder at the level of the original.
"""
import sys
import os
import tempfile
import zipfile
import shutil

from comictaggerlib.comicarchive import *
from comictaggerlib.settings import *
# BUG FIX: was "import comictagger.utils" — the package is comictaggerlib,
# matching the other scripts and the utils.* calls in main() below.
import comictaggerlib.utils

# Sibling folder where the original archives are moved after processing.
subfolder_name = "PRE_AD_REMOVAL"

# Page "Type" values whose pages get stripped from the archive.
# NOTE(review): 'Advertisment' is misspelled vs. the ComicInfo schema's
# 'Advertisement' — confirm which spelling the tagger actually writes
# before changing it, since the comparison must match exactly.
unwanted_types = [ 'Deleted', 'Advertisment' ]
def main():
utils.fix_output_encoding()
settings = ComicTaggerSettings()
style = MetaDataStyle.CIX
filelist = utils.get_recursive_filelist( sys.argv[1:] )
#first read in CIX metadata from all files, make a list of candidates
modify_list = []
for filename in filelist:
ca = ComicArchive(filename, settings )
if (ca.isZip or ca.isRar()) and ca.hasMetadata( style ):
md = ca.readMetadata( style )
if len(md.pages) != 0:
for p in md.pages:
if p.has_key('Type') and p['Type'] in unwanted_types:
#This one has pages to remove. add to list!
modify_list.append((filename, md))
break
#now actually process those files
for filename,md in modify_list:
ca = ComicArchive(filename, settings )
curr_folder = os.path.dirname( filename )
curr_subfolder = os.path.join( curr_folder, subfolder_name )
#skip any of our generated subfolders...
if os.path.basename(curr_folder) == subfolder_name:
continue
sys.stdout.write("Removing unwanted pages from " + filename)
# verify that we can write to current folder
if not os.access(filename, os.W_OK):
print "Can't move: {0}: skipped!".format(filename)
continue
if not os.path.exists( curr_subfolder ) and not os.access(curr_folder, os.W_OK):
print "Can't create subfolder here: {0}: skipped!".format(filename)
continue
if not os.path.exists( curr_subfolder ):
os.mkdir( curr_subfolder )
if not os.access(curr_subfolder, os.W_OK):
print "Can't write to the subfolder here: {0}: skipped!".format(filename)
continue
# generate a new file with temp name
tmp_fd, tmp_name = tempfile.mkstemp( dir=os.path.dirname(filename) )
os.close( tmp_fd )
try:
zout = zipfile.ZipFile (tmp_name, 'w')
# now read in all the pages from the old one, except the ones we want to skip
new_num = 0
new_pages = list()
for p in md.pages:
if p.has_key('Type') and p['Type'] in unwanted_types:
continue
else:
pageNum = int(p['Image'])
name = ca.getPageName( pageNum )
buffer = ca.getPage( pageNum )
sys.stdout.write('.')
sys.stdout.flush()
#Generate a new name for the page file
ext = os.path.splitext(name)[1]
new_name = "page{0:04d}{1}".format(new_num,ext)
zout.writestr(new_name, buffer)
# create new page entry
new_p = dict()
new_p['Image'] = str(new_num)
if p.has_key('Type'):
new_p['Type'] = p['Type']
new_pages.append(new_p)
new_num += 1
#preserve the old comment
zout.comment = ca.archiver.getArchiveComment()
except Exception as e:
print "Failure creating new archive: {0}!".format(filename)
print e, sys.exc_info()[0]
zout.close()
os.unlink( tmp_name )
else:
zout.close()
# Success! Now move the files
shutil.move( filename, curr_subfolder )
os.rename( tmp_name, filename )
# TODO: We might have converted a rar to a zip, and should probably change
# the extension, as needed.
print "Done!".format(filename)
# Create a new archive object for the new file, and write the old CIX data, with new page info
ca = ComicArchive( filename, settings )
md.pages = new_pages
ca.writeMetadata( style )
if __name__ == '__main__':
main()

57
scripts/validate_cover.py Executable file
View File

@ -0,0 +1,57 @@
#!/usr/bin/python
"""
test archive cover against comicvine for a given issue ID
"""
import sys
sys.path.append("..")
import os
import comictaggerlib.utils
from comictaggerlib.settings import *
from comictaggerlib.comicarchive import *
from comictaggerlib.issueidentifier import *
from comictaggerlib.comicvinetalker import *
def main():
utils.fix_output_encoding()
settings = ComicTaggerSettings()
if len(sys.argv) < 3:
print "usage: {0} comicfile issueid".format(sys.argv[0])
return
filename = sys.argv[1]
issue_id = sys.argv[2]
if not os.path.exists(filename):
print opts.filename + ": not found!"
return
ca = ComicArchive(filename, settings )
if not ca.seemsToBeAComicArchive():
print "Sorry, but "+ opts.filename + " is not a comic archive!"
return
ii = IssueIdentifier( ca, settings )
# calculate the hashes of the first two pages
cover_image_data = ca.getPage( 0 )
cover_hash0 = ii.calculateHash( cover_image_data )
cover_image_data = ca.getPage( 1 )
cover_hash1 = ii.calculateHash( cover_image_data )
hash_list = [ cover_hash0, cover_hash1 ]
comicVine = ComicVineTalker( )
result = ii.getIssueCoverMatchScore( comicVine, issue_id, hash_list, useRemoteAlternates=True, useLog=False)
print "Best cover match score is :", result['score']
if result['score'] < ii.min_alternate_score_thresh:
print "Looks like a match!"
else:
print "Bad score, maybe not a match?"
print result['url']
if __name__ == '__main__':
main()