more scripts
git-svn-id: http://comictagger.googlecode.com/svn/trunk@499 6c5673fe-1810-88d6-992b-cd32ca31540c
This commit is contained in:
parent
235524b06d
commit
00202cc865
82
scripts/find_dupes.py
Executable file
82
scripts/find_dupes.py
Executable file
@ -0,0 +1,82 @@
|
||||
#!/usr/bin/python
|
||||
"""
|
||||
find all duplicate comics
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
from comictaggerlib.comicarchive import *
|
||||
from comictaggerlib.settings import *
|
||||
from comictaggerlib.issuestring import *
|
||||
import comictaggerlib.utils
|
||||
|
||||
|
||||
def main():
    """Report sets of duplicate comics found under the folders given on the command line.

    Only archives carrying CIX metadata are considered.  Two comics are
    treated as duplicates when their metadata yields the same
    series / issue / year key.  Results are printed to stdout.
    """
    # `import comictaggerlib.utils` at module level binds only the name
    # `comictaggerlib`; bind `utils` explicitly instead of relying on one of
    # the star imports to leak it into this namespace.
    from comictaggerlib import utils

    utils.fix_output_encoding()
    settings = ComicTaggerSettings()

    style = MetaDataStyle.CIX

    if len(sys.argv) < 2:
        print("usage: {0} comic_folder ".format(sys.argv[0]))
        return

    filelist = utils.get_recursive_filelist( sys.argv[1:] )

    #first find all comics with metadata
    print("reading in all comics...")
    comic_list = []
    max_name_len = 2
    # Defined before the loop so the "erase the progress line" writes below
    # still work when no tagged comic is found.
    fmt_str = u"{{0:{0}}}".format(max_name_len)
    for filename in filelist:
        ca = ComicArchive(filename, settings )
        if ca.seemsToBeAComicArchive() and ca.hasMetadata( style ):
            # pad to the longest name seen so far, so that a shorter name
            # fully overwrites the previous progress line
            fmt_str = u"{{0:{0}}}".format(max_name_len)
            sys.stdout.write(fmt_str.format( filename ) + "\r")
            sys.stdout.flush()
            comic_list.append((filename, ca.readMetadata( style )))
            max_name_len = max ( max_name_len, len(filename))

    sys.stdout.write(fmt_str.format( "" ) + "\r")
    print("Found {0} tagged comics.".format( len(comic_list)))

    #sort the list by series+issue+year, to put all the dupes together
    def makeKey(x):
        return "<" + unicode(x[1].series) + u" #" + unicode( x[1].issue ) + u" - " + unicode( x[1].year ) + ">"
    comic_list.sort(key=makeKey, reverse=False)

    # look for duplicate blocks: runs of adjacent entries sharing one key
    dupe_set_list = list()
    dupe_set = list()
    prev_key = ""
    for filename, md in comic_list:
        sys.stdout.write(fmt_str.format( filename ) + "\r")
        sys.stdout.flush()

        new_key = makeKey((filename, md))

        if new_key != prev_key:
            # we're on a new potential block; keep the finished one only if
            # it has 2 or more members
            if len(dupe_set) > 1:
                dupe_set_list.append( dupe_set )
            dupe_set = list()

        # keep the metadata with the filename so the report below does not
        # have to re-open the archive
        dupe_set.append((filename, md))
        prev_key = new_key

    # the last run is not followed by a key change, so flush it explicitly
    if len(dupe_set) > 1:
        dupe_set_list.append( dupe_set )

    sys.stdout.write(fmt_str.format( "" ) + "\r")
    print("Found {0} duplicate sets".format( len(dupe_set_list)))

    for dupe_set in dupe_set_list:
        md = dupe_set[0][1]
        # unicode format strings: metadata fields may hold non-ASCII text
        print(u"{0} #{1} ({2})".format( md.series, md.issue, md.year ))
        for filename, _ in dupe_set:
            print(u"------------->{0}".format( filename ))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@ -1,41 +1,46 @@
|
||||
#!/usr/bin/python
|
||||
"""
|
||||
An experiment with comictaggerlib
|
||||
An example script using the comictagger library
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import platform
|
||||
import locale
|
||||
import codecs
|
||||
|
||||
sys.path.append("..")
|
||||
|
||||
from comictaggerlib.comicarchive import *
|
||||
from comictaggerlib.settings import *
|
||||
from comictaggerlib.issuestring import *
|
||||
import comictaggerlib.utils
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
utils.fix_output_encoding()
|
||||
settings = ComicTaggerSettings()
|
||||
|
||||
style = MetaDataStyle.CIX
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print "usage: {0} comic_folder ".format(sys.argv[0])
|
||||
return
|
||||
|
||||
filelist = utils.get_recursive_filelist( sys.argv[1:] )
|
||||
|
||||
#first read in CIX metadata from all files
|
||||
|
||||
#first read in metadata from all files
|
||||
metadata_list = []
|
||||
max_name_len = 2
|
||||
for filename in filelist:
|
||||
ca = ComicArchive(filename, settings )
|
||||
metadata_list.append((filename, ca.readCIX()))
|
||||
#make a list of paired filenames and metadata objects
|
||||
metadata_list.append((filename, ca.readMetadata( style )))
|
||||
|
||||
fmt_str = u"{{0:{0}}}".format(max_name_len)
|
||||
print fmt_str.format( filename ) + "\r",
|
||||
sys.stdout.flush()
|
||||
max_name_len = max ( max_name_len, len(filename))
|
||||
|
||||
print fmt_str.format( "" ) + "\r",
|
||||
|
||||
print "-----------------------------------------------"
|
||||
print "Found {0} comics with {1} tags".format( len(metadata_list), MetaDataStyle.name[style])
|
||||
print "-----------------------------------------------"
|
||||
|
||||
# now, figure out column widths
|
||||
w0 = 4
|
||||
w1 = 4
|
||||
@ -47,18 +52,16 @@ def main():
|
||||
w0 += 2
|
||||
|
||||
# build a format string
|
||||
fmt_str = "{0:" + str(w0) + "} {1:" + str(w1) + "} #{2:6} ({3})"
|
||||
fmt_str = u"{0:" + str(w0) + "} {1:" + str(w1) + "} #{2:6} ({3})"
|
||||
|
||||
# now sort the list by series, and then issue
|
||||
metadata_list.sort(key=lambda x: IssueString(x[1].issue).asString(3), reverse=False)
|
||||
metadata_list.sort(key=lambda x: str(x[1].series).lower()+str(x[1].year), reverse=False)
|
||||
#metadata_list.sort(key=lambda x: x[1].series, reverse=False)
|
||||
metadata_list.sort(key=lambda x: unicode(x[1].series).lower()+str(x[1].year), reverse=False)
|
||||
|
||||
# now print
|
||||
for filename,md in metadata_list:
|
||||
for filename, md in metadata_list:
|
||||
if not md.isEmpty:
|
||||
print fmt_str.format(os.path.split(filename)[1]+":", md.series, md.issue, md.year)
|
||||
|
||||
print fmt_str.format(os.path.split(filename)[1]+":", md.series, md.issue, md.year), md.title
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
81
scripts/make_links.py
Executable file
81
scripts/make_links.py
Executable file
@ -0,0 +1,81 @@
|
||||
#!/usr/bin/python
|
||||
"""
|
||||
create a tree of symlinks to tagged comics, organized by date and by publisher/series
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
from comictaggerlib.comicarchive import *
|
||||
from comictaggerlib.settings import *
|
||||
from comictaggerlib.issuestring import *
|
||||
import comictaggerlib.utils
|
||||
|
||||
def make_folder( folder ):
    """Create `folder`, including missing parents, unless it is already a directory.

    If the folder cannot be created, the error is printed and the script
    terminates with exit status 1.
    """
    if os.path.isdir( folder ):
        return
    try:
        os.makedirs(folder)
    except OSError as e:
        # Someone else may have created it between the check and the call;
        # that is not a failure.
        if os.path.isdir( folder ):
            return
        print("{0} Can't make {1} -- quitting".format(e, folder))
        # sys.exit() rather than quit(): quit() is only injected by the
        # `site` module, and a failure should not exit with status 0.
        sys.exit(1)
|
||||
|
||||
def make_link( source, link ):
    """Create a symbolic link at `link` pointing to `source`.

    Nothing is done when something already exists at `link`.
    """
    # lexists() is also true for a dangling symlink; exists() follows the
    # link, reports it as missing, and os.symlink() would then fail because
    # the name is already taken.
    if not os.path.lexists( link ):
        os.symlink( source , link )
|
||||
|
||||
def main():
    """Build a tree of symlinks to tagged comics under a link root.

    Usage: <script> comic_root link_root

    Every archive under comic_root that carries CBI metadata is linked twice
    below link_root:
        date/<year>/<month>/<file>
        series/<publisher>/<series>/<file>
    """
    # `import comictaggerlib.utils` at module level binds only the name
    # `comictaggerlib`; bind `utils` explicitly instead of relying on one of
    # the star imports to leak it into this namespace.
    from comictaggerlib import utils

    utils.fix_output_encoding()
    settings = ComicTaggerSettings()

    style = MetaDataStyle.CBI

    if len(sys.argv) < 3:
        print("usage: {0} comic_root link_root".format(sys.argv[0]))
        return

    comic_root = sys.argv[1]
    link_root = sys.argv[2]

    print("root is :  {0}".format(comic_root))
    filelist = utils.get_recursive_filelist( [ comic_root ] )
    make_folder( link_root )

    #first find all comics with metadata
    print("reading in all comics...")
    comic_list = []
    max_name_len = 2
    # Defined before the loop so the progress-line writes below still work
    # when no tagged comic is found.
    fmt_str = u"{{0:{0}}}".format(max_name_len)
    for filename in filelist:
        ca = ComicArchive(filename, settings )
        if ca.seemsToBeAComicArchive() and ca.hasMetadata( style ):

            comic_list.append((filename, ca.readMetadata( style )))

            # pad to the longest name seen so far, so that a shorter name
            # fully overwrites the previous progress line
            fmt_str = u"{{0:{0}}}".format(max_name_len)
            sys.stdout.write(fmt_str.format( filename ) + "\r")
            sys.stdout.flush()
            max_name_len = max ( max_name_len, len(filename))

    print(fmt_str.format( "" ))
    print("Found {0} tagged comics.".format( len(comic_list)))

    # walk through the comic list and add subdirs and links for each one
    for filename, md in comic_list:
        sys.stdout.write(fmt_str.format( filename ) + "\r")
        sys.stdout.flush()

        # A relative symlink target is resolved against the directory that
        # holds the link, not the current directory, so link to the
        # absolute path of the comic.
        source = os.path.abspath( filename )
        link_name = os.path.basename( filename )

        #do date organizing:
        if md.month is not None:
            month_str = "{0:02d}".format(int(md.month))
        else:
            month_str = "00"
        date_folder = os.path.join(link_root, "date", str(md.year), month_str)
        make_folder( date_folder )
        make_link( source, os.path.join(date_folder, link_name) )

        #do publisher/series organizing:
        # unicode() rather than str(): these fields may hold non-ASCII text
        series_folder = os.path.join(link_root, "series", unicode(md.publisher), unicode(md.series))
        make_folder( series_folder )
        make_link( source, os.path.join(series_folder, link_name) )
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
129
scripts/remove_ads.py
Executable file
129
scripts/remove_ads.py
Executable file
@ -0,0 +1,129 @@
|
||||
#!/usr/bin/python
|
||||
"""
|
||||
Create new comic archives from old one, removing pages marked as ads
|
||||
and deleted. Walks recursively through the given folders. Originals
|
||||
are kept in a subfolder at the level of the original
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
import shutil
|
||||
|
||||
|
||||
from comictaggerlib.comicarchive import *
|
||||
from comictaggerlib.settings import *
|
||||
import comictagger.utils
|
||||
|
||||
# name of the subfolder, created next to each rewritten archive, that
# receives the original file
subfolder_name = "PRE_AD_REMOVAL"
# Page 'Type' values that mark a page for removal.  The ComicInfo page type
# is spelled 'Advertisement'; the earlier misspelling is kept as well so
# that any page entries written with it still match.
unwanted_types = [ 'Deleted', 'Advertisement', 'Advertisment' ]
|
||||
|
||||
|
||||
def main():
    """Rewrite comic archives without their ad / deleted pages.

    Every zip or rar archive found under the folders given on the command
    line whose CIX metadata marks at least one page with a type listed in
    `unwanted_types` is rebuilt as a new zip without those pages.  The
    original is moved into a `subfolder_name` folder beside it, and the new
    archive takes over the original's filename.
    """
    # NOTE(review): the module-level `import comictagger.utils` names a
    # different package than the other imports (comictaggerlib) -- confirm
    # it resolves.  `utils` is bound here explicitly either way.
    from comictaggerlib import utils

    utils.fix_output_encoding()
    settings = ComicTaggerSettings()

    style = MetaDataStyle.CIX

    filelist = utils.get_recursive_filelist( sys.argv[1:] )

    #first read in CIX metadata from all files, make a list of candidates
    modify_list = []
    for filename in filelist:

        ca = ComicArchive(filename, settings )
        # isZip must be *called*; the bare attribute is always truthy and
        # made this type check pass for every file.
        # NOTE(review): assumes isZip is a method like isRar -- confirm.
        if (ca.isZip() or ca.isRar()) and ca.hasMetadata( style ):
            md = ca.readMetadata( style )
            for p in md.pages:
                if 'Type' in p and p['Type'] in unwanted_types:
                    #This one has pages to remove.  add to list!
                    modify_list.append((filename, md))
                    break

    #now actually process those files
    for filename,md in modify_list:
        ca = ComicArchive(filename, settings )
        # an empty dirname (bare filename) means the current directory
        curr_folder = os.path.dirname( filename ) or "."
        curr_subfolder = os.path.join( curr_folder, subfolder_name )

        #skip any of our generated subfolders...
        if os.path.basename(curr_folder) == subfolder_name:
            continue
        sys.stdout.write("Removing unwanted pages from " + filename)

        # verify that we can write to current folder
        if not os.access(filename, os.W_OK):
            print("Can't move: {0}: skipped!".format(filename))
            continue
        if not os.path.exists( curr_subfolder ) and not os.access(curr_folder, os.W_OK):
            print("Can't create subfolder here: {0}: skipped!".format(filename))
            continue
        if not os.path.exists( curr_subfolder ):
            os.mkdir( curr_subfolder )
        if not os.access(curr_subfolder, os.W_OK):
            print("Can't write to the subfolder here: {0}: skipped!".format(filename))
            continue

        # generate a new file with temp name, in the same folder so the
        # final rename stays within one directory
        tmp_fd, tmp_name = tempfile.mkstemp( dir=curr_folder )
        os.close( tmp_fd )

        # stays None if ZipFile() itself fails, so the error path below
        # does not trip over an unbound name
        zout = None
        try:
            zout = zipfile.ZipFile (tmp_name, 'w')

            # now read in all the pages from the old one, except the ones we want to skip
            new_num = 0
            new_pages = list()
            for p in md.pages:
                if 'Type' in p and p['Type'] in unwanted_types:
                    continue

                pageNum = int(p['Image'])
                name = ca.getPageName( pageNum )
                page_data = ca.getPage( pageNum )
                sys.stdout.write('.')
                sys.stdout.flush()

                #Generate a new name for the page file
                ext = os.path.splitext(name)[1]
                new_name = "page{0:04d}{1}".format(new_num,ext)
                zout.writestr(new_name, page_data)

                # create new page entry, renumbered to its new position
                new_p = dict()
                new_p['Image'] = str(new_num)
                if 'Type' in p:
                    new_p['Type'] = p['Type']
                new_pages.append(new_p)
                new_num += 1

            #preserve the old comment
            zout.comment = ca.archiver.getArchiveComment()

        except Exception as e:
            print("Failure creating new archive: {0}!".format(filename))
            print("{0} {1}".format(e, sys.exc_info()[0]))
            if zout is not None:
                zout.close()
            os.unlink( tmp_name )
        else:
            zout.close()

            # Success!  Now move the files
            shutil.move( filename, curr_subfolder )
            os.rename( tmp_name, filename )
            # TODO: We might have converted a rar to a zip, and should probably change
            #       the extension, as needed.

            print("Done!")

            # Create a new archive object for the new file, and write the old CIX data, with new page info
            ca = ComicArchive( filename, settings )
            md.pages = new_pages
            # NOTE(review): the original call passed only `style`, so the
            # updated `md` was never handed over; this assumes the signature
            # is writeMetadata(metadata, style) -- confirm against ComicArchive.
            ca.writeMetadata( md, style )
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
57
scripts/validate_cover.py
Executable file
57
scripts/validate_cover.py
Executable file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/python
|
||||
"""
|
||||
test archive cover against comicvine for a given issue ID
|
||||
"""
|
||||
import sys
|
||||
sys.path.append("..")
|
||||
import os
|
||||
|
||||
import comictaggerlib.utils
|
||||
from comictaggerlib.settings import *
|
||||
from comictaggerlib.comicarchive import *
|
||||
from comictaggerlib.issueidentifier import *
|
||||
from comictaggerlib.comicvinetalker import *
|
||||
|
||||
def main():
    """Score an archive's cover against the covers of a given issue ID.

    Usage: <script> comicfile issueid

    Hashes the first two pages of the archive, asks IssueIdentifier for the
    cover match score against the issue, and prints the score, a verdict
    and the URL from the result.
    """
    # `import comictaggerlib.utils` at module level binds only the name
    # `comictaggerlib`; bind `utils` explicitly instead of relying on one of
    # the star imports to leak it into this namespace.
    from comictaggerlib import utils

    utils.fix_output_encoding()
    settings = ComicTaggerSettings()

    if len(sys.argv) < 3:
        print("usage: {0} comicfile issueid".format(sys.argv[0]))
        return

    filename = sys.argv[1]
    issue_id = sys.argv[2]

    # the messages below used an undefined `opts.filename`, which raised
    # NameError instead of reporting the problem
    if not os.path.exists(filename):
        print(filename + ": not found!")
        return

    ca = ComicArchive(filename, settings )
    if not ca.seemsToBeAComicArchive():
        print("Sorry, but "+ filename + " is not a comic archive!")
        return

    ii = IssueIdentifier( ca, settings )

    # calculate the hashes of the first two pages
    cover_image_data = ca.getPage( 0 )
    cover_hash0 = ii.calculateHash( cover_image_data )
    cover_image_data = ca.getPage( 1 )
    cover_hash1 = ii.calculateHash( cover_image_data )
    hash_list = [ cover_hash0, cover_hash1 ]

    comicVine = ComicVineTalker( )
    result = ii.getIssueCoverMatchScore( comicVine, issue_id, hash_list, useRemoteAlternates=True, useLog=False)

    print("Best cover match score is : {0}".format(result['score']))
    # a score below the threshold is reported as a match
    if result['score'] < ii.min_alternate_score_thresh:
        print("Looks like a match!")
    else:
        print("Bad score, maybe not a match?")
    print(result['url'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in New Issue
Block a user