commit f6439049d8d8b5a4709f1b78afbfd289d00e8c25 Author: Davide Romanini Date: Mon Feb 16 13:27:21 2015 +0100 Squashed 'comicapi/' content from commit b7d2458 git-subtree-dir: comicapi git-subtree-split: b7d2458b80467a47be1d1d58b31ffcac62c2743c diff --git a/UnRAR2/UnRARDLL/license.txt b/UnRAR2/UnRARDLL/license.txt new file mode 100644 index 0000000..0c1540e --- /dev/null +++ b/UnRAR2/UnRARDLL/license.txt @@ -0,0 +1,18 @@ + The unrar.dll library is freeware. This means: + + 1. All copyrights to RAR and the unrar.dll are exclusively + owned by the author - Alexander Roshal. + + 2. The unrar.dll library may be used in any software to handle RAR + archives without limitations free of charge. + + 3. THE RAR ARCHIVER AND THE UNRAR.DLL LIBRARY ARE DISTRIBUTED "AS IS". + NO WARRANTY OF ANY KIND IS EXPRESSED OR IMPLIED. YOU USE AT + YOUR OWN RISK. THE AUTHOR WILL NOT BE LIABLE FOR DATA LOSS, + DAMAGES, LOSS OF PROFITS OR ANY OTHER KIND OF LOSS WHILE USING + OR MISUSING THIS SOFTWARE. + + Thank you for your interest in RAR and unrar.dll. + + + Alexander L. 
Roshal \ No newline at end of file diff --git a/UnRAR2/UnRARDLL/unrar.dll b/UnRAR2/UnRARDLL/unrar.dll new file mode 100644 index 0000000..9757bf3 Binary files /dev/null and b/UnRAR2/UnRARDLL/unrar.dll differ diff --git a/UnRAR2/UnRARDLL/unrar.h b/UnRAR2/UnRARDLL/unrar.h new file mode 100644 index 0000000..7643fa7 --- /dev/null +++ b/UnRAR2/UnRARDLL/unrar.h @@ -0,0 +1,140 @@ +#ifndef _UNRAR_DLL_ +#define _UNRAR_DLL_ + +#define ERAR_END_ARCHIVE 10 +#define ERAR_NO_MEMORY 11 +#define ERAR_BAD_DATA 12 +#define ERAR_BAD_ARCHIVE 13 +#define ERAR_UNKNOWN_FORMAT 14 +#define ERAR_EOPEN 15 +#define ERAR_ECREATE 16 +#define ERAR_ECLOSE 17 +#define ERAR_EREAD 18 +#define ERAR_EWRITE 19 +#define ERAR_SMALL_BUF 20 +#define ERAR_UNKNOWN 21 +#define ERAR_MISSING_PASSWORD 22 + +#define RAR_OM_LIST 0 +#define RAR_OM_EXTRACT 1 +#define RAR_OM_LIST_INCSPLIT 2 + +#define RAR_SKIP 0 +#define RAR_TEST 1 +#define RAR_EXTRACT 2 + +#define RAR_VOL_ASK 0 +#define RAR_VOL_NOTIFY 1 + +#define RAR_DLL_VERSION 4 + +#ifdef _UNIX +#define CALLBACK +#define PASCAL +#define LONG long +#define HANDLE void * +#define LPARAM long +#define UINT unsigned int +#endif + +struct RARHeaderData +{ + char ArcName[260]; + char FileName[260]; + unsigned int Flags; + unsigned int PackSize; + unsigned int UnpSize; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; +}; + + +struct RARHeaderDataEx +{ + char ArcName[1024]; + wchar_t ArcNameW[1024]; + char FileName[1024]; + wchar_t FileNameW[1024]; + unsigned int Flags; + unsigned int PackSize; + unsigned int PackSizeHigh; + unsigned int UnpSize; + unsigned int UnpSizeHigh; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + 
unsigned int CmtState; + unsigned int Reserved[1024]; +}; + + +struct RAROpenArchiveData +{ + char *ArcName; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; +}; + +struct RAROpenArchiveDataEx +{ + char *ArcName; + wchar_t *ArcNameW; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Flags; + unsigned int Reserved[32]; +}; + +enum UNRARCALLBACK_MESSAGES { + UCM_CHANGEVOLUME,UCM_PROCESSDATA,UCM_NEEDPASSWORD +}; + +typedef int (CALLBACK *UNRARCALLBACK)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2); + +typedef int (PASCAL *CHANGEVOLPROC)(char *ArcName,int Mode); +typedef int (PASCAL *PROCESSDATAPROC)(unsigned char *Addr,int Size); + +#ifdef __cplusplus +extern "C" { +#endif + +HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData); +HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *ArchiveData); +int PASCAL RARCloseArchive(HANDLE hArcData); +int PASCAL RARReadHeader(HANDLE hArcData,struct RARHeaderData *HeaderData); +int PASCAL RARReadHeaderEx(HANDLE hArcData,struct RARHeaderDataEx *HeaderData); +int PASCAL RARProcessFile(HANDLE hArcData,int Operation,char *DestPath,char *DestName); +int PASCAL RARProcessFileW(HANDLE hArcData,int Operation,wchar_t *DestPath,wchar_t *DestName); +void PASCAL RARSetCallback(HANDLE hArcData,UNRARCALLBACK Callback,LPARAM UserData); +void PASCAL RARSetChangeVolProc(HANDLE hArcData,CHANGEVOLPROC ChangeVolProc); +void PASCAL RARSetProcessDataProc(HANDLE hArcData,PROCESSDATAPROC ProcessDataProc); +void PASCAL RARSetPassword(HANDLE hArcData,char *Password); +int PASCAL RARGetDllVersion(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/UnRAR2/UnRARDLL/unrar.lib b/UnRAR2/UnRARDLL/unrar.lib new file mode 100644 index 0000000..0f6b314 Binary files /dev/null and b/UnRAR2/UnRARDLL/unrar.lib differ diff --git 
a/UnRAR2/UnRARDLL/unrardll.txt b/UnRAR2/UnRARDLL/unrardll.txt new file mode 100644 index 0000000..291c871 --- /dev/null +++ b/UnRAR2/UnRARDLL/unrardll.txt @@ -0,0 +1,606 @@ + + UnRAR.dll Manual + ~~~~~~~~~~~~~~~~ + + UnRAR.dll is a 32-bit Windows dynamic-link library which provides + file extraction from RAR archives. + + + Exported functions + +==================================================================== +HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData) +==================================================================== + +Description +~~~~~~~~~~~ + Open RAR archive and allocate memory structures + +Parameters +~~~~~~~~~~ +ArchiveData Points to RAROpenArchiveData structure + +struct RAROpenArchiveData +{ + char *ArcName; + UINT OpenMode; + UINT OpenResult; + char *CmtBuf; + UINT CmtBufSize; + UINT CmtSize; + UINT CmtState; +}; + +Structure fields: + +ArcName + Input parameter which should point to zero terminated string + containing the archive name. + +OpenMode + Input parameter. + + Possible values + + RAR_OM_LIST + Open archive for reading file headers only. + + RAR_OM_EXTRACT + Open archive for testing and extracting files. + + RAR_OM_LIST_INCSPLIT + Open archive for reading file headers only. If you open an archive + in such mode, RARReadHeader[Ex] will return all file headers, + including those with "file continued from previous volume" flag. + In case of RAR_OM_LIST such headers are automatically skipped. + So if you process RAR volumes in RAR_OM_LIST_INCSPLIT mode, you will + get several file header records for same file if file is split between + volumes. For such files only the last file header record will contain + the correct file CRC and if you wish to get the correct packed size, + you need to sum up packed sizes of all parts. + +OpenResult + Output parameter. 
+ + Possible values + + 0 Success + ERAR_NO_MEMORY Not enough memory to initialize data structures + ERAR_BAD_DATA Archive header broken + ERAR_BAD_ARCHIVE File is not valid RAR archive + ERAR_UNKNOWN_FORMAT Unknown encryption used for archive headers + ERAR_EOPEN File open error + +CmtBuf + Input parameter which should point to the buffer for archive + comments. Maximum comment size is limited to 64Kb. Comment text is + zero terminated. If the comment text is larger than the buffer + size, the comment text will be truncated. If CmtBuf is set to + NULL, comments will not be read. + +CmtBufSize + Input parameter which should contain size of buffer for archive + comments. + +CmtSize + Output parameter containing size of comments actually read into the + buffer, cannot exceed CmtBufSize. + +CmtState + Output parameter. + + Possible values + + 0 comments not present + 1 Comments read completely + ERAR_NO_MEMORY Not enough memory to extract comments + ERAR_BAD_DATA Broken comment + ERAR_UNKNOWN_FORMAT Unknown comment format + ERAR_SMALL_BUF Buffer too small, comments not completely read + +Return values +~~~~~~~~~~~~~ + Archive handle or NULL in case of error + + +======================================================================== +HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *ArchiveData) +======================================================================== + +Description +~~~~~~~~~~~ + Similar to RAROpenArchive, but uses RAROpenArchiveDataEx structure + allowing to specify Unicode archive name and returning information + about archive flags. 
+ +Parameters +~~~~~~~~~~ +ArchiveData Points to RAROpenArchiveDataEx structure + +struct RAROpenArchiveDataEx +{ + char *ArcName; + wchar_t *ArcNameW; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Flags; + unsigned int Reserved[32]; +}; + +Structure fields: + +ArcNameW + Input parameter which should point to zero terminated Unicode string + containing the archive name or NULL if Unicode name is not specified. + +Flags + Output parameter. Combination of bit flags. + + Possible values + + 0x0001 - Volume attribute (archive volume) + 0x0002 - Archive comment present + 0x0004 - Archive lock attribute + 0x0008 - Solid attribute (solid archive) + 0x0010 - New volume naming scheme ('volname.partN.rar') + 0x0020 - Authenticity information present + 0x0040 - Recovery record present + 0x0080 - Block headers are encrypted + 0x0100 - First volume (set only by RAR 3.0 and later) + +Reserved[32] + Reserved for future use. Must be zero. + +Information on other structure fields and function return values +is available above, in RAROpenArchive function description. + + +==================================================================== +int PASCAL RARCloseArchive(HANDLE hArcData) +==================================================================== + +Description +~~~~~~~~~~~ + Close RAR archive and release allocated memory. It must be called when + archive processing is finished, even if the archive processing was stopped + due to an error. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. 
+ +Return values +~~~~~~~~~~~~~ + 0 Success + ERAR_ECLOSE Archive close error + + +==================================================================== +int PASCAL RARReadHeader(HANDLE hArcData, + struct RARHeaderData *HeaderData) +==================================================================== + +Description +~~~~~~~~~~~ + Read header of file in archive. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. + +HeaderData + It should point to RARHeaderData structure: + +struct RARHeaderData +{ + char ArcName[260]; + char FileName[260]; + UINT Flags; + UINT PackSize; + UINT UnpSize; + UINT HostOS; + UINT FileCRC; + UINT FileTime; + UINT UnpVer; + UINT Method; + UINT FileAttr; + char *CmtBuf; + UINT CmtBufSize; + UINT CmtSize; + UINT CmtState; +}; + +Structure fields: + +ArcName + Output parameter which contains a zero terminated string of the + current archive name. May be used to determine the current volume + name. + +FileName + Output parameter which contains a zero terminated string of the + file name in OEM (DOS) encoding. + +Flags + Output parameter which contains file flags: + + 0x01 - file continued from previous volume + 0x02 - file continued on next volume + 0x04 - file encrypted with password + 0x08 - file comment present + 0x10 - compression of previous files is used (solid flag) + + bits 7 6 5 + + 0 0 0 - dictionary size 64 Kb + 0 0 1 - dictionary size 128 Kb + 0 1 0 - dictionary size 256 Kb + 0 1 1 - dictionary size 512 Kb + 1 0 0 - dictionary size 1024 Kb + 1 0 1 - dictionary size 2048 KB + 1 1 0 - dictionary size 4096 KB + 1 1 1 - file is directory + + Other bits are reserved. + +PackSize + Output parameter means packed file size or size of the + file part if file was split between volumes. + +UnpSize + Output parameter - unpacked file size. + +HostOS + Output parameter - operating system used for archiving: + + 0 - MS DOS; + 1 - OS/2. 
+ 2 - Win32 + 3 - Unix + +FileCRC + Output parameter which contains unpacked file CRC. In case of file parts + split between volumes only the last part contains the correct CRC + and it is accessible only in RAR_OM_LIST_INCSPLIT listing mode. + +FileTime + Output parameter - contains date and time in standard MS DOS format. + +UnpVer + Output parameter - RAR version needed to extract file. + It is encoded as 10 * Major version + minor version. + +Method + Output parameter - packing method. + +FileAttr + Output parameter - file attributes. + +CmtBuf + File comments support is not implemented in the new DLL version yet. + Now CmtState is always 0. + +/* + * Input parameter which should point to the buffer for file + * comments. Maximum comment size is limited to 64Kb. Comment text is + * a zero terminated string in OEM encoding. If the comment text is + * larger than the buffer size, the comment text will be truncated. + * If CmtBuf is set to NULL, comments will not be read. + */ + +CmtBufSize + Input parameter which should contain size of buffer for archive + comments. + +CmtSize + Output parameter containing size of comments actually read into the + buffer, should not exceed CmtBufSize. + +CmtState + Output parameter. 
+ + Possible values + + 0 Absent comments + 1 Comments read completely + ERAR_NO_MEMORY Not enough memory to extract comments + ERAR_BAD_DATA Broken comment + ERAR_UNKNOWN_FORMAT Unknown comment format + ERAR_SMALL_BUF Buffer too small, comments not completely read + +Return values +~~~~~~~~~~~~~ + + 0 Success + ERAR_END_ARCHIVE End of archive + ERAR_BAD_DATA File header broken + + +==================================================================== +int PASCAL RARReadHeaderEx(HANDLE hArcData, + struct RARHeaderDataEx *HeaderData) +==================================================================== + +Description +~~~~~~~~~~~ + Similar to RARReadHeader, but uses RARHeaderDataEx structure, +containing information about Unicode file names and 64 bit file sizes. + +struct RARHeaderDataEx +{ + char ArcName[1024]; + wchar_t ArcNameW[1024]; + char FileName[1024]; + wchar_t FileNameW[1024]; + unsigned int Flags; + unsigned int PackSize; + unsigned int PackSizeHigh; + unsigned int UnpSize; + unsigned int UnpSizeHigh; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Reserved[1024]; +}; + + +==================================================================== +int PASCAL RARProcessFile(HANDLE hArcData, + int Operation, + char *DestPath, + char *DestName) +==================================================================== + +Description +~~~~~~~~~~~ + Performs action and moves the current position in the archive to + the next file. Extract or test the current file from the archive + opened in RAR_OM_EXTRACT mode. If the mode RAR_OM_LIST is set, + then a call to this function will simply skip the archive position + to the next file. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. 
+ +Operation + File operation. + + Possible values + + RAR_SKIP Move to the next file in the archive. If the + archive is solid and RAR_OM_EXTRACT mode was set + when the archive was opened, the current file will + be processed - the operation will be performed + slower than a simple seek. + + RAR_TEST Test the current file and move to the next file in + the archive. If the archive was opened with + RAR_OM_LIST mode, the operation is equal to + RAR_SKIP. + + RAR_EXTRACT Extract the current file and move to the next file. + If the archive was opened with RAR_OM_LIST mode, + the operation is equal to RAR_SKIP. + + +DestPath + This parameter should point to a zero terminated string containing the + destination directory to which to extract files to. If DestPath is equal + to NULL, it means extract to the current directory. This parameter has + meaning only if DestName is NULL. + +DestName + This parameter should point to a string containing the full path and name + to assign to extracted file or it can be NULL to use the default name. + If DestName is defined (not NULL), it overrides both the original file + name saved in the archive and path specigied in DestPath setting. + + Both DestPath and DestName must be in OEM encoding. If necessary, + use CharToOem to convert text to OEM before passing to this function. + +Return values +~~~~~~~~~~~~~ + 0 Success + ERAR_BAD_DATA File CRC error + ERAR_BAD_ARCHIVE Volume is not valid RAR archive + ERAR_UNKNOWN_FORMAT Unknown archive format + ERAR_EOPEN Volume open error + ERAR_ECREATE File create error + ERAR_ECLOSE File close error + ERAR_EREAD Read error + ERAR_EWRITE Write error + + +Note: if you wish to cancel extraction, return -1 when processing + UCM_PROCESSDATA callback message. 
+ + +==================================================================== +int PASCAL RARProcessFileW(HANDLE hArcData, + int Operation, + wchar_t *DestPath, + wchar_t *DestName) +==================================================================== + +Description +~~~~~~~~~~~ + Unicode version of RARProcessFile. It uses Unicode DestPath + and DestName parameters, other parameters and return values + are the same as in RARProcessFile. + + +==================================================================== +void PASCAL RARSetCallback(HANDLE hArcData, + int PASCAL (*CallbackProc)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2), + LPARAM UserData); +==================================================================== + +Description +~~~~~~~~~~~ + Set a user-defined callback function to process Unrar events. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. + +CallbackProc + It should point to a user-defined callback function. + + The function will be passed four parameters: + + + msg Type of event. Described below. + + UserData User defined value passed to RARSetCallback. + + P1 and P2 Event dependent parameters. Described below. + + + Possible events + + UCM_CHANGEVOLUME Process volume change. + + P1 Points to the zero terminated name + of the next volume. + + P2 The function call mode: + + RAR_VOL_ASK Required volume is absent. The function should + prompt user and return a positive value + to retry or return -1 value to terminate + operation. The function may also specify a new + volume name, placing it to the address specified + by P1 parameter. + + RAR_VOL_NOTIFY Required volume is successfully opened. + This is a notification call and volume name + modification is not allowed. The function should + return a positive value to continue or -1 + to terminate operation. + + UCM_PROCESSDATA Process unpacked data. 
It may be used to read + a file while it is being extracted or tested + without actual extracting file to disk. + Return a positive value to continue process + or -1 to cancel the archive operation + + P1 Address pointing to the unpacked data. + Function may refer to the data but must not + change it. + + P2 Size of the unpacked data. It is guaranteed + only that the size will not exceed the maximum + dictionary size (4 Mb in RAR 3.0). + + UCM_NEEDPASSWORD DLL needs a password to process archive. + This message must be processed if you wish + to be able to handle archives with encrypted + file names. It can be also used as replacement + of RARSetPassword function even for usual + encrypted files with non-encrypted names. + + P1 Address pointing to the buffer for a password. + You need to copy a password here. + + P2 Size of the password buffer. + + +UserData + User data passed to callback function. + + Other functions of UnRAR.dll should not be called from the callback + function. + +Return values +~~~~~~~~~~~~~ + None + + + +==================================================================== +void PASCAL RARSetChangeVolProc(HANDLE hArcData, + int PASCAL (*ChangeVolProc)(char *ArcName,int Mode)); +==================================================================== + +Obsoleted, use RARSetCallback instead. + + + +==================================================================== +void PASCAL RARSetProcessDataProc(HANDLE hArcData, + int PASCAL (*ProcessDataProc)(unsigned char *Addr,int Size)) +==================================================================== + +Obsoleted, use RARSetCallback instead. + + +==================================================================== +void PASCAL RARSetPassword(HANDLE hArcData, + char *Password); +==================================================================== + +Description +~~~~~~~~~~~ + Set a password to decrypt files. 
+ +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. + +Password + It should point to a string containing a zero terminated password. + +Return values +~~~~~~~~~~~~~ + None + + +==================================================================== +void PASCAL RARGetDllVersion(); +==================================================================== + +Description +~~~~~~~~~~~ + Returns API version. + +Parameters +~~~~~~~~~~ + None. + +Return values +~~~~~~~~~~~~~ + Returns an integer value denoting UnRAR.dll API version, which is also +defined in unrar.h as RAR_DLL_VERSION. API version number is incremented +only in case of noticeable changes in UnRAR.dll API. Do not confuse it +with version of UnRAR.dll stored in DLL resources, which is incremented +with every DLL rebuild. + + If RARGetDllVersion() returns a value lower than UnRAR.dll which your +application was designed for, it may indicate that DLL version is too old +and it will fail to provide all necessary functions to your application. + + This function is absent in old versions of UnRAR.dll, so it is safer +to use LoadLibrary and GetProcAddress to access this function. + diff --git a/UnRAR2/UnRARDLL/whatsnew.txt b/UnRAR2/UnRARDLL/whatsnew.txt new file mode 100644 index 0000000..84ad72c --- /dev/null +++ b/UnRAR2/UnRARDLL/whatsnew.txt @@ -0,0 +1,80 @@ +List of unrar.dll API changes. We do not include performance and reliability +improvements into this list, but this library and RAR/UnRAR tools share +the same source code. So the latest version of unrar.dll usually contains +same decompression algorithm changes as the latest UnRAR version. +============================================================================ + +-- 18 January 2008 + +all LONG parameters of CallbackProc function were changed +to LPARAM type for 64 bit mode compatibility. 
+ + +-- 12 December 2007 + +Added new RAR_OM_LIST_INCSPLIT open mode for function RAROpenArchive. + + +-- 14 August 2007 + +Added NoCrypt\unrar_nocrypt.dll without decryption code for those +applications where presence of encryption or decryption code is not +allowed because of legal restrictions. + + +-- 14 December 2006 + +Added ERAR_MISSING_PASSWORD error type. This error is returned +if empty password is specified for encrypted file. + + +-- 12 June 2003 + +Added RARProcessFileW function, Unicode version of RARProcessFile + + +-- 9 August 2002 + +Added RAROpenArchiveEx function allowing to specify Unicode archive +name and get archive flags. + + +-- 24 January 2002 + +Added RARReadHeaderEx function allowing to read Unicode file names +and 64 bit file sizes. + + +-- 23 January 2002 + +Added ERAR_UNKNOWN error type (it is used for all errors which +do not have special ERAR code yet) and UCM_NEEDPASSWORD callback +message. + +Unrar.dll automatically opens all next volumes not only when extracting, +but also in RAR_OM_LIST mode. + + +-- 27 November 2001 + +RARSetChangeVolProc and RARSetProcessDataProc are replaced by +the single callback function installed with RARSetCallback. +Unlike old style callbacks, the new function accepts the user defined +parameter. Unrar.dll still supports RARSetChangeVolProc and +RARSetProcessDataProc for compatibility purposes, but if you write +a new application, better use RARSetCallback. + +File comments support is not implemented in the new DLL version yet. +Now CmtState is always 0. + + +-- 13 August 2001 + +Added RARGetDllVersion function, so you may distinguish old unrar.dll, +which used C style callback functions and the new one with PASCAL callbacks. + + +-- 10 May 2001 + +Callback functions in RARSetChangeVolProc and RARSetProcessDataProc +use PASCAL style call convention now. 
diff --git a/UnRAR2/UnRARDLL/x64/readme.txt b/UnRAR2/UnRARDLL/x64/readme.txt new file mode 100644 index 0000000..8f3b4e1 --- /dev/null +++ b/UnRAR2/UnRARDLL/x64/readme.txt @@ -0,0 +1 @@ +This is x64 version of unrar.dll. diff --git a/UnRAR2/UnRARDLL/x64/unrar64.dll b/UnRAR2/UnRARDLL/x64/unrar64.dll new file mode 100644 index 0000000..e17a19e Binary files /dev/null and b/UnRAR2/UnRARDLL/x64/unrar64.dll differ diff --git a/UnRAR2/UnRARDLL/x64/unrar64.lib b/UnRAR2/UnRARDLL/x64/unrar64.lib new file mode 100644 index 0000000..fd03791 Binary files /dev/null and b/UnRAR2/UnRARDLL/x64/unrar64.lib differ diff --git a/UnRAR2/__init__.py b/UnRAR2/__init__.py new file mode 100644 index 0000000..a913fcb --- /dev/null +++ b/UnRAR2/__init__.py @@ -0,0 +1,177 @@ +# Copyright (c) 2003-2005 Jimmy Retzlaff, 2008 Konstantin Yegupov +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +pyUnRAR2 is a ctypes based wrapper around the free UnRAR.dll. 
+ +It is an modified version of Jimmy Retzlaff's pyUnRAR - more simple, +stable and foolproof. +Notice that it has INCOMPATIBLE interface. + +It enables reading and unpacking of archives created with the +RAR/WinRAR archivers. There is a low-level interface which is very +similar to the C interface provided by UnRAR. There is also a +higher level interface which makes some common operations easier. +""" + +__version__ = '0.99.3' + +try: + WindowsError + in_windows = True +except NameError: + in_windows = False + +if in_windows: + from windows import RarFileImplementation +else: + from unix import RarFileImplementation + + +import fnmatch, time, weakref + +class RarInfo(object): + """Represents a file header in an archive. Don't instantiate directly. + Use only to obtain information about file. + YOU CANNOT EXTRACT FILE CONTENTS USING THIS OBJECT. + USE METHODS OF RarFile CLASS INSTEAD. + + Properties: + index - index of file within the archive + filename - name of the file in the archive including path (if any) + datetime - file date/time as a struct_time suitable for time.strftime + isdir - True if the file is a directory + size - size in bytes of the uncompressed file + comment - comment associated with the file + + Note - this is not currently intended to be a Python file-like object. + """ + + def __init__(self, rarfile, data): + self.rarfile = weakref.proxy(rarfile) + self.index = data['index'] + self.filename = data['filename'] + self.isdir = data['isdir'] + self.size = data['size'] + self.datetime = data['datetime'] + self.comment = data['comment'] + + + + def __str__(self): + try : + arcName = self.rarfile.archiveName + except ReferenceError: + arcName = "[ARCHIVE_NO_LONGER_LOADED]" + return '' % (self.filename, arcName) + +class RarFile(RarFileImplementation): + + def __init__(self, archiveName, password=None): + """Instantiate the archive. + + archiveName is the name of the RAR file. + password is used to decrypt the files in the archive. 
+ + Properties: + comment - comment associated with the archive + + >>> print RarFile('test.rar').comment + This is a test. + """ + self.archiveName = archiveName + RarFileImplementation.init(self, password) + + def __del__(self): + self.destruct() + + def infoiter(self): + """Iterate over all the files in the archive, generating RarInfos. + + >>> import os + >>> for fileInArchive in RarFile('test.rar').infoiter(): + ... print os.path.split(fileInArchive.filename)[-1], + ... print fileInArchive.isdir, + ... print fileInArchive.size, + ... print fileInArchive.comment, + ... print tuple(fileInArchive.datetime)[0:5], + ... print time.strftime('%a, %d %b %Y %H:%M', fileInArchive.datetime) + test True 0 None (2003, 6, 30, 1, 59) Mon, 30 Jun 2003 01:59 + test.txt False 20 None (2003, 6, 30, 2, 1) Mon, 30 Jun 2003 02:01 + this.py False 1030 None (2002, 2, 8, 16, 47) Fri, 08 Feb 2002 16:47 + """ + for params in RarFileImplementation.infoiter(self): + yield RarInfo(self, params) + + def infolist(self): + """Return a list of RarInfos, descripting the contents of the archive.""" + return list(self.infoiter()) + + def read_files(self, condition='*'): + """Read specific files from archive into memory. + If "condition" is a list of numbers, then return files which have those positions in infolist. + If "condition" is a string, then it is treated as a wildcard for names of files to extract. + If "condition" is a function, it is treated as a callback function, which accepts a RarInfo object + and returns boolean True (extract) or False (skip). + If "condition" is omitted, all files are returned. + + Returns list of tuples (RarInfo info, str contents) + """ + checker = condition2checker(condition) + return RarFileImplementation.read_files(self, checker) + + + def extract(self, condition='*', path='.', withSubpath=True, overwrite=True): + """Extract specific files from archive to disk. 
+ + If "condition" is a list of numbers, then extract files which have those positions in infolist. + If "condition" is a string, then it is treated as a wildcard for names of files to extract. + If "condition" is a function, it is treated as a callback function, which accepts a RarInfo object + and returns either boolean True (extract) or boolean False (skip). + DEPRECATED: If "condition" callback returns string (only supported for Windows) - + that string will be used as a new name to save the file under. + If "condition" is omitted, all files are extracted. + + "path" is a directory to extract to + "withSubpath" flag denotes whether files are extracted with their full path in the archive. + "overwrite" flag denotes whether extracted files will overwrite old ones. Defaults to true. + + Returns list of RarInfos for extracted files.""" + checker = condition2checker(condition) + return RarFileImplementation.extract(self, checker, path, withSubpath, overwrite) + +def condition2checker(condition): + """Converts different condition types to callback""" + if type(condition) in [str, unicode]: + def smatcher(info): + return fnmatch.fnmatch(info.filename, condition) + return smatcher + elif type(condition) in [list, tuple] and type(condition[0]) in [int, long]: + def imatcher(info): + return info.index in condition + return imatcher + elif callable(condition): + return condition + else: + raise TypeError + + diff --git a/UnRAR2/rar_exceptions.py b/UnRAR2/rar_exceptions.py new file mode 100644 index 0000000..d90d1c8 --- /dev/null +++ b/UnRAR2/rar_exceptions.py @@ -0,0 +1,30 @@ +# Copyright (c) 2003-2005 Jimmy Retzlaff, 2008 Konstantin Yegupov +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of 
the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Low level interface - see UnRARDLL\UNRARDLL.TXT + + +class ArchiveHeaderBroken(Exception): pass +class InvalidRARArchive(Exception): pass +class FileOpenError(Exception): pass +class IncorrectRARPassword(Exception): pass +class InvalidRARArchiveUsage(Exception): pass diff --git a/UnRAR2/test_UnRAR2.py b/UnRAR2/test_UnRAR2.py new file mode 100644 index 0000000..e86ba2c --- /dev/null +++ b/UnRAR2/test_UnRAR2.py @@ -0,0 +1,138 @@ +import os, sys + +import UnRAR2 +from UnRAR2.rar_exceptions import * + + +def cleanup(dir='test'): + for path, dirs, files in os.walk(dir): + for fn in files: + os.remove(os.path.join(path, fn)) + for dir in dirs: + os.removedirs(os.path.join(path, dir)) + + +# basic test +cleanup() +rarc = UnRAR2.RarFile('test.rar') +rarc.infolist() +assert rarc.comment == "This is a test." 
+for info in rarc.infoiter(): + saveinfo = info + assert (str(info)=="""""") + break +rarc.extract() +assert os.path.exists('test'+os.sep+'test.txt') +assert os.path.exists('test'+os.sep+'this.py') +del rarc +assert (str(saveinfo)=="""""") +cleanup() + +# extract all the files in test.rar +cleanup() +UnRAR2.RarFile('test.rar').extract() +assert os.path.exists('test'+os.sep+'test.txt') +assert os.path.exists('test'+os.sep+'this.py') +cleanup() + +# extract all the files in test.rar matching the wildcard *.txt +cleanup() +UnRAR2.RarFile('test.rar').extract('*.txt') +assert os.path.exists('test'+os.sep+'test.txt') +assert not os.path.exists('test'+os.sep+'this.py') +cleanup() + + +# check the name and size of each file, extracting small ones +cleanup() +archive = UnRAR2.RarFile('test.rar') +assert archive.comment == 'This is a test.' +archive.extract(lambda rarinfo: rarinfo.size <= 1024) +for rarinfo in archive.infoiter(): + if rarinfo.size <= 1024 and not rarinfo.isdir: + assert rarinfo.size == os.stat(rarinfo.filename).st_size +assert file('test'+os.sep+'test.txt', 'rt').read() == 'This is only a test.' +assert not os.path.exists('test'+os.sep+'this.py') +cleanup() + + +# extract this.py, overriding it's destination +cleanup('test2') +archive = UnRAR2.RarFile('test.rar') +archive.extract('*.py', 'test2', False) +assert os.path.exists('test2'+os.sep+'this.py') +cleanup('test2') + + +# extract test.txt to memory +cleanup() +archive = UnRAR2.RarFile('test.rar') +entries = UnRAR2.RarFile('test.rar').read_files('*test.txt') +assert len(entries)==1 +assert entries[0][0].filename.endswith('test.txt') +assert entries[0][1]=='This is only a test.' 
+ + +# extract all the files in test.rar with overwriting +cleanup() +fo = open('test'+os.sep+'test.txt',"wt") +fo.write("blah") +fo.close() +UnRAR2.RarFile('test.rar').extract('*.txt') +assert open('test'+os.sep+'test.txt',"rt").read()!="blah" +cleanup() + +# extract all the files in test.rar without overwriting +cleanup() +fo = open('test'+os.sep+'test.txt',"wt") +fo.write("blahblah") +fo.close() +UnRAR2.RarFile('test.rar').extract('*.txt', overwrite = False) +assert open('test'+os.sep+'test.txt',"rt").read()=="blahblah" +cleanup() + +# list big file in an archive +list(UnRAR2.RarFile('test_nulls.rar').infoiter()) + +# extract files from an archive with protected files +cleanup() +rarc = UnRAR2.RarFile('test_protected_files.rar', password="protected") +rarc.extract() +assert os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +cleanup() +errored = False +try: + UnRAR2.RarFile('test_protected_files.rar', password="proteqted").extract() +except IncorrectRARPassword: + errored = True +assert not os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +assert errored +cleanup() + +# extract files from an archive with protected headers +cleanup() +UnRAR2.RarFile('test_protected_headers.rar', password="secret").extract() +assert os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +cleanup() +errored = False +try: + UnRAR2.RarFile('test_protected_headers.rar', password="seqret").extract() +except IncorrectRARPassword: + errored = True +assert not os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +assert errored +cleanup() + +# make sure docstring examples are working +import doctest +doctest.testmod(UnRAR2) + +# update documentation +import pydoc +pydoc.writedoc(UnRAR2) + +# cleanup +try: + os.remove('__init__.pyc') +except: + pass diff --git a/UnRAR2/unix.py b/UnRAR2/unix.py new file mode 100644 index 0000000..bd9ee85 --- /dev/null +++ b/UnRAR2/unix.py @@ -0,0 +1,218 @@ +# Copyright (c) 2003-2005 Jimmy Retzlaff, 2008 Konstantin Yegupov +# +# Permission is 
hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# Unix version uses unrar command line executable + +import subprocess +import gc + +import os, os.path +import time, re + +from rar_exceptions import * + +class UnpackerNotInstalled(Exception): pass + +rar_executable_cached = None +rar_executable_version = None + +def call_unrar(params): + "Calls rar/unrar command line executable, returns stdout pipe" + global rar_executable_cached + if rar_executable_cached is None: + for command in ('unrar', 'rar'): + try: + subprocess.Popen([command], stdout=subprocess.PIPE) + rar_executable_cached = command + break + except OSError: + pass + if rar_executable_cached is None: + raise UnpackerNotInstalled("No suitable RAR unpacker installed") + + assert type(params) == list, "params must be list" + args = [rar_executable_cached] + params + try: + gc.disable() # See http://bugs.python.org/issue1336 + return subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + finally: + gc.enable() + +class RarFileImplementation(object): + + def init(self, password=None): + global rar_executable_version + self.password = password + + + stdoutdata, stderrdata = self.call('v', []).communicate() + + for line in stderrdata.splitlines(): + if line.strip().startswith("Cannot open"): + raise FileOpenError + if line.find("CRC failed")>=0: + raise IncorrectRARPassword + accum = [] + source = iter(stdoutdata.splitlines()) + line = '' + while not (line.startswith('UNRAR')): + line = source.next() + signature = line + # The code below is mighty flaky + # and will probably crash on localized versions of RAR + # but I see no safe way to rewrite it using a CLI tool + if signature.startswith("UNRAR 4"): + rar_executable_version = 4 + while not (line.startswith('Comment:') or line.startswith('Pathname/Comment')): + if line.strip().endswith('is not RAR archive'): + raise InvalidRARArchive + line = source.next() + while not line.startswith('Pathname/Comment'): + accum.append(line.rstrip('\n')) + line = source.next() + if len(accum): + 
accum[0] = accum[0][9:] # strip out "Comment:" part + self.comment = '\n'.join(accum[:-1]) + else: + self.comment = None + elif signature.startswith("UNRAR 5"): + rar_executable_version = 5 + line = source.next() + while not line.startswith('Archive:'): + if line.strip().endswith('is not RAR archive'): + raise InvalidRARArchive + accum.append(line.rstrip('\n')) + line = source.next() + if len(accum): + self.comment = '\n'.join(accum[:-1]).strip() + else: + self.comment = None + else: + raise UnpackerNotInstalled("Unsupported RAR version, expected 4.x or 5.x, found: " + + signature.split(" ")[1]) + + + def escaped_password(self): + return '-' if self.password == None else self.password + + + def call(self, cmd, options=[], files=[]): + options2 = options + ['p'+self.escaped_password()] + soptions = ['-'+x for x in options2] + return call_unrar([cmd]+soptions+['--',self.archiveName]+files) + + def infoiter(self): + + command = "v" if rar_executable_version == 4 else "l" + stdoutdata, stderrdata = self.call(command, ['c-']).communicate() + + for line in stderrdata.splitlines(): + if line.strip().startswith("Cannot open"): + raise FileOpenError + + accum = [] + source = iter(stdoutdata.splitlines()) + line = '' + while not line.startswith('-----------'): + if line.strip().endswith('is not RAR archive'): + raise InvalidRARArchive + if line.startswith("CRC failed") or line.startswith("Checksum error"): + raise IncorrectRARPassword + line = source.next() + line = source.next() + i = 0 + re_spaces = re.compile(r"\s+") + if rar_executable_version == 4: + while not line.startswith('-----------'): + accum.append(line) + if len(accum)==2: + data = {} + data['index'] = i + # asterisks mark password-encrypted files + data['filename'] = accum[0].strip().lstrip("*") # asterisks marks password-encrypted files + fields = re_spaces.split(accum[1].strip()) + data['size'] = int(fields[0]) + attr = fields[5] + data['isdir'] = 'd' in attr.lower() + data['datetime'] = 
time.strptime(fields[3]+" "+fields[4], '%d-%m-%y %H:%M') + data['comment'] = None + yield data + accum = [] + i += 1 + line = source.next() + elif rar_executable_version == 5: + while not line.startswith('-----------'): + fields = line.strip().lstrip("*").split() + data = {} + data['index'] = i + data['filename'] = " ".join(fields[4:]) + data['size'] = int(fields[1]) + attr = fields[0] + data['isdir'] = 'd' in attr.lower() + data['datetime'] = time.strptime(fields[2]+" "+fields[3], '%d-%m-%y %H:%M') + data['comment'] = None + yield data + i += 1 + line = source.next() + + + def read_files(self, checker): + res = [] + for info in self.infoiter(): + checkres = checker(info) + if checkres==True and not info.isdir: + pipe = self.call('p', ['inul'], [info.filename]).stdout + res.append((info, pipe.read())) + return res + + + def extract(self, checker, path, withSubpath, overwrite): + res = [] + command = 'x' + if not withSubpath: + command = 'e' + options = [] + if overwrite: + options.append('o+') + else: + options.append('o-') + if not path.endswith(os.sep): + path += os.sep + names = [] + for info in self.infoiter(): + checkres = checker(info) + if type(checkres) in [str, unicode]: + raise NotImplementedError("Condition callbacks returning strings are deprecated and only supported in Windows") + if checkres==True and not info.isdir: + names.append(info.filename) + res.append(info) + names.append(path) + proc = self.call(command, options, names) + stdoutdata, stderrdata = proc.communicate() + if stderrdata.find("CRC failed")>=0 or stderrdata.find("Checksum error")>=0: + raise IncorrectRARPassword + return res + + def destruct(self): + pass + + diff --git a/UnRAR2/windows.py b/UnRAR2/windows.py new file mode 100644 index 0000000..bb92481 --- /dev/null +++ b/UnRAR2/windows.py @@ -0,0 +1,309 @@ +# Copyright (c) 2003-2005 Jimmy Retzlaff, 2008 Konstantin Yegupov +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and 
associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Low level interface - see UnRARDLL\UNRARDLL.TXT + +from __future__ import generators + +import ctypes, ctypes.wintypes +import os, os.path, sys +import Queue +import time + +from rar_exceptions import * + +ERAR_END_ARCHIVE = 10 +ERAR_NO_MEMORY = 11 +ERAR_BAD_DATA = 12 +ERAR_BAD_ARCHIVE = 13 +ERAR_UNKNOWN_FORMAT = 14 +ERAR_EOPEN = 15 +ERAR_ECREATE = 16 +ERAR_ECLOSE = 17 +ERAR_EREAD = 18 +ERAR_EWRITE = 19 +ERAR_SMALL_BUF = 20 +ERAR_UNKNOWN = 21 + +RAR_OM_LIST = 0 +RAR_OM_EXTRACT = 1 + +RAR_SKIP = 0 +RAR_TEST = 1 +RAR_EXTRACT = 2 + +RAR_VOL_ASK = 0 +RAR_VOL_NOTIFY = 1 + +RAR_DLL_VERSION = 3 + +# enum UNRARCALLBACK_MESSAGES +UCM_CHANGEVOLUME = 0 +UCM_PROCESSDATA = 1 +UCM_NEEDPASSWORD = 2 + +architecture_bits = ctypes.sizeof(ctypes.c_voidp)*8 +dll_name = "unrar.dll" +if architecture_bits == 64: + dll_name = "x64\\unrar64.dll" + + +try: + unrar = ctypes.WinDLL(os.path.join(os.path.split(__file__)[0], 'UnRARDLL', dll_name)) +except WindowsError: + unrar = ctypes.WinDLL(dll_name) + + +class 
RAROpenArchiveDataEx(ctypes.Structure): + def __init__(self, ArcName=None, ArcNameW=u'', OpenMode=RAR_OM_LIST): + self.CmtBuf = ctypes.c_buffer(64*1024) + ctypes.Structure.__init__(self, ArcName=ArcName, ArcNameW=ArcNameW, OpenMode=OpenMode, _CmtBuf=ctypes.addressof(self.CmtBuf), CmtBufSize=ctypes.sizeof(self.CmtBuf)) + + _fields_ = [ + ('ArcName', ctypes.c_char_p), + ('ArcNameW', ctypes.c_wchar_p), + ('OpenMode', ctypes.c_uint), + ('OpenResult', ctypes.c_uint), + ('_CmtBuf', ctypes.c_voidp), + ('CmtBufSize', ctypes.c_uint), + ('CmtSize', ctypes.c_uint), + ('CmtState', ctypes.c_uint), + ('Flags', ctypes.c_uint), + ('Reserved', ctypes.c_uint*32), + ] + +class RARHeaderDataEx(ctypes.Structure): + def __init__(self): + self.CmtBuf = ctypes.c_buffer(64*1024) + ctypes.Structure.__init__(self, _CmtBuf=ctypes.addressof(self.CmtBuf), CmtBufSize=ctypes.sizeof(self.CmtBuf)) + + _fields_ = [ + ('ArcName', ctypes.c_char*1024), + ('ArcNameW', ctypes.c_wchar*1024), + ('FileName', ctypes.c_char*1024), + ('FileNameW', ctypes.c_wchar*1024), + ('Flags', ctypes.c_uint), + ('PackSize', ctypes.c_uint), + ('PackSizeHigh', ctypes.c_uint), + ('UnpSize', ctypes.c_uint), + ('UnpSizeHigh', ctypes.c_uint), + ('HostOS', ctypes.c_uint), + ('FileCRC', ctypes.c_uint), + ('FileTime', ctypes.c_uint), + ('UnpVer', ctypes.c_uint), + ('Method', ctypes.c_uint), + ('FileAttr', ctypes.c_uint), + ('_CmtBuf', ctypes.c_voidp), + ('CmtBufSize', ctypes.c_uint), + ('CmtSize', ctypes.c_uint), + ('CmtState', ctypes.c_uint), + ('Reserved', ctypes.c_uint*1024), + ] + +def DosDateTimeToTimeTuple(dosDateTime): + """Convert an MS-DOS format date time to a Python time tuple. 
+ """ + dosDate = dosDateTime >> 16 + dosTime = dosDateTime & 0xffff + day = dosDate & 0x1f + month = (dosDate >> 5) & 0xf + year = 1980 + (dosDate >> 9) + second = 2*(dosTime & 0x1f) + minute = (dosTime >> 5) & 0x3f + hour = dosTime >> 11 + return time.localtime(time.mktime((year, month, day, hour, minute, second, 0, 1, -1))) + +def _wrap(restype, function, argtypes): + result = function + result.argtypes = argtypes + result.restype = restype + return result + +RARGetDllVersion = _wrap(ctypes.c_int, unrar.RARGetDllVersion, []) + +RAROpenArchiveEx = _wrap(ctypes.wintypes.HANDLE, unrar.RAROpenArchiveEx, [ctypes.POINTER(RAROpenArchiveDataEx)]) + +RARReadHeaderEx = _wrap(ctypes.c_int, unrar.RARReadHeaderEx, [ctypes.wintypes.HANDLE, ctypes.POINTER(RARHeaderDataEx)]) + +_RARSetPassword = _wrap(ctypes.c_int, unrar.RARSetPassword, [ctypes.wintypes.HANDLE, ctypes.c_char_p]) +def RARSetPassword(*args, **kwargs): + _RARSetPassword(*args, **kwargs) + +RARProcessFile = _wrap(ctypes.c_int, unrar.RARProcessFile, [ctypes.wintypes.HANDLE, ctypes.c_int, ctypes.c_char_p, ctypes.c_char_p]) + +RARCloseArchive = _wrap(ctypes.c_int, unrar.RARCloseArchive, [ctypes.wintypes.HANDLE]) + +UNRARCALLBACK = ctypes.WINFUNCTYPE(ctypes.c_int, ctypes.c_uint, ctypes.c_long, ctypes.c_long, ctypes.c_long) +RARSetCallback = _wrap(ctypes.c_int, unrar.RARSetCallback, [ctypes.wintypes.HANDLE, UNRARCALLBACK, ctypes.c_long]) + + + +RARExceptions = { + ERAR_NO_MEMORY : MemoryError, + ERAR_BAD_DATA : ArchiveHeaderBroken, + ERAR_BAD_ARCHIVE : InvalidRARArchive, + ERAR_EOPEN : FileOpenError, + } + +class PassiveReader: + """Used for reading files to memory""" + def __init__(self, usercallback = None): + self.buf = [] + self.ucb = usercallback + + def _callback(self, msg, UserData, P1, P2): + if msg == UCM_PROCESSDATA: + data = (ctypes.c_char*P2).from_address(P1).raw + if self.ucb!=None: + self.ucb(data) + else: + self.buf.append(data) + return 1 + + def get_result(self): + return ''.join(self.buf) + +class 
RarInfoIterator(object): + def __init__(self, arc): + self.arc = arc + self.index = 0 + self.headerData = RARHeaderDataEx() + self.res = RARReadHeaderEx(self.arc._handle, ctypes.byref(self.headerData)) + if self.res==ERAR_BAD_DATA: + raise IncorrectRARPassword + self.arc.lockStatus = "locked" + self.arc.needskip = False + + def __iter__(self): + return self + + def next(self): + if self.index>0: + if self.arc.needskip: + RARProcessFile(self.arc._handle, RAR_SKIP, None, None) + self.res = RARReadHeaderEx(self.arc._handle, ctypes.byref(self.headerData)) + + if self.res: + raise StopIteration + self.arc.needskip = True + + data = {} + data['index'] = self.index + data['filename'] = self.headerData.FileName + data['datetime'] = DosDateTimeToTimeTuple(self.headerData.FileTime) + data['isdir'] = ((self.headerData.Flags & 0xE0) == 0xE0) + data['size'] = self.headerData.UnpSize + (self.headerData.UnpSizeHigh << 32) + if self.headerData.CmtState == 1: + data['comment'] = self.headerData.CmtBuf.value + else: + data['comment'] = None + self.index += 1 + return data + + + def __del__(self): + self.arc.lockStatus = "finished" + +def generate_password_provider(password): + def password_provider_callback(msg, UserData, P1, P2): + if msg == UCM_NEEDPASSWORD and password!=None: + (ctypes.c_char*P2).from_address(P1).value = password + return 1 + return password_provider_callback + +class RarFileImplementation(object): + + def init(self, password=None): + self.password = password + archiveData = RAROpenArchiveDataEx(ArcNameW=self.archiveName, OpenMode=RAR_OM_EXTRACT) + self._handle = RAROpenArchiveEx(ctypes.byref(archiveData)) + self.c_callback = UNRARCALLBACK(generate_password_provider(self.password)) + RARSetCallback(self._handle, self.c_callback, 1) + + if archiveData.OpenResult != 0: + raise RARExceptions[archiveData.OpenResult] + + if archiveData.CmtState == 1: + self.comment = archiveData.CmtBuf.value + else: + self.comment = None + + if password: + RARSetPassword(self._handle, 
password) + + self.lockStatus = "ready" + + + + def destruct(self): + if self._handle and RARCloseArchive: + RARCloseArchive(self._handle) + + def make_sure_ready(self): + if self.lockStatus == "locked": + raise InvalidRARArchiveUsage("cannot execute infoiter() without finishing previous one") + if self.lockStatus == "finished": + self.destruct() + self.init(self.password) + + def infoiter(self): + self.make_sure_ready() + return RarInfoIterator(self) + + def read_files(self, checker): + res = [] + for info in self.infoiter(): + if checker(info) and not info.isdir: + reader = PassiveReader() + c_callback = UNRARCALLBACK(reader._callback) + RARSetCallback(self._handle, c_callback, 1) + tmpres = RARProcessFile(self._handle, RAR_TEST, None, None) + if tmpres==ERAR_BAD_DATA: + raise IncorrectRARPassword + self.needskip = False + res.append((info, reader.get_result())) + return res + + + def extract(self, checker, path, withSubpath, overwrite): + res = [] + for info in self.infoiter(): + checkres = checker(info) + if checkres!=False and not info.isdir: + if checkres==True: + fn = info.filename + if not withSubpath: + fn = os.path.split(fn)[-1] + target = os.path.join(path, fn) + else: + raise DeprecationWarning, "Condition callbacks returning strings are deprecated and only supported in Windows" + target = checkres + if overwrite or (not os.path.exists(target)): + tmpres = RARProcessFile(self._handle, RAR_EXTRACT, None, target) + if tmpres==ERAR_BAD_DATA: + raise IncorrectRARPassword + + self.needskip = False + res.append(info) + return res + + diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..0d9bd7c --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +__author__ = 'dromanin' diff --git a/comet.py b/comet.py new file mode 100644 index 0000000..1a06977 --- /dev/null +++ b/comet.py @@ -0,0 +1,260 @@ +""" +A python class to encapsulate CoMet data +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the 
"License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from datetime import datetime +import zipfile +from pprint import pprint +import xml.etree.ElementTree as ET +from genericmetadata import GenericMetadata +import utils + +class CoMet: + + writer_synonyms = ['writer', 'plotter', 'scripter'] + penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] + inker_synonyms = [ 'inker', 'artist', 'finishes' ] + colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] + letterer_synonyms = [ 'letterer'] + cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] + editor_synonyms = [ 'editor'] + + def metadataFromString( self, string ): + + tree = ET.ElementTree(ET.fromstring( string )) + return self.convertXMLToMetadata( tree ) + + def stringFromMetadata( self, metadata ): + + header = '\n' + + tree = self.convertMetadataToXML( self, metadata ) + return header + ET.tostring(tree.getroot()) + + def indent( self, elem, level=0 ): + # for making the XML output readable + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + self.indent( elem, level+1 ) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + def convertMetadataToXML( self, filename, metadata ): + + #shorthand for the metadata + md = metadata + + # build a tree structure + root = ET.Element("comet") + 
root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/" + root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" + root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd" + + #helper func + def assign( comet_entry, md_entry): + if md_entry is not None: + ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry) + + # title is manditory + if md.title is None: + md.title = "" + assign( 'title', md.title ) + assign( 'series', md.series ) + assign( 'issue', md.issue ) #must be int?? + assign( 'volume', md.volume ) + assign( 'description', md.comments ) + assign( 'publisher', md.publisher ) + assign( 'pages', md.pageCount ) + assign( 'format', md.format ) + assign( 'language', md.language ) + assign( 'rating', md.maturityRating ) + assign( 'price', md.price ) + assign( 'isVersionOf', md.isVersionOf ) + assign( 'rights', md.rights ) + assign( 'identifier', md.identifier ) + assign( 'lastMark', md.lastMark ) + assign( 'genre', md.genre ) # TODO repeatable + + if md.characters is not None: + char_list = [ c.strip() for c in md.characters.split(',') ] + for c in char_list: + assign( 'character', c ) + + if md.manga is not None and md.manga == "YesAndRightToLeft": + assign( 'readingDirection', "rtl") + + date_str = "" + if md.year is not None: + date_str = str(md.year).zfill(4) + if md.month is not None: + date_str += "-" + str(md.month).zfill(2) + assign( 'date', date_str ) + + assign( 'coverImage', md.coverImage ) + + # need to specially process the credits, since they are structured differently than CIX + credit_writer_list = list() + credit_penciller_list = list() + credit_inker_list = list() + credit_colorist_list = list() + credit_letterer_list = list() + credit_cover_list = list() + credit_editor_list = list() + + # loop thru credits, and build a list for each role that CoMet supports + for credit in metadata.credits: + + if credit['role'].lower() in set( self.writer_synonyms ): + ET.SubElement(root, 
'writer').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.penciller_synonyms ): + ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.inker_synonyms ): + ET.SubElement(root, 'inker').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.colorist_synonyms ): + ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.letterer_synonyms ): + ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.cover_synonyms ): + ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.editor_synonyms ): + ET.SubElement(root, 'editor').text = u"{0}".format(credit['person']) + + + # self pretty-print + self.indent(root) + + # wrap it in an ElementTree instance, and save as XML + tree = ET.ElementTree(root) + return tree + + + def convertXMLToMetadata( self, tree ): + + root = tree.getroot() + + if root.tag != 'comet': + raise 1 + return None + + metadata = GenericMetadata() + md = metadata + + # Helper function + def xlate( tag ): + node = root.find( tag ) + if node is not None: + return node.text + else: + return None + + md.series = xlate( 'series' ) + md.title = xlate( 'title' ) + md.issue = xlate( 'issue' ) + md.volume = xlate( 'volume' ) + md.comments = xlate( 'description' ) + md.publisher = xlate( 'publisher' ) + md.language = xlate( 'language' ) + md.format = xlate( 'format' ) + md.pageCount = xlate( 'pages' ) + md.maturityRating = xlate( 'rating' ) + md.price = xlate( 'price' ) + md.isVersionOf = xlate( 'isVersionOf' ) + md.rights = xlate( 'rights' ) + md.identifier = xlate( 'identifier' ) + md.lastMark = xlate( 'lastMark' ) + md.genre = xlate( 'genre' ) # TODO - repeatable field + + date = xlate( 'date' ) + if date is not None: + parts = date.split('-') + if len( parts) > 0: + 
md.year = parts[0] + if len( parts) > 1: + md.month = parts[1] + + md.coverImage = xlate( 'coverImage' ) + + readingDirection = xlate( 'readingDirection' ) + if readingDirection is not None and readingDirection == "rtl": + md.manga = "YesAndRightToLeft" + + # loop for character tags + char_list = [] + for n in root: + if n.tag == 'character': + char_list.append(n.text.strip()) + md.characters = utils.listToString( char_list ) + + # Now extract the credit info + for n in root: + if ( n.tag == 'writer' or + n.tag == 'penciller' or + n.tag == 'inker' or + n.tag == 'colorist' or + n.tag == 'letterer' or + n.tag == 'editor' + ): + metadata.addCredit( n.text.strip(), n.tag.title() ) + + if n.tag == 'coverDesigner': + metadata.addCredit( n.text.strip(), "Cover" ) + + + metadata.isEmpty = False + + return metadata + + #verify that the string actually contains CoMet data in XML format + def validateString( self, string ): + try: + tree = ET.ElementTree(ET.fromstring( string )) + root = tree.getroot() + if root.tag != 'comet': + raise Exception + except: + return False + + return True + + + def writeToExternalFile( self, filename, metadata ): + + tree = self.convertMetadataToXML( self, metadata ) + #ET.dump(tree) + tree.write(filename, encoding='utf-8') + + def readFromExternalFile( self, filename ): + + tree = ET.parse( filename ) + return self.convertXMLToMetadata( tree ) + diff --git a/comicarchive.py b/comicarchive.py new file mode 100644 index 0000000..381dc68 --- /dev/null +++ b/comicarchive.py @@ -0,0 +1,1088 @@ +""" +A python class to represent a single comic, be it file or folder of images +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import zipfile +import os +import struct +import sys +import tempfile +import subprocess +import platform +import locale +from natsort import natsorted + +if platform.system() == "Windows": + import _subprocess +import time + +import StringIO +try: + import Image + pil_available = True +except ImportError: + pil_available = False + +sys.path.insert(0, os.path.abspath(".") ) +import UnRAR2 +from UnRAR2.rar_exceptions import * + +#from settings import ComicTaggerSettings +from comicinfoxml import ComicInfoXml +from comicbookinfo import ComicBookInfo +from comet import CoMet +from genericmetadata import GenericMetadata, PageType +from filenameparser import FileNameParser +from PyPDF2 import PdfFileReader + +class MetaDataStyle: + CBI = 0 + CIX = 1 + COMET = 2 + name = [ 'ComicBookLover', 'ComicRack', 'CoMet' ] + +class ZipArchiver: + + def __init__( self, path ): + self.path = path + + def getArchiveComment( self ): + zf = zipfile.ZipFile( self.path, 'r' ) + comment = zf.comment + zf.close() + return comment + + def setArchiveComment( self, comment ): + return self.writeZipComment( self.path, comment ) + + def readArchiveFile( self, archive_file ): + data = "" + zf = zipfile.ZipFile( self.path, 'r' ) + + try: + data = zf.read( archive_file ) + except zipfile.BadZipfile as e: + print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) + zf.close() + raise IOError + except Exception as e: + zf.close() + print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) + raise IOError + finally: + zf.close() + return 
data + + def removeArchiveFile( self, archive_file ): + try: + self.rebuildZipFile( [ archive_file ] ) + except: + return False + else: + return True + + def writeArchiveFile( self, archive_file, data ): + # At the moment, no other option but to rebuild the whole + # zip archive w/o the indicated file. Very sucky, but maybe + # another solution can be found + try: + self.rebuildZipFile( [ archive_file ] ) + + #now just add the archive file as a new one + zf = zipfile.ZipFile(self.path, mode='a', compression=zipfile.ZIP_DEFLATED ) + zf.writestr( archive_file, data ) + zf.close() + return True + except: + return False + + def getArchiveFilenameList( self ): + try: + zf = zipfile.ZipFile( self.path, 'r' ) + namelist = zf.namelist() + zf.close() + return namelist + except Exception as e: + print >> sys.stderr, u"Unable to get zipfile list [{0}]: {1}".format(e, self.path) + return [] + + # zip helper func + def rebuildZipFile( self, exclude_list ): + + # this recompresses the zip archive, without the files in the exclude_list + #print ">> sys.stderr, Rebuilding zip {0} without {1}".format( self.path, exclude_list ) + + # generate temp file + tmp_fd, tmp_name = tempfile.mkstemp( dir=os.path.dirname(self.path) ) + os.close( tmp_fd ) + + zin = zipfile.ZipFile (self.path, 'r') + zout = zipfile.ZipFile (tmp_name, 'w') + for item in zin.infolist(): + buffer = zin.read(item.filename) + if ( item.filename not in exclude_list ): + zout.writestr(item, buffer) + + #preserve the old comment + zout.comment = zin.comment + + zout.close() + zin.close() + + # replace with the new file + os.remove( self.path ) + os.rename( tmp_name, self.path ) + + + def writeZipComment( self, filename, comment ): + """ + This is a custom function for writing a comment to a zip file, + since the built-in one doesn't seem to work on Windows and Mac OS/X + + Fortunately, the zip comment is at the end of the file, and it's + easy to manipulate. 
See this website for more info: + see: http://en.wikipedia.org/wiki/Zip_(file_format)#Structure + """ + + #get file size + statinfo = os.stat(filename) + file_length = statinfo.st_size + + try: + fo = open(filename, "r+b") + + #the starting position, relative to EOF + pos = -4 + + found = False + value = bytearray() + + # walk backwards to find the "End of Central Directory" record + while ( not found ) and ( -pos != file_length ): + # seek, relative to EOF + fo.seek( pos, 2) + + value = fo.read( 4 ) + + #look for the end of central directory signature + if bytearray(value) == bytearray([ 0x50, 0x4b, 0x05, 0x06 ]): + found = True + else: + # not found, step back another byte + pos = pos - 1 + #print pos,"{1} int: {0:x}".format(bytearray(value)[0], value) + + if found: + + # now skip forward 20 bytes to the comment length word + pos += 20 + fo.seek( pos, 2) + + # Pack the length of the comment string + format = "H" # one 2-byte integer + comment_length = struct.pack(format, len(comment)) # pack integer in a binary string + + # write out the length + fo.write( comment_length ) + fo.seek( pos+2, 2) + + # write out the comment itself + fo.write( comment ) + fo.truncate() + fo.close() + else: + raise Exception('Failed to write comment to zip file!') + except: + return False + else: + return True + + def copyFromArchive( self, otherArchive ): + # Replace the current zip with one copied from another archive + try: + zout = zipfile.ZipFile (self.path, 'w') + for fname in otherArchive.getArchiveFilenameList(): + data = otherArchive.readArchiveFile( fname ) + if data is not None: + zout.writestr( fname, data ) + zout.close() + + #preserve the old comment + comment = otherArchive.getArchiveComment() + if comment is not None: + if not self.writeZipComment( self.path, comment ): + return False + except Exception as e: + print >> sys.stderr, u"Error while copying to {0}: {1}".format(self.path, e) + return False + else: + return True + + 
#------------------------------------------
# RAR implementation

class RarArchiver:
    # Archiver backed by a RAR file.  Reading goes through the UnRAR2
    # bindings; writing shells out to an external 'rar' executable
    # (rar_exe_path), since UnRAR2 is read-only.

    # shared sink for the external tool's stdout, opened once per process
    devnull = None
    def __init__( self, path, rar_exe_path ):
        self.path = path
        self.rar_exe_path = rar_exe_path

        if RarArchiver.devnull is None:
            RarArchiver.devnull = open(os.devnull, "w")

        # windows only, keeps the cmd.exe from popping up
        if platform.system() == "Windows":
            self.startupinfo = subprocess.STARTUPINFO()
            self.startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW
        else:
            self.startupinfo = None

    def __del__(self):
        #RarArchiver.devnull.close()
        pass

    def getArchiveComment( self ):
        # the RAR archive comment, as exposed by UnRAR2
        rarc = self.getRARObj()
        return rarc.comment

    def setArchiveComment( self, comment ):
        # Write the comment via the external rar tool ('c' command);
        # returns False when no executable is configured or the call fails.
        if self.rar_exe_path is not None:
            try:
                # write comment to temp file
                tmp_fd, tmp_name = tempfile.mkstemp()
                f = os.fdopen(tmp_fd, 'w+b')
                f.write( comment )
                f.close()

                working_dir = os.path.dirname( os.path.abspath( self.path ) )

                # use external program to write comment to Rar archive
                subprocess.call([self.rar_exe_path, 'c', '-w' + working_dir , '-c-', '-z' + tmp_name, self.path],
                                startupinfo=self.startupinfo,
                                stdout=RarArchiver.devnull)

                if platform.system() == "Darwin":
                    # brief pause after the external call on OS X
                    time.sleep(1)

                os.remove( tmp_name)
            except:
                return False
            else:
                return True
        else:
            return False

    def readArchiveFile( self, archive_file ):
        # Read one member's bytes, retrying up to 7 times on I/O errors
        # or short reads; raises IOError if no clean read is achieved.

        # Make sure to escape brackets, since some funky stuff is going on
        # underneath with "fnmatch"
        archive_file = archive_file.replace("[", '[[]')
        entries = []

        rarc = self.getRARObj()

        tries = 0
        while tries < 7:
            try:
                tries = tries+1
                entries = rarc.read_files( archive_file )

                if entries[0][0].size != len(entries[0][1]):
                    # short read: log and retry
                    print >> sys.stderr, u"readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format(
                        entries[0][0].size,len(entries[0][1]), self.path, archive_file, tries)
                    continue

            except (OSError, IOError) as e:
                print >> sys.stderr, u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
                time.sleep(1)
            except Exception as e:
                # unexpected failure: give up rather than retry
                print >> sys.stderr, u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries)
                break

            else:
                # Success
                #entries is a list of of tuples:  ( rarinfo, filedata)
                if tries > 1:
                    print >> sys.stderr, u"Attempted read_files() {0} times".format(tries)
                if (len(entries) == 1):
                    return entries[0][1]
                else:
                    raise IOError

        raise IOError



    def writeArchiveFile( self, archive_file, data ):
        # Add/replace a member via the external rar tool ('a' command),
        # staging the data through a temp file; False if no exe or on error.
        if self.rar_exe_path is not None:
            try:
                tmp_folder = tempfile.mkdtemp()

                tmp_file = os.path.join( tmp_folder, archive_file )

                working_dir = os.path.dirname( os.path.abspath( self.path ) )

                # TODO: will this break if 'archive_file' is in a subfolder. i.e. "foo/bar.txt"
                # will need to create the subfolder above, I guess...
                f = open(tmp_file, 'w')
                f.write( data )
                f.close()

                # use external program to write file to Rar archive
                subprocess.call([self.rar_exe_path, 'a', '-w' + working_dir ,'-c-', '-ep', self.path, tmp_file],
                                startupinfo=self.startupinfo,
                                stdout=RarArchiver.devnull)

                if platform.system() == "Darwin":
                    time.sleep(1)
                os.remove( tmp_file)
                os.rmdir( tmp_folder)
            except:
                return False
            else:
                return True
        else:
            return False

    def removeArchiveFile( self, archive_file ):
        # Delete a member via the external rar tool ('d' command).
        if self.rar_exe_path is not None:
            try:
                # use external program to remove file from Rar archive
                subprocess.call([self.rar_exe_path, 'd','-c-', self.path, archive_file],
                                startupinfo=self.startupinfo,
                                stdout=RarArchiver.devnull)

                if platform.system() == "Darwin":
                    time.sleep(1)
            except:
                return False
            else:
                return True
        else:
            return False

    def getArchiveFilenameList( self ):
        # List member names (zero-size entries skipped), retrying up to
        # 7 times on I/O errors; re-raises the last error on exhaustion.
        rarc = self.getRARObj()
        #namelist = [ item.filename for item in rarc.infolist() ]
        #return namelist

        tries = 0
        while tries < 7:
            try:
                tries = tries+1
                #namelist = [ item.filename for item in rarc.infolist() ]
                namelist = []
                for item in rarc.infolist():
                    if item.size != 0:
                        namelist.append( item.filename )

            except (OSError, IOError) as e:
                print >> sys.stderr, u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
                time.sleep(1)

            else:
                # Success
                return namelist

        raise e


    def getRARObj( self ):
        # Open the archive with UnRAR2, retrying up to 7 times on I/O
        # errors; re-raises the last error on exhaustion.
        tries = 0
        while tries < 7:
            try:
                tries = tries+1
                rarc = UnRAR2.RarFile( self.path )

            except (OSError, IOError) as e:
                print >> sys.stderr, u"getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries)
                time.sleep(1)

            else:
                # Success
                return rarc

        raise e

#------------------------------------------
# Folder implementation
class FolderArchiver:
    # Archiver backed by a plain directory; the "comment" lives in a
    # conventional text file inside the folder.

    def __init__( self, path ):
        self.path = path
        self.comment_file_name = "ComicTaggerFolderComment.txt"

    def getArchiveComment( self ):
        return self.readArchiveFile( self.comment_file_name )

    def setArchiveComment( self, comment ):
        return self.writeArchiveFile( self.comment_file_name, comment )

    def readArchiveFile( self, archive_file ):
        # Returns "" when the file is missing/unreadable.
        data = ""
        fname = os.path.join( self.path, archive_file )
        try:
            with open( fname, 'rb' ) as f:
                data = f.read()
                f.close()
        except IOError as e:
            pass

        return data

    def writeArchiveFile( self, archive_file, data ):

        fname = os.path.join( self.path, archive_file )
        try:
            with open(fname, 'w+') as f:
                f.write( data )
                f.close()
        except:
            return False
        else:
            return True

    def removeArchiveFile( self, archive_file ):

        fname = os.path.join( self.path, archive_file )
        try:
            os.remove( fname )
        except:
            return False
        else:
            return True

    def getArchiveFilenameList( self ):
        return self.listFiles( self.path )

    def listFiles( self, folder ):
        # Recursive listing.  NOTE(review): isdir() is tested against the
        # bare item name (relative to the CWD), not os.path.join(folder,
        # item) — recursion likely only fires when CWD == folder; verify.
        itemlist = list()

        for item in os.listdir( folder ):
            itemlist.append( item )
            if os.path.isdir( item ):
                itemlist.extend( self.listFiles( os.path.join( folder, item ) ))

        return itemlist

#------------------------------------------
# Unknown implementation
class UnknownArchiver:
    # Null-object archiver used when the file type can't be determined;
    # every operation is a harmless no-op.

    def __init__( self, path ):
        self.path = path

    def getArchiveComment( self ):
        return ""
    def setArchiveComment( self, comment ):
        return False
    def readArchiveFile( self ):
        return ""
    def writeArchiveFile( self, archive_file, data ):
        return False
    def removeArchiveFile( self, archive_file ):
        return False
    def getArchiveFilenameList( self ):
        return []

class PdfArchiver:
    # Read-only archiver for PDFs: pages are exposed as synthetic
    # "/NNNN.jpg" names and rendered on demand with the external 'mudraw'
    # tool.

    def __init__( self, path ):
        self.path = path

    def getArchiveComment( self ):
        return ""
    def setArchiveComment( self, comment ):
        return False
    def readArchiveFile( self, page_num ):
        # parse the page number back out of the synthetic "/NNNN.jpg" name
        return subprocess.check_output(['mudraw', '-o','-', self.path, str(int(os.path.basename(page_num)[:-4]))])
    def writeArchiveFile( self, archive_file, data ):
        return False
    def removeArchiveFile( self, archive_file ):
        return False
    def getArchiveFilenameList( self ):
        out = []
        pdf = PdfFileReader(open(self.path, 'rb'))
        for page in range(1, pdf.getNumPages() + 1):
            out.append("/%04d.jpg" % (page))
        return out

#------------------------------------------------------------------
class ComicArchive:
    # Facade over the archiver classes above: detects the archive type,
    # delegates raw file access, and layers the three metadata formats
    # (CBI / CIX / CoMet) on top, with per-instance caching.

    # class-level cache for the fallback "no cover" image bytes
    logo_data = None

    class ArchiveType:
        Zip, Rar, Folder, Pdf, Unknown = range(5)

    def __init__( self, path, rar_exe_path=None, default_image_path=None ):
        self.path = path

        self.rar_exe_path = rar_exe_path
        self.ci_xml_filename = 'ComicInfo.xml'
        self.comet_default_filename = 'CoMet.xml'
        self.resetCache()
        self.default_image_path = default_image_path

        # Use file extension to decide which archive test we do first
        ext = os.path.splitext(path)[1].lower()

        self.archive_type =  self.ArchiveType.Unknown
        self.archiver = UnknownArchiver( self.path )

        if ext == ".cbr" or ext == ".rar":
            # try RAR first for .cbr/.rar, but fall back to zip since
            # extensions are often wrong
            if self.rarTest():
                self.archive_type =  self.ArchiveType.Rar
                self.archiver = RarArchiver( self.path, rar_exe_path=self.rar_exe_path )

            elif self.zipTest():
                self.archive_type =  self.ArchiveType.Zip
                self.archiver = ZipArchiver( self.path )
        else:
            if self.zipTest():
                self.archive_type =  self.ArchiveType.Zip
                self.archiver = ZipArchiver( self.path )

            elif self.rarTest():
                self.archive_type =  self.ArchiveType.Rar
                self.archiver = RarArchiver( self.path, rar_exe_path=self.rar_exe_path )
            elif os.path.basename(self.path)[-3:] == 'pdf':
                self.archive_type = self.ArchiveType.Pdf
                self.archiver = PdfArchiver(self.path)

        if ComicArchive.logo_data is None:
            #fname = ComicTaggerSettings.getGraphic('nocover.png')
            fname = self.default_image_path
            with open(fname, 'rb') as fd:
                ComicArchive.logo_data = fd.read()

    # Clears the cached data
    def resetCache( self ):
        self.has_cix = None
        self.has_cbi = None
        self.has_comet = None
        self.comet_filename = None
        self.page_count = None
        self.page_list = None
        self.cix_md = None
        self.cbi_md = None
        self.comet_md = None

    def loadCache( self, style_list ):
        # pre-populate the metadata caches for the given styles
        for style in style_list:
            self.readMetadata(style)

    def rename( self, path ):
        self.path = path
        self.archiver.path = path

    def zipTest( self ):
        return zipfile.is_zipfile( self.path )

    def rarTest( self ):
        try:
            rarc = UnRAR2.RarFile( self.path )
        except: # InvalidRARArchive:
            return False
        else:
            return True


    def isZip( self ):
        return self.archive_type ==  self.ArchiveType.Zip

    def isRar( self ):
        return self.archive_type ==  self.ArchiveType.Rar
    def isPdf(self):
        return self.archive_type == self.ArchiveType.Pdf
    def isFolder( self ):
        return self.archive_type ==  self.ArchiveType.Folder

    def isWritable( self, check_rar_status=True ):
        # Writable needs: a known archive type, a rar exe for RARs (unless
        # the caller opts out), and OS write access to file and directory.
        if self.archive_type == self.ArchiveType.Unknown :
            return False

        elif check_rar_status and self.isRar() and self.rar_exe_path is None:
            return False

        elif not os.access(self.path, os.W_OK):
            return False

        elif ((self.archive_type != self.ArchiveType.Folder) and
                (not os.access( os.path.dirname( os.path.abspath(self.path)), os.W_OK ))):
            return False

        return True

    def isWritableForStyle( self, data_style ):
        # CBI lives in the archive comment, which can't be written for RARs here
        if self.isRar() and data_style == MetaDataStyle.CBI:
            return False

        return self.isWritable()

    def seemsToBeAComicArchive( self ):

        # Do we even care about extensions??
        ext = os.path.splitext(self.path)[1].lower()

        if (
              ( self.isZip() or  self.isRar() or self.isPdf()) #or self.isFolder() )
              and
              ( self.getNumberOfPages() > 0)

            ):
            return True
        else:
            return False

    def readMetadata( self, style ):
        # Dispatch to the style-specific reader; empty metadata for unknown styles.
        if style == MetaDataStyle.CIX:
            return self.readCIX()
        elif style == MetaDataStyle.CBI:
            return self.readCBI()
        elif style == MetaDataStyle.COMET:
            return self.readCoMet()
        else:
            return GenericMetadata()

    def writeMetadata( self, metadata, style ):

        retcode = None
        if style == MetaDataStyle.CIX:
            retcode = self.writeCIX( metadata )
        elif style == MetaDataStyle.CBI:
            retcode = self.writeCBI( metadata )
        elif style == MetaDataStyle.COMET:
            retcode = self.writeCoMet( metadata )
        return retcode


    def hasMetadata( self, style ):

        if style == MetaDataStyle.CIX:
            return self.hasCIX()
        elif style == MetaDataStyle.CBI:
            return self.hasCBI()
        elif style == MetaDataStyle.COMET:
            return self.hasCoMet()
        else:
            return False

    def removeMetadata( self, style ):
        retcode = True
        if style == MetaDataStyle.CIX:
            retcode = self.removeCIX()
        elif style == MetaDataStyle.CBI:
            retcode = self.removeCBI()
        elif style == MetaDataStyle.COMET:
            retcode = self.removeCoMet()
        return retcode

    def getPage( self, index ):
        # Return the raw image bytes for a page, or the cached "no cover"
        # logo if the page can't be read.
        image_data = None

        filename = self.getPageName( index )

        if filename is not None:
            try:
                image_data = self.archiver.readArchiveFile( filename )
            except IOError:
                print >> sys.stderr, u"Error reading in page.  Substituting logo page."
                image_data = ComicArchive.logo_data

        return image_data

    def getPageName( self, index ):
        # Map a page index to its archive member name; None if out of range.
        if index is None:
            return None

        page_list = self.getPageNameList()

        num_pages = len( page_list )
        if num_pages == 0 or index >= num_pages:
            return None

        return  page_list[index]

    def getScannerPageIndex( self ):
        # Heuristic: guess which page (if any) is a scanner-credit page,
        # based on filename-length statistics and common prefixes.
        scanner_page_index = None

        #make a guess at the scanner page
        name_list = self.getPageNameList()
        count = self.getNumberOfPages()

        #too few pages to really know
        if count < 5:
            return None

        # count the length of every filename, and count occurences
        length_buckets = dict()
        for name in name_list:
            fname = os.path.split(name)[1]
            length = len(fname)
            if length_buckets.has_key( length ):
                length_buckets[ length ] += 1
            else:
                length_buckets[ length ] = 1

        # sort by most common
        sorted_buckets = sorted(length_buckets.iteritems(), key=lambda (k,v): (v,k), reverse=True)

        # statistical mode occurence is first
        mode_length = sorted_buckets[0][0]

        # we are only going to consider the final image file:
        final_name = os.path.split(name_list[count-1])[1]

        common_length_list = list()
        for name in name_list:
            if len(os.path.split(name)[1]) == mode_length:
                common_length_list.append( os.path.split(name)[1] )

        prefix = os.path.commonprefix(common_length_list)

        if mode_length <= 7 and prefix == "":
            #probably all numbers
            if len(final_name) > mode_length:
                scanner_page_index = count-1

        # see if the last page doesn't start with the same prefix as most others
        elif not final_name.startswith(prefix):
            scanner_page_index = count-1

        return scanner_page_index


    def getPageNameList( self , sort_list=True):
        # Build (and cache) the sorted list of image-file members.
        if self.page_list is None:
            # get the  list file names in the archive, and sort
            files = self.archiver.getArchiveFilenameList()

            # seems like some archive creators are on  Windows, and don't know about case-sensitivity!
            if sort_list:
                def keyfunc(k):
                    #hack to account for some weird scanner ID pages
                    #basename=os.path.split(k)[1]
                    #if basename < '0':
                    #    k = os.path.join(os.path.split(k)[0], "z" + basename)
                    return k.lower()

                files = natsorted(files, key=keyfunc,signed=False)

            # make a sub-list of image files
            # (the 4-char suffix check also matches ".jpeg"/".webp" via
            # their last four characters)
            self.page_list = []
            for name in files:
                if ( name[-4:].lower() in [ ".jpg", "jpeg", ".png", ".gif", "webp" ] and os.path.basename(name)[0] != "." ):
                    self.page_list.append(name)

        return self.page_list

    def getNumberOfPages( self ):

        if self.page_count is None:
            self.page_count = len( self.getPageNameList( ) )
        return self.page_count

    def readCBI( self ):
        # Parse (and cache) ComicBookLover metadata from the archive comment.
        if self.cbi_md is None:
            raw_cbi = self.readRawCBI()
            if raw_cbi is None:
                self.cbi_md = GenericMetadata()
            else:
                self.cbi_md = ComicBookInfo().metadataFromString( raw_cbi )

            self.cbi_md.setDefaultPageList( self.getNumberOfPages() )

        return self.cbi_md

    def readRawCBI( self ):
        if ( not self.hasCBI() ):
            return None

        return self.archiver.getArchiveComment()

    def hasCBI(self):
        if self.has_cbi is None:

            #if ( not ( self.isZip() or self.isRar()) or not self.seemsToBeAComicArchive() ):
            if not self.seemsToBeAComicArchive():
                self.has_cbi = False
            else:
                comment = self.archiver.getArchiveComment()
                self.has_cbi = ComicBookInfo().validateString( comment )

        return self.has_cbi

    def writeCBI( self, metadata ):
        # Serialize metadata to CBI JSON and store it as the archive comment.
        if metadata is not None:
            self.applyArchiveInfoToMetadata( metadata )
            cbi_string = ComicBookInfo().stringFromMetadata( metadata )
            write_success =  self.archiver.setArchiveComment( cbi_string )
            if write_success:
                self.has_cbi = True
                self.cbi_md = metadata
            self.resetCache()
            return write_success
        else:
            return False

    def removeCBI( self ):
        # Clear the archive comment to remove CBI data; True if none present.
        if self.hasCBI():
            write_success = self.archiver.setArchiveComment( "" )
            if write_success:
                self.has_cbi = False
                self.cbi_md = None
            self.resetCache()
            return write_success
        return True

def readCIX( self ): + if self.cix_md is None: + raw_cix = self.readRawCIX() + if raw_cix is None or raw_cix == "": + self.cix_md = GenericMetadata() + else: + self.cix_md = ComicInfoXml().metadataFromString( raw_cix ) + + #validate the existing page list (make sure count is correct) + if len ( self.cix_md.pages ) != 0 : + if len ( self.cix_md.pages ) != self.getNumberOfPages(): + # pages array doesn't match the actual number of images we're seeing + # in the archive, so discard the data + self.cix_md.pages = [] + + if len( self.cix_md.pages ) == 0: + self.cix_md.setDefaultPageList( self.getNumberOfPages() ) + + return self.cix_md + + def readRawCIX( self ): + if not self.hasCIX(): + return None + try: + raw_cix = self.archiver.readArchiveFile( self.ci_xml_filename ) + except IOError: + print "Error reading in raw CIX!" + raw_cix = "" + return raw_cix + + def writeCIX(self, metadata): + + if metadata is not None: + self.applyArchiveInfoToMetadata( metadata, calc_page_sizes=True ) + cix_string = ComicInfoXml().stringFromMetadata( metadata ) + write_success = self.archiver.writeArchiveFile( self.ci_xml_filename, cix_string ) + if write_success: + self.has_cix = True + self.cix_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCIX( self ): + if self.hasCIX(): + write_success = self.archiver.removeArchiveFile( self.ci_xml_filename ) + if write_success: + self.has_cix = False + self.cix_md = None + self.resetCache() + return write_success + return True + + + def hasCIX(self): + if self.has_cix is None: + + if not self.seemsToBeAComicArchive(): + self.has_cix = False + elif self.ci_xml_filename in self.archiver.getArchiveFilenameList(): + self.has_cix = True + else: + self.has_cix = False + return self.has_cix + + + def readCoMet( self ): + if self.comet_md is None: + raw_comet = self.readRawCoMet() + if raw_comet is None or raw_comet == "": + self.comet_md = GenericMetadata() + else: + self.comet_md = 
CoMet().metadataFromString( raw_comet ) + + self.comet_md.setDefaultPageList( self.getNumberOfPages() ) + #use the coverImage value from the comet_data to mark the cover in this struct + # walk through list of images in file, and find the matching one for md.coverImage + # need to remove the existing one in the default + if self.comet_md.coverImage is not None: + cover_idx = 0 + for idx,f in enumerate(self.getPageNameList()): + if self.comet_md.coverImage == f: + cover_idx = idx + break + if cover_idx != 0: + del (self.comet_md.pages[0]['Type'] ) + self.comet_md.pages[ cover_idx ]['Type'] = PageType.FrontCover + + return self.comet_md + + def readRawCoMet( self ): + if not self.hasCoMet(): + print >> sys.stderr, self.path, "doesn't have CoMet data!" + return None + + try: + raw_comet = self.archiver.readArchiveFile( self.comet_filename ) + except IOError: + print >> sys.stderr, u"Error reading in raw CoMet!" + raw_comet = "" + return raw_comet + + def writeCoMet(self, metadata): + + if metadata is not None: + if not self.hasCoMet(): + self.comet_filename = self.comet_default_filename + + self.applyArchiveInfoToMetadata( metadata ) + # Set the coverImage value, if it's not the first page + cover_idx = int(metadata.getCoverPageIndexList()[0]) + if cover_idx != 0: + metadata.coverImage = self.getPageName( cover_idx ) + + comet_string = CoMet().stringFromMetadata( metadata ) + write_success = self.archiver.writeArchiveFile( self.comet_filename, comet_string ) + if write_success: + self.has_comet = True + self.comet_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCoMet( self ): + if self.hasCoMet(): + write_success = self.archiver.removeArchiveFile( self.comet_filename ) + if write_success: + self.has_comet = False + self.comet_md = None + self.resetCache() + return write_success + return True + + def hasCoMet(self): + if self.has_comet is None: + self.has_comet = False + if not self.seemsToBeAComicArchive(): + return 
self.has_comet + + #look at all xml files in root, and search for CoMet data, get first + for n in self.archiver.getArchiveFilenameList(): + if ( os.path.dirname(n) == "" and + os.path.splitext(n)[1].lower() == '.xml'): + # read in XML file, and validate it + try: + data = self.archiver.readArchiveFile( n ) + except: + data = "" + print >> sys.stderr, u"Error reading in Comet XML for validation!" + if CoMet().validateString( data ): + # since we found it, save it! + self.comet_filename = n + self.has_comet = True + break + + return self.has_comet + + + + def applyArchiveInfoToMetadata( self, md, calc_page_sizes=False): + md.pageCount = self.getNumberOfPages() + + if calc_page_sizes: + for p in md.pages: + idx = int( p['Image'] ) + if pil_available: + if 'ImageSize' not in p or 'ImageHeight' not in p or 'ImageWidth' not in p: + data = self.getPage( idx ) + if data is not None: + try: + im = Image.open(StringIO.StringIO(data)) + w,h = im.size + + p['ImageSize'] = str(len(data)) + p['ImageHeight'] = str(h) + p['ImageWidth'] = str(w) + except IOError: + p['ImageSize'] = str(len(data)) + + else: + if 'ImageSize' not in p: + data = self.getPage( idx ) + p['ImageSize'] = str(len(data)) + + + + def metadataFromFilename( self , parse_scan_info=True): + + metadata = GenericMetadata() + + fnp = FileNameParser() + fnp.parseFilename( self.path ) + + if fnp.issue != "": + metadata.issue = fnp.issue + if fnp.series != "": + metadata.series = fnp.series + if fnp.volume != "": + metadata.volume = fnp.volume + if fnp.year != "": + metadata.year = fnp.year + if fnp.issue_count != "": + metadata.issueCount = fnp.issue_count + if parse_scan_info: + if fnp.remainder != "": + metadata.scanInfo = fnp.remainder + + metadata.isEmpty = False + + return metadata + + def exportAsZip( self, zipfilename ): + if self.archive_type == self.ArchiveType.Zip: + # nothing to do, we're already a zip + return True + + zip_archiver = ZipArchiver( zipfilename ) + return zip_archiver.copyFromArchive( 
self.archiver ) + diff --git a/comicbookinfo.py b/comicbookinfo.py new file mode 100644 index 0000000..a0bbaf0 --- /dev/null +++ b/comicbookinfo.py @@ -0,0 +1,152 @@ +""" +A python class to encapsulate the ComicBookInfo data +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + + +import json +from datetime import datetime +import zipfile + +from genericmetadata import GenericMetadata +import utils +#import ctversion + +class ComicBookInfo: + + + def metadataFromString( self, string ): + + cbi_container = json.loads( unicode(string, 'utf-8') ) + + metadata = GenericMetadata() + + cbi = cbi_container[ 'ComicBookInfo/1.0' ] + + #helper func + # If item is not in CBI, return None + def xlate( cbi_entry): + if cbi_entry in cbi: + return cbi[cbi_entry] + else: + return None + + metadata.series = xlate( 'series' ) + metadata.title = xlate( 'title' ) + metadata.issue = xlate( 'issue' ) + metadata.publisher = xlate( 'publisher' ) + metadata.month = xlate( 'publicationMonth' ) + metadata.year = xlate( 'publicationYear' ) + metadata.issueCount = xlate( 'numberOfIssues' ) + metadata.comments = xlate( 'comments' ) + metadata.credits = xlate( 'credits' ) + metadata.genre = xlate( 'genre' ) + metadata.volume = xlate( 'volume' ) + metadata.volumeCount = xlate( 'numberOfVolumes' ) + metadata.language = xlate( 'language' ) + metadata.country = xlate( 'country' ) + metadata.criticalRating = xlate( 'rating' ) + metadata.tags = xlate( 'tags' ) + + # make sure 
credits and tags are at least empty lists and not None + if metadata.credits is None: + metadata.credits = [] + if metadata.tags is None: + metadata.tags = [] + + #need to massage the language string to be ISO + if metadata.language is not None: + # reverse look-up + pattern = metadata.language + metadata.language = None + for key in utils.getLanguageDict(): + if utils.getLanguageDict()[ key ] == pattern.encode('utf-8'): + metadata.language = key + break + + metadata.isEmpty = False + + return metadata + + def stringFromMetadata( self, metadata ): + + cbi_container = self.createJSONDictionary( metadata ) + return json.dumps( cbi_container ) + + #verify that the string actually contains CBI data in JSON format + def validateString( self, string ): + + try: + cbi_container = json.loads( string ) + except: + return False + + return ( 'ComicBookInfo/1.0' in cbi_container ) + + + def createJSONDictionary( self, metadata ): + + # Create the dictionary that we will convert to JSON text + cbi = dict() + cbi_container = {'appID' : 'ComicTagger/' + '1.0.0', #ctversion.version, + 'lastModified' : str(datetime.now()), + 'ComicBookInfo/1.0' : cbi } + + #helper func + def assign( cbi_entry, md_entry): + if md_entry is not None: + cbi[cbi_entry] = md_entry + + #helper func + def toInt(s): + i = None + if type(s) in [ str, unicode, int ]: + try: + i = int(s) + except ValueError: + pass + return i + + assign( 'series', metadata.series ) + assign( 'title', metadata.title ) + assign( 'issue', metadata.issue ) + assign( 'publisher', metadata.publisher ) + assign( 'publicationMonth', toInt(metadata.month) ) + assign( 'publicationYear', toInt(metadata.year) ) + assign( 'numberOfIssues', toInt(metadata.issueCount) ) + assign( 'comments', metadata.comments ) + assign( 'genre', metadata.genre ) + assign( 'volume', toInt(metadata.volume) ) + assign( 'numberOfVolumes', toInt(metadata.volumeCount) ) + assign( 'language', utils.getLanguageFromISO(metadata.language) ) + assign( 'country', 
metadata.country ) + assign( 'rating', metadata.criticalRating ) + assign( 'credits', metadata.credits ) + assign( 'tags', metadata.tags ) + + return cbi_container + + + def writeToExternalFile( self, filename, metadata ): + + cbi_container = self.createJSONDictionary(metadata) + + f = open(filename, 'w') + f.write(json.dumps(cbi_container, indent=4)) + f.close + diff --git a/comicinfoxml.py b/comicinfoxml.py new file mode 100644 index 0000000..9e9df07 --- /dev/null +++ b/comicinfoxml.py @@ -0,0 +1,293 @@ +""" +A python class to encapsulate ComicRack's ComicInfo.xml data +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from datetime import datetime +import zipfile +from pprint import pprint +import xml.etree.ElementTree as ET +from genericmetadata import GenericMetadata +import utils + +class ComicInfoXml: + + writer_synonyms = ['writer', 'plotter', 'scripter'] + penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] + inker_synonyms = [ 'inker', 'artist', 'finishes' ] + colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] + letterer_synonyms = [ 'letterer'] + cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] + editor_synonyms = [ 'editor'] + + + def getParseableCredits( self ): + parsable_credits = [] + parsable_credits.extend( self.writer_synonyms ) + parsable_credits.extend( self.penciller_synonyms ) + parsable_credits.extend( self.inker_synonyms ) + parsable_credits.extend( self.colorist_synonyms ) + parsable_credits.extend( self.letterer_synonyms ) + parsable_credits.extend( self.cover_synonyms ) + parsable_credits.extend( self.editor_synonyms ) + return parsable_credits + + def metadataFromString( self, string ): + + tree = ET.ElementTree(ET.fromstring( string )) + return self.convertXMLToMetadata( tree ) + + def stringFromMetadata( self, metadata ): + + header = '\n' + + tree = self.convertMetadataToXML( self, metadata ) + return header + ET.tostring(tree.getroot()) + + def indent( self, elem, level=0 ): + # for making the XML output readable + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + self.indent( elem, level+1 ) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + def convertMetadataToXML( self, filename, metadata ): + + #shorthand for the metadata + md = metadata + + # build a tree structure + root = ET.Element("ComicInfo") + 
root.attrib['xmlns:xsi']="http://www.w3.org/2001/XMLSchema-instance" + root.attrib['xmlns:xsd']="http://www.w3.org/2001/XMLSchema" + #helper func + def assign( cix_entry, md_entry): + if md_entry is not None: + ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry) + + assign( 'Title', md.title ) + assign( 'Series', md.series ) + assign( 'Number', md.issue ) + assign( 'Count', md.issueCount ) + assign( 'Volume', md.volume ) + assign( 'AlternateSeries', md.alternateSeries ) + assign( 'AlternateNumber', md.alternateNumber ) + assign( 'StoryArc', md.storyArc ) + assign( 'SeriesGroup', md.seriesGroup ) + assign( 'AlternateCount', md.alternateCount ) + assign( 'Summary', md.comments ) + assign( 'Notes', md.notes ) + assign( 'Year', md.year ) + assign( 'Month', md.month ) + assign( 'Day', md.day ) + + # need to specially process the credits, since they are structured differently than CIX + credit_writer_list = list() + credit_penciller_list = list() + credit_inker_list = list() + credit_colorist_list = list() + credit_letterer_list = list() + credit_cover_list = list() + credit_editor_list = list() + + # first, loop thru credits, and build a list for each role that CIX supports + for credit in metadata.credits: + + if credit['role'].lower() in set( self.writer_synonyms ): + credit_writer_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.penciller_synonyms ): + credit_penciller_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.inker_synonyms ): + credit_inker_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.colorist_synonyms ): + credit_colorist_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.letterer_synonyms ): + credit_letterer_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.cover_synonyms ): + credit_cover_list.append(credit['person'].replace(",","")) + + if 
credit['role'].lower() in set( self.editor_synonyms ): + credit_editor_list.append(credit['person'].replace(",","")) + + # second, convert each list to string, and add to XML struct + if len( credit_writer_list ) > 0: + node = ET.SubElement(root, 'Writer') + node.text = utils.listToString( credit_writer_list ) + + if len( credit_penciller_list ) > 0: + node = ET.SubElement(root, 'Penciller') + node.text = utils.listToString( credit_penciller_list ) + + if len( credit_inker_list ) > 0: + node = ET.SubElement(root, 'Inker') + node.text = utils.listToString( credit_inker_list ) + + if len( credit_colorist_list ) > 0: + node = ET.SubElement(root, 'Colorist') + node.text = utils.listToString( credit_colorist_list ) + + if len( credit_letterer_list ) > 0: + node = ET.SubElement(root, 'Letterer') + node.text = utils.listToString( credit_letterer_list ) + + if len( credit_cover_list ) > 0: + node = ET.SubElement(root, 'CoverArtist') + node.text = utils.listToString( credit_cover_list ) + + if len( credit_editor_list ) > 0: + node = ET.SubElement(root, 'Editor') + node.text = utils.listToString( credit_editor_list ) + + assign( 'Publisher', md.publisher ) + assign( 'Imprint', md.imprint ) + assign( 'Genre', md.genre ) + assign( 'Web', md.webLink ) + assign( 'PageCount', md.pageCount ) + assign( 'LanguageISO', md.language ) + assign( 'Format', md.format ) + assign( 'AgeRating', md.maturityRating ) + if md.blackAndWhite is not None and md.blackAndWhite: + ET.SubElement(root, 'BlackAndWhite').text = "Yes" + assign( 'Manga', md.manga ) + assign( 'Characters', md.characters ) + assign( 'Teams', md.teams ) + assign( 'Locations', md.locations ) + assign( 'ScanInformation', md.scanInfo ) + + # loop and add the page entries under pages node + if len( md.pages ) > 0: + pages_node = ET.SubElement(root, 'Pages') + for page_dict in md.pages: + page_node = ET.SubElement(pages_node, 'Page') + page_node.attrib = page_dict + + # self pretty-print + self.indent(root) + + # wrap it in an 
ElementTree instance, and save as XML + tree = ET.ElementTree(root) + return tree + + + def convertXMLToMetadata( self, tree ): + + root = tree.getroot() + + if root.tag != 'ComicInfo': + raise 1 + return None + + metadata = GenericMetadata() + md = metadata + + + # Helper function + def xlate( tag ): + node = root.find( tag ) + if node is not None: + return node.text + else: + return None + + md.series = xlate( 'Series' ) + md.title = xlate( 'Title' ) + md.issue = xlate( 'Number' ) + md.issueCount = xlate( 'Count' ) + md.volume = xlate( 'Volume' ) + md.alternateSeries = xlate( 'AlternateSeries' ) + md.alternateNumber = xlate( 'AlternateNumber' ) + md.alternateCount = xlate( 'AlternateCount' ) + md.comments = xlate( 'Summary' ) + md.notes = xlate( 'Notes' ) + md.year = xlate( 'Year' ) + md.month = xlate( 'Month' ) + md.day = xlate( 'Day' ) + md.publisher = xlate( 'Publisher' ) + md.imprint = xlate( 'Imprint' ) + md.genre = xlate( 'Genre' ) + md.webLink = xlate( 'Web' ) + md.language = xlate( 'LanguageISO' ) + md.format = xlate( 'Format' ) + md.manga = xlate( 'Manga' ) + md.characters = xlate( 'Characters' ) + md.teams = xlate( 'Teams' ) + md.locations = xlate( 'Locations' ) + md.pageCount = xlate( 'PageCount' ) + md.scanInfo = xlate( 'ScanInformation' ) + md.storyArc = xlate( 'StoryArc' ) + md.seriesGroup = xlate( 'SeriesGroup' ) + md.maturityRating = xlate( 'AgeRating' ) + + tmp = xlate( 'BlackAndWhite' ) + md.blackAndWhite = False + if tmp is not None and tmp.lower() in [ "yes", "true", "1" ]: + md.blackAndWhite = True + # Now extract the credit info + for n in root: + if ( n.tag == 'Writer' or + n.tag == 'Penciller' or + n.tag == 'Inker' or + n.tag == 'Colorist' or + n.tag == 'Letterer' or + n.tag == 'Editor' + ): + if n.text is not None: + for name in n.text.split(','): + metadata.addCredit( name.strip(), n.tag ) + + if n.tag == 'CoverArtist': + if n.text is not None: + for name in n.text.split(','): + metadata.addCredit( name.strip(), "Cover" ) + + # parse 
page data now + pages_node = root.find( "Pages" ) + if pages_node is not None: + for page in pages_node: + metadata.pages.append( page.attrib ) + #print page.attrib + + metadata.isEmpty = False + + return metadata + + def writeToExternalFile( self, filename, metadata ): + + tree = self.convertMetadataToXML( self, metadata ) + #ET.dump(tree) + tree.write(filename, encoding='utf-8') + + def readFromExternalFile( self, filename ): + + tree = ET.parse( filename ) + return self.convertXMLToMetadata( tree ) + diff --git a/filenameparser.py b/filenameparser.py new file mode 100644 index 0000000..6f3aa05 --- /dev/null +++ b/filenameparser.py @@ -0,0 +1,277 @@ +""" +Functions for parsing comic info from filename + +This should probably be re-written, but, well, it mostly works! + +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +# Some portions of this code were modified from pyComicMetaThis project +# http://code.google.com/p/pycomicmetathis/ + +import re +import os +from urllib import unquote + +class FileNameParser: + + def repl(self, m): + return ' ' * len(m.group()) + + def fixSpaces( self, string, remove_dashes=True ): + if remove_dashes: + placeholders = ['[-_]',' +'] + else: + placeholders = ['[_]',' +'] + for ph in placeholders: + string = re.sub(ph, self.repl, string ) + return string #.strip() + + + def getIssueCount( self,filename, issue_end ): + + count = "" + filename = filename[issue_end:] + + # replace any name seperators with spaces + tmpstr = self.fixSpaces(filename) + found = False + + match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE) + if match: + count = match.group() + found = True + + if not found: + match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE) + if match: + count = match.group() + found = True + + + count = count.lstrip("0") + + return count + + def getIssueNumber( self, filename ): + + # Returns a tuple of issue number string, and start and end indexs in the filename + # (The indexes will be used to split the string up for further parsing) + + found = False + issue = '' + start = 0 + end = 0 + + # first, look for multiple "--", this means it's formatted differently from most: + if "--" in filename: + # the pattern seems to be that anything to left of the first "--" is the series name followed by issue + filename = re.sub("--.*", self.repl, filename) + + elif "__" in filename: + # the pattern seems to be that anything to left of the first "__" is the series name followed by issue + filename = re.sub("__.*", self.repl, filename) + + filename = filename.replace("+", " ") + + # replace parenthetical phrases with spaces + filename = re.sub( "\(.*?\)", self.repl, filename) + filename = re.sub( "\[.*?\]", self.repl, filename) + + # replace any name seperators with spaces + filename = self.fixSpaces(filename) + + # remove any 
"of NN" phrase with spaces (problem: this could break on some titles) + filename = re.sub( "of [\d]+", self.repl, filename) + + #print u"[{0}]".format(filename) + + # we should now have a cleaned up filename version with all the words in + # the same positions as original filename + + # make a list of each word and its position + word_list = list() + for m in re.finditer("\S+", filename): + word_list.append( (m.group(0), m.start(), m.end()) ) + + # remove the first word, since it can't be the issue number + if len(word_list) > 1: + word_list = word_list[1:] + else: + #only one word?? just bail. + return issue, start, end + + # Now try to search for the likely issue number word in the list + + # first look for a word with "#" followed by digits with optional sufix + # this is almost certainly the issue number + for w in reversed(word_list): + if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + found = True + break + + # same as above but w/o a '#', and only look at the last word in the list + if not found: + w = word_list[-1] + if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + found = True + + # now try to look for a # followed by any characters + if not found: + for w in reversed(word_list): + if re.match("#\S+", w[0]): + found = True + break + + if found: + issue = w[0] + start = w[1] + end = w[2] + if issue[0] == '#': + issue = issue[1:] + + return issue, start, end + + def getSeriesName(self, filename, issue_start ): + + # use the issue number string index to split the filename string + + if issue_start != 0: + filename = filename[:issue_start] + + # in case there is no issue number, remove some obvious stuff + if "--" in filename: + # the pattern seems to be that anything to left of the first "--" is the series name followed by issue + filename = re.sub("--.*", self.repl, filename) + + elif "__" in filename: + # the pattern seems to be that anything to left of the first "__" is the series name followed by issue + filename = re.sub("__.*", 
self.repl, filename) + + filename = filename.replace("+", " ") + tmpstr = self.fixSpaces(filename, remove_dashes=False) + + series = tmpstr + volume = "" + + #save the last word + try: + last_word = series.split()[-1] + except: + last_word = "" + + # remove any parenthetical phrases + series = re.sub( "\(.*?\)", "", series) + + # search for volume number + match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) + if match: + series = match.group(1) + volume = match.group(3) + + # if a volume wasn't found, see if the last word is a year in parentheses + # since that's a common way to designate the volume + if volume == "": + #match either (YEAR), (YEAR-), or (YEAR-YEAR2) + match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word) + if match: + volume = match.group(2) + + series = series.strip() + + # if we don't have an issue number (issue_start==0), look + # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might + # be removed to help search online + if issue_start == 0: + one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ] + try: + last_word = series.split()[-1] + if last_word.lower() in one_shot_words: + series = series.rsplit(' ', 1)[0] + except: + pass + + return series, volume.strip() + + def getYear( self,filename, issue_end): + + filename = filename[issue_end:] + + year = "" + # look for four digit number with "(" ")" or "--" around it + match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename) + if match: + year = match.group() + # remove non-numerics + year = re.sub("[^0-9]", "", year) + return year + + def getRemainder( self, filename, year, count, issue_end ): + + #make a guess at where the the non-interesting stuff begins + remainder = "" + + if "--" in filename: + remainder = filename.split("--",1)[1] + elif "__" in filename: + remainder = filename.split("__",1)[1] + elif issue_end != 0: + remainder = filename[issue_end:] + + remainder = self.fixSpaces(remainder, remove_dashes=False) + if year != "": + remainder = 
remainder.replace(year,"",1) + if count != "": + remainder = remainder.replace("of "+count,"",1) + + remainder = remainder.replace("()","") + + return remainder.strip() + + def parseFilename( self, filename ): + + # remove the path + filename = os.path.basename(filename) + + # remove the extension + filename = os.path.splitext(filename)[0] + + #url decode, just in case + filename = unquote(filename) + + # sometimes archives get messed up names from too many decodings + # often url encodings will break and leave "_28" and "_29" in place + # of "(" and ")" see if there are a number of these, and replace them + if filename.count("_28") > 1 and filename.count("_29") > 1: + filename = filename.replace("_28", "(") + filename = filename.replace("_29", ")") + + self.issue, issue_start, issue_end = self.getIssueNumber(filename) + self.series, self.volume = self.getSeriesName(filename, issue_start) + self.year = self.getYear(filename, issue_end) + self.issue_count = self.getIssueCount(filename, issue_end) + self.remainder = self.getRemainder( filename, self.year, self.issue_count, issue_end ) + + if self.issue != "": + # strip off leading zeros + self.issue = self.issue.lstrip("0") + if self.issue == "": + self.issue = "0" + if self.issue[0] == ".": + self.issue = "0" + self.issue diff --git a/genericmetadata.py b/genericmetadata.py new file mode 100644 index 0000000..8e7aeaf --- /dev/null +++ b/genericmetadata.py @@ -0,0 +1,316 @@ +""" + A python class for internal metadata storage + + The goal of this class is to handle ALL the data that might come from various + tagging schemes and databases, such as ComicVine or GCD. This makes conversion + possible, however lossy it might be + +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import utils + +# These page info classes are exactly the same as the CIX scheme, since it's unique +class PageType: + FrontCover = "FrontCover" + InnerCover = "InnerCover" + Roundup = "Roundup" + Story = "Story" + Advertisement = "Advertisement" + Editorial = "Editorial" + Letters = "Letters" + Preview = "Preview" + BackCover = "BackCover" + Other = "Other" + Deleted = "Deleted" + +""" +class PageInfo: + Image = 0 + Type = PageType.Story + DoublePage = False + ImageSize = 0 + Key = "" + ImageWidth = 0 + ImageHeight = 0 +""" + +class GenericMetadata: + + def __init__(self): + + self.isEmpty = True + self.tagOrigin = None + + self.series = None + self.issue = None + self.title = None + self.publisher = None + self.month = None + self.year = None + self.day = None + self.issueCount = None + self.volume = None + self.genre = None + self.language = None # 2 letter iso code + self.comments = None # use same way as Summary in CIX + + self.volumeCount = None + self.criticalRating = None + self.country = None + + self.alternateSeries = None + self.alternateNumber = None + self.alternateCount = None + self.imprint = None + self.notes = None + self.webLink = None + self.format = None + self.manga = None + self.blackAndWhite = None + self.pageCount = None + self.maturityRating = None + + self.storyArc = None + self.seriesGroup = None + self.scanInfo = None + + self.characters = None + self.teams = None + self.locations = None + + self.credits = list() + self.tags = list() + self.pages = list() + + # Some CoMet-only items + self.price = None + self.isVersionOf = None + 
self.rights = None + self.identifier = None + self.lastMark = None + self.coverImage = None + + def overlay( self, new_md ): + # Overlay a metadata object on this one + # that is, when the new object has non-None + # values, over-write them to this one + + def assign( cur, new ): + if new is not None: + if type(new) == str and len(new) == 0: + setattr(self, cur, None) + else: + setattr(self, cur, new) + + if not new_md.isEmpty: + self.isEmpty = False + + assign( 'series', new_md.series ) + assign( "issue", new_md.issue ) + assign( "issueCount", new_md.issueCount ) + assign( "title", new_md.title ) + assign( "publisher", new_md.publisher ) + assign( "day", new_md.day ) + assign( "month", new_md.month ) + assign( "year", new_md.year ) + assign( "volume", new_md.volume ) + assign( "volumeCount", new_md.volumeCount ) + assign( "genre", new_md.genre ) + assign( "language", new_md.language ) + assign( "country", new_md.country ) + assign( "criticalRating", new_md.criticalRating ) + assign( "alternateSeries", new_md.alternateSeries ) + assign( "alternateNumber", new_md.alternateNumber ) + assign( "alternateCount", new_md.alternateCount ) + assign( "imprint", new_md.imprint ) + assign( "webLink", new_md.webLink ) + assign( "format", new_md.format ) + assign( "manga", new_md.manga ) + assign( "blackAndWhite", new_md.blackAndWhite ) + assign( "maturityRating", new_md.maturityRating ) + assign( "storyArc", new_md.storyArc ) + assign( "seriesGroup", new_md.seriesGroup ) + assign( "scanInfo", new_md.scanInfo ) + assign( "characters", new_md.characters ) + assign( "teams", new_md.teams ) + assign( "locations", new_md.locations ) + assign( "comments", new_md.comments ) + assign( "notes", new_md.notes ) + + assign( "price", new_md.price ) + assign( "isVersionOf", new_md.isVersionOf ) + assign( "rights", new_md.rights ) + assign( "identifier", new_md.identifier ) + assign( "lastMark", new_md.lastMark ) + + self.overlayCredits( new_md.credits ) + # TODO + + # not sure if the tags 
and pages should broken down, or treated + # as whole lists.... + + # For now, go the easy route, where any overlay + # value wipes out the whole list + if len(new_md.tags) > 0: + assign( "tags", new_md.tags ) + + if len(new_md.pages) > 0: + assign( "pages", new_md.pages ) + + + def overlayCredits( self, new_credits ): + for c in new_credits: + if c.has_key('primary') and c['primary']: + primary = True + else: + primary = False + + # Remove credit role if person is blank + if c['person'] == "": + for r in reversed(self.credits): + if r['role'].lower() == c['role'].lower(): + self.credits.remove(r) + # otherwise, add it! + else: + self.addCredit( c['person'], c['role'], primary ) + + def setDefaultPageList( self, count ): + # generate a default page list, with the first page marked as the cover + for i in range(count): + page_dict = dict() + page_dict['Image'] = str(i) + if i == 0: + page_dict['Type'] = PageType.FrontCover + self.pages.append( page_dict ) + + def getArchivePageIndex( self, pagenum ): + # convert the displayed page number to the page index of the file in the archive + if pagenum < len( self.pages ): + return int( self.pages[pagenum]['Image'] ) + else: + return 0 + + def getCoverPageIndexList( self ): + # return a list of archive page indices of cover pages + coverlist = [] + for p in self.pages: + if 'Type' in p and p['Type'] == PageType.FrontCover: + coverlist.append( int(p['Image'])) + + if len(coverlist) == 0: + coverlist.append( 0 ) + + return coverlist + + def addCredit( self, person, role, primary = False ): + + credit = dict() + credit['person'] = person + credit['role'] = role + if primary: + credit['primary'] = primary + + # look to see if it's not already there... + found = False + for c in self.credits: + if ( c['person'].lower() == person.lower() and + c['role'].lower() == role.lower() ): + # no need to add it. 
just adjust the "primary" flag as needed + c['primary'] = primary + found = True + break + + if not found: + self.credits.append(credit) + + + def __str__( self ): + vals = [] + if self.isEmpty: + return "No metadata" + + def add_string( tag, val ): + if val is not None and u"{0}".format(val) != "": + vals.append( (tag, val) ) + + def add_attr_string( tag ): + val = getattr(self,tag) + add_string( tag, getattr(self,tag) ) + + add_attr_string( "series" ) + add_attr_string( "issue" ) + add_attr_string( "issueCount" ) + add_attr_string( "title" ) + add_attr_string( "publisher" ) + add_attr_string( "year" ) + add_attr_string( "month" ) + add_attr_string( "day" ) + add_attr_string( "volume" ) + add_attr_string( "volumeCount" ) + add_attr_string( "genre" ) + add_attr_string( "language" ) + add_attr_string( "country" ) + add_attr_string( "criticalRating" ) + add_attr_string( "alternateSeries" ) + add_attr_string( "alternateNumber" ) + add_attr_string( "alternateCount" ) + add_attr_string( "imprint" ) + add_attr_string( "webLink" ) + add_attr_string( "format" ) + add_attr_string( "manga" ) + + add_attr_string( "price" ) + add_attr_string( "isVersionOf" ) + add_attr_string( "rights" ) + add_attr_string( "identifier" ) + add_attr_string( "lastMark" ) + + if self.blackAndWhite: + add_attr_string( "blackAndWhite" ) + add_attr_string( "maturityRating" ) + add_attr_string( "storyArc" ) + add_attr_string( "seriesGroup" ) + add_attr_string( "scanInfo" ) + add_attr_string( "characters" ) + add_attr_string( "teams" ) + add_attr_string( "locations" ) + add_attr_string( "comments" ) + add_attr_string( "notes" ) + + add_string( "tags", utils.listToString( self.tags ) ) + + for c in self.credits: + primary = "" + if c.has_key('primary') and c['primary']: + primary = " [P]" + add_string( "credit", c['role']+": "+c['person'] + primary) + + # find the longest field name + flen = 0 + for i in vals: + flen = max( flen, len(i[0]) ) + flen += 1 + + #format the data nicely + outstr = "" + 
fmt_str = u"{0: <" + str(flen) + "} {1}\n" + for i in vals: + outstr += fmt_str.format( i[0]+":", i[1] ) + + return outstr diff --git a/issuestring.py b/issuestring.py new file mode 100644 index 0000000..751aa8c --- /dev/null +++ b/issuestring.py @@ -0,0 +1,140 @@ +# coding=utf-8 +""" +Class for handling the odd permutations of an 'issue number' that the comics industry throws at us + +e.g.: + +"12" +"12.1" +"0" +"-1" +"5AU" +"100-2" + +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import utils +import math +import re + +class IssueString: + def __init__(self, text): + + # break up the issue number string into 2 parts: the numeric and suffix string. + # ( assumes that the numeric portion is always first ) + + self.num = None + self.suffix = "" + + if text is None: + return + + if type(text) == int: + text = str(text) + + if len(text) == 0: + return + + text = unicode(text) + + #skip the minus sign if it's first + if text[0] == '-': + start = 1 + else: + start = 0 + + # if it's still not numeric at start skip it + if text[start].isdigit() or text[start] == ".": + # walk through the string, look for split point (the first non-numeric) + decimal_count = 0 + for idx in range( start, len(text) ): + if text[idx] not in "0123456789.": + break + # special case: also split on second "." 
+ if text[idx] == ".": + decimal_count += 1 + if decimal_count > 1: + break + else: + idx = len(text) + + # move trailing numeric decimal to suffix + # (only if there is other junk after ) + if text[idx-1] == "." and len(text) != idx: + idx = idx -1 + + # if there is no numeric after the minus, make the minus part of the suffix + if idx == 1 and start == 1: + idx = 0 + + part1 = text[0:idx] + part2 = text[idx:len(text)] + + if part1 != "": + self.num = float( part1 ) + self.suffix = part2 + else: + self.suffix = text + + #print "num: {0} suf: {1}".format(self.num, self.suffix) + + def asString( self, pad = 0 ): + #return the float, left side zero-padded, with suffix attached + if self.num is None: + return self.suffix + + negative = self.num < 0 + + num_f = abs(self.num) + + num_int = int( num_f ) + num_s = str( num_int ) + if float( num_int ) != num_f: + num_s = str( num_f ) + + num_s += self.suffix + + # create padding + padding = "" + l = len( str(num_int)) + if l < pad : + padding = "0" * (pad - l) + + num_s = padding + num_s + if negative: + num_s = "-" + num_s + + return num_s + + def asFloat( self ): + #return the float, with no suffix + if self.suffix == u"½": + if self.num is not None: + return self.num + .5 + else: + return .5 + return self.num + + def asInt( self ): + #return the int version of the float + if self.num is None: + return None + return int( self.num ) + + diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..e315cd7 --- /dev/null +++ b/utils.py @@ -0,0 +1,597 @@ +# coding=utf-8 + +""" +Some generic utilities +""" + + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import sys +import os +import re +import platform +import locale +import codecs + + +class UtilsVars: + already_fixed_encoding = False + +def get_actual_preferred_encoding(): + preferred_encoding = locale.getpreferredencoding() + if platform.system() == "Darwin": + preferred_encoding = "utf-8" + return preferred_encoding + +def fix_output_encoding( ): + if not UtilsVars.already_fixed_encoding: + # this reads the environment and inits the right locale + locale.setlocale(locale.LC_ALL, "") + + # try to make stdout/stderr encodings happy for unicode printing + preferred_encoding = get_actual_preferred_encoding() + sys.stdout = codecs.getwriter(preferred_encoding)(sys.stdout) + sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr) + UtilsVars.already_fixed_encoding = True + +def get_recursive_filelist( pathlist ): + """ + Get a recursive list of of all files under all path items in the list + """ + filename_encoding = sys.getfilesystemencoding() + filelist = [] + for p in pathlist: + # if path is a folder, walk it recursivly, and all files underneath + if type(p) == str: + #make sure string is unicode + p = p.decode(filename_encoding) #, 'replace') + elif type(p) != unicode: + #it's probably a QString + p = unicode(p) + + if os.path.isdir( p ): + for root,dirs,files in os.walk( p ): + for f in files: + if type(f) == str: + #make sure string is unicode + f = f.decode(filename_encoding, 'replace') + elif type(f) != unicode: + #it's probably a QString + f = unicode(f) + filelist.append(os.path.join(root,f)) + else: + filelist.append(p) + + return filelist + +def 
listToString( l ): + string = "" + if l is not None: + for item in l: + if len(string) > 0: + string += ", " + string += item + return string + +def addtopath( dirname ): + if dirname is not None and dirname != "": + + # verify that path doesn't already contain the given dirname + tmpdirname = re.escape(dirname) + pattern = r"{sep}{dir}$|^{dir}{sep}|{sep}{dir}{sep}|^{dir}$".format( dir=tmpdirname, sep=os.pathsep) + + match = re.search(pattern, os.environ['PATH']) + if not match: + os.environ['PATH'] = dirname + os.pathsep + os.environ['PATH'] + +# returns executable path, if it exists +def which(program): + + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + +def removearticles( text ): + text = text.lower() + articles = ['and', 'the', 'a', '&', 'issue' ] + newText = '' + for word in text.split(' '): + if word not in articles: + newText += word+' ' + + newText = newText[:-1] + + # now get rid of some other junk + newText = newText.replace(":", "") + newText = newText.replace(",", "") + newText = newText.replace("-", " ") + + # since the CV api changed, searches for series names with periods + # now explicity require the period to be in the search key, + # so the line below is removed (for now) + #newText = newText.replace(".", "") + + return newText + + +def unique_file(file_name): + counter = 1 + file_name_parts = os.path.splitext(file_name) # returns ('/path/file', '.ext') + while 1: + if not os.path.lexists( file_name): + return file_name + file_name = file_name_parts[0] + ' (' + str(counter) + ')' + file_name_parts[1] + counter += 1 + + +# -o- coding: utf-8 -o- +# ISO639 python dict +# oficial list in http://www.loc.gov/standards/iso639-2/php/code_list.php + +lang_dict = { + 'ab': 
'Abkhaz', + 'aa': 'Afar', + 'af': 'Afrikaans', + 'ak': 'Akan', + 'sq': 'Albanian', + 'am': 'Amharic', + 'ar': 'Arabic', + 'an': 'Aragonese', + 'hy': 'Armenian', + 'as': 'Assamese', + 'av': 'Avaric', + 'ae': 'Avestan', + 'ay': 'Aymara', + 'az': 'Azerbaijani', + 'bm': 'Bambara', + 'ba': 'Bashkir', + 'eu': 'Basque', + 'be': 'Belarusian', + 'bn': 'Bengali', + 'bh': 'Bihari', + 'bi': 'Bislama', + 'bs': 'Bosnian', + 'br': 'Breton', + 'bg': 'Bulgarian', + 'my': 'Burmese', + 'ca': 'Catalan; Valencian', + 'ch': 'Chamorro', + 'ce': 'Chechen', + 'ny': 'Chichewa; Chewa; Nyanja', + 'zh': 'Chinese', + 'cv': 'Chuvash', + 'kw': 'Cornish', + 'co': 'Corsican', + 'cr': 'Cree', + 'hr': 'Croatian', + 'cs': 'Czech', + 'da': 'Danish', + 'dv': 'Divehi; Maldivian;', + 'nl': 'Dutch', + 'dz': 'Dzongkha', + 'en': 'English', + 'eo': 'Esperanto', + 'et': 'Estonian', + 'ee': 'Ewe', + 'fo': 'Faroese', + 'fj': 'Fijian', + 'fi': 'Finnish', + 'fr': 'French', + 'ff': 'Fula', + 'gl': 'Galician', + 'ka': 'Georgian', + 'de': 'German', + 'el': 'Greek, Modern', + 'gn': 'Guaraní', + 'gu': 'Gujarati', + 'ht': 'Haitian', + 'ha': 'Hausa', + 'he': 'Hebrew (modern)', + 'hz': 'Herero', + 'hi': 'Hindi', + 'ho': 'Hiri Motu', + 'hu': 'Hungarian', + 'ia': 'Interlingua', + 'id': 'Indonesian', + 'ie': 'Interlingue', + 'ga': 'Irish', + 'ig': 'Igbo', + 'ik': 'Inupiaq', + 'io': 'Ido', + 'is': 'Icelandic', + 'it': 'Italian', + 'iu': 'Inuktitut', + 'ja': 'Japanese', + 'jv': 'Javanese', + 'kl': 'Kalaallisut', + 'kn': 'Kannada', + 'kr': 'Kanuri', + 'ks': 'Kashmiri', + 'kk': 'Kazakh', + 'km': 'Khmer', + 'ki': 'Kikuyu, Gikuyu', + 'rw': 'Kinyarwanda', + 'ky': 'Kirghiz, Kyrgyz', + 'kv': 'Komi', + 'kg': 'Kongo', + 'ko': 'Korean', + 'ku': 'Kurdish', + 'kj': 'Kwanyama, Kuanyama', + 'la': 'Latin', + 'lb': 'Luxembourgish', + 'lg': 'Luganda', + 'li': 'Limburgish', + 'ln': 'Lingala', + 'lo': 'Lao', + 'lt': 'Lithuanian', + 'lu': 'Luba-Katanga', + 'lv': 'Latvian', + 'gv': 'Manx', + 'mk': 'Macedonian', + 'mg': 'Malagasy', + 'ms': 'Malay', 
+ 'ml': 'Malayalam', + 'mt': 'Maltese', + 'mi': 'Māori', + 'mr': 'Marathi (Marāṭhī)', + 'mh': 'Marshallese', + 'mn': 'Mongolian', + 'na': 'Nauru', + 'nv': 'Navajo, Navaho', + 'nb': 'Norwegian Bokmål', + 'nd': 'North Ndebele', + 'ne': 'Nepali', + 'ng': 'Ndonga', + 'nn': 'Norwegian Nynorsk', + 'no': 'Norwegian', + 'ii': 'Nuosu', + 'nr': 'South Ndebele', + 'oc': 'Occitan', + 'oj': 'Ojibwe, Ojibwa', + 'cu': 'Old Church Slavonic', + 'om': 'Oromo', + 'or': 'Oriya', + 'os': 'Ossetian, Ossetic', + 'pa': 'Panjabi, Punjabi', + 'pi': 'Pāli', + 'fa': 'Persian', + 'pl': 'Polish', + 'ps': 'Pashto, Pushto', + 'pt': 'Portuguese', + 'qu': 'Quechua', + 'rm': 'Romansh', + 'rn': 'Kirundi', + 'ro': 'Romanian, Moldavan', + 'ru': 'Russian', + 'sa': 'Sanskrit (Saṁskṛta)', + 'sc': 'Sardinian', + 'sd': 'Sindhi', + 'se': 'Northern Sami', + 'sm': 'Samoan', + 'sg': 'Sango', + 'sr': 'Serbian', + 'gd': 'Scottish Gaelic', + 'sn': 'Shona', + 'si': 'Sinhala, Sinhalese', + 'sk': 'Slovak', + 'sl': 'Slovene', + 'so': 'Somali', + 'st': 'Southern Sotho', + 'es': 'Spanish; Castilian', + 'su': 'Sundanese', + 'sw': 'Swahili', + 'ss': 'Swati', + 'sv': 'Swedish', + 'ta': 'Tamil', + 'te': 'Telugu', + 'tg': 'Tajik', + 'th': 'Thai', + 'ti': 'Tigrinya', + 'bo': 'Tibetan', + 'tk': 'Turkmen', + 'tl': 'Tagalog', + 'tn': 'Tswana', + 'to': 'Tonga', + 'tr': 'Turkish', + 'ts': 'Tsonga', + 'tt': 'Tatar', + 'tw': 'Twi', + 'ty': 'Tahitian', + 'ug': 'Uighur, Uyghur', + 'uk': 'Ukrainian', + 'ur': 'Urdu', + 'uz': 'Uzbek', + 've': 'Venda', + 'vi': 'Vietnamese', + 'vo': 'Volapük', + 'wa': 'Walloon', + 'cy': 'Welsh', + 'wo': 'Wolof', + 'fy': 'Western Frisian', + 'xh': 'Xhosa', + 'yi': 'Yiddish', + 'yo': 'Yoruba', + 'za': 'Zhuang, Chuang', + 'zu': 'Zulu', +} + + +countries = [ + ('AF', 'Afghanistan'), + ('AL', 'Albania'), + ('DZ', 'Algeria'), + ('AS', 'American Samoa'), + ('AD', 'Andorra'), + ('AO', 'Angola'), + ('AI', 'Anguilla'), + ('AQ', 'Antarctica'), + ('AG', 'Antigua And Barbuda'), + ('AR', 'Argentina'), + ('AM', 
'Armenia'), + ('AW', 'Aruba'), + ('AU', 'Australia'), + ('AT', 'Austria'), + ('AZ', 'Azerbaijan'), + ('BS', 'Bahamas'), + ('BH', 'Bahrain'), + ('BD', 'Bangladesh'), + ('BB', 'Barbados'), + ('BY', 'Belarus'), + ('BE', 'Belgium'), + ('BZ', 'Belize'), + ('BJ', 'Benin'), + ('BM', 'Bermuda'), + ('BT', 'Bhutan'), + ('BO', 'Bolivia'), + ('BA', 'Bosnia And Herzegowina'), + ('BW', 'Botswana'), + ('BV', 'Bouvet Island'), + ('BR', 'Brazil'), + ('BN', 'Brunei Darussalam'), + ('BG', 'Bulgaria'), + ('BF', 'Burkina Faso'), + ('BI', 'Burundi'), + ('KH', 'Cambodia'), + ('CM', 'Cameroon'), + ('CA', 'Canada'), + ('CV', 'Cape Verde'), + ('KY', 'Cayman Islands'), + ('CF', 'Central African Rep'), + ('TD', 'Chad'), + ('CL', 'Chile'), + ('CN', 'China'), + ('CX', 'Christmas Island'), + ('CC', 'Cocos Islands'), + ('CO', 'Colombia'), + ('KM', 'Comoros'), + ('CG', 'Congo'), + ('CK', 'Cook Islands'), + ('CR', 'Costa Rica'), + ('CI', 'Cote D`ivoire'), + ('HR', 'Croatia'), + ('CU', 'Cuba'), + ('CY', 'Cyprus'), + ('CZ', 'Czech Republic'), + ('DK', 'Denmark'), + ('DJ', 'Djibouti'), + ('DM', 'Dominica'), + ('DO', 'Dominican Republic'), + ('TP', 'East Timor'), + ('EC', 'Ecuador'), + ('EG', 'Egypt'), + ('SV', 'El Salvador'), + ('GQ', 'Equatorial Guinea'), + ('ER', 'Eritrea'), + ('EE', 'Estonia'), + ('ET', 'Ethiopia'), + ('FK', 'Falkland Islands (Malvinas)'), + ('FO', 'Faroe Islands'), + ('FJ', 'Fiji'), + ('FI', 'Finland'), + ('FR', 'France'), + ('GF', 'French Guiana'), + ('PF', 'French Polynesia'), + ('TF', 'French S. 
Territories'), + ('GA', 'Gabon'), + ('GM', 'Gambia'), + ('GE', 'Georgia'), + ('DE', 'Germany'), + ('GH', 'Ghana'), + ('GI', 'Gibraltar'), + ('GR', 'Greece'), + ('GL', 'Greenland'), + ('GD', 'Grenada'), + ('GP', 'Guadeloupe'), + ('GU', 'Guam'), + ('GT', 'Guatemala'), + ('GN', 'Guinea'), + ('GW', 'Guinea-bissau'), + ('GY', 'Guyana'), + ('HT', 'Haiti'), + ('HN', 'Honduras'), + ('HK', 'Hong Kong'), + ('HU', 'Hungary'), + ('IS', 'Iceland'), + ('IN', 'India'), + ('ID', 'Indonesia'), + ('IR', 'Iran'), + ('IQ', 'Iraq'), + ('IE', 'Ireland'), + ('IL', 'Israel'), + ('IT', 'Italy'), + ('JM', 'Jamaica'), + ('JP', 'Japan'), + ('JO', 'Jordan'), + ('KZ', 'Kazakhstan'), + ('KE', 'Kenya'), + ('KI', 'Kiribati'), + ('KP', 'Korea (North)'), + ('KR', 'Korea (South)'), + ('KW', 'Kuwait'), + ('KG', 'Kyrgyzstan'), + ('LA', 'Laos'), + ('LV', 'Latvia'), + ('LB', 'Lebanon'), + ('LS', 'Lesotho'), + ('LR', 'Liberia'), + ('LY', 'Libya'), + ('LI', 'Liechtenstein'), + ('LT', 'Lithuania'), + ('LU', 'Luxembourg'), + ('MO', 'Macau'), + ('MK', 'Macedonia'), + ('MG', 'Madagascar'), + ('MW', 'Malawi'), + ('MY', 'Malaysia'), + ('MV', 'Maldives'), + ('ML', 'Mali'), + ('MT', 'Malta'), + ('MH', 'Marshall Islands'), + ('MQ', 'Martinique'), + ('MR', 'Mauritania'), + ('MU', 'Mauritius'), + ('YT', 'Mayotte'), + ('MX', 'Mexico'), + ('FM', 'Micronesia'), + ('MD', 'Moldova'), + ('MC', 'Monaco'), + ('MN', 'Mongolia'), + ('MS', 'Montserrat'), + ('MA', 'Morocco'), + ('MZ', 'Mozambique'), + ('MM', 'Myanmar'), + ('NA', 'Namibia'), + ('NR', 'Nauru'), + ('NP', 'Nepal'), + ('NL', 'Netherlands'), + ('AN', 'Netherlands Antilles'), + ('NC', 'New Caledonia'), + ('NZ', 'New Zealand'), + ('NI', 'Nicaragua'), + ('NE', 'Niger'), + ('NG', 'Nigeria'), + ('NU', 'Niue'), + ('NF', 'Norfolk Island'), + ('MP', 'Northern Mariana Islands'), + ('NO', 'Norway'), + ('OM', 'Oman'), + ('PK', 'Pakistan'), + ('PW', 'Palau'), + ('PA', 'Panama'), + ('PG', 'Papua New Guinea'), + ('PY', 'Paraguay'), + ('PE', 'Peru'), + ('PH', 'Philippines'), + 
('PN', 'Pitcairn'), + ('PL', 'Poland'), + ('PT', 'Portugal'), + ('PR', 'Puerto Rico'), + ('QA', 'Qatar'), + ('RE', 'Reunion'), + ('RO', 'Romania'), + ('RU', 'Russian Federation'), + ('RW', 'Rwanda'), + ('KN', 'Saint Kitts And Nevis'), + ('LC', 'Saint Lucia'), + ('VC', 'St Vincent/Grenadines'), + ('WS', 'Samoa'), + ('SM', 'San Marino'), + ('ST', 'Sao Tome'), + ('SA', 'Saudi Arabia'), + ('SN', 'Senegal'), + ('SC', 'Seychelles'), + ('SL', 'Sierra Leone'), + ('SG', 'Singapore'), + ('SK', 'Slovakia'), + ('SI', 'Slovenia'), + ('SB', 'Solomon Islands'), + ('SO', 'Somalia'), + ('ZA', 'South Africa'), + ('ES', 'Spain'), + ('LK', 'Sri Lanka'), + ('SH', 'St. Helena'), + ('PM', 'St.Pierre'), + ('SD', 'Sudan'), + ('SR', 'Suriname'), + ('SZ', 'Swaziland'), + ('SE', 'Sweden'), + ('CH', 'Switzerland'), + ('SY', 'Syrian Arab Republic'), + ('TW', 'Taiwan'), + ('TJ', 'Tajikistan'), + ('TZ', 'Tanzania'), + ('TH', 'Thailand'), + ('TG', 'Togo'), + ('TK', 'Tokelau'), + ('TO', 'Tonga'), + ('TT', 'Trinidad And Tobago'), + ('TN', 'Tunisia'), + ('TR', 'Turkey'), + ('TM', 'Turkmenistan'), + ('TV', 'Tuvalu'), + ('UG', 'Uganda'), + ('UA', 'Ukraine'), + ('AE', 'United Arab Emirates'), + ('UK', 'United Kingdom'), + ('US', 'United States'), + ('UY', 'Uruguay'), + ('UZ', 'Uzbekistan'), + ('VU', 'Vanuatu'), + ('VA', 'Vatican City State'), + ('VE', 'Venezuela'), + ('VN', 'Viet Nam'), + ('VG', 'Virgin Islands (British)'), + ('VI', 'Virgin Islands (U.S.)'), + ('EH', 'Western Sahara'), + ('YE', 'Yemen'), + ('YU', 'Yugoslavia'), + ('ZR', 'Zaire'), + ('ZM', 'Zambia'), + ('ZW', 'Zimbabwe') +] + + + +def getLanguageDict(): + return lang_dict + +def getLanguageFromISO( iso ): + if iso == None: + return None + else: + return lang_dict[ iso ] + + + + + + + + + +