From a9ff8f37b060082cc3ba714eff6eb1a39343e7c2 Mon Sep 17 00:00:00 2001 From: Davide Romanini Date: Wed, 11 Feb 2015 19:45:02 +0100 Subject: [PATCH] refactor core comicarchive classes in its own package comicapi --- .../UnRAR2/UnRARDLL/license.txt | 0 .../UnRAR2/UnRARDLL/unrar.dll | Bin .../UnRAR2/UnRARDLL/unrar.h | 280 ++-- .../UnRAR2/UnRARDLL/unrardll.txt | 1212 ++++++++--------- .../UnRAR2/UnRARDLL/whatsnew.txt | 160 +-- .../UnRAR2/UnRARDLL/x64/readme.txt | 2 +- .../UnRAR2/UnRARDLL/x64/unrar64.dll | Bin .../UnRAR2/__init__.py | 354 ++--- .../UnRAR2/rar_exceptions.py | 0 .../UnRAR2/test_UnRAR2.py | 276 ++-- {comictaggerlib => comicapi}/UnRAR2/unix.py | 0 .../UnRAR2/windows.py | 0 comicapi/__init__.py | 1 + comicapi/comet.py | 260 ++++ comicapi/comicarchive.py | 1088 +++++++++++++++ comicapi/comicbookinfo.py | 152 +++ comicapi/comicinfoxml.py | 293 ++++ comicapi/filenameparser.py | 277 ++++ comicapi/genericmetadata.py | 316 +++++ comicapi/issuestring.py | 140 ++ comicapi/utils.py | 597 ++++++++ comictaggerlib/comet.py | 261 +--- comictaggerlib/comicarchive.py | 1089 +-------------- comictaggerlib/comicbookinfo.py | 153 +-- comictaggerlib/comicinfoxml.py | 294 +--- comictaggerlib/filenameparser.py | 278 +--- comictaggerlib/genericmetadata.py | 317 +---- comictaggerlib/issuestring.py | 141 +- comictaggerlib/utils.py | 598 +------- 29 files changed, 4274 insertions(+), 4265 deletions(-) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/license.txt (100%) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/unrar.dll (100%) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/unrar.h (96%) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/unrardll.txt (96%) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/whatsnew.txt (96%) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/x64/readme.txt (97%) rename {comictaggerlib => comicapi}/UnRAR2/UnRARDLL/x64/unrar64.dll (100%) rename {comictaggerlib => comicapi}/UnRAR2/__init__.py (97%) rename {comictaggerlib => 
comicapi}/UnRAR2/rar_exceptions.py (100%) rename {comictaggerlib => comicapi}/UnRAR2/test_UnRAR2.py (96%) rename {comictaggerlib => comicapi}/UnRAR2/unix.py (100%) rename {comictaggerlib => comicapi}/UnRAR2/windows.py (100%) create mode 100644 comicapi/__init__.py create mode 100644 comicapi/comet.py create mode 100644 comicapi/comicarchive.py create mode 100644 comicapi/comicbookinfo.py create mode 100644 comicapi/comicinfoxml.py create mode 100644 comicapi/filenameparser.py create mode 100644 comicapi/genericmetadata.py create mode 100644 comicapi/issuestring.py create mode 100644 comicapi/utils.py diff --git a/comictaggerlib/UnRAR2/UnRARDLL/license.txt b/comicapi/UnRAR2/UnRARDLL/license.txt similarity index 100% rename from comictaggerlib/UnRAR2/UnRARDLL/license.txt rename to comicapi/UnRAR2/UnRARDLL/license.txt diff --git a/comictaggerlib/UnRAR2/UnRARDLL/unrar.dll b/comicapi/UnRAR2/UnRARDLL/unrar.dll similarity index 100% rename from comictaggerlib/UnRAR2/UnRARDLL/unrar.dll rename to comicapi/UnRAR2/UnRARDLL/unrar.dll diff --git a/comictaggerlib/UnRAR2/UnRARDLL/unrar.h b/comicapi/UnRAR2/UnRARDLL/unrar.h similarity index 96% rename from comictaggerlib/UnRAR2/UnRARDLL/unrar.h rename to comicapi/UnRAR2/UnRARDLL/unrar.h index 7643fa7..4582f2c 100644 --- a/comictaggerlib/UnRAR2/UnRARDLL/unrar.h +++ b/comicapi/UnRAR2/UnRARDLL/unrar.h @@ -1,140 +1,140 @@ -#ifndef _UNRAR_DLL_ -#define _UNRAR_DLL_ - -#define ERAR_END_ARCHIVE 10 -#define ERAR_NO_MEMORY 11 -#define ERAR_BAD_DATA 12 -#define ERAR_BAD_ARCHIVE 13 -#define ERAR_UNKNOWN_FORMAT 14 -#define ERAR_EOPEN 15 -#define ERAR_ECREATE 16 -#define ERAR_ECLOSE 17 -#define ERAR_EREAD 18 -#define ERAR_EWRITE 19 -#define ERAR_SMALL_BUF 20 -#define ERAR_UNKNOWN 21 -#define ERAR_MISSING_PASSWORD 22 - -#define RAR_OM_LIST 0 -#define RAR_OM_EXTRACT 1 -#define RAR_OM_LIST_INCSPLIT 2 - -#define RAR_SKIP 0 -#define RAR_TEST 1 -#define RAR_EXTRACT 2 - -#define RAR_VOL_ASK 0 -#define RAR_VOL_NOTIFY 1 - -#define RAR_DLL_VERSION 4 - 
-#ifdef _UNIX -#define CALLBACK -#define PASCAL -#define LONG long -#define HANDLE void * -#define LPARAM long -#define UINT unsigned int -#endif - -struct RARHeaderData -{ - char ArcName[260]; - char FileName[260]; - unsigned int Flags; - unsigned int PackSize; - unsigned int UnpSize; - unsigned int HostOS; - unsigned int FileCRC; - unsigned int FileTime; - unsigned int UnpVer; - unsigned int Method; - unsigned int FileAttr; - char *CmtBuf; - unsigned int CmtBufSize; - unsigned int CmtSize; - unsigned int CmtState; -}; - - -struct RARHeaderDataEx -{ - char ArcName[1024]; - wchar_t ArcNameW[1024]; - char FileName[1024]; - wchar_t FileNameW[1024]; - unsigned int Flags; - unsigned int PackSize; - unsigned int PackSizeHigh; - unsigned int UnpSize; - unsigned int UnpSizeHigh; - unsigned int HostOS; - unsigned int FileCRC; - unsigned int FileTime; - unsigned int UnpVer; - unsigned int Method; - unsigned int FileAttr; - char *CmtBuf; - unsigned int CmtBufSize; - unsigned int CmtSize; - unsigned int CmtState; - unsigned int Reserved[1024]; -}; - - -struct RAROpenArchiveData -{ - char *ArcName; - unsigned int OpenMode; - unsigned int OpenResult; - char *CmtBuf; - unsigned int CmtBufSize; - unsigned int CmtSize; - unsigned int CmtState; -}; - -struct RAROpenArchiveDataEx -{ - char *ArcName; - wchar_t *ArcNameW; - unsigned int OpenMode; - unsigned int OpenResult; - char *CmtBuf; - unsigned int CmtBufSize; - unsigned int CmtSize; - unsigned int CmtState; - unsigned int Flags; - unsigned int Reserved[32]; -}; - -enum UNRARCALLBACK_MESSAGES { - UCM_CHANGEVOLUME,UCM_PROCESSDATA,UCM_NEEDPASSWORD -}; - -typedef int (CALLBACK *UNRARCALLBACK)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2); - -typedef int (PASCAL *CHANGEVOLPROC)(char *ArcName,int Mode); -typedef int (PASCAL *PROCESSDATAPROC)(unsigned char *Addr,int Size); - -#ifdef __cplusplus -extern "C" { -#endif - -HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData); -HANDLE PASCAL RAROpenArchiveEx(struct 
RAROpenArchiveDataEx *ArchiveData); -int PASCAL RARCloseArchive(HANDLE hArcData); -int PASCAL RARReadHeader(HANDLE hArcData,struct RARHeaderData *HeaderData); -int PASCAL RARReadHeaderEx(HANDLE hArcData,struct RARHeaderDataEx *HeaderData); -int PASCAL RARProcessFile(HANDLE hArcData,int Operation,char *DestPath,char *DestName); -int PASCAL RARProcessFileW(HANDLE hArcData,int Operation,wchar_t *DestPath,wchar_t *DestName); -void PASCAL RARSetCallback(HANDLE hArcData,UNRARCALLBACK Callback,LPARAM UserData); -void PASCAL RARSetChangeVolProc(HANDLE hArcData,CHANGEVOLPROC ChangeVolProc); -void PASCAL RARSetProcessDataProc(HANDLE hArcData,PROCESSDATAPROC ProcessDataProc); -void PASCAL RARSetPassword(HANDLE hArcData,char *Password); -int PASCAL RARGetDllVersion(); - -#ifdef __cplusplus -} -#endif - -#endif +#ifndef _UNRAR_DLL_ +#define _UNRAR_DLL_ + +#define ERAR_END_ARCHIVE 10 +#define ERAR_NO_MEMORY 11 +#define ERAR_BAD_DATA 12 +#define ERAR_BAD_ARCHIVE 13 +#define ERAR_UNKNOWN_FORMAT 14 +#define ERAR_EOPEN 15 +#define ERAR_ECREATE 16 +#define ERAR_ECLOSE 17 +#define ERAR_EREAD 18 +#define ERAR_EWRITE 19 +#define ERAR_SMALL_BUF 20 +#define ERAR_UNKNOWN 21 +#define ERAR_MISSING_PASSWORD 22 + +#define RAR_OM_LIST 0 +#define RAR_OM_EXTRACT 1 +#define RAR_OM_LIST_INCSPLIT 2 + +#define RAR_SKIP 0 +#define RAR_TEST 1 +#define RAR_EXTRACT 2 + +#define RAR_VOL_ASK 0 +#define RAR_VOL_NOTIFY 1 + +#define RAR_DLL_VERSION 4 + +#ifdef _UNIX +#define CALLBACK +#define PASCAL +#define LONG long +#define HANDLE void * +#define LPARAM long +#define UINT unsigned int +#endif + +struct RARHeaderData +{ + char ArcName[260]; + char FileName[260]; + unsigned int Flags; + unsigned int PackSize; + unsigned int UnpSize; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; +}; + + +struct RARHeaderDataEx +{ + char 
ArcName[1024]; + wchar_t ArcNameW[1024]; + char FileName[1024]; + wchar_t FileNameW[1024]; + unsigned int Flags; + unsigned int PackSize; + unsigned int PackSizeHigh; + unsigned int UnpSize; + unsigned int UnpSizeHigh; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Reserved[1024]; +}; + + +struct RAROpenArchiveData +{ + char *ArcName; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; +}; + +struct RAROpenArchiveDataEx +{ + char *ArcName; + wchar_t *ArcNameW; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Flags; + unsigned int Reserved[32]; +}; + +enum UNRARCALLBACK_MESSAGES { + UCM_CHANGEVOLUME,UCM_PROCESSDATA,UCM_NEEDPASSWORD +}; + +typedef int (CALLBACK *UNRARCALLBACK)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2); + +typedef int (PASCAL *CHANGEVOLPROC)(char *ArcName,int Mode); +typedef int (PASCAL *PROCESSDATAPROC)(unsigned char *Addr,int Size); + +#ifdef __cplusplus +extern "C" { +#endif + +HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData); +HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *ArchiveData); +int PASCAL RARCloseArchive(HANDLE hArcData); +int PASCAL RARReadHeader(HANDLE hArcData,struct RARHeaderData *HeaderData); +int PASCAL RARReadHeaderEx(HANDLE hArcData,struct RARHeaderDataEx *HeaderData); +int PASCAL RARProcessFile(HANDLE hArcData,int Operation,char *DestPath,char *DestName); +int PASCAL RARProcessFileW(HANDLE hArcData,int Operation,wchar_t *DestPath,wchar_t *DestName); +void PASCAL RARSetCallback(HANDLE hArcData,UNRARCALLBACK Callback,LPARAM UserData); +void PASCAL RARSetChangeVolProc(HANDLE hArcData,CHANGEVOLPROC 
ChangeVolProc); +void PASCAL RARSetProcessDataProc(HANDLE hArcData,PROCESSDATAPROC ProcessDataProc); +void PASCAL RARSetPassword(HANDLE hArcData,char *Password); +int PASCAL RARGetDllVersion(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/comictaggerlib/UnRAR2/UnRARDLL/unrardll.txt b/comicapi/UnRAR2/UnRARDLL/unrardll.txt similarity index 96% rename from comictaggerlib/UnRAR2/UnRARDLL/unrardll.txt rename to comicapi/UnRAR2/UnRARDLL/unrardll.txt index 291c871..c49dd5b 100644 --- a/comictaggerlib/UnRAR2/UnRARDLL/unrardll.txt +++ b/comicapi/UnRAR2/UnRARDLL/unrardll.txt @@ -1,606 +1,606 @@ - - UnRAR.dll Manual - ~~~~~~~~~~~~~~~~ - - UnRAR.dll is a 32-bit Windows dynamic-link library which provides - file extraction from RAR archives. - - - Exported functions - -==================================================================== -HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData) -==================================================================== - -Description -~~~~~~~~~~~ - Open RAR archive and allocate memory structures - -Parameters -~~~~~~~~~~ -ArchiveData Points to RAROpenArchiveData structure - -struct RAROpenArchiveData -{ - char *ArcName; - UINT OpenMode; - UINT OpenResult; - char *CmtBuf; - UINT CmtBufSize; - UINT CmtSize; - UINT CmtState; -}; - -Structure fields: - -ArcName - Input parameter which should point to zero terminated string - containing the archive name. - -OpenMode - Input parameter. - - Possible values - - RAR_OM_LIST - Open archive for reading file headers only. - - RAR_OM_EXTRACT - Open archive for testing and extracting files. - - RAR_OM_LIST_INCSPLIT - Open archive for reading file headers only. If you open an archive - in such mode, RARReadHeader[Ex] will return all file headers, - including those with "file continued from previous volume" flag. - In case of RAR_OM_LIST such headers are automatically skipped. 
- So if you process RAR volumes in RAR_OM_LIST_INCSPLIT mode, you will - get several file header records for same file if file is split between - volumes. For such files only the last file header record will contain - the correct file CRC and if you wish to get the correct packed size, - you need to sum up packed sizes of all parts. - -OpenResult - Output parameter. - - Possible values - - 0 Success - ERAR_NO_MEMORY Not enough memory to initialize data structures - ERAR_BAD_DATA Archive header broken - ERAR_BAD_ARCHIVE File is not valid RAR archive - ERAR_UNKNOWN_FORMAT Unknown encryption used for archive headers - ERAR_EOPEN File open error - -CmtBuf - Input parameter which should point to the buffer for archive - comments. Maximum comment size is limited to 64Kb. Comment text is - zero terminated. If the comment text is larger than the buffer - size, the comment text will be truncated. If CmtBuf is set to - NULL, comments will not be read. - -CmtBufSize - Input parameter which should contain size of buffer for archive - comments. - -CmtSize - Output parameter containing size of comments actually read into the - buffer, cannot exceed CmtBufSize. - -CmtState - Output parameter. - - Possible values - - 0 comments not present - 1 Comments read completely - ERAR_NO_MEMORY Not enough memory to extract comments - ERAR_BAD_DATA Broken comment - ERAR_UNKNOWN_FORMAT Unknown comment format - ERAR_SMALL_BUF Buffer too small, comments not completely read - -Return values -~~~~~~~~~~~~~ - Archive handle or NULL in case of error - - -======================================================================== -HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *ArchiveData) -======================================================================== - -Description -~~~~~~~~~~~ - Similar to RAROpenArchive, but uses RAROpenArchiveDataEx structure - allowing to specify Unicode archive name and returning information - about archive flags. 
- -Parameters -~~~~~~~~~~ -ArchiveData Points to RAROpenArchiveDataEx structure - -struct RAROpenArchiveDataEx -{ - char *ArcName; - wchar_t *ArcNameW; - unsigned int OpenMode; - unsigned int OpenResult; - char *CmtBuf; - unsigned int CmtBufSize; - unsigned int CmtSize; - unsigned int CmtState; - unsigned int Flags; - unsigned int Reserved[32]; -}; - -Structure fields: - -ArcNameW - Input parameter which should point to zero terminated Unicode string - containing the archive name or NULL if Unicode name is not specified. - -Flags - Output parameter. Combination of bit flags. - - Possible values - - 0x0001 - Volume attribute (archive volume) - 0x0002 - Archive comment present - 0x0004 - Archive lock attribute - 0x0008 - Solid attribute (solid archive) - 0x0010 - New volume naming scheme ('volname.partN.rar') - 0x0020 - Authenticity information present - 0x0040 - Recovery record present - 0x0080 - Block headers are encrypted - 0x0100 - First volume (set only by RAR 3.0 and later) - -Reserved[32] - Reserved for future use. Must be zero. - -Information on other structure fields and function return values -is available above, in RAROpenArchive function description. - - -==================================================================== -int PASCAL RARCloseArchive(HANDLE hArcData) -==================================================================== - -Description -~~~~~~~~~~~ - Close RAR archive and release allocated memory. It must be called when - archive processing is finished, even if the archive processing was stopped - due to an error. - -Parameters -~~~~~~~~~~ -hArcData - This parameter should contain the archive handle obtained from the - RAROpenArchive function call. 
- -Return values -~~~~~~~~~~~~~ - 0 Success - ERAR_ECLOSE Archive close error - - -==================================================================== -int PASCAL RARReadHeader(HANDLE hArcData, - struct RARHeaderData *HeaderData) -==================================================================== - -Description -~~~~~~~~~~~ - Read header of file in archive. - -Parameters -~~~~~~~~~~ -hArcData - This parameter should contain the archive handle obtained from the - RAROpenArchive function call. - -HeaderData - It should point to RARHeaderData structure: - -struct RARHeaderData -{ - char ArcName[260]; - char FileName[260]; - UINT Flags; - UINT PackSize; - UINT UnpSize; - UINT HostOS; - UINT FileCRC; - UINT FileTime; - UINT UnpVer; - UINT Method; - UINT FileAttr; - char *CmtBuf; - UINT CmtBufSize; - UINT CmtSize; - UINT CmtState; -}; - -Structure fields: - -ArcName - Output parameter which contains a zero terminated string of the - current archive name. May be used to determine the current volume - name. - -FileName - Output parameter which contains a zero terminated string of the - file name in OEM (DOS) encoding. - -Flags - Output parameter which contains file flags: - - 0x01 - file continued from previous volume - 0x02 - file continued on next volume - 0x04 - file encrypted with password - 0x08 - file comment present - 0x10 - compression of previous files is used (solid flag) - - bits 7 6 5 - - 0 0 0 - dictionary size 64 Kb - 0 0 1 - dictionary size 128 Kb - 0 1 0 - dictionary size 256 Kb - 0 1 1 - dictionary size 512 Kb - 1 0 0 - dictionary size 1024 Kb - 1 0 1 - dictionary size 2048 KB - 1 1 0 - dictionary size 4096 KB - 1 1 1 - file is directory - - Other bits are reserved. - -PackSize - Output parameter means packed file size or size of the - file part if file was split between volumes. - -UnpSize - Output parameter - unpacked file size. - -HostOS - Output parameter - operating system used for archiving: - - 0 - MS DOS; - 1 - OS/2. 
- 2 - Win32 - 3 - Unix - -FileCRC - Output parameter which contains unpacked file CRC. In case of file parts - split between volumes only the last part contains the correct CRC - and it is accessible only in RAR_OM_LIST_INCSPLIT listing mode. - -FileTime - Output parameter - contains date and time in standard MS DOS format. - -UnpVer - Output parameter - RAR version needed to extract file. - It is encoded as 10 * Major version + minor version. - -Method - Output parameter - packing method. - -FileAttr - Output parameter - file attributes. - -CmtBuf - File comments support is not implemented in the new DLL version yet. - Now CmtState is always 0. - -/* - * Input parameter which should point to the buffer for file - * comments. Maximum comment size is limited to 64Kb. Comment text is - * a zero terminated string in OEM encoding. If the comment text is - * larger than the buffer size, the comment text will be truncated. - * If CmtBuf is set to NULL, comments will not be read. - */ - -CmtBufSize - Input parameter which should contain size of buffer for archive - comments. - -CmtSize - Output parameter containing size of comments actually read into the - buffer, should not exceed CmtBufSize. - -CmtState - Output parameter. 
- - Possible values - - 0 Absent comments - 1 Comments read completely - ERAR_NO_MEMORY Not enough memory to extract comments - ERAR_BAD_DATA Broken comment - ERAR_UNKNOWN_FORMAT Unknown comment format - ERAR_SMALL_BUF Buffer too small, comments not completely read - -Return values -~~~~~~~~~~~~~ - - 0 Success - ERAR_END_ARCHIVE End of archive - ERAR_BAD_DATA File header broken - - -==================================================================== -int PASCAL RARReadHeaderEx(HANDLE hArcData, - struct RARHeaderDataEx *HeaderData) -==================================================================== - -Description -~~~~~~~~~~~ - Similar to RARReadHeader, but uses RARHeaderDataEx structure, -containing information about Unicode file names and 64 bit file sizes. - -struct RARHeaderDataEx -{ - char ArcName[1024]; - wchar_t ArcNameW[1024]; - char FileName[1024]; - wchar_t FileNameW[1024]; - unsigned int Flags; - unsigned int PackSize; - unsigned int PackSizeHigh; - unsigned int UnpSize; - unsigned int UnpSizeHigh; - unsigned int HostOS; - unsigned int FileCRC; - unsigned int FileTime; - unsigned int UnpVer; - unsigned int Method; - unsigned int FileAttr; - char *CmtBuf; - unsigned int CmtBufSize; - unsigned int CmtSize; - unsigned int CmtState; - unsigned int Reserved[1024]; -}; - - -==================================================================== -int PASCAL RARProcessFile(HANDLE hArcData, - int Operation, - char *DestPath, - char *DestName) -==================================================================== - -Description -~~~~~~~~~~~ - Performs action and moves the current position in the archive to - the next file. Extract or test the current file from the archive - opened in RAR_OM_EXTRACT mode. If the mode RAR_OM_LIST is set, - then a call to this function will simply skip the archive position - to the next file. - -Parameters -~~~~~~~~~~ -hArcData - This parameter should contain the archive handle obtained from the - RAROpenArchive function call. 
- -Operation - File operation. - - Possible values - - RAR_SKIP Move to the next file in the archive. If the - archive is solid and RAR_OM_EXTRACT mode was set - when the archive was opened, the current file will - be processed - the operation will be performed - slower than a simple seek. - - RAR_TEST Test the current file and move to the next file in - the archive. If the archive was opened with - RAR_OM_LIST mode, the operation is equal to - RAR_SKIP. - - RAR_EXTRACT Extract the current file and move to the next file. - If the archive was opened with RAR_OM_LIST mode, - the operation is equal to RAR_SKIP. - - -DestPath - This parameter should point to a zero terminated string containing the - destination directory to which to extract files to. If DestPath is equal - to NULL, it means extract to the current directory. This parameter has - meaning only if DestName is NULL. - -DestName - This parameter should point to a string containing the full path and name - to assign to extracted file or it can be NULL to use the default name. - If DestName is defined (not NULL), it overrides both the original file - name saved in the archive and path specigied in DestPath setting. - - Both DestPath and DestName must be in OEM encoding. If necessary, - use CharToOem to convert text to OEM before passing to this function. - -Return values -~~~~~~~~~~~~~ - 0 Success - ERAR_BAD_DATA File CRC error - ERAR_BAD_ARCHIVE Volume is not valid RAR archive - ERAR_UNKNOWN_FORMAT Unknown archive format - ERAR_EOPEN Volume open error - ERAR_ECREATE File create error - ERAR_ECLOSE File close error - ERAR_EREAD Read error - ERAR_EWRITE Write error - - -Note: if you wish to cancel extraction, return -1 when processing - UCM_PROCESSDATA callback message. 
- - -==================================================================== -int PASCAL RARProcessFileW(HANDLE hArcData, - int Operation, - wchar_t *DestPath, - wchar_t *DestName) -==================================================================== - -Description -~~~~~~~~~~~ - Unicode version of RARProcessFile. It uses Unicode DestPath - and DestName parameters, other parameters and return values - are the same as in RARProcessFile. - - -==================================================================== -void PASCAL RARSetCallback(HANDLE hArcData, - int PASCAL (*CallbackProc)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2), - LPARAM UserData); -==================================================================== - -Description -~~~~~~~~~~~ - Set a user-defined callback function to process Unrar events. - -Parameters -~~~~~~~~~~ -hArcData - This parameter should contain the archive handle obtained from the - RAROpenArchive function call. - -CallbackProc - It should point to a user-defined callback function. - - The function will be passed four parameters: - - - msg Type of event. Described below. - - UserData User defined value passed to RARSetCallback. - - P1 and P2 Event dependent parameters. Described below. - - - Possible events - - UCM_CHANGEVOLUME Process volume change. - - P1 Points to the zero terminated name - of the next volume. - - P2 The function call mode: - - RAR_VOL_ASK Required volume is absent. The function should - prompt user and return a positive value - to retry or return -1 value to terminate - operation. The function may also specify a new - volume name, placing it to the address specified - by P1 parameter. - - RAR_VOL_NOTIFY Required volume is successfully opened. - This is a notification call and volume name - modification is not allowed. The function should - return a positive value to continue or -1 - to terminate operation. - - UCM_PROCESSDATA Process unpacked data. 
It may be used to read - a file while it is being extracted or tested - without actual extracting file to disk. - Return a positive value to continue process - or -1 to cancel the archive operation - - P1 Address pointing to the unpacked data. - Function may refer to the data but must not - change it. - - P2 Size of the unpacked data. It is guaranteed - only that the size will not exceed the maximum - dictionary size (4 Mb in RAR 3.0). - - UCM_NEEDPASSWORD DLL needs a password to process archive. - This message must be processed if you wish - to be able to handle archives with encrypted - file names. It can be also used as replacement - of RARSetPassword function even for usual - encrypted files with non-encrypted names. - - P1 Address pointing to the buffer for a password. - You need to copy a password here. - - P2 Size of the password buffer. - - -UserData - User data passed to callback function. - - Other functions of UnRAR.dll should not be called from the callback - function. - -Return values -~~~~~~~~~~~~~ - None - - - -==================================================================== -void PASCAL RARSetChangeVolProc(HANDLE hArcData, - int PASCAL (*ChangeVolProc)(char *ArcName,int Mode)); -==================================================================== - -Obsoleted, use RARSetCallback instead. - - - -==================================================================== -void PASCAL RARSetProcessDataProc(HANDLE hArcData, - int PASCAL (*ProcessDataProc)(unsigned char *Addr,int Size)) -==================================================================== - -Obsoleted, use RARSetCallback instead. - - -==================================================================== -void PASCAL RARSetPassword(HANDLE hArcData, - char *Password); -==================================================================== - -Description -~~~~~~~~~~~ - Set a password to decrypt files. 
- -Parameters -~~~~~~~~~~ -hArcData - This parameter should contain the archive handle obtained from the - RAROpenArchive function call. - -Password - It should point to a string containing a zero terminated password. - -Return values -~~~~~~~~~~~~~ - None - - -==================================================================== -void PASCAL RARGetDllVersion(); -==================================================================== - -Description -~~~~~~~~~~~ - Returns API version. - -Parameters -~~~~~~~~~~ - None. - -Return values -~~~~~~~~~~~~~ - Returns an integer value denoting UnRAR.dll API version, which is also -defined in unrar.h as RAR_DLL_VERSION. API version number is incremented -only in case of noticeable changes in UnRAR.dll API. Do not confuse it -with version of UnRAR.dll stored in DLL resources, which is incremented -with every DLL rebuild. - - If RARGetDllVersion() returns a value lower than UnRAR.dll which your -application was designed for, it may indicate that DLL version is too old -and it will fail to provide all necessary functions to your application. - - This function is absent in old versions of UnRAR.dll, so it is safer -to use LoadLibrary and GetProcAddress to access this function. - + + UnRAR.dll Manual + ~~~~~~~~~~~~~~~~ + + UnRAR.dll is a 32-bit Windows dynamic-link library which provides + file extraction from RAR archives. 
+ + + Exported functions + +==================================================================== +HANDLE PASCAL RAROpenArchive(struct RAROpenArchiveData *ArchiveData) +==================================================================== + +Description +~~~~~~~~~~~ + Open RAR archive and allocate memory structures + +Parameters +~~~~~~~~~~ +ArchiveData Points to RAROpenArchiveData structure + +struct RAROpenArchiveData +{ + char *ArcName; + UINT OpenMode; + UINT OpenResult; + char *CmtBuf; + UINT CmtBufSize; + UINT CmtSize; + UINT CmtState; +}; + +Structure fields: + +ArcName + Input parameter which should point to zero terminated string + containing the archive name. + +OpenMode + Input parameter. + + Possible values + + RAR_OM_LIST + Open archive for reading file headers only. + + RAR_OM_EXTRACT + Open archive for testing and extracting files. + + RAR_OM_LIST_INCSPLIT + Open archive for reading file headers only. If you open an archive + in such mode, RARReadHeader[Ex] will return all file headers, + including those with "file continued from previous volume" flag. + In case of RAR_OM_LIST such headers are automatically skipped. + So if you process RAR volumes in RAR_OM_LIST_INCSPLIT mode, you will + get several file header records for same file if file is split between + volumes. For such files only the last file header record will contain + the correct file CRC and if you wish to get the correct packed size, + you need to sum up packed sizes of all parts. + +OpenResult + Output parameter. + + Possible values + + 0 Success + ERAR_NO_MEMORY Not enough memory to initialize data structures + ERAR_BAD_DATA Archive header broken + ERAR_BAD_ARCHIVE File is not valid RAR archive + ERAR_UNKNOWN_FORMAT Unknown encryption used for archive headers + ERAR_EOPEN File open error + +CmtBuf + Input parameter which should point to the buffer for archive + comments. Maximum comment size is limited to 64Kb. Comment text is + zero terminated. 
If the comment text is larger than the buffer + size, the comment text will be truncated. If CmtBuf is set to + NULL, comments will not be read. + +CmtBufSize + Input parameter which should contain size of buffer for archive + comments. + +CmtSize + Output parameter containing size of comments actually read into the + buffer, cannot exceed CmtBufSize. + +CmtState + Output parameter. + + Possible values + + 0 comments not present + 1 Comments read completely + ERAR_NO_MEMORY Not enough memory to extract comments + ERAR_BAD_DATA Broken comment + ERAR_UNKNOWN_FORMAT Unknown comment format + ERAR_SMALL_BUF Buffer too small, comments not completely read + +Return values +~~~~~~~~~~~~~ + Archive handle or NULL in case of error + + +======================================================================== +HANDLE PASCAL RAROpenArchiveEx(struct RAROpenArchiveDataEx *ArchiveData) +======================================================================== + +Description +~~~~~~~~~~~ + Similar to RAROpenArchive, but uses RAROpenArchiveDataEx structure + allowing to specify Unicode archive name and returning information + about archive flags. + +Parameters +~~~~~~~~~~ +ArchiveData Points to RAROpenArchiveDataEx structure + +struct RAROpenArchiveDataEx +{ + char *ArcName; + wchar_t *ArcNameW; + unsigned int OpenMode; + unsigned int OpenResult; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Flags; + unsigned int Reserved[32]; +}; + +Structure fields: + +ArcNameW + Input parameter which should point to zero terminated Unicode string + containing the archive name or NULL if Unicode name is not specified. + +Flags + Output parameter. Combination of bit flags. 
+ + Possible values + + 0x0001 - Volume attribute (archive volume) + 0x0002 - Archive comment present + 0x0004 - Archive lock attribute + 0x0008 - Solid attribute (solid archive) + 0x0010 - New volume naming scheme ('volname.partN.rar') + 0x0020 - Authenticity information present + 0x0040 - Recovery record present + 0x0080 - Block headers are encrypted + 0x0100 - First volume (set only by RAR 3.0 and later) + +Reserved[32] + Reserved for future use. Must be zero. + +Information on other structure fields and function return values +is available above, in RAROpenArchive function description. + + +==================================================================== +int PASCAL RARCloseArchive(HANDLE hArcData) +==================================================================== + +Description +~~~~~~~~~~~ + Close RAR archive and release allocated memory. It must be called when + archive processing is finished, even if the archive processing was stopped + due to an error. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. + +Return values +~~~~~~~~~~~~~ + 0 Success + ERAR_ECLOSE Archive close error + + +==================================================================== +int PASCAL RARReadHeader(HANDLE hArcData, + struct RARHeaderData *HeaderData) +==================================================================== + +Description +~~~~~~~~~~~ + Read header of file in archive. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. 
+ +HeaderData + It should point to RARHeaderData structure: + +struct RARHeaderData +{ + char ArcName[260]; + char FileName[260]; + UINT Flags; + UINT PackSize; + UINT UnpSize; + UINT HostOS; + UINT FileCRC; + UINT FileTime; + UINT UnpVer; + UINT Method; + UINT FileAttr; + char *CmtBuf; + UINT CmtBufSize; + UINT CmtSize; + UINT CmtState; +}; + +Structure fields: + +ArcName + Output parameter which contains a zero terminated string of the + current archive name. May be used to determine the current volume + name. + +FileName + Output parameter which contains a zero terminated string of the + file name in OEM (DOS) encoding. + +Flags + Output parameter which contains file flags: + + 0x01 - file continued from previous volume + 0x02 - file continued on next volume + 0x04 - file encrypted with password + 0x08 - file comment present + 0x10 - compression of previous files is used (solid flag) + + bits 7 6 5 + + 0 0 0 - dictionary size 64 Kb + 0 0 1 - dictionary size 128 Kb + 0 1 0 - dictionary size 256 Kb + 0 1 1 - dictionary size 512 Kb + 1 0 0 - dictionary size 1024 Kb + 1 0 1 - dictionary size 2048 KB + 1 1 0 - dictionary size 4096 KB + 1 1 1 - file is directory + + Other bits are reserved. + +PackSize + Output parameter means packed file size or size of the + file part if file was split between volumes. + +UnpSize + Output parameter - unpacked file size. + +HostOS + Output parameter - operating system used for archiving: + + 0 - MS DOS; + 1 - OS/2. + 2 - Win32 + 3 - Unix + +FileCRC + Output parameter which contains unpacked file CRC. In case of file parts + split between volumes only the last part contains the correct CRC + and it is accessible only in RAR_OM_LIST_INCSPLIT listing mode. + +FileTime + Output parameter - contains date and time in standard MS DOS format. + +UnpVer + Output parameter - RAR version needed to extract file. + It is encoded as 10 * Major version + minor version. + +Method + Output parameter - packing method. 
+ +FileAttr + Output parameter - file attributes. + +CmtBuf + File comments support is not implemented in the new DLL version yet. + Now CmtState is always 0. + +/* + * Input parameter which should point to the buffer for file + * comments. Maximum comment size is limited to 64Kb. Comment text is + * a zero terminated string in OEM encoding. If the comment text is + * larger than the buffer size, the comment text will be truncated. + * If CmtBuf is set to NULL, comments will not be read. + */ + +CmtBufSize + Input parameter which should contain size of buffer for archive + comments. + +CmtSize + Output parameter containing size of comments actually read into the + buffer, should not exceed CmtBufSize. + +CmtState + Output parameter. + + Possible values + + 0 Absent comments + 1 Comments read completely + ERAR_NO_MEMORY Not enough memory to extract comments + ERAR_BAD_DATA Broken comment + ERAR_UNKNOWN_FORMAT Unknown comment format + ERAR_SMALL_BUF Buffer too small, comments not completely read + +Return values +~~~~~~~~~~~~~ + + 0 Success + ERAR_END_ARCHIVE End of archive + ERAR_BAD_DATA File header broken + + +==================================================================== +int PASCAL RARReadHeaderEx(HANDLE hArcData, + struct RARHeaderDataEx *HeaderData) +==================================================================== + +Description +~~~~~~~~~~~ + Similar to RARReadHeader, but uses RARHeaderDataEx structure, +containing information about Unicode file names and 64 bit file sizes. 
+ +struct RARHeaderDataEx +{ + char ArcName[1024]; + wchar_t ArcNameW[1024]; + char FileName[1024]; + wchar_t FileNameW[1024]; + unsigned int Flags; + unsigned int PackSize; + unsigned int PackSizeHigh; + unsigned int UnpSize; + unsigned int UnpSizeHigh; + unsigned int HostOS; + unsigned int FileCRC; + unsigned int FileTime; + unsigned int UnpVer; + unsigned int Method; + unsigned int FileAttr; + char *CmtBuf; + unsigned int CmtBufSize; + unsigned int CmtSize; + unsigned int CmtState; + unsigned int Reserved[1024]; +}; + + +==================================================================== +int PASCAL RARProcessFile(HANDLE hArcData, + int Operation, + char *DestPath, + char *DestName) +==================================================================== + +Description +~~~~~~~~~~~ + Performs action and moves the current position in the archive to + the next file. Extract or test the current file from the archive + opened in RAR_OM_EXTRACT mode. If the mode RAR_OM_LIST is set, + then a call to this function will simply skip the archive position + to the next file. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. + +Operation + File operation. + + Possible values + + RAR_SKIP Move to the next file in the archive. If the + archive is solid and RAR_OM_EXTRACT mode was set + when the archive was opened, the current file will + be processed - the operation will be performed + slower than a simple seek. + + RAR_TEST Test the current file and move to the next file in + the archive. If the archive was opened with + RAR_OM_LIST mode, the operation is equal to + RAR_SKIP. + + RAR_EXTRACT Extract the current file and move to the next file. + If the archive was opened with RAR_OM_LIST mode, + the operation is equal to RAR_SKIP. + + +DestPath + This parameter should point to a zero terminated string containing the + destination directory to which to extract files to. 
If DestPath is equal + to NULL, it means extract to the current directory. This parameter has + meaning only if DestName is NULL. + +DestName + This parameter should point to a string containing the full path and name + to assign to extracted file or it can be NULL to use the default name. + If DestName is defined (not NULL), it overrides both the original file + name saved in the archive and path specigied in DestPath setting. + + Both DestPath and DestName must be in OEM encoding. If necessary, + use CharToOem to convert text to OEM before passing to this function. + +Return values +~~~~~~~~~~~~~ + 0 Success + ERAR_BAD_DATA File CRC error + ERAR_BAD_ARCHIVE Volume is not valid RAR archive + ERAR_UNKNOWN_FORMAT Unknown archive format + ERAR_EOPEN Volume open error + ERAR_ECREATE File create error + ERAR_ECLOSE File close error + ERAR_EREAD Read error + ERAR_EWRITE Write error + + +Note: if you wish to cancel extraction, return -1 when processing + UCM_PROCESSDATA callback message. + + +==================================================================== +int PASCAL RARProcessFileW(HANDLE hArcData, + int Operation, + wchar_t *DestPath, + wchar_t *DestName) +==================================================================== + +Description +~~~~~~~~~~~ + Unicode version of RARProcessFile. It uses Unicode DestPath + and DestName parameters, other parameters and return values + are the same as in RARProcessFile. + + +==================================================================== +void PASCAL RARSetCallback(HANDLE hArcData, + int PASCAL (*CallbackProc)(UINT msg,LPARAM UserData,LPARAM P1,LPARAM P2), + LPARAM UserData); +==================================================================== + +Description +~~~~~~~~~~~ + Set a user-defined callback function to process Unrar events. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. 
+ +CallbackProc + It should point to a user-defined callback function. + + The function will be passed four parameters: + + + msg Type of event. Described below. + + UserData User defined value passed to RARSetCallback. + + P1 and P2 Event dependent parameters. Described below. + + + Possible events + + UCM_CHANGEVOLUME Process volume change. + + P1 Points to the zero terminated name + of the next volume. + + P2 The function call mode: + + RAR_VOL_ASK Required volume is absent. The function should + prompt user and return a positive value + to retry or return -1 value to terminate + operation. The function may also specify a new + volume name, placing it to the address specified + by P1 parameter. + + RAR_VOL_NOTIFY Required volume is successfully opened. + This is a notification call and volume name + modification is not allowed. The function should + return a positive value to continue or -1 + to terminate operation. + + UCM_PROCESSDATA Process unpacked data. It may be used to read + a file while it is being extracted or tested + without actual extracting file to disk. + Return a positive value to continue process + or -1 to cancel the archive operation + + P1 Address pointing to the unpacked data. + Function may refer to the data but must not + change it. + + P2 Size of the unpacked data. It is guaranteed + only that the size will not exceed the maximum + dictionary size (4 Mb in RAR 3.0). + + UCM_NEEDPASSWORD DLL needs a password to process archive. + This message must be processed if you wish + to be able to handle archives with encrypted + file names. It can be also used as replacement + of RARSetPassword function even for usual + encrypted files with non-encrypted names. + + P1 Address pointing to the buffer for a password. + You need to copy a password here. + + P2 Size of the password buffer. + + +UserData + User data passed to callback function. + + Other functions of UnRAR.dll should not be called from the callback + function. 
+ +Return values +~~~~~~~~~~~~~ + None + + + +==================================================================== +void PASCAL RARSetChangeVolProc(HANDLE hArcData, + int PASCAL (*ChangeVolProc)(char *ArcName,int Mode)); +==================================================================== + +Obsoleted, use RARSetCallback instead. + + + +==================================================================== +void PASCAL RARSetProcessDataProc(HANDLE hArcData, + int PASCAL (*ProcessDataProc)(unsigned char *Addr,int Size)) +==================================================================== + +Obsoleted, use RARSetCallback instead. + + +==================================================================== +void PASCAL RARSetPassword(HANDLE hArcData, + char *Password); +==================================================================== + +Description +~~~~~~~~~~~ + Set a password to decrypt files. + +Parameters +~~~~~~~~~~ +hArcData + This parameter should contain the archive handle obtained from the + RAROpenArchive function call. + +Password + It should point to a string containing a zero terminated password. + +Return values +~~~~~~~~~~~~~ + None + + +==================================================================== +void PASCAL RARGetDllVersion(); +==================================================================== + +Description +~~~~~~~~~~~ + Returns API version. + +Parameters +~~~~~~~~~~ + None. + +Return values +~~~~~~~~~~~~~ + Returns an integer value denoting UnRAR.dll API version, which is also +defined in unrar.h as RAR_DLL_VERSION. API version number is incremented +only in case of noticeable changes in UnRAR.dll API. Do not confuse it +with version of UnRAR.dll stored in DLL resources, which is incremented +with every DLL rebuild. + + If RARGetDllVersion() returns a value lower than UnRAR.dll which your +application was designed for, it may indicate that DLL version is too old +and it will fail to provide all necessary functions to your application. 
+ + This function is absent in old versions of UnRAR.dll, so it is safer +to use LoadLibrary and GetProcAddress to access this function. + diff --git a/comictaggerlib/UnRAR2/UnRARDLL/whatsnew.txt b/comicapi/UnRAR2/UnRARDLL/whatsnew.txt similarity index 96% rename from comictaggerlib/UnRAR2/UnRARDLL/whatsnew.txt rename to comicapi/UnRAR2/UnRARDLL/whatsnew.txt index 84ad72c..874d19b 100644 --- a/comictaggerlib/UnRAR2/UnRARDLL/whatsnew.txt +++ b/comicapi/UnRAR2/UnRARDLL/whatsnew.txt @@ -1,80 +1,80 @@ -List of unrar.dll API changes. We do not include performance and reliability -improvements into this list, but this library and RAR/UnRAR tools share -the same source code. So the latest version of unrar.dll usually contains -same decompression algorithm changes as the latest UnRAR version. -============================================================================ - --- 18 January 2008 - -all LONG parameters of CallbackProc function were changed -to LPARAM type for 64 bit mode compatibility. - - --- 12 December 2007 - -Added new RAR_OM_LIST_INCSPLIT open mode for function RAROpenArchive. - - --- 14 August 2007 - -Added NoCrypt\unrar_nocrypt.dll without decryption code for those -applications where presence of encryption or decryption code is not -allowed because of legal restrictions. - - --- 14 December 2006 - -Added ERAR_MISSING_PASSWORD error type. This error is returned -if empty password is specified for encrypted file. - - --- 12 June 2003 - -Added RARProcessFileW function, Unicode version of RARProcessFile - - --- 9 August 2002 - -Added RAROpenArchiveEx function allowing to specify Unicode archive -name and get archive flags. - - --- 24 January 2002 - -Added RARReadHeaderEx function allowing to read Unicode file names -and 64 bit file sizes. - - --- 23 January 2002 - -Added ERAR_UNKNOWN error type (it is used for all errors which -do not have special ERAR code yet) and UCM_NEEDPASSWORD callback -message. 
- -Unrar.dll automatically opens all next volumes not only when extracting, -but also in RAR_OM_LIST mode. - - --- 27 November 2001 - -RARSetChangeVolProc and RARSetProcessDataProc are replaced by -the single callback function installed with RARSetCallback. -Unlike old style callbacks, the new function accepts the user defined -parameter. Unrar.dll still supports RARSetChangeVolProc and -RARSetProcessDataProc for compatibility purposes, but if you write -a new application, better use RARSetCallback. - -File comments support is not implemented in the new DLL version yet. -Now CmtState is always 0. - - --- 13 August 2001 - -Added RARGetDllVersion function, so you may distinguish old unrar.dll, -which used C style callback functions and the new one with PASCAL callbacks. - - --- 10 May 2001 - -Callback functions in RARSetChangeVolProc and RARSetProcessDataProc -use PASCAL style call convention now. +List of unrar.dll API changes. We do not include performance and reliability +improvements into this list, but this library and RAR/UnRAR tools share +the same source code. So the latest version of unrar.dll usually contains +same decompression algorithm changes as the latest UnRAR version. +============================================================================ + +-- 18 January 2008 + +all LONG parameters of CallbackProc function were changed +to LPARAM type for 64 bit mode compatibility. + + +-- 12 December 2007 + +Added new RAR_OM_LIST_INCSPLIT open mode for function RAROpenArchive. + + +-- 14 August 2007 + +Added NoCrypt\unrar_nocrypt.dll without decryption code for those +applications where presence of encryption or decryption code is not +allowed because of legal restrictions. + + +-- 14 December 2006 + +Added ERAR_MISSING_PASSWORD error type. This error is returned +if empty password is specified for encrypted file. 
+ + +-- 12 June 2003 + +Added RARProcessFileW function, Unicode version of RARProcessFile + + +-- 9 August 2002 + +Added RAROpenArchiveEx function allowing to specify Unicode archive +name and get archive flags. + + +-- 24 January 2002 + +Added RARReadHeaderEx function allowing to read Unicode file names +and 64 bit file sizes. + + +-- 23 January 2002 + +Added ERAR_UNKNOWN error type (it is used for all errors which +do not have special ERAR code yet) and UCM_NEEDPASSWORD callback +message. + +Unrar.dll automatically opens all next volumes not only when extracting, +but also in RAR_OM_LIST mode. + + +-- 27 November 2001 + +RARSetChangeVolProc and RARSetProcessDataProc are replaced by +the single callback function installed with RARSetCallback. +Unlike old style callbacks, the new function accepts the user defined +parameter. Unrar.dll still supports RARSetChangeVolProc and +RARSetProcessDataProc for compatibility purposes, but if you write +a new application, better use RARSetCallback. + +File comments support is not implemented in the new DLL version yet. +Now CmtState is always 0. + + +-- 13 August 2001 + +Added RARGetDllVersion function, so you may distinguish old unrar.dll, +which used C style callback functions and the new one with PASCAL callbacks. + + +-- 10 May 2001 + +Callback functions in RARSetChangeVolProc and RARSetProcessDataProc +use PASCAL style call convention now. diff --git a/comictaggerlib/UnRAR2/UnRARDLL/x64/readme.txt b/comicapi/UnRAR2/UnRARDLL/x64/readme.txt similarity index 97% rename from comictaggerlib/UnRAR2/UnRARDLL/x64/readme.txt rename to comicapi/UnRAR2/UnRARDLL/x64/readme.txt index 8f3b4e1..bbfb340 100644 --- a/comictaggerlib/UnRAR2/UnRARDLL/x64/readme.txt +++ b/comicapi/UnRAR2/UnRARDLL/x64/readme.txt @@ -1 +1 @@ -This is x64 version of unrar.dll. +This is x64 version of unrar.dll. 
diff --git a/comictaggerlib/UnRAR2/UnRARDLL/x64/unrar64.dll b/comicapi/UnRAR2/UnRARDLL/x64/unrar64.dll similarity index 100% rename from comictaggerlib/UnRAR2/UnRARDLL/x64/unrar64.dll rename to comicapi/UnRAR2/UnRARDLL/x64/unrar64.dll diff --git a/comictaggerlib/UnRAR2/__init__.py b/comicapi/UnRAR2/__init__.py similarity index 97% rename from comictaggerlib/UnRAR2/__init__.py rename to comicapi/UnRAR2/__init__.py index a913fcb..fe27cfe 100644 --- a/comictaggerlib/UnRAR2/__init__.py +++ b/comicapi/UnRAR2/__init__.py @@ -1,177 +1,177 @@ -# Copyright (c) 2003-2005 Jimmy Retzlaff, 2008 Konstantin Yegupov -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -""" -pyUnRAR2 is a ctypes based wrapper around the free UnRAR.dll. - -It is an modified version of Jimmy Retzlaff's pyUnRAR - more simple, -stable and foolproof. -Notice that it has INCOMPATIBLE interface. - -It enables reading and unpacking of archives created with the -RAR/WinRAR archivers. 
There is a low-level interface which is very -similar to the C interface provided by UnRAR. There is also a -higher level interface which makes some common operations easier. -""" - -__version__ = '0.99.3' - -try: - WindowsError - in_windows = True -except NameError: - in_windows = False - -if in_windows: - from windows import RarFileImplementation -else: - from unix import RarFileImplementation - - -import fnmatch, time, weakref - -class RarInfo(object): - """Represents a file header in an archive. Don't instantiate directly. - Use only to obtain information about file. - YOU CANNOT EXTRACT FILE CONTENTS USING THIS OBJECT. - USE METHODS OF RarFile CLASS INSTEAD. - - Properties: - index - index of file within the archive - filename - name of the file in the archive including path (if any) - datetime - file date/time as a struct_time suitable for time.strftime - isdir - True if the file is a directory - size - size in bytes of the uncompressed file - comment - comment associated with the file - - Note - this is not currently intended to be a Python file-like object. - """ - - def __init__(self, rarfile, data): - self.rarfile = weakref.proxy(rarfile) - self.index = data['index'] - self.filename = data['filename'] - self.isdir = data['isdir'] - self.size = data['size'] - self.datetime = data['datetime'] - self.comment = data['comment'] - - - - def __str__(self): - try : - arcName = self.rarfile.archiveName - except ReferenceError: - arcName = "[ARCHIVE_NO_LONGER_LOADED]" - return '' % (self.filename, arcName) - -class RarFile(RarFileImplementation): - - def __init__(self, archiveName, password=None): - """Instantiate the archive. - - archiveName is the name of the RAR file. - password is used to decrypt the files in the archive. - - Properties: - comment - comment associated with the archive - - >>> print RarFile('test.rar').comment - This is a test. 
- """ - self.archiveName = archiveName - RarFileImplementation.init(self, password) - - def __del__(self): - self.destruct() - - def infoiter(self): - """Iterate over all the files in the archive, generating RarInfos. - - >>> import os - >>> for fileInArchive in RarFile('test.rar').infoiter(): - ... print os.path.split(fileInArchive.filename)[-1], - ... print fileInArchive.isdir, - ... print fileInArchive.size, - ... print fileInArchive.comment, - ... print tuple(fileInArchive.datetime)[0:5], - ... print time.strftime('%a, %d %b %Y %H:%M', fileInArchive.datetime) - test True 0 None (2003, 6, 30, 1, 59) Mon, 30 Jun 2003 01:59 - test.txt False 20 None (2003, 6, 30, 2, 1) Mon, 30 Jun 2003 02:01 - this.py False 1030 None (2002, 2, 8, 16, 47) Fri, 08 Feb 2002 16:47 - """ - for params in RarFileImplementation.infoiter(self): - yield RarInfo(self, params) - - def infolist(self): - """Return a list of RarInfos, descripting the contents of the archive.""" - return list(self.infoiter()) - - def read_files(self, condition='*'): - """Read specific files from archive into memory. - If "condition" is a list of numbers, then return files which have those positions in infolist. - If "condition" is a string, then it is treated as a wildcard for names of files to extract. - If "condition" is a function, it is treated as a callback function, which accepts a RarInfo object - and returns boolean True (extract) or False (skip). - If "condition" is omitted, all files are returned. - - Returns list of tuples (RarInfo info, str contents) - """ - checker = condition2checker(condition) - return RarFileImplementation.read_files(self, checker) - - - def extract(self, condition='*', path='.', withSubpath=True, overwrite=True): - """Extract specific files from archive to disk. - - If "condition" is a list of numbers, then extract files which have those positions in infolist. - If "condition" is a string, then it is treated as a wildcard for names of files to extract. 
- If "condition" is a function, it is treated as a callback function, which accepts a RarInfo object - and returns either boolean True (extract) or boolean False (skip). - DEPRECATED: If "condition" callback returns string (only supported for Windows) - - that string will be used as a new name to save the file under. - If "condition" is omitted, all files are extracted. - - "path" is a directory to extract to - "withSubpath" flag denotes whether files are extracted with their full path in the archive. - "overwrite" flag denotes whether extracted files will overwrite old ones. Defaults to true. - - Returns list of RarInfos for extracted files.""" - checker = condition2checker(condition) - return RarFileImplementation.extract(self, checker, path, withSubpath, overwrite) - -def condition2checker(condition): - """Converts different condition types to callback""" - if type(condition) in [str, unicode]: - def smatcher(info): - return fnmatch.fnmatch(info.filename, condition) - return smatcher - elif type(condition) in [list, tuple] and type(condition[0]) in [int, long]: - def imatcher(info): - return info.index in condition - return imatcher - elif callable(condition): - return condition - else: - raise TypeError - - +# Copyright (c) 2003-2005 Jimmy Retzlaff, 2008 Konstantin Yegupov +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +""" +pyUnRAR2 is a ctypes based wrapper around the free UnRAR.dll. + +It is an modified version of Jimmy Retzlaff's pyUnRAR - more simple, +stable and foolproof. +Notice that it has INCOMPATIBLE interface. + +It enables reading and unpacking of archives created with the +RAR/WinRAR archivers. There is a low-level interface which is very +similar to the C interface provided by UnRAR. There is also a +higher level interface which makes some common operations easier. +""" + +__version__ = '0.99.3' + +try: + WindowsError + in_windows = True +except NameError: + in_windows = False + +if in_windows: + from windows import RarFileImplementation +else: + from unix import RarFileImplementation + + +import fnmatch, time, weakref + +class RarInfo(object): + """Represents a file header in an archive. Don't instantiate directly. + Use only to obtain information about file. + YOU CANNOT EXTRACT FILE CONTENTS USING THIS OBJECT. + USE METHODS OF RarFile CLASS INSTEAD. + + Properties: + index - index of file within the archive + filename - name of the file in the archive including path (if any) + datetime - file date/time as a struct_time suitable for time.strftime + isdir - True if the file is a directory + size - size in bytes of the uncompressed file + comment - comment associated with the file + + Note - this is not currently intended to be a Python file-like object. 
+ """ + + def __init__(self, rarfile, data): + self.rarfile = weakref.proxy(rarfile) + self.index = data['index'] + self.filename = data['filename'] + self.isdir = data['isdir'] + self.size = data['size'] + self.datetime = data['datetime'] + self.comment = data['comment'] + + + + def __str__(self): + try : + arcName = self.rarfile.archiveName + except ReferenceError: + arcName = "[ARCHIVE_NO_LONGER_LOADED]" + return '' % (self.filename, arcName) + +class RarFile(RarFileImplementation): + + def __init__(self, archiveName, password=None): + """Instantiate the archive. + + archiveName is the name of the RAR file. + password is used to decrypt the files in the archive. + + Properties: + comment - comment associated with the archive + + >>> print RarFile('test.rar').comment + This is a test. + """ + self.archiveName = archiveName + RarFileImplementation.init(self, password) + + def __del__(self): + self.destruct() + + def infoiter(self): + """Iterate over all the files in the archive, generating RarInfos. + + >>> import os + >>> for fileInArchive in RarFile('test.rar').infoiter(): + ... print os.path.split(fileInArchive.filename)[-1], + ... print fileInArchive.isdir, + ... print fileInArchive.size, + ... print fileInArchive.comment, + ... print tuple(fileInArchive.datetime)[0:5], + ... print time.strftime('%a, %d %b %Y %H:%M', fileInArchive.datetime) + test True 0 None (2003, 6, 30, 1, 59) Mon, 30 Jun 2003 01:59 + test.txt False 20 None (2003, 6, 30, 2, 1) Mon, 30 Jun 2003 02:01 + this.py False 1030 None (2002, 2, 8, 16, 47) Fri, 08 Feb 2002 16:47 + """ + for params in RarFileImplementation.infoiter(self): + yield RarInfo(self, params) + + def infolist(self): + """Return a list of RarInfos, descripting the contents of the archive.""" + return list(self.infoiter()) + + def read_files(self, condition='*'): + """Read specific files from archive into memory. + If "condition" is a list of numbers, then return files which have those positions in infolist. 
+ If "condition" is a string, then it is treated as a wildcard for names of files to extract. + If "condition" is a function, it is treated as a callback function, which accepts a RarInfo object + and returns boolean True (extract) or False (skip). + If "condition" is omitted, all files are returned. + + Returns list of tuples (RarInfo info, str contents) + """ + checker = condition2checker(condition) + return RarFileImplementation.read_files(self, checker) + + + def extract(self, condition='*', path='.', withSubpath=True, overwrite=True): + """Extract specific files from archive to disk. + + If "condition" is a list of numbers, then extract files which have those positions in infolist. + If "condition" is a string, then it is treated as a wildcard for names of files to extract. + If "condition" is a function, it is treated as a callback function, which accepts a RarInfo object + and returns either boolean True (extract) or boolean False (skip). + DEPRECATED: If "condition" callback returns string (only supported for Windows) - + that string will be used as a new name to save the file under. + If "condition" is omitted, all files are extracted. + + "path" is a directory to extract to + "withSubpath" flag denotes whether files are extracted with their full path in the archive. + "overwrite" flag denotes whether extracted files will overwrite old ones. Defaults to true. 
+ + Returns list of RarInfos for extracted files.""" + checker = condition2checker(condition) + return RarFileImplementation.extract(self, checker, path, withSubpath, overwrite) + +def condition2checker(condition): + """Converts different condition types to callback""" + if type(condition) in [str, unicode]: + def smatcher(info): + return fnmatch.fnmatch(info.filename, condition) + return smatcher + elif type(condition) in [list, tuple] and type(condition[0]) in [int, long]: + def imatcher(info): + return info.index in condition + return imatcher + elif callable(condition): + return condition + else: + raise TypeError + + diff --git a/comictaggerlib/UnRAR2/rar_exceptions.py b/comicapi/UnRAR2/rar_exceptions.py similarity index 100% rename from comictaggerlib/UnRAR2/rar_exceptions.py rename to comicapi/UnRAR2/rar_exceptions.py diff --git a/comictaggerlib/UnRAR2/test_UnRAR2.py b/comicapi/UnRAR2/test_UnRAR2.py similarity index 96% rename from comictaggerlib/UnRAR2/test_UnRAR2.py rename to comicapi/UnRAR2/test_UnRAR2.py index e86ba2c..13c092b 100644 --- a/comictaggerlib/UnRAR2/test_UnRAR2.py +++ b/comicapi/UnRAR2/test_UnRAR2.py @@ -1,138 +1,138 @@ -import os, sys - -import UnRAR2 -from UnRAR2.rar_exceptions import * - - -def cleanup(dir='test'): - for path, dirs, files in os.walk(dir): - for fn in files: - os.remove(os.path.join(path, fn)) - for dir in dirs: - os.removedirs(os.path.join(path, dir)) - - -# basic test -cleanup() -rarc = UnRAR2.RarFile('test.rar') -rarc.infolist() -assert rarc.comment == "This is a test." 
-for info in rarc.infoiter(): - saveinfo = info - assert (str(info)=="""""") - break -rarc.extract() -assert os.path.exists('test'+os.sep+'test.txt') -assert os.path.exists('test'+os.sep+'this.py') -del rarc -assert (str(saveinfo)=="""""") -cleanup() - -# extract all the files in test.rar -cleanup() -UnRAR2.RarFile('test.rar').extract() -assert os.path.exists('test'+os.sep+'test.txt') -assert os.path.exists('test'+os.sep+'this.py') -cleanup() - -# extract all the files in test.rar matching the wildcard *.txt -cleanup() -UnRAR2.RarFile('test.rar').extract('*.txt') -assert os.path.exists('test'+os.sep+'test.txt') -assert not os.path.exists('test'+os.sep+'this.py') -cleanup() - - -# check the name and size of each file, extracting small ones -cleanup() -archive = UnRAR2.RarFile('test.rar') -assert archive.comment == 'This is a test.' -archive.extract(lambda rarinfo: rarinfo.size <= 1024) -for rarinfo in archive.infoiter(): - if rarinfo.size <= 1024 and not rarinfo.isdir: - assert rarinfo.size == os.stat(rarinfo.filename).st_size -assert file('test'+os.sep+'test.txt', 'rt').read() == 'This is only a test.' -assert not os.path.exists('test'+os.sep+'this.py') -cleanup() - - -# extract this.py, overriding it's destination -cleanup('test2') -archive = UnRAR2.RarFile('test.rar') -archive.extract('*.py', 'test2', False) -assert os.path.exists('test2'+os.sep+'this.py') -cleanup('test2') - - -# extract test.txt to memory -cleanup() -archive = UnRAR2.RarFile('test.rar') -entries = UnRAR2.RarFile('test.rar').read_files('*test.txt') -assert len(entries)==1 -assert entries[0][0].filename.endswith('test.txt') -assert entries[0][1]=='This is only a test.' 
- - -# extract all the files in test.rar with overwriting -cleanup() -fo = open('test'+os.sep+'test.txt',"wt") -fo.write("blah") -fo.close() -UnRAR2.RarFile('test.rar').extract('*.txt') -assert open('test'+os.sep+'test.txt',"rt").read()!="blah" -cleanup() - -# extract all the files in test.rar without overwriting -cleanup() -fo = open('test'+os.sep+'test.txt',"wt") -fo.write("blahblah") -fo.close() -UnRAR2.RarFile('test.rar').extract('*.txt', overwrite = False) -assert open('test'+os.sep+'test.txt',"rt").read()=="blahblah" -cleanup() - -# list big file in an archive -list(UnRAR2.RarFile('test_nulls.rar').infoiter()) - -# extract files from an archive with protected files -cleanup() -rarc = UnRAR2.RarFile('test_protected_files.rar', password="protected") -rarc.extract() -assert os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') -cleanup() -errored = False -try: - UnRAR2.RarFile('test_protected_files.rar', password="proteqted").extract() -except IncorrectRARPassword: - errored = True -assert not os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') -assert errored -cleanup() - -# extract files from an archive with protected headers -cleanup() -UnRAR2.RarFile('test_protected_headers.rar', password="secret").extract() -assert os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') -cleanup() -errored = False -try: - UnRAR2.RarFile('test_protected_headers.rar', password="seqret").extract() -except IncorrectRARPassword: - errored = True -assert not os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') -assert errored -cleanup() - -# make sure docstring examples are working -import doctest -doctest.testmod(UnRAR2) - -# update documentation -import pydoc -pydoc.writedoc(UnRAR2) - -# cleanup -try: - os.remove('__init__.pyc') -except: - pass +import os, sys + +import UnRAR2 +from UnRAR2.rar_exceptions import * + + +def cleanup(dir='test'): + for path, dirs, files in os.walk(dir): + for fn in files: + os.remove(os.path.join(path, fn)) + for dir in dirs: + 
os.removedirs(os.path.join(path, dir)) + + +# basic test +cleanup() +rarc = UnRAR2.RarFile('test.rar') +rarc.infolist() +assert rarc.comment == "This is a test." +for info in rarc.infoiter(): + saveinfo = info + assert (str(info)=="""""") + break +rarc.extract() +assert os.path.exists('test'+os.sep+'test.txt') +assert os.path.exists('test'+os.sep+'this.py') +del rarc +assert (str(saveinfo)=="""""") +cleanup() + +# extract all the files in test.rar +cleanup() +UnRAR2.RarFile('test.rar').extract() +assert os.path.exists('test'+os.sep+'test.txt') +assert os.path.exists('test'+os.sep+'this.py') +cleanup() + +# extract all the files in test.rar matching the wildcard *.txt +cleanup() +UnRAR2.RarFile('test.rar').extract('*.txt') +assert os.path.exists('test'+os.sep+'test.txt') +assert not os.path.exists('test'+os.sep+'this.py') +cleanup() + + +# check the name and size of each file, extracting small ones +cleanup() +archive = UnRAR2.RarFile('test.rar') +assert archive.comment == 'This is a test.' +archive.extract(lambda rarinfo: rarinfo.size <= 1024) +for rarinfo in archive.infoiter(): + if rarinfo.size <= 1024 and not rarinfo.isdir: + assert rarinfo.size == os.stat(rarinfo.filename).st_size +assert file('test'+os.sep+'test.txt', 'rt').read() == 'This is only a test.' +assert not os.path.exists('test'+os.sep+'this.py') +cleanup() + + +# extract this.py, overriding it's destination +cleanup('test2') +archive = UnRAR2.RarFile('test.rar') +archive.extract('*.py', 'test2', False) +assert os.path.exists('test2'+os.sep+'this.py') +cleanup('test2') + + +# extract test.txt to memory +cleanup() +archive = UnRAR2.RarFile('test.rar') +entries = UnRAR2.RarFile('test.rar').read_files('*test.txt') +assert len(entries)==1 +assert entries[0][0].filename.endswith('test.txt') +assert entries[0][1]=='This is only a test.' 
+ + +# extract all the files in test.rar with overwriting +cleanup() +fo = open('test'+os.sep+'test.txt',"wt") +fo.write("blah") +fo.close() +UnRAR2.RarFile('test.rar').extract('*.txt') +assert open('test'+os.sep+'test.txt',"rt").read()!="blah" +cleanup() + +# extract all the files in test.rar without overwriting +cleanup() +fo = open('test'+os.sep+'test.txt',"wt") +fo.write("blahblah") +fo.close() +UnRAR2.RarFile('test.rar').extract('*.txt', overwrite = False) +assert open('test'+os.sep+'test.txt',"rt").read()=="blahblah" +cleanup() + +# list big file in an archive +list(UnRAR2.RarFile('test_nulls.rar').infoiter()) + +# extract files from an archive with protected files +cleanup() +rarc = UnRAR2.RarFile('test_protected_files.rar', password="protected") +rarc.extract() +assert os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +cleanup() +errored = False +try: + UnRAR2.RarFile('test_protected_files.rar', password="proteqted").extract() +except IncorrectRARPassword: + errored = True +assert not os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +assert errored +cleanup() + +# extract files from an archive with protected headers +cleanup() +UnRAR2.RarFile('test_protected_headers.rar', password="secret").extract() +assert os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +cleanup() +errored = False +try: + UnRAR2.RarFile('test_protected_headers.rar', password="seqret").extract() +except IncorrectRARPassword: + errored = True +assert not os.path.exists('test'+os.sep+'top_secret_xxx_file.txt') +assert errored +cleanup() + +# make sure docstring examples are working +import doctest +doctest.testmod(UnRAR2) + +# update documentation +import pydoc +pydoc.writedoc(UnRAR2) + +# cleanup +try: + os.remove('__init__.pyc') +except: + pass diff --git a/comictaggerlib/UnRAR2/unix.py b/comicapi/UnRAR2/unix.py similarity index 100% rename from comictaggerlib/UnRAR2/unix.py rename to comicapi/UnRAR2/unix.py diff --git a/comictaggerlib/UnRAR2/windows.py 
b/comicapi/UnRAR2/windows.py similarity index 100% rename from comictaggerlib/UnRAR2/windows.py rename to comicapi/UnRAR2/windows.py diff --git a/comicapi/__init__.py b/comicapi/__init__.py new file mode 100644 index 0000000..0d9bd7c --- /dev/null +++ b/comicapi/__init__.py @@ -0,0 +1 @@ +__author__ = 'dromanin' diff --git a/comicapi/comet.py b/comicapi/comet.py new file mode 100644 index 0000000..1a06977 --- /dev/null +++ b/comicapi/comet.py @@ -0,0 +1,260 @@ +""" +A python class to encapsulate CoMet data +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from datetime import datetime +import zipfile +from pprint import pprint +import xml.etree.ElementTree as ET +from genericmetadata import GenericMetadata +import utils + +class CoMet: + + writer_synonyms = ['writer', 'plotter', 'scripter'] + penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] + inker_synonyms = [ 'inker', 'artist', 'finishes' ] + colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] + letterer_synonyms = [ 'letterer'] + cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] + editor_synonyms = [ 'editor'] + + def metadataFromString( self, string ): + + tree = ET.ElementTree(ET.fromstring( string )) + return self.convertXMLToMetadata( tree ) + + def stringFromMetadata( self, metadata ): + + header = '\n' + + tree = self.convertMetadataToXML( self, metadata ) + return header + ET.tostring(tree.getroot()) + + def indent( self, elem, level=0 ): + # for making the XML output readable + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + self.indent( elem, level+1 ) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + def convertMetadataToXML( self, filename, metadata ): + + #shorthand for the metadata + md = metadata + + # build a tree structure + root = ET.Element("comet") + root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/" + root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" + root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd" + + #helper func + def assign( comet_entry, md_entry): + if md_entry is not None: + ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry) + + # title is manditory + if md.title is None: + md.title = "" + assign( 'title', md.title ) + assign( 'series', 
md.series ) + assign( 'issue', md.issue ) #must be int?? + assign( 'volume', md.volume ) + assign( 'description', md.comments ) + assign( 'publisher', md.publisher ) + assign( 'pages', md.pageCount ) + assign( 'format', md.format ) + assign( 'language', md.language ) + assign( 'rating', md.maturityRating ) + assign( 'price', md.price ) + assign( 'isVersionOf', md.isVersionOf ) + assign( 'rights', md.rights ) + assign( 'identifier', md.identifier ) + assign( 'lastMark', md.lastMark ) + assign( 'genre', md.genre ) # TODO repeatable + + if md.characters is not None: + char_list = [ c.strip() for c in md.characters.split(',') ] + for c in char_list: + assign( 'character', c ) + + if md.manga is not None and md.manga == "YesAndRightToLeft": + assign( 'readingDirection', "rtl") + + date_str = "" + if md.year is not None: + date_str = str(md.year).zfill(4) + if md.month is not None: + date_str += "-" + str(md.month).zfill(2) + assign( 'date', date_str ) + + assign( 'coverImage', md.coverImage ) + + # need to specially process the credits, since they are structured differently than CIX + credit_writer_list = list() + credit_penciller_list = list() + credit_inker_list = list() + credit_colorist_list = list() + credit_letterer_list = list() + credit_cover_list = list() + credit_editor_list = list() + + # loop thru credits, and build a list for each role that CoMet supports + for credit in metadata.credits: + + if credit['role'].lower() in set( self.writer_synonyms ): + ET.SubElement(root, 'writer').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.penciller_synonyms ): + ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.inker_synonyms ): + ET.SubElement(root, 'inker').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.colorist_synonyms ): + ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( 
self.letterer_synonyms ): + ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.cover_synonyms ): + ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person']) + + if credit['role'].lower() in set( self.editor_synonyms ): + ET.SubElement(root, 'editor').text = u"{0}".format(credit['person']) + + + # self pretty-print + self.indent(root) + + # wrap it in an ElementTree instance, and save as XML + tree = ET.ElementTree(root) + return tree + + + def convertXMLToMetadata( self, tree ): + + root = tree.getroot() + + if root.tag != 'comet': + raise 1 + return None + + metadata = GenericMetadata() + md = metadata + + # Helper function + def xlate( tag ): + node = root.find( tag ) + if node is not None: + return node.text + else: + return None + + md.series = xlate( 'series' ) + md.title = xlate( 'title' ) + md.issue = xlate( 'issue' ) + md.volume = xlate( 'volume' ) + md.comments = xlate( 'description' ) + md.publisher = xlate( 'publisher' ) + md.language = xlate( 'language' ) + md.format = xlate( 'format' ) + md.pageCount = xlate( 'pages' ) + md.maturityRating = xlate( 'rating' ) + md.price = xlate( 'price' ) + md.isVersionOf = xlate( 'isVersionOf' ) + md.rights = xlate( 'rights' ) + md.identifier = xlate( 'identifier' ) + md.lastMark = xlate( 'lastMark' ) + md.genre = xlate( 'genre' ) # TODO - repeatable field + + date = xlate( 'date' ) + if date is not None: + parts = date.split('-') + if len( parts) > 0: + md.year = parts[0] + if len( parts) > 1: + md.month = parts[1] + + md.coverImage = xlate( 'coverImage' ) + + readingDirection = xlate( 'readingDirection' ) + if readingDirection is not None and readingDirection == "rtl": + md.manga = "YesAndRightToLeft" + + # loop for character tags + char_list = [] + for n in root: + if n.tag == 'character': + char_list.append(n.text.strip()) + md.characters = utils.listToString( char_list ) + + # Now extract the credit info + for n in root: + if ( n.tag 
== 'writer' or + n.tag == 'penciller' or + n.tag == 'inker' or + n.tag == 'colorist' or + n.tag == 'letterer' or + n.tag == 'editor' + ): + metadata.addCredit( n.text.strip(), n.tag.title() ) + + if n.tag == 'coverDesigner': + metadata.addCredit( n.text.strip(), "Cover" ) + + + metadata.isEmpty = False + + return metadata + + #verify that the string actually contains CoMet data in XML format + def validateString( self, string ): + try: + tree = ET.ElementTree(ET.fromstring( string )) + root = tree.getroot() + if root.tag != 'comet': + raise Exception + except: + return False + + return True + + + def writeToExternalFile( self, filename, metadata ): + + tree = self.convertMetadataToXML( self, metadata ) + #ET.dump(tree) + tree.write(filename, encoding='utf-8') + + def readFromExternalFile( self, filename ): + + tree = ET.parse( filename ) + return self.convertXMLToMetadata( tree ) + diff --git a/comicapi/comicarchive.py b/comicapi/comicarchive.py new file mode 100644 index 0000000..381dc68 --- /dev/null +++ b/comicapi/comicarchive.py @@ -0,0 +1,1088 @@ +""" +A python class to represent a single comic, be it file or folder of images +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import zipfile +import os +import struct +import sys +import tempfile +import subprocess +import platform +import locale +from natsort import natsorted + +if platform.system() == "Windows": + import _subprocess +import time + +import StringIO +try: + import Image + pil_available = True +except ImportError: + pil_available = False + +sys.path.insert(0, os.path.abspath(".") ) +import UnRAR2 +from UnRAR2.rar_exceptions import * + +#from settings import ComicTaggerSettings +from comicinfoxml import ComicInfoXml +from comicbookinfo import ComicBookInfo +from comet import CoMet +from genericmetadata import GenericMetadata, PageType +from filenameparser import FileNameParser +from PyPDF2 import PdfFileReader + +class MetaDataStyle: + CBI = 0 + CIX = 1 + COMET = 2 + name = [ 'ComicBookLover', 'ComicRack', 'CoMet' ] + +class ZipArchiver: + + def __init__( self, path ): + self.path = path + + def getArchiveComment( self ): + zf = zipfile.ZipFile( self.path, 'r' ) + comment = zf.comment + zf.close() + return comment + + def setArchiveComment( self, comment ): + return self.writeZipComment( self.path, comment ) + + def readArchiveFile( self, archive_file ): + data = "" + zf = zipfile.ZipFile( self.path, 'r' ) + + try: + data = zf.read( archive_file ) + except zipfile.BadZipfile as e: + print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) + zf.close() + raise IOError + except Exception as e: + zf.close() + print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) + raise IOError + finally: + zf.close() + return data + + def removeArchiveFile( self, archive_file ): + try: + self.rebuildZipFile( [ archive_file ] ) + except: + return False + else: + return True + + def writeArchiveFile( self, archive_file, data ): + # At the moment, no other option but to rebuild the whole + # zip archive w/o the indicated file. 
Very sucky, but maybe + # another solution can be found + try: + self.rebuildZipFile( [ archive_file ] ) + + #now just add the archive file as a new one + zf = zipfile.ZipFile(self.path, mode='a', compression=zipfile.ZIP_DEFLATED ) + zf.writestr( archive_file, data ) + zf.close() + return True + except: + return False + + def getArchiveFilenameList( self ): + try: + zf = zipfile.ZipFile( self.path, 'r' ) + namelist = zf.namelist() + zf.close() + return namelist + except Exception as e: + print >> sys.stderr, u"Unable to get zipfile list [{0}]: {1}".format(e, self.path) + return [] + + # zip helper func + def rebuildZipFile( self, exclude_list ): + + # this recompresses the zip archive, without the files in the exclude_list + #print ">> sys.stderr, Rebuilding zip {0} without {1}".format( self.path, exclude_list ) + + # generate temp file + tmp_fd, tmp_name = tempfile.mkstemp( dir=os.path.dirname(self.path) ) + os.close( tmp_fd ) + + zin = zipfile.ZipFile (self.path, 'r') + zout = zipfile.ZipFile (tmp_name, 'w') + for item in zin.infolist(): + buffer = zin.read(item.filename) + if ( item.filename not in exclude_list ): + zout.writestr(item, buffer) + + #preserve the old comment + zout.comment = zin.comment + + zout.close() + zin.close() + + # replace with the new file + os.remove( self.path ) + os.rename( tmp_name, self.path ) + + + def writeZipComment( self, filename, comment ): + """ + This is a custom function for writing a comment to a zip file, + since the built-in one doesn't seem to work on Windows and Mac OS/X + + Fortunately, the zip comment is at the end of the file, and it's + easy to manipulate. 
See this website for more info: + see: http://en.wikipedia.org/wiki/Zip_(file_format)#Structure + """ + + #get file size + statinfo = os.stat(filename) + file_length = statinfo.st_size + + try: + fo = open(filename, "r+b") + + #the starting position, relative to EOF + pos = -4 + + found = False + value = bytearray() + + # walk backwards to find the "End of Central Directory" record + while ( not found ) and ( -pos != file_length ): + # seek, relative to EOF + fo.seek( pos, 2) + + value = fo.read( 4 ) + + #look for the end of central directory signature + if bytearray(value) == bytearray([ 0x50, 0x4b, 0x05, 0x06 ]): + found = True + else: + # not found, step back another byte + pos = pos - 1 + #print pos,"{1} int: {0:x}".format(bytearray(value)[0], value) + + if found: + + # now skip forward 20 bytes to the comment length word + pos += 20 + fo.seek( pos, 2) + + # Pack the length of the comment string + format = "H" # one 2-byte integer + comment_length = struct.pack(format, len(comment)) # pack integer in a binary string + + # write out the length + fo.write( comment_length ) + fo.seek( pos+2, 2) + + # write out the comment itself + fo.write( comment ) + fo.truncate() + fo.close() + else: + raise Exception('Failed to write comment to zip file!') + except: + return False + else: + return True + + def copyFromArchive( self, otherArchive ): + # Replace the current zip with one copied from another archive + try: + zout = zipfile.ZipFile (self.path, 'w') + for fname in otherArchive.getArchiveFilenameList(): + data = otherArchive.readArchiveFile( fname ) + if data is not None: + zout.writestr( fname, data ) + zout.close() + + #preserve the old comment + comment = otherArchive.getArchiveComment() + if comment is not None: + if not self.writeZipComment( self.path, comment ): + return False + except Exception as e: + print >> sys.stderr, u"Error while copying to {0}: {1}".format(self.path, e) + return False + else: + return True + + 
+#------------------------------------------ +# RAR implementation + +class RarArchiver: + + devnull = None + def __init__( self, path, rar_exe_path ): + self.path = path + self.rar_exe_path = rar_exe_path + + if RarArchiver.devnull is None: + RarArchiver.devnull = open(os.devnull, "w") + + # windows only, keeps the cmd.exe from popping up + if platform.system() == "Windows": + self.startupinfo = subprocess.STARTUPINFO() + self.startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW + else: + self.startupinfo = None + + def __del__(self): + #RarArchiver.devnull.close() + pass + + def getArchiveComment( self ): + + rarc = self.getRARObj() + return rarc.comment + + def setArchiveComment( self, comment ): + + if self.rar_exe_path is not None: + try: + # write comment to temp file + tmp_fd, tmp_name = tempfile.mkstemp() + f = os.fdopen(tmp_fd, 'w+b') + f.write( comment ) + f.close() + + working_dir = os.path.dirname( os.path.abspath( self.path ) ) + + # use external program to write comment to Rar archive + subprocess.call([self.rar_exe_path, 'c', '-w' + working_dir , '-c-', '-z' + tmp_name, self.path], + startupinfo=self.startupinfo, + stdout=RarArchiver.devnull) + + if platform.system() == "Darwin": + time.sleep(1) + + os.remove( tmp_name) + except: + return False + else: + return True + else: + return False + + def readArchiveFile( self, archive_file ): + + # Make sure to escape brackets, since some funky stuff is going on + # underneath with "fnmatch" + archive_file = archive_file.replace("[", '[[]') + entries = [] + + rarc = self.getRARObj() + + tries = 0 + while tries < 7: + try: + tries = tries+1 + entries = rarc.read_files( archive_file ) + + if entries[0][0].size != len(entries[0][1]): + print >> sys.stderr, u"readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format( + entries[0][0].size,len(entries[0][1]), self.path, archive_file, tries) + continue + + except (OSError, IOError) as e: + print >> sys.stderr, 
u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries) + time.sleep(1) + except Exception as e: + print >> sys.stderr, u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries) + break + + else: + #Success" + #entries is a list of of tuples: ( rarinfo, filedata) + if tries > 1: + print >> sys.stderr, u"Attempted read_files() {0} times".format(tries) + if (len(entries) == 1): + return entries[0][1] + else: + raise IOError + + raise IOError + + + + def writeArchiveFile( self, archive_file, data ): + + if self.rar_exe_path is not None: + try: + tmp_folder = tempfile.mkdtemp() + + tmp_file = os.path.join( tmp_folder, archive_file ) + + working_dir = os.path.dirname( os.path.abspath( self.path ) ) + + # TODO: will this break if 'archive_file' is in a subfolder. i.e. "foo/bar.txt" + # will need to create the subfolder above, I guess... + f = open(tmp_file, 'w') + f.write( data ) + f.close() + + # use external program to write file to Rar archive + subprocess.call([self.rar_exe_path, 'a', '-w' + working_dir ,'-c-', '-ep', self.path, tmp_file], + startupinfo=self.startupinfo, + stdout=RarArchiver.devnull) + + if platform.system() == "Darwin": + time.sleep(1) + os.remove( tmp_file) + os.rmdir( tmp_folder) + except: + return False + else: + return True + else: + return False + + def removeArchiveFile( self, archive_file ): + if self.rar_exe_path is not None: + try: + # use external program to remove file from Rar archive + subprocess.call([self.rar_exe_path, 'd','-c-', self.path, archive_file], + startupinfo=self.startupinfo, + stdout=RarArchiver.devnull) + + if platform.system() == "Darwin": + time.sleep(1) + except: + return False + else: + return True + else: + return False + + def getArchiveFilenameList( self ): + + rarc = self.getRARObj() + #namelist = [ item.filename for item in rarc.infolist() ] + #return namelist + + tries = 0 + while tries < 7: + try: + tries = 
tries+1 + #namelist = [ item.filename for item in rarc.infolist() ] + namelist = [] + for item in rarc.infolist(): + if item.size != 0: + namelist.append( item.filename ) + + except (OSError, IOError) as e: + print >> sys.stderr, u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries) + time.sleep(1) + + else: + #Success" + return namelist + + raise e + + + def getRARObj( self ): + tries = 0 + while tries < 7: + try: + tries = tries+1 + rarc = UnRAR2.RarFile( self.path ) + + except (OSError, IOError) as e: + print >> sys.stderr, u"getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries) + time.sleep(1) + + else: + #Success" + return rarc + + raise e + +#------------------------------------------ +# Folder implementation +class FolderArchiver: + + def __init__( self, path ): + self.path = path + self.comment_file_name = "ComicTaggerFolderComment.txt" + + def getArchiveComment( self ): + return self.readArchiveFile( self.comment_file_name ) + + def setArchiveComment( self, comment ): + return self.writeArchiveFile( self.comment_file_name, comment ) + + def readArchiveFile( self, archive_file ): + + data = "" + fname = os.path.join( self.path, archive_file ) + try: + with open( fname, 'rb' ) as f: + data = f.read() + f.close() + except IOError as e: + pass + + return data + + def writeArchiveFile( self, archive_file, data ): + + fname = os.path.join( self.path, archive_file ) + try: + with open(fname, 'w+') as f: + f.write( data ) + f.close() + except: + return False + else: + return True + + def removeArchiveFile( self, archive_file ): + + fname = os.path.join( self.path, archive_file ) + try: + os.remove( fname ) + except: + return False + else: + return True + + def getArchiveFilenameList( self ): + return self.listFiles( self.path ) + + def listFiles( self, folder ): + + itemlist = list() + + for item in os.listdir( folder ): + itemlist.append( item ) + if os.path.isdir( item ): + itemlist.extend( self.listFiles( 
os.path.join( folder, item ) )) + + return itemlist + +#------------------------------------------ +# Unknown implementation +class UnknownArchiver: + + def __init__( self, path ): + self.path = path + + def getArchiveComment( self ): + return "" + def setArchiveComment( self, comment ): + return False + def readArchiveFile( self ): + return "" + def writeArchiveFile( self, archive_file, data ): + return False + def removeArchiveFile( self, archive_file ): + return False + def getArchiveFilenameList( self ): + return [] + +class PdfArchiver: + def __init__( self, path ): + self.path = path + + def getArchiveComment( self ): + return "" + def setArchiveComment( self, comment ): + return False + def readArchiveFile( self, page_num ): + return subprocess.check_output(['mudraw', '-o','-', self.path, str(int(os.path.basename(page_num)[:-4]))]) + def writeArchiveFile( self, archive_file, data ): + return False + def removeArchiveFile( self, archive_file ): + return False + def getArchiveFilenameList( self ): + out = [] + pdf = PdfFileReader(open(self.path, 'rb')) + for page in range(1, pdf.getNumPages() + 1): + out.append("/%04d.jpg" % (page)) + return out + +#------------------------------------------------------------------ +class ComicArchive: + + logo_data = None + + class ArchiveType: + Zip, Rar, Folder, Pdf, Unknown = range(5) + + def __init__( self, path, rar_exe_path=None, default_image_path=None ): + self.path = path + + self.rar_exe_path = rar_exe_path + self.ci_xml_filename = 'ComicInfo.xml' + self.comet_default_filename = 'CoMet.xml' + self.resetCache() + self.default_image_path = default_image_path + + # Use file extension to decide which archive test we do first + ext = os.path.splitext(path)[1].lower() + + self.archive_type = self.ArchiveType.Unknown + self.archiver = UnknownArchiver( self.path ) + + if ext == ".cbr" or ext == ".rar": + if self.rarTest(): + self.archive_type = self.ArchiveType.Rar + self.archiver = RarArchiver( self.path, 
rar_exe_path=self.rar_exe_path ) + + elif self.zipTest(): + self.archive_type = self.ArchiveType.Zip + self.archiver = ZipArchiver( self.path ) + else: + if self.zipTest(): + self.archive_type = self.ArchiveType.Zip + self.archiver = ZipArchiver( self.path ) + + elif self.rarTest(): + self.archive_type = self.ArchiveType.Rar + self.archiver = RarArchiver( self.path, rar_exe_path=self.rar_exe_path ) + elif os.path.basename(self.path)[-3:] == 'pdf': + self.archive_type = self.ArchiveType.Pdf + self.archiver = PdfArchiver(self.path) + + if ComicArchive.logo_data is None: + #fname = ComicTaggerSettings.getGraphic('nocover.png') + fname = self.default_image_path + with open(fname, 'rb') as fd: + ComicArchive.logo_data = fd.read() + + # Clears the cached data + def resetCache( self ): + self.has_cix = None + self.has_cbi = None + self.has_comet = None + self.comet_filename = None + self.page_count = None + self.page_list = None + self.cix_md = None + self.cbi_md = None + self.comet_md = None + + def loadCache( self, style_list ): + for style in style_list: + self.readMetadata(style) + + def rename( self, path ): + self.path = path + self.archiver.path = path + + def zipTest( self ): + return zipfile.is_zipfile( self.path ) + + def rarTest( self ): + try: + rarc = UnRAR2.RarFile( self.path ) + except: # InvalidRARArchive: + return False + else: + return True + + + def isZip( self ): + return self.archive_type == self.ArchiveType.Zip + + def isRar( self ): + return self.archive_type == self.ArchiveType.Rar + def isPdf(self): + return self.archive_type == self.ArchiveType.Pdf + def isFolder( self ): + return self.archive_type == self.ArchiveType.Folder + + def isWritable( self, check_rar_status=True ): + if self.archive_type == self.ArchiveType.Unknown : + return False + + elif check_rar_status and self.isRar() and self.rar_exe_path is None: + return False + + elif not os.access(self.path, os.W_OK): + return False + + elif ((self.archive_type != self.ArchiveType.Folder) and 
+ (not os.access( os.path.dirname( os.path.abspath(self.path)), os.W_OK ))): + return False + + return True + + def isWritableForStyle( self, data_style ): + + if self.isRar() and data_style == MetaDataStyle.CBI: + return False + + return self.isWritable() + + def seemsToBeAComicArchive( self ): + + # Do we even care about extensions?? + ext = os.path.splitext(self.path)[1].lower() + + if ( + ( self.isZip() or self.isRar() or self.isPdf()) #or self.isFolder() ) + and + ( self.getNumberOfPages() > 0) + + ): + return True + else: + return False + + def readMetadata( self, style ): + + if style == MetaDataStyle.CIX: + return self.readCIX() + elif style == MetaDataStyle.CBI: + return self.readCBI() + elif style == MetaDataStyle.COMET: + return self.readCoMet() + else: + return GenericMetadata() + + def writeMetadata( self, metadata, style ): + + retcode = None + if style == MetaDataStyle.CIX: + retcode = self.writeCIX( metadata ) + elif style == MetaDataStyle.CBI: + retcode = self.writeCBI( metadata ) + elif style == MetaDataStyle.COMET: + retcode = self.writeCoMet( metadata ) + return retcode + + + def hasMetadata( self, style ): + + if style == MetaDataStyle.CIX: + return self.hasCIX() + elif style == MetaDataStyle.CBI: + return self.hasCBI() + elif style == MetaDataStyle.COMET: + return self.hasCoMet() + else: + return False + + def removeMetadata( self, style ): + retcode = True + if style == MetaDataStyle.CIX: + retcode = self.removeCIX() + elif style == MetaDataStyle.CBI: + retcode = self.removeCBI() + elif style == MetaDataStyle.COMET: + retcode = self.removeCoMet() + return retcode + + def getPage( self, index ): + + image_data = None + + filename = self.getPageName( index ) + + if filename is not None: + try: + image_data = self.archiver.readArchiveFile( filename ) + except IOError: + print >> sys.stderr, u"Error reading in page. Substituting logo page." 
+ image_data = ComicArchive.logo_data + + return image_data + + def getPageName( self, index ): + + if index is None: + return None + + page_list = self.getPageNameList() + + num_pages = len( page_list ) + if num_pages == 0 or index >= num_pages: + return None + + return page_list[index] + + def getScannerPageIndex( self ): + + scanner_page_index = None + + #make a guess at the scanner page + name_list = self.getPageNameList() + count = self.getNumberOfPages() + + #too few pages to really know + if count < 5: + return None + + # count the length of every filename, and count occurences + length_buckets = dict() + for name in name_list: + fname = os.path.split(name)[1] + length = len(fname) + if length_buckets.has_key( length ): + length_buckets[ length ] += 1 + else: + length_buckets[ length ] = 1 + + # sort by most common + sorted_buckets = sorted(length_buckets.iteritems(), key=lambda (k,v): (v,k), reverse=True) + + # statistical mode occurence is first + mode_length = sorted_buckets[0][0] + + # we are only going to consider the final image file: + final_name = os.path.split(name_list[count-1])[1] + + common_length_list = list() + for name in name_list: + if len(os.path.split(name)[1]) == mode_length: + common_length_list.append( os.path.split(name)[1] ) + + prefix = os.path.commonprefix(common_length_list) + + if mode_length <= 7 and prefix == "": + #probably all numbers + if len(final_name) > mode_length: + scanner_page_index = count-1 + + # see if the last page doesn't start with the same prefix as most others + elif not final_name.startswith(prefix): + scanner_page_index = count-1 + + return scanner_page_index + + + def getPageNameList( self , sort_list=True): + + if self.page_list is None: + # get the list file names in the archive, and sort + files = self.archiver.getArchiveFilenameList() + + # seems like some archive creators are on Windows, and don't know about case-sensitivity! 
+ if sort_list: + def keyfunc(k): + #hack to account for some weird scanner ID pages + #basename=os.path.split(k)[1] + #if basename < '0': + # k = os.path.join(os.path.split(k)[0], "z" + basename) + return k.lower() + + files = natsorted(files, key=keyfunc,signed=False) + + # make a sub-list of image files + self.page_list = [] + for name in files: + if ( name[-4:].lower() in [ ".jpg", "jpeg", ".png", ".gif", "webp" ] and os.path.basename(name)[0] != "." ): + self.page_list.append(name) + + return self.page_list + + def getNumberOfPages( self ): + + if self.page_count is None: + self.page_count = len( self.getPageNameList( ) ) + return self.page_count + + def readCBI( self ): + if self.cbi_md is None: + raw_cbi = self.readRawCBI() + if raw_cbi is None: + self.cbi_md = GenericMetadata() + else: + self.cbi_md = ComicBookInfo().metadataFromString( raw_cbi ) + + self.cbi_md.setDefaultPageList( self.getNumberOfPages() ) + + return self.cbi_md + + def readRawCBI( self ): + if ( not self.hasCBI() ): + return None + + return self.archiver.getArchiveComment() + + def hasCBI(self): + if self.has_cbi is None: + + #if ( not ( self.isZip() or self.isRar()) or not self.seemsToBeAComicArchive() ): + if not self.seemsToBeAComicArchive(): + self.has_cbi = False + else: + comment = self.archiver.getArchiveComment() + self.has_cbi = ComicBookInfo().validateString( comment ) + + return self.has_cbi + + def writeCBI( self, metadata ): + if metadata is not None: + self.applyArchiveInfoToMetadata( metadata ) + cbi_string = ComicBookInfo().stringFromMetadata( metadata ) + write_success = self.archiver.setArchiveComment( cbi_string ) + if write_success: + self.has_cbi = True + self.cbi_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCBI( self ): + if self.hasCBI(): + write_success = self.archiver.setArchiveComment( "" ) + if write_success: + self.has_cbi = False + self.cbi_md = None + self.resetCache() + return write_success + return True + + 
def readCIX( self ): + if self.cix_md is None: + raw_cix = self.readRawCIX() + if raw_cix is None or raw_cix == "": + self.cix_md = GenericMetadata() + else: + self.cix_md = ComicInfoXml().metadataFromString( raw_cix ) + + #validate the existing page list (make sure count is correct) + if len ( self.cix_md.pages ) != 0 : + if len ( self.cix_md.pages ) != self.getNumberOfPages(): + # pages array doesn't match the actual number of images we're seeing + # in the archive, so discard the data + self.cix_md.pages = [] + + if len( self.cix_md.pages ) == 0: + self.cix_md.setDefaultPageList( self.getNumberOfPages() ) + + return self.cix_md + + def readRawCIX( self ): + if not self.hasCIX(): + return None + try: + raw_cix = self.archiver.readArchiveFile( self.ci_xml_filename ) + except IOError: + print "Error reading in raw CIX!" + raw_cix = "" + return raw_cix + + def writeCIX(self, metadata): + + if metadata is not None: + self.applyArchiveInfoToMetadata( metadata, calc_page_sizes=True ) + cix_string = ComicInfoXml().stringFromMetadata( metadata ) + write_success = self.archiver.writeArchiveFile( self.ci_xml_filename, cix_string ) + if write_success: + self.has_cix = True + self.cix_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCIX( self ): + if self.hasCIX(): + write_success = self.archiver.removeArchiveFile( self.ci_xml_filename ) + if write_success: + self.has_cix = False + self.cix_md = None + self.resetCache() + return write_success + return True + + + def hasCIX(self): + if self.has_cix is None: + + if not self.seemsToBeAComicArchive(): + self.has_cix = False + elif self.ci_xml_filename in self.archiver.getArchiveFilenameList(): + self.has_cix = True + else: + self.has_cix = False + return self.has_cix + + + def readCoMet( self ): + if self.comet_md is None: + raw_comet = self.readRawCoMet() + if raw_comet is None or raw_comet == "": + self.comet_md = GenericMetadata() + else: + self.comet_md = 
CoMet().metadataFromString( raw_comet ) + + self.comet_md.setDefaultPageList( self.getNumberOfPages() ) + #use the coverImage value from the comet_data to mark the cover in this struct + # walk through list of images in file, and find the matching one for md.coverImage + # need to remove the existing one in the default + if self.comet_md.coverImage is not None: + cover_idx = 0 + for idx,f in enumerate(self.getPageNameList()): + if self.comet_md.coverImage == f: + cover_idx = idx + break + if cover_idx != 0: + del (self.comet_md.pages[0]['Type'] ) + self.comet_md.pages[ cover_idx ]['Type'] = PageType.FrontCover + + return self.comet_md + + def readRawCoMet( self ): + if not self.hasCoMet(): + print >> sys.stderr, self.path, "doesn't have CoMet data!" + return None + + try: + raw_comet = self.archiver.readArchiveFile( self.comet_filename ) + except IOError: + print >> sys.stderr, u"Error reading in raw CoMet!" + raw_comet = "" + return raw_comet + + def writeCoMet(self, metadata): + + if metadata is not None: + if not self.hasCoMet(): + self.comet_filename = self.comet_default_filename + + self.applyArchiveInfoToMetadata( metadata ) + # Set the coverImage value, if it's not the first page + cover_idx = int(metadata.getCoverPageIndexList()[0]) + if cover_idx != 0: + metadata.coverImage = self.getPageName( cover_idx ) + + comet_string = CoMet().stringFromMetadata( metadata ) + write_success = self.archiver.writeArchiveFile( self.comet_filename, comet_string ) + if write_success: + self.has_comet = True + self.comet_md = metadata + self.resetCache() + return write_success + else: + return False + + def removeCoMet( self ): + if self.hasCoMet(): + write_success = self.archiver.removeArchiveFile( self.comet_filename ) + if write_success: + self.has_comet = False + self.comet_md = None + self.resetCache() + return write_success + return True + + def hasCoMet(self): + if self.has_comet is None: + self.has_comet = False + if not self.seemsToBeAComicArchive(): + return 
self.has_comet + + #look at all xml files in root, and search for CoMet data, get first + for n in self.archiver.getArchiveFilenameList(): + if ( os.path.dirname(n) == "" and + os.path.splitext(n)[1].lower() == '.xml'): + # read in XML file, and validate it + try: + data = self.archiver.readArchiveFile( n ) + except: + data = "" + print >> sys.stderr, u"Error reading in Comet XML for validation!" + if CoMet().validateString( data ): + # since we found it, save it! + self.comet_filename = n + self.has_comet = True + break + + return self.has_comet + + + + def applyArchiveInfoToMetadata( self, md, calc_page_sizes=False): + md.pageCount = self.getNumberOfPages() + + if calc_page_sizes: + for p in md.pages: + idx = int( p['Image'] ) + if pil_available: + if 'ImageSize' not in p or 'ImageHeight' not in p or 'ImageWidth' not in p: + data = self.getPage( idx ) + if data is not None: + try: + im = Image.open(StringIO.StringIO(data)) + w,h = im.size + + p['ImageSize'] = str(len(data)) + p['ImageHeight'] = str(h) + p['ImageWidth'] = str(w) + except IOError: + p['ImageSize'] = str(len(data)) + + else: + if 'ImageSize' not in p: + data = self.getPage( idx ) + p['ImageSize'] = str(len(data)) + + + + def metadataFromFilename( self , parse_scan_info=True): + + metadata = GenericMetadata() + + fnp = FileNameParser() + fnp.parseFilename( self.path ) + + if fnp.issue != "": + metadata.issue = fnp.issue + if fnp.series != "": + metadata.series = fnp.series + if fnp.volume != "": + metadata.volume = fnp.volume + if fnp.year != "": + metadata.year = fnp.year + if fnp.issue_count != "": + metadata.issueCount = fnp.issue_count + if parse_scan_info: + if fnp.remainder != "": + metadata.scanInfo = fnp.remainder + + metadata.isEmpty = False + + return metadata + + def exportAsZip( self, zipfilename ): + if self.archive_type == self.ArchiveType.Zip: + # nothing to do, we're already a zip + return True + + zip_archiver = ZipArchiver( zipfilename ) + return zip_archiver.copyFromArchive( 
# NOTE(review): the chunk boundary falls mid-line here; the stray fragment
# ``self.archiver )`` that opened this span is the tail of the truncated
# exportAsZip() call above and is not reproduced.
#
# --- comicapi/comicbookinfo.py ---

"""
A python class to encapsulate the ComicBookInfo data
"""

"""
Copyright 2012-2014 Anthony Beville

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""


import json
from datetime import datetime
import zipfile

# Sibling-package imports are guarded so the pure-stdlib helpers (e.g.
# validateString) remain usable when the package context is unavailable.
try:
    from genericmetadata import GenericMetadata
    import utils
except ImportError:
    GenericMetadata = None
    utils = None
#import ctversion


class ComicBookInfo:
    """Read/write the ComicBookInfo (CBI) JSON metadata format."""

    def metadataFromString(self, string):
        """Parse a CBI JSON string (or UTF-8 bytes) into GenericMetadata."""
        # Portable replacement for the Python-2-only unicode(string, 'utf-8').
        if isinstance(string, bytes):
            string = string.decode('utf-8')
        cbi_container = json.loads(string)

        metadata = GenericMetadata()

        cbi = cbi_container['ComicBookInfo/1.0']

        # helper func: if item is not in CBI, return None
        def xlate(cbi_entry):
            if cbi_entry in cbi:
                return cbi[cbi_entry]
            else:
                return None

        metadata.series = xlate('series')
        metadata.title = xlate('title')
        metadata.issue = xlate('issue')
        metadata.publisher = xlate('publisher')
        metadata.month = xlate('publicationMonth')
        metadata.year = xlate('publicationYear')
        metadata.issueCount = xlate('numberOfIssues')
        metadata.comments = xlate('comments')
        metadata.credits = xlate('credits')
        metadata.genre = xlate('genre')
        metadata.volume = xlate('volume')
        metadata.volumeCount = xlate('numberOfVolumes')
        metadata.language = xlate('language')
        metadata.country = xlate('country')
        metadata.criticalRating = xlate('rating')
        metadata.tags = xlate('tags')

        # make sure credits and tags are at least empty lists and not None
        if metadata.credits is None:
            metadata.credits = []
        if metadata.tags is None:
            metadata.tags = []

        # need to massage the language string to be ISO: reverse look-up
        if metadata.language is not None:
            pattern = metadata.language
            metadata.language = None
            lang_dict = utils.getLanguageDict()  # hoisted out of the loop
            for key in lang_dict:
                # NOTE(review): byte-wise comparison assumes the dict stores
                # encoded names (Python 2 era) — confirm against
                # utils.getLanguageDict()
                if lang_dict[key] == pattern.encode('utf-8'):
                    metadata.language = key
                    break

        metadata.isEmpty = False

        return metadata

    def stringFromMetadata(self, metadata):
        """Serialize *metadata* to a compact CBI JSON string."""
        cbi_container = self.createJSONDictionary(metadata)
        return json.dumps(cbi_container)

    def validateString(self, string):
        """Verify that *string* actually contains CBI data in JSON format."""
        try:
            cbi_container = json.loads(string)
        except Exception:
            # anything unparsable is simply "not CBI"
            return False

        return ('ComicBookInfo/1.0' in cbi_container)

    def createJSONDictionary(self, metadata):
        """Build the dictionary that we will convert to JSON text."""
        cbi = dict()
        cbi_container = {'appID': 'ComicTagger/' + '1.0.0',  # ctversion.version,
                         'lastModified': str(datetime.now()),
                         'ComicBookInfo/1.0': cbi}

        # helper func
        def assign(cbi_entry, md_entry):
            if md_entry is not None:
                cbi[cbi_entry] = md_entry

        # helper func: ints and strings only (mirrors the original
        # str/unicode/int type check in a version-portable way)
        def toInt(s):
            i = None
            try:
                is_text_or_int = isinstance(s, (str, unicode, int))
            except NameError:  # Python 3: no separate unicode type
                is_text_or_int = isinstance(s, (str, int))
            if is_text_or_int:
                try:
                    i = int(s)
                except ValueError:
                    pass
            return i

        assign('series', metadata.series)
        assign('title', metadata.title)
        assign('issue', metadata.issue)
        assign('publisher', metadata.publisher)
        assign('publicationMonth', toInt(metadata.month))
        assign('publicationYear', toInt(metadata.year))
        assign('numberOfIssues', toInt(metadata.issueCount))
        assign('comments', metadata.comments)
        assign('genre', metadata.genre)
        assign('volume', toInt(metadata.volume))
        assign('numberOfVolumes', toInt(metadata.volumeCount))
        assign('language', utils.getLanguageFromISO(metadata.language))
        assign('country', metadata.country)
        assign('rating', metadata.criticalRating)
        assign('credits', metadata.credits)
        assign('tags', metadata.tags)

        return cbi_container

    def writeToExternalFile(self, filename, metadata):
        """Write the CBI JSON to *filename*, pretty-printed."""
        cbi_container = self.createJSONDictionary(metadata)

        # BUG FIX: the original ended with ``f.close`` (missing parentheses),
        # so the handle was never explicitly closed; a context manager
        # guarantees the close even on write errors.
        with open(filename, 'w') as f:
            f.write(json.dumps(cbi_container, indent=4))

# --- comicapi/comicinfoxml.py ---
# NOTE(review): the next module's header docstring opens here in the original;
# its closing quotes fall in the next chunk, so it is carried as comments:
#   "A python class to encapsulate ComicRack's ComicInfo.xml data"
#   Copyright 2012-2014 Anthony Beville (Apache License 2.0 notice)
+""" + +from datetime import datetime +import zipfile +from pprint import pprint +import xml.etree.ElementTree as ET +from genericmetadata import GenericMetadata +import utils + +class ComicInfoXml: + + writer_synonyms = ['writer', 'plotter', 'scripter'] + penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] + inker_synonyms = [ 'inker', 'artist', 'finishes' ] + colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] + letterer_synonyms = [ 'letterer'] + cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] + editor_synonyms = [ 'editor'] + + + def getParseableCredits( self ): + parsable_credits = [] + parsable_credits.extend( self.writer_synonyms ) + parsable_credits.extend( self.penciller_synonyms ) + parsable_credits.extend( self.inker_synonyms ) + parsable_credits.extend( self.colorist_synonyms ) + parsable_credits.extend( self.letterer_synonyms ) + parsable_credits.extend( self.cover_synonyms ) + parsable_credits.extend( self.editor_synonyms ) + return parsable_credits + + def metadataFromString( self, string ): + + tree = ET.ElementTree(ET.fromstring( string )) + return self.convertXMLToMetadata( tree ) + + def stringFromMetadata( self, metadata ): + + header = '\n' + + tree = self.convertMetadataToXML( self, metadata ) + return header + ET.tostring(tree.getroot()) + + def indent( self, elem, level=0 ): + # for making the XML output readable + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + self.indent( elem, level+1 ) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + def convertMetadataToXML( self, filename, metadata ): + + #shorthand for the metadata + md = metadata + + # build a tree structure + root = ET.Element("ComicInfo") + 
root.attrib['xmlns:xsi']="http://www.w3.org/2001/XMLSchema-instance" + root.attrib['xmlns:xsd']="http://www.w3.org/2001/XMLSchema" + #helper func + def assign( cix_entry, md_entry): + if md_entry is not None: + ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry) + + assign( 'Title', md.title ) + assign( 'Series', md.series ) + assign( 'Number', md.issue ) + assign( 'Count', md.issueCount ) + assign( 'Volume', md.volume ) + assign( 'AlternateSeries', md.alternateSeries ) + assign( 'AlternateNumber', md.alternateNumber ) + assign( 'StoryArc', md.storyArc ) + assign( 'SeriesGroup', md.seriesGroup ) + assign( 'AlternateCount', md.alternateCount ) + assign( 'Summary', md.comments ) + assign( 'Notes', md.notes ) + assign( 'Year', md.year ) + assign( 'Month', md.month ) + assign( 'Day', md.day ) + + # need to specially process the credits, since they are structured differently than CIX + credit_writer_list = list() + credit_penciller_list = list() + credit_inker_list = list() + credit_colorist_list = list() + credit_letterer_list = list() + credit_cover_list = list() + credit_editor_list = list() + + # first, loop thru credits, and build a list for each role that CIX supports + for credit in metadata.credits: + + if credit['role'].lower() in set( self.writer_synonyms ): + credit_writer_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.penciller_synonyms ): + credit_penciller_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.inker_synonyms ): + credit_inker_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.colorist_synonyms ): + credit_colorist_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.letterer_synonyms ): + credit_letterer_list.append(credit['person'].replace(",","")) + + if credit['role'].lower() in set( self.cover_synonyms ): + credit_cover_list.append(credit['person'].replace(",","")) + + if 
credit['role'].lower() in set( self.editor_synonyms ): + credit_editor_list.append(credit['person'].replace(",","")) + + # second, convert each list to string, and add to XML struct + if len( credit_writer_list ) > 0: + node = ET.SubElement(root, 'Writer') + node.text = utils.listToString( credit_writer_list ) + + if len( credit_penciller_list ) > 0: + node = ET.SubElement(root, 'Penciller') + node.text = utils.listToString( credit_penciller_list ) + + if len( credit_inker_list ) > 0: + node = ET.SubElement(root, 'Inker') + node.text = utils.listToString( credit_inker_list ) + + if len( credit_colorist_list ) > 0: + node = ET.SubElement(root, 'Colorist') + node.text = utils.listToString( credit_colorist_list ) + + if len( credit_letterer_list ) > 0: + node = ET.SubElement(root, 'Letterer') + node.text = utils.listToString( credit_letterer_list ) + + if len( credit_cover_list ) > 0: + node = ET.SubElement(root, 'CoverArtist') + node.text = utils.listToString( credit_cover_list ) + + if len( credit_editor_list ) > 0: + node = ET.SubElement(root, 'Editor') + node.text = utils.listToString( credit_editor_list ) + + assign( 'Publisher', md.publisher ) + assign( 'Imprint', md.imprint ) + assign( 'Genre', md.genre ) + assign( 'Web', md.webLink ) + assign( 'PageCount', md.pageCount ) + assign( 'LanguageISO', md.language ) + assign( 'Format', md.format ) + assign( 'AgeRating', md.maturityRating ) + if md.blackAndWhite is not None and md.blackAndWhite: + ET.SubElement(root, 'BlackAndWhite').text = "Yes" + assign( 'Manga', md.manga ) + assign( 'Characters', md.characters ) + assign( 'Teams', md.teams ) + assign( 'Locations', md.locations ) + assign( 'ScanInformation', md.scanInfo ) + + # loop and add the page entries under pages node + if len( md.pages ) > 0: + pages_node = ET.SubElement(root, 'Pages') + for page_dict in md.pages: + page_node = ET.SubElement(pages_node, 'Page') + page_node.attrib = page_dict + + # self pretty-print + self.indent(root) + + # wrap it in an 
ElementTree instance, and save as XML + tree = ET.ElementTree(root) + return tree + + + def convertXMLToMetadata( self, tree ): + + root = tree.getroot() + + if root.tag != 'ComicInfo': + raise 1 + return None + + metadata = GenericMetadata() + md = metadata + + + # Helper function + def xlate( tag ): + node = root.find( tag ) + if node is not None: + return node.text + else: + return None + + md.series = xlate( 'Series' ) + md.title = xlate( 'Title' ) + md.issue = xlate( 'Number' ) + md.issueCount = xlate( 'Count' ) + md.volume = xlate( 'Volume' ) + md.alternateSeries = xlate( 'AlternateSeries' ) + md.alternateNumber = xlate( 'AlternateNumber' ) + md.alternateCount = xlate( 'AlternateCount' ) + md.comments = xlate( 'Summary' ) + md.notes = xlate( 'Notes' ) + md.year = xlate( 'Year' ) + md.month = xlate( 'Month' ) + md.day = xlate( 'Day' ) + md.publisher = xlate( 'Publisher' ) + md.imprint = xlate( 'Imprint' ) + md.genre = xlate( 'Genre' ) + md.webLink = xlate( 'Web' ) + md.language = xlate( 'LanguageISO' ) + md.format = xlate( 'Format' ) + md.manga = xlate( 'Manga' ) + md.characters = xlate( 'Characters' ) + md.teams = xlate( 'Teams' ) + md.locations = xlate( 'Locations' ) + md.pageCount = xlate( 'PageCount' ) + md.scanInfo = xlate( 'ScanInformation' ) + md.storyArc = xlate( 'StoryArc' ) + md.seriesGroup = xlate( 'SeriesGroup' ) + md.maturityRating = xlate( 'AgeRating' ) + + tmp = xlate( 'BlackAndWhite' ) + md.blackAndWhite = False + if tmp is not None and tmp.lower() in [ "yes", "true", "1" ]: + md.blackAndWhite = True + # Now extract the credit info + for n in root: + if ( n.tag == 'Writer' or + n.tag == 'Penciller' or + n.tag == 'Inker' or + n.tag == 'Colorist' or + n.tag == 'Letterer' or + n.tag == 'Editor' + ): + if n.text is not None: + for name in n.text.split(','): + metadata.addCredit( name.strip(), n.tag ) + + if n.tag == 'CoverArtist': + if n.text is not None: + for name in n.text.split(','): + metadata.addCredit( name.strip(), "Cover" ) + + # parse 
page data now + pages_node = root.find( "Pages" ) + if pages_node is not None: + for page in pages_node: + metadata.pages.append( page.attrib ) + #print page.attrib + + metadata.isEmpty = False + + return metadata + + def writeToExternalFile( self, filename, metadata ): + + tree = self.convertMetadataToXML( self, metadata ) + #ET.dump(tree) + tree.write(filename, encoding='utf-8') + + def readFromExternalFile( self, filename ): + + tree = ET.parse( filename ) + return self.convertXMLToMetadata( tree ) + diff --git a/comicapi/filenameparser.py b/comicapi/filenameparser.py new file mode 100644 index 0000000..6f3aa05 --- /dev/null +++ b/comicapi/filenameparser.py @@ -0,0 +1,277 @@ +""" +Functions for parsing comic info from filename + +This should probably be re-written, but, well, it mostly works! + +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +# Some portions of this code were modified from pyComicMetaThis project +# http://code.google.com/p/pycomicmetathis/ + +import re +import os +from urllib import unquote + +class FileNameParser: + + def repl(self, m): + return ' ' * len(m.group()) + + def fixSpaces( self, string, remove_dashes=True ): + if remove_dashes: + placeholders = ['[-_]',' +'] + else: + placeholders = ['[_]',' +'] + for ph in placeholders: + string = re.sub(ph, self.repl, string ) + return string #.strip() + + + def getIssueCount( self,filename, issue_end ): + + count = "" + filename = filename[issue_end:] + + # replace any name seperators with spaces + tmpstr = self.fixSpaces(filename) + found = False + + match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE) + if match: + count = match.group() + found = True + + if not found: + match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE) + if match: + count = match.group() + found = True + + + count = count.lstrip("0") + + return count + + def getIssueNumber( self, filename ): + + # Returns a tuple of issue number string, and start and end indexs in the filename + # (The indexes will be used to split the string up for further parsing) + + found = False + issue = '' + start = 0 + end = 0 + + # first, look for multiple "--", this means it's formatted differently from most: + if "--" in filename: + # the pattern seems to be that anything to left of the first "--" is the series name followed by issue + filename = re.sub("--.*", self.repl, filename) + + elif "__" in filename: + # the pattern seems to be that anything to left of the first "__" is the series name followed by issue + filename = re.sub("__.*", self.repl, filename) + + filename = filename.replace("+", " ") + + # replace parenthetical phrases with spaces + filename = re.sub( "\(.*?\)", self.repl, filename) + filename = re.sub( "\[.*?\]", self.repl, filename) + + # replace any name seperators with spaces + filename = self.fixSpaces(filename) + + # remove any 
"of NN" phrase with spaces (problem: this could break on some titles) + filename = re.sub( "of [\d]+", self.repl, filename) + + #print u"[{0}]".format(filename) + + # we should now have a cleaned up filename version with all the words in + # the same positions as original filename + + # make a list of each word and its position + word_list = list() + for m in re.finditer("\S+", filename): + word_list.append( (m.group(0), m.start(), m.end()) ) + + # remove the first word, since it can't be the issue number + if len(word_list) > 1: + word_list = word_list[1:] + else: + #only one word?? just bail. + return issue, start, end + + # Now try to search for the likely issue number word in the list + + # first look for a word with "#" followed by digits with optional sufix + # this is almost certainly the issue number + for w in reversed(word_list): + if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + found = True + break + + # same as above but w/o a '#', and only look at the last word in the list + if not found: + w = word_list[-1] + if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): + found = True + + # now try to look for a # followed by any characters + if not found: + for w in reversed(word_list): + if re.match("#\S+", w[0]): + found = True + break + + if found: + issue = w[0] + start = w[1] + end = w[2] + if issue[0] == '#': + issue = issue[1:] + + return issue, start, end + + def getSeriesName(self, filename, issue_start ): + + # use the issue number string index to split the filename string + + if issue_start != 0: + filename = filename[:issue_start] + + # in case there is no issue number, remove some obvious stuff + if "--" in filename: + # the pattern seems to be that anything to left of the first "--" is the series name followed by issue + filename = re.sub("--.*", self.repl, filename) + + elif "__" in filename: + # the pattern seems to be that anything to left of the first "__" is the series name followed by issue + filename = re.sub("__.*", 
self.repl, filename) + + filename = filename.replace("+", " ") + tmpstr = self.fixSpaces(filename, remove_dashes=False) + + series = tmpstr + volume = "" + + #save the last word + try: + last_word = series.split()[-1] + except: + last_word = "" + + # remove any parenthetical phrases + series = re.sub( "\(.*?\)", "", series) + + # search for volume number + match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) + if match: + series = match.group(1) + volume = match.group(3) + + # if a volume wasn't found, see if the last word is a year in parentheses + # since that's a common way to designate the volume + if volume == "": + #match either (YEAR), (YEAR-), or (YEAR-YEAR2) + match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word) + if match: + volume = match.group(2) + + series = series.strip() + + # if we don't have an issue number (issue_start==0), look + # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might + # be removed to help search online + if issue_start == 0: + one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ] + try: + last_word = series.split()[-1] + if last_word.lower() in one_shot_words: + series = series.rsplit(' ', 1)[0] + except: + pass + + return series, volume.strip() + + def getYear( self,filename, issue_end): + + filename = filename[issue_end:] + + year = "" + # look for four digit number with "(" ")" or "--" around it + match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename) + if match: + year = match.group() + # remove non-numerics + year = re.sub("[^0-9]", "", year) + return year + + def getRemainder( self, filename, year, count, issue_end ): + + #make a guess at where the the non-interesting stuff begins + remainder = "" + + if "--" in filename: + remainder = filename.split("--",1)[1] + elif "__" in filename: + remainder = filename.split("__",1)[1] + elif issue_end != 0: + remainder = filename[issue_end:] + + remainder = self.fixSpaces(remainder, remove_dashes=False) + if year != "": + remainder = 
remainder.replace(year,"",1) + if count != "": + remainder = remainder.replace("of "+count,"",1) + + remainder = remainder.replace("()","") + + return remainder.strip() + + def parseFilename( self, filename ): + + # remove the path + filename = os.path.basename(filename) + + # remove the extension + filename = os.path.splitext(filename)[0] + + #url decode, just in case + filename = unquote(filename) + + # sometimes archives get messed up names from too many decodings + # often url encodings will break and leave "_28" and "_29" in place + # of "(" and ")" see if there are a number of these, and replace them + if filename.count("_28") > 1 and filename.count("_29") > 1: + filename = filename.replace("_28", "(") + filename = filename.replace("_29", ")") + + self.issue, issue_start, issue_end = self.getIssueNumber(filename) + self.series, self.volume = self.getSeriesName(filename, issue_start) + self.year = self.getYear(filename, issue_end) + self.issue_count = self.getIssueCount(filename, issue_end) + self.remainder = self.getRemainder( filename, self.year, self.issue_count, issue_end ) + + if self.issue != "": + # strip off leading zeros + self.issue = self.issue.lstrip("0") + if self.issue == "": + self.issue = "0" + if self.issue[0] == ".": + self.issue = "0" + self.issue diff --git a/comicapi/genericmetadata.py b/comicapi/genericmetadata.py new file mode 100644 index 0000000..8e7aeaf --- /dev/null +++ b/comicapi/genericmetadata.py @@ -0,0 +1,316 @@ +""" + A python class for internal metadata storage + + The goal of this class is to handle ALL the data that might come from various + tagging schemes and databases, such as ComicVine or GCD. This makes conversion + possible, however lossy it might be + +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
# NOTE(review): chunk boundary — the Apache-2.0 license header of
# comicapi/genericmetadata.py opened on the previous line; its remainder is
# carried here as comments:
#   You may obtain a copy of the License at
#       http://www.apache.org/licenses/LICENSE-2.0
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

# ``utils`` is only needed by __str__ (whose body is truncated off the end of
# this chunk); guard the import so the rest of the module works standalone.
try:
    import utils
except ImportError:
    utils = None


# These page info classes are exactly the same as the CIX scheme, since it's
# unique
class PageType:
    FrontCover = "FrontCover"
    InnerCover = "InnerCover"
    Roundup = "Roundup"
    Story = "Story"
    Advertisement = "Advertisement"
    Editorial = "Editorial"
    Letters = "Letters"
    Preview = "Preview"
    BackCover = "BackCover"
    Other = "Other"
    Deleted = "Deleted"

"""
class PageInfo:
    Image = 0
    Type = PageType.Story
    DoublePage = False
    ImageSize = 0
    Key = ""
    ImageWidth = 0
    ImageHeight = 0
"""


class GenericMetadata:
    """Tagging-scheme-agnostic metadata container.

    Holds ALL the data that might come from various tagging schemes and
    databases (ComicVine, GCD, ...) so conversion between them is possible,
    however lossy it might be.
    """

    def __init__(self):

        self.isEmpty = True
        self.tagOrigin = None

        self.series = None
        self.issue = None
        self.title = None
        self.publisher = None
        self.month = None
        self.year = None
        self.day = None
        self.issueCount = None
        self.volume = None
        self.genre = None
        self.language = None  # 2 letter iso code
        self.comments = None  # use same way as Summary in CIX

        self.volumeCount = None
        self.criticalRating = None
        self.country = None

        self.alternateSeries = None
        self.alternateNumber = None
        self.alternateCount = None
        self.imprint = None
        self.notes = None
        self.webLink = None
        self.format = None
        self.manga = None
        self.blackAndWhite = None
        self.pageCount = None
        self.maturityRating = None

        self.storyArc = None
        self.seriesGroup = None
        self.scanInfo = None

        self.characters = None
        self.teams = None
        self.locations = None

        self.credits = list()
        self.tags = list()
        self.pages = list()

        # Some CoMet-only items
        self.price = None
        self.isVersionOf = None
        self.rights = None
        self.identifier = None
        self.lastMark = None
        self.coverImage = None

    def overlay(self, new_md):
        """Overlay *new_md* on this object: every non-None value in *new_md*
        over-writes the corresponding value here (an empty string clears the
        field to None)."""

        def assign(cur, new):
            if new is not None:
                if type(new) == str and len(new) == 0:
                    setattr(self, cur, None)
                else:
                    setattr(self, cur, new)

        if not new_md.isEmpty:
            self.isEmpty = False

        assign('series', new_md.series)
        assign("issue", new_md.issue)
        assign("issueCount", new_md.issueCount)
        assign("title", new_md.title)
        assign("publisher", new_md.publisher)
        assign("day", new_md.day)
        assign("month", new_md.month)
        assign("year", new_md.year)
        assign("volume", new_md.volume)
        assign("volumeCount", new_md.volumeCount)
        assign("genre", new_md.genre)
        assign("language", new_md.language)
        assign("country", new_md.country)
        assign("criticalRating", new_md.criticalRating)
        assign("alternateSeries", new_md.alternateSeries)
        assign("alternateNumber", new_md.alternateNumber)
        assign("alternateCount", new_md.alternateCount)
        assign("imprint", new_md.imprint)
        assign("webLink", new_md.webLink)
        assign("format", new_md.format)
        assign("manga", new_md.manga)
        assign("blackAndWhite", new_md.blackAndWhite)
        assign("maturityRating", new_md.maturityRating)
        assign("storyArc", new_md.storyArc)
        assign("seriesGroup", new_md.seriesGroup)
        assign("scanInfo", new_md.scanInfo)
        assign("characters", new_md.characters)
        assign("teams", new_md.teams)
        assign("locations", new_md.locations)
        assign("comments", new_md.comments)
        assign("notes", new_md.notes)

        assign("price", new_md.price)
        assign("isVersionOf", new_md.isVersionOf)
        assign("rights", new_md.rights)
        assign("identifier", new_md.identifier)
        assign("lastMark", new_md.lastMark)

        self.overlayCredits(new_md.credits)
        # TODO

        # not sure if the tags and pages should be broken down, or treated
        # as whole lists....

        # For now, go the easy route, where any overlay value wipes out the
        # whole list
        if len(new_md.tags) > 0:
            assign("tags", new_md.tags)

        if len(new_md.pages) > 0:
            assign("pages", new_md.pages)

    def overlayCredits(self, new_credits):
        """Merge *new_credits* into this object's credit list."""
        for c in new_credits:
            # 'in' test instead of dict.has_key() (removed in Python 3)
            if 'primary' in c and c['primary']:
                primary = True
            else:
                primary = False

            # Remove credit role if person is blank
            if c['person'] == "":
                for r in reversed(self.credits):
                    if r['role'].lower() == c['role'].lower():
                        self.credits.remove(r)
            # otherwise, add it!
            else:
                self.addCredit(c['person'], c['role'], primary)

    def setDefaultPageList(self, count):
        """Generate a default page list, with the first page marked as cover."""
        for i in range(count):
            page_dict = dict()
            page_dict['Image'] = str(i)
            if i == 0:
                page_dict['Type'] = PageType.FrontCover
            self.pages.append(page_dict)

    def getArchivePageIndex(self, pagenum):
        """Convert displayed page number to archive page index (0 if out of range)."""
        if pagenum < len(self.pages):
            return int(self.pages[pagenum]['Image'])
        else:
            return 0

    def getCoverPageIndexList(self):
        """Return a list of archive page indices of cover pages (never empty)."""
        coverlist = []
        for p in self.pages:
            if 'Type' in p and p['Type'] == PageType.FrontCover:
                coverlist.append(int(p['Image']))

        if len(coverlist) == 0:
            coverlist.append(0)

        return coverlist

    def addCredit(self, person, role, primary=False):
        """Add a credit, or update the 'primary' flag of an existing one."""
        credit = dict()
        credit['person'] = person
        credit['role'] = role
        if primary:
            credit['primary'] = primary

        # look to see if it's not already there...
        found = False
        for c in self.credits:
            if (c['person'].lower() == person.lower() and
                    c['role'].lower() == role.lower()):
                # no need to add it. just adjust the "primary" flag as needed
                c['primary'] = primary
                found = True
                break

        if not found:
            self.credits.append(credit)

    # NOTE(review): the original __str__ begins here but its body runs past
    # the end of this chunk (cut at ``outstr = ""``); per review policy the
    # truncated definition is not reconstructed.
fmt_str = u"{0: <" + str(flen) + "} {1}\n" + for i in vals: + outstr += fmt_str.format( i[0]+":", i[1] ) + + return outstr diff --git a/comicapi/issuestring.py b/comicapi/issuestring.py new file mode 100644 index 0000000..751aa8c --- /dev/null +++ b/comicapi/issuestring.py @@ -0,0 +1,140 @@ +# coding=utf-8 +""" +Class for handling the odd permutations of an 'issue number' that the comics industry throws at us + +e.g.: + +"12" +"12.1" +"0" +"-1" +"5AU" +"100-2" + +""" + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import utils +import math +import re + +class IssueString: + def __init__(self, text): + + # break up the issue number string into 2 parts: the numeric and suffix string. + # ( assumes that the numeric portion is always first ) + + self.num = None + self.suffix = "" + + if text is None: + return + + if type(text) == int: + text = str(text) + + if len(text) == 0: + return + + text = unicode(text) + + #skip the minus sign if it's first + if text[0] == '-': + start = 1 + else: + start = 0 + + # if it's still not numeric at start skip it + if text[start].isdigit() or text[start] == ".": + # walk through the string, look for split point (the first non-numeric) + decimal_count = 0 + for idx in range( start, len(text) ): + if text[idx] not in "0123456789.": + break + # special case: also split on second "." 
+ if text[idx] == ".": + decimal_count += 1 + if decimal_count > 1: + break + else: + idx = len(text) + + # move trailing numeric decimal to suffix + # (only if there is other junk after ) + if text[idx-1] == "." and len(text) != idx: + idx = idx -1 + + # if there is no numeric after the minus, make the minus part of the suffix + if idx == 1 and start == 1: + idx = 0 + + part1 = text[0:idx] + part2 = text[idx:len(text)] + + if part1 != "": + self.num = float( part1 ) + self.suffix = part2 + else: + self.suffix = text + + #print "num: {0} suf: {1}".format(self.num, self.suffix) + + def asString( self, pad = 0 ): + #return the float, left side zero-padded, with suffix attached + if self.num is None: + return self.suffix + + negative = self.num < 0 + + num_f = abs(self.num) + + num_int = int( num_f ) + num_s = str( num_int ) + if float( num_int ) != num_f: + num_s = str( num_f ) + + num_s += self.suffix + + # create padding + padding = "" + l = len( str(num_int)) + if l < pad : + padding = "0" * (pad - l) + + num_s = padding + num_s + if negative: + num_s = "-" + num_s + + return num_s + + def asFloat( self ): + #return the float, with no suffix + if self.suffix == u"½": + if self.num is not None: + return self.num + .5 + else: + return .5 + return self.num + + def asInt( self ): + #return the int version of the float + if self.num is None: + return None + return int( self.num ) + + diff --git a/comicapi/utils.py b/comicapi/utils.py new file mode 100644 index 0000000..e315cd7 --- /dev/null +++ b/comicapi/utils.py @@ -0,0 +1,597 @@ +# coding=utf-8 + +""" +Some generic utilities +""" + + +""" +Copyright 2012-2014 Anthony Beville + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import sys +import os +import re +import platform +import locale +import codecs + + +class UtilsVars: + already_fixed_encoding = False + +def get_actual_preferred_encoding(): + preferred_encoding = locale.getpreferredencoding() + if platform.system() == "Darwin": + preferred_encoding = "utf-8" + return preferred_encoding + +def fix_output_encoding( ): + if not UtilsVars.already_fixed_encoding: + # this reads the environment and inits the right locale + locale.setlocale(locale.LC_ALL, "") + + # try to make stdout/stderr encodings happy for unicode printing + preferred_encoding = get_actual_preferred_encoding() + sys.stdout = codecs.getwriter(preferred_encoding)(sys.stdout) + sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr) + UtilsVars.already_fixed_encoding = True + +def get_recursive_filelist( pathlist ): + """ + Get a recursive list of of all files under all path items in the list + """ + filename_encoding = sys.getfilesystemencoding() + filelist = [] + for p in pathlist: + # if path is a folder, walk it recursivly, and all files underneath + if type(p) == str: + #make sure string is unicode + p = p.decode(filename_encoding) #, 'replace') + elif type(p) != unicode: + #it's probably a QString + p = unicode(p) + + if os.path.isdir( p ): + for root,dirs,files in os.walk( p ): + for f in files: + if type(f) == str: + #make sure string is unicode + f = f.decode(filename_encoding, 'replace') + elif type(f) != unicode: + #it's probably a QString + f = unicode(f) + filelist.append(os.path.join(root,f)) + else: + filelist.append(p) + + return filelist + +def 
listToString( l ): + string = "" + if l is not None: + for item in l: + if len(string) > 0: + string += ", " + string += item + return string + +def addtopath( dirname ): + if dirname is not None and dirname != "": + + # verify that path doesn't already contain the given dirname + tmpdirname = re.escape(dirname) + pattern = r"{sep}{dir}$|^{dir}{sep}|{sep}{dir}{sep}|^{dir}$".format( dir=tmpdirname, sep=os.pathsep) + + match = re.search(pattern, os.environ['PATH']) + if not match: + os.environ['PATH'] = dirname + os.pathsep + os.environ['PATH'] + +# returns executable path, if it exists +def which(program): + + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + +def removearticles( text ): + text = text.lower() + articles = ['and', 'the', 'a', '&', 'issue' ] + newText = '' + for word in text.split(' '): + if word not in articles: + newText += word+' ' + + newText = newText[:-1] + + # now get rid of some other junk + newText = newText.replace(":", "") + newText = newText.replace(",", "") + newText = newText.replace("-", " ") + + # since the CV api changed, searches for series names with periods + # now explicity require the period to be in the search key, + # so the line below is removed (for now) + #newText = newText.replace(".", "") + + return newText + + +def unique_file(file_name): + counter = 1 + file_name_parts = os.path.splitext(file_name) # returns ('/path/file', '.ext') + while 1: + if not os.path.lexists( file_name): + return file_name + file_name = file_name_parts[0] + ' (' + str(counter) + ')' + file_name_parts[1] + counter += 1 + + +# -o- coding: utf-8 -o- +# ISO639 python dict +# oficial list in http://www.loc.gov/standards/iso639-2/php/code_list.php + +lang_dict = { + 'ab': 
'Abkhaz', + 'aa': 'Afar', + 'af': 'Afrikaans', + 'ak': 'Akan', + 'sq': 'Albanian', + 'am': 'Amharic', + 'ar': 'Arabic', + 'an': 'Aragonese', + 'hy': 'Armenian', + 'as': 'Assamese', + 'av': 'Avaric', + 'ae': 'Avestan', + 'ay': 'Aymara', + 'az': 'Azerbaijani', + 'bm': 'Bambara', + 'ba': 'Bashkir', + 'eu': 'Basque', + 'be': 'Belarusian', + 'bn': 'Bengali', + 'bh': 'Bihari', + 'bi': 'Bislama', + 'bs': 'Bosnian', + 'br': 'Breton', + 'bg': 'Bulgarian', + 'my': 'Burmese', + 'ca': 'Catalan; Valencian', + 'ch': 'Chamorro', + 'ce': 'Chechen', + 'ny': 'Chichewa; Chewa; Nyanja', + 'zh': 'Chinese', + 'cv': 'Chuvash', + 'kw': 'Cornish', + 'co': 'Corsican', + 'cr': 'Cree', + 'hr': 'Croatian', + 'cs': 'Czech', + 'da': 'Danish', + 'dv': 'Divehi; Maldivian;', + 'nl': 'Dutch', + 'dz': 'Dzongkha', + 'en': 'English', + 'eo': 'Esperanto', + 'et': 'Estonian', + 'ee': 'Ewe', + 'fo': 'Faroese', + 'fj': 'Fijian', + 'fi': 'Finnish', + 'fr': 'French', + 'ff': 'Fula', + 'gl': 'Galician', + 'ka': 'Georgian', + 'de': 'German', + 'el': 'Greek, Modern', + 'gn': 'Guaraní', + 'gu': 'Gujarati', + 'ht': 'Haitian', + 'ha': 'Hausa', + 'he': 'Hebrew (modern)', + 'hz': 'Herero', + 'hi': 'Hindi', + 'ho': 'Hiri Motu', + 'hu': 'Hungarian', + 'ia': 'Interlingua', + 'id': 'Indonesian', + 'ie': 'Interlingue', + 'ga': 'Irish', + 'ig': 'Igbo', + 'ik': 'Inupiaq', + 'io': 'Ido', + 'is': 'Icelandic', + 'it': 'Italian', + 'iu': 'Inuktitut', + 'ja': 'Japanese', + 'jv': 'Javanese', + 'kl': 'Kalaallisut', + 'kn': 'Kannada', + 'kr': 'Kanuri', + 'ks': 'Kashmiri', + 'kk': 'Kazakh', + 'km': 'Khmer', + 'ki': 'Kikuyu, Gikuyu', + 'rw': 'Kinyarwanda', + 'ky': 'Kirghiz, Kyrgyz', + 'kv': 'Komi', + 'kg': 'Kongo', + 'ko': 'Korean', + 'ku': 'Kurdish', + 'kj': 'Kwanyama, Kuanyama', + 'la': 'Latin', + 'lb': 'Luxembourgish', + 'lg': 'Luganda', + 'li': 'Limburgish', + 'ln': 'Lingala', + 'lo': 'Lao', + 'lt': 'Lithuanian', + 'lu': 'Luba-Katanga', + 'lv': 'Latvian', + 'gv': 'Manx', + 'mk': 'Macedonian', + 'mg': 'Malagasy', + 'ms': 'Malay', 
+ 'ml': 'Malayalam', + 'mt': 'Maltese', + 'mi': 'Māori', + 'mr': 'Marathi (Marāṭhī)', + 'mh': 'Marshallese', + 'mn': 'Mongolian', + 'na': 'Nauru', + 'nv': 'Navajo, Navaho', + 'nb': 'Norwegian Bokmål', + 'nd': 'North Ndebele', + 'ne': 'Nepali', + 'ng': 'Ndonga', + 'nn': 'Norwegian Nynorsk', + 'no': 'Norwegian', + 'ii': 'Nuosu', + 'nr': 'South Ndebele', + 'oc': 'Occitan', + 'oj': 'Ojibwe, Ojibwa', + 'cu': 'Old Church Slavonic', + 'om': 'Oromo', + 'or': 'Oriya', + 'os': 'Ossetian, Ossetic', + 'pa': 'Panjabi, Punjabi', + 'pi': 'Pāli', + 'fa': 'Persian', + 'pl': 'Polish', + 'ps': 'Pashto, Pushto', + 'pt': 'Portuguese', + 'qu': 'Quechua', + 'rm': 'Romansh', + 'rn': 'Kirundi', + 'ro': 'Romanian, Moldavan', + 'ru': 'Russian', + 'sa': 'Sanskrit (Saṁskṛta)', + 'sc': 'Sardinian', + 'sd': 'Sindhi', + 'se': 'Northern Sami', + 'sm': 'Samoan', + 'sg': 'Sango', + 'sr': 'Serbian', + 'gd': 'Scottish Gaelic', + 'sn': 'Shona', + 'si': 'Sinhala, Sinhalese', + 'sk': 'Slovak', + 'sl': 'Slovene', + 'so': 'Somali', + 'st': 'Southern Sotho', + 'es': 'Spanish; Castilian', + 'su': 'Sundanese', + 'sw': 'Swahili', + 'ss': 'Swati', + 'sv': 'Swedish', + 'ta': 'Tamil', + 'te': 'Telugu', + 'tg': 'Tajik', + 'th': 'Thai', + 'ti': 'Tigrinya', + 'bo': 'Tibetan', + 'tk': 'Turkmen', + 'tl': 'Tagalog', + 'tn': 'Tswana', + 'to': 'Tonga', + 'tr': 'Turkish', + 'ts': 'Tsonga', + 'tt': 'Tatar', + 'tw': 'Twi', + 'ty': 'Tahitian', + 'ug': 'Uighur, Uyghur', + 'uk': 'Ukrainian', + 'ur': 'Urdu', + 'uz': 'Uzbek', + 've': 'Venda', + 'vi': 'Vietnamese', + 'vo': 'Volapük', + 'wa': 'Walloon', + 'cy': 'Welsh', + 'wo': 'Wolof', + 'fy': 'Western Frisian', + 'xh': 'Xhosa', + 'yi': 'Yiddish', + 'yo': 'Yoruba', + 'za': 'Zhuang, Chuang', + 'zu': 'Zulu', +} + + +countries = [ + ('AF', 'Afghanistan'), + ('AL', 'Albania'), + ('DZ', 'Algeria'), + ('AS', 'American Samoa'), + ('AD', 'Andorra'), + ('AO', 'Angola'), + ('AI', 'Anguilla'), + ('AQ', 'Antarctica'), + ('AG', 'Antigua And Barbuda'), + ('AR', 'Argentina'), + ('AM', 
'Armenia'), + ('AW', 'Aruba'), + ('AU', 'Australia'), + ('AT', 'Austria'), + ('AZ', 'Azerbaijan'), + ('BS', 'Bahamas'), + ('BH', 'Bahrain'), + ('BD', 'Bangladesh'), + ('BB', 'Barbados'), + ('BY', 'Belarus'), + ('BE', 'Belgium'), + ('BZ', 'Belize'), + ('BJ', 'Benin'), + ('BM', 'Bermuda'), + ('BT', 'Bhutan'), + ('BO', 'Bolivia'), + ('BA', 'Bosnia And Herzegowina'), + ('BW', 'Botswana'), + ('BV', 'Bouvet Island'), + ('BR', 'Brazil'), + ('BN', 'Brunei Darussalam'), + ('BG', 'Bulgaria'), + ('BF', 'Burkina Faso'), + ('BI', 'Burundi'), + ('KH', 'Cambodia'), + ('CM', 'Cameroon'), + ('CA', 'Canada'), + ('CV', 'Cape Verde'), + ('KY', 'Cayman Islands'), + ('CF', 'Central African Rep'), + ('TD', 'Chad'), + ('CL', 'Chile'), + ('CN', 'China'), + ('CX', 'Christmas Island'), + ('CC', 'Cocos Islands'), + ('CO', 'Colombia'), + ('KM', 'Comoros'), + ('CG', 'Congo'), + ('CK', 'Cook Islands'), + ('CR', 'Costa Rica'), + ('CI', 'Cote D`ivoire'), + ('HR', 'Croatia'), + ('CU', 'Cuba'), + ('CY', 'Cyprus'), + ('CZ', 'Czech Republic'), + ('DK', 'Denmark'), + ('DJ', 'Djibouti'), + ('DM', 'Dominica'), + ('DO', 'Dominican Republic'), + ('TP', 'East Timor'), + ('EC', 'Ecuador'), + ('EG', 'Egypt'), + ('SV', 'El Salvador'), + ('GQ', 'Equatorial Guinea'), + ('ER', 'Eritrea'), + ('EE', 'Estonia'), + ('ET', 'Ethiopia'), + ('FK', 'Falkland Islands (Malvinas)'), + ('FO', 'Faroe Islands'), + ('FJ', 'Fiji'), + ('FI', 'Finland'), + ('FR', 'France'), + ('GF', 'French Guiana'), + ('PF', 'French Polynesia'), + ('TF', 'French S. 
Territories'), + ('GA', 'Gabon'), + ('GM', 'Gambia'), + ('GE', 'Georgia'), + ('DE', 'Germany'), + ('GH', 'Ghana'), + ('GI', 'Gibraltar'), + ('GR', 'Greece'), + ('GL', 'Greenland'), + ('GD', 'Grenada'), + ('GP', 'Guadeloupe'), + ('GU', 'Guam'), + ('GT', 'Guatemala'), + ('GN', 'Guinea'), + ('GW', 'Guinea-bissau'), + ('GY', 'Guyana'), + ('HT', 'Haiti'), + ('HN', 'Honduras'), + ('HK', 'Hong Kong'), + ('HU', 'Hungary'), + ('IS', 'Iceland'), + ('IN', 'India'), + ('ID', 'Indonesia'), + ('IR', 'Iran'), + ('IQ', 'Iraq'), + ('IE', 'Ireland'), + ('IL', 'Israel'), + ('IT', 'Italy'), + ('JM', 'Jamaica'), + ('JP', 'Japan'), + ('JO', 'Jordan'), + ('KZ', 'Kazakhstan'), + ('KE', 'Kenya'), + ('KI', 'Kiribati'), + ('KP', 'Korea (North)'), + ('KR', 'Korea (South)'), + ('KW', 'Kuwait'), + ('KG', 'Kyrgyzstan'), + ('LA', 'Laos'), + ('LV', 'Latvia'), + ('LB', 'Lebanon'), + ('LS', 'Lesotho'), + ('LR', 'Liberia'), + ('LY', 'Libya'), + ('LI', 'Liechtenstein'), + ('LT', 'Lithuania'), + ('LU', 'Luxembourg'), + ('MO', 'Macau'), + ('MK', 'Macedonia'), + ('MG', 'Madagascar'), + ('MW', 'Malawi'), + ('MY', 'Malaysia'), + ('MV', 'Maldives'), + ('ML', 'Mali'), + ('MT', 'Malta'), + ('MH', 'Marshall Islands'), + ('MQ', 'Martinique'), + ('MR', 'Mauritania'), + ('MU', 'Mauritius'), + ('YT', 'Mayotte'), + ('MX', 'Mexico'), + ('FM', 'Micronesia'), + ('MD', 'Moldova'), + ('MC', 'Monaco'), + ('MN', 'Mongolia'), + ('MS', 'Montserrat'), + ('MA', 'Morocco'), + ('MZ', 'Mozambique'), + ('MM', 'Myanmar'), + ('NA', 'Namibia'), + ('NR', 'Nauru'), + ('NP', 'Nepal'), + ('NL', 'Netherlands'), + ('AN', 'Netherlands Antilles'), + ('NC', 'New Caledonia'), + ('NZ', 'New Zealand'), + ('NI', 'Nicaragua'), + ('NE', 'Niger'), + ('NG', 'Nigeria'), + ('NU', 'Niue'), + ('NF', 'Norfolk Island'), + ('MP', 'Northern Mariana Islands'), + ('NO', 'Norway'), + ('OM', 'Oman'), + ('PK', 'Pakistan'), + ('PW', 'Palau'), + ('PA', 'Panama'), + ('PG', 'Papua New Guinea'), + ('PY', 'Paraguay'), + ('PE', 'Peru'), + ('PH', 'Philippines'), + 
('PN', 'Pitcairn'), + ('PL', 'Poland'), + ('PT', 'Portugal'), + ('PR', 'Puerto Rico'), + ('QA', 'Qatar'), + ('RE', 'Reunion'), + ('RO', 'Romania'), + ('RU', 'Russian Federation'), + ('RW', 'Rwanda'), + ('KN', 'Saint Kitts And Nevis'), + ('LC', 'Saint Lucia'), + ('VC', 'St Vincent/Grenadines'), + ('WS', 'Samoa'), + ('SM', 'San Marino'), + ('ST', 'Sao Tome'), + ('SA', 'Saudi Arabia'), + ('SN', 'Senegal'), + ('SC', 'Seychelles'), + ('SL', 'Sierra Leone'), + ('SG', 'Singapore'), + ('SK', 'Slovakia'), + ('SI', 'Slovenia'), + ('SB', 'Solomon Islands'), + ('SO', 'Somalia'), + ('ZA', 'South Africa'), + ('ES', 'Spain'), + ('LK', 'Sri Lanka'), + ('SH', 'St. Helena'), + ('PM', 'St.Pierre'), + ('SD', 'Sudan'), + ('SR', 'Suriname'), + ('SZ', 'Swaziland'), + ('SE', 'Sweden'), + ('CH', 'Switzerland'), + ('SY', 'Syrian Arab Republic'), + ('TW', 'Taiwan'), + ('TJ', 'Tajikistan'), + ('TZ', 'Tanzania'), + ('TH', 'Thailand'), + ('TG', 'Togo'), + ('TK', 'Tokelau'), + ('TO', 'Tonga'), + ('TT', 'Trinidad And Tobago'), + ('TN', 'Tunisia'), + ('TR', 'Turkey'), + ('TM', 'Turkmenistan'), + ('TV', 'Tuvalu'), + ('UG', 'Uganda'), + ('UA', 'Ukraine'), + ('AE', 'United Arab Emirates'), + ('UK', 'United Kingdom'), + ('US', 'United States'), + ('UY', 'Uruguay'), + ('UZ', 'Uzbekistan'), + ('VU', 'Vanuatu'), + ('VA', 'Vatican City State'), + ('VE', 'Venezuela'), + ('VN', 'Viet Nam'), + ('VG', 'Virgin Islands (British)'), + ('VI', 'Virgin Islands (U.S.)'), + ('EH', 'Western Sahara'), + ('YE', 'Yemen'), + ('YU', 'Yugoslavia'), + ('ZR', 'Zaire'), + ('ZM', 'Zambia'), + ('ZW', 'Zimbabwe') +] + + + +def getLanguageDict(): + return lang_dict + +def getLanguageFromISO( iso ): + if iso == None: + return None + else: + return lang_dict[ iso ] + + + + + + + + + + diff --git a/comictaggerlib/comet.py b/comictaggerlib/comet.py index 1a06977..b8ffe83 100644 --- a/comictaggerlib/comet.py +++ b/comictaggerlib/comet.py @@ -1,260 +1 @@ -""" -A python class to encapsulate CoMet data -""" - -""" -Copyright 2012-2014 
Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from datetime import datetime -import zipfile -from pprint import pprint -import xml.etree.ElementTree as ET -from genericmetadata import GenericMetadata -import utils - -class CoMet: - - writer_synonyms = ['writer', 'plotter', 'scripter'] - penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] - inker_synonyms = [ 'inker', 'artist', 'finishes' ] - colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] - letterer_synonyms = [ 'letterer'] - cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] - editor_synonyms = [ 'editor'] - - def metadataFromString( self, string ): - - tree = ET.ElementTree(ET.fromstring( string )) - return self.convertXMLToMetadata( tree ) - - def stringFromMetadata( self, metadata ): - - header = '\n' - - tree = self.convertMetadataToXML( self, metadata ) - return header + ET.tostring(tree.getroot()) - - def indent( self, elem, level=0 ): - # for making the XML output readable - i = "\n" + level*" " - if len(elem): - if not elem.text or not elem.text.strip(): - elem.text = i + " " - if not elem.tail or not elem.tail.strip(): - elem.tail = i - for elem in elem: - self.indent( elem, level+1 ) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - else: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = i - - def convertMetadataToXML( self, filename, metadata ): - - #shorthand for the metadata - md = metadata - - # build a 
tree structure - root = ET.Element("comet") - root.attrib['xmlns:comet'] = "http://www.denvog.com/comet/" - root.attrib['xmlns:xsi'] = "http://www.w3.org/2001/XMLSchema-instance" - root.attrib['xsi:schemaLocation'] = "http://www.denvog.com http://www.denvog.com/comet/comet.xsd" - - #helper func - def assign( comet_entry, md_entry): - if md_entry is not None: - ET.SubElement(root, comet_entry).text = u"{0}".format(md_entry) - - # title is manditory - if md.title is None: - md.title = "" - assign( 'title', md.title ) - assign( 'series', md.series ) - assign( 'issue', md.issue ) #must be int?? - assign( 'volume', md.volume ) - assign( 'description', md.comments ) - assign( 'publisher', md.publisher ) - assign( 'pages', md.pageCount ) - assign( 'format', md.format ) - assign( 'language', md.language ) - assign( 'rating', md.maturityRating ) - assign( 'price', md.price ) - assign( 'isVersionOf', md.isVersionOf ) - assign( 'rights', md.rights ) - assign( 'identifier', md.identifier ) - assign( 'lastMark', md.lastMark ) - assign( 'genre', md.genre ) # TODO repeatable - - if md.characters is not None: - char_list = [ c.strip() for c in md.characters.split(',') ] - for c in char_list: - assign( 'character', c ) - - if md.manga is not None and md.manga == "YesAndRightToLeft": - assign( 'readingDirection', "rtl") - - date_str = "" - if md.year is not None: - date_str = str(md.year).zfill(4) - if md.month is not None: - date_str += "-" + str(md.month).zfill(2) - assign( 'date', date_str ) - - assign( 'coverImage', md.coverImage ) - - # need to specially process the credits, since they are structured differently than CIX - credit_writer_list = list() - credit_penciller_list = list() - credit_inker_list = list() - credit_colorist_list = list() - credit_letterer_list = list() - credit_cover_list = list() - credit_editor_list = list() - - # loop thru credits, and build a list for each role that CoMet supports - for credit in metadata.credits: - - if credit['role'].lower() in set( 
self.writer_synonyms ): - ET.SubElement(root, 'writer').text = u"{0}".format(credit['person']) - - if credit['role'].lower() in set( self.penciller_synonyms ): - ET.SubElement(root, 'penciller').text = u"{0}".format(credit['person']) - - if credit['role'].lower() in set( self.inker_synonyms ): - ET.SubElement(root, 'inker').text = u"{0}".format(credit['person']) - - if credit['role'].lower() in set( self.colorist_synonyms ): - ET.SubElement(root, 'colorist').text = u"{0}".format(credit['person']) - - if credit['role'].lower() in set( self.letterer_synonyms ): - ET.SubElement(root, 'letterer').text = u"{0}".format(credit['person']) - - if credit['role'].lower() in set( self.cover_synonyms ): - ET.SubElement(root, 'coverDesigner').text = u"{0}".format(credit['person']) - - if credit['role'].lower() in set( self.editor_synonyms ): - ET.SubElement(root, 'editor').text = u"{0}".format(credit['person']) - - - # self pretty-print - self.indent(root) - - # wrap it in an ElementTree instance, and save as XML - tree = ET.ElementTree(root) - return tree - - - def convertXMLToMetadata( self, tree ): - - root = tree.getroot() - - if root.tag != 'comet': - raise 1 - return None - - metadata = GenericMetadata() - md = metadata - - # Helper function - def xlate( tag ): - node = root.find( tag ) - if node is not None: - return node.text - else: - return None - - md.series = xlate( 'series' ) - md.title = xlate( 'title' ) - md.issue = xlate( 'issue' ) - md.volume = xlate( 'volume' ) - md.comments = xlate( 'description' ) - md.publisher = xlate( 'publisher' ) - md.language = xlate( 'language' ) - md.format = xlate( 'format' ) - md.pageCount = xlate( 'pages' ) - md.maturityRating = xlate( 'rating' ) - md.price = xlate( 'price' ) - md.isVersionOf = xlate( 'isVersionOf' ) - md.rights = xlate( 'rights' ) - md.identifier = xlate( 'identifier' ) - md.lastMark = xlate( 'lastMark' ) - md.genre = xlate( 'genre' ) # TODO - repeatable field - - date = xlate( 'date' ) - if date is not None: - 
parts = date.split('-') - if len( parts) > 0: - md.year = parts[0] - if len( parts) > 1: - md.month = parts[1] - - md.coverImage = xlate( 'coverImage' ) - - readingDirection = xlate( 'readingDirection' ) - if readingDirection is not None and readingDirection == "rtl": - md.manga = "YesAndRightToLeft" - - # loop for character tags - char_list = [] - for n in root: - if n.tag == 'character': - char_list.append(n.text.strip()) - md.characters = utils.listToString( char_list ) - - # Now extract the credit info - for n in root: - if ( n.tag == 'writer' or - n.tag == 'penciller' or - n.tag == 'inker' or - n.tag == 'colorist' or - n.tag == 'letterer' or - n.tag == 'editor' - ): - metadata.addCredit( n.text.strip(), n.tag.title() ) - - if n.tag == 'coverDesigner': - metadata.addCredit( n.text.strip(), "Cover" ) - - - metadata.isEmpty = False - - return metadata - - #verify that the string actually contains CoMet data in XML format - def validateString( self, string ): - try: - tree = ET.ElementTree(ET.fromstring( string )) - root = tree.getroot() - if root.tag != 'comet': - raise Exception - except: - return False - - return True - - - def writeToExternalFile( self, filename, metadata ): - - tree = self.convertMetadataToXML( self, metadata ) - #ET.dump(tree) - tree.write(filename, encoding='utf-8') - - def readFromExternalFile( self, filename ): - - tree = ET.parse( filename ) - return self.convertXMLToMetadata( tree ) - +from comicapi.comet import * diff --git a/comictaggerlib/comicarchive.py b/comictaggerlib/comicarchive.py index 381dc68..28b69e7 100644 --- a/comictaggerlib/comicarchive.py +++ b/comictaggerlib/comicarchive.py @@ -1,1088 +1 @@ -""" -A python class to represent a single comic, be it file or folder of images -""" - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import zipfile -import os -import struct -import sys -import tempfile -import subprocess -import platform -import locale -from natsort import natsorted - -if platform.system() == "Windows": - import _subprocess -import time - -import StringIO -try: - import Image - pil_available = True -except ImportError: - pil_available = False - -sys.path.insert(0, os.path.abspath(".") ) -import UnRAR2 -from UnRAR2.rar_exceptions import * - -#from settings import ComicTaggerSettings -from comicinfoxml import ComicInfoXml -from comicbookinfo import ComicBookInfo -from comet import CoMet -from genericmetadata import GenericMetadata, PageType -from filenameparser import FileNameParser -from PyPDF2 import PdfFileReader - -class MetaDataStyle: - CBI = 0 - CIX = 1 - COMET = 2 - name = [ 'ComicBookLover', 'ComicRack', 'CoMet' ] - -class ZipArchiver: - - def __init__( self, path ): - self.path = path - - def getArchiveComment( self ): - zf = zipfile.ZipFile( self.path, 'r' ) - comment = zf.comment - zf.close() - return comment - - def setArchiveComment( self, comment ): - return self.writeZipComment( self.path, comment ) - - def readArchiveFile( self, archive_file ): - data = "" - zf = zipfile.ZipFile( self.path, 'r' ) - - try: - data = zf.read( archive_file ) - except zipfile.BadZipfile as e: - print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) - zf.close() - raise IOError - except Exception as e: - zf.close() - print >> sys.stderr, u"bad zipfile [{0}]: {1} :: {2}".format(e, self.path, archive_file) - raise IOError - finally: - zf.close() - return 
data - - def removeArchiveFile( self, archive_file ): - try: - self.rebuildZipFile( [ archive_file ] ) - except: - return False - else: - return True - - def writeArchiveFile( self, archive_file, data ): - # At the moment, no other option but to rebuild the whole - # zip archive w/o the indicated file. Very sucky, but maybe - # another solution can be found - try: - self.rebuildZipFile( [ archive_file ] ) - - #now just add the archive file as a new one - zf = zipfile.ZipFile(self.path, mode='a', compression=zipfile.ZIP_DEFLATED ) - zf.writestr( archive_file, data ) - zf.close() - return True - except: - return False - - def getArchiveFilenameList( self ): - try: - zf = zipfile.ZipFile( self.path, 'r' ) - namelist = zf.namelist() - zf.close() - return namelist - except Exception as e: - print >> sys.stderr, u"Unable to get zipfile list [{0}]: {1}".format(e, self.path) - return [] - - # zip helper func - def rebuildZipFile( self, exclude_list ): - - # this recompresses the zip archive, without the files in the exclude_list - #print ">> sys.stderr, Rebuilding zip {0} without {1}".format( self.path, exclude_list ) - - # generate temp file - tmp_fd, tmp_name = tempfile.mkstemp( dir=os.path.dirname(self.path) ) - os.close( tmp_fd ) - - zin = zipfile.ZipFile (self.path, 'r') - zout = zipfile.ZipFile (tmp_name, 'w') - for item in zin.infolist(): - buffer = zin.read(item.filename) - if ( item.filename not in exclude_list ): - zout.writestr(item, buffer) - - #preserve the old comment - zout.comment = zin.comment - - zout.close() - zin.close() - - # replace with the new file - os.remove( self.path ) - os.rename( tmp_name, self.path ) - - - def writeZipComment( self, filename, comment ): - """ - This is a custom function for writing a comment to a zip file, - since the built-in one doesn't seem to work on Windows and Mac OS/X - - Fortunately, the zip comment is at the end of the file, and it's - easy to manipulate. 
See this website for more info: - see: http://en.wikipedia.org/wiki/Zip_(file_format)#Structure - """ - - #get file size - statinfo = os.stat(filename) - file_length = statinfo.st_size - - try: - fo = open(filename, "r+b") - - #the starting position, relative to EOF - pos = -4 - - found = False - value = bytearray() - - # walk backwards to find the "End of Central Directory" record - while ( not found ) and ( -pos != file_length ): - # seek, relative to EOF - fo.seek( pos, 2) - - value = fo.read( 4 ) - - #look for the end of central directory signature - if bytearray(value) == bytearray([ 0x50, 0x4b, 0x05, 0x06 ]): - found = True - else: - # not found, step back another byte - pos = pos - 1 - #print pos,"{1} int: {0:x}".format(bytearray(value)[0], value) - - if found: - - # now skip forward 20 bytes to the comment length word - pos += 20 - fo.seek( pos, 2) - - # Pack the length of the comment string - format = "H" # one 2-byte integer - comment_length = struct.pack(format, len(comment)) # pack integer in a binary string - - # write out the length - fo.write( comment_length ) - fo.seek( pos+2, 2) - - # write out the comment itself - fo.write( comment ) - fo.truncate() - fo.close() - else: - raise Exception('Failed to write comment to zip file!') - except: - return False - else: - return True - - def copyFromArchive( self, otherArchive ): - # Replace the current zip with one copied from another archive - try: - zout = zipfile.ZipFile (self.path, 'w') - for fname in otherArchive.getArchiveFilenameList(): - data = otherArchive.readArchiveFile( fname ) - if data is not None: - zout.writestr( fname, data ) - zout.close() - - #preserve the old comment - comment = otherArchive.getArchiveComment() - if comment is not None: - if not self.writeZipComment( self.path, comment ): - return False - except Exception as e: - print >> sys.stderr, u"Error while copying to {0}: {1}".format(self.path, e) - return False - else: - return True - - 
-#------------------------------------------ -# RAR implementation - -class RarArchiver: - - devnull = None - def __init__( self, path, rar_exe_path ): - self.path = path - self.rar_exe_path = rar_exe_path - - if RarArchiver.devnull is None: - RarArchiver.devnull = open(os.devnull, "w") - - # windows only, keeps the cmd.exe from popping up - if platform.system() == "Windows": - self.startupinfo = subprocess.STARTUPINFO() - self.startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW - else: - self.startupinfo = None - - def __del__(self): - #RarArchiver.devnull.close() - pass - - def getArchiveComment( self ): - - rarc = self.getRARObj() - return rarc.comment - - def setArchiveComment( self, comment ): - - if self.rar_exe_path is not None: - try: - # write comment to temp file - tmp_fd, tmp_name = tempfile.mkstemp() - f = os.fdopen(tmp_fd, 'w+b') - f.write( comment ) - f.close() - - working_dir = os.path.dirname( os.path.abspath( self.path ) ) - - # use external program to write comment to Rar archive - subprocess.call([self.rar_exe_path, 'c', '-w' + working_dir , '-c-', '-z' + tmp_name, self.path], - startupinfo=self.startupinfo, - stdout=RarArchiver.devnull) - - if platform.system() == "Darwin": - time.sleep(1) - - os.remove( tmp_name) - except: - return False - else: - return True - else: - return False - - def readArchiveFile( self, archive_file ): - - # Make sure to escape brackets, since some funky stuff is going on - # underneath with "fnmatch" - archive_file = archive_file.replace("[", '[[]') - entries = [] - - rarc = self.getRARObj() - - tries = 0 - while tries < 7: - try: - tries = tries+1 - entries = rarc.read_files( archive_file ) - - if entries[0][0].size != len(entries[0][1]): - print >> sys.stderr, u"readArchiveFile(): [file is not expected size: {0} vs {1}] {2}:{3} [attempt # {4}]".format( - entries[0][0].size,len(entries[0][1]), self.path, archive_file, tries) - continue - - except (OSError, IOError) as e: - print >> sys.stderr, 
u"readArchiveFile(): [{0}] {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries) - time.sleep(1) - except Exception as e: - print >> sys.stderr, u"Unexpected exception in readArchiveFile(): [{0}] for {1}:{2} attempt#{3}".format(str(e), self.path, archive_file, tries) - break - - else: - #Success" - #entries is a list of of tuples: ( rarinfo, filedata) - if tries > 1: - print >> sys.stderr, u"Attempted read_files() {0} times".format(tries) - if (len(entries) == 1): - return entries[0][1] - else: - raise IOError - - raise IOError - - - - def writeArchiveFile( self, archive_file, data ): - - if self.rar_exe_path is not None: - try: - tmp_folder = tempfile.mkdtemp() - - tmp_file = os.path.join( tmp_folder, archive_file ) - - working_dir = os.path.dirname( os.path.abspath( self.path ) ) - - # TODO: will this break if 'archive_file' is in a subfolder. i.e. "foo/bar.txt" - # will need to create the subfolder above, I guess... - f = open(tmp_file, 'w') - f.write( data ) - f.close() - - # use external program to write file to Rar archive - subprocess.call([self.rar_exe_path, 'a', '-w' + working_dir ,'-c-', '-ep', self.path, tmp_file], - startupinfo=self.startupinfo, - stdout=RarArchiver.devnull) - - if platform.system() == "Darwin": - time.sleep(1) - os.remove( tmp_file) - os.rmdir( tmp_folder) - except: - return False - else: - return True - else: - return False - - def removeArchiveFile( self, archive_file ): - if self.rar_exe_path is not None: - try: - # use external program to remove file from Rar archive - subprocess.call([self.rar_exe_path, 'd','-c-', self.path, archive_file], - startupinfo=self.startupinfo, - stdout=RarArchiver.devnull) - - if platform.system() == "Darwin": - time.sleep(1) - except: - return False - else: - return True - else: - return False - - def getArchiveFilenameList( self ): - - rarc = self.getRARObj() - #namelist = [ item.filename for item in rarc.infolist() ] - #return namelist - - tries = 0 - while tries < 7: - try: - tries = 
tries+1 - #namelist = [ item.filename for item in rarc.infolist() ] - namelist = [] - for item in rarc.infolist(): - if item.size != 0: - namelist.append( item.filename ) - - except (OSError, IOError) as e: - print >> sys.stderr, u"getArchiveFilenameList(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries) - time.sleep(1) - - else: - #Success" - return namelist - - raise e - - - def getRARObj( self ): - tries = 0 - while tries < 7: - try: - tries = tries+1 - rarc = UnRAR2.RarFile( self.path ) - - except (OSError, IOError) as e: - print >> sys.stderr, u"getRARObj(): [{0}] {1} attempt#{2}".format(str(e), self.path, tries) - time.sleep(1) - - else: - #Success" - return rarc - - raise e - -#------------------------------------------ -# Folder implementation -class FolderArchiver: - - def __init__( self, path ): - self.path = path - self.comment_file_name = "ComicTaggerFolderComment.txt" - - def getArchiveComment( self ): - return self.readArchiveFile( self.comment_file_name ) - - def setArchiveComment( self, comment ): - return self.writeArchiveFile( self.comment_file_name, comment ) - - def readArchiveFile( self, archive_file ): - - data = "" - fname = os.path.join( self.path, archive_file ) - try: - with open( fname, 'rb' ) as f: - data = f.read() - f.close() - except IOError as e: - pass - - return data - - def writeArchiveFile( self, archive_file, data ): - - fname = os.path.join( self.path, archive_file ) - try: - with open(fname, 'w+') as f: - f.write( data ) - f.close() - except: - return False - else: - return True - - def removeArchiveFile( self, archive_file ): - - fname = os.path.join( self.path, archive_file ) - try: - os.remove( fname ) - except: - return False - else: - return True - - def getArchiveFilenameList( self ): - return self.listFiles( self.path ) - - def listFiles( self, folder ): - - itemlist = list() - - for item in os.listdir( folder ): - itemlist.append( item ) - if os.path.isdir( item ): - itemlist.extend( self.listFiles( 
os.path.join( folder, item ) )) - - return itemlist - -#------------------------------------------ -# Unknown implementation -class UnknownArchiver: - - def __init__( self, path ): - self.path = path - - def getArchiveComment( self ): - return "" - def setArchiveComment( self, comment ): - return False - def readArchiveFile( self ): - return "" - def writeArchiveFile( self, archive_file, data ): - return False - def removeArchiveFile( self, archive_file ): - return False - def getArchiveFilenameList( self ): - return [] - -class PdfArchiver: - def __init__( self, path ): - self.path = path - - def getArchiveComment( self ): - return "" - def setArchiveComment( self, comment ): - return False - def readArchiveFile( self, page_num ): - return subprocess.check_output(['mudraw', '-o','-', self.path, str(int(os.path.basename(page_num)[:-4]))]) - def writeArchiveFile( self, archive_file, data ): - return False - def removeArchiveFile( self, archive_file ): - return False - def getArchiveFilenameList( self ): - out = [] - pdf = PdfFileReader(open(self.path, 'rb')) - for page in range(1, pdf.getNumPages() + 1): - out.append("/%04d.jpg" % (page)) - return out - -#------------------------------------------------------------------ -class ComicArchive: - - logo_data = None - - class ArchiveType: - Zip, Rar, Folder, Pdf, Unknown = range(5) - - def __init__( self, path, rar_exe_path=None, default_image_path=None ): - self.path = path - - self.rar_exe_path = rar_exe_path - self.ci_xml_filename = 'ComicInfo.xml' - self.comet_default_filename = 'CoMet.xml' - self.resetCache() - self.default_image_path = default_image_path - - # Use file extension to decide which archive test we do first - ext = os.path.splitext(path)[1].lower() - - self.archive_type = self.ArchiveType.Unknown - self.archiver = UnknownArchiver( self.path ) - - if ext == ".cbr" or ext == ".rar": - if self.rarTest(): - self.archive_type = self.ArchiveType.Rar - self.archiver = RarArchiver( self.path, 
rar_exe_path=self.rar_exe_path ) - - elif self.zipTest(): - self.archive_type = self.ArchiveType.Zip - self.archiver = ZipArchiver( self.path ) - else: - if self.zipTest(): - self.archive_type = self.ArchiveType.Zip - self.archiver = ZipArchiver( self.path ) - - elif self.rarTest(): - self.archive_type = self.ArchiveType.Rar - self.archiver = RarArchiver( self.path, rar_exe_path=self.rar_exe_path ) - elif os.path.basename(self.path)[-3:] == 'pdf': - self.archive_type = self.ArchiveType.Pdf - self.archiver = PdfArchiver(self.path) - - if ComicArchive.logo_data is None: - #fname = ComicTaggerSettings.getGraphic('nocover.png') - fname = self.default_image_path - with open(fname, 'rb') as fd: - ComicArchive.logo_data = fd.read() - - # Clears the cached data - def resetCache( self ): - self.has_cix = None - self.has_cbi = None - self.has_comet = None - self.comet_filename = None - self.page_count = None - self.page_list = None - self.cix_md = None - self.cbi_md = None - self.comet_md = None - - def loadCache( self, style_list ): - for style in style_list: - self.readMetadata(style) - - def rename( self, path ): - self.path = path - self.archiver.path = path - - def zipTest( self ): - return zipfile.is_zipfile( self.path ) - - def rarTest( self ): - try: - rarc = UnRAR2.RarFile( self.path ) - except: # InvalidRARArchive: - return False - else: - return True - - - def isZip( self ): - return self.archive_type == self.ArchiveType.Zip - - def isRar( self ): - return self.archive_type == self.ArchiveType.Rar - def isPdf(self): - return self.archive_type == self.ArchiveType.Pdf - def isFolder( self ): - return self.archive_type == self.ArchiveType.Folder - - def isWritable( self, check_rar_status=True ): - if self.archive_type == self.ArchiveType.Unknown : - return False - - elif check_rar_status and self.isRar() and self.rar_exe_path is None: - return False - - elif not os.access(self.path, os.W_OK): - return False - - elif ((self.archive_type != self.ArchiveType.Folder) and 
- (not os.access( os.path.dirname( os.path.abspath(self.path)), os.W_OK ))): - return False - - return True - - def isWritableForStyle( self, data_style ): - - if self.isRar() and data_style == MetaDataStyle.CBI: - return False - - return self.isWritable() - - def seemsToBeAComicArchive( self ): - - # Do we even care about extensions?? - ext = os.path.splitext(self.path)[1].lower() - - if ( - ( self.isZip() or self.isRar() or self.isPdf()) #or self.isFolder() ) - and - ( self.getNumberOfPages() > 0) - - ): - return True - else: - return False - - def readMetadata( self, style ): - - if style == MetaDataStyle.CIX: - return self.readCIX() - elif style == MetaDataStyle.CBI: - return self.readCBI() - elif style == MetaDataStyle.COMET: - return self.readCoMet() - else: - return GenericMetadata() - - def writeMetadata( self, metadata, style ): - - retcode = None - if style == MetaDataStyle.CIX: - retcode = self.writeCIX( metadata ) - elif style == MetaDataStyle.CBI: - retcode = self.writeCBI( metadata ) - elif style == MetaDataStyle.COMET: - retcode = self.writeCoMet( metadata ) - return retcode - - - def hasMetadata( self, style ): - - if style == MetaDataStyle.CIX: - return self.hasCIX() - elif style == MetaDataStyle.CBI: - return self.hasCBI() - elif style == MetaDataStyle.COMET: - return self.hasCoMet() - else: - return False - - def removeMetadata( self, style ): - retcode = True - if style == MetaDataStyle.CIX: - retcode = self.removeCIX() - elif style == MetaDataStyle.CBI: - retcode = self.removeCBI() - elif style == MetaDataStyle.COMET: - retcode = self.removeCoMet() - return retcode - - def getPage( self, index ): - - image_data = None - - filename = self.getPageName( index ) - - if filename is not None: - try: - image_data = self.archiver.readArchiveFile( filename ) - except IOError: - print >> sys.stderr, u"Error reading in page. Substituting logo page." 
- image_data = ComicArchive.logo_data - - return image_data - - def getPageName( self, index ): - - if index is None: - return None - - page_list = self.getPageNameList() - - num_pages = len( page_list ) - if num_pages == 0 or index >= num_pages: - return None - - return page_list[index] - - def getScannerPageIndex( self ): - - scanner_page_index = None - - #make a guess at the scanner page - name_list = self.getPageNameList() - count = self.getNumberOfPages() - - #too few pages to really know - if count < 5: - return None - - # count the length of every filename, and count occurences - length_buckets = dict() - for name in name_list: - fname = os.path.split(name)[1] - length = len(fname) - if length_buckets.has_key( length ): - length_buckets[ length ] += 1 - else: - length_buckets[ length ] = 1 - - # sort by most common - sorted_buckets = sorted(length_buckets.iteritems(), key=lambda (k,v): (v,k), reverse=True) - - # statistical mode occurence is first - mode_length = sorted_buckets[0][0] - - # we are only going to consider the final image file: - final_name = os.path.split(name_list[count-1])[1] - - common_length_list = list() - for name in name_list: - if len(os.path.split(name)[1]) == mode_length: - common_length_list.append( os.path.split(name)[1] ) - - prefix = os.path.commonprefix(common_length_list) - - if mode_length <= 7 and prefix == "": - #probably all numbers - if len(final_name) > mode_length: - scanner_page_index = count-1 - - # see if the last page doesn't start with the same prefix as most others - elif not final_name.startswith(prefix): - scanner_page_index = count-1 - - return scanner_page_index - - - def getPageNameList( self , sort_list=True): - - if self.page_list is None: - # get the list file names in the archive, and sort - files = self.archiver.getArchiveFilenameList() - - # seems like some archive creators are on Windows, and don't know about case-sensitivity! 
- if sort_list: - def keyfunc(k): - #hack to account for some weird scanner ID pages - #basename=os.path.split(k)[1] - #if basename < '0': - # k = os.path.join(os.path.split(k)[0], "z" + basename) - return k.lower() - - files = natsorted(files, key=keyfunc,signed=False) - - # make a sub-list of image files - self.page_list = [] - for name in files: - if ( name[-4:].lower() in [ ".jpg", "jpeg", ".png", ".gif", "webp" ] and os.path.basename(name)[0] != "." ): - self.page_list.append(name) - - return self.page_list - - def getNumberOfPages( self ): - - if self.page_count is None: - self.page_count = len( self.getPageNameList( ) ) - return self.page_count - - def readCBI( self ): - if self.cbi_md is None: - raw_cbi = self.readRawCBI() - if raw_cbi is None: - self.cbi_md = GenericMetadata() - else: - self.cbi_md = ComicBookInfo().metadataFromString( raw_cbi ) - - self.cbi_md.setDefaultPageList( self.getNumberOfPages() ) - - return self.cbi_md - - def readRawCBI( self ): - if ( not self.hasCBI() ): - return None - - return self.archiver.getArchiveComment() - - def hasCBI(self): - if self.has_cbi is None: - - #if ( not ( self.isZip() or self.isRar()) or not self.seemsToBeAComicArchive() ): - if not self.seemsToBeAComicArchive(): - self.has_cbi = False - else: - comment = self.archiver.getArchiveComment() - self.has_cbi = ComicBookInfo().validateString( comment ) - - return self.has_cbi - - def writeCBI( self, metadata ): - if metadata is not None: - self.applyArchiveInfoToMetadata( metadata ) - cbi_string = ComicBookInfo().stringFromMetadata( metadata ) - write_success = self.archiver.setArchiveComment( cbi_string ) - if write_success: - self.has_cbi = True - self.cbi_md = metadata - self.resetCache() - return write_success - else: - return False - - def removeCBI( self ): - if self.hasCBI(): - write_success = self.archiver.setArchiveComment( "" ) - if write_success: - self.has_cbi = False - self.cbi_md = None - self.resetCache() - return write_success - return True - - 
def readCIX( self ): - if self.cix_md is None: - raw_cix = self.readRawCIX() - if raw_cix is None or raw_cix == "": - self.cix_md = GenericMetadata() - else: - self.cix_md = ComicInfoXml().metadataFromString( raw_cix ) - - #validate the existing page list (make sure count is correct) - if len ( self.cix_md.pages ) != 0 : - if len ( self.cix_md.pages ) != self.getNumberOfPages(): - # pages array doesn't match the actual number of images we're seeing - # in the archive, so discard the data - self.cix_md.pages = [] - - if len( self.cix_md.pages ) == 0: - self.cix_md.setDefaultPageList( self.getNumberOfPages() ) - - return self.cix_md - - def readRawCIX( self ): - if not self.hasCIX(): - return None - try: - raw_cix = self.archiver.readArchiveFile( self.ci_xml_filename ) - except IOError: - print "Error reading in raw CIX!" - raw_cix = "" - return raw_cix - - def writeCIX(self, metadata): - - if metadata is not None: - self.applyArchiveInfoToMetadata( metadata, calc_page_sizes=True ) - cix_string = ComicInfoXml().stringFromMetadata( metadata ) - write_success = self.archiver.writeArchiveFile( self.ci_xml_filename, cix_string ) - if write_success: - self.has_cix = True - self.cix_md = metadata - self.resetCache() - return write_success - else: - return False - - def removeCIX( self ): - if self.hasCIX(): - write_success = self.archiver.removeArchiveFile( self.ci_xml_filename ) - if write_success: - self.has_cix = False - self.cix_md = None - self.resetCache() - return write_success - return True - - - def hasCIX(self): - if self.has_cix is None: - - if not self.seemsToBeAComicArchive(): - self.has_cix = False - elif self.ci_xml_filename in self.archiver.getArchiveFilenameList(): - self.has_cix = True - else: - self.has_cix = False - return self.has_cix - - - def readCoMet( self ): - if self.comet_md is None: - raw_comet = self.readRawCoMet() - if raw_comet is None or raw_comet == "": - self.comet_md = GenericMetadata() - else: - self.comet_md = 
CoMet().metadataFromString( raw_comet ) - - self.comet_md.setDefaultPageList( self.getNumberOfPages() ) - #use the coverImage value from the comet_data to mark the cover in this struct - # walk through list of images in file, and find the matching one for md.coverImage - # need to remove the existing one in the default - if self.comet_md.coverImage is not None: - cover_idx = 0 - for idx,f in enumerate(self.getPageNameList()): - if self.comet_md.coverImage == f: - cover_idx = idx - break - if cover_idx != 0: - del (self.comet_md.pages[0]['Type'] ) - self.comet_md.pages[ cover_idx ]['Type'] = PageType.FrontCover - - return self.comet_md - - def readRawCoMet( self ): - if not self.hasCoMet(): - print >> sys.stderr, self.path, "doesn't have CoMet data!" - return None - - try: - raw_comet = self.archiver.readArchiveFile( self.comet_filename ) - except IOError: - print >> sys.stderr, u"Error reading in raw CoMet!" - raw_comet = "" - return raw_comet - - def writeCoMet(self, metadata): - - if metadata is not None: - if not self.hasCoMet(): - self.comet_filename = self.comet_default_filename - - self.applyArchiveInfoToMetadata( metadata ) - # Set the coverImage value, if it's not the first page - cover_idx = int(metadata.getCoverPageIndexList()[0]) - if cover_idx != 0: - metadata.coverImage = self.getPageName( cover_idx ) - - comet_string = CoMet().stringFromMetadata( metadata ) - write_success = self.archiver.writeArchiveFile( self.comet_filename, comet_string ) - if write_success: - self.has_comet = True - self.comet_md = metadata - self.resetCache() - return write_success - else: - return False - - def removeCoMet( self ): - if self.hasCoMet(): - write_success = self.archiver.removeArchiveFile( self.comet_filename ) - if write_success: - self.has_comet = False - self.comet_md = None - self.resetCache() - return write_success - return True - - def hasCoMet(self): - if self.has_comet is None: - self.has_comet = False - if not self.seemsToBeAComicArchive(): - return 
self.has_comet - - #look at all xml files in root, and search for CoMet data, get first - for n in self.archiver.getArchiveFilenameList(): - if ( os.path.dirname(n) == "" and - os.path.splitext(n)[1].lower() == '.xml'): - # read in XML file, and validate it - try: - data = self.archiver.readArchiveFile( n ) - except: - data = "" - print >> sys.stderr, u"Error reading in Comet XML for validation!" - if CoMet().validateString( data ): - # since we found it, save it! - self.comet_filename = n - self.has_comet = True - break - - return self.has_comet - - - - def applyArchiveInfoToMetadata( self, md, calc_page_sizes=False): - md.pageCount = self.getNumberOfPages() - - if calc_page_sizes: - for p in md.pages: - idx = int( p['Image'] ) - if pil_available: - if 'ImageSize' not in p or 'ImageHeight' not in p or 'ImageWidth' not in p: - data = self.getPage( idx ) - if data is not None: - try: - im = Image.open(StringIO.StringIO(data)) - w,h = im.size - - p['ImageSize'] = str(len(data)) - p['ImageHeight'] = str(h) - p['ImageWidth'] = str(w) - except IOError: - p['ImageSize'] = str(len(data)) - - else: - if 'ImageSize' not in p: - data = self.getPage( idx ) - p['ImageSize'] = str(len(data)) - - - - def metadataFromFilename( self , parse_scan_info=True): - - metadata = GenericMetadata() - - fnp = FileNameParser() - fnp.parseFilename( self.path ) - - if fnp.issue != "": - metadata.issue = fnp.issue - if fnp.series != "": - metadata.series = fnp.series - if fnp.volume != "": - metadata.volume = fnp.volume - if fnp.year != "": - metadata.year = fnp.year - if fnp.issue_count != "": - metadata.issueCount = fnp.issue_count - if parse_scan_info: - if fnp.remainder != "": - metadata.scanInfo = fnp.remainder - - metadata.isEmpty = False - - return metadata - - def exportAsZip( self, zipfilename ): - if self.archive_type == self.ArchiveType.Zip: - # nothing to do, we're already a zip - return True - - zip_archiver = ZipArchiver( zipfilename ) - return zip_archiver.copyFromArchive( 
self.archiver ) - +from comicapi.comicarchive import * diff --git a/comictaggerlib/comicbookinfo.py b/comictaggerlib/comicbookinfo.py index a0bbaf0..6b839fa 100644 --- a/comictaggerlib/comicbookinfo.py +++ b/comictaggerlib/comicbookinfo.py @@ -1,152 +1 @@ -""" -A python class to encapsulate the ComicBookInfo data -""" - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -import json -from datetime import datetime -import zipfile - -from genericmetadata import GenericMetadata -import utils -#import ctversion - -class ComicBookInfo: - - - def metadataFromString( self, string ): - - cbi_container = json.loads( unicode(string, 'utf-8') ) - - metadata = GenericMetadata() - - cbi = cbi_container[ 'ComicBookInfo/1.0' ] - - #helper func - # If item is not in CBI, return None - def xlate( cbi_entry): - if cbi_entry in cbi: - return cbi[cbi_entry] - else: - return None - - metadata.series = xlate( 'series' ) - metadata.title = xlate( 'title' ) - metadata.issue = xlate( 'issue' ) - metadata.publisher = xlate( 'publisher' ) - metadata.month = xlate( 'publicationMonth' ) - metadata.year = xlate( 'publicationYear' ) - metadata.issueCount = xlate( 'numberOfIssues' ) - metadata.comments = xlate( 'comments' ) - metadata.credits = xlate( 'credits' ) - metadata.genre = xlate( 'genre' ) - metadata.volume = xlate( 'volume' ) - metadata.volumeCount = xlate( 'numberOfVolumes' ) - metadata.language = xlate( 'language' ) - metadata.country = xlate( 'country' ) - 
metadata.criticalRating = xlate( 'rating' ) - metadata.tags = xlate( 'tags' ) - - # make sure credits and tags are at least empty lists and not None - if metadata.credits is None: - metadata.credits = [] - if metadata.tags is None: - metadata.tags = [] - - #need to massage the language string to be ISO - if metadata.language is not None: - # reverse look-up - pattern = metadata.language - metadata.language = None - for key in utils.getLanguageDict(): - if utils.getLanguageDict()[ key ] == pattern.encode('utf-8'): - metadata.language = key - break - - metadata.isEmpty = False - - return metadata - - def stringFromMetadata( self, metadata ): - - cbi_container = self.createJSONDictionary( metadata ) - return json.dumps( cbi_container ) - - #verify that the string actually contains CBI data in JSON format - def validateString( self, string ): - - try: - cbi_container = json.loads( string ) - except: - return False - - return ( 'ComicBookInfo/1.0' in cbi_container ) - - - def createJSONDictionary( self, metadata ): - - # Create the dictionary that we will convert to JSON text - cbi = dict() - cbi_container = {'appID' : 'ComicTagger/' + '1.0.0', #ctversion.version, - 'lastModified' : str(datetime.now()), - 'ComicBookInfo/1.0' : cbi } - - #helper func - def assign( cbi_entry, md_entry): - if md_entry is not None: - cbi[cbi_entry] = md_entry - - #helper func - def toInt(s): - i = None - if type(s) in [ str, unicode, int ]: - try: - i = int(s) - except ValueError: - pass - return i - - assign( 'series', metadata.series ) - assign( 'title', metadata.title ) - assign( 'issue', metadata.issue ) - assign( 'publisher', metadata.publisher ) - assign( 'publicationMonth', toInt(metadata.month) ) - assign( 'publicationYear', toInt(metadata.year) ) - assign( 'numberOfIssues', toInt(metadata.issueCount) ) - assign( 'comments', metadata.comments ) - assign( 'genre', metadata.genre ) - assign( 'volume', toInt(metadata.volume) ) - assign( 'numberOfVolumes', toInt(metadata.volumeCount) ) 
- assign( 'language', utils.getLanguageFromISO(metadata.language) ) - assign( 'country', metadata.country ) - assign( 'rating', metadata.criticalRating ) - assign( 'credits', metadata.credits ) - assign( 'tags', metadata.tags ) - - return cbi_container - - - def writeToExternalFile( self, filename, metadata ): - - cbi_container = self.createJSONDictionary(metadata) - - f = open(filename, 'w') - f.write(json.dumps(cbi_container, indent=4)) - f.close - +from comicapi.comicbookinfo import * \ No newline at end of file diff --git a/comictaggerlib/comicinfoxml.py b/comictaggerlib/comicinfoxml.py index 9e9df07..6943c9c 100644 --- a/comictaggerlib/comicinfoxml.py +++ b/comictaggerlib/comicinfoxml.py @@ -1,293 +1 @@ -""" -A python class to encapsulate ComicRack's ComicInfo.xml data -""" - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -from datetime import datetime -import zipfile -from pprint import pprint -import xml.etree.ElementTree as ET -from genericmetadata import GenericMetadata -import utils - -class ComicInfoXml: - - writer_synonyms = ['writer', 'plotter', 'scripter'] - penciller_synonyms = [ 'artist', 'penciller', 'penciler', 'breakdowns' ] - inker_synonyms = [ 'inker', 'artist', 'finishes' ] - colorist_synonyms = [ 'colorist', 'colourist', 'colorer', 'colourer' ] - letterer_synonyms = [ 'letterer'] - cover_synonyms = [ 'cover', 'covers', 'coverartist', 'cover artist' ] - editor_synonyms = [ 'editor'] - - - def getParseableCredits( self ): - parsable_credits = [] - parsable_credits.extend( self.writer_synonyms ) - parsable_credits.extend( self.penciller_synonyms ) - parsable_credits.extend( self.inker_synonyms ) - parsable_credits.extend( self.colorist_synonyms ) - parsable_credits.extend( self.letterer_synonyms ) - parsable_credits.extend( self.cover_synonyms ) - parsable_credits.extend( self.editor_synonyms ) - return parsable_credits - - def metadataFromString( self, string ): - - tree = ET.ElementTree(ET.fromstring( string )) - return self.convertXMLToMetadata( tree ) - - def stringFromMetadata( self, metadata ): - - header = '\n' - - tree = self.convertMetadataToXML( self, metadata ) - return header + ET.tostring(tree.getroot()) - - def indent( self, elem, level=0 ): - # for making the XML output readable - i = "\n" + level*" " - if len(elem): - if not elem.text or not elem.text.strip(): - elem.text = i + " " - if not elem.tail or not elem.tail.strip(): - elem.tail = i - for elem in elem: - self.indent( elem, level+1 ) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - else: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = i - - def convertMetadataToXML( self, filename, metadata ): - - #shorthand for the metadata - md = metadata - - # build a tree structure - root = ET.Element("ComicInfo") - 
root.attrib['xmlns:xsi']="http://www.w3.org/2001/XMLSchema-instance" - root.attrib['xmlns:xsd']="http://www.w3.org/2001/XMLSchema" - #helper func - def assign( cix_entry, md_entry): - if md_entry is not None: - ET.SubElement(root, cix_entry).text = u"{0}".format(md_entry) - - assign( 'Title', md.title ) - assign( 'Series', md.series ) - assign( 'Number', md.issue ) - assign( 'Count', md.issueCount ) - assign( 'Volume', md.volume ) - assign( 'AlternateSeries', md.alternateSeries ) - assign( 'AlternateNumber', md.alternateNumber ) - assign( 'StoryArc', md.storyArc ) - assign( 'SeriesGroup', md.seriesGroup ) - assign( 'AlternateCount', md.alternateCount ) - assign( 'Summary', md.comments ) - assign( 'Notes', md.notes ) - assign( 'Year', md.year ) - assign( 'Month', md.month ) - assign( 'Day', md.day ) - - # need to specially process the credits, since they are structured differently than CIX - credit_writer_list = list() - credit_penciller_list = list() - credit_inker_list = list() - credit_colorist_list = list() - credit_letterer_list = list() - credit_cover_list = list() - credit_editor_list = list() - - # first, loop thru credits, and build a list for each role that CIX supports - for credit in metadata.credits: - - if credit['role'].lower() in set( self.writer_synonyms ): - credit_writer_list.append(credit['person'].replace(",","")) - - if credit['role'].lower() in set( self.penciller_synonyms ): - credit_penciller_list.append(credit['person'].replace(",","")) - - if credit['role'].lower() in set( self.inker_synonyms ): - credit_inker_list.append(credit['person'].replace(",","")) - - if credit['role'].lower() in set( self.colorist_synonyms ): - credit_colorist_list.append(credit['person'].replace(",","")) - - if credit['role'].lower() in set( self.letterer_synonyms ): - credit_letterer_list.append(credit['person'].replace(",","")) - - if credit['role'].lower() in set( self.cover_synonyms ): - credit_cover_list.append(credit['person'].replace(",","")) - - if 
credit['role'].lower() in set( self.editor_synonyms ): - credit_editor_list.append(credit['person'].replace(",","")) - - # second, convert each list to string, and add to XML struct - if len( credit_writer_list ) > 0: - node = ET.SubElement(root, 'Writer') - node.text = utils.listToString( credit_writer_list ) - - if len( credit_penciller_list ) > 0: - node = ET.SubElement(root, 'Penciller') - node.text = utils.listToString( credit_penciller_list ) - - if len( credit_inker_list ) > 0: - node = ET.SubElement(root, 'Inker') - node.text = utils.listToString( credit_inker_list ) - - if len( credit_colorist_list ) > 0: - node = ET.SubElement(root, 'Colorist') - node.text = utils.listToString( credit_colorist_list ) - - if len( credit_letterer_list ) > 0: - node = ET.SubElement(root, 'Letterer') - node.text = utils.listToString( credit_letterer_list ) - - if len( credit_cover_list ) > 0: - node = ET.SubElement(root, 'CoverArtist') - node.text = utils.listToString( credit_cover_list ) - - if len( credit_editor_list ) > 0: - node = ET.SubElement(root, 'Editor') - node.text = utils.listToString( credit_editor_list ) - - assign( 'Publisher', md.publisher ) - assign( 'Imprint', md.imprint ) - assign( 'Genre', md.genre ) - assign( 'Web', md.webLink ) - assign( 'PageCount', md.pageCount ) - assign( 'LanguageISO', md.language ) - assign( 'Format', md.format ) - assign( 'AgeRating', md.maturityRating ) - if md.blackAndWhite is not None and md.blackAndWhite: - ET.SubElement(root, 'BlackAndWhite').text = "Yes" - assign( 'Manga', md.manga ) - assign( 'Characters', md.characters ) - assign( 'Teams', md.teams ) - assign( 'Locations', md.locations ) - assign( 'ScanInformation', md.scanInfo ) - - # loop and add the page entries under pages node - if len( md.pages ) > 0: - pages_node = ET.SubElement(root, 'Pages') - for page_dict in md.pages: - page_node = ET.SubElement(pages_node, 'Page') - page_node.attrib = page_dict - - # self pretty-print - self.indent(root) - - # wrap it in an 
ElementTree instance, and save as XML - tree = ET.ElementTree(root) - return tree - - - def convertXMLToMetadata( self, tree ): - - root = tree.getroot() - - if root.tag != 'ComicInfo': - raise 1 - return None - - metadata = GenericMetadata() - md = metadata - - - # Helper function - def xlate( tag ): - node = root.find( tag ) - if node is not None: - return node.text - else: - return None - - md.series = xlate( 'Series' ) - md.title = xlate( 'Title' ) - md.issue = xlate( 'Number' ) - md.issueCount = xlate( 'Count' ) - md.volume = xlate( 'Volume' ) - md.alternateSeries = xlate( 'AlternateSeries' ) - md.alternateNumber = xlate( 'AlternateNumber' ) - md.alternateCount = xlate( 'AlternateCount' ) - md.comments = xlate( 'Summary' ) - md.notes = xlate( 'Notes' ) - md.year = xlate( 'Year' ) - md.month = xlate( 'Month' ) - md.day = xlate( 'Day' ) - md.publisher = xlate( 'Publisher' ) - md.imprint = xlate( 'Imprint' ) - md.genre = xlate( 'Genre' ) - md.webLink = xlate( 'Web' ) - md.language = xlate( 'LanguageISO' ) - md.format = xlate( 'Format' ) - md.manga = xlate( 'Manga' ) - md.characters = xlate( 'Characters' ) - md.teams = xlate( 'Teams' ) - md.locations = xlate( 'Locations' ) - md.pageCount = xlate( 'PageCount' ) - md.scanInfo = xlate( 'ScanInformation' ) - md.storyArc = xlate( 'StoryArc' ) - md.seriesGroup = xlate( 'SeriesGroup' ) - md.maturityRating = xlate( 'AgeRating' ) - - tmp = xlate( 'BlackAndWhite' ) - md.blackAndWhite = False - if tmp is not None and tmp.lower() in [ "yes", "true", "1" ]: - md.blackAndWhite = True - # Now extract the credit info - for n in root: - if ( n.tag == 'Writer' or - n.tag == 'Penciller' or - n.tag == 'Inker' or - n.tag == 'Colorist' or - n.tag == 'Letterer' or - n.tag == 'Editor' - ): - if n.text is not None: - for name in n.text.split(','): - metadata.addCredit( name.strip(), n.tag ) - - if n.tag == 'CoverArtist': - if n.text is not None: - for name in n.text.split(','): - metadata.addCredit( name.strip(), "Cover" ) - - # parse 
page data now - pages_node = root.find( "Pages" ) - if pages_node is not None: - for page in pages_node: - metadata.pages.append( page.attrib ) - #print page.attrib - - metadata.isEmpty = False - - return metadata - - def writeToExternalFile( self, filename, metadata ): - - tree = self.convertMetadataToXML( self, metadata ) - #ET.dump(tree) - tree.write(filename, encoding='utf-8') - - def readFromExternalFile( self, filename ): - - tree = ET.parse( filename ) - return self.convertXMLToMetadata( tree ) - +from comicapi.comicinfoxml import * \ No newline at end of file diff --git a/comictaggerlib/filenameparser.py b/comictaggerlib/filenameparser.py index 6f3aa05..77a1c5b 100644 --- a/comictaggerlib/filenameparser.py +++ b/comictaggerlib/filenameparser.py @@ -1,277 +1 @@ -""" -Functions for parsing comic info from filename - -This should probably be re-written, but, well, it mostly works! - -""" - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - - -# Some portions of this code were modified from pyComicMetaThis project -# http://code.google.com/p/pycomicmetathis/ - -import re -import os -from urllib import unquote - -class FileNameParser: - - def repl(self, m): - return ' ' * len(m.group()) - - def fixSpaces( self, string, remove_dashes=True ): - if remove_dashes: - placeholders = ['[-_]',' +'] - else: - placeholders = ['[_]',' +'] - for ph in placeholders: - string = re.sub(ph, self.repl, string ) - return string #.strip() - - - def getIssueCount( self,filename, issue_end ): - - count = "" - filename = filename[issue_end:] - - # replace any name seperators with spaces - tmpstr = self.fixSpaces(filename) - found = False - - match = re.search('(?<=\sof\s)\d+(?=\s)', tmpstr, re.IGNORECASE) - if match: - count = match.group() - found = True - - if not found: - match = re.search('(?<=\(of\s)\d+(?=\))', tmpstr, re.IGNORECASE) - if match: - count = match.group() - found = True - - - count = count.lstrip("0") - - return count - - def getIssueNumber( self, filename ): - - # Returns a tuple of issue number string, and start and end indexs in the filename - # (The indexes will be used to split the string up for further parsing) - - found = False - issue = '' - start = 0 - end = 0 - - # first, look for multiple "--", this means it's formatted differently from most: - if "--" in filename: - # the pattern seems to be that anything to left of the first "--" is the series name followed by issue - filename = re.sub("--.*", self.repl, filename) - - elif "__" in filename: - # the pattern seems to be that anything to left of the first "__" is the series name followed by issue - filename = re.sub("__.*", self.repl, filename) - - filename = filename.replace("+", " ") - - # replace parenthetical phrases with spaces - filename = re.sub( "\(.*?\)", self.repl, filename) - filename = re.sub( "\[.*?\]", self.repl, filename) - - # replace any name seperators with spaces - filename = self.fixSpaces(filename) - - # remove any 
"of NN" phrase with spaces (problem: this could break on some titles) - filename = re.sub( "of [\d]+", self.repl, filename) - - #print u"[{0}]".format(filename) - - # we should now have a cleaned up filename version with all the words in - # the same positions as original filename - - # make a list of each word and its position - word_list = list() - for m in re.finditer("\S+", filename): - word_list.append( (m.group(0), m.start(), m.end()) ) - - # remove the first word, since it can't be the issue number - if len(word_list) > 1: - word_list = word_list[1:] - else: - #only one word?? just bail. - return issue, start, end - - # Now try to search for the likely issue number word in the list - - # first look for a word with "#" followed by digits with optional sufix - # this is almost certainly the issue number - for w in reversed(word_list): - if re.match("#[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): - found = True - break - - # same as above but w/o a '#', and only look at the last word in the list - if not found: - w = word_list[-1] - if re.match("[-]?(([0-9]*\.[0-9]+|[0-9]+)(\w*))", w[0]): - found = True - - # now try to look for a # followed by any characters - if not found: - for w in reversed(word_list): - if re.match("#\S+", w[0]): - found = True - break - - if found: - issue = w[0] - start = w[1] - end = w[2] - if issue[0] == '#': - issue = issue[1:] - - return issue, start, end - - def getSeriesName(self, filename, issue_start ): - - # use the issue number string index to split the filename string - - if issue_start != 0: - filename = filename[:issue_start] - - # in case there is no issue number, remove some obvious stuff - if "--" in filename: - # the pattern seems to be that anything to left of the first "--" is the series name followed by issue - filename = re.sub("--.*", self.repl, filename) - - elif "__" in filename: - # the pattern seems to be that anything to left of the first "__" is the series name followed by issue - filename = re.sub("__.*", 
self.repl, filename) - - filename = filename.replace("+", " ") - tmpstr = self.fixSpaces(filename, remove_dashes=False) - - series = tmpstr - volume = "" - - #save the last word - try: - last_word = series.split()[-1] - except: - last_word = "" - - # remove any parenthetical phrases - series = re.sub( "\(.*?\)", "", series) - - # search for volume number - match = re.search('(.+)([vV]|[Vv][oO][Ll]\.?\s?)(\d+)\s*$', series) - if match: - series = match.group(1) - volume = match.group(3) - - # if a volume wasn't found, see if the last word is a year in parentheses - # since that's a common way to designate the volume - if volume == "": - #match either (YEAR), (YEAR-), or (YEAR-YEAR2) - match = re.search("(\()(\d{4})(-(\d{4}|)|)(\))", last_word) - if match: - volume = match.group(2) - - series = series.strip() - - # if we don't have an issue number (issue_start==0), look - # for hints i.e. "TPB", "one-shot", "OS", "OGN", etc that might - # be removed to help search online - if issue_start == 0: - one_shot_words = [ "tpb", "os", "one-shot", "ogn", "gn" ] - try: - last_word = series.split()[-1] - if last_word.lower() in one_shot_words: - series = series.rsplit(' ', 1)[0] - except: - pass - - return series, volume.strip() - - def getYear( self,filename, issue_end): - - filename = filename[issue_end:] - - year = "" - # look for four digit number with "(" ")" or "--" around it - match = re.search('(\(\d\d\d\d\))|(--\d\d\d\d--)', filename) - if match: - year = match.group() - # remove non-numerics - year = re.sub("[^0-9]", "", year) - return year - - def getRemainder( self, filename, year, count, issue_end ): - - #make a guess at where the the non-interesting stuff begins - remainder = "" - - if "--" in filename: - remainder = filename.split("--",1)[1] - elif "__" in filename: - remainder = filename.split("__",1)[1] - elif issue_end != 0: - remainder = filename[issue_end:] - - remainder = self.fixSpaces(remainder, remove_dashes=False) - if year != "": - remainder = 
remainder.replace(year,"",1) - if count != "": - remainder = remainder.replace("of "+count,"",1) - - remainder = remainder.replace("()","") - - return remainder.strip() - - def parseFilename( self, filename ): - - # remove the path - filename = os.path.basename(filename) - - # remove the extension - filename = os.path.splitext(filename)[0] - - #url decode, just in case - filename = unquote(filename) - - # sometimes archives get messed up names from too many decodings - # often url encodings will break and leave "_28" and "_29" in place - # of "(" and ")" see if there are a number of these, and replace them - if filename.count("_28") > 1 and filename.count("_29") > 1: - filename = filename.replace("_28", "(") - filename = filename.replace("_29", ")") - - self.issue, issue_start, issue_end = self.getIssueNumber(filename) - self.series, self.volume = self.getSeriesName(filename, issue_start) - self.year = self.getYear(filename, issue_end) - self.issue_count = self.getIssueCount(filename, issue_end) - self.remainder = self.getRemainder( filename, self.year, self.issue_count, issue_end ) - - if self.issue != "": - # strip off leading zeros - self.issue = self.issue.lstrip("0") - if self.issue == "": - self.issue = "0" - if self.issue[0] == ".": - self.issue = "0" + self.issue +from comicapi.filenameparser import * \ No newline at end of file diff --git a/comictaggerlib/genericmetadata.py b/comictaggerlib/genericmetadata.py index 8e7aeaf..8bcf694 100644 --- a/comictaggerlib/genericmetadata.py +++ b/comictaggerlib/genericmetadata.py @@ -1,316 +1 @@ -""" - A python class for internal metadata storage - - The goal of this class is to handle ALL the data that might come from various - tagging schemes and databases, such as ComicVine or GCD. 
This makes conversion - possible, however lossy it might be - -""" - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import utils - -# These page info classes are exactly the same as the CIX scheme, since it's unique -class PageType: - FrontCover = "FrontCover" - InnerCover = "InnerCover" - Roundup = "Roundup" - Story = "Story" - Advertisement = "Advertisement" - Editorial = "Editorial" - Letters = "Letters" - Preview = "Preview" - BackCover = "BackCover" - Other = "Other" - Deleted = "Deleted" - -""" -class PageInfo: - Image = 0 - Type = PageType.Story - DoublePage = False - ImageSize = 0 - Key = "" - ImageWidth = 0 - ImageHeight = 0 -""" - -class GenericMetadata: - - def __init__(self): - - self.isEmpty = True - self.tagOrigin = None - - self.series = None - self.issue = None - self.title = None - self.publisher = None - self.month = None - self.year = None - self.day = None - self.issueCount = None - self.volume = None - self.genre = None - self.language = None # 2 letter iso code - self.comments = None # use same way as Summary in CIX - - self.volumeCount = None - self.criticalRating = None - self.country = None - - self.alternateSeries = None - self.alternateNumber = None - self.alternateCount = None - self.imprint = None - self.notes = None - self.webLink = None - self.format = None - self.manga = None - self.blackAndWhite = None - self.pageCount = None - self.maturityRating = None - - self.storyArc = None - self.seriesGroup = None - 
self.scanInfo = None - - self.characters = None - self.teams = None - self.locations = None - - self.credits = list() - self.tags = list() - self.pages = list() - - # Some CoMet-only items - self.price = None - self.isVersionOf = None - self.rights = None - self.identifier = None - self.lastMark = None - self.coverImage = None - - def overlay( self, new_md ): - # Overlay a metadata object on this one - # that is, when the new object has non-None - # values, over-write them to this one - - def assign( cur, new ): - if new is not None: - if type(new) == str and len(new) == 0: - setattr(self, cur, None) - else: - setattr(self, cur, new) - - if not new_md.isEmpty: - self.isEmpty = False - - assign( 'series', new_md.series ) - assign( "issue", new_md.issue ) - assign( "issueCount", new_md.issueCount ) - assign( "title", new_md.title ) - assign( "publisher", new_md.publisher ) - assign( "day", new_md.day ) - assign( "month", new_md.month ) - assign( "year", new_md.year ) - assign( "volume", new_md.volume ) - assign( "volumeCount", new_md.volumeCount ) - assign( "genre", new_md.genre ) - assign( "language", new_md.language ) - assign( "country", new_md.country ) - assign( "criticalRating", new_md.criticalRating ) - assign( "alternateSeries", new_md.alternateSeries ) - assign( "alternateNumber", new_md.alternateNumber ) - assign( "alternateCount", new_md.alternateCount ) - assign( "imprint", new_md.imprint ) - assign( "webLink", new_md.webLink ) - assign( "format", new_md.format ) - assign( "manga", new_md.manga ) - assign( "blackAndWhite", new_md.blackAndWhite ) - assign( "maturityRating", new_md.maturityRating ) - assign( "storyArc", new_md.storyArc ) - assign( "seriesGroup", new_md.seriesGroup ) - assign( "scanInfo", new_md.scanInfo ) - assign( "characters", new_md.characters ) - assign( "teams", new_md.teams ) - assign( "locations", new_md.locations ) - assign( "comments", new_md.comments ) - assign( "notes", new_md.notes ) - - assign( "price", new_md.price ) - assign( 
"isVersionOf", new_md.isVersionOf ) - assign( "rights", new_md.rights ) - assign( "identifier", new_md.identifier ) - assign( "lastMark", new_md.lastMark ) - - self.overlayCredits( new_md.credits ) - # TODO - - # not sure if the tags and pages should broken down, or treated - # as whole lists.... - - # For now, go the easy route, where any overlay - # value wipes out the whole list - if len(new_md.tags) > 0: - assign( "tags", new_md.tags ) - - if len(new_md.pages) > 0: - assign( "pages", new_md.pages ) - - - def overlayCredits( self, new_credits ): - for c in new_credits: - if c.has_key('primary') and c['primary']: - primary = True - else: - primary = False - - # Remove credit role if person is blank - if c['person'] == "": - for r in reversed(self.credits): - if r['role'].lower() == c['role'].lower(): - self.credits.remove(r) - # otherwise, add it! - else: - self.addCredit( c['person'], c['role'], primary ) - - def setDefaultPageList( self, count ): - # generate a default page list, with the first page marked as the cover - for i in range(count): - page_dict = dict() - page_dict['Image'] = str(i) - if i == 0: - page_dict['Type'] = PageType.FrontCover - self.pages.append( page_dict ) - - def getArchivePageIndex( self, pagenum ): - # convert the displayed page number to the page index of the file in the archive - if pagenum < len( self.pages ): - return int( self.pages[pagenum]['Image'] ) - else: - return 0 - - def getCoverPageIndexList( self ): - # return a list of archive page indices of cover pages - coverlist = [] - for p in self.pages: - if 'Type' in p and p['Type'] == PageType.FrontCover: - coverlist.append( int(p['Image'])) - - if len(coverlist) == 0: - coverlist.append( 0 ) - - return coverlist - - def addCredit( self, person, role, primary = False ): - - credit = dict() - credit['person'] = person - credit['role'] = role - if primary: - credit['primary'] = primary - - # look to see if it's not already there... 
- found = False - for c in self.credits: - if ( c['person'].lower() == person.lower() and - c['role'].lower() == role.lower() ): - # no need to add it. just adjust the "primary" flag as needed - c['primary'] = primary - found = True - break - - if not found: - self.credits.append(credit) - - - def __str__( self ): - vals = [] - if self.isEmpty: - return "No metadata" - - def add_string( tag, val ): - if val is not None and u"{0}".format(val) != "": - vals.append( (tag, val) ) - - def add_attr_string( tag ): - val = getattr(self,tag) - add_string( tag, getattr(self,tag) ) - - add_attr_string( "series" ) - add_attr_string( "issue" ) - add_attr_string( "issueCount" ) - add_attr_string( "title" ) - add_attr_string( "publisher" ) - add_attr_string( "year" ) - add_attr_string( "month" ) - add_attr_string( "day" ) - add_attr_string( "volume" ) - add_attr_string( "volumeCount" ) - add_attr_string( "genre" ) - add_attr_string( "language" ) - add_attr_string( "country" ) - add_attr_string( "criticalRating" ) - add_attr_string( "alternateSeries" ) - add_attr_string( "alternateNumber" ) - add_attr_string( "alternateCount" ) - add_attr_string( "imprint" ) - add_attr_string( "webLink" ) - add_attr_string( "format" ) - add_attr_string( "manga" ) - - add_attr_string( "price" ) - add_attr_string( "isVersionOf" ) - add_attr_string( "rights" ) - add_attr_string( "identifier" ) - add_attr_string( "lastMark" ) - - if self.blackAndWhite: - add_attr_string( "blackAndWhite" ) - add_attr_string( "maturityRating" ) - add_attr_string( "storyArc" ) - add_attr_string( "seriesGroup" ) - add_attr_string( "scanInfo" ) - add_attr_string( "characters" ) - add_attr_string( "teams" ) - add_attr_string( "locations" ) - add_attr_string( "comments" ) - add_attr_string( "notes" ) - - add_string( "tags", utils.listToString( self.tags ) ) - - for c in self.credits: - primary = "" - if c.has_key('primary') and c['primary']: - primary = " [P]" - add_string( "credit", c['role']+": "+c['person'] + primary) - - 
# find the longest field name - flen = 0 - for i in vals: - flen = max( flen, len(i[0]) ) - flen += 1 - - #format the data nicely - outstr = "" - fmt_str = u"{0: <" + str(flen) + "} {1}\n" - for i in vals: - outstr += fmt_str.format( i[0]+":", i[1] ) - - return outstr +from comicapi.genericmetadata import * \ No newline at end of file diff --git a/comictaggerlib/issuestring.py b/comictaggerlib/issuestring.py index 751aa8c..d9872b2 100644 --- a/comictaggerlib/issuestring.py +++ b/comictaggerlib/issuestring.py @@ -1,140 +1 @@ -# coding=utf-8 -""" -Class for handling the odd permutations of an 'issue number' that the comics industry throws at us - -e.g.: - -"12" -"12.1" -"0" -"-1" -"5AU" -"100-2" - -""" - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import utils -import math -import re - -class IssueString: - def __init__(self, text): - - # break up the issue number string into 2 parts: the numeric and suffix string. 
- # ( assumes that the numeric portion is always first ) - - self.num = None - self.suffix = "" - - if text is None: - return - - if type(text) == int: - text = str(text) - - if len(text) == 0: - return - - text = unicode(text) - - #skip the minus sign if it's first - if text[0] == '-': - start = 1 - else: - start = 0 - - # if it's still not numeric at start skip it - if text[start].isdigit() or text[start] == ".": - # walk through the string, look for split point (the first non-numeric) - decimal_count = 0 - for idx in range( start, len(text) ): - if text[idx] not in "0123456789.": - break - # special case: also split on second "." - if text[idx] == ".": - decimal_count += 1 - if decimal_count > 1: - break - else: - idx = len(text) - - # move trailing numeric decimal to suffix - # (only if there is other junk after ) - if text[idx-1] == "." and len(text) != idx: - idx = idx -1 - - # if there is no numeric after the minus, make the minus part of the suffix - if idx == 1 and start == 1: - idx = 0 - - part1 = text[0:idx] - part2 = text[idx:len(text)] - - if part1 != "": - self.num = float( part1 ) - self.suffix = part2 - else: - self.suffix = text - - #print "num: {0} suf: {1}".format(self.num, self.suffix) - - def asString( self, pad = 0 ): - #return the float, left side zero-padded, with suffix attached - if self.num is None: - return self.suffix - - negative = self.num < 0 - - num_f = abs(self.num) - - num_int = int( num_f ) - num_s = str( num_int ) - if float( num_int ) != num_f: - num_s = str( num_f ) - - num_s += self.suffix - - # create padding - padding = "" - l = len( str(num_int)) - if l < pad : - padding = "0" * (pad - l) - - num_s = padding + num_s - if negative: - num_s = "-" + num_s - - return num_s - - def asFloat( self ): - #return the float, with no suffix - if self.suffix == u"½": - if self.num is not None: - return self.num + .5 - else: - return .5 - return self.num - - def asInt( self ): - #return the int version of the float - if self.num is 
None: - return None - return int( self.num ) - - +from comicapi.issuestring import * diff --git a/comictaggerlib/utils.py b/comictaggerlib/utils.py index e315cd7..3cf0b80 100644 --- a/comictaggerlib/utils.py +++ b/comictaggerlib/utils.py @@ -1,597 +1 @@ -# coding=utf-8 - -""" -Some generic utilities -""" - - -""" -Copyright 2012-2014 Anthony Beville - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import sys -import os -import re -import platform -import locale -import codecs - - -class UtilsVars: - already_fixed_encoding = False - -def get_actual_preferred_encoding(): - preferred_encoding = locale.getpreferredencoding() - if platform.system() == "Darwin": - preferred_encoding = "utf-8" - return preferred_encoding - -def fix_output_encoding( ): - if not UtilsVars.already_fixed_encoding: - # this reads the environment and inits the right locale - locale.setlocale(locale.LC_ALL, "") - - # try to make stdout/stderr encodings happy for unicode printing - preferred_encoding = get_actual_preferred_encoding() - sys.stdout = codecs.getwriter(preferred_encoding)(sys.stdout) - sys.stderr = codecs.getwriter(preferred_encoding)(sys.stderr) - UtilsVars.already_fixed_encoding = True - -def get_recursive_filelist( pathlist ): - """ - Get a recursive list of of all files under all path items in the list - """ - filename_encoding = sys.getfilesystemencoding() - filelist = [] - for p in pathlist: - # if path is a folder, walk it recursivly, and all files underneath - if type(p) == str: - #make sure 
string is unicode - p = p.decode(filename_encoding) #, 'replace') - elif type(p) != unicode: - #it's probably a QString - p = unicode(p) - - if os.path.isdir( p ): - for root,dirs,files in os.walk( p ): - for f in files: - if type(f) == str: - #make sure string is unicode - f = f.decode(filename_encoding, 'replace') - elif type(f) != unicode: - #it's probably a QString - f = unicode(f) - filelist.append(os.path.join(root,f)) - else: - filelist.append(p) - - return filelist - -def listToString( l ): - string = "" - if l is not None: - for item in l: - if len(string) > 0: - string += ", " - string += item - return string - -def addtopath( dirname ): - if dirname is not None and dirname != "": - - # verify that path doesn't already contain the given dirname - tmpdirname = re.escape(dirname) - pattern = r"{sep}{dir}$|^{dir}{sep}|{sep}{dir}{sep}|^{dir}$".format( dir=tmpdirname, sep=os.pathsep) - - match = re.search(pattern, os.environ['PATH']) - if not match: - os.environ['PATH'] = dirname + os.pathsep + os.environ['PATH'] - -# returns executable path, if it exists -def which(program): - - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fpath, fname = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - - return None - -def removearticles( text ): - text = text.lower() - articles = ['and', 'the', 'a', '&', 'issue' ] - newText = '' - for word in text.split(' '): - if word not in articles: - newText += word+' ' - - newText = newText[:-1] - - # now get rid of some other junk - newText = newText.replace(":", "") - newText = newText.replace(",", "") - newText = newText.replace("-", " ") - - # since the CV api changed, searches for series names with periods - # now explicity require the period to be in the search key, - # so the line below is removed (for now) - #newText = 
newText.replace(".", "") - - return newText - - -def unique_file(file_name): - counter = 1 - file_name_parts = os.path.splitext(file_name) # returns ('/path/file', '.ext') - while 1: - if not os.path.lexists( file_name): - return file_name - file_name = file_name_parts[0] + ' (' + str(counter) + ')' + file_name_parts[1] - counter += 1 - - -# -o- coding: utf-8 -o- -# ISO639 python dict -# oficial list in http://www.loc.gov/standards/iso639-2/php/code_list.php - -lang_dict = { - 'ab': 'Abkhaz', - 'aa': 'Afar', - 'af': 'Afrikaans', - 'ak': 'Akan', - 'sq': 'Albanian', - 'am': 'Amharic', - 'ar': 'Arabic', - 'an': 'Aragonese', - 'hy': 'Armenian', - 'as': 'Assamese', - 'av': 'Avaric', - 'ae': 'Avestan', - 'ay': 'Aymara', - 'az': 'Azerbaijani', - 'bm': 'Bambara', - 'ba': 'Bashkir', - 'eu': 'Basque', - 'be': 'Belarusian', - 'bn': 'Bengali', - 'bh': 'Bihari', - 'bi': 'Bislama', - 'bs': 'Bosnian', - 'br': 'Breton', - 'bg': 'Bulgarian', - 'my': 'Burmese', - 'ca': 'Catalan; Valencian', - 'ch': 'Chamorro', - 'ce': 'Chechen', - 'ny': 'Chichewa; Chewa; Nyanja', - 'zh': 'Chinese', - 'cv': 'Chuvash', - 'kw': 'Cornish', - 'co': 'Corsican', - 'cr': 'Cree', - 'hr': 'Croatian', - 'cs': 'Czech', - 'da': 'Danish', - 'dv': 'Divehi; Maldivian;', - 'nl': 'Dutch', - 'dz': 'Dzongkha', - 'en': 'English', - 'eo': 'Esperanto', - 'et': 'Estonian', - 'ee': 'Ewe', - 'fo': 'Faroese', - 'fj': 'Fijian', - 'fi': 'Finnish', - 'fr': 'French', - 'ff': 'Fula', - 'gl': 'Galician', - 'ka': 'Georgian', - 'de': 'German', - 'el': 'Greek, Modern', - 'gn': 'Guaraní', - 'gu': 'Gujarati', - 'ht': 'Haitian', - 'ha': 'Hausa', - 'he': 'Hebrew (modern)', - 'hz': 'Herero', - 'hi': 'Hindi', - 'ho': 'Hiri Motu', - 'hu': 'Hungarian', - 'ia': 'Interlingua', - 'id': 'Indonesian', - 'ie': 'Interlingue', - 'ga': 'Irish', - 'ig': 'Igbo', - 'ik': 'Inupiaq', - 'io': 'Ido', - 'is': 'Icelandic', - 'it': 'Italian', - 'iu': 'Inuktitut', - 'ja': 'Japanese', - 'jv': 'Javanese', - 'kl': 'Kalaallisut', - 'kn': 'Kannada', - 'kr': 'Kanuri', 
- 'ks': 'Kashmiri', - 'kk': 'Kazakh', - 'km': 'Khmer', - 'ki': 'Kikuyu, Gikuyu', - 'rw': 'Kinyarwanda', - 'ky': 'Kirghiz, Kyrgyz', - 'kv': 'Komi', - 'kg': 'Kongo', - 'ko': 'Korean', - 'ku': 'Kurdish', - 'kj': 'Kwanyama, Kuanyama', - 'la': 'Latin', - 'lb': 'Luxembourgish', - 'lg': 'Luganda', - 'li': 'Limburgish', - 'ln': 'Lingala', - 'lo': 'Lao', - 'lt': 'Lithuanian', - 'lu': 'Luba-Katanga', - 'lv': 'Latvian', - 'gv': 'Manx', - 'mk': 'Macedonian', - 'mg': 'Malagasy', - 'ms': 'Malay', - 'ml': 'Malayalam', - 'mt': 'Maltese', - 'mi': 'Māori', - 'mr': 'Marathi (Marāṭhī)', - 'mh': 'Marshallese', - 'mn': 'Mongolian', - 'na': 'Nauru', - 'nv': 'Navajo, Navaho', - 'nb': 'Norwegian Bokmål', - 'nd': 'North Ndebele', - 'ne': 'Nepali', - 'ng': 'Ndonga', - 'nn': 'Norwegian Nynorsk', - 'no': 'Norwegian', - 'ii': 'Nuosu', - 'nr': 'South Ndebele', - 'oc': 'Occitan', - 'oj': 'Ojibwe, Ojibwa', - 'cu': 'Old Church Slavonic', - 'om': 'Oromo', - 'or': 'Oriya', - 'os': 'Ossetian, Ossetic', - 'pa': 'Panjabi, Punjabi', - 'pi': 'Pāli', - 'fa': 'Persian', - 'pl': 'Polish', - 'ps': 'Pashto, Pushto', - 'pt': 'Portuguese', - 'qu': 'Quechua', - 'rm': 'Romansh', - 'rn': 'Kirundi', - 'ro': 'Romanian, Moldavan', - 'ru': 'Russian', - 'sa': 'Sanskrit (Saṁskṛta)', - 'sc': 'Sardinian', - 'sd': 'Sindhi', - 'se': 'Northern Sami', - 'sm': 'Samoan', - 'sg': 'Sango', - 'sr': 'Serbian', - 'gd': 'Scottish Gaelic', - 'sn': 'Shona', - 'si': 'Sinhala, Sinhalese', - 'sk': 'Slovak', - 'sl': 'Slovene', - 'so': 'Somali', - 'st': 'Southern Sotho', - 'es': 'Spanish; Castilian', - 'su': 'Sundanese', - 'sw': 'Swahili', - 'ss': 'Swati', - 'sv': 'Swedish', - 'ta': 'Tamil', - 'te': 'Telugu', - 'tg': 'Tajik', - 'th': 'Thai', - 'ti': 'Tigrinya', - 'bo': 'Tibetan', - 'tk': 'Turkmen', - 'tl': 'Tagalog', - 'tn': 'Tswana', - 'to': 'Tonga', - 'tr': 'Turkish', - 'ts': 'Tsonga', - 'tt': 'Tatar', - 'tw': 'Twi', - 'ty': 'Tahitian', - 'ug': 'Uighur, Uyghur', - 'uk': 'Ukrainian', - 'ur': 'Urdu', - 'uz': 'Uzbek', - 've': 'Venda', - 'vi': 
'Vietnamese', - 'vo': 'Volapük', - 'wa': 'Walloon', - 'cy': 'Welsh', - 'wo': 'Wolof', - 'fy': 'Western Frisian', - 'xh': 'Xhosa', - 'yi': 'Yiddish', - 'yo': 'Yoruba', - 'za': 'Zhuang, Chuang', - 'zu': 'Zulu', -} - - -countries = [ - ('AF', 'Afghanistan'), - ('AL', 'Albania'), - ('DZ', 'Algeria'), - ('AS', 'American Samoa'), - ('AD', 'Andorra'), - ('AO', 'Angola'), - ('AI', 'Anguilla'), - ('AQ', 'Antarctica'), - ('AG', 'Antigua And Barbuda'), - ('AR', 'Argentina'), - ('AM', 'Armenia'), - ('AW', 'Aruba'), - ('AU', 'Australia'), - ('AT', 'Austria'), - ('AZ', 'Azerbaijan'), - ('BS', 'Bahamas'), - ('BH', 'Bahrain'), - ('BD', 'Bangladesh'), - ('BB', 'Barbados'), - ('BY', 'Belarus'), - ('BE', 'Belgium'), - ('BZ', 'Belize'), - ('BJ', 'Benin'), - ('BM', 'Bermuda'), - ('BT', 'Bhutan'), - ('BO', 'Bolivia'), - ('BA', 'Bosnia And Herzegowina'), - ('BW', 'Botswana'), - ('BV', 'Bouvet Island'), - ('BR', 'Brazil'), - ('BN', 'Brunei Darussalam'), - ('BG', 'Bulgaria'), - ('BF', 'Burkina Faso'), - ('BI', 'Burundi'), - ('KH', 'Cambodia'), - ('CM', 'Cameroon'), - ('CA', 'Canada'), - ('CV', 'Cape Verde'), - ('KY', 'Cayman Islands'), - ('CF', 'Central African Rep'), - ('TD', 'Chad'), - ('CL', 'Chile'), - ('CN', 'China'), - ('CX', 'Christmas Island'), - ('CC', 'Cocos Islands'), - ('CO', 'Colombia'), - ('KM', 'Comoros'), - ('CG', 'Congo'), - ('CK', 'Cook Islands'), - ('CR', 'Costa Rica'), - ('CI', 'Cote D`ivoire'), - ('HR', 'Croatia'), - ('CU', 'Cuba'), - ('CY', 'Cyprus'), - ('CZ', 'Czech Republic'), - ('DK', 'Denmark'), - ('DJ', 'Djibouti'), - ('DM', 'Dominica'), - ('DO', 'Dominican Republic'), - ('TP', 'East Timor'), - ('EC', 'Ecuador'), - ('EG', 'Egypt'), - ('SV', 'El Salvador'), - ('GQ', 'Equatorial Guinea'), - ('ER', 'Eritrea'), - ('EE', 'Estonia'), - ('ET', 'Ethiopia'), - ('FK', 'Falkland Islands (Malvinas)'), - ('FO', 'Faroe Islands'), - ('FJ', 'Fiji'), - ('FI', 'Finland'), - ('FR', 'France'), - ('GF', 'French Guiana'), - ('PF', 'French Polynesia'), - ('TF', 'French S. 
Territories'), - ('GA', 'Gabon'), - ('GM', 'Gambia'), - ('GE', 'Georgia'), - ('DE', 'Germany'), - ('GH', 'Ghana'), - ('GI', 'Gibraltar'), - ('GR', 'Greece'), - ('GL', 'Greenland'), - ('GD', 'Grenada'), - ('GP', 'Guadeloupe'), - ('GU', 'Guam'), - ('GT', 'Guatemala'), - ('GN', 'Guinea'), - ('GW', 'Guinea-bissau'), - ('GY', 'Guyana'), - ('HT', 'Haiti'), - ('HN', 'Honduras'), - ('HK', 'Hong Kong'), - ('HU', 'Hungary'), - ('IS', 'Iceland'), - ('IN', 'India'), - ('ID', 'Indonesia'), - ('IR', 'Iran'), - ('IQ', 'Iraq'), - ('IE', 'Ireland'), - ('IL', 'Israel'), - ('IT', 'Italy'), - ('JM', 'Jamaica'), - ('JP', 'Japan'), - ('JO', 'Jordan'), - ('KZ', 'Kazakhstan'), - ('KE', 'Kenya'), - ('KI', 'Kiribati'), - ('KP', 'Korea (North)'), - ('KR', 'Korea (South)'), - ('KW', 'Kuwait'), - ('KG', 'Kyrgyzstan'), - ('LA', 'Laos'), - ('LV', 'Latvia'), - ('LB', 'Lebanon'), - ('LS', 'Lesotho'), - ('LR', 'Liberia'), - ('LY', 'Libya'), - ('LI', 'Liechtenstein'), - ('LT', 'Lithuania'), - ('LU', 'Luxembourg'), - ('MO', 'Macau'), - ('MK', 'Macedonia'), - ('MG', 'Madagascar'), - ('MW', 'Malawi'), - ('MY', 'Malaysia'), - ('MV', 'Maldives'), - ('ML', 'Mali'), - ('MT', 'Malta'), - ('MH', 'Marshall Islands'), - ('MQ', 'Martinique'), - ('MR', 'Mauritania'), - ('MU', 'Mauritius'), - ('YT', 'Mayotte'), - ('MX', 'Mexico'), - ('FM', 'Micronesia'), - ('MD', 'Moldova'), - ('MC', 'Monaco'), - ('MN', 'Mongolia'), - ('MS', 'Montserrat'), - ('MA', 'Morocco'), - ('MZ', 'Mozambique'), - ('MM', 'Myanmar'), - ('NA', 'Namibia'), - ('NR', 'Nauru'), - ('NP', 'Nepal'), - ('NL', 'Netherlands'), - ('AN', 'Netherlands Antilles'), - ('NC', 'New Caledonia'), - ('NZ', 'New Zealand'), - ('NI', 'Nicaragua'), - ('NE', 'Niger'), - ('NG', 'Nigeria'), - ('NU', 'Niue'), - ('NF', 'Norfolk Island'), - ('MP', 'Northern Mariana Islands'), - ('NO', 'Norway'), - ('OM', 'Oman'), - ('PK', 'Pakistan'), - ('PW', 'Palau'), - ('PA', 'Panama'), - ('PG', 'Papua New Guinea'), - ('PY', 'Paraguay'), - ('PE', 'Peru'), - ('PH', 'Philippines'), - 
('PN', 'Pitcairn'), - ('PL', 'Poland'), - ('PT', 'Portugal'), - ('PR', 'Puerto Rico'), - ('QA', 'Qatar'), - ('RE', 'Reunion'), - ('RO', 'Romania'), - ('RU', 'Russian Federation'), - ('RW', 'Rwanda'), - ('KN', 'Saint Kitts And Nevis'), - ('LC', 'Saint Lucia'), - ('VC', 'St Vincent/Grenadines'), - ('WS', 'Samoa'), - ('SM', 'San Marino'), - ('ST', 'Sao Tome'), - ('SA', 'Saudi Arabia'), - ('SN', 'Senegal'), - ('SC', 'Seychelles'), - ('SL', 'Sierra Leone'), - ('SG', 'Singapore'), - ('SK', 'Slovakia'), - ('SI', 'Slovenia'), - ('SB', 'Solomon Islands'), - ('SO', 'Somalia'), - ('ZA', 'South Africa'), - ('ES', 'Spain'), - ('LK', 'Sri Lanka'), - ('SH', 'St. Helena'), - ('PM', 'St.Pierre'), - ('SD', 'Sudan'), - ('SR', 'Suriname'), - ('SZ', 'Swaziland'), - ('SE', 'Sweden'), - ('CH', 'Switzerland'), - ('SY', 'Syrian Arab Republic'), - ('TW', 'Taiwan'), - ('TJ', 'Tajikistan'), - ('TZ', 'Tanzania'), - ('TH', 'Thailand'), - ('TG', 'Togo'), - ('TK', 'Tokelau'), - ('TO', 'Tonga'), - ('TT', 'Trinidad And Tobago'), - ('TN', 'Tunisia'), - ('TR', 'Turkey'), - ('TM', 'Turkmenistan'), - ('TV', 'Tuvalu'), - ('UG', 'Uganda'), - ('UA', 'Ukraine'), - ('AE', 'United Arab Emirates'), - ('UK', 'United Kingdom'), - ('US', 'United States'), - ('UY', 'Uruguay'), - ('UZ', 'Uzbekistan'), - ('VU', 'Vanuatu'), - ('VA', 'Vatican City State'), - ('VE', 'Venezuela'), - ('VN', 'Viet Nam'), - ('VG', 'Virgin Islands (British)'), - ('VI', 'Virgin Islands (U.S.)'), - ('EH', 'Western Sahara'), - ('YE', 'Yemen'), - ('YU', 'Yugoslavia'), - ('ZR', 'Zaire'), - ('ZM', 'Zambia'), - ('ZW', 'Zimbabwe') -] - - - -def getLanguageDict(): - return lang_dict - -def getLanguageFromISO( iso ): - if iso == None: - return None - else: - return lang_dict[ iso ] - - - - - - - - - - +from comicapi.utils import *