# coding: utf-8

'''
Python bindings for libmagic

Thin ctypes wrappers around the libmagic C API plus three convenience
helpers (`detect_from_filename`, `detect_from_fobj`, `detect_from_content`)
that return a `FileMagic` namedtuple.
'''

import ctypes
import threading

from collections import namedtuple

from ctypes import *
from ctypes.util import find_library


def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance
    """
    # NOTE(review): find_library() returns None when libmagic cannot be
    # located; the failure then surfaces from LoadLibrary or from the first
    # symbol lookup below rather than from here.
    return ctypes.cdll.LoadLibrary(find_library('magic'))

_libraries = {}
_libraries['magic'] = _init()

# Flag constants for open and setflags
MAGIC_NONE = NONE = 0
MAGIC_DEBUG = DEBUG = 1
MAGIC_SYMLINK = SYMLINK = 2
MAGIC_COMPRESS = COMPRESS = 4
MAGIC_DEVICES = DEVICES = 8
MAGIC_MIME_TYPE = MIME_TYPE = 16
MAGIC_CONTINUE = CONTINUE = 32
MAGIC_CHECK = CHECK = 64
MAGIC_PRESERVE_ATIME = PRESERVE_ATIME = 128
MAGIC_RAW = RAW = 256
MAGIC_ERROR = ERROR = 512
MAGIC_MIME_ENCODING = MIME_ENCODING = 1024
MAGIC_MIME = MIME = 1040  # MIME_TYPE + MIME_ENCODING
MAGIC_APPLE = APPLE = 2048

MAGIC_NO_CHECK_COMPRESS = NO_CHECK_COMPRESS = 4096
MAGIC_NO_CHECK_TAR = NO_CHECK_TAR = 8192
MAGIC_NO_CHECK_SOFT = NO_CHECK_SOFT = 16384
MAGIC_NO_CHECK_APPTYPE = NO_CHECK_APPTYPE = 32768
MAGIC_NO_CHECK_ELF = NO_CHECK_ELF = 65536
MAGIC_NO_CHECK_TEXT = NO_CHECK_TEXT = 131072
MAGIC_NO_CHECK_CDF = NO_CHECK_CDF = 262144
MAGIC_NO_CHECK_TOKENS = NO_CHECK_TOKENS = 1048576
MAGIC_NO_CHECK_ENCODING = NO_CHECK_ENCODING = 2097152

MAGIC_NO_CHECK_BUILTIN = NO_CHECK_BUILTIN = 4173824

# Parameter selectors for getparam and setparam
MAGIC_PARAM_INDIR_MAX = PARAM_INDIR_MAX = 0
MAGIC_PARAM_NAME_MAX = PARAM_NAME_MAX = 1
MAGIC_PARAM_ELF_PHNUM_MAX = PARAM_ELF_PHNUM_MAX = 2
MAGIC_PARAM_ELF_SHNUM_MAX = PARAM_ELF_SHNUM_MAX = 3
MAGIC_PARAM_ELF_NOTES_MAX = PARAM_ELF_NOTES_MAX = 4
MAGIC_PARAM_REGEX_MAX = PARAM_REGEX_MAX = 5
MAGIC_PARAM_BYTES_MAX = PARAM_BYTES_MAX = 6

# Result type of the `detect_from_` functions
FileMagic = namedtuple('FileMagic', ('mime_type', 'encoding', 'name'))


class magic_set(Structure):
    # Opaque handle: the C struct layout is never inspected from Python.
    pass
magic_set._fields_ = []
magic_t = POINTER(magic_set)

# ctypes prototypes for the libmagic entry points used below
_open = _libraries['magic'].magic_open
_open.restype = magic_t
_open.argtypes = [c_int]

_close = _libraries['magic'].magic_close
_close.restype = None
_close.argtypes = [magic_t]

_file = _libraries['magic'].magic_file
_file.restype = c_char_p
_file.argtypes = [magic_t, c_char_p]

_descriptor = _libraries['magic'].magic_descriptor
_descriptor.restype = c_char_p
_descriptor.argtypes = [magic_t, c_int]

_buffer = _libraries['magic'].magic_buffer
_buffer.restype = c_char_p
_buffer.argtypes = [magic_t, c_void_p, c_size_t]

_error = _libraries['magic'].magic_error
_error.restype = c_char_p
_error.argtypes = [magic_t]

_setflags = _libraries['magic'].magic_setflags
_setflags.restype = c_int
_setflags.argtypes = [magic_t, c_int]

_load = _libraries['magic'].magic_load
_load.restype = c_int
_load.argtypes = [magic_t, c_char_p]

_compile = _libraries['magic'].magic_compile
_compile.restype = c_int
_compile.argtypes = [magic_t, c_char_p]

_check = _libraries['magic'].magic_check
_check.restype = c_int
_check.argtypes = [magic_t, c_char_p]

_list = _libraries['magic'].magic_list
_list.restype = c_int
_list.argtypes = [magic_t, c_char_p]

_errno = _libraries['magic'].magic_errno
_errno.restype = c_int
_errno.argtypes = [magic_t]

_getparam = _libraries['magic'].magic_getparam
_getparam.restype = c_int
_getparam.argtypes = [magic_t, c_int, c_void_p]

_setparam = _libraries['magic'].magic_setparam
_setparam.restype = c_int
_setparam.argtypes = [magic_t, c_int, c_void_p]


class Magic(object):
    """
    Object wrapper around a libmagic cookie (C{magic_t}).

    Instances are normally obtained from the module-level L{open}.
    """

    def __init__(self, ms):
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.
        """
        _close(self._magic_t)

    @staticmethod
    def __tostr(s):
        """
        Converts a value returned by libmagic to the native str type;
        None is passed through unchanged.
        """
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        """
        Converts an argument to bytes before it is handed to libmagic;
        None is passed through unchanged.
        """
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called before any magic queries can be performed. Loads
        entries from the colon separated list of database files passed as
        argument, or from the default database file if no argument is given.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs):
        """
        Dump, in human readable form, the entries in the colon separated
        list of database files passed as argument or the default database
        file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. The errno module or os.strerror() can be used
        to provide detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic stores the value through a size_t pointer, so the
        # receiving object must be size_t wide (a c_int is too small on
        # LP64 platforms).
        v = c_size_t()
        i = _getparam(self._magic_t, param, byref(v))
        if i == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # libmagic reads the value through a size_t pointer; see getparam.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))


def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    magic_t = _open(flags)
    # _open.restype is a ctypes POINTER type, so a NULL return arrives as a
    # falsy pointer instance and never as None: test truthiness.
    if not magic_t:
        return None
    return Magic(magic_t)


# Objects used by `detect_from_` functions
class error(Exception):
    pass


class MagicDetect(object):
    """
    Pair of loaded magic objects: one opened with MAGIC_MIME and one with
    MAGIC_NONE. Raises L{error} if either cannot be opened or loaded.
    """

    def __init__(self):
        # Define both attributes before anything can fail so that __del__
        # is safe on a partially constructed instance.
        self.mime_magic = None
        self.none_magic = None

        self.mime_magic = self._open_loaded(MAGIC_MIME)
        try:
            self.none_magic = self._open_loaded(MAGIC_NONE)
        except error:
            self.mime_magic.close()
            self.mime_magic = None
            raise

    @staticmethod
    def _open_loaded(flags):
        """
        Opens a magic object with the given flags and loads the default
        database into it; raises L{error} if either step fails.
        """
        cookie = open(flags)
        if cookie is None:
            raise error
        if cookie.load() == -1:
            cookie.close()
            raise error
        return cookie

    def __del__(self):
        for attr in ('mime_magic', 'none_magic'):
            cookie = getattr(self, attr, None)
            if cookie is not None:
                cookie.close()
                setattr(self, attr, None)


threadlocal = threading.local()


def _detect_make():
    """
    Returns the MagicDetect instance cached on `threadlocal`, creating it
    on first use in the current thread.
    """
    v = getattr(threadlocal, "magic_instance", None)
    if v is None:
        v = MagicDetect()
        setattr(threadlocal, "magic_instance", v)
    return v


def _create_filemagic(mime_detected, type_detected):
    """
    Builds a `FileMagic` from a "type; charset=enc" string and a textual
    description. Raises ValueError carrying the offending string when it
    does not split into exactly two parts on '; '.
    """
    try:
        mime_type, mime_encoding = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)

    return FileMagic(name=type_detected, mime_type=mime_type,
                     encoding=mime_encoding.replace('charset=', ''))


def detect_from_filename(filename):
    '''Detect mime type, encoding and file type from a filename

    Returns a `FileMagic` namedtuple.
    '''
    x = _detect_make()
    return _create_filemagic(x.mime_magic.file(filename),
                             x.none_magic.file(filename))


def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type from file-like object

    Returns a `FileMagic` namedtuple.
    '''

    file_descriptor = fobj.fileno()
    x = _detect_make()
    return _create_filemagic(x.mime_magic.descriptor(file_descriptor),
                             x.none_magic.descriptor(file_descriptor))


def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type from bytes

    Returns a `FileMagic` namedtuple.
    '''

    x = _detect_make()
    return _create_filemagic(x.mime_magic.buffer(byte_content),
                             x.none_magic.buffer(byte_content))