xref: /src/contrib/file/python/magic.py (revision a2dfb7224ec9933ee804cae54d51848dce938b6b)
1196a3809SXin LI# coding: utf-8
2196a3809SXin LI
377409849SXin LI'''
477409849SXin LIPython bindings for libmagic
577409849SXin LI'''
677409849SXin LI
777409849SXin LIimport ctypes
8048bd409SXin LIimport threading
977409849SXin LI
10196a3809SXin LIfrom collections import namedtuple
11196a3809SXin LI
1277409849SXin LIfrom ctypes import *
1377409849SXin LIfrom ctypes.util import find_library
1477409849SXin LI
1577409849SXin LI
def _init():
    """
    Loads the shared library through ctypes and returns a library
    L{ctypes.CDLL} instance

    Raises ImportError when no libmagic shared library can be located,
    instead of handing None to LoadLibrary (which would load the wrong
    object or fail with an unrelated error later on).
    """
    path = find_library('magic')
    if path is None:
        raise ImportError('failed to find the libmagic shared library')
    return ctypes.cdll.LoadLibrary(path)
2277409849SXin LI
# Registry of loaded shared libraries, keyed by short name; the function
# prototypes declared further down look libmagic up as _libraries['magic'].
_libraries = {'magic': _init()}
2577409849SXin LI
# Flag constants for open and setflags.
# Each flag is exported twice: with and without the MAGIC_ prefix.
NONE = MAGIC_NONE = 0x000000
DEBUG = MAGIC_DEBUG = 0x000001
SYMLINK = MAGIC_SYMLINK = 0x000002
COMPRESS = MAGIC_COMPRESS = 0x000004
DEVICES = MAGIC_DEVICES = 0x000008
MIME_TYPE = MAGIC_MIME_TYPE = 0x000010
CONTINUE = MAGIC_CONTINUE = 0x000020
CHECK = MAGIC_CHECK = 0x000040
PRESERVE_ATIME = MAGIC_PRESERVE_ATIME = 0x000080
RAW = MAGIC_RAW = 0x000100
ERROR = MAGIC_ERROR = 0x000200
MIME_ENCODING = MAGIC_MIME_ENCODING = 0x000400
MIME = MAGIC_MIME = MIME_TYPE | MIME_ENCODING  # 1040
APPLE = MAGIC_APPLE = 0x000800

NO_CHECK_COMPRESS = MAGIC_NO_CHECK_COMPRESS = 0x001000
NO_CHECK_TAR = MAGIC_NO_CHECK_TAR = 0x002000
NO_CHECK_SOFT = MAGIC_NO_CHECK_SOFT = 0x004000
NO_CHECK_APPTYPE = MAGIC_NO_CHECK_APPTYPE = 0x008000
NO_CHECK_ELF = MAGIC_NO_CHECK_ELF = 0x010000
NO_CHECK_TEXT = MAGIC_NO_CHECK_TEXT = 0x020000
NO_CHECK_CDF = MAGIC_NO_CHECK_CDF = 0x040000
NO_CHECK_TOKENS = MAGIC_NO_CHECK_TOKENS = 0x100000
NO_CHECK_ENCODING = MAGIC_NO_CHECK_ENCODING = 0x200000

# 4173824: every NO_CHECK_* bit listed above except NO_CHECK_SOFT, plus
# bit 0x080000, which has no named constant in this module.
NO_CHECK_BUILTIN = MAGIC_NO_CHECK_BUILTIN = 0x3FB000

# Parameter selectors for getparam and setparam.
PARAM_INDIR_MAX = MAGIC_PARAM_INDIR_MAX = 0
PARAM_NAME_MAX = MAGIC_PARAM_NAME_MAX = 1
PARAM_ELF_PHNUM_MAX = MAGIC_PARAM_ELF_PHNUM_MAX = 2
PARAM_ELF_SHNUM_MAX = MAGIC_PARAM_ELF_SHNUM_MAX = 3
PARAM_ELF_NOTES_MAX = MAGIC_PARAM_ELF_NOTES_MAX = 4
PARAM_REGEX_MAX = MAGIC_PARAM_REGEX_MAX = 5
PARAM_BYTES_MAX = MAGIC_PARAM_BYTES_MAX = 6
61048bd409SXin LI
# Result record built by the `detect_from_` functions.
FileMagic = namedtuple('FileMagic', ['mime_type', 'encoding', 'name'])
63196a3809SXin LI
6477409849SXin LI
class magic_set(Structure):
    """Opaque libmagic state; no fields are exposed to Python."""
    _fields_ = []


# Handle type passed to and returned from the libmagic functions.
magic_t = POINTER(magic_set)
6977409849SXin LI
def _bind(symbol, restype, argtypes):
    """
    Looks up `symbol` in the loaded libmagic library, declares its
    return and argument types, and returns the ctypes function object.
    """
    function = getattr(_libraries['magic'], symbol)
    function.restype = restype
    function.argtypes = argtypes
    return function


# Handle life cycle
_open = _bind('magic_open', magic_t, [c_int])
_close = _bind('magic_close', None, [magic_t])

# Queries returning a textual description
_file = _bind('magic_file', c_char_p, [magic_t, c_char_p])
_descriptor = _bind('magic_descriptor', c_char_p, [magic_t, c_int])
_buffer = _bind('magic_buffer', c_char_p, [magic_t, c_void_p, c_size_t])

_error = _bind('magic_error', c_char_p, [magic_t])

_setflags = _bind('magic_setflags', c_int, [magic_t, c_int])

# Database handling
_load = _bind('magic_load', c_int, [magic_t, c_char_p])
_compile = _bind('magic_compile', c_int, [magic_t, c_char_p])
_check = _bind('magic_check', c_int, [magic_t, c_char_p])
_list = _bind('magic_list', c_int, [magic_t, c_char_p])

_errno = _bind('magic_errno', c_int, [magic_t])

# Tunable parameters
_getparam = _bind('magic_getparam', c_int, [magic_t, c_int, c_void_p])
_setparam = _bind('magic_setparam', c_int, [magic_t, c_int, c_void_p])
125048bd409SXin LI
12677409849SXin LI
class Magic(object):
    """
    Object wrapper around a libmagic handle. Instances are normally
    obtained from the module-level open() function.
    """

    def __init__(self, ms):
        self._magic_t = ms

    def close(self):
        """
        Closes the magic database and deallocates any resources used.

        May be called more than once: the handle is forgotten on the first
        call, so it is never handed to magic_close() twice and later method
        calls pass a NULL handle rather than a dangling pointer.
        """
        handle, self._magic_t = self._magic_t, None
        if handle is not None:
            _close(handle)

    @staticmethod
    def __tostr(s):
        # Converts a value returned by libmagic (bytes or None) to str.
        if s is None:
            return None
        if isinstance(s, str):
            return s
        try:  # keep Python 2 compatibility
            return str(s, 'utf-8')
        except TypeError:
            return str(s)

    @staticmethod
    def __tobytes(b):
        # Converts a str argument to bytes so it can be passed as c_char_p.
        if b is None:
            return None
        if isinstance(b, bytes):
            return b
        try:  # keep Python 2 compatibility
            return bytes(b, 'utf-8')
        except TypeError:
            return bytes(b)

    def file(self, filename):
        """
        Returns a textual description of the contents of the argument passed
        as a filename or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))

    def descriptor(self, fd):
        """
        Returns a textual description of the contents of the argument passed
        as a file descriptor or None if an error occurred and the MAGIC_ERROR
        flag is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_descriptor(self._magic_t, fd))

    def buffer(self, buf):
        """
        Returns a textual description of the contents of the argument passed
        as a buffer or None if an error occurred and the MAGIC_ERROR flag
        is set. A call to errno() will return the numeric error code.
        """
        return Magic.__tostr(_buffer(self._magic_t, buf, len(buf)))

    def error(self):
        """
        Returns a textual explanation of the last error or None
        if there was no error.
        """
        return Magic.__tostr(_error(self._magic_t))

    def setflags(self, flags):
        """
        Set flags on the magic object which determine how magic checking
        behaves; a bitwise OR of the flags described in libmagic(3), but
        without the MAGIC_ prefix.

        Returns -1 on systems that don't support utime(2) or utimes(2)
        when PRESERVE_ATIME is set.
        """
        return _setflags(self._magic_t, flags)

    def load(self, filename=None):
        """
        Must be called to load entries in the colon separated list of database
        files passed as argument or the default database file if no argument
        before any magic queries can be performed.

        Returns 0 on success and -1 on failure.
        """
        return _load(self._magic_t, Magic.__tobytes(filename))

    def compile(self, dbs=None):
        """
        Compile entries in the colon separated list of database files
        passed as argument or the default database file if no argument.
        The compiled files created are named from the basename(1) of each file
        argument with ".mgc" appended to it.

        Returns 0 on success and -1 on failure.
        """
        return _compile(self._magic_t, Magic.__tobytes(dbs))

    def check(self, dbs=None):
        """
        Check the validity of entries in the colon separated list of
        database files passed as argument or the default database file
        if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _check(self._magic_t, Magic.__tobytes(dbs))

    def list(self, dbs=None):
        """
        Dump all magic entries, in a human readable format, from the colon
        separated list of database files passed as argument or the default
        database file if no argument.

        Returns 0 on success and -1 on failure.
        """
        return _list(self._magic_t, Magic.__tobytes(dbs))

    def errno(self):
        """
        Returns a numeric error code. If return value is 0, an internal
        magic error occurred. If return value is non-zero, the value is
        an OS error code. Use the errno module or os.strerror() to obtain
        detailed error information.
        """
        return _errno(self._magic_t)

    def getparam(self, param):
        """
        Returns the param value if successful and -1 if the parameter
        was unknown.
        """
        # libmagic(3) documents these parameters as size_t; a c_int buffer
        # is too small on platforms where size_t is wider than int.
        v = c_size_t()
        i = _getparam(self._magic_t, param, byref(v))
        if i == -1:
            return -1
        return v.value

    def setparam(self, param, value):
        """
        Returns 0 if successful and -1 if the parameter was unknown.
        """
        # size_t for the same reason as in getparam(): the library reads
        # sizeof(size_t) bytes from the pointer it is given.
        v = c_size_t(value)
        return _setparam(self._magic_t, param, byref(v))
268048bd409SXin LI
26977409849SXin LI
def open(flags):
    """
    Returns a magic object on success and None on failure.
    Flags argument as for setflags.
    """
    handle = _open(flags)
    # _open.restype is a ctypes pointer type, so a failed magic_open()
    # comes back as a NULL pointer object (which is falsy), never as None.
    if not handle:
        return None
    return Magic(handle)
279196a3809SXin LI
280196a3809SXin LI
281196a3809SXin LI# Objects used by `detect_from_` functions
class error(Exception):
    """Raised by MagicDetect when a magic database cannot be set up."""
284ac338006SXin LI
class MagicDetect(object):
    """
    Holds the two loaded magic objects used by the `detect_from_`
    functions: `mime_magic` (opened with MAGIC_MIME) and `none_magic`
    (opened with MAGIC_NONE).

    Raises `error` if either database cannot be opened or loaded; any
    handle opened before the failure is closed again.
    """

    def __init__(self):
        # Define both attributes up front so __del__ never meets a
        # half-initialised instance when one of the steps below raises.
        self.mime_magic = None
        self.none_magic = None
        self.mime_magic = self._open_loaded(MAGIC_MIME)
        try:
            self.none_magic = self._open_loaded(MAGIC_NONE)
        except error:
            self.mime_magic.close()
            self.mime_magic = None
            raise

    @staticmethod
    def _open_loaded(flags):
        # Opens a magic object with `flags` and loads the default database.
        # Raises `error` on failure, closing the handle if it was opened.
        magic = open(flags)
        if magic is None:
            raise error('magic_open failed')
        if magic.load() == -1:
            message = magic.error()
            magic.close()
            raise error(message or 'magic_load failed')
        return magic

    def __del__(self):
        for name in ('mime_magic', 'none_magic'):
            # getattr with a default: the attribute may be missing if the
            # instance was never (fully) initialised.
            magic = getattr(self, name, None)
            if magic is not None:
                magic.close()
                setattr(self, name, None)
311048bd409SXin LI
# Per-thread storage for the MagicDetect instance created on demand below.
threadlocal = threading.local()


def _detect_make():
    """
    Returns the calling thread's MagicDetect instance, creating and
    caching it in `threadlocal` on first use.
    """
    instance = getattr(threadlocal, "magic_instance", None)
    if instance is not None:
        return instance
    instance = MagicDetect()
    threadlocal.magic_instance = instance
    return instance
320196a3809SXin LI
def _create_filemagic(mime_detected, type_detected):
    '''Build a `FileMagic` from a MIME description and a type description

    `mime_detected` must consist of exactly two parts separated by '; ';
    otherwise ValueError is raised with `mime_detected` as its argument.
    '''
    try:
        mime_type, charset = mime_detected.split('; ')
    except ValueError:
        raise ValueError(mime_detected)

    encoding = charset.replace('charset=', '')
    return FileMagic(mime_type, encoding, type_detected)
329196a3809SXin LI
330196a3809SXin LI
def detect_from_filename(filename):
    '''Detect mime type, encoding and file type of the file at `filename`

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime_detected = detector.mime_magic.file(filename)
    type_detected = detector.none_magic.file(filename)
    return _create_filemagic(mime_detected, type_detected)
339196a3809SXin LI
340196a3809SXin LI
def detect_from_fobj(fobj):
    '''Detect mime type, encoding and file type of an open file object

    `fobj` must provide a `fileno()` method.

    Returns a `FileMagic` namedtuple.
    '''
    fd = fobj.fileno()
    detector = _detect_make()
    mime_detected = detector.mime_magic.descriptor(fd)
    type_detected = detector.none_magic.descriptor(fd)
    return _create_filemagic(mime_detected, type_detected)
351196a3809SXin LI
352196a3809SXin LI
def detect_from_content(byte_content):
    '''Detect mime type, encoding and file type of in-memory bytes

    Returns a `FileMagic` namedtuple.
    '''
    detector = _detect_make()
    mime_detected = detector.mime_magic.buffer(byte_content)
    type_detected = detector.none_magic.buffer(byte_content)
    return _create_filemagic(mime_detected, type_detected)
362