# Plain text  |  357 lines  |  13.38 KB

# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
# (c) 2005 Ian Bicking, Clark C. Evans and contributors
# This module is part of the Python Paste Project and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
This module handles sending static content such as in-memory data or
files.  At this time it has cache helpers and understands the
if-modified-since request header.
"""

import os, time, mimetypes, zipfile, tarfile
from paste.httpexceptions import *
from paste.httpheaders import *

# Files strictly smaller than this many bytes are read fully into memory by
# FileApp.update(); anything larger is streamed from disk on each request.
CACHE_SIZE = 4096
# Chunk size, in bytes, used when streaming a file: the default read size of
# _FileIter and the block size handed to ``wsgi.file_wrapper``.
BLOCK_SIZE = 4096 * 16

# Names exported by ``from <this module> import *``.
__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']

class DataApp(object):
    """
    Returns an application that will send content in a single chunk,
    this application has support for setting cache-control and for
    responding to conditional (or HEAD) requests.

    Constructor Arguments:

        ``content``     the content being sent to the client

        ``headers``     the headers to send with the response

        ``allowed_methods``  optional sequence of HTTP methods accepted by
                        this instance; defaults to ``('GET', 'HEAD')``

        The remaining ``kwargs`` correspond to headers, where the
        underscore is replaced with a dash.  These values are only
        added to the headers if they are not already provided; thus,
        they can be used for default values.  Examples include, but
        are not limited to:

            ``content_type``
            ``content_encoding``
            ``content_location``

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.

    ``set_content()``

        This method provides a mechanism to set the content after the
        application has been constructed.  This method does things
        like changing ``Last-Modified`` and ``Content-Length`` headers.

    """

    allowed_methods = ('GET', 'HEAD')

    def __init__(self, content, headers=None, allowed_methods=None,
                 **kwargs):
        """
        Set up the response headers and, when ``content`` is not None,
        the content itself.  ``headers`` must be a list or None.
        """
        assert isinstance(headers, (type(None), list))
        self.expires = None
        self.content = None
        self.content_length = None
        self.last_modified = 0
        if allowed_methods is not None:
            self.allowed_methods = allowed_methods
        self.headers = headers or []
        for (k, v) in kwargs.items():
            header = get_header(k)
            header.update(self.headers, v)
        ACCEPT_RANGES.update(self.headers, bytes=True)
        if not CONTENT_TYPE(self.headers):
            CONTENT_TYPE.update(self.headers)
        if content is not None:
            self.set_content(content)

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the stored headers and
        remember the resulting expiry offset.  Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def set_content(self, content, last_modified=None):
        """
        Replace the served content and refresh ``content_length`` and
        the ``Last-Modified`` header.  ``last_modified`` defaults to the
        current time.  Returns ``self``.
        """
        assert content is not None
        if last_modified is None:
            self.last_modified = time.time()
        else:
            self.last_modified = last_modified
        self.content = content
        self.content_length = len(content)
        LAST_MODIFIED.update(self.headers, time=self.last_modified)
        return self

    def content_disposition(self, **kwargs):
        """
        Apply ``Content-Disposition`` settings to the stored headers.
        Returns ``self``.
        """
        CONTENT_DISPOSITION.apply(self.headers, **kwargs)
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: reject methods outside ``allowed_methods``
        with ``HTTPMethodNotAllowed``, otherwise delegate to ``get()``.
        """
        method = environ['REQUEST_METHOD'].upper()
        if method not in self.allowed_methods:
            exc = HTTPMethodNotAllowed(
                'You cannot %s a file' % method,
                headers=[('Allow', ','.join(self.allowed_methods))])
            return exc(environ, start_response)
        return self.get(environ, start_response)

    def calculate_etag(self):
        """
        Return the quoted entity tag built from the modification time
        and the content length.
        """
        return '"%s-%s"' % (self.last_modified, self.content_length)

    @staticmethod
    def _resolve_byte_range(parsed_range, content_length):
        """
        Turn a parsed ``Range`` header into inclusive byte offsets.

        ``parsed_range`` is None or a ``(units, [(begin, end), ...])``
        tuple in which ``end`` is None for an open-ended range.  Returns
        ``(lower, upper)``; the whole content is selected when there is
        no range, the units are not ``bytes``, or more than one range was
        given.  Returns None when the single byte range cannot be
        satisfied.
        """
        if not (parsed_range and 'bytes' == parsed_range[0]
                and 1 == len(parsed_range[1])):
            return (0, content_length - 1)
        (lower, upper) = parsed_range[1][0]
        # Only a *missing* end means "to the end of the content"; an
        # explicit end of 0 (``bytes=0-0``) asks for the first byte only.
        if upper is None:
            upper = content_length - 1
        if upper >= content_length or lower > upper:
            return None
        return (lower, upper)

    def _not_modified(self, headers, start_response):
        """
        Start a ``304 Not Modified`` response and return its empty body.
        """
        # Strip every entity header from the response before sending it.
        # horribly inefficient, n^2 performance, yuck!
        for head in list_headers(entity=True):
            head.delete(headers)
        start_response('304 Not Modified', headers)
        return [b'']  # empty body

    def get(self, environ, start_response):
        """
        Answer a request, honouring ``If-None-Match``,
        ``If-Modified-Since`` and single ``bytes`` ranges.

        Returns a one-element list holding the (possibly sliced) content
        when content is held in memory.  When ``self.content`` is None
        the response has been started and a ``(lower, content_length)``
        tuple is returned so the caller can stream the bytes itself.
        """
        # Work on a copy so per-request headers never leak into
        # ``self.headers``.
        headers = self.headers[:]
        current_etag = self.calculate_etag()
        ETAG.update(headers, current_etag)
        if self.expires is not None:
            EXPIRES.update(headers, delta=self.expires)

        try:
            client_etags = IF_NONE_MATCH.parse(environ)
            if client_etags:
                for etag in client_etags:
                    if etag == current_etag or etag == '*':
                        return self._not_modified(headers, start_response)
        except HTTPBadRequest as exce:
            return exce.wsgi_application(environ, start_response)

        # If we get If-None-Match and If-Modified-Since, and
        # If-None-Match doesn't match, then we should not try to
        # figure out If-Modified-Since (which has 1-second granularity
        # and just isn't as accurate)
        if not client_etags:
            try:
                client_clock = IF_MODIFIED_SINCE.parse(environ)
                if (client_clock is not None
                    and client_clock >= int(self.last_modified)):
                    return self._not_modified(headers, start_response)
            except HTTPBadRequest as exce:
                return exce.wsgi_application(environ, start_response)

        byte_range = RANGE.parse(environ)
        resolved = self._resolve_byte_range(byte_range, self.content_length)
        if resolved is None:
            return HTTPRequestRangeNotSatisfiable((
              "Range request was made beyond the end of the content,\r\n"
              "which is %s long.\r\n  Range: %s\r\n") % (
                 self.content_length, RANGE(environ))
            ).wsgi_application(environ, start_response)
        (lower, upper) = resolved

        content_length = upper - lower + 1
        CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
                            total_length = self.content_length)
        CONTENT_LENGTH.update(headers, content_length)
        if byte_range or content_length != self.content_length:
            start_response('206 Partial Content', headers)
        else:
            start_response('200 OK', headers)
        if self.content is not None:
            return [self.content[lower:upper+1]]
        return (lower, content_length)

class FileApp(DataApp):
    """
    Returns an application that will send the file at the given
    filename.  Adds a mime type based on ``mimetypes.guess_type()``.
    See DataApp for the arguments beyond ``filename``.

    Files smaller than ``CACHE_SIZE`` are held in memory and re-read
    when their modification time changes; larger files are streamed
    from disk on each request.
    """

    def __init__(self, filename, headers=None, **kwargs):
        """
        Remember ``filename`` and use its guessed content type and
        encoding as defaults unless the caller supplied them.
        """
        self.filename = filename
        content_type, content_encoding = self.guess_type()
        if content_type and 'content_type' not in kwargs:
            kwargs['content_type'] = content_type
        if content_encoding and 'content_encoding' not in kwargs:
            kwargs['content_encoding'] = content_encoding
        DataApp.__init__(self, None, headers, **kwargs)

    def guess_type(self):
        """
        Return the ``(content_type, content_encoding)`` pair guessed
        from the file name.
        """
        return mimetypes.guess_type(self.filename)

    def update(self, force=False):
        """
        Refresh the cached metadata (and, for small files, the cached
        content) from disk.  Does nothing when the file's mtime is
        unchanged unless ``force`` is true.  Raises ``OSError`` /
        ``IOError`` if the file cannot be examined or read.
        """
        stat = os.stat(self.filename)
        if not force and stat.st_mtime == self.last_modified:
            return
        self.last_modified = stat.st_mtime
        if stat.st_size < CACHE_SIZE:
            # ``with`` closes the handle even if reading fails.
            with open(self.filename, "rb") as fh:
                self.set_content(fh.read(), stat.st_mtime)
        else:
            self.content = None
            self.content_length = stat.st_size
            # This is updated automatically if self.set_content() is
            # called
            LAST_MODIFIED.update(self.headers, time=self.last_modified)

    def get(self, environ, start_response):
        """
        Serve the file, answering with ``HTTPNotFound`` when it does not
        exist and ``HTTPForbidden`` when it cannot be examined or opened.

        Returns a WSGI body iterable: a list for cached content, HEAD,
        error and not-modified responses, otherwise a file wrapper or
        ``_FileIter`` that streams the requested bytes.
        """
        is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
        # RFC 2616 13.2.6: max-age=0 forces a re-read from disk.
        force = 'max-age=0' in CACHE_CONTROL(environ).lower()
        fh = None
        try:
            self.update(force=force)
            # ``is None`` rather than falsiness: a cached empty file has
            # content b'' and must not be reopened.
            if self.content is None:
                fh = open(self.filename, 'rb')
        except (IOError, OSError) as e:
            if not os.path.exists(self.filename):
                exc = HTTPNotFound(
                    'The resource does not exist',
                    comment="No file at %r" % self.filename)
                return exc(environ, start_response)
            exc = HTTPForbidden(
                'You are not permitted to view this file (%s)' % e)
            return exc.wsgi_application(
                environ, start_response)

        # The open file is closed here unless it is handed to the
        # returned iterable, which then owns it.
        handed_off = False
        try:
            retval = DataApp.get(self, environ, start_response)
            if is_head:
                return [b'']
            if isinstance(retval, list):
                # cached content, exception, or not-modified
                return retval
            (lower, content_length) = retval
            fh.seek(lower)
            file_wrapper = environ.get('wsgi.file_wrapper', None)
            # wsgi.file_wrapper transmits up to end-of-file, so it is only
            # correct when the requested span ends at the end of the file;
            # a bounded range needs the size-limited iterator.
            if file_wrapper and lower + content_length >= self.content_length:
                body = file_wrapper(fh, BLOCK_SIZE)
            else:
                body = _FileIter(fh, size=content_length)
            handed_off = True
            return body
        finally:
            if fh is not None and not handed_off:
                fh.close()

class _FileIter(object):
    """
    Iterator yielding successive chunks read from ``file``.

    ``block_size`` is the largest chunk requested per read (module
    default when falsy); ``size``, when not None, is the total number
    of bytes still to be requested from the file.
    """

    def __init__(self, file, block_size=None, size=None):
        self.file = file
        self.size = size
        self.block_size = block_size if block_size else BLOCK_SIZE

    def __iter__(self):
        return self

    def next(self):
        """Return the next chunk; stop once a read yields no data."""
        wanted = self.block_size
        remaining = self.size
        if remaining is not None:
            # Never request more than what is left of the byte budget.
            wanted = min(wanted, remaining)
            self.size = remaining - wanted
        chunk = self.file.read(wanted)
        if chunk:
            return chunk
        raise StopIteration
    __next__ = next

    def close(self):
        """Close the underlying file."""
        self.file.close()


class DirectoryApp(object):
    """
    WSGI application serving the files below one directory.

    Each request is dispatched on ``PATH_INFO`` to a ``FileApp`` for the
    matching file; those apps are created on first use and kept in
    ``cached_apps``.  Paths that resolve outside the directory are
    answered with ``HTTPForbidden`` and paths that are not regular files
    with ``HTTPNotFound``.  Override ``DirectoryApp.make_fileapp`` to
    customize how the per-file applications are built.
    """

    make_fileapp = FileApp

    def __init__(self, path):
        root = os.path.abspath(path)
        # Keep a trailing separator so the prefix test in __call__ cannot
        # match a sibling directory that merely shares the same prefix.
        if not root.endswith(os.path.sep):
            root = root + os.path.sep
        self.path = root
        assert os.path.isdir(self.path)
        self.cached_apps = {}

    def __call__(self, environ, start_response):
        path_info = environ['PATH_INFO']
        cached = self.cached_apps.get(path_info)
        if cached is not None:
            return cached(environ, start_response)

        target = os.path.join(self.path, path_info.lstrip('/'))
        if not os.path.normpath(target).startswith(self.path):
            # The normalized path escapes the served directory.
            return HTTPForbidden()(environ, start_response)
        if not os.path.isfile(target):
            return HTTPNotFound(comment=target)(environ, start_response)

        file_app = self.make_fileapp(target)
        self.cached_apps[path_info] = file_app
        return file_app(environ, start_response)


class _TarMemberInfo(object):
    """
    Description of one tar member exposing the ``zipfile.ZipInfo``
    attributes that ArchiveStore reads.
    """

    def __init__(self, member):
        name = member.name
        # Mark directories the way zip archives do, with a trailing slash.
        if member.isdir() and not name.endswith("/"):
            name += "/"
        self.filename = name
        self.file_size = member.size
        # Local wall-clock time, like the timestamps stored in zip files.
        self.date_time = time.localtime(member.mtime)[:6]


class _TarArchiveCompat(object):
    """
    Read-only ``getinfo()`` / ``read()`` interface over a tar archive,
    used when ``tarfile.TarFileCompat`` is not available.
    """

    def __init__(self, filepath):
        self.tarfile = tarfile.open(filepath, "r")

    def getinfo(self, name):
        """Describe member ``name``; raises ``KeyError`` if it is absent."""
        return _TarMemberInfo(self.tarfile.getmember(name))

    def read(self, name):
        """Return the bytes of member ``name``."""
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()


class ArchiveStore(object):
    """
    Returns an application that serves up a DataApp for items requested
    in a given zip or tar archive.

    Constructor Arguments:

        ``filepath``    the path to the archive being served

    ``cache_control()``

        This method provides validated construction of the ``Cache-Control``
        header as well as providing for automated filling out of the
        ``EXPIRES`` header for HTTP/1.0 clients.  The settings apply to
        members first served after the call.
    """

    def __init__(self, filepath):
        """
        Open the archive at ``filepath``.  Raises ``AssertionError`` when
        it is neither a zip nor a tar file.
        """
        if zipfile.is_zipfile(filepath):
            self.archive = zipfile.ZipFile(filepath,"r")
        elif tarfile.is_tarfile(filepath):
            # TarFileCompat only exists on Python 2; fall back to the
            # local adapter when the attribute is missing.
            legacy_compat = getattr(tarfile, 'TarFileCompat', None)
            if legacy_compat is not None:
                self.archive = legacy_compat(filepath,"r")
            else:
                self.archive = _TarArchiveCompat(filepath)
        else:
            raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
        self.expires = None
        # Headers filled in by cache_control() and copied onto each
        # DataApp created for a member.
        self.headers = []
        self.last_modified = time.time()
        self.cache = {}

    def cache_control(self, **kwargs):
        """
        Apply ``Cache-Control`` settings to the store's headers and
        remember the resulting expiry offset.  Returns ``self``.
        """
        self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
        return self

    def __call__(self, environ, start_response):
        """
        WSGI entry point: serve the archive member named by ``PATH_INFO``
        through a DataApp that is cached per path.  Missing members and
        directory entries are answered with ``HTTPNotFound``.
        """
        path = environ.get("PATH_INFO","")
        if path.startswith("/"):
            path = path[1:]
        application = self.cache.get(path)
        if application:
            return application(environ, start_response)
        try:
            info = self.archive.getinfo(path)
        except KeyError:
            exc = HTTPNotFound("The file requested, '%s', was not found." % path)
            return exc.wsgi_application(environ, start_response)
        if info.filename.endswith("/"):
            exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
            return exc.wsgi_application(environ, start_response)
        content_type, content_encoding = mimetypes.guess_type(info.filename)
        header_defaults = {'content_type': content_type}
        # 'None' is not a valid content-encoding, so don't set the header if
        # mimetypes.guess_type returns None
        if content_encoding is not None:
            header_defaults['content_encoding'] = content_encoding
        # Each DataApp gets its own copy of the store-wide headers.
        app = DataApp(None, headers=list(self.headers), **header_defaults)
        # tm_isdst=-1 lets mktime() decide whether DST applied to the
        # stored local time instead of always assuming standard time.
        app.set_content(self.archive.read(path),
                time.mktime(tuple(info.date_time) + (0, 0, -1)))
        self.cache[path] = app
        app.expires = self.expires
        return app(environ, start_response)