# (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
# Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
# (c) 2005 Ian Bicking, Clark C. Evans and contributors
# This module is part of the Python Paste Project and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php
"""
This module handles sending static content such as in-memory data or
files. At this time it has cache helpers and understands the
if-modified-since request header.
"""
import os, time, mimetypes, zipfile, tarfile
from paste.httpexceptions import *
from paste.httpheaders import *
CACHE_SIZE = 4096
BLOCK_SIZE = 4096 * 16
__all__ = ['DataApp', 'FileApp', 'DirectoryApp', 'ArchiveStore']
class DataApp(object):
"""
Returns an application that will send content in a single chunk,
this application has support for setting cache-control and for
responding to conditional (or HEAD) requests.
Constructor Arguments:
``content`` the content being sent to the client
``headers`` the headers to send /w the response
The remaining ``kwargs`` correspond to headers, where the
underscore is replaced with a dash. These values are only
added to the headers if they are not already provided; thus,
they can be used for default values. Examples include, but
are not limited to:
``content_type``
``content_encoding``
``content_location``
``cache_control()``
This method provides validated construction of the ``Cache-Control``
header as well as providing for automated filling out of the
``EXPIRES`` header for HTTP/1.0 clients.
``set_content()``
This method provides a mechanism to set the content after the
application has been constructed. This method does things
like changing ``Last-Modified`` and ``Content-Length`` headers.
"""
allowed_methods = ('GET', 'HEAD')
def __init__(self, content, headers=None, allowed_methods=None,
**kwargs):
assert isinstance(headers, (type(None), list))
self.expires = None
self.content = None
self.content_length = None
self.last_modified = 0
if allowed_methods is not None:
self.allowed_methods = allowed_methods
self.headers = headers or []
for (k, v) in kwargs.items():
header = get_header(k)
header.update(self.headers, v)
ACCEPT_RANGES.update(self.headers, bytes=True)
if not CONTENT_TYPE(self.headers):
CONTENT_TYPE.update(self.headers)
if content is not None:
self.set_content(content)
def cache_control(self, **kwargs):
self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
return self
def set_content(self, content, last_modified=None):
assert content is not None
if last_modified is None:
self.last_modified = time.time()
else:
self.last_modified = last_modified
self.content = content
self.content_length = len(content)
LAST_MODIFIED.update(self.headers, time=self.last_modified)
return self
def content_disposition(self, **kwargs):
CONTENT_DISPOSITION.apply(self.headers, **kwargs)
return self
def __call__(self, environ, start_response):
method = environ['REQUEST_METHOD'].upper()
if method not in self.allowed_methods:
exc = HTTPMethodNotAllowed(
'You cannot %s a file' % method,
headers=[('Allow', ','.join(self.allowed_methods))])
return exc(environ, start_response)
return self.get(environ, start_response)
def calculate_etag(self):
return '"%s-%s"' % (self.last_modified, self.content_length)
def get(self, environ, start_response):
headers = self.headers[:]
current_etag = self.calculate_etag()
ETAG.update(headers, current_etag)
if self.expires is not None:
EXPIRES.update(headers, delta=self.expires)
try:
client_etags = IF_NONE_MATCH.parse(environ)
if client_etags:
for etag in client_etags:
if etag == current_etag or etag == '*':
# horribly inefficient, n^2 performance, yuck!
for head in list_headers(entity=True):
head.delete(headers)
start_response('304 Not Modified', headers)
return [b'']
except HTTPBadRequest as exce:
return exce.wsgi_application(environ, start_response)
# If we get If-None-Match and If-Modified-Since, and
# If-None-Match doesn't match, then we should not try to
# figure out If-Modified-Since (which has 1-second granularity
# and just isn't as accurate)
if not client_etags:
try:
client_clock = IF_MODIFIED_SINCE.parse(environ)
if (client_clock is not None
and client_clock >= int(self.last_modified)):
# horribly inefficient, n^2 performance, yuck!
for head in list_headers(entity=True):
head.delete(headers)
start_response('304 Not Modified', headers)
return [b''] # empty body
except HTTPBadRequest as exce:
return exce.wsgi_application(environ, start_response)
(lower, upper) = (0, self.content_length - 1)
range = RANGE.parse(environ)
if range and 'bytes' == range[0] and 1 == len(range[1]):
(lower, upper) = range[1][0]
upper = upper or (self.content_length - 1)
if upper >= self.content_length or lower > upper:
return HTTPRequestRangeNotSatisfiable((
"Range request was made beyond the end of the content,\r\n"
"which is %s long.\r\n Range: %s\r\n") % (
self.content_length, RANGE(environ))
).wsgi_application(environ, start_response)
content_length = upper - lower + 1
CONTENT_RANGE.update(headers, first_byte=lower, last_byte=upper,
total_length = self.content_length)
CONTENT_LENGTH.update(headers, content_length)
if range or content_length != self.content_length:
start_response('206 Partial Content', headers)
else:
start_response('200 OK', headers)
if self.content is not None:
return [self.content[lower:upper+1]]
return (lower, content_length)
class FileApp(DataApp):
"""
Returns an application that will send the file at the given
filename. Adds a mime type based on ``mimetypes.guess_type()``.
See DataApp for the arguments beyond ``filename``.
"""
def __init__(self, filename, headers=None, **kwargs):
self.filename = filename
content_type, content_encoding = self.guess_type()
if content_type and 'content_type' not in kwargs:
kwargs['content_type'] = content_type
if content_encoding and 'content_encoding' not in kwargs:
kwargs['content_encoding'] = content_encoding
DataApp.__init__(self, None, headers, **kwargs)
def guess_type(self):
return mimetypes.guess_type(self.filename)
def update(self, force=False):
stat = os.stat(self.filename)
if not force and stat.st_mtime == self.last_modified:
return
self.last_modified = stat.st_mtime
if stat.st_size < CACHE_SIZE:
fh = open(self.filename,"rb")
self.set_content(fh.read(), stat.st_mtime)
fh.close()
else:
self.content = None
self.content_length = stat.st_size
# This is updated automatically if self.set_content() is
# called
LAST_MODIFIED.update(self.headers, time=self.last_modified)
def get(self, environ, start_response):
is_head = environ['REQUEST_METHOD'].upper() == 'HEAD'
if 'max-age=0' in CACHE_CONTROL(environ).lower():
self.update(force=True) # RFC 2616 13.2.6
else:
self.update()
if not self.content:
if not os.path.exists(self.filename):
exc = HTTPNotFound(
'The resource does not exist',
comment="No file at %r" % self.filename)
return exc(environ, start_response)
try:
file = open(self.filename, 'rb')
except (IOError, OSError) as e:
exc = HTTPForbidden(
'You are not permitted to view this file (%s)' % e)
return exc.wsgi_application(
environ, start_response)
retval = DataApp.get(self, environ, start_response)
if isinstance(retval, list):
# cached content, exception, or not-modified
if is_head:
return [b'']
return retval
(lower, content_length) = retval
if is_head:
return [b'']
file.seek(lower)
file_wrapper = environ.get('wsgi.file_wrapper', None)
if file_wrapper:
return file_wrapper(file, BLOCK_SIZE)
else:
return _FileIter(file, size=content_length)
class _FileIter(object):
def __init__(self, file, block_size=None, size=None):
self.file = file
self.size = size
self.block_size = block_size or BLOCK_SIZE
def __iter__(self):
return self
def next(self):
chunk_size = self.block_size
if self.size is not None:
if chunk_size > self.size:
chunk_size = self.size
self.size -= chunk_size
data = self.file.read(chunk_size)
if not data:
raise StopIteration
return data
__next__ = next
def close(self):
self.file.close()
class DirectoryApp(object):
"""
Returns an application that dispatches requests to corresponding FileApps based on PATH_INFO.
FileApp instances are cached. This app makes sure not to serve any files that are not in a subdirectory.
To customize FileApp creation override ``DirectoryApp.make_fileapp``
"""
def __init__(self, path):
self.path = os.path.abspath(path)
if not self.path.endswith(os.path.sep):
self.path += os.path.sep
assert os.path.isdir(self.path)
self.cached_apps = {}
make_fileapp = FileApp
def __call__(self, environ, start_response):
path_info = environ['PATH_INFO']
app = self.cached_apps.get(path_info)
if app is None:
path = os.path.join(self.path, path_info.lstrip('/'))
if not os.path.normpath(path).startswith(self.path):
app = HTTPForbidden()
elif os.path.isfile(path):
app = self.make_fileapp(path)
self.cached_apps[path_info] = app
else:
app = HTTPNotFound(comment=path)
return app(environ, start_response)
class ArchiveStore(object):
"""
Returns an application that serves up a DataApp for items requested
in a given zip or tar archive.
Constructor Arguments:
``filepath`` the path to the archive being served
``cache_control()``
This method provides validated construction of the ``Cache-Control``
header as well as providing for automated filling out of the
``EXPIRES`` header for HTTP/1.0 clients.
"""
def __init__(self, filepath):
if zipfile.is_zipfile(filepath):
self.archive = zipfile.ZipFile(filepath,"r")
elif tarfile.is_tarfile(filepath):
self.archive = tarfile.TarFileCompat(filepath,"r")
else:
raise AssertionError("filepath '%s' is not a zip or tar " % filepath)
self.expires = None
self.last_modified = time.time()
self.cache = {}
def cache_control(self, **kwargs):
self.expires = CACHE_CONTROL.apply(self.headers, **kwargs) or None
return self
def __call__(self, environ, start_response):
path = environ.get("PATH_INFO","")
if path.startswith("/"):
path = path[1:]
application = self.cache.get(path)
if application:
return application(environ, start_response)
try:
info = self.archive.getinfo(path)
except KeyError:
exc = HTTPNotFound("The file requested, '%s', was not found." % path)
return exc.wsgi_application(environ, start_response)
if info.filename.endswith("/"):
exc = HTTPNotFound("Path requested, '%s', is not a file." % path)
return exc.wsgi_application(environ, start_response)
content_type, content_encoding = mimetypes.guess_type(info.filename)
# 'None' is not a valid content-encoding, so don't set the header if
# mimetypes.guess_type returns None
if content_encoding is not None:
app = DataApp(None, content_type = content_type,
content_encoding = content_encoding)
else:
app = DataApp(None, content_type = content_type)
app.set_content(self.archive.read(path),
time.mktime(info.date_time + (0,0,0)))
self.cache[path] = app
app.expires = self.expires
return app(environ, start_response)