0001
0002
0003"""
0004WSGI applications that parse the URL and dispatch to on-disk resources
0005"""
0006
0007import os
0008import sys
0009import imp
0010import mimetypes
0011try:
0012 import pkg_resources
0013except ImportError:
0014 pkg_resources = None
0015from paste import request
0016from paste import fileapp
0017from paste.util import import_string
0018from paste import httpexceptions
0019from httpheaders import ETAG
0020from paste.util import converters
0021
class NoDefault(object):
    """Sentinel type used as the "argument not supplied" marker.

    The class object itself (not an instance) is used as the default
    value and compared with ``is``.
    """
0024
# Names exported by ``from <this module> import *``.
__all__ = [
    'URLParser',
    'StaticURLParser',
    'PkgResourcesParser',
]
0026
class URLParser(object):

    """
    WSGI middleware

    Application dispatching, based on URL. An instance of `URLParser` is
    an application that loads and delegates to other applications. It
    looks for files in its directory that match the first part of
    PATH_INFO; these may have an extension, but are not required to have
    one, in which case the available files are searched to find the
    appropriate file. If it is ambiguous, a 404 is returned and an error
    logged.

    By default there is a constructor for .py files that loads the module,
    and looks for an attribute ``application``, which is a ready
    application object, or an attribute that matches the module name,
    which is a factory for building applications, and is called with no
    arguments.

    URLParser will also look in __init__.py for special overrides.
    These overrides are:

    ``urlparser_hook(environ)``
        This can modify the environment. Its return value is ignored,
        and it cannot be used to change the response in any way. You
        *can* use this, for example, to manipulate SCRIPT_NAME/PATH_INFO
        (try to keep them consistent with the original URL -- but
        consuming PATH_INFO and moving that to SCRIPT_NAME is ok).

    ``urlparser_wrap(environ, start_response, app)``:
        After URLParser finds the application, it calls this function
        (if present). If this function doesn't call
        ``app(environ, start_response)`` then the application won't be
        called at all! This can be used to allocate resources (with
        ``try:finally:``) or otherwise filter the output of the
        application.

    ``not_found_hook(environ, start_response)``:
        If no file can be found (*in this directory*) to match the
        request, then this WSGI application will be called. You can
        use this to change the URL and pass the request back to
        URLParser again, or on to some other application. This
        doesn't catch all ``404 Not Found`` responses, just missing
        files.

    ``application(environ, start_response)``:
        This basically overrides URLParser completely, and the given
        application is used for all requests. ``urlparser_wrap`` and
        ``urlparser_hook`` are still called, but the filesystem isn't
        searched in any way.
    """

    # Cache of parsers keyed by (directory, base_python_name).  This is a
    # class attribute, so it is shared by every instance (see get_parser).
    parsers_by_directory = {}

    # Filled in on the first request by find_init_module(); NoDefault
    # means "not looked up yet", None means "no __init__.py found".
    init_module = NoDefault

    # Constructors registered through register_constructor(); copied into
    # each instance's ``constructors`` dict by __init__.
    global_constructors = {}

    def __init__(self, global_conf,
                 directory, base_python_name,
                 index_names=NoDefault,
                 hide_extensions=NoDefault,
                 ignore_extensions=NoDefault,
                 constructors=None,
                 **constructor_conf):
        """
        Create a URLParser object that looks at `directory`.
        `base_python_name` is the package that this directory
        represents, thus any Python modules in this directory will
        be given names under this package.

        `index_names`, `hide_extensions` and `ignore_extensions` fall
        back to the same-named key in `global_conf`, then to a built-in
        default, when not given.  `constructors` is an optional dict of
        ``extension -> constructor`` overriding the globally registered
        ones.  Any other keyword must be named ``'constructor .ext'``;
        a string value is resolved with ``import_string.eval_import``.

        Raises ``ValueError`` for any other extra keyword.
        """
        if global_conf:
            import warnings
            warnings.warn(
                'The global_conf argument to URLParser is deprecated; '
                'either pass in None or {}, or use make_url_parser',
                DeprecationWarning)
        else:
            global_conf = {}
        # Directories are stored with forward slashes on every platform.
        if os.path.sep != '/':
            directory = directory.replace(os.path.sep, '/')
        self.directory = directory
        self.base_python_name = base_python_name

        if index_names is NoDefault:
            index_names = global_conf.get(
                'index_names', ('index', 'Index', 'main', 'Main'))
        self.index_names = converters.aslist(index_names)
        if hide_extensions is NoDefault:
            hide_extensions = global_conf.get(
                'hide_extensions', ('.pyc', '.bak', '.py~', '.pyo'))
        self.hide_extensions = converters.aslist(hide_extensions)
        if ignore_extensions is NoDefault:
            ignore_extensions = global_conf.get(
                'ignore_extensions', ())
        self.ignore_extensions = converters.aslist(ignore_extensions)

        # Start from the globally registered constructors, then layer the
        # per-instance overrides on top of a private copy.
        self.constructors = self.global_constructors.copy()
        if constructors:
            self.constructors.update(constructors)

        prefix = 'constructor '
        for name, value in constructor_conf.items():
            if not name.startswith(prefix):
                raise ValueError(
                    "Only extra configuration keys allowed are "
                    "'constructor .ext = import_expr'; you gave %r "
                    "(=%r)" % (name, value))
            ext = name[len(prefix):].strip()
            if isinstance(value, (str, unicode)):
                value = import_string.eval_import(value)
            self.constructors[ext] = value

    def __call__(self, environ, start_response):
        """
        WSGI entry point: find the application for the next PATH_INFO
        segment and delegate to it, honouring the ``__init__.py`` hooks
        described in the class docstring.
        """
        environ['paste.urlparser.base_python_name'] = self.base_python_name
        if self.init_module is NoDefault:
            self.init_module = self.find_init_module(environ)
        init_module = self.init_module
        path_info = environ.get('PATH_INFO', '')
        if not path_info:
            # The directory was requested without a trailing slash.
            return self.add_slash(environ, start_response)
        if init_module and getattr(init_module, 'urlparser_hook', None):
            init_module.urlparser_hook(environ)
        # Remember the (possibly hook-modified) values so they can be
        # restored before calling not_found_hook; find_application()
        # rewrites both keys.
        orig_path_info = environ['PATH_INFO']
        # SCRIPT_NAME may legitimately be absent when it is empty.
        orig_script_name = environ.get('SCRIPT_NAME', '')
        application, filename = self.find_application(environ)
        if not application:
            # The identity check stops the hook from being re-entered
            # when it passes the request back to this same parser.
            if (init_module
                and getattr(init_module, 'not_found_hook', None)
                and environ.get('paste.urlparser.not_found_parser')
                    is not self):
                not_found_hook = init_module.not_found_hook
                environ['paste.urlparser.not_found_parser'] = self
                environ['PATH_INFO'] = orig_path_info
                environ['SCRIPT_NAME'] = orig_script_name
                return not_found_hook(environ, start_response)
            if filename is None:
                # No file matched at all.
                name, rest_of_path = request.path_info_split(
                    environ['PATH_INFO'])
                if not name:
                    name = 'one of %s' % ', '.join(
                        self.index_names or
                        ['(no index_names defined)'])
                return self.not_found(
                    environ, start_response,
                    'Tried to load %s from directory %s'
                    % (name, self.directory))
            # A file matched but no application could be built from it.
            environ['wsgi.errors'].write(
                'Found resource %s, but could not construct application\n'
                % filename)
            return self.not_found(
                environ, start_response,
                'Tried to load %s from directory %s'
                % (filename, self.directory))
        if init_module and getattr(init_module, 'urlparser_wrap', None):
            return init_module.urlparser_wrap(
                environ, start_response, application)
        return application(environ, start_response)

    def find_application(self, environ):
        """
        Return ``(application, filename)`` for the request.

        If the init module defines ``application`` it is returned with a
        filename of None (once per SCRIPT_NAME, tracked through the
        ``paste.urlparser.init_application`` environ key).  Otherwise the
        first PATH_INFO segment is moved onto SCRIPT_NAME and looked up
        with `find_file`; an empty segment tries each of `index_names`.
        Returns ``(None, None)`` when no file matches.
        """
        script_name = environ.get('SCRIPT_NAME', '')
        if (self.init_module
            and getattr(self.init_module, 'application', None)
            and environ.get('paste.urlparser.init_application')
                != script_name):
            environ['paste.urlparser.init_application'] = script_name
            return self.init_module.application, None
        name, rest_of_path = request.path_info_split(environ['PATH_INFO'])
        environ['PATH_INFO'] = rest_of_path
        if name is not None:
            environ['SCRIPT_NAME'] = script_name + '/' + name
        if not name:
            for index_name in self.index_names:
                filename = self.find_file(environ, index_name)
                if filename:
                    break
            else:
                # None of the index files were found.
                filename = None
        else:
            filename = self.find_file(environ, name)
        if filename is None:
            return None, filename
        return self.get_application(environ, filename), filename

    def not_found(self, environ, start_response, debug_message=None):
        """
        Respond with an ``HTTPNotFound`` whose comment records
        SCRIPT_NAME, PATH_INFO, the directory and `debug_message`.
        """
        exc = httpexceptions.HTTPNotFound(
            'The resource at %s could not be found'
            % request.construct_url(environ),
            comment='SCRIPT_NAME=%r; PATH_INFO=%r; looking in %r; debug: %s'
            % (environ.get('SCRIPT_NAME'), environ.get('PATH_INFO'),
               self.directory, debug_message or '(none)'))
        return exc.wsgi_application(environ, start_response)

    def add_slash(self, environ, start_response):
        """
        This happens when you try to get to a directory
        without a trailing /
        """
        url = request.construct_url(environ, with_query_string=False)
        url += '/'
        if environ.get('QUERY_STRING'):
            url += '?' + environ['QUERY_STRING']
        exc = httpexceptions.HTTPMovedPermanently(
            'The resource has moved to %s - you should be redirected '
            'automatically.' % url,
            headers=[('location', url)])
        return exc.wsgi_application(environ, start_response)

    def find_file(self, environ, base_filename):
        """
        Return the full path of the entry in ``self.directory`` that
        matches `base_filename`, or None.

        An entry matches when its whole name equals `base_filename`, or
        when its name minus the extension does and the extension is not
        in ``ignore_extensions``.  Entries whose extension is in
        ``hide_extensions`` never match.  When several entries match,
        the exact-name match wins; without one the URL is ambiguous, a
        message is written to ``wsgi.errors`` and None is returned.
        """
        possible = []
        # Full path of the entry whose name equals base_filename exactly.
        exact_match = None
        for filename in os.listdir(self.directory):
            base, ext = os.path.splitext(filename)
            if ext in self.hide_extensions or not base:
                continue
            full_filename = os.path.join(self.directory, filename)
            if filename == base_filename:
                exact_match = full_filename
                possible.append(full_filename)
                continue
            if ext in self.ignore_extensions:
                continue
            if base == base_filename:
                possible.append(full_filename)
        if not possible:
            return None
        if len(possible) > 1:
            # An exact match isn't 'ambiguous' per se; it might mean
            # foo.gif and foo.gif.back, for instance.  The decision must
            # not depend on which entry os.listdir() happened to return
            # last, so the exact match is tracked explicitly.
            if exact_match is not None:
                return exact_match
            environ['wsgi.errors'].write(
                'Ambiguous URL: %s; matches files %s\n'
                % (request.construct_url(environ),
                   ', '.join(possible)))
            return None
        return possible[0]

    def get_application(self, environ, filename):
        """
        Build the application for `filename` using the constructor
        registered for its type: ``'dir'`` for directories, otherwise
        the file extension, falling back to the ``'*'`` catch-all.

        Returns None when no constructor applies or when the constructor
        itself returns None.
        """
        if os.path.isdir(filename):
            kind = 'dir'
        else:
            kind = os.path.splitext(filename)[1]
        constructor = self.constructors.get(kind, self.constructors.get('*'))
        if constructor is None:
            return None
        return constructor(self, environ, filename)

    def register_constructor(cls, extension, constructor):
        """
        Register a function as a constructor. Registered constructors
        apply to all instances of `URLParser`.

        The extension should have a leading ``.``, or the special
        extensions ``dir`` (for directories) and ``*`` (a catch-all).

        `constructor` must be a callable that takes three arguments:
        the parser, ``environ`` and ``filename``, and returns a WSGI
        application.
        """
        d = cls.global_constructors
        assert extension not in d, (
            "A constructor already exists for the extension %r (%r) "
            "when attemption to register constructor %r"
            % (extension, d[extension], constructor))
        d[extension] = constructor
    register_constructor = classmethod(register_constructor)

    def get_parser(self, directory, base_python_name):
        """
        Get a parser for the given directory, or create one if
        necessary. This way parsers can be cached and reused.

        # @@: settings are inherited from the first caller
        """
        key = (directory, base_python_name)
        try:
            return self.parsers_by_directory[key]
        except KeyError:
            parser = self.__class__(
                {},
                directory, base_python_name,
                index_names=self.index_names,
                hide_extensions=self.hide_extensions,
                ignore_extensions=self.ignore_extensions,
                constructors=self.constructors)
            self.parsers_by_directory[key] = parser
            return parser

    def find_init_module(self, environ):
        """
        Load and return this directory's ``__init__.py`` as a module,
        or return None if the file does not exist.
        """
        filename = os.path.join(self.directory, '__init__.py')
        if not os.path.exists(filename):
            return None
        return load_module(environ, filename)

    def __repr__(self):
        return '<%s directory=%r; module=%s at %s>' % (
            self.__class__.__name__,
            self.directory,
            self.base_python_name,
            hex(abs(id(self))))
0342
def make_directory(parser, environ, filename):
    """
    Constructor for directories: return the parser responsible for the
    sub-directory `filename`, obtained from ``parser.get_parser``.

    The package name handed to ``get_parser`` is the directory's base
    name, prefixed with the current
    ``environ['paste.urlparser.base_python_name']`` and a dot when that
    value is non-empty.
    """
    package_prefix = environ['paste.urlparser.base_python_name']
    leaf = os.path.basename(filename)
    if package_prefix:
        package_name = package_prefix + "." + leaf
    else:
        package_name = leaf
    return parser.get_parser(filename, package_name)
0350
# Directories are handled by make_directory for every URLParser.
URLParser.register_constructor('dir', make_directory)
0352
def make_unknown(parser, environ, filename):
    """
    Catch-all constructor: wrap `filename` in a ``fileapp.FileApp``.

    `parser` and `environ` are accepted to match the constructor
    signature but are not used.
    """
    file_application = fileapp.FileApp(filename)
    return file_application
0355
# Any file type without its own constructor falls back to make_unknown.
URLParser.register_constructor('*', make_unknown)
0357
def load_module(environ, filename):
    """
    Load the Python file `filename` as a module and return it.

    The module name is the file's base name without extension, placed
    under ``environ['paste.urlparser.base_python_name']`` when that is
    non-empty.  Loading is delegated to `load_module_from_name`, with
    ``environ['wsgi.errors']`` as the error stream.
    """
    package = environ['paste.urlparser.base_python_name']
    stem = os.path.splitext(os.path.basename(filename))[0]
    if package:
        qualified_name = package + '.' + stem
    else:
        qualified_name = stem
    return load_module_from_name(
        environ, filename, qualified_name, environ['wsgi.errors'])
0365
def load_module_from_name(environ, filename, module_name, errors):
    """
    Import the file `filename` under the dotted name `module_name`.

    A module already present in ``sys.modules`` is returned as is.
    Otherwise an ``__init__.py`` is created next to `filename` if one is
    missing, the parent package (if `module_name` is dotted) is loaded
    first, and the module is imported with ``imp``.

    `errors` is a file-like object; if ``__init__.py`` cannot be
    created a message is written to it and None is returned.  Errors
    raised by the import itself propagate to the caller.
    """
    if module_name in sys.modules:
        return sys.modules[module_name]
    directory = os.path.dirname(filename)
    init_filename = os.path.join(directory, '__init__.py')
    if not os.path.exists(init_filename):
        try:
            f = open(init_filename, 'w')
        except (OSError, IOError) as e:
            errors.write(
                'Cannot write __init__.py file into directory %s (%s)\n'
                % (directory, e))
            return None
        # Close the handle even if the write itself fails.
        try:
            f.write('#\n')
        finally:
            f.close()
    if module_name in sys.modules:
        return sys.modules[module_name]
    if '.' in module_name:
        parent_name, base_name = module_name.rsplit('.', 1)
        # Called only for its side effect of loading the parent package;
        # the return value is not needed here.
        load_module_from_name(environ, directory, parent_name, errors)
    else:
        base_name = module_name
    fp = None
    try:
        fp, pathname, stuff = imp.find_module(base_name, [directory])
        module = imp.load_module(module_name, fp, pathname, stuff)
    finally:
        if fp is not None:
            fp.close()
    return module
0399
def make_py(parser, environ, filename):
    """
    Constructor for ``.py`` files: load the module and return the WSGI
    application it provides, or None.

    Lookup order: a truthy module-level ``application`` (its
    ``wsgi_application`` attribute is preferred when present); then an
    attribute named after the module, which is either an object with a
    ``wsgi_application`` attribute or a factory called with no
    arguments.  When neither exists a message is written to
    ``environ['wsgi.errors']``.
    """
    module = load_module(environ, filename)
    if not module:
        return None

    ready_app = getattr(module, 'application', None)
    if ready_app:
        return getattr(ready_app, 'wsgi_application', ready_app)

    short_name = module.__name__.split('.')[-1]
    if not hasattr(module, short_name):
        environ['wsgi.errors'].write(
            "Cound not find application or %s in %s\n"
            % (short_name, module))
        return None

    candidate = getattr(module, short_name)
    if hasattr(candidate, 'wsgi_application'):
        return candidate.wsgi_application
    # Otherwise the attribute is treated as a zero-argument factory.
    return getattr(module, short_name)()
0418
# Python source files are turned into applications by make_py.
URLParser.register_constructor('.py', make_py)
0420
0421class StaticURLParser(object):
0422
0423 """
0424 Like ``URLParser`` but only serves static files.
0425
0426 ``cache_max_age``:
0427 integer specifies Cache-Control max_age in seconds
0428 """
0429
0430
0431 def __init__(self, directory, root_directory=None,
0432 cache_max_age=None):
0433 if os.path.sep != '/':
0434 directory = directory.replace(os.path.sep, '/')
0435 self.directory = directory
0436 self.root_directory = root_directory
0437 if root_directory is not None:
0438 self.root_directory = os.path.normpath(self.root_directory)
0439 else:
0440 self.root_directory = directory
0441 self.cache_max_age = cache_max_age
0442 if os.path.sep != '/':
0443 directory = directory.replace('/', os.path.sep)
0444 self.root_directory = self.root_directory.replace('/', os.path.sep)
0445
0446 def __call__(self, environ, start_response):
0447 path_info = environ.get('PATH_INFO', '')
0448 if not path_info:
0449 return self.add_slash(environ, start_response)
0450 if path_info == '/':
0451
0452