0001import copy
0002from lxml import etree
0003from paste.response import header_value
0004import htmlserialize
0005import urlparse
0006
class Page(object):
    """
    This represents a single page that was served.

    A page wraps the parsed HTML tree of one response together with
    the ``context`` it was fetched through.  On construction it scans
    the document for ``<link rel="over-template">`` /
    ``<link rel="over-content">`` elements and for the ``over-name``,
    ``over-content``, ``over-replace`` and ``over-include``
    attributes; ``resolve()`` later applies what was found.

    The ``context`` object is only used through ``get_page(uri)``,
    ``add_name(page, el, name)``, ``get_name(name)``, ``pages``,
    ``content_page`` and ``log`` (with ``debug`` and ``warn``).
    """

    def __init__(self, headers, content, uri, context,
                 content_page=False):
        """
        Parse ``content`` as HTML and scan it for overlay commands.

        Raises ValueError when the ``content-type`` header is missing
        or is not ``text/html``.
        """
        self.headers = headers
        self.uri = uri
        self.context = context
        self.content_page = content_page
        self.content_type = header_value(headers, 'content-type')
        # header_value() may give back None when the header is absent;
        # report that through the same ValueError instead of failing
        # with an AttributeError.  Media types are case-insensitive.
        media_type = (self.content_type or '').lower()
        if not media_type.startswith('text/html'):
            raise ValueError(
                'Pages may only be text/html (not %r)'
                % self.content_type)
        self.html = etree.HTML(content)
        self.template = None
        self._scan_links()
        self._scan_html()

    def _scan_links(self):
        """
        Read ``<link>`` information

        Every ``over-template`` / ``over-content`` link is resolved
        against this page's URI, fetched through the context and then
        removed from the document.  At most one ``over-template`` link
        is allowed; a second one raises ValueError.
        """
        head = self.html.find('head')
        # Compare with None: the truth value of an element reflects
        # its number of children, not whether it exists.
        if head is None:
            return
        for el in head.findall('link'):
            rel = el.attrib.get('rel')
            if rel not in ('over-template', 'over-content'):
                continue
            href = urlparse.urljoin(self.uri, el.attrib['href'])
            page = self.context.get_page(href)
            if rel == 'over-template':
                if self.template is not None:
                    raise ValueError(
                        'Two <link rel="over-template"> found: '
                        '%r and now href="%s"'
                        % (self.template, href))
                self.template = page
            # over-content links need nothing beyond the get_page()
            # call above.
            head.remove(el)

    def _scan_html(self):
        """
        This scans all elements for the special attributes

        ``over-name`` values are registered with the context; the
        other attributes are recorded in ``self._el_commands`` as
        ``(element, command, data)`` tuples for ``_merge_pieces``.
        """
        self._el_commands = []
        for el in self.html.iter():
            names = el.get('over-name')
            if names:
                for name in names.split():
                    self.context.add_name(self, el, name)
            for command in ('over-content', 'over-replace'):
                value = el.get(command)
                if value:
                    self._el_commands.append(
                        (el, command, value.split()))
            src = el.get('over-include')
            if src:
                src = urlparse.urljoin(self.uri, src)
                # An optional #fragment selects a named element of
                # the included page instead of its whole body.
                if '#' in src:
                    src, name = src.split('#', 1)
                else:
                    name = None
                page = self.context.get_page(src)
                self._el_commands.append(
                    (el, 'over-include', (page, name)))

    def get_name(self, name):
        """
        Return the first element of this page whose ``over-name``
        attribute lists ``name``, or None when there is none.

        ``_merge_pieces`` calls this on the page an ``over-include``
        points at (assuming ``context.get_page`` hands back ``Page``
        objects -- TODO confirm).
        """
        for el in self.html.iter():
            names = el.get('over-name')
            if names and name in names.split():
                return el
        return None

    def _merge_pieces(self):
        """
        Apply the commands collected by ``_scan_html``.

        ``over-include`` copies the body (or a named element) of
        another page into the element.  ``over-content`` and
        ``over-replace`` try each listed name in order through
        ``context.get_name``; the name ``default`` stops the search
        and leaves the element untouched.
        """
        log_debug = self.context.log.debug
        log_warn = self.context.log.warn
        for el, command, data in self._el_commands:
            if command == 'over-include':
                page, name = data
                if name is None:
                    include = page.html.find('body')
                    if include is None:
                        include = page.html
                else:
                    include = page.get_name(name)
                    if include is None:
                        log_warn('Element named %r not found in %r',
                                 name, page)
                        continue
                self._merge_content(el, include)
                del el.attrib['over-include']
                continue

            found = None
            matched = None
            for name in data:
                if name == 'default':
                    break
                found = self.context.get_name(name)
                if found is not None:
                    matched = name
                    break
            if found is None:
                log_warn('Skipping %r; none of %r found',
                         el, data)
                continue

            # Log the name that matched (not the ``id`` builtin).
            if command == 'over-content':
                log_debug('Inserting %r (id=%r) into %r',
                          found, matched, el)
                del el.attrib['over-content']
                self._merge_content(el, found)
            else:
                log_debug('Replacing %r with %r (id=%r)',
                          el, found, matched)
                self._merge_replace(el, found)

    def _merge_content(self, el, insert):
        """
        Replace the content of ``el`` with a copy of the content of
        ``insert``, keeping ``el``'s own attributes and tail text.
        """
        # Snapshot first: clear() wipes attributes and tail as well.
        attribs = list(el.attrib.items())
        tail = el.tail
        el.clear()
        for key, value in attribs:
            el.set(key, value)
        el.text = insert.text
        for subel in insert:
            el.append(copy.deepcopy(subel))
        el.tail = tail

    def _merge_replace(self, el, replace):
        """
        Replace ``el`` itself with a copy of the content (text and
        children) of ``replace``.  The text that followed ``el`` is
        kept in place.
        """
        parent = el.getparent()
        index = parent.index(el)
        # The tail belongs to el and would disappear with it.
        tail = el.tail
        children = [copy.deepcopy(subel) for subel in replace]
        self._append_text_before(parent, index, replace.text)
        parent[index:index+1] = children
        if tail:
            if children:
                last = children[-1]
                last.tail = (last.tail or '') + tail
            else:
                # Nothing was inserted, so position ``index`` is now
                # whatever used to follow el.
                self._append_text_before(parent, index, tail)

    def _append_text_before(self, parent, index, text):
        """
        Append ``text`` to whatever text precedes child ``index`` of
        ``parent`` (the parent's text or the previous sibling's tail).
        """
        if not text:
            return
        if index == 0:
            parent.text = (parent.text or '') + text
        else:
            previous = parent[index-1]
            previous.tail = (previous.tail or '') + text

    def _merge_head(self):
        """
        Copy the ``link``, ``meta``, ``script`` and ``style`` elements
        from the ``<head>`` of every other page in the context into
        this page's ``<head>``.
        """
        my_head = self.html.find('head')
        for page in self.context.pages:
            if page is self:
                continue
            head = page.html.find('head')
            if head is None:
                # Nothing to copy from a page without a <head>.
                continue
            for el_type in ('link', 'meta', 'script', 'style'):
                for el in head.findall(el_type):
                    if my_head is None:
                        my_head = self._get_head(create=True)
                    my_head.append(copy.deepcopy(el))

    def _merge_content_page(self):
        """
        Merges the title from the content page
        """
        page = self.context.content_page
        title = page._get_title()
        if title is None:
            self.context.log.warn(
                'Content page %r has no <title>', page)
            return
        my_title = self._get_title(True)
        my_title.text = title.text

    def _get_head(self, create=False):
        """
        Return the ``<head>`` element, or None if there is none.  With
        ``create`` a missing ``<head>`` is inserted as the first child
        of the document root.
        """
        head = self.html.find('head')
        if head is None and create:
            head = etree.Element('head')
            self.html.insert(0, head)
        return head

    def _get_title(self, create=False):
        """
        Return the ``<title>`` element, or None if there is none.
        With ``create`` a missing ``<head>`` and/or ``<title>`` is
        added to the document.
        """
        head = self._get_head(create)
        if head is None:
            return None
        title = head.find('title')
        if title is None and create:
            title = etree.Element('title')
            head.append(title)
        return title

    def resolve(self):
        """
        Apply all collected commands, then merge in the head elements
        of the other pages and the title of the content page.
        """
        self._merge_pieces()
        self._merge_head()
        self._merge_content_page()

    def __str__(self):
        """
        Serialize the page, leaving out the overlay attributes.
        """
        return htmlserialize.tostring(
            self.html, drop_attribs=['over-name', 'over-content',
                                     'over-replace'])

    def __repr__(self):
        return '<%s %s from %s>' % (
            self.__class__.__name__,
            hex(abs(id(self)))[2:],
            self.uri)