0001import copy
0002from lxml import etree
0003from paste.response import header_value
0004import htmlserialize
0005import urlparse
0006
class Page(object):
    """
    This represents a single page that was served.

    The page body is parsed into an element tree (``self.html``).  On
    construction the tree is scanned for ``<link rel="over-template">``
    / ``<link rel="over-content">`` elements and for the ``over-name``,
    ``over-content``, ``over-replace`` and ``over-include`` attributes;
    the commands found are applied later by ``resolve()``.
    """

    def __init__(self, headers, content, uri, context,
                 content_page=False):
        """
        Parse ``content`` and scan it for ``over-*`` directives.

        ``headers`` is passed to ``header_value()`` to find the content
        type, ``uri`` is the base used to resolve relative references,
        and ``context`` is the shared object used for looking up pages
        and names (``get_page``, ``add_name``, ``get_name``, ``pages``,
        ``content_page`` and ``log`` are used by this class).

        Raises ``ValueError`` if the content type is missing or is not
        ``text/html``.
        """
        self.headers = headers
        self.uri = uri
        self.context = context
        self.content_page = content_page
        self.content_type = header_value(headers, 'content-type')
        # A missing header gives None; treat it like any other
        # non-HTML type instead of failing with AttributeError.  Media
        # types are case-insensitive, so compare in lower case.
        if not (self.content_type or '').lower().startswith('text/html'):
            raise ValueError(
                'Pages may only be text/html (not %r)'
                % self.content_type)
        self.html = etree.HTML(content)
        self.template = None
        self._scan_links()
        self._scan_html()

    def _scan_links(self):
        """
        Read ``<link>`` information

        ``over-template`` and ``over-content`` links are resolved
        against ``self.uri``, loaded through the context, and removed
        from the head.  Raises ``ValueError`` if more than one
        ``over-template`` link is present.
        """
        head = self.html.find('head')
        # Compare with None explicitly: the truth value of an element
        # reflects whether it has children, not whether it was found.
        if head is None:
            return
        for el in head.findall('link'):
            rel = el.get('rel')
            if rel in ('over-template', 'over-content'):
                href = el.attrib['href']
                href = urlparse.urljoin(self.uri, href)
                page = self.context.get_page(href)
                if rel == 'over-template':
                    if self.template is not None:
                        raise ValueError(
                            'Two <link rel="over-template"> found: '
                            '%r and now href="%s"'
                            % (self.template, href))
                    self.template = page
                else:
                    # This merges the page into the namespace, and
                    # that's all we have to do with it...
                    pass
                head.remove(el)

    def _scan_html(self):
        """
        This scans all elements for the special attributes

        ``over-name`` values are registered with the context right
        away; ``over-content``, ``over-replace`` and ``over-include``
        are queued in ``self._el_commands`` as
        ``(element, command, data)`` tuples for ``_merge_pieces()``.
        """
        self._el_commands = []
        for el in self.html.iter():
            over_name = el.get('over-name')
            if over_name:
                for name in over_name.split():
                    self.context.add_name(self, el, name)
            over_content = el.get('over-content')
            if over_content:
                self._el_commands.append(
                    (el, 'over-content', over_content.split()))
            over_replace = el.get('over-replace')
            if over_replace:
                self._el_commands.append(
                    (el, 'over-replace', over_replace.split()))
            src = el.get('over-include')
            if src:
                src = urlparse.urljoin(self.uri, src)
                # An optional #fragment names one element of the
                # included page; without it the whole body is used.
                if '#' in src:
                    src, name = src.split('#', 1)
                else:
                    name = None
                page = self.context.get_page(src)
                self._el_commands.append(
                    (el, 'over-include', (page, name)))

    def get_name(self, name):
        """
        Return the first element of this page whose ``over-name``
        attribute lists ``name``, or None if there is none.

        ``_merge_pieces()`` calls this on the included page for
        ``over-include="...#name"``.

        NOTE(review): this method was missing from the class; the
        lookup here only searches this page's own tree -- confirm that
        matches what ``context.get_name`` does for shared names.
        """
        for el in self.html.iter():
            names = el.get('over-name')
            if names and name in names.split():
                return el
        return None

    def _merge_pieces(self):
        """
        Apply the commands queued by ``_scan_html()``.

        * ``over-include``: copy the body of the included page (or the
          named element of it) into the element.
        * ``over-content`` / ``over-replace``: look the listed names up
          in the context, in order, and use the first one found.  The
          name ``default`` stops the search, leaving the element as it
          is.

        Commands whose source cannot be found are logged and skipped.
        """
        log = self.context.log
        for el, command, data in self._el_commands:
            if command == 'over-include':
                page, name = data
                if name is None:
                    include = page.html.find('body')
                    if include is None:
                        include = page.html
                else:
                    include = page.get_name(name)
                    if include is None:
                        log.warn('Element named %r not found in %r',
                                 name, page)
                        continue
                # Both the whole-page and the named case end up here;
                # an include must never fall through to the name
                # lookup below, whose data has a different shape.
                self._merge_content(el, include)
                del el.attrib['over-include']
                continue

            found = None
            for name in data:
                if name == 'default':
                    break
                found = self.context.get_name(name)
                if found is not None:
                    break
            if found is None:
                log.warn('Skipping %r; none of %r found',
                         el, data)
                continue
            # Got a match!  Now we have to copy it in.  ``name`` is
            # the name that matched.
            if command == 'over-content':
                log.debug('Inserting %r (id=%r) into %r',
                          found, name, el)
                del el.attrib['over-content']
                self._merge_content(el, found)
            else:
                log.debug('Replacing %r with %r (id=%r)',
                          el, found, name)
                self._merge_replace(el, found)

    def _merge_content(self, el, insert):
        """
        Replace the content (text and children) of ``el`` with a copy
        of the content of ``insert``.  The attributes and tail text of
        ``el`` are kept; ``insert`` is not modified.
        """
        # Snapshot first: clear() wipes attributes and tail, and
        # items() may be a live view of the attributes.
        attribs = list(el.attrib.items())
        tail = el.tail
        el.clear()
        for key, value in attribs:
            el.set(key, value)
        el.text = insert.text
        for subel in insert:
            el.append(copy.deepcopy(subel))
        el.tail = tail

    def _merge_replace(self, el, replace):
        """
        Replace ``el`` itself, in its parent, with a copy of the
        content (text and children) of ``replace``.  The text that
        followed ``el`` is kept after the inserted content.
        """
        parent = el.getparent()
        index = parent.index(el)
        children = [copy.deepcopy(subel) for subel in replace]
        # text and tail are None when empty, so never use += on them
        leading = replace.text or ''
        trailing = el.tail or ''
        if children:
            # Removing el drops its tail; move it after the last
            # inserted element.
            last = children[-1]
            last.tail = (last.tail or '') + trailing
        else:
            leading += trailing
        if leading:
            # Leading text belongs to whatever precedes el: the
            # parent's text, or the previous sibling's tail.
            if index == 0:
                parent.text = (parent.text or '') + leading
            else:
                previous = parent[index-1]
                previous.tail = (previous.tail or '') + leading
        parent[index:index+1] = children

    def _merge_head(self):
        """
        Copy the ``link``, ``meta``, ``script`` and ``style`` elements
        from the head of every other page in the context into the head
        of this page.  Pages without a head are skipped; a head is
        created on this page only if there is something to copy.
        """
        my_head = self.html.find('head')
        for page in self.context.pages:
            if page is self:
                continue
            head = page.html.find('head')
            if head is None:
                continue
            for el_type in ['link', 'meta', 'script', 'style']:
                for el in head.findall(el_type):
                    if my_head is None:
                        my_head = self._get_head(create=True)
                    my_head.append(copy.deepcopy(el))

    def _merge_content_page(self):
        """
        Merges the title from the content page
        """
        page = self.context.content_page
        title = page._get_title()
        if title is not None:
            my_title = self._get_title(True)
            my_title.text = title.text
        else:
            self.context.log.warn(
                'Content page %r has no <title>' % page)

    def _get_head(self, create=False):
        """
        Return the ``<head>`` element of this page, or None.  If
        ``create`` is true a missing head is inserted as the first
        child of the root element.
        """
        head = self.html.find('head')
        if head is None and create:
            head = etree.Element('head')
            self.html.insert(0, head)
        return head

    def _get_title(self, create=False):
        """
        Return the ``<title>`` element of this page, or None.  If
        ``create`` is true, a missing title (and head) is created.
        """
        head = self._get_head(create)
        if head is None:
            return None
        title = head.find('title')
        if title is None and create:
            title = etree.Element('title')
            head.append(title)
        return title

    def resolve(self):
        """
        Apply the queued commands, then merge in the head elements of
        the other pages and the title of the content page.
        """
        self._merge_pieces()
        self._merge_head()
        self._merge_content_page()

    def __str__(self):
        # The over-* attributes are processing directives; none of
        # them should show up in the serialized page.
        return htmlserialize.tostring(
            self.html, drop_attribs=['over-name', 'over-content',
                                     'over-replace', 'over-include'])

    def __repr__(self):
        return '<%s %s from %s>' % (
            self.__class__.__name__,
            hex(abs(id(self)))[2:],
            self.uri)