0001import re
0002from lxml import etree
0003
def xml_cmp(a, b, debug=None):
    """Compare two element trees and return a ``cmp()``-style integer.

    Either argument may be an element or a string; strings are parsed
    with ``etree.HTML`` first.  The checks run in this order and the
    first nonzero result is returned:

    1. tag names (``cmp`` of the two tags),
    2. attributes (via ``attrib_cmp``),
    3. leading text (via ``white_cmp``),
    4. children pairwise, recursively, for the positions both share,
    5. number of children (-1 if ``b`` has more, 1 if ``a`` has more),
    6. tail text (via ``white_cmp``).

    If ``debug`` is not None, a description of each mismatch found is
    passed to ``debug.write``.  Returns 0 when no difference is found.
    """
    if isinstance(a, basestring):
        a = etree.HTML(a)
    if isinstance(b, basestring):
        b = etree.HTML(b)
    verbose = debug is not None

    tag_order = cmp(a.tag, b.tag)
    if tag_order:
        if verbose:
            debug.write('tag %r != %r\n' % (a.tag, b.tag))
        return tag_order

    # attrib_cmp writes its own debug output.
    attrib_order = attrib_cmp(a.attrib, b.attrib, debug)
    if attrib_order:
        return attrib_order

    text_order = white_cmp(a.text, b.text)
    if text_order:
        if verbose:
            debug.write('text %r != %r\n' % (a.text, b.text))
        return text_order

    count_a = len(a)
    count_b = len(b)

    # Positions present in both trees are compared before the child
    # counts are looked at, so a differing child wins over a length
    # mismatch.
    for index in range(min(count_a, count_b)):
        child_a = a[index]
        child_b = b[index]
        child_order = xml_cmp(child_a, child_b, debug)
        if child_order:
            if verbose:
                debug.write('child <%s> (%s) != <%s>\n'
                            % (child_a.tag, index, child_b.tag))
            return child_order

    if count_a < count_b:
        if verbose:
            debug.write('second <%s> has more children (%i) than first (%i)\n'
                        % (b.tag, count_b, count_a))
        return -1
    if count_a > count_b:
        if verbose:
            debug.write('first <%s> has more children (%i) than second (%i)\n'
                        % (a.tag, count_a, count_b))
        return 1

    tail_order = white_cmp(a.tail, b.tail)
    if tail_order and verbose:
        debug.write('tail %r != %r\n' % (a.tail, b.tail))
    return tail_order
0044
# Matches any run of spaces, newlines, carriage returns or tabs.
_white_re = re.compile('[ \n\r\t]+')


def white_cmp(a, b):
    """Compare two strings, ignoring differences in whitespace.

    Each run of spaces, newlines, carriage returns and tabs is collapsed
    to a single space and the result is stripped before comparing.
    ``None`` (or any other falsy value) is treated as the empty string,
    so a missing value compares equal to whitespace-only text and sorts
    before any non-blank text.

    Returns -1, 0 or 1, like the Python 2 builtin ``cmp``.
    """
    # ``a or ''`` guards against None: passing None to re.sub raises
    # TypeError, which used to happen when only one side was missing.
    a = _white_re.sub(' ', a or '').strip()
    b = _white_re.sub(' ', b or '').strip()
    # Sign of the comparison; same result as cmp() but does not rely on
    # that builtin being available.
    return (a > b) - (a < b)
0053
def attrib_cmp(a, b, debug=None):
    """Compare two attribute mappings and return a ``cmp()``-style integer.

    ``a`` and ``b`` must provide ``keys()`` and item lookup.  Keys of
    ``a`` are visited in order; the first difference decides the result:

    * -1 if a key of ``a`` is absent from ``b``,
    * -1 or 1 if the values for a shared key differ (ordering of the
      two values),
    * 1 if every key of ``a`` matches but ``b`` has additional keys,
    * 0 if both mappings hold the same keys and values.

    If ``debug`` is not None, a description of the difference is passed
    to ``debug.write``.
    """
    # Copy the keys into lists so the function does not depend on
    # ``keys()`` returning a mutable list (dict views have no remove()).
    a_keys = list(a.keys())
    b_keys = list(b.keys())
    for key in a_keys:
        if key not in b_keys:
            if debug is not None:
                debug.write('missing %s=%r attrib\n' % (key, a[key]))
            return -1
        a_value = a[key]
        b_value = b[key]
        # Sign of the comparison; same result as cmp() but does not rely
        # on that builtin being available.
        result = (a_value > b_value) - (a_value < b_value)
        if result:
            if debug is not None:
                debug.write('attrib %s=%r != %r\n' % (key, a_value, b_value))
            return result
    # Keys of b that a never mentioned, kept in b's order.
    extra_keys = [key for key in b_keys if key not in a_keys]
    if extra_keys:
        if debug is not None:
            debug.write('missing %s attribs\n' % (extra_keys,))
        return 1
    return 0