Monday, September 10, 2012

Django html5lib validating middleware

Django middleware to validate every HTML response with html5lib.

Usage

  1. Save to middleware.py
  2. Add 'middleware.ValidateMiddleware' to MIDDLEWARE_CLASSES

Listing

import logging

from django.http import HttpResponse
from html5lib import treebuilders
from html5lib.html5parser import HTMLParser


logger = logging.getLogger(__name__)


class ValidateMiddleware(object):
    """Replace invalid HTML responses with a plain-text html5lib error report.

    Every non-empty response whose ``Content-Type`` mentions ``html`` is
    parsed with html5lib in strict mode. If the parser records an error, the
    original response is discarded and a ``text/plain`` report (error code,
    a short source excerpt and a caret marking the column) is returned
    instead. Adding ``disable-validation`` to the query string bypasses the
    check for that request.
    """

    # Number of source lines shown above the offending line in the report.
    CONTEXT_LINES = 2

    def process_request(self, request):
        """Do nothing; returning None lets Django continue with the request."""
        return None

    def process_response(self, request, response):
        """Validate *response* and return it, or an error report on failure.

        Args:
            request: the current request; only ``request.GET`` is read.
            response: the response produced by the view or other middleware.

        Returns:
            The untouched *response* when it is streaming, empty, not HTML,
            validation is disabled, or no parse error was recorded; otherwise
            a new ``text/plain`` ``HttpResponse`` describing the error(s).
        """
        # Streaming responses expose no ``content`` attribute to inspect.
        if getattr(response, 'streaming', False):
            return response
        if 'disable-validation' in request.GET:
            return response

        content = response.content
        # ``get`` avoids a KeyError when no Content-Type header was set.
        if not content or 'html' not in response.get('Content-Type', ''):
            return response

        errors = self._collect_errors(content)
        if not errors:
            return response

        report = self._format_report(
            self._source_lines(response, content), errors)
        return HttpResponse(report, content_type='text/plain')

    def process_view(self, request, callback, callback_args, callback_kwargs):
        """Do nothing and let Django call the view itself.

        Returning None (rather than invoking ``callback`` here) keeps the
        ``process_view`` hooks of later middleware in the chain running.
        """
        return None

    @staticmethod
    def _collect_errors(content):
        """Parse *content* strictly and return html5lib's recorded errors."""
        # Only parser.errors is consumed; the built tree is thrown away, so
        # the widely available "etree" builder is sufficient.
        treebuilder = treebuilders.getTreeBuilder("etree")
        parser = HTMLParser(tree=treebuilder, strict=True)
        try:
            parser.parse(content)
        except Exception:
            # Strict mode raises on the first parse error, after it has been
            # appended to parser.errors. An exception with nothing recorded
            # is therefore unexpected: keep serving the page, but log it.
            if not parser.errors:
                logger.exception(
                    'html5lib failed without reporting a parse error')
        return parser.errors

    @staticmethod
    def _source_lines(response, content):
        """Split *content* into text lines for display in the report."""
        # Split before decoding so line boundaries are only \n, \r and \r\n.
        lines = content.splitlines()
        if isinstance(content, bytes):
            charset = getattr(response, 'charset', None) or 'utf-8'
            lines = [line.decode(charset, 'replace') for line in lines]
        return lines

    @classmethod
    def _format_report(cls, lines, errors):
        """Render *errors* as text with a source excerpt and caret for each."""
        out = []
        for (row, col), code, datavars in errors:
            out.append('%s, %s' % (code, datavars))
            # html5lib rows are 1-based: the offending line is lines[row - 1].
            # The excerpt must end on it so the caret lands under that line.
            first = max(0, row - 1 - cls.CONTEXT_LINES)
            last = min(len(lines), row)
            out.extend(lines[first:last])
            out.append(' ' * col + '^')
        return '\n'.join(out)

No comments:

Post a Comment