Monday, September 10, 2012

Django html5lib validating middleware

Django middleware to validate every HTML response with html5lib.

Usage

  1. Save to middleware.py
  2. Add 'middleware.ValidateMiddleware' to MIDDLEWARE_CLASSES

Listing

from html5lib.html5parser import HTMLParser
from html5lib import treebuilders
from django.http import HttpResponse


class ValidateMiddleware(object):
    """Django middleware that checks every HTML response with html5lib.

    When the parser reports problems, the original response is replaced by
    a plain-text report listing each error together with a few lines of
    the offending markup.  Add ``disable-validation`` to the query string
    to let a response through unchecked.
    """

    def process_request(self, request):
        """Never intercept a request; always returns ``None``."""
        return None

    def process_response(self, request, response):
        """Validate ``response`` and return it, or an error report.

        Returns the untouched ``response`` when it is streaming, empty,
        not HTML, validation was disabled through the query string, or
        the parser recorded no errors.  Otherwise returns a new
        ``text/plain`` ``HttpResponse`` describing the errors.
        """
        # Streaming responses expose no ``content`` that could be parsed.
        if getattr(response, 'streaming', False):
            return response
        # Tolerate responses that carry no Content-Type header at all.
        content_type = response.get('Content-Type', '')
        if not response.content or 'html' not in content_type \
                or 'disable-validation' in request.GET:
            return response

        treebuilder = treebuilders.getTreeBuilder("simpleTree")
        parser = HTMLParser(tree=treebuilder, strict=True)
        try:
            parser.parse(response.content)
        except Exception:
            # Deliberately best-effort: whatever the parser recorded before
            # giving up is read from ``parser.errors`` below.
            # NOTE(review): presumably strict mode raises on the first parse
            # error after recording it -- confirm against html5lib.
            pass
        if not parser.errors:
            return response

        # Build the report: error description, context lines, caret marker.
        report = []
        lines = response.content.splitlines()
        for (row, col), code, data in parser.errors:
            report.append('%s, %s' % (code, data))
            # NOTE(review): if html5lib reports 1-based rows, the last line
            # shown here is the one *after* the error -- confirm.
            report.extend(lines[max(0, row - 3):row + 1])
            report.append(' ' * col + '^')
        # ``content_type`` replaces the ``mimetype`` keyword, which newer
        # Django versions no longer accept.
        return HttpResponse('\n'.join(report), content_type='text/plain')

    def process_view(self, request, callback, callback_args, callback_kwargs):
        """Let Django call the view itself; always returns ``None``.

        Calling ``callback`` here and returning its response would stop
        Django from running the ``process_view`` hooks of later middleware.
        """
        return None

Thursday, June 21, 2012

Django Profiling Middleware

This Django middleware collects and prints accumulated profiling statistics on a running site. Unlike the popular tools that profile a single view, it profiles all views as they are executed and saves the statistics for later use. In production (DEBUG=False), only about one request in 20, chosen at random, is profiled to keep CPU overhead low.

Usage

  1. Save to profile.py
  2. Add 'profile.ProfileMiddleware' to MIDDLEWARE_CLASSES
  3. ...run requests
  4. Visit http://mysite/profile/ to print accumulated statistics
  5. Visit http://mysite/myview?profile to profile this view only and print statistics immediately

Listing

from cStringIO import StringIO
from os import path
from random import randrange
import cProfile
import pstats

from django.conf import settings
from django.http import HttpResponse
from django.http import HttpResponseForbidden

# Positional arguments handed to pstats.Stats.print_stats when a report is
# rendered (see the pstats documentation for the accepted restrictions).
RESTRICTIONS = (30,)

# File to which accumulated statistics are dumped after every profiled
# request.  Use ``STATS_FILE = None`` instead to keep the statistics in
# memory only.
STATS_FILE = '/tmp/mysite.profile'


class ProfileMiddleware(object):
    """Django middleware that accumulates cProfile statistics for views.

    * Requests whose path starts with ``/profile/`` return the accumulated
      statistics as plain text.
    * A request carrying ``profile`` in its query string is profiled on
      its own and answered with the statistics of that single call.
    * Any other view call is profiled when ``settings.DEBUG`` is on, when
      nothing has been profiled yet, or when ``randrange(20)`` returns 0.

    Statistics are only shown when ``settings.DEBUG`` is on or the
    requesting user is staff.
    """

    # True until this instance has profiled its first view.
    empty = True
    # Class attribute, so the profiler is shared by every instance.
    prof = cProfile.Profile()

    def process_request(self, request):
        """Serve the accumulated statistics for ``/profile/`` requests.

        Returns ``None`` for every other path so processing continues.
        """
        if not request.path.startswith('/profile/'):
            return None
        if not self._may_view_stats(request):
            return HttpResponseForbidden(
                'Forbidden', content_type='text/plain')
        return self.print_stats()

    def process_response(self, request, response):
        """Pass the response through unchanged."""
        return response

    def process_view(self, request, callback, callback_args, callback_kwargs):
        """Call the view, with or without profiling, and return its result.

        With ``profile`` in the query string (and permission to see
        statistics) the view's own response is discarded and a plain-text
        report for this single call is returned instead.
        """
        if 'profile' in request.GET and self._may_view_stats(request):
            # A throwaway profiler keeps the one-off measurement from
            # wiping statistics accumulated in the shared profiler.
            one_off = cProfile.Profile()
            one_off.runcall(
                callback, request, *callback_args, **callback_kwargs)
            out = StringIO()
            stats = pstats.Stats(one_off, stream=out).strip_dirs()
            return self._render(stats, out)
        if not self.empty and not settings.DEBUG and randrange(20):
            # run without profiling
            return callback(request, *callback_args, **callback_kwargs)
        # run with profiling
        self.empty = False
        if STATS_FILE:
            # The file holds the running total, so start from a clean
            # profiler and merge only this call into it.
            self.prof.clear()
        retval = self.prof.runcall(
            callback, request, *callback_args, **callback_kwargs)
        if STATS_FILE:
            # merge stats
            self.save()
        return retval

    def save(self):
        """Merge the profiler's data into ``STATS_FILE`` and rewrite it."""
        stats = pstats.Stats(self.prof)
        stats.strip_dirs()
        try:
            stats.add(STATS_FILE)
        except (IOError, EOFError, ValueError):
            # No previous file, or it is unreadable: start over with the
            # current data only.
            pass
        stats.dump_stats(STATS_FILE)

    def print_stats(self, input_file=STATS_FILE):
        """Return a plain-text response with the collected statistics.

        ``input_file`` names a stats dump to read; pass a false value to
        report from the in-memory profiler instead.  Answers
        ``'No data yet.'`` when there is nothing to report, including when
        the file is missing or cannot be read.
        """
        out = StringIO()
        if input_file:
            try:
                stats = pstats.Stats(input_file, stream=out)
            except (IOError, EOFError, ValueError):
                # Nothing dumped yet, or the dump is unreadable.
                return self._no_data()
        elif self.empty:
            return self._no_data()
        else:
            stats = pstats.Stats(self.prof, stream=out).strip_dirs()
        return self._render(stats, out)

    def _may_view_stats(self, request):
        """Tell whether profiling output may be shown for ``request``."""
        if settings.DEBUG:
            return True
        # ``request.user`` is absent when no middleware has attached it.
        user = getattr(request, 'user', None)
        return bool(user is not None and user.is_staff)

    def _no_data(self):
        """Return the response used when no statistics are available."""
        return HttpResponse('No data yet.', content_type='text/plain')

    def _render(self, stats, out):
        """Print ``stats`` into its stream ``out`` and wrap the text."""
        stats.sort_stats('cumulative').print_stats(*RESTRICTIONS)
        return HttpResponse(out.getvalue(), content_type='text/plain')

See also