DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

Snippets has posted 5883 posts at DZone. View Full User Profile

Format Firefox Session File

08.09.2009
| 2626 views |
  • submit to reddit
        Copy the following code into a file and execute it.  The output can be either indented lines or XML.  With the XML format, one can use xmldiff from http://www.logilab.org/projects/xmldiff to get a decent diff of two generated session XML files.  This script requires either Python 2.6 or one of the following Python modules: simplejson or cjson.

#!/usr/bin/env python

import sys, getopt

# ``jdecode`` is bound to the first JSON decoder that can be imported; it
# takes a JSON document as a string and returns the decoded Python objects.
jdecode = None

# For 2.6 and above this should work
try:
    import json
    jdecode = json.loads
except ImportError:
    pass

if jdecode is None:
    # This is the module that is json in 2.6
    try:
        import simplejson
        jdecode = simplejson.loads
    except ImportError:
        pass

if jdecode is None:
    try:
        import cjson
        jdecode = cjson.decode
    except ImportError:
        pass

if jdecode is None:
    def jdecode(s):
        """Placeholder used when no JSON module is importable.

        Defers the failure to the first decode attempt so that importing
        this file still succeeds.
        """
        # NotImplemented is a comparison sentinel, not an exception class;
        # NotImplementedError is the exception meant here.
        raise NotImplementedError(
            'no JSON decoder available: install simplejson or cjson')

def extractsession(sdata):
    """Return the JSON object text contained in raw session file data.

    Two layouts are recognised:

    * a Session Manager file, whose first line starts with
      ``[SessionManager`` and whose last line holds the session data;
    * a plain session, which is the session data itself.

    The session data is a ``{...}`` object, optionally wrapped in one pair
    of parentheses, which is removed.  Leading and trailing whitespace
    (such as a final newline) is ignored.

    Raises ValueError when the data is empty or is not delimited by
    ``{}`` or ``()``.
    """
    # Strip first so a trailing newline does not hide the closing delimiter
    # and empty input falls through to the ValueError below.
    sdata = sdata.strip()

    # a Session Manager session file
    if sdata.startswith('[SessionManager'):
        sdata = sdata.splitlines()[-1].strip()

    # Not sure why, but some sessions are enclosed in parentheses
    # and some are not.
    if sdata.startswith('{') and sdata.endswith('}'):
        return sdata
    if sdata.startswith('(') and sdata.endswith(')'):
        return sdata[1:-1]
    raise ValueError('Unexpected data format')

def printsessiontxt(sdata, history=False):
    """Print the session as indented text on standard output.

    sdata is the decoded session: ``sdata['windows']`` is a sequence of
    windows, each with a ``'tabs'`` sequence; every tab has an ``'entries'``
    sequence whose items carry a ``'url'``.

    With history true every entry of a tab is listed; otherwise only the
    last entry is printed.  A tab without entries prints just its heading.
    """
    # Each print() call takes one pre-formatted string so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).
    for widx, window in enumerate(sdata['windows']):
        print("Window %d" % widx)
        for tidx, tab in enumerate(window['tabs']):
            print("  Tab %d" % tidx)
            entries = tab['entries']
            if not history:
                # Slicing (rather than indexing) tolerates an empty list.
                entries = entries[-1:]
            for entry in entries:
                print("    %s" % entry['url'])

def xmlreplacespecial(astr):
    """Return astr with the XML special characters ``&``, ``>`` and ``<``
    replaced by their entity references."""
    # '&' is replaced first so the ampersands introduced by the other two
    # replacements are not escaped a second time.
    return astr.replace('&', '&amp;').replace('>', '&gt;').replace('<', '&lt;')

def printsessionxml(sdata, history=False):
    """Print the session as an XML document on standard output.

    The layout is ``<session>`` / ``<window>`` / ``<tab>`` / ``<entry>``,
    with each entry holding one URL passed through xmlreplacespecial().

    sdata is the decoded session: ``sdata['windows']`` is a sequence of
    windows, each with a ``'tabs'`` sequence; every tab has an ``'entries'``
    sequence whose items carry a ``'url'``.

    With history true every entry of a tab is emitted; otherwise only the
    last one.  A tab without entries yields an empty ``<tab>`` element.
    """
    # Each print() call takes one pre-formatted string so the output is the
    # same whether print is a statement (Python 2) or a function (Python 3).
    print("<session>")
    for window in sdata['windows']:
        print("  <window>")
        for tab in window['tabs']:
            print("    <tab>")
            entries = tab['entries']
            if not history:
                # Slicing (rather than indexing) tolerates an empty list.
                entries = entries[-1:]
            for entry in entries:
                line = "      <entry>%s</entry>" % xmlreplacespecial(entry['url'])
                if str is bytes:
                    # Python 2 only: emit UTF-8 bytes so non-ASCII URLs can
                    # be written even when stdout has no encoding (a pipe).
                    line = line.encode('utf8', 'xmlcharrefreplace')
                print(line)
            print("    </tab>")
        print("  </window>")
    print("</session>")

def main(argv):
    """Parse the command line and print the session stored in a file.

    argv is the argument list without the program name.  Options:

      -H, --history   list every entry of each tab, not just the last one
      -x, --xml       print XML instead of indented text

    The first positional argument is the path of the session file.  The
    process exits with a usage message when an option is not recognised or
    no file is given.
    """
    usage = "usage: %s [-H|--history] [-x|--xml] SESSIONFILE" % sys.argv[0]
    history = False
    printsession = printsessiontxt

    try:
        opts, argv = getopt.getopt(argv, 'Hx', ('history', 'xml', ))
    except getopt.GetoptError as err:
        sys.exit("%s\n%s" % (err, usage))

    for opt, _val in opts:
        if opt in ('-H', "--history"):
            history = True
        elif opt in ('-x', "--xml"):
            printsession = printsessionxml

    if not argv:
        sys.exit(usage)

    # The with-block closes the file even if reading fails.
    with open(argv[0]) as sesfile:
        sesdata = sesfile.read()
    sesobj = jdecode(extractsession(sesdata))
    printsession(sesobj, history)

# Run main() only when this file is executed as a script, handing it the
# command-line arguments without the program name.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    main(cli_args)