#!/usr/bin/python
#
# written by Nick Shin - nshin@estss.com
# created Oct 5 2010 and placed in the public domain
#
# this file can be found at www.estss.com/opensource/bkmkscrub/
#
#
# NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
# 
# This is a reference implementation. You are free to copy,
# modify, or redistribute.


import getopt               # getopt GetoptError
import sys                  # exit stdin argv
import re                   # compile search
import json                 # loads


# Firefox's Bookmark Organizer does not export these root folders, so they
# are left out of our output as well.
root_folders_to_scrub = (
    "tagsFolder",
    "unfiledBookmarksFolder",
)


# JSONBKMKS {{{
# --------------------------------------------------------------------------------

# Short escape sequences defined by the JSON grammar.  Every other character
# outside printable ASCII is written by jsonsafe() as a \uXXXX escape.
jsonescape = {  '\b': '\\b',
                '\t': '\\t',
                '\n': '\\n',
                '\f': '\\f',
                '\r': '\\r',
                '"' : '\\"',
                '\\': '\\\\' }

try:
    _json_integer_types = ( int, long )     # Python 2 has a separate long type
except NameError:
    _json_integer_types = ( int, )          # Python 3: int covers everything


def jsonsafe( value ):
    """Return value rendered as a JSON literal made of ASCII characters only.

    value -- None, a bool, an integer or a text string

    None becomes null, bools become true/false and integers are written as
    bare numbers.  Strings come back wrapped in double quotes: characters
    listed in jsonescape use their short escape, printable ASCII is copied
    as is, and anything else (control characters, DEL, non-ASCII) becomes a
    \\uXXXX escape so that no information is lost and the result can be
    read back by json.loads().
    """
    if value is None:
        return 'null'
    if isinstance( value, bool ):           # test before int: bool is an int subclass
        return 'true' if value else 'false'
    if isinstance( value, _json_integer_types ):
        return str( value )

    pieces = []
    for ch in value:
        if ch in jsonescape:
            pieces.append( jsonescape[ch] )
        elif ' ' <= ch <= '~':                  #  i.e. 0x1f < ch < 0x7f
            pieces.append( ch )
        else:
            code = ord( ch )
            if code > 0xffff:
                # JSON escapes carry 16 bits, so characters beyond the
                # basic plane are written as a UTF-16 surrogate pair
                code -= 0x10000
                pieces.append( '\\u{0:04x}\\u{1:04x}'.format(
                    0xd800 + ( code >> 10 ), 0xdc00 + ( code & 0x3ff ) ) )
            else:
                pieces.append( '\\u{0:04x}'.format( code ) )
    return '"' + "".join( pieces ) + '"'


def jsonbkmks( jsonobj, flip, scrub, scrubtitles, _depth = 0 ):
    """Serialize a bookmark node, and everything below it, as streamlined JSON text.

    jsonobj      -- one node of the parsed bookmarks tree (a dict); its 'type'
                    entry selects folder, separator or plain bookmark handling
    flip         -- when true, the children written at nesting level 2 are
                    emitted in reverse order
    scrub        -- when true, nodes whose description contains DO_NOT_EXPORT
                    and folders matching scrubtitles are left out
    scrubtitles  -- strings to look for in folder titles; a folder is dropped
                    when any of them occurs in its title (only when scrub is
                    true); may be None
    _depth       -- nesting level used for tab indentation; outside callers
                    leave it at the default

    Returns the text for this node, or "" when the node is scrubbed.  Nested
    nodes end in ",\\n" so the parent can trim the comma after its last child;
    a top-level folder with children is closed without a trailing comma.
    """
    result = ""
    indent = "\t" * _depth
    inner = "\t" * ( _depth + 1 )              # indentation of "annos" / "children"

    # Only the first annotation decides about scrubbing.  A missing, null or
    # empty 'annos' entry means "no annotations" instead of an IndexError.
    annos = jsonobj.get( 'annos' ) or []
    first = annos[0] if annos else None

    if jsonobj['type'] == "text/x-moz-place-container":
        # check if folder is on the scrub list
        if scrub and scrubtitles is not None:
            if 'title' in jsonobj:
                for title in scrubtitles:
                    if title in jsonobj['title']:
                        return ""

            if first is not None and first['name'] == "bookmarkProperties/description":
                # this is where the magic happens
                if first['value'] and 'DO_NOT_EXPORT' in first['value']:
                    return ""

        if 'root' in jsonobj and jsonobj['root'] in root_folders_to_scrub:
            return ""

        result += indent + '{ "title" : '
        if 'title' in jsonobj:
            result += jsonsafe( jsonobj['title'] ) + ', '
        else:
            result += '"", '

        if 'root' in jsonobj:
            result += '"root" : ' + jsonsafe( jsonobj['root'] ) + ', '

        if 'livemark' in jsonobj:
            result += '"livemark" : ' + str( jsonobj['livemark'] ) + ', '

        result += '"type" : "text/x-moz-place-container"'
        children = jsonobj.get( 'children' )
        closed = False

        # drill down the annos array
        if first is not None and first['name'] != 'placesInternal/GUID':
            result += ',\n' + inner + '"annos" : ['
            if 'livemark' in jsonobj:
                # livemarks keep only their feed / site annotations
                anno = ""                       # "mini" string to easily strip trailing comma
                for entry in annos:
                    if entry['name'] in ( "livemark/feedURI", "livemark/siteURI" ):
                        anno += '\n' + inner + '{ "value" : ' \
                            + jsonsafe( entry['value'] ) \
                            + ', "name" : "' + entry['name'] + '" },'
                result += anno[:-1] + ' ]'      # strip trailing comma to make it json.loads() compliant
            else:
                result += ' { "value" : ' + jsonsafe( first['value'] ) \
                    + ', "name" : ' + jsonsafe( first['name'] ) + ' } ]'

            if not children:
                result += ' },\n'
            closed = True

        # drill down the children array
        if children:
            result += ',\n' + inner + '"children" : [\n'
            ordered = children
            if flip and _depth == 1:            # i.e. the children sit at level 2
                ordered = reversed( children )
            nested = "".join( jsonbkmks( child, flip, scrub, scrubtitles, _depth + 1 )
                              for child in ordered )
            result += nested[:-2] + '\n'        # strip trailing ",\n" to make it json.loads() compliant
            if _depth > 0:
                result += indent + '] },\n'
            else:
                result += '] }\n'
            closed = True

        if not closed:
            result += ' },\n'

    elif jsonobj['type'] == "text/x-moz-place-separator":
        result += indent + '{ "title" : "", "type" : "text/x-moz-place-separator" },\n'

    else:
        if first is not None and first['name'] == "bookmarkProperties/description":
            # this is where the magic happens
            if scrub and first['value'] and 'DO_NOT_EXPORT' in first['value']:
                return ""

        result += indent + '{ "title" : '
        if 'title' in jsonobj:
            result += jsonsafe( jsonobj['title'] )
        else:
            result += '""'
        result += ', "uri" : ' + jsonsafe( jsonobj['uri'] ) + ', '
        if 'keyword' in jsonobj:
            result += '"keyword" : ' + jsonsafe( jsonobj['keyword'] ) + ', '
        result += '"type" : "text/x-moz-place"'

        # a bookmark keeps its first annotation only when that annotation
        # carries the DO_NOT_EXPORT marker (reached when scrub is off)
        if first is not None and first['name'] != 'placesInternal/GUID' \
            and first['value'] and 'DO_NOT_EXPORT' in first['value']:
            result += ',\n' + inner + '"annos" : [ { "value" : ' + jsonsafe( first['value'] ) \
                + ', "name" : ' + jsonsafe( first['name'] ) + ' } ] },\n'
        else:
            result += ' },\n'

    return result


# JSONBKMKS }}}
# JSON2HTML {{{
# --------------------------------------------------------------------------------

def htmlsafe( value ):
    """Return value as HTML "text": the characters & < > " ' become entities."""
    # '&' has to come first, otherwise the ampersands introduced by the other
    # entities would be escaped a second time
    substitutions = ( ( '&', '&amp;' ),
                      ( '<', '&lt;' ),
                      ( '>', '&gt;' ),
                      ( '"', '&quot;' ),
                      ( "'", '&#39;' ) )
    escaped = value
    for raw, entity in substitutions:
        escaped = escaped.replace( raw, entity )
    return escaped


# special case URI items to also scrub: local files plus hosts that are only
# reachable from this machine or from a private / link-local network.
#
# This is a plain Python regular expression: there are no /.../ delimiters
# (Python would match them literally) and every '.' inside an address is
# escaped so that it cannot stand in for an arbitrary character.
uri_srch_pattern = "|".join( (
    r"^file://",
    r"^https?://localhost",
    r"^https?://10\.",                              # 10.0.0.0/8
    r"^https?://192\.168\.",                        # 192.168.0.0/16
    r"^https?://172\.(?:1[6-9]|2[0-9]|3[01])\.",    # 172.16.0.0/12
    r"^https?://169\.254\.",                        # link-local
    r"^https?://127\.0\.0\.1",
) )
uri_regex_pattern = re.compile( uri_srch_pattern )


def extractURI( jsonobj, scrub, indent ):
    """Render one bookmark as a <DT><A ...> line of a bookmarks HTML file.

    jsonobj -- bookmark node; 'uri' is required, 'keyword' and 'title' are
               optional
    scrub   -- when true, a URI matching uri_regex_pattern produces no output
    indent  -- whitespace prefix for the generated line

    Returns the newline-terminated HTML line, or '' when the URI is scrubbed.
    """
    uri = jsonobj['uri']
    if scrub and uri_regex_pattern.search( uri ):
        return ''

    # the URI ends up inside a double-quoted attribute, so it needs the same
    # escaping as every other attribute value
    result = indent + '<DT><A HREF="' + htmlsafe( uri ) + '"'
    if 'keyword' in jsonobj:
        result += ' SHORTCUTURL="' + htmlsafe( jsonobj['keyword'] ) + '"'

    # always close the tag: an untitled bookmark gets empty link text rather
    # than a dangling '<A ...' fragment
    title = jsonobj.get( 'title' ) or ''
    result += '>' + htmlsafe( title ) + '</A></DT>\n'
    return result


def json2html( jsonobj, scrub, scrubtitles, _depth = 0 ):
    """Render a bookmark node, and everything below it, as bookmarks HTML.

    jsonobj      -- one node of the parsed bookmarks tree (a dict)
    scrub        -- when true, nodes whose description contains DO_NOT_EXPORT,
                    folders matching scrubtitles and bookmarks whose URI is
                    rejected by extractURI() are left out
    scrubtitles  -- strings to look for in folder titles (only when scrub is
                    true); may be None
    _depth       -- nesting level used for tab indentation; outside callers
                    leave it at the default

    Returns the HTML fragment for this node, or "" when nothing is exported.
    """
    result = ""
    indent = "\t" * _depth

    # Only the first annotation is looked at for the description.  A missing,
    # null or empty 'annos' entry means "no annotations", not an IndexError.
    annos = jsonobj.get( 'annos' ) or []
    first = annos[0] if annos else None

    if 'title' in jsonobj:

        # check if folder is on the scrub list
        if jsonobj['type'] == "text/x-moz-place-container":
            if scrub and scrubtitles is not None:
                for title in scrubtitles:
                    if title in jsonobj['title']:
                        return ""

            if 'root' in jsonobj and jsonobj['root'] in root_folders_to_scrub:
                return ""

        if 'uri' in jsonobj:
            link = extractURI( jsonobj, scrub, indent )
            if not link:
                # the link was scrubbed: drop the whole entry so that its
                # description is not emitted on its own
                return ""
            result += link

        elif 'livemark' in jsonobj:
            # extract the feed details
            if 'annos' in jsonobj:
                result += indent + "<DT><A"
                for entry in annos:
                    if entry['name'] == "livemark/feedURI":
                        result += " FEEDURL=\"" + htmlsafe( entry['value'] ) + "\""
                    elif entry['name'] == "livemark/siteURI":
                        result += " HREF=\"" + htmlsafe( entry['value'] ) + "\""

                if 'keyword' in jsonobj:
                    result += " SHORTCUTURL=\"" + htmlsafe( jsonobj['keyword'] ) + "\""
                result += ">" + htmlsafe( jsonobj['title'] ) + "</A></DT>\n"

        else:
            # extract the folder details
            result += indent + "<DT><H3"
            if 'root' in jsonobj and jsonobj['root'] == "toolbarFolder":
                result += ' PERSONAL_TOOLBAR_FOLDER="true"'
            result += ">" + htmlsafe( jsonobj['title'] ) + "</H3>\n"

        # descriptions are printed below the URL links
        if first is not None and first['name'] == "bookmarkProperties/description":
            # this is where the magic happens
            if scrub and first['value'] and 'DO_NOT_EXPORT' in first['value']:
                return ""
            result += "<DD>" + htmlsafe( first['value'] or "" ) + "</DD>\n"

    else:
        if jsonobj['type'] == "text/x-moz-place-separator":
            result += indent + "<HR>\n"

        elif 'uri' in jsonobj:
            result += extractURI( jsonobj, scrub, indent )

    # drill down the children array
    children = jsonobj.get( 'children' )
    if children:
        subresult = "".join( json2html( child, scrub, scrubtitles, _depth + 1 )
                             for child in children )
        # a list whose entries were all scrubbed is omitted altogether
        if subresult:
            result += indent + "<DL>\n" + subresult + indent + "</DL>\n"

    return result


# JSON2HTML }}}
# MAIN {{{
# --------------------------------------------------------------------------------

def usage():
    """Print the command line help text to stdout."""
    # print( single_argument ) behaves the same as a Python 2 print statement
    # and as the Python 3 print function, so the output is unchanged
    print( "Usage: " + sys.argv[0] + " [options]" )
    print( """
Options are:
    -h --help                This usage message.

    -o filename              Output results to [ filename ]
    --output=filename        or else, output to [ stdout ].

    -i filename              Read JSON data from [ filename ]
    --input=filename         or else, read data from [ stdin ].

    -f --flip                Flip first sub-children list order.
                             Note: flag is ignored during -H --HTML

    -s --scrub               Scrub [ DO_NOT_EXPORT ] JSON entries.

    -t title                 Additional FOLDERS (titles) to scrub out.
    --scrubtitles=title      This can be stacked.

    -H --HTML                Generate HTML bookmarks instead of JSON data.

Flip option:
    The Firefox Bookmark Organizer's restore feature flips the first
    sub-children list.  This option "fixes up" the streamlined data
    for the restore procedure to work properly.

Scrub Title option:
    An optional array parameter that specifies additional folders to
    "scrubout".  This can be used to filter folders which do not have
    description boxes (i.e. "Bookmarks Menu", "Tags", "Unsorted
    Bookmarks", etc.). 
    This can be stacked: --scrubtitles=title1 --scrubtitles=title2 etc.
""" )


# Matches either a complete JSON string literal or a comma that is followed
# (after optional whitespace) by a closing brace / bracket.  Listing the string
# alternative first makes the scanner step over string contents untouched.
_trailing_comma_regex = re.compile( r'"(?:\\.|[^"\\])*"|,(?=\s*[}\]])' )


def _strip_trailing_commas( text ):
    """Remove commas that directly precede a closing '}' or ']' outside of strings."""
    def keep_strings( match ):
        token = match.group( 0 )
        return token if token.startswith( '"' ) else ''

    return _trailing_comma_regex.sub( keep_strings, text )


def _read_json_text( lines ):
    """Join the stripped lines of an input stream, skipping '#' comment lines."""
    # DEBUGGING aid: manually built JSON files may carry "comment" lines
    return "".join( line.strip() for line in lines if not line.startswith( '#' ) )


def main():
    """Command line entry point.

    Parses the options described by usage(), reads the bookmark JSON from the
    input file or stdin, converts it with jsonbkmks() or json2html() and
    writes the result to the output file or stdout.  Exits with status 2 on
    an invalid command line.
    """
    import io       # local import: io.open gives explicit encodings on Python 2 and 3

    try:
        opts, args = getopt.getopt( sys.argv[1:],
                                    "ho:i:fst:H",
                                    [ "help",
                                      "output=", "input=",
                                      "flip", "scrub", "scrubtitles=",
                                      "HTML"
                                    ] )
    except getopt.GetoptError as err:
        print( str(err) )
        usage()
        sys.exit(2)

    datain  = None
    dataout = None
    flip  = False
    scrub = False
    scrubtitles = []
    html = False
    for o, a in opts:
        if o in ( "-h", "--help" ):
            usage()
            sys.exit()
        elif o in ( "-o", "--output" ):
            dataout = a
        elif o in ( "-i", "--input" ):
            datain = a
        elif o in ( "-f", "--flip" ):
            flip = True
        elif o in ( "-s", "--scrub" ):
            scrub = True
        elif o in ( "-t", "--scrubtitles" ):
            scrubtitles.append( a )
        elif o in ( "-H", "--HTML" ):
            html = True
        else:
            assert False, "unhandled option"

    # read in text
    if datain is None:
        data = _read_json_text( sys.stdin )
    else:
        # the with block closes the file even when reading fails
        with io.open( datain, 'r', encoding = 'utf-8' ) as f:
            data = _read_json_text( f )

    # Firefox Bookmark Organizer's exporter leaves trailing commas in lists.
    # strip them to make the text json.loads() compliant
    data = _strip_trailing_commas( data )

    # convert text to JSON objects
    objs = json.loads( data )

    if not html:
        results = jsonbkmks( objs, flip, scrub, scrubtitles )
    else:
        results = json2html( objs, scrub, scrubtitles )

    if dataout is None:
        print( results )
    else:
        # io.open wants text; on Python 2 a pure-ASCII result may still be a
        # byte string.  Writing UTF-8 keeps non-ASCII titles from raising
        # UnicodeEncodeError.
        if isinstance( results, bytes ):
            results = results.decode( 'utf-8' )
        with io.open( dataout, 'w', encoding = 'utf-8' ) as f:
            f.write( results )


# run the command line interface only when this file is executed as a script;
# importing it as a module just defines the functions above
if __name__ == "__main__":
    main()


# MAIN }}}
# --------------------------------------------------------------------------------