#! /usr/bin/perl -w
# ff2xml:
#   Parse Netscape-style bookmarks (bookmarks.html) into an Openbox pipe
# menu.  Yes, you can parse something _into_ something else...  HTML parsing
# (and inspiration) from Brian Del Vecchio's delicious import tool.  See:
# http://www.hybernaut.com/bdv/delicious-import.html
#
# Blame: oz (@tuxaco.net)
# Usage: ff2xml <bookmarks.html>
#
# TODO: 
#       - clean URIs and description to get valid XML

use strict;
use warnings;

use Digest::MD5 qw(md5_hex);
use HTML::Parser;

# Openbox <execute> command template; printf() replaces %s with the URL.
my $browser = "firefox -a firefox -remote openURL(%s,new-tab)";
my $current = undef;    # record of the link whose text is being read
my @folders = ();       # stack of folders whose <menu> is still open
my %links;              # link records, keyed by the MD5 hex digest of the URL
my $url;                # NOTE(review): not used by any code visible here

# Handler lookup table used by dispatch(): event name first, then tag name.
my %dispatch = (
    'start' => {
        'a'  => \&start_a_tag,
        'h3' => \&start_h3_tag
    },
    'end' => {
        'a'  => \&end_a_tag,
        'dl' => \&end_dl_tag
    }
);

my $file = shift || "bookmarks.html";    # Obviously we need a filename.

# No such file?  Emit a one-item menu saying so and bail out.
if ( !-e $file ) {
    print <<EOF;
<openbox_pipe_menu>
    <item label="Can't load bookmarks." />
</openbox_pipe_menu>
EOF
    exit(127);
}

# Create the HTML parser; start and end tags both go through dispatch().
my $p = HTML::Parser->new(
    'api_version' => 3,
    'handlers'    => {
        'start' => [ \&dispatch, "self, event, tagname, attr" ],
        'end'   => [ \&dispatch, "self, event, tagname, attr" ],
    }
);

# We want to know about these tags.
$p->report_tags( 'a', 'h3', 'dl' );
$p->unbroken_text(1);

# parse_file() reads raw bytes and Openbox expects UTF-8, so have entities
# expanded as UTF-8 octets too; otherwise they do not match the surrounding
# text.  Assumes the bookmarks file is UTF-8, as Firefox writes it.
$p->utf8_mode(1);

# Start menu and parse HTML.
print "<openbox_pipe_menu>\n";
my $parsed = $p->parse_file($file);

# parse_file() returns undef when the file cannot be opened (for instance
# it exists but is not readable); report that instead of an empty menu.
if ( !defined $parsed ) {
    warn "ff2xml: can't read $file: $!\n";
    print "    <item label=\"Can't load bookmarks.\" />\n";
}

# Every entry still on the stack is a <menu> that was opened but never
# closed (truncated or malformed input); close them to keep the XML valid.
print "</menu>\n" for @folders;
print "</openbox_pipe_menu>\n";

exit( defined $parsed ? 0 : 127 );

# -----------------------------------------------------------------------------
# HTML Parser stuff:
# ~~~~~~~~~~~~~~~~~~

# Dispatch events: common entry point for the parser's start and end events.
#
# Looks up the handler registered in %dispatch for this event name and tag
# name and, when there is one, calls it with the very same argument list
# (self, event, tagname, attr).  Tags without a registered handler are
# silently ignored.
sub dispatch($$$$) {
    my ( undef, $event_name, $tag ) = @_;

    my $handler = $dispatch{$event_name}{$tag};
    $handler->(@_) if $handler;
}

# Link start: handle an opening <a> tag.
#
# Records the link in %links (keyed by the MD5 hex digest of its URL), makes
# that record the current link, and installs text_a_tag as the text handler
# so that the link text which follows is emitted as a menu item.
#
# Arguments (per the "self, event, tagname, attr" argspec):
#   $self    - the HTML::Parser object
#   $event   - event name (unused)
#   $tagname - tag name (unused)
#   $attr    - hash reference holding the tag's attributes
#
# Anchors without an href are ignored: no record is created and no text
# handler is installed.
sub start_a_tag($$$$) {
    my ( $self, $event, $tagname, $attr ) = @_;
    my $url = $attr->{'href'};

    # An anchor without a target (e.g. <a name="...">) is not a bookmark.
    return unless defined $url && length $url;

    # md5_hex() croaks on characters above 255, so digest the UTF-8 octets
    # of a copy of the URL rather than the (possibly wide) string itself.
    my $octets = $url;
    utf8::encode($octets);
    my $hash = md5_hex($octets);

    # The URL ends up as XML character data inside <execute>, so the
    # markup-significant characters must be written as entities.
    my %entity_for = ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' );
    $url =~ s/([&<>])/$entity_for{$1}/g;

    $links{$hash}{'hash'} = $hash;    # why not...
    $links{$hash}{'url'}  = $url;     # item url, already XML-escaped

    $current = $links{$hash};         # save current link in global state
    $self->handler( text => \&text_a_tag, "self,dtext" );
}

# Link text: text handler active between <a> and </a>.
#
# Stores the (XML-escaped) link text as the description of the current link
# and prints the matching Openbox <item>, whose Execute action runs the
# $browser command on the link's URL.
#
# Arguments (per the "self,dtext" argspec):
#   $self  - the HTML::Parser object (unused)
#   $dtext - the link text, with HTML entities already decoded
sub text_a_tag($$) {
    my ( $self, $dtext ) = @_;
    $current->{'desc'} = $dtext;

    # Don't display a link if it bears no name.  Tested with a pattern
    # rather than plain truthiness so that a link named "0" is kept and a
    # whitespace-only name is dropped.
    return unless defined $dtext && $dtext =~ /\S/;

    # The description becomes a double-quoted XML attribute value, so the
    # quote and the markup characters all have to be written as entities.
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    $current->{'desc'} =~ s/([&<>"])/$entity_for{$1}/g;

    print "<item label=\"" . $current->{'desc'} . "\">\n";
    print "<action name=\"Execute\"><execute>";
    printf( $browser, $current->{'url'} );    # url was escaped when stored
    print "</execute></action>\n";
    print "</item>\n";
}

# Link end: on </a>, remove the text handler so that text outside of links
# is no longer reported.
#
# Arguments:
#   $self - the HTML::Parser object (remaining dispatch arguments unused)
sub end_a_tag($) {
    my $parser = shift;
    return $parser->handler( 'text', undef );
}

# Folder start: on <h3>, install text_h3_tag as the text handler so that the
# folder title which follows opens a new menu.
#
# Arguments:
#   $self - the HTML::Parser object (event, tagname and attr are unused)
sub start_h3_tag($$$$) {
    my ($parser) = @_;

    $parser->handler( 'text', \&text_h3_tag, 'self,dtext' );
}

# Folder text: text handler active right after <h3>.
#
# Opens an Openbox <menu> for the folder and pushes the folder onto
# @folders, so that the matching </dl> can close the menu again.  The
# handler removes itself, hence only the first chunk of text is used.
#
# Arguments (per the "self,dtext" argspec):
#   $self - the HTML::Parser object
#   $text - the folder title, with HTML entities already decoded
sub text_h3_tag($$) {
    my ( $self, $text ) = @_;

    $self->handler( text => undef );

    # The title becomes a double-quoted XML attribute value, so the quote
    # and the markup characters all have to be written as entities.
    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    );
    ( my $label = $text ) =~ s/([&<>"])/$entity_for{$1}/g;

    # Folder name as kept on the stack: lowercased, spaces turned to dashes.
    ( my $folder = lc $text ) =~ tr/ /-/;

    # The menu id only keeps word characters, so it needs no escaping.
    ( my $id = $folder ) =~ s/\W//g;

    print "<menu label=\"$label\" ";
    print "id=\"menu_$id\">\n";
    push @folders, $folder;
}

# Folder end (sort of): on </dl>, close the most recently opened folder menu.
#
# Pops one entry off @folders and prints the closing </menu> for it.  A
# </dl> seen while the stack is empty (the outermost list, which no folder
# opened) prints nothing.
#
# Arguments:
#   $self - the HTML::Parser object (unused)
sub end_dl_tag($) {
    my ($self) = @_;
    my $off = pop @folders;

    # Test definedness, not truth: a folder named "0" is stored as the
    # false string "0" yet still has an open <menu> that must be closed.
    print "</menu>\n" if defined $off;
}