#! /usr/bin/perl -w
# ff2xml:
# Parse Netscape type bookmarks into an openbox pipe menu. Yes,
# you can parse something _into_ something else... HTML Parsing (and
# inspiration) from Brian Del Vecchio's delicious import tool. See:
# http://www.hybernaut.com/bdv/delicious-import.html
#
# Blame: oz (@tuxaco.net)
# Usage: ff2xml <bookmarks.html>
#
# TODO:
# - clean URIs and description to get valid XML
use strict;
use warnings;
use Digest::MD5 qw(md5_hex);
use HTML::Parser;
# Command template handed to printf when an item is emitted; the single %s
# receives the bookmark URL.
my $browser = "firefox -a firefox -remote openURL(%s,new-tab)";

# State shared between the parser callbacks.
my $current;    # hash ref for the link currently being parsed
my @folders;    # stack of folder names pushed on <h3>, popped on </dl>
my %links;      # every link seen so far, keyed by the MD5 hex of its URL
my $url;        # not referenced by the visible code (handlers use their own)

# Tag callbacks, looked up first by event name and then by tag name.
my %dispatch = (
    start => {
        a  => \&start_a_tag,
        h3 => \&start_h3_tag,
    },
    end => {
        a  => \&end_a_tag,
        dl => \&end_dl_tag,
    },
);
# Bookmarks file to convert: the first command-line argument, falling back
# to "bookmarks.html" in the current directory.
my $file = shift || "bookmarks.html";

# Bail out with a one-item menu when the file cannot be used.  Testing only
# for existence would let an unreadable file or a directory through, and the
# parser would then quietly produce an empty menu.  "-d _" reuses the stat
# data gathered by the preceding -r test.
if ( !-r $file or -d _ ) {
    print <<EOF;
<openbox_pipe_menu>
<item label="Can't load bookmarks." />
</openbox_pipe_menu>
EOF
    exit(127);
}
# Create the HTML parser.  Start- and end-tag events are both routed through
# dispatch(), which picks the per-tag callback.
my $p = HTML::Parser->new(
    api_version => 3,
    handlers    => {
        start => [ \&dispatch, "self, event, tagname, attr" ],
        end   => [ \&dispatch, "self, event, tagname, attr" ],
    },
);

# These are the only tags we want to hear about.
$p->report_tags( 'a', 'h3', 'dl' );
$p->unbroken_text(1);

# Start the menu and parse the HTML.
print "<openbox_pipe_menu>\n";

# parse_file() returns undef when the file cannot be opened; say so in the
# menu instead of silently emitting an empty one.
if ( !defined $p->parse_file($file) ) {
    print "<item label=\"Can't load bookmarks.\" />\n";
}

# Every name still on the folder stack stands for a <menu> that was opened
# but never closed (truncated or unbalanced input).  Close them so that the
# output stays well-formed.
print "</menu>\n" for @folders;

print "</openbox_pipe_menu>";
exit(0);
# -----------------------------------------------------------------------------
# HTML Parser stuff:
# ~~~~~~~~~~~~~~~~~~
# Dispatch events...
# Common entry point for the parser's start- and end-tag events: looks up
# the callback registered in %dispatch for this event/tag pair and calls it
# with the same four arguments.  Pairs without an entry are ignored.
#   $self    - the parser object
#   $event   - event name, used as the first-level key of %dispatch
#   $tagname - tag name, used as the second-level key of %dispatch
#   $attr    - attribute data as delivered by the parser's "attr" argspec
sub dispatch($$$$) {
    my ( $self, $event, $tagname, $attr ) = @_;

    # Test the outer key first so that an unexpected event name does not
    # autovivify an empty entry in %dispatch.
    my $func =
        exists( $dispatch{$event} ) ? $dispatch{$event}{$tagname} : undef;
    return unless $func;

    # Hand the arguments over explicitly rather than through the implicit
    # @_ sharing of a bare "&$func;" call.
    return $func->( $self, $event, $tagname, $attr );
}
# Link start
# Start callback for <a>: records the link in %links (keyed by the MD5 hex
# of its URL), makes it the current link and installs text_a_tag as the
# parser's text handler so the link title can be picked up.
#   $self - the parser object
#   $attr - hash ref of the tag's attributes; only 'href' is read
# $event and $tagname are accepted for the common callback signature but
# are not used.
sub start_a_tag($$$$) {
    my ( $self, $event, $tagname, $attr ) = @_;

    my $url = $attr->{'href'};

    # An anchor without a target cannot become a menu item; skipping it also
    # avoids "uninitialized value" warnings further down.
    return unless defined($url) && length($url);

    # The key is computed from the URL as found, before any escaping.
    my $hash = md5_hex($url);

    # The URL is later printed as XML character data, so the markup
    # characters must be escaped ('&' first, so that the entities produced
    # here are not escaped a second time).
    $url =~ s/&/&amp;/g;
    $url =~ s/</&lt;/g;
    $url =~ s/>/&gt;/g;

    $links{$hash}{'hash'} = $hash;           # why not...
    $links{$hash}{'url'}  = $url;            # item url, XML-escaped
    $current              = $links{$hash};   # save current link in global state

    $self->handler( text => \&text_a_tag, "self,dtext" );
}
# Link text
# Text callback active while inside an <a>: stores the link title on the
# current link and prints the matching <item> with an Execute action that
# runs the $browser command on the link's URL.
#   $self  - the parser object (unused)
#   $dtext - the link text, delivered through the parser's "dtext" argspec
sub text_a_tag($$) {
    my ( $self, $dtext ) = @_;

    $current->{'desc'} = $dtext;

    # Don't display a link if it bears no name.  Definedness and length are
    # tested rather than truth, so that a bookmark titled "0" is kept.
    return unless defined($dtext) && length($dtext);

    # The title goes into a double-quoted XML attribute: escape the markup
    # characters ('&' first, so the entities produced here stay intact).
    $current->{'desc'} =~ s/&/&amp;/g;
    $current->{'desc'} =~ s/</&lt;/g;
    $current->{'desc'} =~ s/>/&gt;/g;
    $current->{'desc'} =~ s/"/&quot;/g;

    print "<item label=\"" . $current->{'desc'} . "\">\n";
    print "<action name=\"Execute\"><execute>";
    printf $browser, $current->{'url'};
    print "</execute></action>\n";
    print "</item>\n";
}
# Link end
# End callback for </a>: clears the parser's text handler again, undoing
# what start_a_tag installed.
#   $parser - the parser object
sub end_a_tag($) {
    my ($parser) = @_;
    return $parser->handler( 'text', undef );
}
# Folder start
# Start callback for <h3>: installs text_h3_tag as the parser's text
# handler so that the text that follows is treated as a folder name.
#   $parser - the parser object; the remaining callback arguments are unused
sub start_h3_tag($$$$) {
    my ($parser) = @_;
    return $parser->handler( 'text', \&text_h3_tag, 'self,dtext' );
}
# Folder text
# Text callback active after an <h3>: opens a <menu> for the folder and
# pushes the folder onto @folders so that end_dl_tag can close it later.
#   $self - the parser object
#   $text - the folder name, delivered through the parser's "dtext" argspec
sub text_h3_tag($$) {
    my ( $self, $text ) = @_;

    # The visible label keeps the folder name as written, but it goes into
    # a double-quoted XML attribute, so the markup characters are escaped
    # ('&' first, so the entities produced here stay intact).
    my $label = $text;
    $label =~ s/&/&amp;/g;
    $label =~ s/</&lt;/g;
    $label =~ s/>/&gt;/g;
    $label =~ s/"/&quot;/g;
    print "<menu label=\"$label\" ";

    # Only the first text after the <h3> names the folder.
    $self->handler( text => undef );

    # Normalised name kept on the folder stack: blanks become dashes and
    # everything is lower-cased.
    $text =~ tr/ /-/;
    $text = lc($text);

    # The menu id is the normalised name reduced to word characters only.
    my $id = $text;
    $id =~ s/\W//g;
    print "id=\"menu_$id\">\n";

    push @folders, $text;
}
# Folder end (sort of)
# End callback for </dl>: pops the innermost folder off @folders and closes
# its <menu>.  A </dl> seen while the stack is empty prints nothing.
#   $self - the parser object (unused)
sub end_dl_tag($) {
    my ($self) = @_;

    my $off = pop @folders;

    # pop returns undef on an empty stack.  Test for definedness rather
    # than truth: a folder stored as "0" is false but still has an open
    # <menu> that must be closed.
    if ( defined $off ) {
        print "</menu>\n";
    }
}