#!/usr/bin/perl -w
# ner2html: convert ner tag format to html
# usage: ner2html [-o] < file
# note: option -o: use the same color for all named entity types 
# 20050411 erikt@science.uva.nl

use strict;
use warnings;
use Getopt::Std;
use vars qw($opt_o);

# getopt("") declares no switches that take an argument; every
# single-letter switch on the command line (including -o) is accepted as
# a boolean flag and sets the matching $opt_<letter> variable to 1.
getopt("");

# Colour used for each known entity type. Tags that are not listed here
# (and are not "O") are rendered in light grey.
my %color = (
   "PER"  => "red",
   "ORG"  => "blue",
   "LOC"  => "green",
   "MISC" => "purple",
);
if (defined $opt_o) {
   # -o: use one colour for every entity type.
   $color{$_} = "red" for keys %color;
}

# escape_html(TEXT): return TEXT with the characters that are special in
# HTML (& < > ") replaced by character references, so that a token can
# never be interpreted as markup in the generated page.
sub escape_html {
   my ($text) = @_;
   $text =~ s/&/&amp;/g;   # must come first: the other rules produce "&"
   $text =~ s/</&lt;/g;
   $text =~ s/>/&gt;/g;
   $text =~ s/"/&quot;/g;
   return $text;
}

print "<html><head><title>NER output</title></head><body>\n";

# Legend: every known entity type printed in its own colour.
foreach my $type (sort keys %color) {
   print <<THEEND;
<font color="$color{$type}">
$type
</font>
THEEND
}

# Each input line holds whitespace-separated tokens of the form word/TAG
# or word~TAG. Consecutive tokens with the same entity type share one
# <font> element.
while (my $line = <STDIN>) {
   chomp($line);
   if ($line !~ /^\s*$/) { print "<p>\n"; }
   my $lastTag = "O";
   # split ' ' skips leading whitespace, so an indented line does not
   # produce an empty first token.
   foreach my $field (split(' ', $line)) {
      my ($w, $t) = split(/[\~\/]+/, $field);
      $w = ""  unless defined $w;   # token consisted of separators only
      $t = "O" unless defined $t;   # untagged token: outside any entity
      if ($w eq "&slash;") { $w = "/"; }   # input escape for a literal slash
      $w = escape_html($w);
      $t =~ s/^[BEI]-//; # warning: ignores E and B tags
      my $changed = ($lastTag ne $t);
      # Close the previous entity before anything of a different type.
      if ($changed and $lastTag ne "O") { print "</font>\n"; }
      if ($changed and defined $color{$t}) {
         print <<THEEND;
<font color="$color{$t}">
$w
THEEND
      } elsif ($changed and $t ne "O") {
         # Entity type without a configured colour.
         print <<THEEND;
<font color="lightgrey">
$w
THEEND
      } else { print "$w\n"; }
      $lastTag = $t;
   }
   # An entity that runs to the end of the line is closed here.
   if ($lastTag ne "O") { print "</font>\n"; }
}
print "</body></html>\n";
exit(0);

