#!/usr/bin/perl -w
# eval: evaluate output of r
# usage: bin/r | bin/eval directory threshold
# notes: 
# directory is the directory with positive and negative words (positive.txt/negative.txt)
# threshold is the number of words evaluated at the top and at the bottom of the input
# 20160426 erikt(at)xs4all.nl

use strict;

# name under which this script was started; prefixes the diagnostics
my $command = $0;

# take the two leading arguments off the command line: the word list
# directory and the threshold, which must consist of digits only
my ($baseDir,$threshold) = splice(@ARGV,0,2);
unless (defined $threshold and $threshold =~ /^\d+$/) {
   die "usage: $command directory threshold\n";
}

# read positive words: one word per line from $baseDir/positive.txt;
# surrounding white space is stripped and blank lines are skipped
my %positive = ();
# three-argument open with an explicit read mode: special characters in
# $baseDir (a leading ">", a trailing "|", leading spaces) cannot change
# the way the file is opened; $! reports why the open failed
open(my $positiveFile,"<","$baseDir/positive.txt") or
   die "$command: cannot read file $baseDir/positive.txt: $!\n";
while (my $line = <$positiveFile>) {
   chomp($line);
   $line =~ s/^\s+//;
   $line =~ s/\s+$//;
   if ($line ne "") { $positive{$line} = 1; }
}
close($positiveFile);

# read negative words: one word per line from $baseDir/negative.txt;
# surrounding white space is stripped and blank lines are skipped
my %negative = ();
# three-argument open with an explicit read mode: special characters in
# $baseDir (a leading ">", a trailing "|", leading spaces) cannot change
# the way the file is opened; $! reports why the open failed
open(my $negativeFile,"<","$baseDir/negative.txt") or
   die "$command: cannot read file $baseDir/negative.txt: $!\n";
while (my $line = <$negativeFile>) {
   chomp($line);
   $line =~ s/^\s+//;
   $line =~ s/\s+$//;
   if ($line ne "") { $negative{$line} = 1; }
}
close($negativeFile);

# Walk through the lines on STDIN; the word of a line is its last
# white space separated field. The first $threshold words are classified
# right away as positive, negative or unknown. @buffer keeps the most
# recent $threshold words so that the end of the input can be classified
# after the loop.
my $lineNbr = 0;
my @buffer = ();
my $positive = 0;
my $negative = 0;
my @unknown = ();
while (my $line = <STDIN>) {
   chomp($line);
   my @fields = split(/\s+/,$line);
   # empty and white space only lines contain no word: skip them before
   # counting, so that no undefined value is looked up, stored or printed
   next if (not @fields);
   my $word = $fields[-1];
   $lineNbr++;
   if ($lineNbr <= $threshold) {
      if (defined $positive{$word}) { $positive++; }
      elsif (defined $negative{$word}) { $negative++; }
      else { push(@unknown,$word); }
   }
   push(@buffer,$word);
   # drop the oldest words: at most $threshold words are kept
   while ($#buffer >= $threshold) { shift(@buffer); }
}
# the summary line is only printed when at least one known word was seen
# (the percentage would otherwise divide by zero)
if ($positive+$negative > 0) { printf "top $threshold:    positive: %3d; negative: %3d; percentage: %0.1f%%\n",$positive,$negative,100*$positive/($positive+$negative); }
print "unknown:";
foreach my $unknownWord (@unknown) { print " ".$unknownWord; }
print "\n";

# classify the words left in @buffer by the input loop (the most recent
# ones, at most $threshold) and report them as the bottom of the input
$positive = 0;
$negative = 0;
@unknown = ();
foreach my $word (@buffer) {
   if (defined $positive{$word}) { $positive += 1; next; }
   if (defined $negative{$word}) { $negative += 1; next; }
   push(@unknown,$word);
}
# number of words found in one of the two lists; the summary line is
# skipped when it is zero
my $known = $positive+$negative;
if ($known > 0) {
   printf "bottom $threshold: positive: %3d; negative: %3d; percentage: %0.1f%%\n",
      $positive,$negative,100*$positive/$known;
}
print "unknown:";
foreach my $word (@unknown) { print " ".$word; }
print "\n";

exit(0);
