#!/usr/bin/perl -w
# onePartyPerTweet: keep only tweets that mention exactly one political party
# usage: onePartyPerTweet < file
# 20120202 erikt(at)xs4all.nl

# assumption: there are exactly two names for each party in this list
# (the short name at an even index, the long name at the odd index after it).
# Every entry is a regular expression; the filter loop applies them
# case-insensitively to the tweet text.
@parties = ('\bvvd\b','\bvolkspartij voor vrijheid en democratie\b',
            '\bpvda\b','\bpartij van de arbeid\b',
            # .{1,2} accepts the accented e of "appel" both as a single
            # character and as the two bytes of its UTF-8 encoding, which
            # is what the pattern sees when STDIN is read without decoding
            '\bcda\b','\bchristen[ \-]democratisch app.{1,2}l\b',
            '\bpvv\b','\bpartij voor de vrijheid\b',
            '\bsp\b','\bsocialistische partij\b',
            '\bd66\b','\bdemocraten *66\b',
            '\bgl\b','\bgroen *links\b',
            '\bcu\b','\bchristen *unie\b',
            '\bsgp\b','\bstaatkundig gereformeerde partij\b',
            '\bpvdd\b','\bpartij voor de dieren\b',
            # the + has to be escaped to be taken literally: unescaped, 50+
            # means "5 followed by one or more 0" and matches 50, 500, ...
            # A \b cannot follow a literal +, so (?!\w) ends the name instead.
            '\b50plus\b','\b50\+(?!\w)');

# Compile every party pattern once, case-insensitively. Interpolating a
# different pattern string into the match on each iteration would make perl
# recompile the regex for every party on every input line.
my @party_regexes = map { qr/$_/i } @parties;

# Read tweets from STDIN, one per line, and print the lines that mention
# exactly one party. Lines mentioning no party or several different parties
# are dropped; several mentions of the same party (by either name) are fine.
LOOP: while (my $line = <STDIN>) {
   chomp($line);
   # the first whitespace-delimited token is the user name: match only
   # against what follows it, but print the complete original line
   (my $tweet = $line) =~ s/^\S+\s//;
   my $found = -1; # key of the first party seen; -1 while none was seen
   for my $i (0 .. $#party_regexes) {
      next unless $tweet =~ $party_regexes[$i];
      # both names of a party map to the (even) index of its short name
      my $key = 2*int($i/2);
      if ($found < 0) { $found = $key; }
      # allow multiple mentions of the same party
      # but skip line if different parties are mentioned
      elsif ($found != $key) { next LOOP; }
   }
   if ($found >= 0) { print "$line\n"; }
}

exit(0);
