#!/bin/sh
# smurfentaal: translate Dutch text file to Smurf language
# usage: smurfentaal < txt
# 20061106 erikt@xs4all.nl

# Installation layout: the helper programs are run from $BINDIR, and $DICT
# is the file passed as the first argument to smurf-nl.
BASEDIR="/usr/local/WWW/A/.2/t/tjongkim/private/smurfentaal"
BINDIR="$BASEDIR/bin"
DICT="$BASEDIR/etc/dutch"

# smurfentaal: run the translation pipeline, reading stdin and writing stdout.
# Globals:   BINDIR, DICT (read)
# Arguments: any; forwarded unchanged to smurf-nl after the $DICT argument
# Returns:   exit status of the last pipeline stage (detokenize)
smurfentaal() {
   # convert html/xml to standard format
   "$BINDIR/xml2xml" |
      # take care of NOS Teletekst format: insert space after sentence
      sed 's/\([a-zA-Z].\)\.\([A-Z]\)/\1. \2/g' |
      # tokenize: separate punctuation from words
      "$BINDIR/tokenize" |
      # translate text to Smurf language; "$@" (not $*) keeps each argument
      # as a single word, so values with spaces or glob characters survive
      "$BINDIR/smurf-nl" "$DICT" "$@" |
      # detokenize: glue punctuation to words
      "$BINDIR/detokenize"
}

smurfentaal "$@"

# add: 
# | nroff | grep -v '^ *$' 
# for a version with formatted text (includes hyphenated words)

exit 0
