#!/bin/sh
# kwic01.sh
# D. Gibbon
# KWIC concordancer demo for plain text
# Usage: kwic01.sh FILE[.txt]
#
# Reads FILE.txt and writes three files next to it:
#   FILE-words.txt  sorted list of the distinct lower-cased words
#   FILE-norm.txt   one normalised context per line, words joined by "++"
#   FILE-conc.txt   for every word, the contexts in which it occurs
# A "#" in the input ends one context and starts the next; line breaks
# inside a context are treated as ordinary spaces.

# Characters deleted before indexing:  - , . : < > ) ( / " \ ' `
# The "-" stays first so the sed bracket expression takes it literally.
KWIC_PUNCT='-,.:<>)(/"\'"'"'`'

# kwic_clean: stdin -> stdout filter shared by the word list and the
# context file, so that both see exactly the same tokens.  Lower-cases
# ASCII letters, deletes KWIC_PUNCT and turns "[", "]" and tabs into spaces.
kwic_clean() {
  tr 'A-Z' 'a-z' |
    sed -e "s|[$KWIC_PUNCT]||g" -e 's/[][]/ /g' |
    tr '\011' ' '
}

# kwic_wordlist FILE: print the distinct words of FILE, one per line, sorted.
kwic_wordlist() {
  kwic_clean < "$1" |
    tr ' #' '\012\012' |
    grep -v '^$' |
    sort -u
}

# kwic_contexts FILE: print one line per "#"-separated context of FILE in
# the form ++word++word++...++, so that "++word++" matches whole words only.
kwic_contexts() {
  kwic_clean < "$1" |
    tr '\012' ' ' |
    tr '#' '\012' |
    # Drop contexts without any word *before* padding; afterwards every
    # line would contain at least "++" and could no longer be told apart.
    grep '[^ ]' |
    # Pad both ends, then collapse each run of one or more spaces.  The
    # pattern must not match the empty string, otherwise "++" would be
    # inserted between every pair of characters.
    sed -e 's/^/ /' -e 's/$/ /' -e 's/[ ][ ]*/++/g'
}

# kwic_concordance WORDLIST CONTEXTS: for each word print a blank-ish
# separator line, a "word: " heading and the matching contexts, indented
# and with the "++" joins turned back into spaces.
kwic_concordance() {
  while IFS= read -r kwic_word; do
    printf ' \n%s: \n' "$kwic_word"
    # -F: the word is matched literally, never as a regular expression.
    grep -F -e "++$kwic_word++" < "$2" |
      sed -e 's/^/ /' -e 's/++/ /g'
  done < "$1"
}

# kwic_main FILE[.txt]: build the three output files for FILE.txt.
# Returns 2 on a missing argument and 1 if the input cannot be read.
kwic_main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: kwic01.sh FILE[.txt]" >&2
    return 2
  fi

  # Strip a literal ".txt" suffix only (not "any character" + "txt").
  FILE=${1%.txt}
  INFILE=$FILE.txt
  CONCFILE=$FILE-conc.txt
  NORMTEXTFILE=$FILE-norm.txt
  WORDLISTFILE=$FILE-words.txt

  if [ ! -r "$INFILE" ]; then
    echo "kwic01.sh: cannot read $INFILE" >&2
    return 1
  fi

  echo "Processing $INFILE, $CONCFILE, $NORMTEXTFILE, $WORDLISTFILE..."

  #=====================================================================
  # Remove punctuation marks and store word types alphabetically
  kwic_wordlist "$INFILE" > "$WORDLISTFILE"
  #=====================================================================
  # Store the normalised contexts, one per line
  kwic_contexts "$INFILE" > "$NORMTEXTFILE"
  #=====================================================================
  # Look every word type up in the contexts; the redirection truncates
  # any concordance left over from an earlier run.
  kwic_concordance "$WORDLISTFILE" "$NORMTEXTFILE" > "$CONCFILE"
}

kwic_main "$@"