#!/bin/sh
# kwic01.sh
# D. Gibbon
# KWIC concordancer demo for plain text
#
# Usage: kwic01.sh FILE[.txt]
#
# Reads FILE.txt and writes three files next to it:
#   FILE-words.txt  sorted list of unique lower-cased word types
#   FILE-norm.txt   normalised text: one punctuation-delimited segment per
#                   line, every word wrapped in "++" (e.g. ++the++cat++sat++)
#   FILE-conc.txt   concordance: for each word type, a "WORD: " heading
#                   followed by every segment that contains that word
#
# NOTE(review): the word list deletes punctuation in place, while the
# normalised text starts a new segment after each punctuation mark. A word
# with internal punctuation (e.g. "don't" -> "dont") is therefore listed but
# gets no concordance lines.

# Bracket expression of the punctuation characters that are stripped:
#   - , . : < > ) ( / " \ ' `
# (the backslash is part of the set, as in the original sed expressions).
KWIC_PUNCT="[-,.:<>)(/\"\\'\`]"

# kwic_wordlist: read text on stdin, write the word types to stdout.
# Punctuation is deleted, "[" and "]" become spaces, the text is split into
# one word per line at spaces and tabs, lower-cased, and sorted uniquely.
kwic_wordlist() {
  sed "s|${KWIC_PUNCT}||g" |
    tr '[' ' ' |
    tr ']' ' ' |
    tr ' \011' '\012\012' |
    tr 'A-Z' 'a-z' |
    grep -v '^$' |
    sort -u
}

# kwic_normalize: read text on stdin, write the normalised text to stdout.
# Returns grep's status, i.e. non-zero when the input yields no lines.
kwic_normalize() {
  # 1. Lower-case and join everything into a single line; tabs and square
  #    brackets become spaces so the separators agree with kwic_wordlist.
  # 2. Append "#" after every punctuation mark and turn each "#" into a
  #    newline, which starts a new segment after each mark. A literal "#"
  #    in the input therefore also ends a segment.
  # 3. Delete the punctuation, pad each segment with a space on both sides
  #    and replace every run of spaces by "++", so that each word appears
  #    as ++word++.
  tr 'A-Z' 'a-z' |
    tr '\012\011' '  ' |
    tr '[' ' ' |
    tr ']' ' ' |
    sed "s|${KWIC_PUNCT}|&#|g" |
    tr '#' '\012' |
    sed -e "s|${KWIC_PUNCT}||g" \
      -e 's|^| |' \
      -e 's|$| |' \
      -e 's|  *|++|g' |
    grep .
}

# kwic_concordance: write the concordance to stdout.
#   $1  word list file (one word per line)
#   $2  normalised text file
kwic_concordance() {
  # Read line by line instead of word-splitting `cat`, so that words
  # containing glob characters (* ? ...) are not expanded by the shell.
  while IFS= read -r kwic_word; do
    printf ' \n%s: \n' "$kwic_word"
    # -F: the word is matched literally, never as a regular expression.
    grep -F -e "++${kwic_word}++" "$2" |
      sed -e 's|^| |' -e 's|++| |g'
  done <"$1"
}

# kwic_main: derive the file names from $1 and build all three output files.
# Returns 2 on a usage error and 1 when the input file cannot be read.
kwic_main() {
  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: kwic01.sh FILE[.txt]" >&2
    return 2
  fi

  # Strip a literal ".txt" suffix only.
  FILE=${1%.txt}
  INFILE=$FILE.txt
  CONCFILE=$FILE-conc.txt
  NORMTEXTFILE=$FILE-norm.txt
  WORDLISTFILE=$FILE-words.txt

  if [ ! -r "$INFILE" ]; then
    echo "kwic01.sh: cannot read $INFILE" >&2
    return 1
  fi

  echo "Processing $INFILE, $CONCFILE, $NORMTEXTFILE, $WORDLISTFILE..."

  kwic_wordlist <"$INFILE" >"$WORDLISTFILE" || return 1

  # The status is deliberately ignored: grep reports "no lines" for an
  # empty input, which is not an error here.
  kwic_normalize <"$INFILE" >"$NORMTEXTFILE"

  # ">" truncates any previous concordance before it is rebuilt.
  kwic_concordance "$WORDLISTFILE" "$NORMTEXTFILE" >"$CONCFILE"
}

kwic_main "$@"