A range of UNIX scripts was used to convert the input database formats from the individual projects into the normalised database form, and to join the attributes into one database using the orthography as a join attribute.
Perhaps the most interesting script is the one designed to convert the C program of B1 into the database format; it is shown below.
Code:
#!/bin/sh
# makeb1
# D. Gibbon 6.12.1994
# Stage 1: turn the C source b1.wl into sorted, unique
# "orthography attribute_value" pairs in b1.att.val.
#
#   1. Blank lines are removed first, so the 30 lines skipped next are the
#      first 30 NON-blank lines of b1.wl (presumably the file preamble).
#   2. Lines containing a comment opener, a brace or a backslash, the words
#      "short"/"long", or "wnr"/"wanr" are dropped.
#   3. sed rewrites the remaining calls into "keyword argument" records:
#        insert_sim_value(simval=crea_simval(), X);  ->  value X
#        insert_ent_ATTR(ent, simval);               ->  ATTR
#        wortart(ent, POS);                          ->  wortart POS
#        create_lexikon_WORD(void)                   ->  create_lexikon WORD
#      (No rule for "crea_sem(wnr);" is needed: step 2 already removed every
#      line that mentions wnr.)
#   4. awk remembers the current word and value and prints one pair per
#      attribute record.
#
# The patterns are single-quoted and free of stray backslashes, because
# escapes such as "\ " and "\/" are undefined in POSIX basic regular
# expressions and make recent GNU grep print warnings.
grep -v '^ *$' b1.wl |
  gawk 'NR > 30' |
  grep -v \
    -e '/\*' \
    -e '[\{}]' \
    -e 'short' \
    -e 'long' \
    -e 'wnr' \
    -e 'wanr' |
  sed \
    -e 's/insert_sim_value(simval=crea_simval(),/value/g' \
    -e 's/);//g' \
    -e 's/insert_ent_//g' \
    -e 's/(ent, simval//g' \
    -e 's/wortart(ent,/wortart/g' \
    -e 's/(void)//g' \
    -e 's/create_lexikon_/create_lexikon /g' |
  gawk '
    # Start of a new entry: remember its orthography.
    $1 == "create_lexikon" { word = $2; next }
    # Remember the value to attach to the attribute records that follow.
    $1 == "value"          { value = $2; next }
    # Part of speech is printed without a value suffix.
    $1 == "wortart"        { print word, "wortart_" $2; next }
    # Any other record is an attribute name: pair it with the current value.
                           { print word, $1 "_" value }
  ' |
  sort -u > b1.att.val
# Split b1.att.val into its two whitespace-separated columns:
#   b1.orth.att - column 1 (orthographies), sorted with duplicates removed
#   b1.orth.val - column 2 (attribute_value tokens), one per input line
b1_att_val_column() {
  gawk -v n="$1" '{ print $n }' b1.att.val
}
b1_att_val_column 1 | sort -u > b1.orth.att
b1_att_val_column 2 > b1.orth.val
# check for duplicate keywords and create disjunctions
gawk ' { if ($1 == previous_1 && $2 != previous_2 )
{ printf(";%s",$2) }
else { previous_1 = $1
previous_2 = $2
printf("\n%s", $2) }
} ' b1.att.val | grep -v "^$" > b1.dis
# Pair each orthography (b1.orth.att) with its disjunction (b1.dis), line by
# line, and write "orthography B1 disjunction" records to b1.lex, sorted
# with duplicates removed.
#
# paste joins the columns with a space instead of its default tab, so a row
# whose two columns are both empty is a genuinely blank line and is removed
# by the grep below; awk splits on any whitespace, so the records themselves
# are unaffected by the delimiter.
paste -d ' ' b1.orth.att b1.dis |
  grep -v '^ *$' |
  sort -u |
  gawk '{ print $1, "B1", $2 }' > b1.lex
echo Finished