Warning: This parser is far from being an optimised implementation. It is intended for teaching use only.
The parser is based on the compiled context-free nondeterministic recogniser, and contains enhancements to the recogniser which enable it to build up a structure description in the form of a labelled bracketing as parsing proceeds.
Like the recogniser, it is possible to compile this directly from the context-free rule notation.
In addition, the parser also contains a trace mechanism which provides a history of all rules which were applied (including during backtracking), in the order in which they were applied, and stack overflow protection mechanism.
Task:
Document the techniques used to enhance the recogniser.
// cf_parse_td_lr_nd_com.pc
// D.Gibbon, 9 Nov 2001
string st[200];
int smax=199;
int tr=0;
// Utils
first(string s){
int firstlen;
string f;
firstlen=strstr(s," ",0);
if(firstlen==-1)
f=s;
else
f=strleft(s,firstlen);
return f;
}
rest(string s){
int firstlen;
string r;
firstlen=strstr(s," ",0);
if(firstlen==-1)
r="";
else
r=strright(s,strlen(s)-firstlen-1);
return r;
}
oflow(int d){
if(d>smax){
alert("Stack overflow!");
return 1;
}
else
return 0;
}
dotr(string s,string in){
puts(s+" "+in+"\n");
}
// Lexical rules
add(string in, int d){
string f,r,out;
if(oflow(d))
return "!";
if(tr)
dotr("add?",in);
st[++d]="";
f=first(in);
r=rest(in);
if(f=="very"||f=="extremely"){
out=r;
st[d]="(ADD "+f+")";
}
else
out="!";
return out;
}
adv(string in, int d){
string f,r,out;
if(oflow(d))
return "!";
if(tr)
dotr("adv?",in);
st[++d]="";
f=first(in);
r=rest(in);
if(f=="easily"||f=="obviously"||f=="never"){
out=r;
st[d]="(ADV "+f+")";
}
else
out="!";
return out;
}
v(string in, int d){
string f,r,out;
if(oflow(d))
return "!";
if(tr)
dotr("v?",in);
st[++d]="";
f=first(in);
r=rest(in);
if(f=="saw"||f=="met"||f=="laughed"){
out=r;
st[d]="(V "+f+")";
}
else
out="!";
return out;
}
n(string in, int d){
string f,r,out;
if(oflow(d))
return "!";
if(tr)
dotr("n?",in);
st[++d]="";
f=first(in);
r=rest(in);
if(f=="dog"||f=="cat"||f=="mouse"){
out=r;
st[d]="(N "+f+")";
}
else
out="!";
return out;
}
adj(string in, int d){
string f,r,out;
if(oflow(d))
return "!";
if(tr)
dotr("adj?",in);
st[++d]="";
f=first(in);
r=rest(in);
if(f=="small"||f=="big"||f=="black"||f=="white"){
out=r;
st[d]="(A "+f+")";
}
else
out="!";
return out;
}
det(string in, int d){
string f,r,out;
if(oflow(d))
return "!";
if(tr)
dotr("det?",in);
st[++d]="";
f=first(in);
r=rest(in);
if(f=="the"||f=="this"||f=="a"){
out=r;
st[d]="(DET "+f+")";
}
else
out="!";
return out;
}
// Phrasal rules
adjp(string in,int d){
string out;
if(tr)
dotr("adjp?",in);
if(oflow(d))
return "!";
st[++d]="";
out=add(in,d);
if(out!="!"){
st[d]="(A' "+st[d+1];
out=adjp(out,d);
if(out!="!")
st[d]= st[d]+" "+st[d+1];
}
else {
out=adj(in,d);
if(out!="!")
st[d]= "(A' "+st[d]+" "+st[d+1]+")";
}
return out;
}
nom(string in,int d){
string out;
if(tr)
dotr("n''?",in);
if(oflow(d))
return "!";
st[++d]="";
out=adjp(in,d);
if(out!="!"){
st[d]="(N' "+st[d+1];
out=nom(out,d);
if(out!="!")
st[d]= st[d]+" "+st[d+1];
}
else {
out=n(in,d);
if(out!="!")
st[d]= "(N' "+st[d]+" "+st[d+1]+")";
}
return out;
}
np(string in,int d){
string out;
if(tr)dotr("n''?",in);
if(oflow(d)) return "!";
st[++d]="";
out=det(in,d);
if(out!="!"){
st[d]= "(N'' "+st[d+1];
out=nom(out,d);
if(out!="!")
st[d]= st[d]+" "+st[d+1]+")";
}
else {
out=nom(in,d);
if(out!="!")
st[d]= "(N'' "+st[d+1]+")";
}
return out;
}
vbl(string in,int d){
string out;
if(tr)
dotr("v'?",in);
if(oflow(d))
return "!";
st[++d]="";
out=adv(in,d);
if(out!="!"){
st[d]="(V' "+st[d+1];
out=vbl(out,d);
if(out!="!")
st[d]= st[d]+" "+st[d+1];
}
else {
out=v(in,d);
if(out!="!")
st[d]= "(V' "+st[d]+" "+st[d+1]+")";
}
return out;
}
vp(string in,int d){
string out;
if(tr)
dotr("v''?",in);
if(oflow(d))
return "!";
st[++d]="";
out=vbl(in,d);
if(out!="!"){
st[d]="(V'' "+st[d+1];
out=np(out,d);
if(out!="!")
st[d]= st[d]+" "+st[d+1];
st[d]= st[d]+")";
}
return out;
}
s(string in,int d){
string out;
if(tr)dotr("s?",in);
if(oflow(d)) return "!";
st[++d]="";
out=np(in,d);
if(out!="!"){
st[d]= "(S "+st[d+1];
out=vp(out,d);
if(out!="!")
st[d]= st[d]+" "+st[d+1]+")";
}
return out;
}
main(){
string in,out;
in="the big big white dog obviously saw a very black cat";
while(in!=""){
in=getsd("Input: ",in);
if(in!=""){
out=s(in,0);
if(out=="")
puts(st[1]+"\n");
else puts("Rejected!\n");
}
}
}