next up previous
Next: Output from the Example Up: 15.11.2001: (Gruppenarbeit & Berichte: Previous: Example 2: A compiled

Example 3: A compiled non-deterministic top-down left-right recursive descent context-free parser

Warning: This parser is far from being an optimised implementation. It is intended for teaching use only.

The parser is based on the compiled context-free nondeterministic recogniser, and contains enhancements to the recogniser which enable it to build up a structure description in the form of a labelled bracketing as parsing proceeds.

Like the recogniser, it is possible to compile this directly from the context-free rule notation.

In addition, the parser also contains a trace mechanism which provides a history of all rules which were applied (including during backtracking), in the order in which they were applied, and stack overflow protection mechanism.

Task:

Document the techniques used to enhance the recogniser.

// cf_parse_td_lr_nd_com.pc
// D.Gibbon, 9 Nov 2001

string st[200];
int smax=199;
int tr=0;

// Utils

first(string s){
	int firstlen;
	string f;
	firstlen=strstr(s," ",0);
	if(firstlen==-1)
		f=s;
	else
		f=strleft(s,firstlen);
	return f;
}


rest(string s){
	int firstlen;
	string r;
	firstlen=strstr(s," ",0);
	if(firstlen==-1)
		r="";
	else
		r=strright(s,strlen(s)-firstlen-1);
return r;
}

oflow(int d){
	if(d>smax){
		alert("Stack overflow!");
		return 1;
	}
	else
		return 0;
}

dotr(string s,string in){
	puts(s+" "+in+"\n");
}

// Lexical rules

add(string in, int d){
	string f,r,out;
	if(oflow(d))
		return "!";
	if(tr)
		dotr("add?",in);
	st[++d]="";
	f=first(in);
	r=rest(in);
	if(f=="very"||f=="extremely"){
		out=r;
		st[d]="(ADD "+f+")";
	}
	else
		out="!";
	return out;
}

adv(string in, int d){
	string f,r,out;
	if(oflow(d))
		return "!";
	if(tr)
		dotr("adv?",in);
	st[++d]="";
	f=first(in);
	r=rest(in);
	if(f=="easily"||f=="obviously"||f=="never"){
		out=r;
		st[d]="(ADV "+f+")";
	}
	else
		out="!";
	return out;
}

v(string in, int d){
	string f,r,out;
	if(oflow(d))
		return "!";
	if(tr)
		dotr("v?",in);
	st[++d]="";
	f=first(in);
	r=rest(in);
	if(f=="saw"||f=="met"||f=="laughed"){
		out=r;
		st[d]="(V "+f+")";
	}
	else
		out="!";
	return out;
}

n(string in, int d){
	string f,r,out;
	if(oflow(d))
		return "!";
	if(tr)
		dotr("n?",in);
	st[++d]="";
	f=first(in);
	r=rest(in);
	if(f=="dog"||f=="cat"||f=="mouse"){
		out=r;
		st[d]="(N "+f+")";
	}
	else
		out="!";
	return out;
}

adj(string in, int d){
	string f,r,out;
	if(oflow(d))
		return "!";
	if(tr)
		dotr("adj?",in);
	st[++d]="";
	f=first(in);
	r=rest(in);
	if(f=="small"||f=="big"||f=="black"||f=="white"){
		out=r;
		st[d]="(A "+f+")";
		}
	else
		out="!";
	return out;
}

det(string in, int d){
	string f,r,out;
	if(oflow(d))
		return "!";
	if(tr)
		dotr("det?",in);
	st[++d]="";
	f=first(in);
	r=rest(in);
	if(f=="the"||f=="this"||f=="a"){
		out=r;
		st[d]="(DET "+f+")";
		}
	else
		out="!";
	return out;
}

// Phrasal rules

adjp(string in,int d){
	string out;
	if(tr)
		dotr("adjp?",in);
	if(oflow(d))
		 return "!";
	st[++d]="";
	out=add(in,d);
	if(out!="!"){
		st[d]="(A' "+st[d+1];
		out=adjp(out,d);
		if(out!="!")
			st[d]= st[d]+" "+st[d+1];
	}
	else {
		out=adj(in,d);
		if(out!="!")
			st[d]= "(A' "+st[d]+" "+st[d+1]+")";
	}
return out;
}


nom(string in,int d){
	string out;
	if(tr)
		dotr("n''?",in);
	if(oflow(d))
		return "!";
	st[++d]="";
	out=adjp(in,d);
	if(out!="!"){
		st[d]="(N' "+st[d+1];
		out=nom(out,d);
		if(out!="!")
			st[d]= st[d]+" "+st[d+1];
		}
	else {
		out=n(in,d);
		if(out!="!")
			st[d]= "(N' "+st[d]+" "+st[d+1]+")";
	}
	return out;
}


np(string in,int d){
	string out;
	if(tr)dotr("n''?",in);
	if(oflow(d)) return "!";
	st[++d]="";
	out=det(in,d);
	if(out!="!"){
		st[d]= "(N'' "+st[d+1];
		out=nom(out,d);
		if(out!="!")
			st[d]= st[d]+" "+st[d+1]+")";
	}
	else {
		out=nom(in,d);
		if(out!="!")
			st[d]= "(N'' "+st[d+1]+")";
	}
	return out;
}


vbl(string in,int d){
	string out;
	if(tr)
		dotr("v'?",in);
	if(oflow(d))
		return "!";
	st[++d]="";
	out=adv(in,d);
	if(out!="!"){
		st[d]="(V' "+st[d+1];
		out=vbl(out,d);
		if(out!="!")
			st[d]= st[d]+" "+st[d+1];
	}
	else {
		out=v(in,d);
		if(out!="!")
			st[d]= "(V' "+st[d]+" "+st[d+1]+")";
	}
return out;
}

vp(string in,int d){
	string out;
	if(tr)
		dotr("v''?",in);
	if(oflow(d))
		return "!";
	st[++d]="";
	out=vbl(in,d);
	if(out!="!"){
		st[d]="(V'' "+st[d+1];
		out=np(out,d);
		if(out!="!")
			st[d]= st[d]+" "+st[d+1];
	st[d]= st[d]+")";
	}
return out;
}


s(string in,int d){
	string out;
	if(tr)dotr("s?",in);
	if(oflow(d)) return "!";
	st[++d]="";
	out=np(in,d);
	if(out!="!"){
		st[d]= "(S "+st[d+1];
		out=vp(out,d);
		if(out!="!")
			st[d]= st[d]+" "+st[d+1]+")";
	}
	return out;
}

main(){
	string in,out;
	in="the big big white dog obviously saw a very black cat";
	while(in!=""){
		in=getsd("Input: ",in);
		if(in!=""){
			out=s(in,0);
			if(out=="")
				puts(st[1]+"\n");
			else puts("Rejected!\n");
		}
	}
}



Dafydd Gibbon, Wed Feb 12 10:50:41 MET 2003 Automatically generated, links may change - update every session.