Sie sind auf Seite 1von 24

COMPILER DESIGN (170701)

Experiment no.1
AIM: Write a program to implement simple lexical analyzer using C language.
#include<string.h>
#include<ctype.h>
#include<stdio.h>
#include<conio.h>
/*
 * Classify one lexeme and report the verdict on stdout.
 *
 * str is a NUL-terminated word. If it equals one of the reserved words in
 * the table below, "<word> is a keyword" is printed; otherwise
 * "<word> is an identifier" is printed. Each message is preceded by a
 * newline and carries no trailing newline.
 */
void keyword(const char *str)
{
    /* Reserved words recognised by this toy analyzer (a subset of C). */
    static const char *const reserved[] = {
        "void", "for", "while", "do", "int", "float",
        "char", "double", "static", "switch", "case"
    };
    size_t idx;

    for (idx = 0; idx < sizeof reserved / sizeof reserved[0]; idx++) {
        if (strcmp(reserved[idx], str) == 0) {
            printf("\n%s is a keyword", str);
            return;
        }
    }
    printf("\n%s is an identifier", str);
}
/*
 * Experiment 1 driver: a single-pass lexical analyzer.
 *
 * Reads D:\input.txt one character at a time and sorts what it sees into
 * three buckets:
 *   - runs of digits are converted to integers and kept in num[];
 *   - words (a letter followed by letters, digits, '_' or '$') are written,
 *     space-separated, to the scratch file "identifier";
 *   - every other character that is not a blank, tab or newline is written
 *     to the scratch file "specialchar".
 * Newlines are counted. The scratch files are then read back to print the
 * report; each word is classified through keyword().
 *
 * Returns 0 on success, 1 when a file cannot be opened.
 */
int main(void)
{
    enum { MAX_NUMBERS = 100, MAX_WORD = 32 };
    FILE *f1, *f2, *f3;
    char str[MAX_WORD];
    int num[MAX_NUMBERS];
    int c;                      /* int, not char, so EOF stays distinguishable */
    int lineno = 0, tokenvalue = 0, i = 0, j = 0, k = 0;

    clrscr();
    f1 = fopen("D:\\input.txt", "r");
    if (f1 == NULL) {
        printf("\nCannot open D:\\input.txt");
        getch();
        return 1;
    }
    f2 = fopen("identifier", "w");
    f3 = fopen("specialchar", "w");
    if (f2 == NULL || f3 == NULL) {
        printf("\nCannot create the scratch files");
        if (f2 != NULL) {
            fclose(f2);
        }
        if (f3 != NULL) {
            fclose(f3);
        }
        fclose(f1);
        getch();
        return 1;
    }

    while ((c = getc(f1)) != EOF) {
        if (isdigit(c)) {
            /* Accumulate the decimal value: value = value * 10 + digit. */
            tokenvalue = c - '0';
            c = getc(f1);
            while (isdigit(c)) {
                tokenvalue = tokenvalue * 10 + (c - '0');
                c = getc(f1);
            }
            if (i < MAX_NUMBERS) {
                num[i++] = tokenvalue;
            }
            ungetc(c, f1);      /* give back the look-ahead character */
        } else if (isalpha(c)) {
            putc(c, f2);
            c = getc(f1);
            while (isdigit(c) || isalpha(c) || c == '_' || c == '$') {
                putc(c, f2);
                c = getc(f1);
            }
            putc(' ', f2);      /* blank marks the end of each word */
            ungetc(c, f1);
        } else if (c == ' ' || c == '\t') {
            printf(" ");
        } else if (c == '\n') {
            lineno++;
        } else {
            putc(c, f3);
        }
    }
    fclose(f2);
    fclose(f3);
    fclose(f1);

    printf("\nThe no's in the program are");
    for (j = 0; j < i; j++) {
        printf("\t%d", num[j]);
    }
    printf("\n");

    f2 = fopen("identifier", "r");
    k = 0;
    printf("The keywords and identifiersare:");
    if (f2 != NULL) {
        while ((c = getc(f2)) != EOF) {
            if (c != ' ') {
                /* Overlong words are truncated instead of overflowing str. */
                if (k < MAX_WORD - 1) {
                    str[k++] = (char)c;
                }
            } else {
                str[k] = '\0';
                keyword(str);
                k = 0;
            }
        }
        fclose(f2);
    }

    f3 = fopen("specialchar", "r");
    printf("\nSpecial characters are");
    if (f3 != NULL) {
        while ((c = getc(f3)) != EOF) {
            printf("\t%c", c);
        }
        fclose(f3);
    }
    printf("\n");
    printf("Total no. of lines are:%d", lineno);
    getch();
    return 0;
}

Input.txt
void main()
{
int a=3;
int b=5;
int c;
c=a+b;
}

Output:

PROF. NIDHI GONDALIA

PAGE 3

COMPILER DESIGN (170701)

Experiment no.2
AIM: Write a Program to Implement NFA for regular expression (aa*)/ (bb*).
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Return 1 when s matches (aa*)/(bb*), i.e. it is a non-empty run of a
 * single repeated letter that is either 'a' or 'b'; return 0 otherwise.
 */
static int matches_a_plus_or_b_plus(const char *s)
{
    const char lead = s[0];
    size_t pos;

    /* The first character selects the branch; anything else is rejected. */
    if (lead != 'a' && lead != 'b') {
        return 0;
    }
    for (pos = 1; s[pos] != '\0'; pos++) {
        if (s[pos] != lead) {
            return 0;
        }
    }
    return 1;
}

/*
 * Experiment 2 driver: read one word from stdin and report whether it is
 * accepted by the regular expression (aa*)/(bb*).
 */
int main(void)
{
    char st[20] = "";

    clrscr();
    printf("\n\nEnter string");
    /* Width 19 keeps the input inside st[20]; a failed read leaves "". */
    if (scanf("%19s", st) != 1) {
        st[0] = '\0';
    }
    if (matches_a_plus_or_b_plus(st)) {
        printf("valid string");
    } else {
        printf("invalid string");
    }
    getch();
    return 0;
}

Output:

PROF. NIDHI GONDALIA

PAGE 5

COMPILER DESIGN (170701)

Experiment no.3
AIM: Write a Program to Implement NFA for regular expression (a/b)* c (a/b)*.

#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Return 1 when s matches (a/b)* c (a/b)*: any mix of 'a' and 'b',
 * exactly one 'c', then any mix of 'a' and 'b'. Return 0 otherwise.
 */
static int matches_ab_star_c_ab_star(const char *s)
{
    size_t pos = 0;

    while (s[pos] == 'a' || s[pos] == 'b') {
        pos++;
    }
    if (s[pos] != 'c') {
        return 0;
    }
    pos++;
    while (s[pos] == 'a' || s[pos] == 'b') {
        pos++;
    }
    /* Accept only if the whole string was consumed. */
    return s[pos] == '\0';
}

/*
 * Experiment 3 driver: read one word from stdin and report whether it is
 * accepted by the regular expression (a/b)* c (a/b)*.
 */
int main(void)
{
    char st[64] = "";

    printf("\nEnter Any String of Type (a/b)* c (a/b)*: ");
    /* Width 63 keeps the input inside st[64]; a failed read leaves "". */
    if (scanf("%63s", st) != 1) {
        st[0] = '\0';
    }
    if (matches_ab_star_c_ab_star(st)) {
        printf("\nYour String Shall Pass!!");
    } else {
        printf("\nYour String Shall not Pass!!");
    }
    getch();
    return 0;
}

Output:

PROF. NIDHI GONDALIA

PAGE 7

COMPILER DESIGN (170701)

Experiment no.4
AIM: Write a Program to Implement NFA for regular expression (a/b)*abb.
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Return 1 when s matches (a/b)*abb: every character is 'a' or 'b' and the
 * string ends in "abb". Return 0 otherwise, including for strings shorter
 * than three characters.
 */
static int matches_ab_star_abb(const char *s)
{
    const size_t len = strlen(s);
    size_t pos;

    if (len < 3) {
        return 0;
    }
    /* Everything before the final "abb" must come from the alphabet {a,b}. */
    for (pos = 0; pos + 3 < len; pos++) {
        if (s[pos] != 'a' && s[pos] != 'b') {
            return 0;
        }
    }
    return strcmp(s + len - 3, "abb") == 0;
}

/*
 * Experiment 4 driver: read one word from stdin and report whether it is
 * accepted by the regular expression (a/b)*abb.
 */
int main(void)
{
    char str[20] = "";

    clrscr();
    printf("\nEnter String Which is Built From Grammer (a/b)*abb:");
    /* Width 19 keeps the input inside str[20]; a failed read leaves "". */
    if (scanf("%19s", str) != 1) {
        str[0] = '\0';
    }
    if (matches_ab_star_abb(str)) {
        printf("\n Valid String");
    } else {
        printf("\nInvalid String");
    }
    getch();
    return 0;
}

PROF. NIDHI GONDALIA

PAGE 8

COMPILER DESIGN (170701)

Output:

PROF. NIDHI GONDALIA

PAGE 9

COMPILER DESIGN (170701)

Experiment no.5
AIM: Write a Program to Demonstrate use of LEX compiler.
Lex Syntax and Example
Lex is short for "lexical analysis". Lex takes an input file containing a set of lexical analysis rules or
regular expressions. For output, Lex produces a C function which when invoked, finds the next match in
the input stream.
1. Format of lex input: (beginning in col. 1)
       declarations
       %%
       token-rules
       %%
       aux-procedures
2. Declarations:
   a) string sets;     name character-class
   b) standard C;      %{ -- C declarations -- %}
3. Token rules:        regular-expression {optional C-code}

a) if the expression includes a reference to a character class, enclose the class name in brackets { }
b) regular expression operators;
*,+
--closure, positive closure
" " or \
--protection of special chars
|
--or
^
--beginning-of-line anchor
()
--grouping
$
--end-of-line anchor
?
--zero or one
.
--any char (except \n)
{ref}
--reference to a named character class (a definition)
[]
--character class
[^ ]
--not-character class
4. Match rules: Longest match is preferred. If two matches are equal length, the first match is
preferred. Remember, lex partitions, it does not attempt to find nested matches. Once a character
becomes part of a match, it is no longer considered for other matches.
5. Built-in variables: yytext -- ptr to the matching lexeme. (char *yytext;)
yylen -- length of matching lexeme (yytext). Note: some systems use yyleng

PROF. NIDHI GONDALIA

PAGE 10

COMPILER DESIGN (170701)

6. Aux Procedures: C functions may be defined and called from the C-code of token rules or from
other functions. Each lex file should also have a yyerror() function to be called when lex
encounters an error condition.
Example header file: tokens.h
// define constants used by lexyy.c
// could be defined in the lex rule file
#define NUM  1
#define ID   2
#define PLUS 3
#define MULT 4
#define ASGN 5
#define SEMI 6

7. Example lex file


/* note: the definition lines below begin in col. 1 */
D       [0-9]
A       [a-zA-Z]
%{
#include <stdio.h>
#include <stdlib.h>
#include "tokens.h"
%}
%%
{D}+            return (NUM);   /* match integer numbers */
{A}({A}|{D})*   return (ID);    /* match identifiers */
"+"             return (PLUS);  /* match the plus sign (note protection) */
"*"             return (MULT);  /* match the mult sign (note protection again) */
:=              return (ASGN);  /* match the assignment string */
;               return (SEMI);  /* match the semi colon */
.               ;               /* ignore any unmatched chars */
%%
/* default action in case of error in yylex() */
void yyerror ()
{ printf (" error\n");
exit(1);                        /* non-zero status: the scan failed */
}

/* usually only needed for some Linux systems; returning 1 tells the
   scanner that there is no further input after end-of-file */
int yywrap () { return 1; }

/* usually only needed for some Linux systems */

8. Execution of lex: (to generate the yylex() function file and then compile a user program)
(MS) c:> flex rulefile      -- flex produces lexyy.c
(Linux) $ lex rulefile      -- lex produces lex.yy.c
The produced .c file contains this function: int yylex()

PROF. NIDHI GONDALIA

PAGE 11

COMPILER DESIGN (170701)

9. User program:
(The above scanner file must be linked into the project)
#include <stdio.h>
#include tokens.h
int yylex ();
extern char* yytext;

// scanner prototype

main ()
{ int n;
while ( n = yylex() )
// call scanner until it returns 0 for EOF
printf (" %d %s\n", n, yytext); // output the token code and lexeme string
}

PROF. NIDHI GONDALIA

PAGE 12

COMPILER DESIGN (170701)

Experiment no.6
AIM: Write a program to eliminate left recursion from a grammar.
#include<stdio.h>
#include<conio.h>
#include<string.h>
#define SIZE 10
/*
 * Experiment 6 driver: eliminate immediate left recursion from a single
 * production typed as  A->Aalpha|beta  (no blanks).
 *
 * production[0] is the non-terminal and the right-hand side starts at
 * index 3, right after "->". When the right-hand side begins with the
 * non-terminal itself, the production is rewritten as
 *     A->betaA'
 *     A'->alphaA'|E        (E stands for epsilon)
 * alpha is everything between the leading non-terminal and the first '|';
 * beta is everything after that '|'.
 */
int main(void)
{
    char production[SIZE] = {0};
    char non_terminal;
    const char *alpha;
    const char *bar;
    const int index = 3;        /* starting of the string following "->" */

    printf("\n \nEnter the grammar:\n");
    /* Width 9 is SIZE-1, so the input cannot overflow production[]. */
    if (scanf("%9s", production) != 1) {
        production[0] = '\0';
    }
    non_terminal = production[0];

    if (strlen(production) > (size_t)index && non_terminal == production[index]) {
        alpha = production + index + 1;
        printf("Grammar is left recursive.\n");
        bar = strchr(alpha, '|');
        /* A rewrite needs a non-recursive alternative after the '|'. */
        if (bar != NULL && bar[1] != '\0') {
            printf("Grammar without left recursion:\n");
            printf("%c->%s%c\'", non_terminal, bar + 1, non_terminal);
            printf("\n%c\'->%.*s%c\'|E\n", non_terminal,
                   (int)(bar - alpha), alpha, non_terminal);
        } else {
            printf("Grammar can't be reduced\n");
        }
    } else {
        printf("Grammar is not left recursive.\n");
    }
    getch();
    return 0;
}
PROF. NIDHI GONDALIA

PAGE 13

COMPILER DESIGN (170701)

Output:

PROF. NIDHI GONDALIA

PAGE 14

COMPILER DESIGN (170701)

Experiment no.7
AIM: Write a program to perform left factoring of a grammar.
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Experiment 7 driver: left-factor a production with two alternatives.
 *
 * The user types the right-hand side as  alt1/alt2 . The longest common
 * prefix of the two alternatives is factored out and the result is printed
 * as
 *     A-><prefix>X
 *     X-><rest of alt1>/<rest of alt2>
 * An empty remainder stands for the empty string.
 *
 * Returns 0 on success, 1 when the input contains no '/' separator.
 */
int main(void)
{
    char a[64];
    char *slash;
    const char *alt1;
    const char *alt2;
    size_t common = 0;

    clrscr();
    printf("enter any productions A->");
    /* fgets is bounded by the buffer size, unlike gets. */
    if (fgets(a, sizeof a, stdin) == NULL) {
        a[0] = '\0';
    }
    a[strcspn(a, "\r\n")] = '\0';

    slash = strchr(a, '/');
    if (slash == NULL) {
        printf("\n Production needs two alternatives separated by '/'");
        getch();
        return 1;
    }
    *slash = '\0';              /* split the line into the two alternatives */
    alt1 = a;
    alt2 = slash + 1;

    /* Only a prefix can be factored, so stop at the first mismatch. */
    while (alt1[common] != '\0' && alt1[common] == alt2[common]) {
        common++;
    }

    printf("\n A->%.*sX", (int)common, alt1);
    printf("\n X->%s/%s", alt1 + common, alt2 + common);
    getch();
    return 0;
}

Output:

PROF. NIDHI GONDALIA

PAGE 16

COMPILER DESIGN (170701)

Experiment no.8
AIM: Write a program that will find the FIRST SET of the grammar.
#include<stdio.h>
#include<conio.h>
/* array[i] holds the i-th production exactly as typed by the user;
   temp collects the symbols of the FIRST set currently being computed. */
char array[10][20],temp[10];
/* c is the index of the last symbol stored in temp (main resets it to -1
   before each production); n is the number of productions read by main. */
int c,n;
/* fun scans production i; fun2 is its helper (see their definitions). */
void fun(int,int[]);
int fun2(int i,int j,int p[],int );
/*
 * Experiment 8 driver: read n productions into array[] and print one
 * FIRST set per production, labelled with the production's first
 * character.
 *
 * Returns 0 on success, 1 when the production count is unusable.
 */
int main(void)
{
    const int capacity = (int)(sizeof array / sizeof array[0]);
    int p[2], i, j;

    printf("Enter the no. of productions :");
    if (scanf("%d", &n) != 1 || n < 1 || n > capacity) {
        printf("\nNumber of productions must be between 1 and %d", capacity);
        getch();
        return 1;
    }
    printf("Enter the productions :\n");
    for (i = 0; i < n; i++) {
        /* Width 19 keeps each production inside its 20-byte row. */
        if (scanf("%19s", array[i]) != 1) {
            n = i;              /* input ended early: keep what was read */
            break;
        }
    }
    for (i = 0; i < n; i++) {
        /* Start every production with an empty result and no caller context. */
        c = -1, p[0] = -1, p[1] = -1;
        fun(i, p);
        printf("First(%c) : [ ", array[i][0]);
        for (j = 0; j <= c; j++) {
            printf("%c,", temp[j]);
        }
        printf("\b ].\n");      /* backspace over the trailing comma */
    }
    getch();
    return 0;
}
/*
 * Helper for fun(); its job depends on key.
 *
 * key == 0: array[i][j] is treated as a non-terminal. The production whose
 *           first character equals it is looked up, p is set to the
 *           position just after the symbol (p[0]=i, p[1]=j+1), and fun() is
 *           run on the production found. Always returns 0.
 * key != 0: returns 1 when array[i][j] is not yet present in temp[0..c],
 *           0 when it is already there. Nothing is modified.
 */
int fun2(int i,int j,int p[],int key)
{
    int k;

    if (!key) {
        for (k = 0; k < n; k++) {
            if (array[i][j] == array[k][0]) {
                break;
            }
        }
        p[0] = i;
        p[1] = j + 1;
        /* No production for this symbol: do not index past the table. */
        if (k < n) {
            fun(k, p);
        }
        return 0;
    }

    for (k = 0; k <= c; k++) {
        if (array[i][j] == temp[k]) {
            return 0;
        }
    }
    return 1;
}
/*
 * Add the leading symbols of production i to the result set temp[].
 *
 * Only symbols immediately preceded by '/' are examined, scanning from
 * index 2. An upper-case symbol is expanded through fun2(..., key 0). Any
 * other symbol is appended to temp unless already present. When that symbol
 * is '@' (the null symbol) and p records a caller position (p[0] != -1),
 * the symbol at array[p[0]][p[1]] is processed the same way.
 */
void fun(int i,int p[])
{
    const int capacity = (int)sizeof temp;
    int j, key;
    char sym, next;

    for (j = 2; array[i][j] != '\0'; j++) {
        if (array[i][j - 1] != '/') {
            continue;
        }
        sym = array[i][j];
        if (sym >= 'A' && sym <= 'Z') {
            key = 0;
            fun2(i, j, p, key);
            continue;
        }

        key = 1;
        if (fun2(i, j, p, key) && c + 1 < capacity) {
            temp[++c] = sym;
        }
        if (sym != '@' || p[0] == -1) { /* taking '@' as null symbol */
            continue;
        }

        /* Null alternative: look at what follows in the calling production. */
        next = array[p[0]][p[1]];
        if (next >= 'A' && next <= 'Z') {
            key = 0;
            fun2(p[0], p[1], p, key);
        } else if (next != '/' && next != '\0') {
            if (fun2(p[0], p[1], p, key) && c + 1 < capacity) {
                temp[++c] = next;
            }
        }
    }
}

Output:

PROF. NIDHI GONDALIA

PAGE 19

COMPILER DESIGN (170701)

Experiment no.9
AIM: Write a program that will find the FOLLOW SET of the grammar.
#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<conio.h>
/* n: number of productions read by main.
   m: number of symbols currently stored in f (main resets it to 0 before
      each query).
   i, j: file-scope indices shared by the functions below.
   p: not referenced anywhere in the visible code. */
int n,m=0,p,i=0,j=0;
/* a[k] is the k-th production string as typed by the user;
   f collects the FOLLOW symbols for the current query. */
char a[10][10],f[10];
void follow(char c);
void first(char c);
/*
 * Experiment 9 driver: read n productions into a[], then repeatedly ask for
 * a symbol, compute its FOLLOW set into f[0..m-1] through follow(), and
 * print it. The loop continues while the user answers 1.
 *
 * Returns 0 on success, 1 when the production count is unusable.
 */
int main(void)
{
    const int capacity = (int)(sizeof a / sizeof a[0]);
    int i, z;
    char c, ch;

    clrscr();
    printf("\nEnter the no.of productions:");
    if (scanf("%d", &n) != 1 || n < 1 || n > capacity) {
        printf("\nNumber of productions must be between 1 and %d", capacity);
        return 1;
    }
    printf("\nEnter the productions(epsilon=$):\n");
    for (i = 0; i < n; i++) {
        /* Width 9 keeps each production inside its 10-byte row. */
        if (scanf("%9s%c", a[i], &ch) < 1) {
            n = i;              /* input ended early: keep what was read */
            break;
        }
    }
    do {
        m = 0;                  /* start each query with an empty set */
        printf("\nEnter the element whose FOLLOW is to be found:");
        /* The leading blank skips any whitespace left in the input. */
        if (scanf(" %c", &c) != 1) {
            break;
        }
        follow(c);
        printf("\nFOLLOW(%c) = { ", c);
        for (i = 0; i < m; i++) {
            printf("%c ", f[i]);
        }
        printf(" }\n");
        printf("Do you want to continue(0/1)?");
        if (scanf("%d%c", &z, &ch) < 1) {
            z = 0;
        }
    } while (z == 1);
    return 0;
}
/*
 * Append the FOLLOW symbols of c to f[], advancing m.
 *
 * '$' is added when c is the left-hand side of the first production. Then
 * every occurrence of c at index 2 or later of a production is visited:
 * if another symbol comes after it, first() is called on that symbol;
 * if c is the last symbol and differs from the production's left-hand
 * side, follow() recurses on that left-hand side.
 *
 * The scan uses local indices so that nested follow()/first() calls cannot
 * disturb it. The file-scope i is still set to the current production
 * before first() is called, because first() reads it. A symbol whose
 * FOLLOW is already being computed further up the call chain is skipped,
 * which stops mutually recursive productions from recursing forever.
 */
void follow(char c)
{
    static unsigned char active[256];   /* symbols currently being expanded */
    const unsigned char slot = (unsigned char)c;
    int prod, pos, len;

    if (active[slot]) {
        return;
    }
    active[slot] = 1;

    if (a[0][0] == c && m < (int)sizeof f && memchr(f, '$', (size_t)m) == NULL) {
        f[m++] = '$';
    }
    for (prod = 0; prod < n; prod++) {
        len = (int)strlen(a[prod]);
        for (pos = 2; pos < len; pos++) {
            if (a[prod][pos] != c) {
                continue;
            }
            if (a[prod][pos + 1] != '\0') {
                i = prod;       /* first() looks up a[i][0] */
                first(a[prod][pos + 1]);
            }
            if (a[prod][pos + 1] == '\0' && c != a[prod][0]) {
                follow(a[prod][0]);
            }
        }
    }

    active[slot] = 0;
}
/*
 * Append the FIRST symbols of c to f[], advancing m.
 *
 * A symbol that is not upper-case is added as-is. For every production
 * whose left-hand side is c, the symbol at index 2 decides what happens:
 *   '$' (epsilon): follow() is called on the left-hand side of production
 *                  i, the one the caller was scanning;
 *   lower-case:    the symbol is added;
 *   anything else: first() recurses on it.
 *
 * The file-scope i is saved and restored around the follow() call, since
 * follow() may change it. A production that begins with c itself is
 * skipped, because recursing on it would never terminate. Symbols are
 * added only if f has room and does not already contain them.
 */
void first(char c)
{
    const int capacity = (int)sizeof f;
    int k, saved;
    char lead;

    if (!isupper((unsigned char)c)) {
        if (m < capacity && memchr(f, c, (size_t)m) == NULL) {
            f[m++] = c;
        }
    }
    for (k = 0; k < n; k++) {
        if (a[k][0] != c) {
            continue;
        }
        lead = a[k][2];
        if (lead == '$') {
            saved = i;
            if (saved >= 0 && saved < n) {
                follow(a[saved][0]);
            }
            i = saved;
        } else if (islower((unsigned char)lead)) {
            if (m < capacity && memchr(f, lead, (size_t)m) == NULL) {
                f[m++] = lead;
            }
        } else if (lead != '\0' && lead != c) {
            first(lead);
        }
    }
}

Output:

PROF. NIDHI GONDALIA

PAGE 21

COMPILER DESIGN (170701)

Experiment no.10
AIM: Write a Program to demonstrate use of YACC compiler.
Using yacc
1. Format for the rule file;
declarations//beginning in col 1
%%
grammar_rules actions
%%
#include "lexyy.c"
main() { ... }
other user functions...
A. Declarations: global declarations are made by enclosing them as the following;
%{
#include <stdio.h>
int x, y;
%}
B. The start symbol of the grammar must also be declared; %start symbolname
2. Declare all tokens to be returned by the scanner;
%token ID 1 ICONST 2 RCONST 3 LBRACK 4 RBRACK 5 ... etc.
3. Grammar rule format;
nonterm : handle1 { action1 }
| handle2 { action2 } ;
A. Handles may contain nonterminals and tokens.
B. The grammar must be complete -- all nonterminals must be defined so that the leaves of all
parse trees contain tokens (terminals).
C. Actions are C code blocks where;
nonterminals on the left side may be assigned a value using the pseudo variable $$
Example: $$ = 1;
The values of previously-assigned nonterminals in handles can be referenced with pseudo
variables $n (where n corresponds to the position in the handle).
Example: x = $2; y = $3;
yytext and yylen are declared globally by lex; to access them, provide a local extern declaration.
4. Interface;
The main program (provided by the user) should call yyparse(). This parser calls yylex() each time
it needs a new token to continue the parse. Note that lexyy.c was included prior to the main program

PROF. NIDHI GONDALIA

PAGE 22

COMPILER DESIGN (170701)

section of the input file to yacc. This allows the definitions for tokens in the yacc input file to be
recognized in yylex(). yyparse() returns 0 if successful.
In some systems, the function yywrap() must be defined. This is normally available in UNIX
libraries but may be unknown to some MS or Apple compilers. Placing the following at the bottom
of the yacc input file will suffice;
yywrap () { return(1); };
Yacc tables under MS may not be capable of handling some large grammars. The header file called
yacc.h can be edited to change the table size default. If the following is found;
#define SMALLTAB YES
It can be changed to
#define HUGETAB YES or #define MEDTAB YES
To increase the table size. A bug has been reported when using yacc under MSDOS when intrinsic
C functions such as sqrt, sin, cos, etc. have been invoked.
5. Error handling;
It is often acceptable to stop all processing when an error is found. It may be more useful to continue to find further syntax errors. If this is the case, this leads to the problem of getting the parser
"started" again. To provide this utility, yacc reserves the token error.
When an error occurs, yyerror() is called. Next, yacc will pop partially completed handles until the
token error can be shifted onto the stack. At that point, yacc will discard input until the token
following the error token can be shifted onto the stack. The token following error is defined as the
synchronization symbol. For example;
stmt : assgn
| cond
| loop
| error SEMI { printf (" illegal statement\n"); }
| error END { printf (" illegal statement\n"); }
;
In this grammar rule, if yacc is attempting to form a branch in the parse tree underneath a stmt -- but
cannot, yacc would attempt to discard all partially completed handles until the stmt can be
reduced to error. All input would then be discarded up to the next SEMI or END. In other words, this
is panic recovery and the SEMI or END is the synchronization token. In other words, yacc attempts
to skip ahead to the next stmt. To prevent cascading of errors, yacc will ensure that the first three
tokens of the next statement are correct before proceeding. If they are not, the error action is again
taken and yacc again searches for the next SEMI or END. Naturally, other error actions can be
defined using user-written C code, but this simple built-in mechanism is often useful.
6. Use of Yacc with MS;
Yacc requires a skeleton file to be in the directory \lib on the default drive -- check the Readme file
on the distribution disk. The MS version of yacc expects a single argument representing the name of
the specification file given without an extension. Yacc will assume this file has a .y extension. The

PROF. NIDHI GONDALIA

PAGE 23

COMPILER DESIGN (170701)

parser constructed by yacc will have the same file name with a new .c extension. For example, if the
specification file is parser.y, then the following command line;
c:> yacc parser
Will produce parser.c which can then be compiled. Once again, be sure to use the MS model if you
are using a windows capable IDE such as Borland or Microsoft C++. The output of yacc will
generally produce many warning messages when compiled (which can all be ignored), but should
produce no errors.
Under UNIX, yacc produces an output file with a fixed name of y.tab.c The output file (which still
includes the lex-generated scanner) must be linked with the "l" and "y" libraries as in the following
example;
% cc -o parser y.tab.c -ll -ly
Using yacc with the -v option under UNIX will cause yacc to produce y.output containing the
complete parsing tables and descriptions of any conflicts in the grammar.
7. Yacc errors;
The two most common errors reported by yacc are shift-reduce and reduce-reduce conflicts
indicating that the grammar is inconsistent, incomplete, or at least not LR (1). If you are confident
that the grammar is in fact LR (1), you probably left off a semicolon at the end of a grammar rule or
made some other typing mistake.

PROF. NIDHI GONDALIA

PAGE 24

Das könnte Ihnen auch gefallen