Beruflich Dokumente
Kultur Dokumente
Experiment no.1
AIM: Write a program to implement simple lexical analyzer using C language.
#include<string.h>
#include<ctype.h>
#include<stdio.h>
#include<conio.h>
/*
 * Classify one lexeme and report the verdict on stdout.
 *
 * str is compared (case-sensitively) against a fixed list of reserved
 * words.  Prints "\n<str> is a keyword" on a match and
 * "\n<str> is an identifier" otherwise.
 */
void keyword(char str[10])
{
    /* Reserved words this toy lexer knows about. */
    static const char *const reserved[] = {
        "void", "for", "while", "do", "int", "float",
        "char", "double", "static", "switch", "case"
    };
    size_t idx;

    for (idx = 0; idx < sizeof reserved / sizeof reserved[0]; idx++) {
        if (strcmp(reserved[idx], str) == 0) {
            printf("\n%s is a keyword", str);
            return;
        }
    }
    printf("\n%s is an identifier", str);
}
/*
 * Driver for the toy lexical analyzer.
 *
 * Reads D:\input.txt one character at a time and sorts what it sees:
 *   - runs of decimal digits are converted to int and stored in num[];
 *   - words (a letter followed by letters, digits, '_' or '$') are written,
 *     space-separated, to the scratch file "identifier";
 *   - spaces and tabs are skipped, newlines are counted;
 *   - every other character is written to the scratch file "specialchar".
 * Both scratch files are then read back: each word is classified by
 * keyword() and the special characters are listed.
 *
 * Returns 0 on success, 1 if a file could not be opened.
 */
int main(void)
{
    FILE *f1, *f2, *f3;
    int c;              /* int, not char, so EOF never collides with a real character */
    char str[10];       /* same size as the char[10] parameter of keyword() */
    int num[100];
    int lineno = 0, tokenvalue = 0, i = 0, j = 0, k = 0;

    clrscr();
    f1 = fopen("D:\\input.txt", "r");
    f2 = fopen("identifier", "w");
    f3 = fopen("specialchar", "w");
    if (f1 == NULL || f2 == NULL || f3 == NULL) {
        printf("\nCannot open the input or scratch files");
        if (f1 != NULL)
            fclose(f1);
        if (f2 != NULL)
            fclose(f2);
        if (f3 != NULL)
            fclose(f3);
        getch();
        return 1;
    }

    while ((c = getc(f1)) != EOF) {
        if (isdigit(c)) {
            tokenvalue = c - '0';
            c = getc(f1);
            while (isdigit(c)) {
                /* shift the accumulated value one decimal place, then add the digit */
                tokenvalue = tokenvalue * 10 + (c - '0');
                c = getc(f1);
            }
            if (i < (int)(sizeof num / sizeof num[0]))
                num[i++] = tokenvalue;
            ungetc(c, f1);      /* give back the character that ended the number */
        } else if (isalpha(c)) {
            putc(c, f2);
            c = getc(f1);
            while (isdigit(c) || isalpha(c) || c == '_' || c == '$') {
                putc(c, f2);
                c = getc(f1);
            }
            putc(' ', f2);      /* the space is the word terminator for the read-back pass */
            ungetc(c, f1);
        } else if (c == ' ' || c == '\t') {
            printf(" ");
        } else if (c == '\n') {
            lineno++;
        } else {
            putc(c, f3);
        }
    }
    fclose(f2);
    fclose(f3);
    fclose(f1);

    printf("\nThe no's in the program are");
    for (j = 0; j < i; j++)
        printf("\t%d", num[j]);
    printf("\n");

    f2 = fopen("identifier", "r");
    k = 0;
    printf("The keywords and identifiersare:");
    if (f2 != NULL) {
        while ((c = getc(f2)) != EOF) {
            if (c != ' ') {
                /* words longer than the buffer are truncated rather than overflowing it */
                if (k < (int)sizeof str - 1)
                    str[k++] = (char)c;
            } else {
                str[k] = '\0';
                keyword(str);
                k = 0;
            }
        }
        fclose(f2);
    }

    f3 = fopen("specialchar", "r");
    printf("\nSpecial characters are");
    if (f3 != NULL) {
        while ((c = getc(f3)) != EOF)
            printf("\t%c", c);
        fclose(f3);
    }
    printf("\n");
    printf("Total no. of lines are:%d", lineno);
    getch();
    return 0;
}
Input.txt
void main()
{
int a=3;
int b=5;
int c;
c=a+b;
}
Output:
PAGE 3
Experiment no.2
AIM: Write a Program to Implement NFA for regular expression (aa*)/ (bb*).
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Recognizer for the regular expression (aa*)/(bb*): one or more 'a's, or
 * one or more 'b's.
 *
 * Reads one whitespace-delimited word from stdin and prints "valid string"
 * when every character equals the first one and that first character is
 * 'a' or 'b'; otherwise prints "invalid string".
 */
int main(void)
{
    char st[20];
    int i, len, valid;

    clrscr();
    printf("\n\nEnter string");
    if (scanf("%19s", st) != 1)     /* width keeps the read inside st[] */
        st[0] = '\0';
    len = (int)strlen(st);

    /* The first symbol selects the branch; anything but 'a' or 'b' is rejected outright. */
    valid = (len > 0 && (st[0] == 'a' || st[0] == 'b'));
    for (i = 1; valid && i < len; i++) {
        if (st[i] != st[0])
            valid = 0;
    }

    if (valid)
        printf("valid string");
    else
        printf("invalid string");
    getch();
    return 0;
}
Output:
PAGE 5
Experiment no.3
AIM: Write a Program to Implement NFA for regular expression (a/b)* c (a/b)*.
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Recognizer for the regular expression (a/b)* c (a/b)*: any mix of 'a'
 * and 'b', exactly one 'c', then any mix of 'a' and 'b'.
 *
 * Reads one whitespace-delimited word from stdin and prints whether it
 * matches.
 */
int main(void)
{
    int i = 0, len, accepted = 0;
    char st[64];

    printf("\nEnter Any String of Type (a/b)* c (a/b)*: ");
    if (scanf("%63s", st) != 1)     /* width keeps the read inside st[] */
        st[0] = '\0';
    len = (int)strlen(st);

    /* leading (a/b)* */
    while (i < len && (st[i] == 'a' || st[i] == 'b'))
        i++;

    /* mandatory 'c'; the bound is tested before the element is read */
    if (i < len && st[i] == 'c') {
        i++;
        /* trailing (a/b)* */
        while (i < len && (st[i] == 'a' || st[i] == 'b'))
            i++;
        accepted = (i == len);  /* nothing may follow the trailing run */
    }

    if (accepted)
        printf("\nYour String Shall Pass!!");
    else
        printf("\nYour String Shall not Pass!!");
    getch();
    return 0;
}
Output:
PAGE 7
Experiment no.4
AIM: Write a Program to Implement NFA for regular expression (a/b)*abb.
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Recognizer for the regular expression (a/b)*abb: any mix of 'a' and 'b'
 * followed by the literal suffix "abb".
 *
 * Reads one whitespace-delimited word from stdin and prints
 * "\n Valid String" or "\nInvalid String".
 */
int main(void)
{
    int i, len, valid;
    char str[20];

    clrscr();
    printf("\nEnter String Which is Built From Grammer (a/b)*abb:");
    if (scanf("%19s", str) != 1)    /* width keeps the read inside str[] */
        str[0] = '\0';
    len = (int)strlen(str);

    /* Fewer than three characters cannot contain the suffix "abb". */
    valid = (len >= 3);

    /* Everything before the suffix must be 'a' or 'b'. */
    for (i = 0; valid && i < len - 3; i++) {
        if (str[i] != 'a' && str[i] != 'b')
            valid = 0;
    }

    if (valid && str[len - 3] == 'a' && str[len - 2] == 'b' && str[len - 1] == 'b')
        printf("\n Valid String");
    else
        printf("\nInvalid String");
    getch();
    return 0;
}
PAGE 8
Output:
PAGE 9
Experiment no.5
AIM: Write a Program to Demonstrate use of LEX compiler.
Lex Syntax and Example
Lex is short for "lexical analysis". Lex takes an input file containing a set of lexical analysis rules or
regular expressions. For output, Lex produces a C function which when invoked, finds the next match in
the input stream.
1. Format of lex input:
(beginning in col. 1)
2. Declarations:
a) string sets;
b) standard C;
3. Token rules:
declarations
%%
token-rules
%%
aux-procedures
name character-class
%{ -- C declarations -- %}
regular-expression {optional C-code}
a) if the expression includes a reference to a character class, enclose the class name in brackets { }
b) regular expression operators;
*,+
--closure, positive closure
" " or \
--protection of special chars
|
--or
^
--beginning-of-line anchor
()
--grouping
$
--end-of-line anchor
?
--zero or one
.
--any char (except \n)
{ref}
--reference to a named character class (a definition)
[]
--character class
[^ ]
--not-character class
4. Match rules: Longest match is preferred. If two matches are equal length, the first match is
preferred. Remember, lex partitions, it does not attempt to find nested matches. Once a character
becomes part of a match, it is no longer considered for other matches.
5. Built-in variables: yytext -- ptr to the matching lexeme. (char *yytext;)
yyleng -- length of the matching lexeme (yytext). Note: some texts spell this yylen.
PAGE 10
6. Aux Procedures: C functions may be defined and called from the C-code of token rules or from
other functions. Each lex file should also have a yyerror() function to be called when lex
encounters an error condition.
Example header file: tokens.h
#define NUM 1
#define ID 2
#define PLUS 3
#define MULT 4
#define ASGN 5
#define SEMI 6
void yywrap () { }
8. Execution of lex: (to generate the yylex() function file and then compile a user program)
(MS) c:> flex rulefile
(Linux) $ lex rulefile
flex produces lexyy.c
The produced .c file contains this function:
PAGE 11
9. User program:
(The above scanner file must be linked into the project)
#include <stdio.h>
#include "tokens.h"
int yylex ();
extern char* yytext;
// scanner prototype
/*
 * Example driver: prints the token code and lexeme of every token the
 * scanner returns.
 */
int main (void)
{
    int n;

    /* call scanner until it returns 0 for EOF */
    while ((n = yylex()) != 0)
        printf (" %d %s\n", n, yytext); // output the token code and lexeme string
    return 0;
}
PAGE 12
Experiment no.6
AIM: Write a program to eliminate left recursion from a grammar.
#include<stdio.h>
#include<conio.h>
#include<string.h>
#define SIZE 10
/*
 * Eliminates immediate left recursion from one production typed as
 * "A->Aalpha|beta1|beta2..." (single-character non-terminal, no spaces).
 *
 * When the first right-hand-side symbol equals the left-hand side, prints
 *     A->beta1A'|beta2A'...
 *     A'->alphaA'|E
 * where E stands for epsilon.  Only the first alternative is checked for
 * left recursion; later alternatives are treated as betas.
 */
int main(void)
{
    char non_terminal;
    char production[SIZE];
    int index = 3; /* starting of the string following "->" */
    int alpha_start;
    const char *beta;

    printf("\n \nEnter the grammar:\n");
    /* fgets bounds the read to the buffer; the production ends at the first whitespace */
    if (fgets(production, SIZE, stdin) == NULL)
        production[0] = '\0';
    production[strcspn(production, " \t\r\n")] = '\0';

    non_terminal = production[0];
    /* the length test keeps production[index] inside the text that was actually read */
    if (strlen(production) > (size_t)index && non_terminal == production[index]) {
        printf("Grammar is left recursive.\n");
        alpha_start = index + 1;

        /* alpha runs from just after the recursive symbol up to the first '|' */
        while (production[index] != '\0' && production[index] != '|')
            index++;

        if (production[index] == '|') {
            beta = &production[index + 1];
            printf("Grammar without left recursion:\n");
            printf("%c->", non_terminal);
            /* each '|'-separated beta gets the new non-terminal appended */
            while (*beta != '\0') {
                int span = (int)strcspn(beta, "|");

                printf("%.*s%c\'", span, beta, non_terminal);
                beta += span;
                if (*beta == '|') {
                    putchar('|');
                    beta++;
                }
            }
            printf("\n%c\'->%.*s%c\'|E\n", non_terminal,
                   index - alpha_start, &production[alpha_start], non_terminal);
        } else {
            /* no non-recursive alternative: nothing to terminate the recursion with */
            printf("Grammar can't be reduced\n");
        }
    } else {
        printf("Grammar is not left recursive.\n");
    }
    getch();
    return 0;
}
PROF. NIDHI GONDALIA
PAGE 13
Output:
PAGE 14
Experiment no.7
AIM: Write a program to left factor a grammar.
#include<stdio.h>
#include<conio.h>
#include<string.h>
/*
 * Left-factors a production with two alternatives typed as "alt1/alt2".
 *
 * The longest common prefix of the two alternatives is pulled out and the
 * result is printed as
 *     A-><prefix>X
 *     X-><rest of alt1>/<rest of alt2>
 * Returns 1 without factoring when the input contains no '/'.
 */
int main(void)
{
    char a[10];
    char *a1, *a2, *slash;
    size_t k;

    clrscr();
    printf("enter any productions A->");
    /* fgets bounds the read to the buffer, unlike gets */
    if (fgets(a, sizeof a, stdin) == NULL)
        a[0] = '\0';
    a[strcspn(a, "\r\n")] = '\0';

    slash = strchr(a, '/');
    if (slash == NULL) {
        printf("\n production must contain two alternatives separated by '/'");
        getch();
        return 1;
    }

    /* split the input in place into the two alternatives */
    *slash = '\0';
    a1 = a;
    a2 = slash + 1;

    /* length of the common prefix; stops at the first mismatch or at the end of alt1 */
    k = 0;
    while (a1[k] != '\0' && a1[k] == a2[k])
        k++;

    printf("\n A->%.*sX", (int)k, a1);
    printf("\n X->%s/%s", a1 + k, a2 + k);
    getch();
    return 0;
}
Output:
PAGE 16
Experiment no.8
AIM: Write a program that will find the FIRST SET of the grammar.
#include<stdio.h>
#include<conio.h>
char array[10][20],temp[10];
int c,n;
void fun(int,int[]);
int fun2(int i,int j,int p[],int );
/*
 * Reads n productions into array[] and prints the set that fun() collects
 * in temp[0..c] for each of them, labelled First(<first char of the
 * production>).
 *
 * Returns 1 when the production count is not a number in 1..10.
 */
int main(void)
{
    int p[2], i, j;

    printf("Enter the no. of productions :");
    /* array[] has 10 rows, so larger counts would write past it */
    if (scanf("%d", &n) != 1 || n < 1 || n > 10) {
        printf("\nNumber of productions must be between 1 and 10\n");
        getch();
        return 1;
    }
    printf("Enter the productions :\n");
    for (i = 0; i < n; i++) {
        /* width keeps each production inside its 20-char row */
        if (scanf("%19s", array[i]) != 1)
            array[i][0] = '\0';
    }
    for (i = 0; i < n; i++) {
        /* empty result set and "no pending position" marker for each production */
        c = -1, p[0] = -1, p[1] = -1;
        fun(i, p);
        printf("First(%c) : [ ", array[i][0]);
        for (j = 0; j <= c; j++)
            printf("%c,", temp[j]);
        printf("\b ].\n");      /* backspace over the trailing comma */
    }
    getch();
    return 0;
}
/*
 * Helper for fun(); its job depends on key.
 *
 * key == 0: array[i][j] is treated as a non-terminal.  The position
 *           (i, j+1) is recorded in p, then the production whose first
 *           character equals array[i][j] is expanded through fun().
 *           Always returns 0.
 * key != 0: returns 1 when array[i][j] is not yet stored in temp[0..c]
 *           (the caller should add it), 0 when it is already there.
 */
int fun2(int i,int j,int p[],int key)
{
    int k;

    if (!key) {
        for (k = 0; k < n; k++) {
            if (array[i][j] == array[k][0])
                break;
        }
        p[0] = i;
        p[1] = j + 1;
        /* k == n means no production defines this symbol; array[n] is not a valid row */
        if (k < n)
            fun(k, p);
        return 0;
    }

    for (k = 0; k <= c; k++) {
        if (array[i][j] == temp[k])
            return 0;
    }
    return 1;
}
/*
 * Scans production array[i] and adds to temp[] the leading symbol of each
 * alternative, i.e. each character that directly follows a '/'.
 *
 * Upper-case symbols are expanded through fun2(..., key = 0).  Any other
 * symbol is appended to temp[] unless fun2(..., key = 1) reports it is
 * already present.  When the symbol is '@' and p holds a recorded position
 * (p[0] != -1), the symbol at array[p[0]][p[1]] is processed as well.
 */
void fun(int i,int p[])
{
    int j, key;

    for (j = 2; array[i][j] != '\0'; j++) {
        /* only symbols that directly follow a '/' start an alternative */
        if (array[i][j - 1] != '/')
            continue;

        if (array[i][j] >= 'A' && array[i][j] <= 'Z') {
            key = 0;
            fun2(i, j, p, key);
            continue;
        }

        key = 1;
        if (fun2(i, j, p, key))
            temp[++c] = array[i][j];

        if (array[i][j] == '@' && p[0] != -1) { //taking '@' as null symbol
            if (array[p[0]][p[1]] >= 'A' && array[p[0]][p[1]] <= 'Z') {
                key = 0;
                fun2(p[0], p[1], p, key);
            } else if (array[p[0]][p[1]] != '/' && array[p[0]][p[1]] != '\0') {
                if (fun2(p[0], p[1], p, key))
                    temp[++c] = array[p[0]][p[1]];
            }
        }
    }
}
Output:
PAGE 19
Experiment no.9
AIM: Write a program that will find the FOLLOW SET of the grammar.
#include<stdio.h>
#include<string.h>
#include<ctype.h>
#include<conio.h>
int n,m=0,p,i=0,j=0;
char a[10][10],f[10];
void follow(char c);
void first(char c);
/*
 * Reads n productions into a[] (epsilon written as '$'), then repeatedly
 * asks for a symbol, calls follow() on it and prints the symbols collected
 * in f[0..m-1], until the user answers something other than 1.
 *
 * Returns 1 when the production count is not a number in 1..10.
 */
int main(void)
{
    int i, z;
    char c, ch;

    clrscr();
    printf("\nEnter the no.of productions:");
    /* a[] has 10 rows, so larger counts would write past it */
    if (scanf("%d", &n) != 1 || n < 1 || n > 10) {
        printf("\nNumber of productions must be between 1 and 10\n");
        return 1;
    }
    printf("\nEnter the productions(epsilon=$):\n");
    for (i = 0; i < n; i++) {
        /* width keeps each production inside its 10-char row; %c eats the newline */
        if (scanf("%9s%c", a[i], &ch) < 1)
            a[i][0] = '\0';
    }
    do {
        m = 0;      /* start each query with an empty result set */
        printf("\nEnter the element whose FOLLOW is to be found:");
        if (scanf("%c", &c) != 1)
            break;  /* end of input */
        follow(c);
        printf("\nFOLLOW(%c) = { ", c);
        for (i = 0; i < m; i++)
            printf("%c ", f[i]);
        printf(" }\n");
        printf("Do you want to continue(0/1)?");
        if (scanf("%d%c", &z, &ch) < 1)
            z = 0;  /* unreadable answer: stop instead of testing an unset z */
    } while (z == 1);
    return 0;
}
/* Appends sym to f[] unless it is already present or f[] is full. */
static void add_to_follow(char sym)
{
    int idx;

    for (idx = 0; idx < m; idx++) {
        if (f[idx] == sym)
            return;
    }
    if (m < (int)sizeof f)
        f[m++] = sym;
}

static void first_for(char c, int prod, char target);

/*
 * Collects into f[] the symbols that can follow c.
 *
 * '$' is added when c is the left-hand side of the first production.  For
 * every occurrence of c on a right-hand side (positions 2 onward): if a
 * symbol follows it, that symbol is passed to the FIRST helper; if c is the
 * last symbol and differs from the production's left-hand side, the
 * left-hand side's FOLLOW is collected recursively.
 *
 * Loop counters are local so that recursive calls cannot disturb the scan
 * in progress.
 * NOTE(review): grammars with cyclic dependencies between different
 * non-terminals can still recurse without bound.
 */
void follow(char c)
{
    int row, col, len;

    if (a[0][0] == c)
        add_to_follow('$');
    for (row = 0; row < n; row++) {
        len = (int)strlen(a[row]);
        for (col = 2; col < len; col++) {
            if (a[row][col] != c)
                continue;
            if (a[row][col + 1] != '\0')
                first_for(a[row][col + 1], row, c);
            else if (c != a[row][0])
                follow(a[row][0]);
        }
    }
}

/*
 * Adds what c can start with to f[].
 *
 * c itself is added when it is not upper-case.  For each production whose
 * left-hand side is c: a '$' body pulls in FOLLOW of the left-hand side of
 * production prod (skipped when that is target, the symbol whose FOLLOW is
 * being computed, since it would add nothing and never terminate); a
 * lower-case first symbol is added directly; anything else is expanded
 * recursively.
 */
static void first_for(char c, int prod, char target)
{
    int k;

    if (!isupper((unsigned char)c))
        add_to_follow(c);
    for (k = 0; k < n; k++) {
        if (a[k][0] != c)
            continue;
        if (a[k][2] == '$') {
            if (a[prod][0] != target)
                follow(a[prod][0]);
        } else if (islower((unsigned char)a[k][2])) {
            add_to_follow(a[k][2]);
        } else {
            first_for(a[k][2], prod, target);
        }
    }
}

/*
 * Entry point kept for the existing prototype.  Uses the file-scope index i
 * as the production context, as the original implementation did.
 */
void first(char c)
{
    first_for(c, i, '\0');
}
Output:
PAGE 21
Experiment no.10
AIM: Write a Program to demonstrate use of YACC compiler.
Using yacc
1. Format for the rule file;
declarations//beginning in col 1
%%
grammar_rules actions
%%
#include "lexyy.c"
main() { ... }
other user functions...
A. Declarations: global declarations are made by enclosing them as the following;
%{
#include <stdio.h>
int x, y;
%}
B. The start symbol of the grammar must also be declared; %start symbolname
2. Declare all tokens to be returned by the scanner;
%token ID 1 ICONST 2 RCONST 3 LBRACK 4 RBRACK 5 ... etc.
3. Grammar rule format;
nonterm : handle1 { action1 }
| handle2 { action2 } ;
A. Handles may contain nonterminals and tokens.
B. The grammar must be complete -- all nonterminals must be defined so that the leaves of all
parse trees contain tokens (terminals).
C. Actions are C code blocks where;
nonterminals on the left side may be assigned a value using the pseudo variable $$
Example: $$ = 1;
The values of previously-assigned nonterminals in handles can be referenced with pseudo
variables $n (where n corresponds to the position in the handle).
Example: x = $2; y = $3;
yytext and yyleng are declared globally by lex; to access them, provide a local extern declaration.
4. Interface;
The main program (provided by the user) should call yyparse(). This parser calls yylex() each time
it needs a new token to continue the parse. Note that lexyy.c was included prior to the main program
PAGE 22
section of the input file to yacc. This allows the definitions for tokens in the yacc input file to be
recognized in yylex(). yyparse() returns 0 if successful.
In some systems, the function yywrap() must be defined. This is normally available in UNIX
libraries but may be unknown to some MS or Apple compilers. Placing the following at the bottom
of the yacc input file will suffice;
yywrap () { return(1); };
Yacc tables under MS may not be capable of handling some large grammars. The header file called
yacc.h can be edited to change the table size default. If the following is found;
#define SMALLTAB YES
It can be changed to
#define HUGETAB YES or #define MEDTAB YES
To increase the table size. A bug has been reported when using yacc under MSDOS when intrinsic
C functions such as sqrt, sin, cos, etc. have been invoked.
5. Error handling;
It is often acceptable to stop all processing when an error is found. It may be more useful to continue to find further syntax errors. If this is the case, this leads to the problem of getting the parser
"started" again. To provide this utility, yacc reserves the token error.
When an error occurs, yyerror() is called. Next, yacc will pop partially completed handles until the
token error can be shifted onto the stack. At that point, yacc will discard input until the token
following the error token can be shifted onto the stack. The token following error is defined as the
synchronization symbol. For example;
stmt : assgn
| cond
| loop
| error SEMI { printf (" illegal statement\n"); }
| error END { printf (" illegal statement\n"); }
;
In this grammar rule, if yacc is attempting to form a branch in the parse tree underneath a stmt -- but
cannot, yacc would attempt to discard all partially completed handles until the stmt can be
reduced to error. All input would then be discarded up to the next SEMI or END. In other words, this
is panic recovery and the SEMI or END is the synchronization token. In other words, yacc attempts
to skip ahead to the next stmt. To prevent cascading of errors, yacc will ensure that the first three
tokens of the next statement are correct before proceeding. If they are not, the error action is again
taken and yacc again searches for the next SEMI or END. Naturally, other error actions can be
defined using user-written C code, but this simple built-in mechanism is often useful.
6. Use of Yacc with MS;
Yacc requires a skeleton file to be in the directory \lib on the default drive -- check the Readme file
on the distribution disk. The MS version of yacc expects a single argument representing the name of
the specification file given without an extension. Yacc will assume this file has a .y extension. The
PAGE 23
parser constructed by yacc will have the same file name with a new .c extension. For example, if the
specification file is parser.y, then the following command line;
c:> yacc parser
Will produce parser.c which can then be compiled. Once again, be sure to use the MS model if you
are using a windows capable IDE such as Borland or Microsoft C++. The output of yacc will
generally produce many warning messages when compiled (which can all be ignored), but should
produce no errors.
Under UNIX, yacc produces an output file with a fixed name of y.tab.c The output file (which still
includes the lex-generated scanner) must be linked with the "l" and "y" libraries as in the following
example;
% cc -o parser y.tab.c -ll -ly
Using yacc with the -v option under UNIX will cause yacc to produce y.output containing the
complete parsing tables and descriptions of any conflicts in the grammar.
7. Yacc errors;
The two most common errors reported by yacc are shift-reduce and reduce-reduce conflicts
indicating that the grammar is inconsistent, incomplete, or at least not LR (1). If you are confident
that the grammar is in fact LR (1), you probably left off a semicolon at the end of a grammar rule or
made some other typing mistake.
PAGE 24