Sie sind auf Seite 1von 563

1



Introduction

CHAPTER 1

Introduction


exe

txt
Compiler source text
in implementation
language

Executable compiler
code

Compiler

exe

txt
Program source text
in source language

Executable program
code for target
machine

Compiler

X
Input in
some input format

Program

Output in
some output format

Figure 1.1 Compiling and running a compiler.




CHAPTER 1

Introduction


txt
Source
text

Frontend
(analysis)

Semantic
representation

Backend
(synthesis)

Compiler

Figure 1.2 Conceptual structure of a compiler.




exe
Executable
code

CHAPTER 1

Introduction


Machine

Executable
code

Source
code

preprocessing

processing

Compilation

Source
code

Interpreter

Intermediate
code

preprocessing

processing
Interpretation

Figure 1.3 Comparison of a compiler and an interpreter.




SECTION 1.1

Why study compiler construction?


Front-ends for

Back-ends for

Language 1

Machine 1

Machine 2

Language 2
Semantic
representation

Language L

Machine M

Figure 1.4 Creating compilers for L languages and M machines.




A simple traditional modular compiler/interpreter

SECTION 1.2


expression

expression

term

term

term

factor

identifier

identifier

term

factor

term

factor

factor

identifier

factor

identifier

identifier

Figure 1.5 The expression b*b - 4*a*c as a parse tree.




SUBSECTION 1.2.1

The abstract syntax tree


-

Figure 1.6 The expression b*b - 4*a*c as an AST.




SECTION 1.2

A simple traditional modular compiler/interpreter


type: real
- loc: reg1

type: real
* loc: reg2

type: real
* loc: reg1

type: real
b loc: sp+16

type: real
b loc: sp+16

type: real
* loc: reg2

type: real
4 loc: const

type: real
c loc: sp+24

type: real
a loc: sp+8

Figure 1.7 The expression b*b - 4*a*c as an annotated AST.




SUBSECTION 1.2.2

Structure of the demo compiler



Lexical
analysis

Syntax
analysis

Code
generation
Intermediate
code
(AST)
Interpretation

Context
handling

Figure 1.8 Structure of the demo compiler/interpreter.




10

SECTION 1.2

A simple traditional modular compiler/interpreter


expression → digit | '(' expression operator expression ')'
operator → '+' | '*'
digit → '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
Figure 1.9 Grammar for simple fully parenthesized expressions.



SUBSECTION 1.2.3

The language for the demo compiler

11


#include "parser.h"     /* for type AST_node */
#include "backend.h"    /* for Process() */
#include "error.h"      /* for Error() */

int main(void) {
AST_node *icode;
if (!Parse_program(&icode)) Error("No top-level expression");
Process(icode);
return 0;
}
Figure 1.10 Driver for the demo compiler.



12

SECTION 1.2

A simple traditional modular compiler/interpreter


/* Define class constants */
/* Values 0-255 are reserved for ASCII characters */
#define
EOF
256
#define
DIGIT
257
typedef struct {int class; char repr;} Token_type;
extern Token_type Token;
extern void get_next_token(void);
Figure 1.11 Header file lex.h for the demo lexical analyzer.



SUBSECTION 1.2.4

Lexical analysis for the demo compiler


#include "lex.h"        /* for self check */


/* PRIVATE */
/*
 * Returns 1 if ch is a layout character (space, tab or newline),
 * and 0 for any other value.
 */
static int Layout_char(int ch) {
    switch (ch) {
    case ' ': case '\t': case '\n':
        return 1;
    default:
        return 0;
    }
}
/* PUBLIC */
Token_type Token;

/*
 * Reads characters with getchar(), skipping layout characters, and stores
 * the next token in the global Token.
 * On end of input (getchar() returns a negative value) the class is EOF
 * and the representation is '#'. A character '0'..'9' gets class DIGIT;
 * any other character is its own class. The representation is the
 * character itself.
 */
void get_next_token(void) {
    int ch;

    /* get a non-layout character: */
    do {
        ch = getchar();
        if (ch < 0) {
            Token.class = EOF; Token.repr = '#';
            return;
        }
    } while (Layout_char(ch));

    /* classify it: */
    if ('0' <= ch && ch <= '9') {Token.class = DIGIT;}
    else {Token.class = ch;}
    Token.repr = ch;
}
Figure 1.12 Lexical analyzer for the demo compiler.



13

14

SECTION 1.2

A simple traditional modular compiler/interpreter


/*
 * Tries to parse an operator at the current token.
 * If the token is '+' or '*', stores it in *oper, advances to the next
 * token and returns 1; otherwise returns 0 and leaves *oper and the
 * current token untouched.
 */
static int Parse_operator(Operator *oper) {
    if (Token.class == '+') {
        *oper = '+'; get_next_token(); return 1;
    }
    if (Token.class == '*') {
        *oper = '*'; get_next_token(); return 1;
    }
    return 0;
}
/*
 * Tries to parse an expression starting at the current token.
 * A new node is allocated and stored in *expr_p before anything is parsed.
 * Returns 1 after a digit (node type 'D') or a parenthesized expression
 * (node type 'P') has been parsed. Otherwise the node is freed and 0 is
 * returned; *expr_p then still holds the freed pointer and must not be used.
 */
static int Parse_expression(Expression **expr_p) {
    Expression *expr = *expr_p = new_expression();

    /* try to parse a digit: */
    if (Token.class == DIGIT) {
        /* convert the digit character to its numeric value */
        expr->type = 'D'; expr->value = Token.repr - '0';
        get_next_token();
        return 1;
    }

    /* try to parse a parenthesized expression: */
    if (Token.class == '(') {
        expr->type = 'P';
        get_next_token();
        if (!Parse_expression(&expr->left)) {
            Error("Missing expression");
        }
        if (!Parse_operator(&expr->oper)) {
            Error("Missing operator");
        }
        if (!Parse_expression(&expr->right)) {
            Error("Missing expression");
        }
        if (Token.class != ')') {
            Error("Missing )");
        }
        get_next_token();
        return 1;
    }

    /* failed on both attempts */
    free_expression(expr); return 0;
}
Figure 1.13 Parsing routines for the demo compiler.



SUBSECTION 1.2.5

Syntax analysis for the demo compiler

15


#include "lex.h"
#include "error.h"      /* for Error() */
#include "parser.h"     /* for self check */
/* PRIVATE */
/* Allocates room for one Expression with malloc() and returns the result
   unchanged; the result is not checked here. */
static Expression *new_expression(void) {
    Expression *node = malloc(sizeof *node);

    return node;
}
/* Hands the node expr back to free(). */
static void free_expression(Expression *expr) {
    free(expr);
}
static int Parse_operator(Operator *oper_p);
static int Parse_expression(Expression **expr_p);
/* PUBLIC */
/*
 * Parses a complete program, which is a single expression.
 * Returns 1 and stores the expression in *icode_p when Parse_expression()
 * succeeds; Error() is called first if the token after the expression is
 * not EOF. Returns 0, without touching *icode_p, when no expression is found.
 */
int Parse_program(AST_node **icode_p) {
    Expression *expr;

    get_next_token();           /* start the lexical analyzer */
    if (!Parse_expression(&expr)) {
        return 0;
    }
    if (Token.class != EOF) {
        Error("Garbage after end of program");
    }
    *icode_p = expr;
    return 1;
}
Figure 1.14 Parser environment for the demo compiler.



16

SECTION 1.2

A simple traditional modular compiler/interpreter


/*
 * Template of a parsing routine for a grammar rule P with alternatives
 * A1 A2 ... An, B1 B2 ..., and so on.
 * Each alternative is tried in turn through a call for its first member.
 * When that call succeeds, Error() is called for every later member that
 * fails, and 1 is returned. 0 is returned when no alternative's first
 * member succeeds.
 */
int P(...) {
/* try to parse the alternative A1 A2 ... An */
if (A1(...)) {
if (!A2(...)) Error("Missing A2");
...
if (!An (...)) Error("Missing An ");
return 1;
}
/* try to parse the alternative B1 B2 ... */
if (B1(...)) {
if (!B2(...)) Error("Missing B2");
...
return 1;
}
...
/* failed to find any alternative of P */
return 0;
}
Figure 1.15 A C template for a grammar rule.



SUBSECTION 1.2.5

Syntax analysis for the demo compiler

17


typedef int Operator;

/* A node of the expression tree; which fields are meaningful depends on
   the value of type. */
typedef struct _expression {
    char type;                          /* 'D' or 'P' */
    int value;                          /* for 'D' */
    struct _expression *left, *right;   /* for 'P' */
    Operator oper;                      /* for 'P' */
} Expression;

typedef Expression AST_node;            /* the top node is an Expression */

extern int Parse_program(AST_node **);


Figure 1.16 Parser header file for the demo compiler.



18

SECTION 1.2

A simple traditional modular compiler/interpreter


type
value
left

P
*
oper

right
P

D
2

P
9

D
3

D
4

Figure 1.17 An AST for the expression (2*((3*4)+9)).




SUBSECTION 1.2.7

Code generation for the demo compiler

19


#include "parser.h"     /* for types AST_node and Expression */
#include "backend.h"    /* for self check */
/* PRIVATE */
/*
 * Prints code for the expression expr.
 * A 'D' node prints a PUSH of its value. A 'P' node first prints the code
 * for its left and right operands and then ADD for '+' or MULT for '*'.
 * Nothing is printed for any other node type or operator.
 */
static void Code_gen_expression(Expression *expr) {
    switch (expr->type) {
    case 'D':
        printf("PUSH %d\n", expr->value);
        break;
    case 'P':
        Code_gen_expression(expr->left);
        Code_gen_expression(expr->right);
        switch (expr->oper) {
        case '+': printf("ADD\n"); break;
        case '*': printf("MULT\n"); break;
        }
        break;
    }
}
/* PUBLIC */
/* Prints the code for the expression icode, followed by a PRINT line. */
void Process(AST_node *icode) {
    Code_gen_expression(icode);
    printf("PRINT\n");
}
Figure 1.18 Code generation back-end for the demo compiler.



20

SECTION 1.2

A simple traditional modular compiler/interpreter


#include "parser.h"     /* for types AST_node and Expression */
#include "backend.h"    /* for self check */
/* PRIVATE */
/*
 * Evaluates the expression expr and returns its value.
 * A 'D' node yields its stored value. A 'P' node evaluates its left
 * operand, then its right operand, and returns their sum for '+' or their
 * product for '*'.
 * A node with any other type or operator yields 0, so that the function
 * never ends without returning a value.
 */
static int Interpret_expression(Expression *expr) {
    switch (expr->type) {
    case 'D':
        return expr->value;
    case 'P': {
        int e_left = Interpret_expression(expr->left);
        int e_right = Interpret_expression(expr->right);
        switch (expr->oper) {
        case '+': return e_left + e_right;
        case '*': return e_left * e_right;
        }
        break;
    }
    }
    /* unknown node type or operator */
    return 0;
}
/* PUBLIC */
/* Evaluates the expression icode and prints the result on a line of its own. */
void Process(AST_node *icode) {
    int result = Interpret_expression(icode);

    printf("%d\n", result);
}
Figure 1.19 Interpreter back-end for the demo compiler.



SUBSECTION 1.2.8

Interpretation for the demo compiler


extern void Process(AST_node *);
Figure 1.20 Common back-end header for code generator and interpreter.



21

22

CHAPTER 1

Introduction


IC

file

Program
text
input

IC
optimization

characters

IC

Lexical
analysis

Code
generation

tokens

symbolic instructions

Intermediate
code

Syntax
analysis

Target
code
optimization

AST

symbolic instructions

Context
handling

Machine
code
generation

annotated AST

bit patterns

Intermediate
code
generation

Executable
code
output

IC

Front-end

Back-end

Figure 1.21 Structure of a compiler.




file

SECTION 1.4

Compiler architectures


SET Object code TO
Assembly(
Code generation(
Context check(
Parse(
Tokenize(
Source code
)
)
)
)
);
Figure 1.22 Flow-of-control structure of a broad compiler.



23

24

SECTION 1.4

Compiler architectures


WHILE NOT Finished:
Read some data D from the source code;
Process D and produce the corresponding object code, if any;
Figure 1.23 Flow-of-control structure of a narrow compiler.



SUBSECTION 1.4.2

Who's the boss?


WHILE Obtained input character Ch from previous module:
    IF Ch = 'a':
        // See if there is another 'a':
        IF Obtained input character Ch1 from previous module:
            IF Ch1 = 'a':
                // We have 'aa':
                Output character 'b' to next module;
            ELSE Ch1 /= 'a':
                Output character 'a' to next module;
                Output character Ch1 to next module;
        ELSE Ch1 not obtained:
            Output character 'a' to next module;
            EXIT WHILE;
    ELSE Ch /= 'a':
        Output character Ch to next module;
Figure 1.24 The filter aa → b as a main loop.



25

26

SECTION 1.4

Compiler architectures


SET the flag Input exhausted TO False;
SET the flag There is a stored character TO False;
SET Stored character TO Undefined;      // can never be an 'a'

FUNCTION Filtered character RETURNING a Boolean, a character:
    IF Input exhausted: RETURN False, No character;
    ELSE IF There is a stored character:
        // It cannot be an 'a':
        SET There is a stored character TO False;
        RETURN True, Stored character;
    ELSE Input not exhausted AND There is no stored character:
        IF Obtained input character Ch from previous module:
            IF Ch = 'a':
                // See if there is another 'a':
                IF Obtained input character Ch1 from previous module:
                    IF Ch1 = 'a':
                        // We have 'aa':
                        RETURN True, 'b';
                    ELSE Ch1 /= 'a':
                        SET Stored character TO Ch1;
                        SET There is a stored character TO True;
                        RETURN True, 'a';
                ELSE Ch1 not obtained:
                    SET Input exhausted TO True;
                    RETURN True, 'a';
            ELSE Ch /= 'a':
                RETURN True, Ch;
        ELSE Ch not obtained:
            SET Input exhausted TO True;
            RETURN False, No character;
Figure 1.25 The filter aa → b as a pre-main module.



SUBSECTION 1.9.2

The production process


        expression
1@1     '(' expression operator expression ')'
2@2     '(' '1' operator expression ')'
4@3     '(' '1' '*' expression ')'
1@4     '(' '1' '*' '(' expression operator expression ')' ')'
2@5     '(' '1' '*' '(' '1' operator expression ')' ')'
3@6     '(' '1' '*' '(' '1' '+' expression ')' ')'
2@7     '(' '1' '*' '(' '1' '+' '1' ')' ')'
Figure 1.26 Leftmost derivation of the string (1*(1+1)).



27

28

SECTION 1.9

Grammars


expression

( expression

operator

expression

expression

operator

expression

Figure 1.27 Parse tree of the derivation in Figure 1.26.




SECTION 1.10

Closure algorithms


void P() { ... Q(); .... S(); ... }
void Q() { ... R(); .... T(); ... }
void R() { ... P(); }
void S() { ... }
void T() { ... }
}

Figure 1.28 Sample C program used in the construction of a calling graph.



29

30

CHAPTER 1

Introduction


P

Figure 1.29 Initial (direct) calling graph of the code in Figure 1.28.


SECTION 1.10

Closure algorithms



Figure 1.30 Calling graph of the code in Figure 1.28.




31

32

CHAPTER 1

Introduction



Data definitions:
1. Let G be a directed graph with one node for each routine. The information
items are arrows in G.
2. An arrow from a node A to a node B means that routine A calls routine B
directly or indirectly.
Initializations:
If the body of a routine A contains a call to routine B, an arrow from A to B
must be present.
Inference rules:
If there is an arrow from node A to node B and one from B to C, an arrow from
A to C must be present.
Figure 1.31 Recursion detection as a closure algorithm.


SECTION 1.10

Closure algorithms


calls(A, C) :- calls(A, B), calls(B, C).
calls(a, b).
calls(b, a).
:-? calls(a, a).
Figure 1.32 A Prolog program corresponding to the closure algorithm of Figure 1.31.



33

34

SECTION 1.10

Closure algorithms


SET the flag Something changed TO True;
WHILE Something changed:
    SET Something changed TO False;
    FOR EACH Node 1 IN Graph:
        FOR EACH Node 2 IN Descendants of Node 1:
            FOR EACH Node 3 IN Descendants of Node 2:
                IF there is no arrow from Node 1 to Node 3:
                    Add an arrow from Node 1 to Node 3;
                    SET Something changed TO True;
Figure 1.33 Outline of a bottom-up algorithm for transitive closure.





From program text to


abstract syntax tree

35

36

CHAPTER 2

From program text to abstract syntax tree


expression → product | factor
product → expression '*' factor
factor → number | identifier
Figure 2.1 A very simple grammar for expression.



CHAPTER 2

From program text to abstract syntax tree


if_statement

IF

condition

(a)

THEN

if_statement

statement

condition statement statement

(b)

Figure 2.2 Syntax tree (a) and abstract syntax tree (b) of an if-then statement.


37

38

From program text to abstract syntax tree

CHAPTER 2


Input
text

chars

Lexical
analysis

tokens

Syntax
analysis

AST

Context
handling

AST

Figure 2.3 Pipeline from input to annotated syntax tree.




Annotated
AST

SECTION 2.1

From program text to tokens the lexical structure



Basic patterns:
x
.
[xyz...]

Matching:
The character x
Any character, usually except a newline
Any of the characters x, y, z, ...

Repetition operators:
R?
R*
R+

An R or nothing (= optionally an R)
Zero or more occurrences of R
One or more occurrences of R

Composition operators:
R 1R 2
R 1 |R 2

An R 1 followed by an R 2
Either an R 1 or an R 2

Grouping:
(R)

R itself
Figure 2.4 Components of regular expressions.



39

40

SECTION 2.1

From program text to tokens the lexical structure


/* Define class constants; 0-255 reserved for ASCII characters: */
#define EOF
256
#define IDENTIFIER
257
#define INTEGER
258
#define ERRONEOUS
259
typedef struct {
char *file_name;
int line_number;
int char_number;
} Position_in_File;
typedef struct {
int class;
char *repr;
Position_in_File pos;
} Token_Type;
extern Token_Type Token;
extern void start_lex(void);
extern void get_next_token(void);
Figure 2.5 Header file lex.h of the handwritten lexical analyzer.



SUBSECTION 2.1.5

Creating a lexical analyzer by hand

41


#include "input.h"      /* for get_input() */
#include "lex.h"

/* PRIVATE */
static char *input;
static int dot;                 /* dot position in input */
static int input_char;          /* character at dot position */

/* Moves the dot one position forward and makes input_char the character
   found there; the value of the macro is that character. */
#define next_char()     (input_char = input[++dot])

/* PUBLIC */
Token_Type Token;
/* Fetches the input through get_input(), puts the dot at position 0 and
   loads the character at that position into input_char. */
void start_lex(void) {
    input = get_input();
    dot = 0;
    input_char = input[0];
}
Figure 2.6 Data and start-up of the handwritten lexical analyzer.



42

SECTION 2.1

From program text to tokens the lexical structure


/*
 * Delivers the next token in the global Token.
 * Layout and comments are skipped and the token position is noted. At end
 * of input the class is EOF and the representation "<EOF>". Otherwise the
 * first character selects the kind of token: identifier, integer, a
 * single-character operator or separator, or an erroneous character.
 * The representation is obtained from input_to_zstring() for the input
 * between the token's start and the dot.
 */
void get_next_token(void) {
    int start_dot;

    skip_layout_and_comment();
    /* now we are at the start of a token or at end-of-file, so: */
    note_token_position();

    /* split on first character of the token */
    start_dot = dot;
    if (is_end_of_input(input_char)) {
        Token.class = EOF;
        Token.repr = "<EOF>";
        return;
    }

    if (is_letter(input_char)) {
        recognize_identifier();
    } else if (is_digit(input_char)) {
        recognize_integer();
    } else if (is_operator(input_char) || is_separator(input_char)) {
        Token.class = input_char;
        next_char();
    } else {
        Token.class = ERRONEOUS;
        next_char();
    }

    Token.repr = input_to_zstring(start_dot, dot-start_dot);
}
Figure 2.7 Main reading routine of the handwritten lexical analyzer.



SUBSECTION 2.1.5

Creating a lexical analyzer by hand


/*
 * Advances the dot past any mixture of layout characters and comments.
 * A comment runs from a comment starter up to and including the next
 * comment stopper. If the input ends inside a comment, the routine returns
 * with the dot at the end of the input.
 */
void skip_layout_and_comment(void) {
    for (;;) {
        while (is_layout(input_char)) {
            next_char();
        }
        if (!is_comment_starter(input_char)) {
            return;
        }

        /* skip the comment starter and the comment text */
        next_char();
        while (!is_comment_stopper(input_char)) {
            if (is_end_of_input(input_char)) {
                return;
            }
            next_char();
        }
        /* skip the comment stopper */
        next_char();
    }
}
Figure 2.8 Skipping layout and comment in the handwritten lexical analyzer.



43

44

SECTION 2.1

From program text to tokens the lexical structure


/*
 * Sets the token class to IDENTIFIER and moves the dot past the
 * identifier: the first character, then letters and digits, in which an
 * underscore is accepted only when a letter or digit follows it directly.
 */
void recognize_identifier(void) {
    Token.class = IDENTIFIER;
    next_char();
    for (;;) {
        while (is_letter_or_digit(input_char)) {
            next_char();
        }
        /* stop unless an underscore is followed by a letter or digit */
        if (!is_underscore(input_char)
            || !is_letter_or_digit(input[dot+1])) {
            break;
        }
        next_char();
    }
}
Figure 2.9 Recognizing an identifier in the handwritten lexical analyzer.



SUBSECTION 2.1.5

Creating a lexical analyzer by hand


/* Sets the token class to INTEGER and moves the dot past the first
   character and all digits that follow it. */
void recognize_integer(void) {
    Token.class = INTEGER;
    do {
        next_char();
    } while (is_digit(input_char));
}
Figure 2.10 Recognizing an integer in the handwritten lexical analyzer.



45

46

SECTION 2.1

From program text to tokens the lexical structure


/* Character classification macros. Each argument may be evaluated more
   than once, so arguments should be free of side effects. */
#define is_end_of_input(ch)     ((ch) == '\0')
#define is_layout(ch)           (!is_end_of_input(ch) && (ch) <= ' ')
#define is_comment_starter(ch)  ((ch) == '#')
#define is_comment_stopper(ch)  ((ch) == '#' || (ch) == '\n')

#define is_uc_letter(ch)        ('A' <= (ch) && (ch) <= 'Z')
#define is_lc_letter(ch)        ('a' <= (ch) && (ch) <= 'z')
#define is_letter(ch)           (is_uc_letter(ch) || is_lc_letter(ch))
#define is_digit(ch)            ('0' <= (ch) && (ch) <= '9')
#define is_letter_or_digit(ch)  (is_letter(ch) || is_digit(ch))
#define is_underscore(ch)       ((ch) == '_')

/* NOTE: strchr() also finds the terminating '\0' of the string, so these
   two macros yield true for ch == '\0'. */
#define is_operator(ch)         (strchr("+-*/", (ch)) != 0)
#define is_separator(ch)        (strchr(";,(){}", (ch)) != 0)

Figure 2.11 Character classification in the handwritten lexical analyzer.



SUBSECTION 2.1.5

Creating a lexical analyzer by hand

47


#include "lex.h"        /* for start_lex(), get_next_token() */

/*
 * Test driver for the lexical analyzer: starts it, then reads tokens and
 * prints for each one a description of its class followed by its
 * representation, stopping after the EOF token has been printed.
 */
int main(void) {
    start_lex();
    for (;;) {
        get_next_token();
        switch (Token.class) {
        case IDENTIFIER:
            printf("Identifier");
            break;
        case INTEGER:
            printf("Integer");
            break;
        case ERRONEOUS:
            printf("Erroneous token");
            break;
        case EOF:
            printf("End-of-file pseudo-token");
            break;
        default:
            printf("Operator or separator");
            break;
        }
        printf(": %s\n", Token.repr);
        if (Token.class == EOF) {
            break;
        }
    }
    return 0;
}
Figure 2.12 Driver for the handwritten lexical analyzer.



48

SECTION 2.1

From program text to tokens the lexical structure


Integer: 8
Operator or separator: ;
Identifier: abc
Erroneous token: _
Erroneous token: _
Identifier: dd_8
Operator or separator: ;
Identifier: zz
Erroneous token: _
End-of-file pseudo-token: <EOF>
Figure 2.13 Sample results of the hand-written lexical analyzer.



SUBSECTION 2.1.5

Creating a lexical analyzer by hand


#define is_operator(ch)     (is_operator_bit[(ch)&0377])

static const char is_operator_bit[256] = {
    0,              /* position 0 */
    0, 0, ...       /* another 41 zeroes */
    1,              /* '*', position 42 */
    1,              /* '+' */
    0,
    1,              /* '-' */
    0,
    1,              /* '/', position 47 */
    0, 0, ...       /* 208 more zeroes */
};
Figure 2.14 A naive table implementation of is_operator().



49

50

SECTION 2.1

From program text to tokens the lexical structure


#define UC_LETTER_MASK      (1<<1)      /* a 1 bit, shifted left 1 pos. */
#define LC_LETTER_MASK      (1<<2)      /* a 1 bit, shifted left 2 pos. */
#define OPERATOR_MASK       (1<<5)

#define LETTER_MASK         (UC_LETTER_MASK | LC_LETTER_MASK)

#define bits_of(ch)         (charbits[(ch)&0377])

#define is_end_of_input(ch) ((ch) == '\0')

#define is_uc_letter(ch)    (bits_of(ch) & UC_LETTER_MASK)
#define is_lc_letter(ch)    (bits_of(ch) & LC_LETTER_MASK)
#define is_letter(ch)       (bits_of(ch) & LETTER_MASK)

#define is_operator(ch)     (bits_of(ch) & OPERATOR_MASK)

static const char charbits[256] = {
    0000,       /* position 0 */
    ...
    0040,       /* '*', position 42 */
    0040,       /* '+' */
    ...
    0000,       /* position 64 */
    0002,       /* 'A' */
    0002,       /* 'B' */
    ...
    0000,       /* position 96 */
    0004,       /* 'a' */
    0004,       /* 'b' */
    ...
    0000        /* position 255 */
};
Figure 2.15 Efficient classification of characters (excerpt).



SUBSECTION 2.1.6

Creating a lexical analyzer automatically


SET the global token (Token .class, Token .length) TO (0, 0);
// Try to match token description T1R1:
FOR EACH Length SUCH THAT the input matches T1R1 over Length:
IF Length > Token .length:
SET (Token .class, Token .length) TO (T1, Length);
// Try to match token description T2R2:
FOR EACH Length SUCH THAT the input matches T2R2 over Length:
IF Length > Token .length:
SET (Token .class, Token .length) TO (T2, Length);
...
FOR EACH Length SUCH THAT the input matches Tn Rn over Length:
IF Length > Token .length:
SET (Token .class, Token .length) TO (Tn , Length);
IF Token .length = 0:
Handle non-matching character;
Figure 2.16 Outline of a naive generated lexical analyzer.



51

52

SECTION 2.1

From program text to tokens the lexical structure


Already
matched

Still to be
matched

regular expression

input
gap

Figure 2.17 Components of a token description and components of the input.




SUBSECTION 2.1.6

Creating a lexical analyzer automatically


cn
input

Dotted item
T

Already matched
by

c n+1

Still to be matched
by

Figure 2.18 The relation between a dotted item and the input.


53

54

SECTION 2.1

From program text to tokens the lexical structure



T (R)*

T(R)*
T( R)*

T(R )*

T(R)*
T( R)*

T (R)+

T( R)+

T(R )+

T(R)+
T( R)+

T (R)?

T(R)?
T( R)?

T(R )?

T(R)?

T (R 1 | R 2 | ...)

T( R 1 | R 2 | ...)
T(R 1 | R 2 | ...)
...

T(R 1 | R 2 | ...)
T(R 1 | R 2 | ...)
...

...

T(R 1 | R 2 | ...)
T(R 1 | R 2 | ...)
...

Figure 2.19 move rules for the regular operators.




SUBSECTION 2.1.6

Creating a lexical analyzer automatically


integral_number → [0-9]+
fixed_point_number → [0-9]*'.'[0-9]+
Figure 2.20 A simple set of regular expressions.



55

56

SECTION 2.1

From program text to tokens the lexical structure


IMPORT Input char [1..];        // as from the previous module
SET Read index TO 1;            // the read index into Input char [ ]

PROCEDURE Get next token:
    SET Start of token TO Read index;
    SET End of last token TO Uninitialized;
    SET Class of last token TO Uninitialized;

    SET Item set TO Initial item set ();
    WHILE Item set /= Empty:
        SET Ch TO Input char [Read index];
        SET Item set TO Next item set (Item set, Ch);
        SET Class TO Class of token recognized in (Item set);
        IF Class /= No class:
            SET Class of last token TO Class;
            SET End of last token TO Read index;
        SET Read index TO Read index + 1;

    SET Token .class TO Class of last token;
    SET Token .repr TO Input char [Start of token .. End of last token];
    SET Read index TO End of last token + 1;
Figure 2.21 Outline of a linear-time lexical analyzer.



SUBSECTION 2.1.6

Creating a lexical analyzer automatically

57


FUNCTION Initial item set RETURNING an item set:
    SET New item set TO Empty;
    // Initial contents - obtain from the language specification:
    FOR EACH token description T→R IN the language specification:
        SET New item set TO New item set + item T→•R;
    RETURN closure (New item set);
Figure 2.22 The function Initial item set for a lexical analyzer.



58

SECTION 2.1

From program text to tokens the lexical structure


FUNCTION Next item set (Item set, Ch) RETURNING an item set:
    SET New item set TO Empty;
    // Initial contents - obtain from character moves:
    FOR EACH item T→α•Bβ IN Item set:
        IF B is a basic pattern AND B matches Ch:
            SET New item set TO New item set + item T→αB•β;
    RETURN closure (New item set);
Figure 2.23 The function Next item set() for a lexical analyzer.



SUBSECTION 2.1.6

Creating a lexical analyzer automatically


FUNCTION closure (Item set) RETURNING an item set:
SET Closure set TO the Closure set produced by the
closure algorithm 2.25, passing the Item set to it;
// Filter out the interesting items:
SET New item set TO Empty set;
FOR EACH item I IN Closure set:
IF I is a basic item:
Insert I in New item set;
RETURN New item set;
Figure 2.24 The function closure() for a lexical analyzer.



59

60

SECTION 2.1

From program text to tokens the lexical structure



Data definitions:
Let Closure set be a set of dotted items.
Initializations:
Put each item in Item set in Closure set.
Inference rules:
If an item in Closure set matches the left-hand side of one of the moves
in Figure 2.19, the corresponding right-hand side must be present in Closure
set.
Figure 2.25 Closure algorithm for dotted items.


SUBSECTION 2.1.6

Creating a lexical analyzer automatically



Data definitions:
1a. A state is a set of items.
1b. Let States be a set of states.
2a. A state transition is a triple (start state, character, end state).
2b. Let Transitions be a set of state transitions.
Initializations:
1. Set States to contain a single state, Initial item set().
2. Set Transitions to the empty set.
Inference rules:
If States contains a state S, States must contain the state E and Transitions must contain the state transition (S, Ch, E) for each character Ch in
the input character set, where E = Next item set (S, Ch).
Figure 2.26 The subset algorithm for lexical analyzers.


61

62

SECTION 2.1

From program text to tokens the lexical structure



State
S0
S1
S2
S3

Next state [ ]
Ch
digit
point
S1
S2
S1
S2
S3
S3
-

Class of token recognized in[ ]


other
-

integral_number
fixed_point_number

Figure 2.27 Transition table and recognition table for the regular expressions from Figure 2.20.


SUBSECTION 2.1.6

Creating a lexical analyzer automatically


S1

S0
I->(.D)+
F->(.D)*.(D)+
F->(D)*..(D)+

I->(.D)+
!I->(D)+ .
F->(.D)*.(D)+
F->(D)*..(D)+

.
D

.
S2

S3
D

F->(D)*.(.D)+

F->(D)*.(.D)+
!F->(D)*.(D)+.

Figure 2.28 Transition diagram of the states and transitions for Figure 2.20.


63

64

SECTION 2.1

From program text to tokens the lexical structure


PROCEDURE Get next token:
    SET Start of token TO Read index;
    SET End of last token TO Uninitialized;
    SET Class of last token TO Uninitialized;

    SET Item set TO Initial item set;
    WHILE Item set /= Empty:
        SET Ch TO Input char [Read index];
        SET Item set TO Next item set [Item set, Ch];
        SET Class TO Class of token recognized in [Item set];
        IF Class /= No class:
            SET Class of last token TO Class;
            SET End of last token TO Read index;
        SET Read index TO Read index + 1;

    SET Token .class TO Class of last token;
    SET Token .repr TO Input char [Start of token .. End of last token];
    SET Read index TO End of last token + 1;
Figure 2.29 Outline of an efficient linear-time routine Get next token().



SUBSECTION 2.1.6

Creating a lexical analyzer automatically


S1

S0
I->(.D)+
F->(.D)*.(D)+
F->(D)*..(D)+

I->(.D)+
!I->(D)+ .
F->(.D)*.(D)+
F->(D)*..(D)+

.
D

.
other

S2

S3
D

F->(D)*.(.D)+

.
other

other

F->(D)*.(.D)+
!F->(D)*.(D)+.

.
other

Figure 2.30 Transition diagram of all states and transitions for Figure 2.20.


65

66

SECTION 2.1

From program text to tokens the lexical structure


state
0
1
2
3

digit=1
1
1
3
3

other=2
-

point=3
2
2
-

Figure 2.31 The transition matrix from Figure 2.27 in reduced form.



SUBSECTION 2.1.7

Transition table compression


0
1
2
3

1
1

2
2
3
3

1
3
2
-

Figure 2.32 Fitting the strips into one array.



67

68

SECTION 2.1

From program text to tokens the lexical structure


Empty state [0..3][1..3] =
((0, 1, 0), (0, 1, 0), (0, 1, 1), (0, 1, 1));
Displacement [0..3] = (0, 0, 1, 1);
Entry [1..3] = (1, 3, 2);
Figure 2.33 The transition matrix from Figure 2.27 in compressed form.



SUBSECTION 2.1.7

Transition table compression


IF Empty state [State][Ch]:
    SET New state TO No state;
ELSE entry in Entry [ ] is valid:
    SET New state TO Entry [Displacement [State] + Ch];
Figure 2.34 Code for SET New state TO Next state[State, Ch].



69

70

SECTION 2.1

From program text to tokens the lexical structure


0
1
2
3

(1, 1)

(1, 1)

(2, 3)
-

(2, 3)

(3, 1)
(3, 1)

(1, 1)
(1, 1)
(2, 3)
(2, 3)
(3, 1)
(3, 1)
-

Figure 2.35 Fitting the strips with entries marked by character.



SUBSECTION 2.1.7

Transition table compression


Displacement [0..3] = (0, 1, 4, 5);
Mark [1..8] = (1, 1, 3, 3, 1, 1, 0, 0);
Entry [1..6] = (1, 1, 2, 2, 3, 3);
Figure 2.36 The transition matrix compressed with marking by character.



71

72

SECTION 2.1

From program text to tokens the lexical structure


IF Mark [Displacement [State] + Ch] /= Ch:
    SET New state TO No state;
ELSE entry in Entry [ ] is valid:
    SET New state TO Entry [Displacement [State] + Ch];
Figure 2.37 Code for SET New state TO Next state[State, Ch] for marking by character.



SUBSECTION 2.1.7

Transition table compression


1 0 0 1 0 1 0 1 1 1 1 1 0 1 0 0 1 1 0 0 1 0 1 1 0 0 1 0 0 0 0 1

1 0 0 0 0 1 0 1

Figure 2.38 A damaged comb finding room for its teeth.




73

74

SECTION 2.1

From program text to tokens the lexical structure


0
1
2
3
4
5
6
7

w
1
3
1
1
-

x
2
2
7
-

y
4
4
-

z
6
5
-

0
1
2
3
4
5
6
7

w x y z
1 2 - -

w x y z
3 - 4 -

1 - - 6
- 2 - - - - 5
1 - 4 - 7 - - - - _______
3 7 4 5

_______
1 2 4 6
(a)

(b)

Figure 2.39 A transition table (a) and its compressed form packed by graph coloring (b).



SUBSECTION 2.1.7

Transition table compression


0
1
2
4

6
5

3
7

Figure 2.40 Interference graph for the automaton of Figure 2.39a.




75

76

SECTION 2.1

From program text to tokens the lexical structure


%{
#include "lex.h"

Token_Type Token;
int line_number = 1;    /* incremented by the newline rule below */
%}

whitespace              [ \t]

letter                  [a-zA-Z]
digit                   [0-9]
underscore              "_"
letter_or_digit         ({letter}|{digit})
underscored_tail        ({underscore}{letter_or_digit}+)
identifier              ({letter}{letter_or_digit}*{underscored_tail}*)

operator                [-+*/]
separator               [;,(){}]

%%

{digit}+                {return INTEGER;}
{identifier}            {return IDENTIFIER;}
{operator}|{separator}  {return yytext[0];}
#[^#\n]*#?              {/* ignore comment */}
{whitespace}            {/* ignore whitespace */}
\n                      {line_number++;}
.                       {return ERRONEOUS;}

%%
/* Has an empty body: this version of the lexical analyzer does nothing
   at start-up. */
void start_lex(void) {
}
/*
 * Obtains the next token from yylex() and stores it in the global Token.
 * A result of 0 from yylex() is turned into class EOF with representation
 * "<EOF>". For any other token the current line number is recorded and
 * the representation is a freshly allocated copy of yytext.
 * NOTE(review): the result of malloc() is used without a check.
 */
void get_next_token(void) {
    char *copy;

    Token.class = yylex();
    if (Token.class != 0) {
        Token.pos.line_number = line_number;
        copy = (char *)malloc(strlen(yytext) + 1);
        Token.repr = copy;
        strcpy(copy, yytext);
        return;
    }
    Token.class = EOF;
    Token.repr = "<EOF>";
}
/* Always returns 1. */
int yywrap(void) {
    return 1;
}
Figure 2.41 Lex input for the token set from Figure 2.20.



SUBSECTION 2.1.10

Lexical identification of tokens

77


FUNCTION Get next token () RETURNING a token:
    SET Simple token TO Get next simple token ();
    IF Class of Simple token = Identifier:
        SET Simple token TO Identify in symbol table (Simple token);
        // See if this has reset Class of Simple token:
        IF Class of Simple token = Macro:
            Switch to macro (Simple token);
            RETURN Get next token ();
        ELSE Class of Simple token /= Macro:
            // Identifier or Type identifier or Keyword:
            RETURN Simple token;
    ELSE Class of Simple token /= Identifier:
        RETURN Simple token;
Figure 2.42 A Get next token() that does lexical identification.



78

SECTION 2.1

From program text to tokens the lexical structure


Program
reading
module

Lexical
analyzer

Lexical
identification
module

Figure 2.43 Pipeline from input to lexical identification.




SUBSECTION 2.1.11

Symbol tables

79


FUNCTION Identify (Identifier name)
        RETURNING a pointer to Identifier info:
    SET Bucket number TO Hash value (Identifier name);
    IF there is no Identifier info record for Identifier name
            in Bucket [Bucket number]:
        Insert an empty Identifier info record for Identifier name
            in Bucket [Bucket number];
    RETURN Pointer to Identifier info record for Identifier name
        in Bucket [Bucket number];
Figure 2.44 Outline of the function Identify.



80

SECTION 2.1

From program text to tokens the lexical structure


hash_table
bucket 0

struct idf

bucket 1

idf_next

bucket 2

idf_name

bucket 3

char[]
l e n g t h \0

bucket 4
struct idf

struct idf

idf_next

idf_next

idf_name

idf_name

char[]
w i d t h \0 h e i g h t \0

Figure 2.45 A hash table of size 5 containing three identifiers.




SUBSECTION 2.1.11

Symbol tables

81


FUNCTION Identify in symbol table (Token) RETURNING a token:
    SET Identifier info pointer TO Identify (Representation of Token);
    IF Identifier info indicates macro:
        SET Class of Token TO Macro;
    ELSE IF Identifier info indicates keyword:
        SET Class of Token TO Keyword;
    ELSE IF Identifier info indicates type identifier:
        SET Class of Token TO Type identifier;
    ELSE Identifier info indicates
            variable, struct, union, etc. identifier:
        // Do not change Class of Token;
    Append Identifier info pointer to Token;
    RETURN Token;
Figure 2.46 A possible Identify in symbol table(Token) for C.



82

SECTION 2.1

From program text to tokens the lexical structure


Program
reading
module

Lexical
analyzer

Lexical
identification
module

back-calls

Figure 2.47 Pipeline from input to lexical identification, with feedback.




SUBSECTION 2.1.12

Macro processing and file inclusion


parameter

development of ch

t x t [ i ]
endptr
readptr

macro

( A < = ( c h ) & & ( c h ) < = Z )

is_capital

ch = txt[i]
parameter list

file

readptr

endptr

endptr

readptr

file

mac.h

. . . . i s _ c a p i t a l ( t x t [ i ] ) . . . . .

. . . . # i n c l u d e

" m a c . h " . . . .
readptr

input.c

endptr

Figure 2.48 An input buffer stack of include files, macro calls, and macro parameters.


83

84

SECTION 2.1

From program text to tokens the lexical structure


Program
reading
module

Line-layer
module

Lexical
analyzer

Lexical
identification
module

back-calls

Figure 2.49 Input pipeline, with line layer and feedback.




SUBSECTION 2.2.1

Two classes of parsing methods


1

(a) Pre-order

(b) Post-order

Figure 2.50 A tree with its nodes numbered in pre-order and post-order.


85

86

SECTION 2.2

From tokens to syntax tree the syntax


1

2
parse
tree

input
t1

t2

t3

t4

t5

t6

t7

t8

t9

Figure 2.51 A top-down parser recognizing the first token in the input.


SUBSECTION 2.2.1

Two classes of parsing methods


3

parse
tree

input
t1

t2

t3

t4

t5

t6

t7

t8

t9

Figure 2.52 A bottom-up parser constructing its first, second, and third nodes.


87

88

SECTION 2.2

From tokens to syntax tree the syntax


input → expression EOF
expression → term rest_expression
term → IDENTIFIER | parenthesized_expression
parenthesized_expression → '(' expression ')'
rest_expression → '+' expression | ε
Figure 2.53 A simple grammar for demonstrating top-down parsing.



SUBSECTION 2.2.3

Creating a top-down parser manually

89


#include "tokennumbers.h"

/* PARSER */
/* input -> expression EOF
 * Returns 0 when no expression starts at the current token; once an
 * expression has been recognized, the EOF token is mandatory (require()). */
int input(void) {
    int recognized = expression();

    if (recognized) {
        recognized = require(token(EOF)) != 0;
    }
    return recognized != 0;
}
/* expression -> term rest_expression
 * Returns 0 when no term starts here; after a term, rest_expression is
 * mandatory (require()). */
int expression(void) {
    if (!term()) {
        return 0;
    }
    return require(rest_expression()) != 0;
}
/* term -> IDENTIFIER | parenthesized_expression
 * Tries the alternatives in order; returns 1 as soon as one matches,
 * 0 when neither does. */
int term(void) {
    if (token(IDENTIFIER)) {
        return 1;
    }
    return parenthesized_expression() ? 1 : 0;
}
/* parenthesized_expression -> '(' expression ')'
 * Returns 0 when the current token is not '('. Once '(' has been consumed,
 * the inner expression and the closing ')' are mandatory (require()).
 * The parenthesis tokens are character constants; their quotes had been
 * lost, leaving `token(()` and `token())`, which do not parse. */
int parenthesized_expression(void) {
    return token('(') && require(expression()) && require(token(')'));
}
/* rest_expression -> '+' expression | <empty>
 * Tries the '+' alternative first; after '+', an expression is mandatory
 * (require()). The trailing `|| 1` is the empty alternative, so this
 * function always returns 1.
 * The '+' token is a character constant whose quotes had been lost; the
 * explicit parentheses only make the existing && / || grouping visible. */
int rest_expression(void) {
    return (token('+') && require(expression())) || 1;
}
/* Matches the current token against class tk.
 * On a match the input is advanced with get_next_token() and 1 is returned;
 * otherwise nothing is consumed and 0 is returned. */
int token(int tk) {
    if (tk == Token.class) {
        get_next_token();
        return 1;
    }
    return 0;
}
/* Turns a failed (zero) recognition result into a call of error().
 * Always returns 1, so it can be chained inside && expressions. */
int require(int found) {
    if (found == 0) {
        error();
    }
    return 1;
}
Figure 2.54 A recursive descent parser/recognizer for the grammar of Figure 2.53.



90

SECTION 2.2

From tokens to syntax tree the syntax


#include "lex.h"    /* for start_lex(), get_next_token(), Token */

/* DRIVER */
/* Driver: starts the lexical analyzer, reads the first token and runs the
 * recognizer on the whole input. A failed recognition is reported through
 * require(), which calls error(). */
int main(void) {
    start_lex();
    get_next_token();
    (void)require(input());
    return 0;
}
/* Reports a syntax error on standard output and terminates the program
 * with exit status 1. */
void error(void) {
    printf("Error in expression\n");
    exit(1);
}
Figure 2.55 Driver for the recursive descent parser/recognizer.



SUBSECTION 2.2.3

Creating a top-down parser manually


S → A 'a' 'b'
A → 'a' | ε
Figure 2.56 A simple grammar with a FIRST/FOLLOW conflict.



91

92

SECTION 2.2

From tokens to syntax tree the syntax


/* S -> A 'a' 'b'
 * Returns 0 only if A() fails; after A, the tokens 'a' and 'b' are
 * mandatory (require()).
 * The token arguments are character constants whose quotes had been lost
 * (`token(a)`, `token(b)`). */
int S(void) {
    return A() && require(token('a')) && require(token('b'));
}
/* A -> 'a' | <empty>
 * Always returns 1: it consumes an 'a' when one is present and otherwise
 * takes the empty alternative. This recognizer is presented as faulty
 * (see the figure caption); the greedy match is kept deliberately.
 * Only the lost quotes around the character constant are restored. */
int A(void) {
    return token('a') || 1;
}
Figure 2.57 A faulty recursive recognizer for grammar of Figure 2.56.



SUBSECTION 2.2.4

Creating a top-down parser automatically



Data definitions:
1. Token sets called FIRST sets for all terminals, non-terminals and alternatives of non-terminals in G.
2. A token set called FIRST for each alternative tail in G; an alternative tail is
a sequence of zero or more grammar symbols α if Aα is an alternative or alternative tail in G.
Initializations:
1. For all terminals T, set FIRST(T) to {T}.
2. For all non-terminals N, set FIRST(N) to the empty set.
3. For all non-empty alternatives and alternative tails α, set FIRST(α) to the
empty set.
4. Set the FIRST set of all empty alternatives and alternative tails to {ε}.
Inference rules:
1. For each rule N→α in G, FIRST(N) must contain all tokens in FIRST(α),
including ε if FIRST(α) contains it.
2. For each alternative or alternative tail α of the form Aβ, FIRST(α) must
contain all tokens in FIRST(A), excluding ε, should FIRST(A) contain it.
3. For each alternative or alternative tail α of the form Aβ and FIRST(A) contains ε, FIRST(α) must contain all tokens in FIRST(β), including ε if
FIRST(β) contains it.
Figure 2.58 Closure algorithm for computing the FIRST sets in a grammar G.


93

94

SECTION 2.2

From tokens to syntax tree the syntax


Rule/alternative (tail)
input
expression EOF
EOF

FIRST set
{}
{}
{ EOF }

expression
term rest_expression
rest_expression

{}
{}
{}

term
IDENTIFIER

{}
{ IDENTIFIER }

parenthesized_expression

{}

|
parenthesized_expression
( expression )
expression )
)

{}
{ ( }
{}
{ ) }

rest_expression
+ expression
expression
|

{}
{ + }
{}
{ }

Figure 2.59 The initial FIRST sets.



SUBSECTION 2.2.4

Creating a top-down parser automatically


Rule/alternative (tail)
input
expression EOF
EOF

FIRST set
{ IDENTIFIER ( }
{ IDENTIFIER ( }
{ EOF }

expression
term rest_expression
rest_expression

{ IDENTIFIER ( }
{ IDENTIFIER ( }
{ + }

term
IDENTIFIER

{ IDENTIFIER ( }
{ IDENTIFIER }

parenthesized_expression

{ ( }

|
parenthesized_expression
( expression )
expression )
)

{
{
{
{

( }
( }
IDENTIFIER ( }
) }

rest_expression
+ expression
expression
|

{
{
{
{

+ }
+ }
IDENTIFIER ( }
}

Figure 2.60 The final FIRST sets.



95

96

SECTION 2.2

From tokens to syntax tree the syntax


void input(void) {
switch (Token.class) {
case IDENTIFIER: case (:
expression(); token(EOF); break;
default:
error();
}
}
void expression(void) {
switch (Token.class) {
case IDENTIFIER: case (:
term(); rest_expression(); break;
default:
error();
}
}
void term(void) {
switch (Token.class) {
case IDENTIFIER:
token(IDENTIFIER); break;
case (:
parenthesized_expression(); break;
default:
error();
}
}
void parenthesized_expression(void) {
switch (Token.class) {
case (:
token((); expression(); token()); break;
default:
error();
}
}
/* rest_expression -> '+' expression | <empty>
 * '+' predicts the first alternative. EOF and ')' predict the empty
 * alternative, for which nothing is consumed. Any other look-ahead token
 * is an error.
 * The '+' and ')' tokens are character constants whose quotes had been
 * lost (`case +:`, `token(+)`, `case ):`). */
void rest_expression(void) {
    switch (Token.class) {
    case '+':
        token('+'); expression(); break;
    case EOF: case ')':
        break;
    default:
        error();
    }
}
void token(int tk) {
if (tk != Token.class) error();
get_next_token();
}
Figure 2.61 A predictive parser for the grammar of Figure 2.53.



SUBSECTION 2.2.4

Creating a top-down parser automatically



Data definitions:
1. Token sets called FOLLOW sets for all non-terminals in G.
2. Token sets called FIRST sets for all alternatives and alternative tails in G.
Initializations:
1. For all non-terminals N, set FOLLOW(N) to the empty set.
2. Set all FIRST sets to the values determined by the algorithm for FIRST sets.
Inference rules:
1. For each rule of the form M→αNβ in G, FOLLOW(N) must contain all
tokens in FIRST(β), excluding ε, should FIRST(β) contain it.
2. For each rule of the form M→αNβ in G where FIRST(β) contains ε,
FOLLOW(N) must contain all tokens in FOLLOW(M).
Figure 2.62 Closure algorithm for computing the FOLLOW sets in grammar G.


97

98

SECTION 2.2

From tokens to syntax tree the syntax


Rule                        FIRST set               FOLLOW set
input                       { IDENTIFIER '(' }      {}
expression                  { IDENTIFIER '(' }      { EOF ')' }
term                        { IDENTIFIER '(' }      { '+' EOF ')' }
parenthesized_expression    { '(' }                 { '+' EOF ')' }
rest_expression             { '+' ε }               { EOF ')' }

Figure 2.63 The FIRST and FOLLOW sets for the grammar from Figure 2.53.



SUBSECTION 2.2.4

Creating a top-down parser automatically


/* S -> A 'a' 'b'
 * Predicted on the look-ahead token 'a'; any other token is an error.
 * The 'a' and 'b' tokens are character constants whose quotes had been
 * lost (`case a:`, `token(a)`, `token(b)`). */
void S(void) {
    switch (Token.class) {
    case 'a':
        A(); token('a'); token('b'); break;
    default:
        error();
    }
}
/* A -> 'a' | <empty>
 * Both alternatives are predicted by the same look-ahead token 'a', so the
 * switch carries the label 'a' twice. This mirrors the FIRST/FOLLOW
 * conflict of the grammar this parser was derived from and is kept on
 * purpose; a C compiler rejects the duplicate case label.
 * Only the lost quotes around the character constants are restored. */
void A(void) {
    switch (Token.class) {
    case 'a':
        token('a'); break;
    case 'a':
        break;
    default:
        error();
    }
}
Figure 2.64 A predictive parser for the grammar of Figure 2.56.



99

100

SECTION 2.2

From tokens to syntax tree the syntax


Top of stack/state:
Look-ahead token
IDENTIFIER
+
(
)
EOF

input
expression
expression

expression
term
term
rest_rest_expression
expression

term
IDENTIFIER
parenthesized_expression

parenthesized_( expression )
expression

rest_expression
+

expression


Figure 2.65 Transition table for an LL(1) parser for the grammar of Figure 2.53.



SUBSECTION 2.2.4

Creating a top-down parser automatically

101


Prediction stack:
Present input:

expression ) rest_expression EOF


IDENTIFIER + IDENTIFIER ) + IDENTIFIER EOF

Figure 2.66 Prediction stack and present input in a push-down automaton.



102

SECTION 2.2

From tokens to syntax tree the syntax


prediction stack

input

A i-1

Ai
tk

A0

t k+1
t
A
transition table

prediction stack

input

A.....
tk

A i-1

A0

t k+1

Figure 2.67 Prediction move in an LL(1) push-down automaton.




SUBSECTION 2.2.4

Creating a top-down parser automatically


prediction stack

tk

input

tk

prediction stack

A i-1

input

A i-1

A0

t k+1

A0

t k+1

Figure 2.68 Match move in an LL(1) push-down automaton.




103

104

SECTION 2.2

From tokens to syntax tree the syntax


IMPORT Input token [1..];

// from lexical analyzer

SET Input token index TO 1;


SET Prediction stack TO Empty stack;
PUSH Start symbol ON Prediction stack;
WHILE Prediction stack /= Empty stack:
Pop top of Prediction stack into Predicted;
IF Predicted is a Token class:
// Try a match move:
IF Predicted = Input token [Input token index] .class:
SET Input token index TO Input token index + 1; // matched
ELSE Predicted /= Input token:
ERROR "Expected token not found: ", Predicted;
ELSE Predicted is a Non-terminal:
// Try a prediction move, using the input token as look-ahead:
SET Prediction TO
Prediction table [Predicted, Input token [Input token index]];
IF Prediction = Empty:
ERROR "Token not expected: ", Input token [Input token index];
ELSE Prediction /= Empty:
PUSH The symbols of Prediction ON Prediction stack;
Figure 2.69 Predictive parsing with an LL(1) push-down automaton.



SUBSECTION 2.2.4

Creating a top-down parser automatically

105


Initial situation:
Prediction stack:
Input:

input
( IDENTIFIER + IDENTIFIER ) + IDENTIFIER EOF

Prediction moves:
Prediction stack:
Prediction stack:
Prediction stack:
Prediction stack:
Input:

expression EOF
term rest_expression EOF
parenthesized_expression rest_expression EOF
( expression ) rest_expression EOF
( IDENTIFIER + IDENTIFIER ) + IDENTIFIER EOF

Match move on (:
Prediction stack:
Input:

expression ) rest_expression EOF


IDENTIFIER + IDENTIFIER ) + IDENTIFIER EOF

Figure 2.70 The first few parsing moves for (i+i)+i.



106

SECTION 2.2

From tokens to syntax tree the syntax


// Step 1: construct acceptable set:
SET Acceptable set TO Acceptable set of (Prediction stack);
// Step 2: skip unacceptable tokens:
WHILE Input token [Input token index] IS NOT IN Acceptable set:
MESSAGE "Token skipped: ", Input token [Input token index];
SET Input token index TO Input token index + 1;
// Step 3: resynchronize the parser:
SET the flag Resynchronized TO False;
WHILE NOT Resynchronized:
Pop top of Prediction stack into Predicted;
IF Predicted is a Token class:
// Try a match move:
IF Predicted = Input token [Input token index] .class:
SET Input token index TO Input token index + 1; // matched
SET Resynchronized TO True;
// resynchronized!
ELSE Predicted /= Input token:
Insert a token of class Predicted, including representation;
MESSAGE "Token inserted of class ", Predicted;
ELSE Predicted is a Non-terminal:
// Do a prediction move:
SET Prediction TO
Prediction table [Predicted, Input token [Input token index]];
IF Prediction = Empty:
SET Prediction TO Shortest production table [Predicted];
// Now Prediction /= Empty:
PUSH The symbols of Prediction ON Prediction stack;
Figure 2.71 Acceptable-set error recovery in a predictive parser.



SUBSECTION 2.2.4

Creating a top-down parser automatically



Data definitions:
1. A set of pairs of the form (production rule, integer).
2a. A set of pairs of the form (non-terminal, integer).
2b. A set of pairs of the form (terminal, integer).
Initializations:
1a. For each production rule N→A_1...A_n with n>0 there is a pair (N→A_1...A_n, ∞).
1b. For each production rule N→ε there is a pair (N→ε, 0).
2a. For each non-terminal N there is a pair (N, ∞).
2b. For each terminal T there is a pair (T, 1).
Inference rules:
1. For each production rule N→A_1...A_n with n>0, if there are pairs (A_1, l_1) to
(A_n, l_n) with all l_i < ∞, the pair (N→A_1...A_n, l_N) must be replaced by a pair
(N→A_1...A_n, l_new) where $l_{new} = \sum_{i=1}^{n} l_i$, provided l_new < l_N.
2. For each non-terminal N, if there are one or more pairs of the form (N→α,
l_i) with l_i < ∞, the pair (N, l_N) must be replaced by (N, l_new) where l_new is the
minimum of the l_i's, provided l_new < l_N.
Figure 2.72 Closure algorithm for computing lengths of shortest productions.


107

108

SECTION 2.2

From tokens to syntax tree the syntax


Non-terminal                Alternative             Shortest length
input                       expression EOF          2
expression                  term rest_expression    1
term                        IDENTIFIER              1
parenthesized_expression    '(' expression ')'      3
rest_expression             ε                       0

Figure 2.73 Shortest production table for the grammar of Figure 2.53.



SUBSECTION 2.2.4

Creating a top-down parser automatically

109


Error detected, since Prediction table [expression, +] is empty:
Prediction stack:
expression ) rest_expression EOF
Input:
+ IDENTIFIER ) + IDENTIFIER EOF
Shortest production for expression:
Prediction stack:
term rest_expression ) rest_expression EOF
Input:
+ IDENTIFIER ) + IDENTIFIER EOF
Shortest production for term:
Prediction stack:
IDENTIFIER rest_expression ) rest_expression EOF
Input:
+ IDENTIFIER ) + IDENTIFIER EOF
Token IDENTIFIER inserted in the input and matched:
Prediction stack:
rest_expression ) rest_expression EOF
Input:
+ IDENTIFIER ) + IDENTIFIER EOF
Normal prediction for rest_expression, resynchronized:
Prediction stack:
+ expression ) rest_expression EOF
Input:
+ IDENTIFIER ) + IDENTIFIER EOF

Figure 2.74 Some steps in parsing (i++i)+i.



110

SECTION 2.2

From tokens to syntax tree the syntax


expression(int *e) →
    term(int *t) {*e = *t;}
    expression_tail_option(int *e)
;
expression_tail_option(int *e) →
    '-' term(int *t) {*e -= *t;}
    expression_tail_option(int *e)
|   ε
;
term(int *t) → IDENTIFIER {*t = 1;};
Figure 2.75 Minimal non-left-recursive grammar for expressions.



SUBSECTION 2.2.4

Creating a top-down parser automatically

111


%lexical get_next_token_class;
%token IDENTIFIER;
%start Main_Program, main;
/* Start rule: evaluates one expression and prints its value. */
main {int result;}:
    expression(&result)         {printf("result = %d\n", result);}
;

/* expression -> term expression_tail_option
 * *e receives the value of the first term, which the tail then updates. */
expression(int *e) {int t;}:
    term(&t)                    {*e = t;}
    expression_tail_option(e)
;

/* expression_tail_option -> '-' term expression_tail_option | <empty>
 * Each '-' term subtracts that term's value from *e. */
expression_tail_option(int *e) {int t;}:
    '-' term(&t)                {*e -= t;}
    expression_tail_option(e)
|
;

/* term -> IDENTIFIER; every identifier is given the value 1. */
term(int *t):
    IDENTIFIER                  {*t = 1;}
;

Figure 2.76 LLgen code for a parser for simple expressions.



112

SECTION 2.2

From tokens to syntax tree the syntax


{
#include "lex.h"

/* Driver: starts the lexical analyzer, then runs the parser entry point
 * Main_Program(). */
int main(void) {
    start_lex();
    Main_Program();
    return 0;
}
Token_Type Last_Token;          /* error recovery support: copy of Token saved by insert_token() */
int Reissue_Last_Token = 0;     /* error recovery support: when nonzero, get_next_token_class()
                                   restores Last_Token instead of reading a new token */

/* Delivers the class of the next token.
 * Normally a fresh token is read with get_next_token(). If a token was
 * saved by insert_token(), that saved token is restored into Token instead
 * and the reissue flag is cleared. */
int get_next_token_class(void) {
    if (!Reissue_Last_Token) {
        get_next_token();
    } else {
        Token = Last_Token;
        Reissue_Last_Token = 0;
    }
    return Token.class;
}
/* Replaces the current token by one of class token_class.
 * The current token is saved in Last_Token first and flagged for reissue,
 * so get_next_token_class() delivers it again afterwards. */
void insert_token(int token_class) {
    Reissue_Last_Token = 1;
    Last_Token = Token;         /* must be saved before Token is overwritten */
    Token.class = token_class;
    /* and set the attributes of Token, if any */
}
/* Prints a readable form of a token class on standard output:
 * "IDENTIFIER", "<EOF>", or, for any other class, the class value printed
 * as a character. */
void print_token(int token_class) {
    if (token_class == IDENTIFIER) {
        printf("IDENTIFIER");
    } else if (token_class == EOFILE) {
        printf("<EOF>");
    } else {
        printf("%c", token_class);
    }
}
#include "../../fg2/LLgen/LLmessage.i"
}

Figure 2.77 Auxiliary C code for a parser for simple expressions.



SUBSECTION 2.2.4

Creating a top-down parser automatically


/* Error-reporting routine called with a code in `class`:
 *    0   the current token LLsymb is reported as deleted;
 *   -1   end of input was expected but LLsymb was found;
 *  else  a token of class `class` is inserted via insert_token() and
 *        reported as missing in front of LLsymb.
 * Every message ends with the offending token LLsymb and a newline. */
void LLmessage(int class) {
    if (class == 0) {
        printf("Token deleted: ");
    } else if (class == -1) {
        printf("End of input expected, but found token ");
    } else {
        insert_token(class);
        printf("Missing token ");
        print_token(class);
        printf(" inserted in front of token ");
    }
    print_token(LLsymb);
    printf("\n");
}
Figure 2.78 The routine LLmessage() required by LLgen.



113

114

SECTION 2.2

From tokens to syntax tree the syntax


/* expression -> expression '-' term | term
 * Allocates the node before parsing the alternative and tags it with the
 * alternative chosen ('-' or 'T'); the children are filled in by the
 * sub-rules. The quotes around '-' and 'T' had been lost. */
expression(struct expr **ep):
    {(*ep) = new_expr(); (*ep)->type = '-';}
    expression(&(*ep)->expr) '-' term(&(*ep)->term)
|
    {(*ep) = new_expr(); (*ep)->type = 'T';}
    term(&(*ep)->term)
;
/* term -> IDENTIFIER
 * Allocates a term node tagged 'I'. The quotes around 'I' had been lost. */
term(struct term **tp):
    {(*tp) = new_term(); (*tp)->type = 'I';}
    IDENTIFIER
;
Figure 2.79 Original grammar with code for constructing a parse tree.



SUBSECTION 2.2.4

Creating a top-down parser automatically

115


/* Parse-tree node for an expression. */
struct expr {
    int type;               /* '-' or 'T' */
    struct expr *expr;      /* for '-' */
    struct term *term;      /* for '-' and 'T' */
};
/* Allocates one uninitialized struct expr with malloc(); the result is the
 * unchecked malloc() return value. */
#define new_expr() ((struct expr *)malloc(sizeof(struct expr)))
/* Parse-tree node for a term. */
struct term {
    int type;               /* 'I' only */
};
/* Allocates one uninitialized struct term with malloc(); the result is the
 * unchecked malloc() return value.
 * The macro body must be on the #define line: it had been split off, so the
 * macro expanded to nothing and the body was left as a stray expression. */
#define new_term() ((struct term *)malloc(sizeof(struct term)))

extern void print_expr(struct expr *e);


extern void print_term(struct term *t);
Figure 2.80 Data structures for the parse tree.



116

SECTION 2.2

From tokens to syntax tree the syntax


/* expression -> expression '-' term | term   (left-recursive on purpose)
 * First alternative: the sub-expression is parsed into *ep, then a new '-'
 * node takes its place with the old tree as its expr child.
 * The quotes around '-' and 'T' had been lost. */
expression(struct expr **ep):
    expression(ep)
    {
        struct expr *e_aux = (*ep);
        (*ep) = new_expr(); (*ep)->type = '-'; (*ep)->expr = e_aux;
    }
    '-' term(&(*ep)->term)
|
    {(*ep) = new_expr(); (*ep)->type = 'T';}
    term(&(*ep)->term)
;
Figure 2.81 Visibly left-recursive grammar with code for constructing a parse tree.



SUBSECTION 2.2.4

Creating a top-down parser automatically

117


/* expression -> term expression_tail_option
 * Builds a 'T' node for the first term; the tail then wraps it for every
 * following '-' term. The quotes around 'T' had been lost. */
expression(struct expr **ep):
    {(*ep) = new_expr(); (*ep)->type = 'T';}
    term(&(*ep)->term)
    expression_tail_option(ep)
;
/* expression_tail_option -> '-' term expression_tail_option | <empty>
 * The tree built so far (*ep) becomes the expr child of a new '-' node,
 * which replaces it in *ep; the term after '-' becomes its term child.
 * The quotes around '-' had been lost. */
expression_tail_option(struct expr **ep):
    {
        struct expr *e_aux = (*ep);
        (*ep) = new_expr(); (*ep)->type = '-'; (*ep)->expr = e_aux;
    }
    '-' term(&(*ep)->term)
    expression_tail_option(ep)
|
;
Figure 2.82 Adapted LLgen grammar with code for constructing a parse tree.



118

SECTION 2.2

From tokens to syntax tree the syntax


ep:

ep:

*ep:

*ep:

e_aux:

tree

tree

(a)

(b)

ep:

*ep:

e_aux:

new_expr
type:-
expr:
tree

term:

(c)

ep:

*ep:

type:-
expr:
term:

(d)
tree

Figure 2.83 Tree transformation performed by expression_tail_option.




SUBSECTION 2.2.5

Creating a bottom-up parser automatically


input → expression EOF
expression → term | expression '+' term
term → IDENTIFIER | '(' expression ')'
Figure 2.84 A simple grammar for demonstrating bottom-up parsing.



119

120

SECTION 2.2

From tokens to syntax tree the syntax


Z → E $
E → T | E + T
T → i | ( E )
Figure 2.85 An abbreviated form of the simple grammar for bottom-up parsing.



SUBSECTION 2.2.5

Creating a bottom-up parser automatically



Data definitions:
A set S of LR(0) items.
Initializations:
S is prefilled externally with one or more LR(0) items.
Inference rules:
If S holds an item of the form P→α•Nβ, then for each production rule N→γ
in G, S must also contain the item N→•γ.
Figure 2.86 closure algorithm for LR(0) item sets for a grammar G.


121

122

SECTION 2.2

From tokens to syntax tree the syntax


FUNCTION Initial item set RETURNING an item set:
SET New item set TO Empty;
// Initial contents - obtain from the start symbol:
FOR EACH production rule S→α for the start symbol S:
SET New item set TO New item set + item S→•α;
RETURN closure (New item set);
Figure 2.87 The routine Initial item set for an LR(0) parser.



SUBSECTION 2.2.5

Creating a bottom-up parser automatically

123


FUNCTION Next item set (Item set, Symbol) RETURNING an item set:
SET New item set TO Empty;
// Initial contents - obtain from token moves:
FOR EACH item N→α•Sβ IN Item set:
IF S = Symbol:
SET New item set TO New item set + item N→αS•β;
RETURN closure (New item set);
Figure 2.88 The routine Next item set() for an LR(0) parser.



124

SECTION 2.2

From tokens to syntax tree the syntax


S6
E -> T
S0

T
Z
E
E
T
T

->
->
->
->
->

E$
T
E+T
i
(E)

T
E
E
T
T

i S5
E
S1

(
T -> (E)
E -> E+T

i
+

+
E -> E+T
T -> i
T -> (E)

S3

(E)
T
E+T
i
(E)

Z -> E$
E -> E+T

->
->
->
->
->

i
T -> i

S2

S7

Z -> E$

S8

)
S9
T -> (E)

T
S4
E -> E+T

Figure 2.89 Transition diagram for the LR(0) automaton for the grammar of Figure 2.85.


SUBSECTION 2.2.5

Creating a bottom-up parser automatically



state
0
1
2
3
4
5
6
7
8
9

|
GOTO table
|
symbol

i
+
(
)
$
E
T
5

3
5

7
3

8
9

ACTION table

shift
shift
ZE$
shift
EE+T
Ti
ET
shift
shift
T(E)

Figure 2.90 GOTO and ACTION tables for the LR(0) automaton for the grammar of Figure 2.85.



125

126

SECTION 2.2

From tokens to syntax tree the syntax


Stack
S0
S0
S0
S0
S0
S0
S0
S0
S0
S0

i
T
E
E
E
E
E
E
Z

S5
S6
S1
S1
S1
S1
S1
S1

+ S3
+ S3 i S5
+ S3 T S4
$ S2

Input
i + i
+ i
+ i
+ i
i

$
$
$
$
$
$
$
$

Action
shift
reduce by Ti
reduce by ET
shift
shift
reduce by Ti
reduce by EE+T
shift
reduce by ZE$
stop

Figure 2.91 LR(0) parsing of the input i+i.



SUBSECTION 2.2.5

Creating a bottom-up parser automatically

127


IMPORT Input token [1..];

// from the lexical analyzer

SET Input token index TO 1;


SET Reduction stack TO Empty stack;
PUSH Start state ON Reduction stack;
WHILE Reduction stack /= {Start state, Start symbol, End state}
SET State TO Top of Reduction stack;
SET Action TO Action table [State];
IF Action = "shift":
// Do a shift move:
SET Shifted token TO Input token [Input token index];
SET Input token index TO Input token index + 1; // shifted
PUSH Shifted token ON Reduction stack;
SET New state TO Goto table [State, Shifted token .class];
PUSH New state ON Reduction stack;
// can be Empty
ELSE IF Action = ("reduce", N→α):
// Do a reduction move:
Pop the symbols of α from Reduction stack;
SET State TO Top of Reduction stack;
// update State
PUSH N ON Reduction stack;
SET New state TO Goto table [State, N];
PUSH New state ON Reduction stack;
// cannot be Empty
ELSE Action = Empty:
ERROR "Error at token ", Input token [Input token index];
Figure 2.92 LR(0) parsing with a push-down automaton.



128

SECTION 2.2

From tokens to syntax tree the syntax


look-ahead token

state
i
+
(
)
$
0
1
2
3
4
5
6
7
8
9

shift
shift
shift

shift
ZE$
shift

EE+T
Ti
ET
shift

EE+T
Ti
ET

EE+T
Ti
ET

shift
T(E)

T(E)

shift
shift
T(E)

Figure 2.93 ACTION table for the SLR(1) automaton for the grammar of Figure 2.85.



Creating a bottom-up parser automatically

SUBSECTION 2.2.5



state

0
1
2
3
4
5
6
7
8
9

s5

stack symbol/look-ahead token


+
(
)
$
E
s1
s3

s5

s7

s4
r3
r4
r2

r3
r4
r2

s7
s3
r5

s6

s2
r1

r3
r4
r2
s5

s8
s9
r5

s6

r5

Figure 2.94 ACTION/GOTO table for the SLR(1) automaton for the grammar of Figure 2.85.



129

130

SECTION 2.2

From tokens to syntax tree the syntax


S → A | x b
A → a A b | B
B → x
Figure 2.95 Grammar for demonstrating the LR(1) technique.



SUBSECTION 2.2.5

Creating a bottom-up parser automatically


S7

S4
A->B.{b$}

B->x.{b$}

B
S0

S->.A{$}
S->.xb{$}
A->.aAb{b$}
A->.B{b$}
B->.x{b$}

S3
a

A->a.Ab{b$}
A->.aAb{b$}
A->.B{b$}
B->.x{b$}

S1

S6
A->aA.b{b$}

S->A.{$}

b
S2
S->x.b{$}
B->x.{b$}

S8
A->aAb.{b$}

shift-reduce
conflict
S5

S->xb.{$}

Figure 2.96 The SLR(1) automaton for the grammar of Figure 2.95.



131

132

From tokens to syntax tree the syntax

SECTION 2.2


S4

S9

A->B.{$}

B->x.{b}

A->B.{b}

A->a.Ab{$}
A->.aAb{b}
A->.B{b}
B->.x{b}

A->a.Ab{b}
A->.aAb{b}
A->.B{b}
B->.x{b}

a
S 11

S6
A->aA.b{b}

A->aA.b{$}

b
S2
S->x.b{$}
B->x.{$}

x
S 10

S1
S->A.{$}

B
S3

S0
S->.A{$}
S->.xb{$}
A->.aAb{$}
A->.B{$}
B->.x{$}
A

S7

S 12

S8
A->aAb.{$}

A->aAb.{b}

b
S5
S->xb.{$}

Figure 2.97 The LR(1) automaton for the grammar of Figure 2.95.



SUBSECTION 2.2.5

Creating a bottom-up parser automatically



Data definitions:
A set S of LR(1) items of the form N→α•β{σ}.
Initializations:
S is prefilled externally with one or more LR(1) items.
Inference rules:
If S holds an item of the form P→α•Nβ{σ}, then for each production rule
N→γ in G, S must also contain the item N→•γ{τ}, where τ = FIRST(β{σ}).
Figure 2.98 closure algorithm for LR(1) item sets for a grammar G.


133

134

SECTION 2.2

From tokens to syntax tree the syntax


IMPORT Input token [1..];

// from the lexical analyzer

SET Input token index TO 1;


SET Reduction stack TO Empty stack;
PUSH Start state ON Reduction stack;
WHILE Reduction stack /= {Start state, Start symbol, End state}
SET State TO Top of Reduction stack;
SET Look ahead TO Input token [Input token index] .class;
SET Action TO Action table [State, Look ahead];
IF Action = "shift":
// Do a shift move:
SET Shifted token TO Input token [Input token index];
SET Input token index TO Input token index + 1; // shifted
PUSH Shifted token ON Reduction stack;
SET New state TO Goto table [State, Shifted token .class];
PUSH New state ON Reduction stack;
// cannot be Empty
ELSE IF Action = ("reduce", N→α):
// Do a reduction move:
Pop the symbols of α from Reduction stack;
SET State TO Top of Reduction stack;
// update State
PUSH N ON Reduction stack;
SET New state TO Goto table [State, N];
PUSH New state ON Reduction stack;
// cannot be Empty
ELSE Action = Empty:
ERROR "Error at token ", Input token [Input token index];
Figure 2.99 LR(1) parsing with a push-down automaton.



SUBSECTION 2.2.5

Creating a bottom-up parser automatically


S 4,9

S7
B->x.{b}

A->B.{b$}
x
B

B
S0

S->.A{$}
S->.xb{$}
A->.aAb{$}
A->.B{$}
B->.x{$}
A

S 3,10
a

A->a.Ab{b$}
A->.aAb{b}
A->.B{b}
B->.x{b}

S1

S 6,11
A->aA.b{b$}

S->A.{$}

b
S2
S->x.b{$}
B->x.{$}

S 8,12
A->aAb.{b$}

b
S5
S->xb.{$}

Figure 2.100 The LALR(1) automaton for the grammar of Figure 2.95.



135

136

SECTION 2.2

From tokens to syntax tree the syntax


if_statement

if ( expression ) statement

x > 0

if_statement

if ( expression ) statement else statement

y > 0

p = 0;

Figure 2.101 A possible syntax tree for a nested if-statement.




q = 0;

SUBSECTION 2.2.5

Creating a bottom-up parser automatically


if_statement

if ( expression ) statement else statement

x > 0

if_statement

q = 0;

if ( expression ) statement

y > 0

p = 0;

Figure 2.102 An alternative syntax tree for the same nested if-statement.


137

138

SECTION 2.2

From tokens to syntax tree the syntax



sv

N ->
R ->
R ->
G ->

sw H

sx

tx

. R
. G H I
. erroneous_R
.....

Figure 2.103 LR error recovery detecting the error.




SUBSECTION 2.2.5

Creating a bottom-up parser automatically


sv

N ->
R ->
R ->
G ->

tx

. R
. G H I
. erroneous_R
.....

Figure 2.104 LR error recovery finding an error recovery state.




139

140

SECTION 2.2

From tokens to syntax tree the syntax


sv

sz

tx

......

tz

N -> R .
-> . ty ...
-> . tz ...
...

Figure 2.105 LR error recovery repairing the stack.




Creating a bottom-up parser automatically

SUBSECTION 2.2.5


sv

sz

tz

N -> R .
-> . ty ...
-> . tz ...
...

Figure 2.106 LR error recovery repairing the input.




141

142

SECTION 2.2

From tokens to syntax tree the syntax


sv

sz

tz

sa

-> tz . ...
...
...

Figure 2.107 LR error recovery restarting the parser.




SUBSECTION 2.2.5

Creating a bottom-up parser automatically

143


%{
#include "tree.h"
%}

/* Semantic values are pointers to parse-tree nodes. */
%union {
    struct expr *expr;
    struct term *term;
}
%type <expr> expression;
%type <term> term;
%token IDENTIFIER
%start main
%%
/* Prints the parse tree of the whole input, followed by a newline. */
main:
    expression
        {print_expr($1); printf("\n");}
;

/* expression -> expression '-' term | term
 * Each node is tagged with the alternative that produced it. */
expression:
    expression '-' term
        {$$ = new_expr(); $$->type = '-'; $$->expr = $1; $$->term = $3;}
|
    term
        {$$ = new_expr(); $$->type = 'T'; $$->term = $1;}
;

/* term -> IDENTIFIER */
term:
    IDENTIFIER
        {$$ = new_term(); $$->type = 'I';}
;
%%
Figure 2.108 Yacc code for constructing parse trees.



144

SECTION 2.2

From tokens to syntax tree the syntax


#include "lex.h"

/* Driver: starts the lexical analyzer and runs the parser. */
int main(void) {
start_lex();
yyparse();      /* routine generated by yacc */
return 0;
}

/* Token supplier for the parser: reads the next token with
 * get_next_token() and returns its class. */
int yylex(void) {
    int token_class;

    get_next_token();
    token_class = Token.class;
    return token_class;
}
/* Error callback: writes msg and a newline to standard error.
 * Always returns 0. */
int yyerror(const char *msg)
{
    (void)fprintf(stderr, "%s\n", msg);
    return 0;
}
Figure 2.109 Auxiliary code for the yacc parser for simple expressions.



SECTION 2.3

Conclusion





 Syntax analysis
Lexical
analysis

Top-down  Decision on first char-  Decision on first

 acter: manual method  token: LL(1) method
 Decision on reduce
Bottom-up  Decision on reduce
 items: finite-state
 items: LR techniques



automata

Figure 2.110 A very high-level view of text analysis techniques.




145

146

CHAPTER 2

From program text to abstract syntax tree


declaration: decl_specifiers init_declarator? ';'
decl_specifiers: type_specifier decl_specifiers?
type_specifier: 'int' | 'long'
init_declarator: declarator initializer?
declarator: IDENTIFIER | declarator '(' ')' | declarator '[' ']'
initializer:
    '=' expression
|   '=' '{' initializer_list '}' | '=' '{' initializer_list ',' '}'
initializer_list:
    expression
|   initializer_list ',' initializer_list | '{' initializer_list '}'
Figure 2.111 A simplified grammar for declarations in C.



SECTION 2.3

Conclusion


type → actual_type | virtual_type
actual_type → actual_basic_type actual_size
virtual_type → virtual_basic_type virtual_size
actual_basic_type → 'int' | 'char'
actual_size → '[' NUMBER ']'
virtual_basic_type → 'int' | 'char' | 'void'
virtual_size → '[' ']'
Figure 2.112 Sample grammar for type.



147



Annotating the abstract


syntax tree the context

148

SECTION 3.1

Attribute grammars



Inherited attributes of A

Synthesized attributes of A

Attribute
evaluation
rules

inh.

synth.

inh.

synth.

inh.

synth.

Figure 3.1 Data flow in a node with attributes.




149

150

CHAPTER 3

Annotating the abstract syntax tree the context


Constant_definition (INH old symbol table, SYN new symbol table) →
'CONST' Defined_identifier '=' Expression ';'
ATTRIBUTE RULES:
SET Expression .symbol table TO
Constant_definition .old symbol table;
SET Constant_definition .new symbol table TO
Updated symbol table (
Constant_definition .old symbol table,
Defined_identifier .name,
Checked type of Constant_definition (Expression .type),
Expression .value
);
Defined_identifier (SYN name) ...
Expression (INH symbol table, SYN type, SYN value) ...
Figure 3.2 A simple attribute rule for Constant_definition.



SECTION 3.1

Attribute grammars

151



old symbol table Constant_definition

Defined_identifier

name

new symbol table

symbol table

Expression

type

value

Figure 3.3 Dependency graph of the rule for Constant_definition from Figure 3.2.


152

SECTION 3.1

Attribute grammars


Expression (INH symbol table, SYN type, SYN value)
Number
ATTRIBUTE RULES:
SET Expression .type TO Number .type;
SET Expression .value TO Number .value;
Figure 3.4 Trivial attribute grammar for Expression.



SUBSECTION 3.1.1

Dependency graphs



old symbol table=


Constant_definition
S

Defined_identifier

name

new symbol table

symbol table

Expression

Identifier

name

Number

pi

name=
"pi"

3.14159265

type

type

type=
real

Figure 3.5 Sample attributed syntax tree with data flow.




value

value

value=
3.14..

153

154

SECTION 3.1

Attribute grammars



old symbol table=


new symbol table=
Constant_definition
S+("pi",real,3.14..)
S

Defined_identifier

name=
"pi"

symbol table=
type=
Expression
S
real

Identifier

name=
"pi"

Number

type=
real

pi

name=
"pi"

3.14159265

value=
3.14..

value=
3.14..

type=
real

value=
3.14..

Figure 3.6 The attributed syntax tree from Figure 3.5 after attribute evaluation.


SUBSECTION 3.1.2

Attribute evaluation


Number → Digit_Seq Base_Tag
Digit_Seq → Digit_Seq Digit | Digit
Digit → Digit_Token
// '0' '1' '2' '3' '4' '5' '6' '7' '8' '9'
Base_Tag → 'B' | 'D'
Figure 3.7 A context-free grammar for octal and decimal numbers.



155

156

SECTION 3.1

Attribute grammars


Number(SYN value)
Digit_Seq Base_Tag
ATTRIBUTE RULES
SET Digit_Seq .base TO Base_Tag .base;
SET Number .value TO Digit_Seq .value;
Digit_Seq(INH base, SYN value)
Digit_Seq[1] Digit
ATTRIBUTE RULES
SET Digit_Seq[1] .base TO Digit_Seq .base;
SET Digit .base TO Digit_Seq .base;
SET Digit_Seq .value TO
Digit_Seq[1] .value * Digit_Seq .base + Digit .value;
|
Digit
ATTRIBUTE RULES
SET Digit .base TO Digit_Seq .base;
SET Digit_Seq .value TO Digit .value;
Digit(INH base, SYN value)
Digit_Token
ATTRIBUTE RULES
SET Digit .value TO Checked digit value (
Value_of (Digit_Token .repr [0]) - Value_of ('0'),
base
);
Base_Tag(SYN base)
B
ATTRIBUTE RULES
SET Base_Tag .base TO 8;
|
D
ATTRIBUTE RULES
SET Base_Tag .base TO 10;
Figure 3.8 An attribute grammar for octal and decimal numbers.



SUBSECTION 3.1.2

Attribute evaluation

157


FUNCTION Checked digit value (Token value, Base) RETURNING an integer:
IF Token value < Base: RETURN Token value;
ELSE Token value >= Base:
ERROR "Token " Token value " cannot be a digit in base " Base;
RETURN Base - 1;
Figure 3.9 The function Checked digit value.



158

Attribute grammars

SECTION 3.1


Number

base

Digit_Seq

value

value

Base_Tag

base

Figure 3.10 The dependency graph of Number.




SUBSECTION 3.1.2

Attribute evaluation


base

base

Digit_Seq

Digit_Seq

value

base

base

value

base

Digit_Seq

Digit

Digit

value

value

value

Figure 3.11 The two dependency graphs of Digit_Seq.




159

160

SECTION 3.1

Attribute grammars


base

Digit

value

Digit_Token

repr

Figure 3.12 The dependency graph of Digit.




SUBSECTION 3.1.2

Attribute evaluation


Base_Tag

base

Base_Tag

base

Figure 3.13 The two dependency graphs of Base_Tag.




161

162

SECTION 3.1

Attribute grammars


PROCEDURE Evaluate for Digit_Seq alternative_1 (
pointer to digit_seq node Digit_Seq,
pointer to digit_seq alt_1 node Digit_Seq alt_1
):
// Propagate attributes:
Propagate for Digit_Seq alternative_1 (Digit_Seq, Digit_Seq alt_1);
// Traverse subtrees:
Evaluate for Digit_Seq (Digit_Seq alt_1 .Digit_Seq);
Evaluate for Digit (Digit_Seq alt_1 .Digit);
// Propagate attributes:
Propagate for Digit_Seq alternative_1 (Digit_Seq, Digit_Seq alt_1);
PROCEDURE Propagate for Digit_Seq alternative_1 (
pointer to digit_seq node Digit_Seq,
pointer to digit_seq alt_1 node Digit_Seq alt_1
):
IF Digit_Seq alt_1 .Digit_Seq .base is not set
AND Digit_Seq .base is set:
SET Digit_Seq alt_1 .Digit_Seq .base TO Digit_Seq .base;
IF Digit_Seq alt_1 .Digit .base is not set
AND Digit_Seq .base is set:
SET Digit_Seq alt_1 .Digit .base TO Digit_Seq .base;
IF Digit_Seq .value is not set
AND Digit_Seq alt_1 .Digit_Seq .value is set
AND Digit_Seq .base is set
AND Digit_Seq alt_1 .Digit .value is set:
SET Digit_Seq .value TO
Digit_Seq alt_1 .Digit_Seq .value * Digit_Seq .base
+ Digit_Seq alt_1 .Digit .value;
Figure 3.14 Data-flow code for the first alternative of Digit_Seq.



SUBSECTION 3.1.2

Attribute evaluation

163


PROCEDURE Run the data-flow attribute evaluator on node Number:
WHILE Number .value is not set:
PRINT "Evaluate for Number called";
// report progress
Evaluate for Number (Number);
// Print one attribute:
PRINT "Number .value = ", Number .value;
Figure 3.15 Driver for the data-flow code.



164

SECTION 3.1

Attribute grammars


FUNCTION Topological sort of (a set Set) RETURNING a list:
SET List TO Empty list;
WHILE there is a Node in Set but not in List:
Append Node and its predecessors to List;
RETURN List;
PROCEDURE Append Node and its predecessors to List:
// First append the predecessors of Node:
FOR EACH Node_1 IN the Set of nodes that Node is dependent on:
IF Node_1 is not in List:
Append Node_1 and its predecessors to List;
Append Node to List;
Figure 3.16 Outline code for a simple implementation of topological sort.



SUBSECTION 3.1.3

Cycle handling



Figure 3.17 A fairly long, possibly circular, data-flow path.




165

166

SECTION 3.1

Attribute grammars



i1

i2

i3

s1

s2

Figure 3.18 An example of an IS-SI graph.




SUBSECTION 3.1.3

Cycle handling


i1

i1

i2

s1

s1

s2

i1

s1

s2

Figure 3.19 The dependency graph for the production rule N → PQ.


167

168

SECTION 3.1

Attribute grammars


SET the flag Something was changed TO True;
// Initialization step:
FOR EACH terminal T IN Attribute grammar:
SET the IS-SI graph of T TO T's dependency graph;
FOR EACH non-terminal N IN Attribute grammar:
SET the IS-SI graph of N TO the empty set;
WHILE Something was changed:
SET Something was changed TO False;
FOR EACH production rule P = M_0 → M_1 ... M_n IN Attribute grammar:
// Construct the dependency graph copy D:
SET the dependency graph D TO a copy of the dependency graph of P;
// Add the dependencies already found for M_i, i = 0...n:
FOR EACH M IN M_0 ... M_n:
FOR EACH dependency d IN the IS-SI graph of M:
Insert d in D;
// Use the dependency graph D:
Compute all induced dependencies in D by transitive closure;
IF D contains a cycle: ERROR "Cycle found in production", P
// Propagate the newly discovered dependencies:
FOR EACH M IN M_0 ... M_n:
FOR EACH dependency d IN D
SUCH THAT the attributes in d are attributes of M:
IF there is no dependency d in the IS-SI graph of M:
Enter a dependency d into the IS-SI graph of M;
SET Something was changed TO True;
Figure 3.20 Outline of the strong-cyclicity test for an attribute grammar.



SUBSECTION 3.1.3

Cycle handling



i1

i1

i2

s1

s1

s2

i1

s1

s2

Figure 3.21 The IS-SI graphs of N, P, and Q collected so far.




169

170

SECTION 3.1

Attribute grammars



i2

i1

i1

s1

s2

s1

i1

s1

s2

Figure 3.22 Transitive closure over the dependencies of N, P, Q and D.




SUBSECTION 3.1.3

Cycle handling



i2

i1

i1

s1

s1

s2

i1

s1

s2

Figure 3.23 The new IS-SI graphs of N, P, and Q.




171

172

SECTION 3.1

Attribute grammars


// Visit 1 from the parent: flow of control from parent enters here.
// The parent has set some inherited attributes, the set IN_1.
// Visit some children M_k, M_l, ...:
Compute some inherited attributes of M_k, the set (IM_k)_1;
Visit M_k for the first time;
// M_k returns with some of its synthesized attributes evaluated.
Compute some inherited attributes of M_l, the set (IM_l)_1;
Visit M_l for the first time;
// M_l returns with some of its synthesized attributes evaluated.
... // Perhaps visit some more children, including possibly M_k or
// M_l again, while supplying the proper inherited attributes
// and obtaining synthesized attributes in return.
// End of the visits to children.
Compute some of N's synthesized attributes, the set SN_1;
Leave to the parent;
// End of visit 1 from the parent.
// Visit 2 from the parent: flow of control re-enters here.
// The parent has set some inherited attributes, the set IN_2.
... // Again visit some children while supplying inherited
// attributes and obtaining synthesized attributes in return.
Compute some of N's synthesized attributes, the set SN_2;
Leave to the parent;
// End of visit 2 from the parent.
... // Perhaps code for some more visits 3..n from the parent,
// supplying sets IN_3 to IN_n and yielding
// sets SN_3 to SN_n.
Figure 3.24 Outline code for multi-visit attribute evaluation.



SUBSECTION 3.1.5

Multi-visit attribute grammars


i -th visit to N
1

provides IN

yields SN

11
10

2
SN

INi

...

...

8
(IM l)

...
k

4
(SMl )

(IMk)

performs j -th visit to M l


providing (IM l )
j

yielding (SM l )

(SMk)

performs h-th visit to M k


providing (IM k )
h
3
yielding (SMk )
h

Figure 3.25 The i-th visit to a node N, visiting two children, Mk and Ml .


173

174

SECTION 3.1

Attribute grammars


PROCEDURE Visit_i to N (pointer to an N node Node):
SELECT Node .type:
CASE alternative_1:
Visit_i to N alternative_1 (Node);
...
CASE alternative_k:
Visit_i to N alternative_k (Node);
...
Figure 3.26 Structure of an i-th visit routine for N.



SUBSECTION 3.1.5

Multi-visit attribute grammars


Number

value

base

Digit_Seq

value

base

Digit

value

Base_Tag

base

Figure 3.27 The IS-SI graphs of the non-terminals from grammar 3.8.


175

176

SECTION 3.1

Attribute grammars


            IN_1    SN_1
Number              value
Digit_Seq   base    value
Digit       base    value
Base_Tag            base

Figure 3.28 Partitionings of the attributes of grammar 3.8.



SUBSECTION 3.1.5

Multi-visit attribute grammars

177


PROCEDURE Visit_1 to Number alternative_1 (
pointer to number node Number,
pointer to number alt_1 node Number alt_1
):
// Visit 1 from the parent: flow of control from the parent enters here.
// The parent has set the attributes in IN 1 of Number, the set { }.
// Visit some children:
// Compute the attributes in IN 1 of Base_Tag (), the set { }:
// Visit Base_Tag for the first time:
Visit_1 to Base_Tag (Number alt_1 .Base_Tag);
// Base_Tag returns with its SN 1 , the set { base }, evaluated.
// Compute the attributes in IN 1 of Digit_Seq (), the set { base }:
SET Number alt_1 .Digit_Seq .base TO Number alt_1 .Base_Tag .base;
// Visit Digit_Seq for the first time:
Visit_1 to Digit_Seq (Number alt_1 .Digit_Seq);
// Digit_Seq returns with its SN 1 , the set { value }, evaluated.
// End of the visits to children.
// Compute the attributes in SN 1 of Number, the set { value }:
SET Number .value TO Number alt_1 .Digit_Seq .value;
Figure 3.29 Visiting code for Number nodes.



178

SECTION 3.1

Attribute grammars


PROCEDURE Visit_1 to Digit_Seq alternative_1 (
pointer to digit_seq node Digit_Seq,
pointer to digit_seq alt_1 node Digit_Seq alt_1
):
// Visit 1 from the parent: flow of control from the parent enters here.
// The parent has set the attributes in IN 1 of Digit_Seq, the set { base }.
// Visit some children:
// Compute the attributes in IN 1 of Digit_Seq (), the set { base }:
SET Digit_Seq alt_1 .Digit_Seq .base TO Digit_Seq .base;
// Visit Digit_Seq for the first time:
Visit_1 to Digit_Seq (Digit_Seq alt_1 .Digit_Seq);
// Digit_Seq returns with its SN 1 , the set { value }, evaluated.
// Compute the attributes in IN 1 of Digit (), the set { base }:
SET Digit_Seq alt_1 .Digit .base TO Digit_Seq .base;
// Visit Digit for the first time:
Visit_1 to Digit (Digit_Seq alt_1 .Digit);
// Digit returns with its SN 1 , the set { value }, evaluated.
// End of the visits to children.
// Compute the attributes in SN 1 of Digit_Seq, the set { value }:
SET Digit_Seq .value TO
Digit_Seq alt_1 .Digit_Seq .value * Digit_Seq .base +
Digit_Seq alt_1 .Digit .value;
Figure 3.30 Visiting code for Digit_Seq alternative_1 nodes.



SUBSECTION 3.1.7

L-attributed grammars


A

B1

B2

B3

B4

C1

C2

C3

Figure 3.31 Data flow in part of a parse tree for an L-attributed grammar.


179

180

SECTION 3.1

Attribute grammars


{
#include "symbol_table.h"
}

%lexical get_next_token_class;
%token IDENTIFIER;
%token DIGIT;
%start Main_Program, main;
main {symbol_table sym_tab; int result;}:
{init_symbol_table(&sym_tab);}
declarations(sym_tab)
expression(sym_tab, &result)
{printf("result = %d\n", result);}
;
declarations(symbol_table sym_tab):
declaration(sym_tab) [ ',' declaration(sym_tab) ]* ';'
;
declaration(symbol_table sym_tab) {symbol_entry *sym_ent;}:
IDENTIFIER {sym_ent = look_up(sym_tab, Token.repr);}
'=' DIGIT {sym_ent->value = Token.repr - '0';}
;
expression(symbol_table sym_tab; int *e) {int t;}:
term(sym_tab, &t)
{*e = t;}
expression_tail_option(sym_tab, e)
;
expression_tail_option(symbol_table sym_tab; int *e) {int t;}:
'-' term(sym_tab, &t)
{*e -= t;}
expression_tail_option(sym_tab, e)
|
;
term(symbol_table sym_tab; int *t):
IDENTIFIER
{*t = look_up(sym_tab, Token.repr)->value;}
;
Figure 3.32 LLgen code for an L-attributed grammar for simple expressions.



SUBSECTION 3.1.9

Equivalence of L-attributed and S-attributed grammars


Declaration →
Type_Declarator(type) Declared_Idf_Sequence(type)
;
Declared_Idf_Sequence(INH type) →
Declared_Idf(type)
|
Declared_Idf_Sequence(type) , Declared_Idf(type)
;
Declared_Idf(INH type) →
Idf(repr)
ATTRIBUTE RULES:
Add to symbol table (repr, type);
;
Figure 3.33 Sketch of an L-attributed grammar for Declaration.



181

182

SECTION 3.1

Attribute grammars


Declaration
Type_Declarator(type) Declared_Idf_Sequence(repr list)
ATTRIBUTE RULES:
FOR EACH repr IN repr list:
Add to symbol table(repr, type);

Declared_Idf_Sequence(SYN repr list)


Declared_Idf(repr)
ATTRIBUTE RULES:
SET repr list TO Convert to list (repr);
|
Declared_Idf_Sequence(old repr list) , Declared_Idf(repr)
ATTRIBUTE RULES:
SET repr list TO Append to list (old repr list, repr);
;
Declared_Idf(SYN repr)
Idf(repr)
;
Figure 3.34 Sketch of an S-attributed grammar for Declaration.



SUBSECTION 3.1.11

Conclusion



Ordered

L-attributed

S-attributed

Figure 3.35 Pictorial comparison of three types of attribute grammars.




183

184

SECTION 3.2

Manual methods


Last node pointer

X
a

initial situation

Last node pointer

final situation

Figure 3.36 Control flow graph for the expression b*b - 4*a*c.


SUBSECTION 3.2.1

Threading the AST

185


#include "parser.h" /* for types AST_node and Expression */
#include "thread.h" /* for self check */
/* PRIVATE */

static AST_node *Last_node;


/*
 * Threads the expression tree rooted at expr onto the list ending at
 * Last_node. Children are threaded before their parent, so the successor
 * chain visits the nodes in post-order.
 *
 * The node type is a character code; the quotes around the case labels are
 * restored here.
 */
static void Thread_expression(Expression *expr) {
    switch (expr->type) {
    case 'D':
        /* Leaf node: append it directly. */
        Last_node->successor = expr; Last_node = expr;
        break;
    case 'P':
        /* Node with two operands: thread left, then right, then the node. */
        Thread_expression(expr->left);
        Thread_expression(expr->right);
        Last_node->successor = expr; Last_node = expr;
        break;
    }
}
/* PUBLIC */
AST_node *Thread_start;
/*
 * Threads the whole AST icode and leaves the first node of the resulting
 * successor chain in Thread_start.
 *
 * A local placeholder node serves as the initial "last node", so that
 * Thread_expression can always assign through Last_node->successor; its
 * successor field ends up pointing at the first real node.
 */
void Thread_AST(AST_node *icode) {
    AST_node list_head;

    Last_node = &list_head;
    Thread_expression(icode);

    /* Terminate the chain at the last threaded node. */
    Last_node->successor = (AST_node *)0;

    Thread_start = list_head.successor;
}
Figure 3.37 Threading code for the demo compiler from Section 1.2.



186

SECTION 3.2

Manual methods


PROCEDURE Thread if statement (If node pointer):
Thread expression (If node pointer .condition);
SET Last node pointer .successor TO If node pointer;
SET End if join node TO Generate join node ();

SET Last node pointer TO address of a local node Aux last node;
Thread block (If node pointer .then part);
SET If node pointer .true successor TO Aux last node .successor;
SET Last node pointer .successor TO address of End if join node;

SET Last node pointer TO address of Aux last node;
Thread block (If node pointer .else part);
SET If node pointer .false successor TO Aux last node .successor;
SET Last node pointer .successor TO address of End if join node;

SET Last node pointer TO address of End if join node;
Figure 3.38 Sample threading routine for if-statements.



SUBSECTION 3.2.1

Threading the AST


Last node pointer

If_statement

X
Condition

Then_part

Else_part

Figure 3.39 AST of an if-statement before threading.




187

188

SECTION 3.2

Manual methods


Last node pointer

If_statement

X
Condition

Then_part

Else_part

End_if

Figure 3.40 AST and control flow graph of an if-statement after threading.


SUBSECTION 3.2.1

Threading the AST

189


If_statement(INH successor, SYN first)
IF Condition THEN Then_part ELSE Else_part END
ATTRIBUTE RULES:
SET If_statement .first TO Condition .first;
SET Condition .true successor TO Then_part .first;
SET Condition .false successor TO Else_part .first;
SET Then_part .successor TO If_statement .successor;
SET Else_part .successor TO If_statement .successor;
Figure 3.41 Threading an if-statement using attribute rules.



IF

190

SECTION 3.2

Manual methods


Last node pointer

If_statement

X
Condition

Then_part

Else_part

End_if

Figure 3.42 AST and doubly-linked control flow graph of an if-statement.




SUBSECTION 3.2.2

Symbolic interpretation


If_statement
y 5
x
1
Condition
>

y
y 5
x

y 5
x
y 5
x

Then_part

Else_part

y 5
x
5
0
0

y 5
x
5

End_if
y 5
x

y 5
x

y 5
x

Figure 3.43 Stack representations in the control flow graph of an if-statement.




191

192

SECTION 3.2

Manual methods


FUNCTION Symbolically interpret an if statement (
Stack representation, If node
) RETURNING a stack representation:
SET New stack representation TO
Symbolically interpret a condition (
Stack representation, If node .condition
);
Discard top entry from New stack representation;
RETURN Merge stack representations (
Symbolically interpret a statement sequence (
New stack representation, If node .then part
),
Symbolically interpret a statement sequence (
New stack representation, If node .else part
)
);
Figure 3.44 Outline of a routine Symbolically interpret an if statement.



SUBSECTION 3.2.2

Symbolic interpretation


From_expr

To_expr

expr

from F

from F
to T

from F
to T
E

=: v

For_statement
v E

from F
to T

Body

v E

v E
End_for

from F
to T

from F
to T

v ?

Figure 3.45 Stack representations in the control flow graph of a for-statement.




193

194

SECTION 3.2

Manual methods


int i = 0;
while (some condition) {
if (i > 0) printf("Loop reentered: i = %d\n", i);
i++;
}
Figure 3.46 Value set analysis in the presence of a loop statement.



SUBSECTION 3.2.2

Symbolic interpretation



Data definitions:
Stack representations, with entries for every item we are interested in.
Initializations:
1. Empty stack representations are attached to all arrows in the control flow
graph residing in the threaded AST.
2. Some stack representations at strategic points are initialized in accordance
with properties of the source language; for example, the stack representations
of input parameters are initialized to Initialized.
Inference rules:
For each node type, source language dependent rules allow inferences to be
made, adding information to the stack representation on the outgoing arrows
based on those on the incoming arrows and the node itself, and vice versa.
Figure 3.47 Full symbolic interpretation as a closure algorithm.


195

196

SECTION 3.2

Manual methods



:=
x := y + 3
x

y
(a)

3
(b)

Figure 3.48 An assignment as a full control flow graph and as a single node.


SUBSECTION 3.2.3

Data-flow equations


$$IN(N) = \bigcup_{M = \text{dynamic predecessor of } N} OUT(M)$$

$$OUT(N) = (IN(N) \setminus KILL(N)) \cup GEN(N)$$


Figure 3.49 Data-flow equations for a node N.



197

198

SECTION 3.2

Manual methods


IN := all value parameters have values
first node

return
return

exit node

KILL = all local information

Figure 3.50 Data-flow details at routine entry and exit.




SUBSECTION 3.2.3

Data-flow equations



Data definitions:
1. Constant KILL and GEN sets for each node.
2. Variable IN and OUT sets for each node.
Initializations:
1. The IN set of the top node is initialized with information established externally.
2. For all other nodes N, IN(N) and OUT(N) are set to empty.
Inference rules:
1. For any node N, IN(N) should contain
$$\bigcup_{M = \text{dynamic predecessor of } N} OUT(M).$$
2. For any node N, OUT(N) should contain
$$(IN(N) \setminus KILL(N)) \cup GEN(N).$$
Figure 3.51 Closure algorithm for solving the data-flow equations.


199

200

SECTION 3.2

Manual methods



y may be initialized
y may be uninitialized
x may be initialized
x may be uninitialized

Figure 3.52 Bit patterns for properties of the variables x and y.




SUBSECTION 3.2.3

Data-flow equations


0

x is guaranteed to have a value


y may or may not have a value

x is guaranteed not to have a value


1

the combination 00 for y is an error

Figure 3.53 Examples of bit patterns for properties of the variables x and y.


201

202

SECTION 3.2

Manual methods



1 0 0 1
y > 0
1 0 0 1
KILL

1 0 0 0

1 0 0 1
x := y

y := 0

0 0 1 0

GEN

KILL
\

0 0 0 1

1 0 0 1

0 1 0 0

0 0 0 1

0 1 0 1

1 0 0 1

GEN

end_if
0 1 0 1
1 0 0 1
1 1 0 1

Figure 3.54 Data-flow propagation through an if-statement.




SUBSECTION 3.2.5

Carrying the information upstream – live analysis

203


{
    int x;
    x = ...;            /* code fragment 1, does not use x */
    if (...) {
        ...             /* code fragment 2, does not use x */
        print(x);       /* code fragment 3, uses x */
        ...             /* code fragment 4, does not use x */
    } else {
        int y;
        ...             /* code fragment 5, does not use x,y */
        print(x);       /* code fragment 6, uses x, but not y */
        ...             /* code fragment 7, does not use x,y */
        y = ...;        /* code fragment 8, does not use x,y */
        ...             /* code fragment 9, does not use x,y */
        print(y);       /* code fragment 10, uses y but not x */
        ...             /* code fragment 11, does not use x,y */
    }
    x = ...;            /* code fragment 12, does not use x */
    ...                 /* code fragment 13, does not use x */
    print(x);           /* code fragment 14, uses x */
    ...                 /* code fragment 15, does not use x */
}
Figure 3.55 A segment of C code to demonstrate live analysis.



204

SECTION 3.2

Manual methods


BPLU_x =
{}

x = ...; (1)
LU_x = true
...

BPLU_x =
{1}
...; (2)

print(x); (3)

...; (4)

Figure 3.56 The first few steps in live analysis for Figure 3.55 using backpatch lists.


SUBSECTION 3.2.5

Carrying the information upstream – live analysis



x = ...; (1)
LU_x = false
...

...; (2)

print(x); (3)
BPLU_x =
{3}

LU_x = true
...; (4)

Figure 3.57 Live analysis for Figure 3.55, after a few steps.


205

206

SECTION 3.2

Manual methods



x = ...; (1)
LU_x = false
...

...; (5)

...; (2)

print(x); (3)

print(x); (6)
LU_x = true

LU_x = true

...; (7)

...; (4)

y = ...; (8)
LU_y = false
...; (9)

BPLU_x =
{3}

print(y);(10)
LU_y = true
...; (11)

BPLU_x =
{6}
BPLU_y =
{10}

end_if
BPLU_x =
{3, 6}
x = ...; (12)

Figure 3.58 Live analysis for Figure 3.55, merging at the end-if node.


SUBSECTION 3.2.5

Carrying the information upstream – live analysis


$$OUT(N) = \bigcup_{M = \text{dynamic successor of } N} IN(M)$$

$$IN(N) = (OUT(N) \setminus KILL(N)) \cup GEN(N)$$


Figure 3.59 Backwards data-flow equations for a node N.



207

208

SECTION 3.2

Manual methods


00
x = ...; (1)
10
...
10

10
...; (5)

...; (2)
10

10
print(x); (6)

print(x); (3)
00

00
...; (7)

...; (4)
00

00
y = ...; (8)
01
...; (9)
01
print(y);(10)
00
...; (11)
00
end_if
00
x = ...; (12)
10
...; (13)
10
print(x);(14)
00
...; (15)
00

Figure 3.60 Live analysis for Figure 3.55 using backward data-flow equations.


SECTION 3.3

Conclusion


S(SYN s)
A(i1, s1)
ATTRIBUTE RULES:
SET i1 TO s1;
SET s TO s1;
A(INH i1, SYN s1)
A(i2, s2) a
ATTRIBUTE RULES:
SET i2 TO i1;
SET s1 TO s2;
|
B(i2, s2)
ATTRIBUTE RULES:
SET i2 TO i1;
SET s1 TO s2;
B(INH i, SYN s)
b
ATTRIBUTE RULES: SET s TO i;
Figure 3.61 Attribute grammar for Exercise {#ShowCycle}.



209

210

CHAPTER 3

Annotating the abstract syntax tree – the context


i

Figure 3.62 IS-SI graphs for T and U.




SECTION 3.3

Conclusion



x = ...; (1)

y := 1; (2)

z := x*x - y*y; (3)

print(x); (4)

y := y+1; (5)

y>100

(6)

Figure 3.63 Example program for a very busy expression.




211



Processing the
intermediate code

212

CHAPTER 4

Processing the intermediate code



Interpretation
Lexical
analysis

Syntax
analysis

Context
handling

Intermediate
code
generation

Run-time
system
design

Largely language- and paradigm-independent

Language- and
paradigm-dependent

Machine
code
generation

Largely machine- and paradigm-independent

Figure 4.1 The status of the various modules in compiler construction.




213

214

SECTION 4.1

Interpretation


re:

3.0

im:

4.0

Figure 4.2 A value of type Complex_Number as the programmer sees it.




SUBSECTION 4.1.1

Recursive interpretation

215



value: V

"complex_number"
type:
name:
size:

2
class:

values: V

RECORD

field:

name:

"re"

type:
type:
value:

next:
3.0

name:
type:
value:

"im"

type:
4.0

next:

"real"

name:
class:
type number:
Specific to the given value of type
complex_number

BASIC
3

Common to all values of type


complex_number

Figure 4.3 A representation of the value of Figure 4.2 in a recursive interpreter.




216

SECTION 4.1

Interpretation


PROCEDURE Elaborate if statement (If node):
SET Result TO Evaluate condition (If node .condition);
IF Status .mode /= Normal mode: RETURN;
IF Result .type /= Boolean:
ERROR "Condition in if-statement is not of type Boolean";
RETURN;
IF Result .boolean .value = True:
Elaborate statement (If node .then part);
ELSE Result .boolean .value = False:
// Check if there is an else-part at all:
IF If node .else part /= No node:
Elaborate statement (If node .else part);
ELSE If node .else part = No node:
SET Status .mode TO Normal mode;
Figure 4.4 Outline of a routine for recursively interpreting an if-statement.



SUBSECTION 4.1.2

Iterative interpretation

217


WHILE Active node .type /= End of program type:
SELECT Active node .type:
CASE ...
CASE If type:
// We arrive here after the condition has been evaluated;
// the Boolean result is on the working stack.
SET Value TO Pop working stack ();
IF Value .boolean .value = True:
SET Active node TO Active node .true successor;
ELSE Value .boolean .value = False:
IF Active node .false successor /= No node:
SET Active node TO Active node .false successor;
ELSE Active node .false successor = No node:
SET Active node TO Active node .successor;
CASE ...
Figure 4.5 Sketch of the main loop of an iterative interpreter, showing the code for an if-statement.



218

SECTION 4.1

Interpretation


#include "parser.h" /* for types AST_node and Expression */
#include "thread.h" /* for Thread_AST() and Thread_start */
#include "stack.h" /* for Push() and Pop() */
#include "backend.h" /* for self check */
/* PRIVATE */
static AST_node *Active_node_pointer;
/*
 * Iterative interpreter: follows the successor chain starting at
 * Active_node_pointer until it reaches a null pointer, evaluating each node
 * with the help of Push() and Pop(), and finally prints the popped result.
 *
 * The node type and the operator are character codes; the quotes around the
 * case labels are restored here (`case +:` is not valid C).
 */
static void Interpret_iteratively(void) {
    while (Active_node_pointer != 0) {
        /* there is only one node type, Expression: */
        Expression *expr = Active_node_pointer;

        switch (expr->type) {
        case 'D':
            Push(expr->value);
            break;
        case 'P': {
            /* Both supported operators are commutative, so it does not
             * matter which popped value is called left and which right. */
            int e_left = Pop(); int e_right = Pop();

            switch (expr->oper) {
            case '+': Push(e_left + e_right); break;
            case '*': Push(e_left * e_right); break;
            }}
            break;
        }
        Active_node_pointer = Active_node_pointer->successor;
    }
    printf("%d\n", Pop());  /* print the result */
}
/* PUBLIC */
/*
 * Back-end entry point: threads the AST icode, positions the active-node
 * pointer at the start of the thread, and runs the iterative interpreter.
 */
void Process(AST_node *icode) {
    Thread_AST(icode);
    Active_node_pointer = Thread_start;

    Interpret_iteratively();
}
Figure 4.6 An iterative interpreter for the demo compiler of Section 1.2.



SUBSECTION 4.1.2

Iterative interpretation



condition

IF

statement 1

statement 2

statement 3

statement 4

END
IF

Figure 4.7 An AST stored as a graph.




219

220

SECTION 4.1

Interpretation



condition

condition
IF_FALSE

IF
statement 1
JUMP
statement 1
statement 2
statement 2

statement 3
statement 4

statement 3
(b)
statement 4

(a)

Figure 4.8 Storing the AST in an array (a) and as pseudo-instructions (b).


SECTION 4.2

Code generation


Intermediate Code Generation
and Register Allocation
Ra

:=
a

+
*

[
b

mem

+
*

+
9
2

+
d

@b

+
Rd

*
4

Rc

Figure 4.9 Two ASTs for the expression a := (b[4*c + d] * 2) + 9.




221

222

CHAPTER 4

Processing the intermediate code


Rd

Rd

mem

*
Ri

+
*

Ri
Load_Address M[Ri ],C,Rd

Ro
c

Load_Byte (M+Ro )[R i],C,Rd

Figure 4.10 Two sample instructions with their ASTs.




SECTION 4.2

Code generation


Intermediate
code

IC
preprocessing

Code
generation
proper

Target code
postprocessing

Machine
code
generation

Executable
code
output

exe
Executable
code

Figure 4.11 Overall structure of a code generator.




223

224

SECTION 4.2

Code generation


#include "parser.h" /* for types AST_node and Expression */
#include "thread.h" /* for Thread_AST() and Thread_start */
#include "backend.h" /* for self check */
/* PRIVATE */
static AST_node *Active_node_pointer;
/*
 * Trivial code generator: follows the successor chain starting at
 * Active_node_pointer and, for each node, prints the C text that the
 * interpreter would have executed for it. The output is a complete C program
 * that includes "stack.h", defines main(), and prints the popped result.
 *
 * Fixes relative to the damaged listing:
 *  - the first string literal was split across a raw line break;
 *  - character literals had lost their quotes, both in the case labels of
 *    this routine and inside the emitted program text.
 * The emitted block is written as adjacent string literals so that source
 * indentation cannot leak into the output.
 */
static void Trivial_code_generation(void) {
    printf("#include \"stack.h\"\nint main(void) {\n");
    while (Active_node_pointer != 0) {
        /* there is only one node type, Expression: */
        Expression *expr = Active_node_pointer;

        switch (expr->type) {
        case 'D':
            printf("Push(%d);\n", expr->value);
            break;
        case 'P':
            /* The operator is emitted as its numeric character code. */
            printf("{\n"
                   "int e_left = Pop(); int e_right = Pop();\n"
                   "switch (%d) {\n"
                   "case '+': Push(e_left + e_right); break;\n"
                   "case '*': Push(e_left * e_right); break;\n"
                   "}}\n",
                   expr->oper);
            break;
        }
        Active_node_pointer = Active_node_pointer->successor;
    }
    printf("printf(\"%%d\\n\", Pop()); /* print the result */\n");
    printf("return 0;}\n");
}
/* PUBLIC */
/*
 * Back-end entry point: threads the AST icode, positions the active-node
 * pointer at the start of the thread, and runs the trivial code generator.
 */
void Process(AST_node *icode) {
    Thread_AST(icode);
    Active_node_pointer = Thread_start;

    Trivial_code_generation();
}
Figure 4.12 A trivial code generator for the demo compiler of Section 1.2.



SUBSECTION 4.2.3

Trivial code generation


#include "stack.h"
int main(void) {
Push(7);
Push(1);
Push(5);
{
int e_left = Pop(); int e_right = Pop();
switch (43) {
case '+': Push(e_left + e_right); break;
case '*': Push(e_left * e_right); break;
}}
{
int e_left = Pop(); int e_right = Pop();
switch (42) {
case '+': Push(e_left + e_right); break;
case '*': Push(e_left * e_right); break;
}}
printf("%d\n", Pop()); /* print the result */
return 0;}
Figure 4.13 Code for (7*(1+5)) generated by the code generator of Figure 4.12.



225

226

SECTION 4.2

Code generation


/* Threaded code: one routine call per node, in threading order. */
int main(void) {
Expression_D(7);
Expression_D(1);
Expression_D(5);
Expression_P(43); /* 43 = ASCII value of + */
Expression_P(42); /* 42 = ASCII value of * */
Print();
return 0;
}

Figure 4.14 Possible threaded code for (7*(1+5)).



SUBSECTION 4.2.3

Trivial code generation


#include

"stack.h"

void Expression_D(int digit) {


Push(digit);
}
void Expression_P(int oper)
int e_left = Pop(); int
switch (oper) {
case +: Push(e_left +
case *: Push(e_left *
}
}

{
e_right = Pop();
e_right); break;
e_right); break;

void Print(void) {
printf("%d\n", Pop());
}
Figure 4.15 Routines for the threaded code for (7*(1+5)).



227

228

SECTION 4.2

Code generation


case 'P':
printf("{\n\
int e_left = Pop(); int e_right = Pop();\n"
);
switch (expr->oper) {
case '+': printf("Push(e_left + e_right);\n"); break;
case '*': printf("Push(e_left * e_right);\n"); break;
}
printf("}\n");
break;
Figure 4.16 Partial evaluation in a segment of the code generator.



SUBSECTION 4.2.3

Trivial code generation

229


#include "stack.h"
int main(void) {
Push(7);
Push(1);
Push(5);
{int e_left = Pop(); int e_right = Pop(); Push(e_left + e_right);}
{int e_left = Pop(); int e_right = Pop(); Push(e_left * e_right);}
printf("%d\n", Pop()); /* print the result */
return 0;}
Figure 4.17 Code for (7*(1+5)) generated by the code generator of Figure 4.16.



230

SECTION 4.2

Code generation


case 'P':
printf("{\n\
int e_left = Pop(); int e_right = Pop();\n"
);
switch (expr->oper) {
case '+': printf("Push(e_left + e_right);\n"); break;
case '*': printf("Push(e_left * e_right);\n"); break;
}
printf("}\n");
break;
Figure 4.18 Foreground (run-now) view of partially evaluating code.



SUBSECTION 4.2.3

Trivial code generation


case 'P':
printf("{\n\
int e_left = Pop(); int e_right = Pop();\n"
);
switch (expr->oper) {
case '+': printf("Push(e_left + e_right);\n"); break;
case '*': printf("Push(e_left * e_right);\n"); break;
}
printf("}\n");
break;
Figure 4.19 Background (run-later) view of partially evaluating code.



231

SECTION 4.2

Code generation



SP

stack

direction of growth

232

BP

Figure 4.20 Data administration in a simple stack machine.




SUBSECTION 4.2.4

Simple code generation

233


Instruction      Actions
Push_Const c     SP:=SP+1; stack[SP]:=c;
Push_Local i     SP:=SP+1; stack[SP]:=stack[BP+i];
Store_Local i    stack[BP+i]:=stack[SP]; SP:=SP-1;
Add_Top2         stack[SP-1]:=stack[SP-1]+stack[SP]; SP:=SP-1;
Subtr_Top2       stack[SP-1]:=stack[SP-1]-stack[SP]; SP:=SP-1;
Mult_Top2        stack[SP-1]:=stack[SP-1]*stack[SP]; SP:=SP-1;

Figure 4.21 Stack machine instructions.



234

SECTION 4.2

Code generation


Instruction        Actions
Load_Const c,Rn    Rn:=c;
Load_Mem x,Rn      Rn:=x;
Store_Reg Rn,x     x:=Rn;
Add_Reg Rm,Rn      Rn:=Rn+Rm;
Subtr_Reg Rm,Rn    Rn:=Rn-Rm;
Mult_Reg Rm,Rn     Rn:=Rn*Rm;

Figure 4.22 Register machine instructions.



SUBSECTION 4.2.4

Simple code generation


*
b

*
b

*
a

Figure 4.23 The abstract syntax tree for b*b - 4*(a*c).




235

236

SECTION 4.2

Code generation


Push_Const c:

Push_Local i:
c

Subtr_Top2:

Add_Top2:
+

Mult_Top2:

Store_Local i:

:=

*
i

Figure 4.24 The abstract syntax trees for the stack machine instructions.


SUBSECTION 4.2.4

Simple code generation


Subtr_Top2

Mult_Top2

Push_Local #b

Push_Local #b

Mult_Top2

Push_Const 4

Mult_Top2

Push_Local #a

Push_Local #c

Figure 4.25 The abstract syntax tree for b*b - 4*(a*c) rewritten.


237

238

SECTION 4.2

Code generation


PROCEDURE Generate code (Node):
SELECT Node .type:
CASE Constant type:
Emit ("Push_Const" Node .value);
CASE LocalVar type:
Emit ("Push_Local" Node .number);
CASE StoreLocal type: Emit ("Store_Local" Node .number);
CASE Add type:
Generate code (Node .left); Generate code (Node .right);
Emit ("Add_Top2");
CASE Subtract type:
Generate code (Node .left); Generate code (Node .right);
Emit ("Subtr_Top2");
CASE Multiply type:
Generate code (Node .left); Generate code (Node .right);
Emit ("Mult_Top2");
Figure 4.26 Depth-first code generation for a stack machine.



SUBSECTION 4.2.4

Simple code generation


b

b*b

b*b

(1)

(2)

(3)

(4)

c
a

(a*c)

4*(a*c)

b*b

b*b

b*b

b*b

(5)

(6)

(7)

(8)

b*b-4*(a*c)
(9)

Figure 4.27 Successive stack configurations for b*b - 4*(a*c).




239

240

SECTION 4.2

Code generation


Load_Const c ,Rn :

Rn

Load_Mem x ,Rn :

c
Add_Reg Rm,Rn :

Rn

Subtr_Reg Rm,Rn :

Mult_Reg Rm,Rn :

Rn
-

Rm

Rn

Rn

Rn

Rm

Rn

Store_Reg Rn ,x:

:=

*
x
Rn

Rn

Rm

Figure 4.28 The abstract syntax trees for the register machine instructions.


SUBSECTION 4.2.4

Simple code generation

241


PROCEDURE Generate code (Node, a register Target, a register set Aux):
SELECT Node .type:
CASE Constant type:
Emit ("Load_Const " Node .value ",R" Target);
CASE Variable type:
Emit ("Load_Mem " Node .address ",R" Target);
CASE ...
CASE Add type:
Generate code (Node .left, Target, Aux);
SET Target 2 TO An arbitrary element of Aux;
SET Aux 2 TO Aux \ Target 2;
// the \ denotes the set difference operation
Generate code (Node .right, Target 2, Aux 2);
Emit ("Add_Reg R" Target 2 ",R" Target);
CASE ...
Figure 4.29 Simple code generation with register allocation.



242

SECTION 4.2

Code generation


PROCEDURE Generate code (Node, a register number Target):
SELECT Node .type:
CASE Constant type:
Emit ("Load_Const " Node .value ",R" Target);
CASE Variable type:
Emit ("Load_Mem " Node .address ",R" Target);
CASE ...
CASE Add type:
Generate code (Node .left, Target);
Generate code (Node .right, Target+1);
Emit ("Add_Reg R" Target+1 ",R" Target);
CASE ...
Figure 4.30 Simple code generation with register numbering.



SUBSECTION 4.2.4

Simple code generation


Load_Mem   b,R1
Load_Mem   b,R2
Mult_Reg   R2,R1
Load_Const 4,R2
Load_Mem   a,R3
Load_Mem   c,R4
Mult_Reg   R4,R3
Mult_Reg   R3,R2
Subtr_Reg  R2,R1

Figure 4.31 Register machine code for the expression b*b - 4*(a*c).



243

244

SECTION 4.2

Code generation


R4:
R3:
b

b*b

b*b

(1)

(2)

(3)

(4)

R2:
R1:

R4:

R3:

(a*c)

(a*c)

R2:

4*(a*c)

R1:

b*b

b*b

b*b

b*b

(5)

(6)

(7)

(8)

R4:

R3:

(a*c)

R2:

4*(a*c)

R1: b*b-4*(a*c)
(9)

Figure 4.32 Successive register contents for b*b - 4*(a*c).




SUBSECTION 4.2.4

Simple code generation


Load_Mem   b,R1
Load_Mem   b,R2
Mult_Reg   R2,R1
Load_Mem   a,R2
Load_Mem   c,R3
Mult_Reg   R3,R2
Load_Const 4,R3
Mult_Reg   R3,R2
Subtr_Reg  R2,R1

Figure 4.33 Weighted register machine code for the expression b*b - 4*(a*c).



245

246

SECTION 4.2

Code generation


FUNCTION Weight of (Node) RETURNING an integer:
SELECT Node .type:
CASE Constant type: RETURN 1;
CASE Variable type: RETURN 1;
CASE ...
CASE Add type:
SET Required left TO Weight of (Node .left);
SET Required right TO Weight of (Node .right);
IF Required left > Required right: RETURN Required left;
IF Required left < Required right: RETURN Required right;
// Required left = Required right
RETURN Required left + 1;
CASE ...
Figure 4.34 Register requirements (weight) of a node.



SUBSECTION 4.2.4

Simple code generation


-3
*2
b1

*2
b1

41

*2
a1

c1

Figure 4.35 AST for b*b - 4*(a*c) with register weights.




247

248

SECTION 4.2

Code generation


computation order
R2

R3

R1

uses

uses

uses

R1,R2,R3

R1 and R3

R1

third parameter

first parameter

and one other reg.


second parameter

Figure 4.36 Evaluation order of three parameter trees.




SUBSECTION 4.2.4

Simple code generation

249


PROCEDURE Generate code for large trees (Node, Target register):
SET Auxiliary register set TO
Available register set \ Target register;
WHILE Node /= No node:
Compute the weights of all nodes of the tree of Node;
SET Tree node TO Maximal non_large tree (Node);
Generate code
(Tree node, Target register, Auxiliary register set);
IF Tree node /= Node:
SET Temporary location TO Next free temporary location();
Emit ("Store R" Target register ",T" Temporary location);
Replace Tree node by a reference to Temporary location;
Return any temporary locations in the tree of Tree node
to the pool of free temporary locations;
ELSE Tree node = Node:
Return any temporary locations in the tree of Node
to the pool of free temporary locations;
SET Node TO No node;
FUNCTION Maximal non_large tree (Node) RETURNING a node:
IF Node .weight <= Size of Auxiliary register set: RETURN Node;
IF Node .left .weight > Size of Auxiliary register set:
RETURN Maximal non_large tree (Node .left);
ELSE Node .right .weight >= Size of Auxiliary register set:
RETURN Maximal non_large tree (Node .right);
Figure 4.37 Code generation for large trees.



250

SECTION 4.2

Code generation


Load_Mem   a,R1
Load_Mem   c,R2
Mult_Reg   R2,R1
Load_Const 4,R2
Mult_Reg   R2,R1
Store_Reg  R1,T1
Load_Mem   b,R1
Load_Mem   b,R2
Mult_Reg   R2,R1
Load_Mem   T1,R2
Subtr_Reg  R2,R1

Figure 4.38 Code generated for b*b - 4*(a*c) with only 2 registers.



SUBSECTION 4.2.4

Simple code generation


-2
*1
b1

*2
b0

41

*1
a1

c0

Figure 4.39 Register-weighted tree for a memory-register machine.




251

252

SECTION 4.2

Code generation


Load_Const 4,R2
Load_Mem   a,R1
Mult_Mem   c,R1
Mult_Reg   R1,R2
Load_Mem   b,R1
Mult_Mem   b,R1
Subtr_Reg  R2,R1

Figure 4.40 Code for the register-weighted tree for a memory-register machine.



SUBSECTION 4.2.5

Code generation for basic blocks


{
    int n;

    n = a + 1;
    x = b + n*n + c;
    n = n + 1;
    y = d * n;
}
Figure 4.41 Sample basic block in C.



253

254

SECTION 4.2

Code generation



=:
+
a

=:

=:

=:

n
1

+
b

x
c

n
1

*
n

Figure 4.42 AST of the sample basic block.




y
n

SUBSECTION 4.2.5

Code generation for basic blocks

255



=:

=:
+
a

n
+

1
b

=:
+

x
c

=:
n

*
d

*
n

Figure 4.43 Data dependency graph for the sample basic block.


y
n

256

SECTION 4.2

Code generation


x

+
b

c
*

+
1

+
a

Figure 4.44 Cleaned-up data dependency graph for the sample basic block.


SUBSECTION 4.2.5

Code generation for basic blocks


{
    int n;

    n = a + 1;
    x = b + n*n + c;      /* subexpression n*n ... */
    n = n*n + 1;          /* ... in common */
    y = d * n;
}
Figure 4.45 Basic block in C with common subexpression.



257

258

SECTION 4.2

Code generation


x

+
b

c
*

+
*

+
a

Figure 4.46 Data dependency graph with common subexpression.




SUBSECTION 4.2.5

Code generation for basic blocks


x

+
b

c
*

+
1

+
a

Figure 4.47 Cleaned-up data dependency graph with common subexpression eliminated.


259

260

SECTION 4.2

Code generation


position    triple
   1        a + 1
   2        @1 * @1
   3        b + @2
   4        @3 + c
   5        @4 =: x
   6        @1 + 1
   7        d * @6
   8        @7 =: y

Figure 4.48 The data dependency graph of Figure 4.44 as an array of triples.



SUBSECTION 4.2.5

Code generation for basic blocks


x
Store_Reg R1,x
+
+
b

Add_Mem c,R1
c

I1

Add_Reg I1,R1
Load_Mem b,R1

Figure 4.49 Rewriting and ordering a ladder sequence.




261

262

SECTION 4.2

Code generation


x

+
b

c
*

+
1

+
a

Figure 4.50 Cleaned-up data dependency graph for the sample basic block.


SUBSECTION 4.2.5

Code generation for basic blocks


x

+
+
b

c
*
+

Figure 4.51 Data dependency graph after removal of the first ladder sequence.


263

264

SECTION 4.2

Code generation


X1
+
a

Figure 4.52 Data dependency graph after removal of the second ladder sequence.


SUBSECTION 4.2.5

Code generation for basic blocks


Load_Mem   a,R1
Add_Const  1,R1
Load_Reg   R1,X1

Load_Reg   X1,R1
Mult_Reg   X1,R1
Add_Mem    b,R1
Add_Mem    c,R1
Store_Reg  R1,x

Load_Reg   X1,R1
Add_Const  1,R1
Mult_Mem   d,R1
Store_Reg  R1,y

Figure 4.53 Pseudo-register target code generated for the basic block.



265

266

SECTION 4.2

Code generation


Load_Mem   a,R1
Add_Const  1,R1
Load_Reg   R1,R2

Load_Reg   R2,R1
Mult_Reg   R2,R1
Add_Mem    b,R1
Add_Mem    c,R1
Store_Reg  R1,x

Load_Reg   R2,R1
Add_Const  1,R1
Mult_Mem   d,R1
Store_Reg  R1,y

Figure 4.54 Code generated for the program segment of Figure 4.41.



SUBSECTION 4.2.5

Code generation for basic blocks


Load_Mem   a,R1
Add_Const  1,R1
Load_Reg   R1,R2

Mult_Reg   R1,R2
Add_Mem    b,R2
Add_Mem    c,R2
Store_Reg  R2,x

Add_Const  1,R1
Mult_Mem   d,R1
Store_Reg  R1,y

Figure 4.55 Code generated by the GNU C compiler, gcc.



267

268

SECTION 4.2

Code generation


{

int n;
n = a + 1;
*x = b + n*n + c;
n = n + 1;
y = d * n;

}
Figure 4.56 Sample basic block with an assignment under a pointer.



SUBSECTION 4.2.5

Code generation for basic blocks

269



=:
+
a

=:

=:*
+

n
+

1
b

x
c

=:
n

*
d

*
n

Figure 4.57 Data dependency graph with an assignment under a pointer.




y
n

270

SECTION 4.2

Code generation


x

=:*

+
+
b

x
c

+
n

*
=:
+

n
1

Figure 4.58 Cleaned-up data dependency graph with an assignment under a pointer.


SUBSECTION 4.2.5

Code generation for basic blocks


Load_Mem           a,R1
Add_Const          1,R1
Store_Reg          R1,n

Load_Mem           n,R1
Mult_Mem           n,R1
Add_Mem            b,R1
Add_Mem            c,R1
Store_Indirect_Mem R1,x

Load_Mem           n,R1
Add_Const          1,R1
Mult_Mem           d,R1
Store_Reg          R1,y

Figure 4.59 Target code generated for the basic block of Figure 4.56.



271

272

Code generation

SECTION 4.2


Rn
#1

cst

Load_Const cst ,R n

load constant

cost = 1

Load_Mem mem,R n

load from memory

cost = 3

Add_Mem mem,R n

add from memory

cost = 3

Add_Reg R 1 ,R n

add registers

cost = 1

Mult_Mem mem,R n

multiply from memory

cost = 6

Mult_Reg Rm,R n

multiply registers

cost = 4

Add_Scaled_Reg cst ,Rm,R n

add scaled register

cost = 4

Mult_Scaled_Reg cst ,Rm,R n

multiply scaled register

cost = 5

Rn
#2

mem
Rn
+

#3

mem

Rn
Rn
+

#4

R1

Rn
Rn
*

#5

mem

Rn
Rn
*

#6

Rm

Rn
Rn
+

#7

* #7.1

Rn
cst

Rm

Rn
#8

*
* #8.1

Rn
cst

Rm

Figure 4.60 Sample instruction patterns for BURS code generation.




SUBSECTION 4.2.6

BURS code generation and dynamic programming


+
b

*
4

*
8

Figure 4.61 Input tree for the BURS code generation.




273

274

SECTION 4.2

Code generation


+ #4
* #6

R
#2 b

* #6

R
#1 4

#1 8

#2 a

Figure 4.62 Naive rewrite of the input tree.




SUBSECTION 4.2.6

BURS code generation and dynamic programming


Load_Const 8,R1     ; 1 unit
Load_Mem   a,R2     ; 3 units
Mult_Reg   R2,R1    ; 4 units
Load_Const 4,R2     ; 1 unit
Mult_Reg   R1,R2    ; 4 units
Load_Mem   b,R1     ; 3 units
Add_Reg    R2,R1    ; 1 unit
           Total    = 17 units

Figure 4.63 Code resulting from the naive rewrite.



275

276

SECTION 4.2

Code generation


+ #7
R
#2 b

*
* #5

4
R

#1 8

Figure 4.64 Top-down largest-fit rewrite of the input tree.




SUBSECTION 4.2.6

BURS code generation and dynamic programming


Load_Const     8,R1       ; 1 unit
Mult_Mem       a,R1       ; 6 units
Load_Mem       b,R2       ; 3 units
Add_Scaled_Reg 4,R1,R2    ; 4 units
               Total      = 14 units

Figure 4.65 Code resulting from the top-down largest-fit rewrite.



277

278

SECTION 4.2

Code generation


#7->reg
result

recognized

#7.1

result

to be recognized

*
cst
(a)

R
R1

*
cst

recognized
R1

(b)

Figure 4.66 The dotted trees corresponding to #7->reg and to #7.1.




SUBSECTION 4.2.6

BURS code generation and dynamic programming

279


PROCEDURE Bottom_up pattern matching (Node):
IF Node is an operation:
Bottom_up pattern matching (Node .left);
Bottom_up pattern matching (Node .right);
SET Node .label set TO Label set for (Node);
ELSE IF Node is a constant:
SET Node .label set TO Label set for constant ();
ELSE Node is a variable:
SET Node .label set TO Label set for variable ();
FUNCTION Label set for (Node) RETURNING a label set:
SET Label set TO the Empty set;
FOR EACH Label IN the Machine label set:
FOR EACH Left label IN Node .left .label set:
FOR EACH Right label IN Node .right .label set:
IF Label .operator = Node .operator
AND Label .operand = Left label .result
AND Label .second operand = Right label .result:
Insert Label into Label set;
RETURN Label set;
FUNCTION Label set for constant () RETURNING a label set:
SET Label set TO
{ (No operator, No location, No location, "Constant") };
FOR EACH Label IN the Machine label set:
IF Label .operator = "Load" AND Label .operand = "Constant":
Insert Label into Label set;
RETURN Label set;
FUNCTION Label set for variable () RETURNING a label set:
SET Label set TO
{ (No operator, No location, No location, "Memory") };
FOR EACH Label IN the Machine label set:
IF Label .operator = "Load" AND Label .operand = "Memory":
Insert Label into Label set;
RETURN Label set;
Figure 4.67 Outline code for bottom-up pattern matching in trees.



280

SECTION 4.2

Code generation


TYPE operator: "Load", +, *;
TYPE location: "Constant", "Memory", "Register", a label;
TYPE label:
// a node in a pattern tree
an operator FIELD operator;
a location FIELD operand;
a location FIELD second operand;
a location FIELD result;
Figure 4.68 Types for bottom-up pattern recognition in trees.



SUBSECTION 4.2.6

BURS code generation and dynamic programming


#4->reg

+ #7->reg

->mem
#2->reg

#6->reg
#7.1
#8->reg
#8.1
#5->reg
#6->reg

* #7.1
#8.1

->cst
#1->reg

->cst
#1->reg

->mem
#2->reg

Figure 4.69 Label sets resulting from bottom-up pattern matching.




281

282

SECTION 4.2

Code generation


PROCEDURE Bottom_up pattern matching (Node):
IF Node is an operation:
Bottom_up pattern matching (Node .left);
Bottom_up pattern matching (Node .right);
SET Node .state TO Next state
[Node .operator, Node .left .state, Node .right .state];
ELSE IF Node is a constant:
SET Node .state TO State for constant;
ELSE Node is a variable:
SET Node .state TO State for variable;
Figure 4.70 Outline code for efficient bottom-up pattern matching in trees.



SUBSECTION 4.2.6

BURS code generation and dynamic programming


+ #4->reg @13

#7.1

* #8->reg @9

b
->mem @0

#2->reg @3
#5->reg @7

* #7.1

4
->cst @0

#8.1

#1->reg @1
8
->cst @0
#1->reg @1

a
->mem @0
#2->reg @3

Figure 4.71 Bottom-up pattern matching with costs.




283

284

SECTION 4.2

Code generation


Load_Mem        a,R1       ; 3 units
Load_Const      4,R2       ; 1 unit
Mult_Scaled_Reg 8,R1,R2    ; 5 units
Load_Mem        b,R1       ; 3 units
Add_Reg         R2,R1      ; 1 unit
                Total      = 13 units

Figure 4.72 Code generated by bottom-up pattern matching.



SUBSECTION 4.2.6

BURS code generation and dynamic programming


Load_Mem        a,R1       ; 3 units
Load_Const      4,R2       ; 1 unit
Mult_Scaled_Reg 8,R1,R2    ; 5 units
Add_Mem         b,R1       ; 3 units
                Total      = 12 units

Figure 4.73 Code generated by bottom-up pattern matching, using commutativity.



285

286

SECTION 4.2

Code generation


S00 = {}
S01 = {->cst@0, #1->reg@1}
S02 = {->mem@0, #2->reg@3}
S03 = {#4->reg@0}
S04 = {#6->reg@5, #7.1@0, #8.1@0}
S05 = {#3->reg@0}
S06 = {#5->reg@4, #7.1@0, #8.1@0}
S07 = {#6->reg@0}
S08 = {#5->reg@0}
S09 = {#7->reg@0}
S10 = {#8->reg@1, #7.1@0, #8.1@0}
S11 = {#8->reg@0}
S12 = {#8->reg@2, #7.1@0, #8.1@0}
S13 = {#8->reg@4, #7.1@0, #8.1@0}

Figure 4.74 States of the BURS automaton for Figure 4.60.



SUBSECTION 4.2.6

BURS code generation and dynamic programming


cst: S01
mem: S02
Figure 4.75 Initial states for the basic operands.



287

288

SECTION 4.2

Code generation


+
S01
S02
S03
S04
S05
S06
S07
S08
S09
S10
S11
S12
S13

S01
S03
S05
S03
S09
S03
S09
S03
S03
S03
S03
S03
S03
S09
S13
*
S01
S02
S03
S04
S05
S06
S07
S08
S09
S10
S11
S12
S13

S01
S04
S06
S04
S10
S04
S12
S04
S04
S04
S04
S04
S13
S12
S02
S07
S08
S07
S11
S07
S11
S07
S07
S07
S07
S07
S11
S11
S13

Figure 4.76 The transition table Cost conscious next state[ ].



SUBSECTION 4.2.6

BURS code generation and dynamic programming

289


S01
cst
mem
reg

-#1

S02
-#2

S03
--#4

S04
--#6

S05
--#3

S06
--#5

S07
--#6

S08
--#5

S09
--#7

S10
--#8

Figure 4.77 The code generation table.



S11
--#8

S12
--#8

S13
--#8

290

SECTION 4.2

Code generation


#4

+ S03
#2

#8

b S02
#1

* S12
4 S01
-

* S06
8 S01

#2

a S02

Figure 4.78 States and instructions used in BURS code generation.




SUBSECTION 4.2.7

Register allocation by graph coloring


a := read();
b := read();
c := read();
a := a + b + c;
if (a < 10) {
d := c + 8;
print(c);
} else if (a < 20) {
e := 10;
d := e + a;
print(e);
} else {
f := 12;
d := f + a;
print(f);
}
print(d);
Figure 4.79 A program segment for live analysis.



291

292

SECTION 4.2

Code generation


a := read();

a
b

b := read();
c
c := read();
a := a+b+c;

a < 10
d

a < 20

d := c+8;
print(c);
e

e := 10;
d := e+a;
print(e);

f := 12;
d := f+a;
print(f);

print(d);

Figure 4.80 Live ranges of the variables from Figure 4.79.




SUBSECTION 4.2.7

Register allocation by graph coloring


f

Figure 4.81 Register interference graph for the variables of Figure 4.79.


293

294

SECTION 4.2

Code generation


FUNCTION Color graph (Graph) RETURNING the Colors used set:
IF Graph = the Empty graph: RETURN the Empty set;
// Find the Least connected node:
SET Least connected node TO No node;
FOR EACH Node IN Graph .nodes:
SET Degree TO 0;
FOR EACH Arc IN Graph .arcs:
IF Arc contains Node:
Increment Degree;
IF Least connected node = No node OR
Degree < Minimum degree:
SET Least connected node TO Node;
SET Minimum degree TO Degree;
// Remove Least connected node from Graph:
SET Least connected node arc set TO the Empty set;
FOR EACH Arc IN Graph .arcs:
IF Arc contains Least connected node:
Remove Arc from Graph .arcs;
Insert Arc in Least connected node arc set;
Remove Least connected node from Graph .nodes;
// Color the reduced Graph recursively:
SET Colors used set TO Color graph (Graph);
// Color the Least connected node:
SET Left over colors set TO Colors used set;
FOR EACH Arc IN Least connected node arc set:
FOR EACH End point IN Arc:
IF End point /= Least connected node:
Remove End point .color from Left over colors set;
IF Left over colors set = Empty:
SET Color TO New color;
Insert Color in Colors used set;
Insert Color in Left over colors set;
SET Least connected node .color TO
Arbitrary choice from Left over colors set;
// Reattach the Least connected node:
Insert Least connected node in Graph .nodes;
FOR EACH Arc IN Least connected node arc set:
Insert Arc in Graph .arcs;
RETURN Colors used set;
Figure 4.82 Outline of a graph coloring algorithm.



SUBSECTION 4.2.7

Register allocation by graph coloring

295


Graph:
f

Stack:

a c b

c b

d
e a c b

f1

d e a c b

f d e a c b

f1

f1

d2

d2

e a c b

a c b

e1

d e a c b

f1

a2

f1

a2

d2

e1

d2

e1

c b

c1

f1

a2

b3

d2

e1

c1

Figure 4.83 Coloring the interference graph for the variables of Figure 4.79.


296

SECTION 4.2

Code generation


                ; n in register %ax
cwd             ; convert to double word:
                ;     (%dx,%ax) = (extend_sign(%ax), %ax)
negw %ax        ; negate: (%ax,cf) := (-%ax, %ax /= 0)
adcw %dx,%dx    ; add with carry: %dx := %dx + %dx + cf
                ; sign(n) in %dx
Figure 4.84 Optimal code for the function sign(n).



SUBSECTION 4.2.8

Supercompilation

297


Case n > 0
%dx
%ax
cf

Case n = 0
%dx
%ax
cf

Case n < 0
%dx
%ax
cf

initially:

-1

-n

-1

-n

-n

-1

-n

cwd
negw %ax
adcw %dx,%dx

Figure 4.85 Actions of the 80x86 code from Figure 4.84.



298

SECTION 4.2

Code generation



Problem

Technique

Expression trees, using


register-register or
memory-register instructions
with sufficient registers:
with insufficient registers:

Weighted trees;
Figure 4.30

Dependency graphs, using


register-register or
memory-register instructions

Ladder sequences;
Section 4.2.5.2

Expression trees, using any


instructions with cost function
with sufficient registers:
with insufficient registers:

Bottom-up tree rewriting;


Section 4.2.6

Register allocation when all


interferences are known

Graph coloring;
Section 4.2.7

Quality

Optimal
Optimal
Heuristic

Optimal
Heuristic
Heuristic

Figure 4.86 Comparison of some code generation techniques.




SUBSECTION 4.2.10

Debugging of code optimizers


int i, A[10];
for (i = 0; i < 20; i++) {
A[i] = 2*i;
}
Figure 4.87 Incorrect C program with compilation-dependent effect.



299

300

SECTION 4.2

Code generation



operation        replacement
E * 2 ** n       E << n
2 * V            V + V
3 * V            (V << 1) + V
V ** 2           V * V
E + 0            E
E * 1            E
E ** 1           E
1 ** E           1

Figure 4.88 Some transformations for arithmetic simplification.



SUBSECTION 4.2.11

Preprocessing the intermediate code


void S {
...
print_square(i++);
...
}
void print_square(int n) {
printf("square = %d\n", n*n);
}
Figure 4.89 C code with a routine to be in-lined.



301

302

SECTION 4.2

Code generation


void S {
...
{int n = i++; printf("square = %d\n", n*n);}
...
}
void print_square(int n) {
printf("square = %d\n", n*n);
}
Figure 4.90 C code with the routine in-lined.



SECTION 4.3

Assemblers, linkers, and loaders


a.o

reference to
_printf

4100

1000

1000
b.o

0
2600
1600

C
1250

250

400

1400
4000

3000
printf.o

0
100

4500

500
entry point
_printf

original code
segments

relocation
bit maps

resulting
executable code
segment

Figure 4.91 Linking three code segments.




303

304

SECTION 4.3

Assemblers, linkers, and loaders


.data
...
.align 8
var1:
.long 666
...
.code
...
addl var1,%eax
...
jmp label1
...
label1:
...
...
Figure 4.92 Assembly code fragment with internal symbols.



SUBSECTION 4.3.1

Assembler design issues


Assembly
code

Assembled
binary

Backpatch list
for label1

. . . .
jmp label1

EA

EA

EA

. . . .
. . . .
. . . .
jmp label1
. . . .
jmp label1
. . . .
label1:
. . . .

Figure 4.93 A backpatch list for labels.




305

306

SECTION 4.3

Assemblers, linkers, and loaders


External symbol    Type           Address
_options           entry point     50 data
__main             entry point    100 code
_printf            reference      500 code
_atoi              reference      600 code
_printf            reference      650 code
_exit              reference      700 code
_msg_list          entry point    300 data
_Out_Of_Memory     entry point    800 code
_fprintf           reference      900 code
_exit              reference      950 code
_file_list         reference        4 data

Figure 4.94 Example of an external symbol table.



SECTION 4.4

Conclusion



Figure 4.95 Test graph for recursive descent marking.




307

308

CHAPTER 4

Processing the intermediate code


void Routine(void) {
if (...) {
while (...) {
A;
}
}
else {
switch (...) {
case ...: B; break;
case ...: C; break;
}
}
}
Figure 4.96 Routine code for static profiling.



SECTION 4.4

Conclusion


1
a
b

if
h

d
e

while

switch

A
l

C
m

end switch

end if
q

Figure 4.97 Flow graph for static profiling of Figure 4.96.




309



Memory management

310

SECTION 5.1

Data allocation with explicit deallocation


chunk

chunk

block

block
s
i
z
e

in use

s
i
z
e

not in use

. . . . . . .

size
0

marked in use

marked free

low

high

Figure 5.1 Memory structure used by the malloc/free mechanism.




311

312

SECTION 5.1

Data allocation with explicit deallocation


SET the polymorphic chunk pointer First_chunk pointer TO
Beginning of available memory;
SET the polymorphic chunk pointer One past available memory TO
Beginning of available memory + Size of available memory;
SET First_chunk pointer .size TO Size of available memory;
SET First_chunk pointer .free TO True;
FUNCTION Malloc (Block size) RETURNING a polymorphic block pointer:
SET Pointer TO Pointer to free block of size (Block size);
IF Pointer /= Null pointer: RETURN Pointer;
Coalesce free chunks;
SET Pointer TO Pointer to free block of size (Block size);
IF Pointer /= Null pointer: RETURN Pointer;
RETURN Solution to out of memory condition (Block size);
PROCEDURE Free (Block pointer):
SET Chunk pointer TO Block pointer - Administration size;
SET Chunk pointer .free TO True;
Figure 5.2 A basic implementation of Malloc(Block size).



SUBSECTION 5.1.1

Basic memory allocation

313


FUNCTION Pointer to free block of size (Block size)
RETURNING a polymorphic block pointer:
// Note that this is not a pure function
SET Chunk pointer TO First_chunk pointer;
SET Requested chunk size TO Administration size + Block size;
WHILE Chunk pointer /= One past available memory:
IF Chunk pointer .free:
IF Chunk pointer .size - Requested chunk size >= 0:
// large enough chunk found:
Split chunk (Chunk pointer, Requested chunk size);
SET Chunk pointer .free TO False;
RETURN Chunk pointer + Administration size;
// try next chunk:
SET Chunk pointer TO Chunk pointer + Chunk pointer .size;
RETURN Null pointer;
PROCEDURE Split chunk (Chunk pointer, Requested chunk size):
SET Left_over size TO Chunk pointer .size - Requested chunk size;
IF Left_over size > Administration size:
// there is a non-empty left-over chunk
SET Chunk pointer .size TO Requested chunk size;
SET Left_over chunk pointer TO
Chunk pointer + Requested chunk size;
SET Left_over chunk pointer .size TO Left_over size;
SET Left_over chunk pointer .free TO True;
PROCEDURE Coalesce free chunks:
SET Chunk pointer TO First_chunk pointer;
WHILE Chunk pointer /= One past available memory:
IF Chunk pointer .free:
Coalesce with all following free chunks (Chunk pointer);
SET Chunk pointer TO Chunk pointer + Chunk pointer .size;
PROCEDURE Coalesce with all following free chunks (Chunk pointer):
SET Next_chunk pointer TO Chunk pointer + Chunk pointer .size;
WHILE Next_chunk pointer /= One past available memory
AND Next_chunk pointer .free:
// Coalesce them:
SET Chunk pointer .size TO
Chunk pointer .size + Next_chunk pointer .size;
SET Next_chunk pointer TO Chunk pointer + Chunk pointer .size;
Figure 5.3 Auxiliary routines for the basic Malloc(Block size).



314

SECTION 5.1

Data allocation with explicit deallocation


SET Free list for T TO No T;
FUNCTION New T () RETURNING a pointer to a T:
IF Free list for T = No T:
// Acquire a new block of records:
SET New block [1 .. Block factor for T] TO
Malloc (Size of T * Block factor for T);
// Construct a free list in New block:
SET Free list for T TO address of New block [1];
FOR Record count IN [1 .. Block factor for T - 1]:
SET New block [Record count] .link TO
address of New block [Record count + 1];
SET New block [Block factor for T] .link TO No T;
// Extract a new record from the free list:
SET New record TO Free list for T;
SET Free list for T TO New record .link;
// Zero the New record here, if required;
RETURN New record;
PROCEDURE Free T (Old record):
// Prepend Old record to free list:
SET Old record .link TO Free list for T;
SET Free list for T TO address of Old record;
Figure 5.4 Outline code for blockwise allocation of records of type T.



SUBSECTION 5.1.2

Linked lists


free_list_Elem

in
use

free

in
use

in
use

.
.
.

.
.
.
in
use

free

block 1

free

free

.
.
.

Figure 5.5 List of free records in allocated blocks.




block k

.
.
.

315

316

SECTION 5.2

Data allocation with implicit deallocation


heap

root set

e
c
f

Figure 5.6 A root set and a heap with reachable and unreachable chunks.


SUBSECTION 5.2.3

Reference counting


program
data
area

heap
d

a
2

b
1
e
1

c
2

f
2

Figure 5.7 Chunks with reference count in a heap.




317

318

SECTION 5.2

Data allocation with implicit deallocation


program
data
area

heap
a

d
2

c
1

f
1

Figure 5.8 Result of removing the reference to chunk b in Figure 5.7.




SUBSECTION 5.2.3

Reference counting


IF Points into the heap (q):
Increment q .reference count;
IF Points into the heap (p):
Decrement p .reference count;
IF p .reference count = 0:
Free recursively depending on reference counts (p);
SET p TO q;
Figure 5.9 Code to be generated for the pointer assignment p:=q.



319

320

SECTION 5.2

Data allocation with implicit deallocation


PROCEDURE Free recursively depending on reference counts(Pointer):
WHILE Pointer /= No chunk:
IF NOT Points into the heap (Pointer): RETURN;
IF NOT Pointer .reference count = 0: RETURN;
FOR EACH Index IN 1 .. Pointer .number of pointers - 1:
Free recursively depending on reference counts
(Pointer .pointer [Index]);
SET Aux pointer TO Pointer;
IF Pointer .number of pointers = 0:
SET Pointer TO No chunk;
ELSE Pointer .number of pointers > 0:
SET Pointer TO
Pointer .pointer [Pointer .number of pointers];
Free chunk(Aux pointer);
// the actual freeing operation
Figure 5.10 Recursively freeing chunks while avoiding tail recursion.



SUBSECTION 5.2.3

Reference counting


program
data
area

heap
a

d
1

c
1

f
1

Figure 5.11 Reference counting fails to identify circular garbage.




321

322

Data allocation with implicit deallocation

SECTION 5.2


to parent

marked
bit
pointer
counter

size

1
3
0

p
o
i
n
t
e
r

p
o
i
n
t
e
r

p
o
i
n
t
e
r

p
o
i
n
t
e
r

3rd child

free
bit
to parent

. . . . . .

Figure 5.12 Marking the third child of a chunk.




. . . .

SUBSECTION 5.2.4

Mark and scan


P

Parent
pointer

C
0

Chunk
pointer

Figure 5.13 The Schorr and Waite algorithm, arriving at C.




323

324

SECTION 5.2

Data allocation with implicit deallocation


P

n -th pointer
Parent
pointer

C
n

Chunk
pointer
D
0

Figure 5.14 Moving to D.




SUBSECTION 5.2.4

Mark and scan


P

n -th pointer
Parent
pointer

C
n

Chunk
pointer
D

Number of pointers in D

Figure 5.15 About to return from D.




325

326

SECTION 5.2

Data allocation with implicit deallocation


P

Parent
pointer

Chunk
pointer
Number of pointers in C

Figure 5.16 About to return from C.




SUBSECTION 5.2.5

Two-space copying


program data area

from space

to space

program data area

to space

from space

Figure 5.17 Memory layout for two-space copying.




327

328

Data allocation with implicit deallocation

SECTION 5.2


from-space

b
root set

e
c
f

to-space

Figure 5.18 Initial situation in two-space copying.




Two-space copying

SUBSECTION 5.2.5


from-space

root set

to-space
a

scan pointer

Figure 5.19 Snapshot after copying the first level.




329

330

SECTION 5.2

Data allocation with implicit deallocation


from-space

root set

to-space
a

scan pointer

Figure 5.20 Snapshot after having scanned one chunk.




Compaction

SUBSECTION 5.2.6


new address of 1
chunk

garbage
old

new

garbage

chunk
2

Figure 5.21 Address calculation during compaction.




331

332

SECTION 5.2

Data allocation with implicit deallocation


new address of 1
garbage

chunk

new

garbage

old

new address of 2
chunk
2

Figure 5.22 Pointer update during compaction.




SUBSECTION 5.2.6

Compaction


old

new

free

Figure 5.23 Moving the chunks during compaction.




333



Imperative and object-oriented programs

334

CHAPTER 6

Imperative and object-oriented programs

335






                                                   | Imperative and  | Functional | Logic | Parallel and
                                                   | object-oriented |            |       | distributed
Lexical and syntactic analysis,  general:          | 2
                                 specific:         | -
Context handling,                general:          | 3
                   identification & type checking: | 6.1
                                 specific:         | 6               | 7          | 8     | 9
Code generation,                 general:          | 4
                                 specific:         | 6               | 7          | 8     | 9



Figure 6.1 Roadmap to paradigm-specific issues.




336

Context handling

SECTION 6.1


"wrong"
P

P
"signal"

level

3
P

1
P
scope stack

"left"

"rotate"
P

"printf"

...

P
"paint"

"matt"
P

"right"
4

"right"

...

"signal"
P

properties
name

Figure 6.2 A naive scope-structured symbol table.




...

SUBSECTION 6.1.1

Identification


void rotate(double angle) {
...
}
void paint(int left, int right) {
Shade matt, signal;
...
{
Counter right, wrong;
...
}
}
Figure 6.3 C code leading to the symbol table in Figure 6.2.



337

338

SECTION 6.1

Context handling


Id.info
name
hash table

"paint"

macro
decl

name

"signal"

macro
decl

name

"right"

macro
decl

P
properties
level

Figure 6.4 A hash-table based symbol table.




SUBSECTION 6.1.1

Identification

339


Id.info("wrong")

Id.info("right")

Id.info("signal")

Id.info("matt")
...

level
Id.info("right")

Id.info("left")

Id.info("paint")

Id.info("rotate")

4
3

...

2
1

Id.info("printf")

Id.info("signal")
...

0
scope stack

Figure 6.5 Scope table for the hash-table based symbol table.


340

SECTION 6.1

Context handling


PROCEDURE Remove topmost scope ():
SET Link pointer TO Scope stack [Top level];
WHILE Link pointer /= No link:
// Get the next Identification info record
SET Idf pointer TO Link pointer .idf_info;
SET Link pointer TO Link pointer .next;
// Get its first Declaration info record
SET Declaration pointer TO Idf pointer .decl;
// Now Declaration pointer .level = Top level
// Detach the first Declaration info record
SET Idf pointer .decl TO Declaration pointer .next;
Free the record pointed at by Declaration pointer;
Free Scope stack [Top level];
SET Top level TO Top level - 1;
Figure 6.6 Outline code for removing declarations at scope exit.



SUBSECTION 6.1.1

Identification


type record
T:

"zout"
selectors

"sel"
P

Figure 6.7 Finding the type of a selector.




. . .

341

342

SECTION 6.1

Context handling



Data definitions:
1. Let T be a type table that has entries containing either a type description or a
reference (TYPE) to a type table entry.
2. Let Cyclic be a set of type table entries.
Initializations:
Initialize the Cyclic set to empty.
Inference rules:
If there exists a TYPE type table entry t 1 in T and t 1 is not a member of
Cyclic, let t 2 be the type table entry referred to by t 1 .
1. If t 2 is t 1 then add t 1 to Cyclic.
2. If t 2 is a member of Cyclic then add t 1 to Cyclic.
3. If t 2 is again a TYPE type table entry, replace, in t 1 , the reference to t 2 with
the reference referred to by t 2 .
Figure 6.8 Closure algorithm for detecting cycles in type definitions.


SUBSECTION 6.1.2

Type checking

343



Data definitions:
Let each node in the expression have a variable type set S attached to it. Also,
each node is associated with a non-variable context C.
Initializations:
The type set S of each operator node contains the result types of all identifications of the operator in it; the type set S of each leaf node contains the types of
all identifications of the node. The context C of a node derives from the
language manual.
Inference rules:
For each node N with context C, if its type set S contains a type T 1 which the
context C allows to be coerced to a type T 2 , T 2 must also be present in S.
Figure 6.9 The closure algorithm for identification in the presence of overloading and coercions,
phase 1.


344

SECTION 6.1

Context handling



Data definitions:
Let each node in the expression have a type set S attached to it.
Initializations:
Let the type set S of each node be filled by the algorithm of Figure 6.9.
Inference rules:
1. For each operator node N with type set S, if S contains a type T such that
there is no operator identified in N that results in T and has operands T 1 and
T 2 such that T 1 is in the type set of the left operand of N and T 2 is in the type
set of the right operand of N, T is removed from S.
2. For each operand node N with type set S, if S contains a type T that is not
compatible with at least one type of the operator that works on N, T is removed
from S.
Figure 6.10 The closure algorithm for identification in the presence of overloading and coercions,
phase 2.


SUBSECTION 6.1.2

Type checking


:=
(location of)

deref

p
(location of)

Figure 6.11 AST for p := q with explicit deref.




345

346

SECTION 6.1

Context handling


                      expected
                  lvalue    rvalue

found   lvalue      -       deref
        rvalue    error       -

Figure 6.12 Basic checking rules for lvalues and rvalues.



SUBSECTION 6.1.2

Type checking


expression construct        resulting kind

constant                    rvalue
identifier (variable)       lvalue
identifier (otherwise)      rvalue
&lvalue                     rvalue
*rvalue                     lvalue
V[rvalue]                   V
V.selector                  V
rvalue+rvalue               rvalue
lvalue:=rvalue              rvalue

Figure 6.13 lvalue/rvalue requirements and results of some expression constructs.



347

348

SECTION 6.2

Source language data representation and handling


void do_buffer(void) {
char *buffer;
obtain_buffer(&buffer);
use_buffer(buffer);
}
/*
 * Deliberately faulty routine that illustrates a scope violation: it
 * stores the address of its own local array in the caller's pointer, so
 * the pointer outlives the array it refers to.  The value handed back
 * through buffer_pointer must never be dereferenced.
 */
void obtain_buffer(char **buffer_pointer) {
    char actual_buffer[256];

    /* &actual_buffer[0] is a char *, the type *buffer_pointer expects;
       a plain &actual_buffer would be a char (*)[256]. */
    *buffer_pointer = &actual_buffer[0];
    /* this is a scope-violating assignment: */
    /* scope of *buffer_pointer > scope of &actual_buffer */
}
Figure 6.14 Example of a scope violation in C.



SUBSECTION 6.2.6

Array types


B[1,1]

B[1,1]

B[1,2]

B[2,1]

B[1,3]

B[1,2]

B[2,1]

B[2,2]

B[2,2]

B[1,3]

B[2,3]

B[2,3]

(a)

(b)

Figure 6.15 Array B in row-major (a) and column-major (b) order.




349

350

SECTION 6.2

Source language data representation and handling


zeroth_element
n
LB1
LEN1
LEN_PRODUCT1
...
LBn
LENn
LEN_PRODUCTn

Figure 6.16 An array descriptor.




SUBSECTION 6.2.9

Object types


dispatch table
pointer to B
pointer to A inside B

m1_A_A
a1

m2_A_B

a2

m3_B_B

b1
B-object

B-class

Figure 6.17 The representation of an object of class B.




351

352

SECTION 6.2

Source language data representation and handling


class C {
field c1;
field c2;
method m1();
method m2();
};
class D {
field d1;
method m3();
method m4();
};
class E extends C, D {
field e1;
method m2();
method m4();
method m5();
};
Figure 6.18 An example of multiple inheritance.



SUBSECTION 6.2.9

Object types


dispatch table
pointer to E
pointer to C inside E

m1_C_C
c1

m2_C_E

c2

m3_D_D

pointer to D inside E

m4_D_E
d1

m5_E_E

e1
E-object

E-class

Figure 6.19 A representation of an object of class E.




353

354

SECTION 6.2

Source language data representation and handling


class A {
field a1;
field a2;
method m1();
method m3();
};
class C extends A {
field c1;
field c2;
method m1();
method m2();
};
class D extends A {
field d1;
method m3();
method m4();
};
class E extends C, D {
field e1;
method m2();
method m4();
method m5();
};
Figure 6.20 An example of dependent multiple inheritance.



SUBSECTION 6.2.9

Object types


dispatch table
pointer to E
pointer to C inside E

m1_A_C
m3_A_A
a1

m2_C_E

a2

m1_A_A

c1

m3_A_D

c2

m4_D_E

pointer to D inside E

m5_E_E

d1
index tables

e1
2
E-object

-4 -3 2

E-class

Figure 6.21 An object of class E, with dependent inheritance.




355

SECTION 6.3

Routines and their activation


high
addresses

administrative part

356

parameter k
.
.
parameter 1
lexical pointer
return information
dynamic link
registers & misc.
frame pointer FP

local variables
stack pointer SP
low
addresses

working stack

Figure 6.22 Possible structure of an activation record.




SUBSECTION 6.3.2

Routines


void use_iterator(void) {
int i;
while ((i = get_next_int()) > 0) {
printf("%d\n", i);
}
}
int get_next_int(void) {
yield 8;
yield 3;
yield 0;
}
Figure 6.23 An example application of an iterator in C notation.



357

358

SECTION 6.3

Routines and their activation


char buffer[100];
void Producer(void) {
while (produce_buffer()) {
resume(Consumer);
}
empty_buffer();
/* signal end of stream */
resume(Consumer);
}
void Consumer(void) {
resume(Producer);
while (!empty_buffer_received()) {
consume_buffer();
resume(Producer);
}
}
Figure 6.24 Simplified producer/consumer communication using coroutines.



SUBSECTION 6.3.2

Routines


int i;
void level_0(void) {
int j;
void level_1(void) {
int k;
void level_2(void) {
int l;
...
k = l;
j = l;

/* code has access to i, j, k, l */

}
...
j = k;

/* code has access to i, j, k */

}
...

/* code has access to i, j */

}
Figure 6.25 Nested routines in C notation.



359

360

SECTION 6.3

Routines and their activation


void level_0(void) {
void level_1(void) {
void level_2(void) {
...
goto L_1;
...
}
...
L_1:...
...
}
...
}
Figure 6.26 Example of a non-local goto.



SUBSECTION 6.3.4

Non-nested routines


#include <setjmp.h>
#include <stdio.h>

/*
 * Looks for a multiple of 7 starting at n, using one recursion level per
 * candidate.  When one is found the routine does not return: it calls
 * longjmp() with that number, abandoning all pending activations and
 * resuming at the setjmp() point recorded in *jmpbuf_ptr.
 */
void find_div_7(int n, jmp_buf *jmpbuf_ptr) {
    if (n % 7 == 0) longjmp(*jmpbuf_ptr, n);
    find_div_7(n + 1, jmpbuf_ptr);
}

/* Sets up the jump target, starts the search at 1 and prints the value
   delivered by longjmp(). */
int main(void) {
    jmp_buf jmpbuf;             /* type defined in setjmp.h */
    int return_value;

    if ((return_value = setjmp(jmpbuf)) == 0) {
        /* setting up the label for longjmp() lands here */
        find_div_7(1, &jmpbuf);
    }
    else {
        /* returning from a call of longjmp() lands here */
        printf("Answer = %d\n", return_value);
    }
    return 0;
}

Figure 6.27 Demonstration of the setjmp/longjmp mechanism.



361

362

SECTION 6.3

Routines and their activation


lexical pointer
routine address

Figure 6.28 A routine descriptor for a language that requires lexical pointers.


SUBSECTION 6.3.5

Nested routines

363


void level_1(void) {
int k;
void level_2(void) {
int l;
...
}
routine_descriptor level_2_as_a_value = {
FP_of_this_activation_record(), /* FP of level_1() */
level_2
/* code address of level_2()*/
};
A(level_2_as_a_value);

/* level_2() as a parameter */

}
Figure 6.29 Possible code for the construction of a routine descriptor.



364

SECTION 6.3

Routines and their activation



FP
D

Activation record of
lexical parent of R

Activation record of
lexical parent of R

Adm. part

Adm. part

Activation record of
the caller Q of R

Activation record of
the caller Q of R

Adm. part
...

Adm. part

lexical ptr
routine addr

code of Q

code of Q

code of R

code of R

Activation record
of R

PC

PC

a. Before calling R.

FP

lexical ptr
return addr
dynamic link

b. After calling R.

Figure 6.30 Calling a routine defined by the two-pointer routine descriptor D.




SUBSECTION 6.3.5

Nested routines


*(
*(
FP
+
offset(lexical_pointer)
)
+
offset(k)
) =
*(FP + offset(l))
Figure 6.31 Intermediate code for the non-local assignment k = l.



365

366

SECTION 6.3

Routines and their activation


*(
*(
*(

FP
+
offset(lexical_pointer)

)
+
offset(lexical_pointer)
)
+
offset(j)
) =
*(FP + offset(l))
Figure 6.32 Intermediate code for the non-local assignment j = l.



SUBSECTION 6.3.5

Nested routines



lex ptr to
level_0()

nested routine
level_2()
passed as a parameter

activation record of
level_1()

activation record

code of
level_2()

activation record
lex ptr to
level_1()
activation record of
level_2()

Figure 6.33 Passing a nested routine as a parameter and calling it.




367

368

SECTION 6.3

Routines and their activation


void level_1(void) {
non_local_label_descriptor L_1_as_a_value = {
FP_of_this_activation_record(), /* FP of level_1() */
L_1
/* code address of L_1 */
};
void level_2(void) {
...
non_local_goto(L_1_as_a_value);
...

/* goto L_1; */

}
...
L_1:...
...
}
Figure 6.34 Possible code for the construction of a label descriptor.



SUBSECTION 6.3.5

Nested routines


high
addresses
value of param. 4

value of param. 2

lexical pointer
routine address
01010

Figure 6.35 A closure for a partially parameterized routine.




369

370

SECTION 6.3

Routines and their activation


int i;
void level_2(int *j, int *k) {
int l;
...
*k = l;
*j = l;

/* code has access to i, *j, *k, l */

}
void level_1(int *j) {
int k;
...                 /* code has access to i, *j, k */
level_2(j, &k);     /* was: level_2(); */
A(level_2);         /* level_2() as a parameter: */
                    /* this is a problem */

}
void level_0(void) {
int j;
...
level_1(&j);

/* code has access to i, j */


/* was: level_1(); */

}
Figure 6.36 Lambda-lifted routines in C notation.



SUBSECTION 6.3.6

Lambda lifting

371


int i;
void level_2(int *j, int *k) {
int l;
...
*k = l;
*j = l;

/* code has access to i, *j, *k, l */

}
void level_1(int *j) {
int *k = (int *)malloc(sizeof (int));
...
level_2(j, k);
A(closure(level_2, j, k));

/* code has access to i, *j, *k */


/* was: level_2(); */
/* was: A(level_2); */

}
void level_0(void) {
int *j = (int *)malloc(sizeof (int));
...
level_1(j);

/* code has access to i, *j */


/* was: level_1(); */

}
Figure 6.37 Lambda-lifted routines with additional heap allocation in C notation.



372

SECTION 6.4

Code generation for control flow statements


PROCEDURE Generate code for Boolean control expression (
Node, True label, False label
):
SELECT Node .type:
CASE Comparison type:
// <, >, ==, etc. in C
Generate code for comparison expression (Node .expr);
// The comparison result is now in the condition register
IF True label /= No label:
Emit ("IF condition register THEN GOTO" True label);
IF False label /= No label:
Emit ("GOTO" False label);
ELSE True label = No label:
IF False label /= No label:
Emit ("IF NOT condition register THEN GOTO"
False label);
CASE Lazy and type:
// the && in C
Generate code for Boolean control expression
(Node .left, No label, False label);
Generate code for Boolean control expression
(Node .right, True label, False label);
CASE ...
CASE Negation type:
// the ! in C
Generate code for Boolean control expression
(Node, False label, True label);
Figure 6.38 Code generation for Boolean expressions.



SUBSECTION 6.4.1

Local flow of control


tmp_case_value := case expression;
IF tmp_case_value = I_1 THEN GOTO label_1;
...
IF tmp_case_value = I_n THEN GOTO label_n;
GOTO label_else;
// or insert the code at label_else
label_1:
code for statement sequence 1
GOTO label_next;
...
label_n:
code for statement sequence n
GOTO label_next;
label_else:
code for else-statement sequence
label_next:
Figure 6.39 A simple translation scheme for case statements.



373

374

SECTION 6.4

Code generation for control flow statements


i := lower bound;
tmp_ub := upper bound;
tmp_step_size := step_size;
IF tmp_step_size = 0 THEN
... probably illegal; cause run-time error ...
IF tmp_step_size < 0 THEN GOTO neg_step;
IF i > tmp_ub THEN GOTO end_label;
// the next statement uses tmp_ub - i
// to evaluate tmp_loop_count to its correct, unsigned value
tmp_loop_count := (tmp_ub - i) DIV tmp_step_size + 1;
GOTO loop_label;
neg_step:
IF i < tmp_ub THEN GOTO end_label;
// the next statement uses i - tmp_ub
// to evaluate tmp_loop_count to its correct, unsigned value
tmp_loop_count := (i - tmp_ub) DIV (-tmp_step_size) + 1;
loop_label:
code for statement sequence
tmp_loop_count := tmp_loop_count - 1;
IF tmp_loop_count = 0 THEN GOTO end_label;
i := i + tmp_step_size;
GOTO loop_label;
end_label:
Figure 6.40 Code generation for a general for-statement.



SUBSECTION 6.4.1

Local flow of control


for (expr1; expr2; expr3) {
body;
}
Figure 6.41 A for-loop in C.



375

376

SECTION 6.4

Code generation for control flow statements


expr1;
while (expr2) {
body;
expr3;
}
Figure 6.42 A while loop that is almost equivalent to the for-loop of Figure 6.41.



SUBSECTION 6.4.1

Local flow of control

377


FOR i := 1 TO n-3 STEP 4 DO
    // The first loop takes care of the indices 1 .. (n div 4) * 4
    sum := sum + a[i];
    sum := sum + a[i+1];
    sum := sum + a[i+2];
    sum := sum + a[i+3];
END FOR;

FOR i := (n div 4) * 4 + 1 TO n DO
// This loop takes care of the remaining indices
sum := sum + a[i];
END FOR;
Figure 6.43 Two for-loops resulting from unrolling a for-loop.



378

SECTION 6.4

Code generation for control flow statements


high
addresses

parameter k
.
.
parameter 1
lexical pointer
return address
dynamic link
registers & misc.
FP

local variables

working stack

low
addresses

dynamic allocation
part

SP

Figure 6.44 An activation record with a dynamic allocation part.




SUBSECTION 6.4.2

Routine invocation



parameter n
.
.
parameter 1
administration part
old FP
activation record
of the caller

local variables

working stack

dynamic link

parameter k
.
.
parameter 1
activation record
of the callee

administration part
FP

local variables
SP
working stack

Figure 6.45 Two activation records on a stack.




379

380

SECTION 6.4

Code generation for control flow statements


#include <setjmp.h>
jmp_buf jmpbuf;

/* type defined in setjmp.h */

void handler(int signo) {


printf("ERROR, signo = %d\n", signo);
longjmp(jmpbuf, 1);
}
int main(void) {
signal(6, handler);
/* install the handler ... */
signal(12, handler);
/* ... for some signals */
if (setjmp(jmpbuf) == 0) {
/* setting up the label for longjmp() lands here */
/* normal code: */
...
} else {
/* returning from a call of longjmp() lands here */
/* exception code: */
...
}
}
Figure 6.46 An example program using setjmp/longjmp in a signal handler.



SUBSECTION 6.5.3

Code generation for generics



bool compare(tp *tp1, tp *tp2) { ... }


void assign(tp *dst, tp *src) { ... }
void dealloc(tp *arg) { ... }
tp *alloc(void) { ... }
void init(tp *dst) { ... }

Figure 6.47 A dope vector for generic type tp.




381

382

CHAPTER 6

Imperative and object-oriented programs


void A(int i) {
showSP("A.SP.entry");
if (i > 0) {B(i-1);}
showSP("A.SP.exit");
}
void B(int i) {
showSP("B.SP.entry");
if (i > 0) {A(i-1);}
showSP("B.SP.exit");
}
Figure 6.48 Mutually recursive test routines.



SECTION 6.6

Conclusion


void A(int i, void C()) {
showSP("A.SP.entry");
if (i > 0) {C(i-1, A);}
showSP("A.SP.exit");
}
void B(int i, void C()) {
showSP("B.SP.entry");
if (i > 0) {C(i-1, B);}
showSP("B.SP.exit");
}
Figure 6.49 Routines for testing routines as parameters.



383

384

CHAPTER 6

Imperative and object-oriented programs


void A(int i, label L) {
showSP("A.SP.entry");
if (i > 0) {B(i-1, exit); return;}
exit: showSP("A.SP.exit"); goto L;
}
void B(int i, label L) {
showSP("B.SP.entry");
if (i > 0) {A(i-1, exit); return;}
exit: showSP("B.SP.exit"); goto L;
}
Figure 6.50 Routines for testing labels as parameters.





Functional programs

385

386

CHAPTER 7

Functional programs


fac 0 = 1
fac n = n * fac (n-1)
Figure 7.1 Functional specification of factorial in Haskell.



CHAPTER 7

Functional programs


/* Imperative factorial: multiplies n, n-1, ..., 1 together and returns
   the product; for n <= 0 no multiplication happens and 1 is returned. */
int fac(int n) {
    int result = 1;

    for (; n > 0; n--) {
        result *= n;
    }
    return result;
}
Figure 7.2 Imperative definition of factorial in C.



387

388

CHAPTER 7

Functional programs



Compiler phase

Language aspect

Lexical analyzer

Offside rule

Parser

List notation
List comprehension
Pattern matching

Context handling

Polymorphic type checking

Run-time system

Referential transparency
Higher-order functions
Lazy evaluation

Figure 7.3 Overview of handling functional language aspects.




SECTION 7.2

Compiling functional languages



High-level language
7.3 Type inference
7.4 Desugaring

Functional core

Optimizations 7.7

7.6 Code generation

7.5 Run-time system

Figure 7.4 Structure of a functional compiler.




389

390

SECTION 7.4

Desugaring


fun p1,1 ... p1,n = expr1
fun p2,1 ... p2,n = expr2
.
.
.
fun pm ,1 ... pm,n = exprm
Figure 7.5 The general layout of a function using pattern matching.



SUBSECTION 7.4.2

The translation of pattern matching


fun a1 ... an

if (cond1) then let defs1 in expr1


else if (cond2) then let defs2 in expr2
.
.
.
else if (condm ) then let defsm in exprm
else error "fun: no matching pattern"

Figure 7.6 Translation of the pattern-matching function of Figure 7.5.



391

392

SECTION 7.4

Desugaring


take a1 a2 = if (a1 == 0) then
let xs = a2 in [ ]
else if (a2 == [ ]) then
let n = a1 in [ ]
else if (_type_constr a2 == Cons) then
let
n = a1
x = _type_field 1 a2
xs = _type_field 2 a2
in
x : take (n-1) xs
else
error "take: no matching pattern"
Figure 7.7 Translation of the function take.



SUBSECTION 7.4.2

The translation of pattern matching


take a1 a2 = if (a1 == 0) then
let xs = a2 in [ ]
else if (a2 == [ ]) then
let n = a1 in [ ]
else
let
n = a1
x = _type_field 1 a2
xs = _type_field 2 a2
in
x : take (n-1) xs
Figure 7.8 Optimized translation of the function take.



393

394

SECTION 7.4

Desugaring


last a1 = if (_type_constr a1 == Cons) then
let
x = _type_field 1 a1
xs = _type_field 2 a1
in
if (xs == [ ]) then
x
else
last xs
else
error "last: no matching pattern"
Figure 7.9 Translation of the function last.



SUBSECTION 7.4.3

The translation of list comprehension



T{ [ expr | ] }             ⇒   [ expr ]                              (1)

T{ [ expr | F, Q ] }        ⇒   if ( F ) then T{ [ expr | Q ] }
                                 else [ ]                              (2)

T{ [ expr | e <- L, Q ] }   ⇒   mappend f_Q L
                                 where
                                   f_Q e = T{ [ expr | Q ] }           (3)

Figure 7.10 Translation scheme for list comprehensions.




395

396

SECTION 7.4

Desugaring


sv_mul scal vec = let
s_mul x = scal * x
in
map s_mul vec
Figure 7.11 A function returning a function.



SUBSECTION 7.4.4

The translation of nested functions


sv_mul_dot_s_mul scal x = scal * x
sv_mul scal vec = map (sv_mul_dot_s_mul scal) vec
Figure 7.12 The function of Figure 7.11, with s_mul lambda-lifted.



397

398

SECTION 7.5

Graph reduction


@
en

@
f

e1

Figure 7.13 Identifying an essential redex.




SUBSECTION 7.5.1

Reduction order



root:

@
@

*
square

Figure 7.14 A graph with a built-in operator with strict arguments.




399

400

SECTION 7.5

Graph reduction


/* Kinds of graph node; the tag in struct node tells which member of the
   union nd is in use. */
typedef enum {FUNC, NUM, NIL, CONS, APPL} node_type;
/* Graph nodes are always handled through pointers. */
typedef struct node *Pnode;
/* Code of a function: it receives a pointer to an array of argument nodes
   and returns a node. */
typedef Pnode (*unary)(Pnode *arg);
/* Description of a function as stored in a FUNC node. */
struct function_descriptor {
int arity;          /* number of arguments the code consumes */
const char *name;   /* name of the function */
unary code;         /* routine to call */
};
struct node {
node_type tag;
union {
struct function_descriptor func;        /* used by FUNC nodes */
int num;                                /* presumably used by NUM nodes */
struct {Pnode hd; Pnode tl;} cons;      /* presumably used by CONS nodes: head and tail */
struct {Pnode fun; Pnode arg;} appl;    /* used by APPL nodes: function and its argument */
} nd;
};
/* Constructor functions */
extern Pnode Func(int arity, const char *name, unary code);
extern Pnode Num(int num);
extern Pnode Nil(void);
extern Pnode Cons(Pnode hd, Pnode tl);
extern Pnode Appl(Pnode fun, Pnode arg);
Figure 7.15 Declaration of node types and constructor functions.



SUBSECTION 7.5.2

The reduction engine


/* Node constructor for application (@) nodes: obtains a node from
   heap_alloc(), tags it APPL, stores fun and arg in it and returns it. */
Pnode Appl(const Pnode fun, Pnode arg) {
    Pnode appl_node = (Pnode) heap_alloc(sizeof(*appl_node));

    appl_node->tag = APPL;
    appl_node->nd.appl.arg = arg;
    appl_node->nd.appl.fun = fun;
    return appl_node;
}
Figure 7.16 The node constructor function for application (@) nodes.



401

402

SECTION 7.5

Graph reduction


#include "node.h"
#include "eval.h"
#define STACK_DEPTH 10000

static Pnode arg[STACK_DEPTH];          /* argument stack */
static int top = STACK_DEPTH;           /* grows down */

/*
 * Reduction engine.  Starting at root, application nodes are unwound and
 * their arguments pushed on the argument stack until a function node is
 * reached.  If at least 'arity' arguments have been stacked since entry,
 * the function's code is called on them, the arguments are popped, *root
 * is overwritten with the result and reduction continues from the result.
 * If too few arguments are available, the stack is restored to its state
 * on entry and root is returned as it is.  Any other kind of node is
 * returned unchanged.
 */
Pnode eval(Pnode root) {
    Pnode node = root;
    int frame, arity;

    frame = top;                        /* stack level on entry */
    for (;;) {
        switch (node->tag) {
        case APPL:                              /* unwind */
            arg[--top] = node->nd.appl.arg;     /* stack argument */
            node = node->nd.appl.fun;           /* application node */
            break;
        case FUNC:
            arity = node->nd.func.arity;
            if (frame-top < arity) {            /* curried function */
                top = frame;
                return root;
            }
            node = node->nd.func.code(&arg[top]);   /* reduce */
            top += arity;                           /* unstack arguments */
            *root = *node;                          /* update root pointer */
            break;
        default:
            return node;
        }
    }
}
Figure 7.17 Reduction engine.



*/

*/
*/
*/

SUBSECTION 7.5.2

The reduction engine



argument stack
root:

@
twice

3
square

Figure 7.18 Stacking the arguments while searching for a redex.




403

404

SECTION 7.5

Graph reduction


Pnode mul(Pnode _arg0, Pnode _arg1) {
Pnode a = eval(_arg0);
Pnode b = eval(_arg1);
return Num(a->nd.num * b->nd.num);
}
Pnode _mul(Pnode *arg) {
return mul(arg[0], arg[1]);
}
struct node __mul = {FUNC, {2, "mul", _mul}};
Figure 7.19 Code and function descriptor of the built-in operator mul.



SUBSECTION 7.5.2

The reduction engine



argument stack
@
@
*

@
square

Figure 7.20 A more complicated argument stack.




405

406

CHAPTER 7

Functional programs


Pnode conditional(Pnode _arg0, Pnode _arg1, Pnode _arg2) {
Pnode cond = _arg0;
Pnode _then = _arg1;
Pnode _else = _arg2;
return eval(cond)->nd.num ? _then : _else;
}
Pnode _conditional(Pnode *arg) {
return conditional(arg[0], arg[1], arg[2]);
}
struct node __conditional =
{FUNC, {3, "conditional", _conditional}};
Figure 7.21 Code and function descriptor of the built-in conditional operator.



SECTION 7.6

Code generation for functional core programs


f a = let
b = fac a
c = b * b
in
c + c
Figure 7.22 Example of a let-expression.



407

408

CHAPTER 7

Functional programs


/* Translation of  f a = let b = fac a; c = b * b in c + c :
   each let-binding becomes a local holding an application graph that is
   built but not evaluated here; the result is the graph for c + c. */
Pnode f(Pnode _arg0) {
    Pnode a = _arg0;
    Pnode b = Appl(&__fac, a);
    Pnode c = Appl(Appl(&__mul, b), b);

    return Appl(Appl(&__add, c), c);
}
Figure 7.23 Translation of the let-expression of Figure 7.22.



SECTION 7.6

Code generation for functional core programs


/* Builds, without evaluating it, the application graph for (a + b) / 2
   and returns its root. */
Pnode average(Pnode _arg0, Pnode _arg1) {
    Pnode a = _arg0;
    Pnode b = _arg1;
    Pnode sum = Appl(Appl(&__add, a), b);       /* a + b */
    Pnode div_by = Appl(&__div, sum);           /* (a + b) / ... */

    return Appl(div_by, Num(2));
}
Figure 7.24 Translation of the function average.



409

410

SECTION 7.6

Code generation for functional core programs


Pnode average(Pnode _arg0, Pnode _arg1) {
Pnode a = _arg0;
Pnode b = _arg1;
return div(Appl(Appl(&__add, a), b), Num(2));
}
Figure 7.25 Suppressing the top-level spine in the translation of the function average.



SUBSECTION 7.6.1

Avoiding the construction of some application spines


Pnode average(Pnode _arg0, Pnode _arg1) {
Pnode a = _arg0;
Pnode b = _arg1;
return div(add(a, b), Num(2));
}
Figure 7.26 Suppressing the argument spine to div in Figure 7.25.



411

412

SECTION 7.7

Optimizing the functional core



{b}
if
{b} {}
=
{b}
b

0
{}
0

{a,b}
/
{a}

{b}
b

Figure 7.27 Flow of strictness information in the AST of safe_div.




SUBSECTION 7.7.1

Strictness analysis



Language construct              Set to be propagated

L operator R                    $L \cup R$
if C then T else E              $C \cup (T \cap E)$
fun_m @ A_1 @ ... @ A_n         $\bigcup_{i=1}^{\min(m,n)} \mathit{strict}(fun, i) \Rightarrow A_i$
F @ A_1 @ ... @ A_n             $F$
let v = V in E                  $(E \setminus \{v\}) \cup V$ , if $v \in E$
                                $E$ , otherwise

Figure 7.28 Rules for strictness information propagation.




413

414

SECTION 7.7

Optimizing the functional core



{a,b}
if

=
{a}
a

{a} {b}
b
{}
0

{a,b}
@
{a}

{b}
+

@
{a}
-

f(+,+)

{a}
a

{b}
b

{}
1

{}
1

Figure 7.29 Optimistic strictness analysis of the function f.




SUBSECTION 7.7.1

Strictness analysis



Data definitions:
1. Let Strict be an associative mapping from (identifier, int) pairs to Booleans,
where each pair describes a function name and argument index.
Initializations:
1. For each function f with arity m set Strict[f, i] to true for arguments i ranging from 1 to m.
Inference rules:
1. If propagation of strictness information through the AST of function f
shows that f is not strict in argument i, then Strict[f, i] must be false.
Figure 7.30 Closure algorithm for strictness predicate.


415

416

SECTION 7.7

Optimizing the functional core


Pnode fac(Pnode _arg0) {
Pnode n = _arg0;
return equal(n, Num(0))->nd.num ? Num(1)
: mul(n, fac(Appl(Appl(&__sub, n), Num(1))));
}
Figure 7.31 Lazy translation of the argument of fac.



SUBSECTION 7.7.1

Strictness analysis


Pnode fac(Pnode _arg0) {
Pnode n = _arg0;
return equal(n, Num(0))->nd.num ? Num(1)
: mul(n, fac(sub(n, Num(1))));
}
Figure 7.32 The first step in exploiting the strictness of the argument of fac.



417

418

SECTION 7.7

Optimizing the functional core


Pnode fac_evaluated(Pnode _arg0) {
Pnode n = _arg0;
return equal_evaluated(n, Num(0))->nd.num ? Num(1)
: mul_evaluated(n, fac_evaluated(sub_evaluated(n, Num(1))));
}
Pnode fac(Pnode _arg0) {
return fac_evaluated(eval(_arg0));
}
Figure 7.33 Exploiting the strictness of fac and the built-in operators.



SUBSECTION 7.7.2

Boxing analysis


/* Factorial on plain (unboxed) integers: 1 for n == 0, otherwise
   n times the factorial of n-1. */
int fac_unboxed(int n) {
    if (n == 0) {
        return 1;
    }
    return n * fac_unboxed(n-1);
}
Pnode fac_evaluated(Pnode _arg0) {
Pnode n = _arg0;
return Num(fac_unboxed(n->nd.num));
}
Pnode fac(Pnode _arg0) {
return fac_evaluated(eval(_arg0));
}
Figure 7.34 Strict, unboxed translation of the function fac.



419

420

SECTION 7.7

Optimizing the functional core


/* Returns lst after taking its tail n times; for n == 0 lst itself is
   returned.  The count n is passed as a plain integer. */
Pnode drop_unboxed(int n, Pnode lst) {
    return n == 0 ? lst : drop_unboxed(n-1, tail(lst));
}
Figure 7.35 Strict, unboxed translation of the function drop.



SUBSECTION 7.7.3

Tail calls


/* drop with the tail call turned into iteration: each round computes the
   new argument values first and only then overwrites n and lst, after
   which the body is repeated instead of calling the routine again. */
Pnode drop_unboxed(int n, Pnode lst) {
    while (n != 0) {
        int next_n = n-1;
        Pnode next_lst = tail(lst);

        n = next_n;
        lst = next_lst;
    }
    return lst;
}
Figure 7.36 Translation of drop with tail recursion eliminated.



421

422

SECTION 7.7

Optimizing the functional core


fac n = if (n == 0) then 1 else prod n (n-1)
where
prod acc n = if (n == 0) then acc
else prod (acc*n) (n-1)
Figure 7.37 Tail-recursive fac with accumulating argument.



SUBSECTION 7.7.4

Accumulator transformation


/* Accumulating helper of fac: multiplies acc by n, n-1, ..., 1 and
   returns the result; for n == 0 acc is returned unchanged.  The
   recursion of the source function has been replaced by a loop. */
int fac_dot_prod_unboxed(int acc, int n) {
    while (n != 0) {
        int next_acc = n * acc;
        int next_n = n-1;

        acc = next_acc;
        n = next_n;
    }
    return acc;
}
/* Factorial via the accumulating helper: 1 for n == 0, otherwise the
   helper is started with accumulator n and counter n-1. */
int fac_unboxed(int n) {
    return n == 0 ? 1 : fac_dot_prod_unboxed(n, n-1);
}
Figure 7.38 Accumulating translation of fac, with tail recursion elimination.



423

424

SECTION 7.7

Optimizing the functional core


F x_1 ... x_n = if B then V else E ⊕ F E_1 ... E_n
⇒
F x_1 ... x_n = if B then V else F' E E_1 ... E_n
F' x_acc x_1 ... x_n = if B then x_acc ⊕ V else F' (x_acc ⊕ E) E_1 ... E_n
Figure 7.39 Accumulating argument translation scheme.





Logic programs

425

426

CHAPTER 8

Logic programs


goal

AND

goal

AND ...

goal

AND

goal

AND ...

goal

AND

goal

AND ...

AND

goal

top element

OR

OR
...
OR
bottom element

Figure 8.1 The goal list stack.




SECTION 8.2

The general implementation model, interpreted

427



clause head :- clause body.


[ first goal ?= clause head ], clause body, other goals.
first goal, other goals.
Figure 8.2 The action of Attach clauses using one clause.


428

SECTION 8.3

Unification


/* A variable: its name and the term it is bound to; a null 'term'
   pointer means the variable is unbound. */
struct variable {
    char *name;
    struct term *term;
};

/* A structure: functor name, number of components and the components
   themselves. */
struct structure {
    char *functor;
    int arity;
    struct term **components;
};

/* Discriminates the three kinds of term. */
typedef enum {Is_Constant, Is_Variable, Is_Structure} Term_Type;

/* A term: 'type' tells which member of the union is in use. */
typedef struct term {
    Term_Type type;
    union {
        char *constant;                 /* Is_Constant */
        struct variable variable;       /* Is_Variable */
        struct structure structure;     /* Is_Structure */
    } term;
} Term;

Figure 8.3 Data types for the construction of terms.



SUBSECTION 8.3.2

The implementation of unification

429


/* Unifies two terms.  Both are dereferenced first; if either one is then
   still a variable the variable case is used, otherwise the two
   non-variables are unified.  Returns the result of the routine called. */
int unify_terms(Term *goal_arg, Term *head_arg) {
    /* Handle any bound variables: */
    Term *goal = deref(goal_arg);
    Term *head = deref(head_arg);
    int variable_involved =
        goal->type == Is_Variable || head->type == Is_Variable;

    return variable_involved
        ? unify_unbound_variable(goal, head)
        : unify_non_variables(goal, head);
}
Figure 8.4 C code for the unification of two terms.



430

SECTION 8.3

Unification


/* Follows the binding chain starting at t and returns the first term
   that is either not a variable or a variable with a null binding. */
Term *deref(Term *t) {
    for (;;) {
        if (t->type != Is_Variable) break;          /* not a variable */
        if (t->term.variable.term == 0) break;      /* unbound variable */
        t = t->term.variable.term;
    }
    return t;
}
Figure 8.5 The function deref().



SUBSECTION 8.3.2

The implementation of unification

431


/* Unification in which at least one side is an unbound variable.  If
   both sides are the same term nothing is done; otherwise the variable
   (head if head is a variable, else goal) is registered through
   trail_binding() and bound to the other term.  Always returns 1. */
int unify_unbound_variable(Term *goal, Term *head) {
    if (goal != head) {
        /* Bind the unbound variable to the other term: */
        int head_is_variable = head->type == Is_Variable;
        Term *var = head_is_variable ? head : goal;
        Term *value = head_is_variable ? goal : head;

        trail_binding(var);
        var->term.variable.term = value;
    }
    /* else: unification of identical variables is trivial */
    return 1;                   /* variable unification always succeeds */
}
Figure 8.6 C code for unification involving at least one unbound variable.



432

SECTION 8.3

Unification


/* Unifies two terms neither of which is a variable.  Returns 0 when the
   terms are of different kinds, otherwise the result of unifying the two
   constants or the two structures. */
int unify_non_variables(Term *goal, Term *head) {
    /* Handle terms of different type: */
    if (goal->type != head->type) return 0;
    switch (goal->type) {
    case Is_Constant:           /* both are constants */
        return unify_constants(goal, head);
    case Is_Structure:          /* both are structures */
        return unify_structures(
            &goal->term.structure, &head->term.structure
        );
    default:
        /* unify_terms() sends variables elsewhere; if one arrives here
           anyway, report failure instead of running off the end of a
           non-void function. */
        return 0;
    }
}
Figure 8.7 C code for the unification of two non-variables.



SUBSECTION 8.3.2

The implementation of unification

433


int unify_constants(Term *goal, Term *head) {
return strcmp(goal->term.constant, head->term.constant) == 0;
}
Figure 8.8 C code for the unification of two constants.



434

SECTION 8.3

Unification


/* Unifies two structures: they must have the same arity and functor,
   and their components must unify pairwise, left to right.  Returns 1
   on success and 0 at the first mismatch. */
int unify_structures(
    struct structure *s_goal, struct structure *s_head
) {
    int i;

    if (s_goal->arity != s_head->arity) return 0;
    if (strcmp(s_goal->functor, s_head->functor) != 0) return 0;

    for (i = 0; i < s_head->arity; i++) {
        int unified = unify_terms(
            s_goal->components[i], s_head->components[i]
        );
        if (!unified) return 0;
    }
    return 1;
}
Figure 8.9 C code for the unification of two structures.



SUBSECTION 8.3.3

Unification of two unbound variables


E
C
B

D
D

(a)

(b)

Figure 8.10 Examples of variable–variable bindings.




435

436

Unification

SECTION 8.3


D

(a)

(b)

Figure 8.11 Examples of more complicated variable–variable bindings.




SUBSECTION 8.4.1

List procedures


/* List procedure: calls Action once for each pair in the list, in the
   order (1, 3), (4, 6), (7, 3). */
void number_pair_list(void (*Action)(int v1, int v2)) {
    static const int pairs[][2] = { {1, 3}, {4, 6}, {7, 3} };
    unsigned int idx;

    for (idx = 0; idx < sizeof pairs / sizeof pairs[0]; idx++) {
        (*Action)(pairs[idx][0], pairs[idx][1]);
    }
}
Figure 8.12 An example of a list procedure.



437

438

SECTION 8.4

The general implementation model, compiled


int fs_sum;
void fs_test_sum(int v1, int v2) {
if (v1 + v2 == fs_sum) {
printf("Solution found: %d, %d\n", v1, v2);
}
}
void find_sum(int sum) {
fs_sum = sum;
number_pair_list(fs_test_sum);
}
Figure 8.13 An application of the list procedure.



SUBSECTION 8.4.1

List procedures


void find_sum(int sum) {
void test_sum(int v1, int v2) {
if (v1 + v2 == sum) {
printf("Solution found: %d, %d\n", v1, v2);
}
}
number_pair_list(test_sum);
}
Figure 8.14 An application of the list procedure, using nested routines.



439

440

SECTION 8.4

The general implementation model, compiled


void parent_2(
Term *goal_arg1,
Term *goal_arg2,
Action goal_list_tail
) {
... /* the local routines parent_2_clause_[1-5]() */
/* OR of all clauses for parent/2 */
parent_2_clause_1();
/* parent(arne, james). */
parent_2_clause_2();
/* parent(arne, sachiko). */
parent_2_clause_3();
/* parent(koos, rivka). */
parent_2_clause_4();
/* parent(sachiko, rivka). */
parent_2_clause_5();
/* parent(truitje, koos). */
}
Figure 8.15 Translation of the relation parent/2.



SUBSECTION 8.4.2

Compiled clause search and unification


/* translation of parent(arne, sachiko). */
void parent_2_clause_2(void) {
/* translation of arne, sachiko). */
void parent_2_clause_2_arg_1(void) {
/* translation of sachiko). */
void parent_2_clause_2_arg_2(void) {
/* translation of . */
void parent_2_clause_2_body(void) {
goal_list_tail();
}
/* translation of head term sachiko) */
unify_terms(goal_arg2, put_constant("sachiko"),
parent_2_clause_2_body
);
}
/* translation of head term arne, */
unify_terms(goal_arg1, put_constant("arne"),
parent_2_clause_2_arg_2
);
}
/* translation of parent( */
parent_2_clause_2_arg_1();
}
Figure 8.16 Generated clause routine for parent(arne, sachiko).



441

442

SECTION 8.4

The general implementation model, compiled


/* Backtracking variable unification.  If both arguments are the same
   term the continuation goal_list_tail is simply called.  Otherwise the
   variable (head_arg if it is a variable, else goal_arg) is bound to the
   other term, the continuation is called, and the binding is reset to
   null when the continuation returns. */
void unify_unbound_variable(
    Term *goal_arg, Term *head_arg, Action goal_list_tail
) {
    Term *var;
    Term *value;

    if (goal_arg == head_arg) {
        /* Unification of identical variables succeeds trivially */
        goal_list_tail();
        return;
    }

    /* Bind the unbound variable to the other term: */
    if (head_arg->type == Is_Variable) {
        var = head_arg;
        value = goal_arg;
    }
    else {
        var = goal_arg;
        value = head_arg;
    }
    var->term.variable.term = value;
    goal_list_tail();
    var->term.variable.term = 0;    /* undo the binding */
}
Figure 8.17 C code for full backtracking variable unification.



SUBSECTION 8.4.2

Compiled clause search and unification


void query(void) {
Term *X = put_variable("X");
void display_X(void) {
print_term(X);
printf("\n");
}
parent_2(put_constant("arne"), X, display_X);
}
Figure 8.18 Possible generated code for the query ?- parent(arne, X).



443

444

SECTION 8.4

The general implementation model, compiled


/* Clause routine for grandparent/2.  Each part of the single clause is a
   nested function (GNU C extension); every part hands the next part to
   unify_terms or parent_2 as the continuation, and the innermost goal
   passes on the caller's goal_list_tail. */
void grandparent_2(
Term *goal_arg1, Term *goal_arg2, Action goal_list_tail
) {
/* translation of grandparent(X,Z):-parent(X,Y),parent(Y,Z). */
void grandparent_2_clause_1(void) {
/* translation of X,Z):-parent(X,Y),parent(Y,Z). */
void grandparent_2_clause_1_arg_1(void) {
Term *X = put_variable("X");
/* translation of Z):-parent(X,Y),parent(Y,Z). */
void grandparent_2_clause_1_arg_2(void) {
Term *Z = put_variable("Z");
/* translation of body parent(X,Y),parent(Y,Z). */
void grandparent_2_clause_1_body(void) {
Term *Y = put_variable("Y");
/* translation of parent(Y,Z). */
void grandparent_2_clause_1_goal_2(void) {
/* last goal: continue with the caller's goal list tail */
parent_2(Y, goal_arg2, goal_list_tail);
}
/* translation of parent(X,Y), */
parent_2(goal_arg1, Y, grandparent_2_clause_1_goal_2);
}
/* translation of head term Z):- */
unify_terms(goal_arg2, Z, grandparent_2_clause_1_body);
}
/* translation of head term X, */
unify_terms(goal_arg1, X, grandparent_2_clause_1_arg_2);
}
/* translation of grandparent( */
grandparent_2_clause_1_arg_1();
}
/* OR of all clauses for grandparent/2 */
grandparent_2_clause_1();
}
Figure 8.19 Clause routine generated for grandparent/2.



SUBSECTION 8.4.3

Optimized clause selection in the WAM

445


/* Outline of parent/2 with clause selection on the first goal argument:
   the type of the dereferenced goal_arg1 selects one of the labels
   L_Variable, L_Constant or L_Structure.  The "..." lines are placeholders
   for code shown elsewhere. */
void parent_2(
Term *goal_arg1,
Term *goal_arg2,
Action goal_list_tail
) {
... /* the local routines parent_2_clause_[1-5]() */
/* switch_on_term(L_Variable, L_Constant, L_Structure) */
switch (deref(goal_arg1)->type) {
case Is_Variable:
goto L_Variable;
case Is_Constant:
goto L_Constant;
case Is_Structure: goto L_Structure;
}
... /* code labeled by L_Variable, L_Constant, L_Structure */
/* common exit point reached by "goto L_fail" in the labeled code */
L_fail:;
}
Figure 8.20 Optimized translation of the relation parent/2.



446

SECTION 8.4

The general implementation model, compiled


/* Entry for a first goal argument that is a variable: every clause routine
   is called in turn, then control goes to L_fail. */
L_Variable:
/* OR of all clauses for parent/2 */
/* parent(arne, james). */
parent_2_clause_1();
/* parent(arne, sachiko). */
parent_2_clause_2();
/* parent(koos, rivka). */
parent_2_clause_3();
/* parent(sachiko, rivka). */
parent_2_clause_4();
/* parent(truitje, koos). */
parent_2_clause_5();
goto L_fail;
Figure 8.21 Code generated for the label L_Variable.



SUBSECTION 8.4.3

Optimized clause selection in the WAM

447


/* Entry for a first goal argument that is a constant: the value returned
   by entry() for that constant selects the label whose code calls only
   the clause routines listed under it.  A constant with no matching case
   falls out of the switch to "goto L_fail".
   NOTE(review): entry() presumably looks the constant up in
   constant_table, with 8 as a size argument -- confirm. */
L_Constant:
/* switch_on_constant(8, constant_table) */
switch (
entry(deref(goal_arg1)->term.constant, 8, constant_table)
) {
case ENTRY_arne:
goto L_Constant_arne;
case ENTRY_koos:
goto L_Constant_koos;
case ENTRY_sachiko: goto L_Constant_sachiko;
case ENTRY_truitje: goto L_Constant_truitje;
}
goto L_fail;

L_Constant_arne:
/* parent(arne, james). */
parent_2_clause_1();
/* parent(arne, sachiko). */
parent_2_clause_2();
goto L_fail;

L_Constant_koos:
/* parent(koos, rivka). */
parent_2_clause_3();
goto L_fail;

L_Constant_sachiko:
/* parent(sachiko, rivka). */
parent_2_clause_4();
goto L_fail;

L_Constant_truitje:
/* parent(truitje, koos). */
parent_2_clause_5();
goto L_fail;

Figure 8.22 Code generated for the labels L_Constant_....



448

SECTION 8.4

The general implementation model, compiled


/* Entry for a first goal argument that is a structure: no clause routine
   is called; control goes straight to L_fail. */
L_Structure:
goto L_fail;
Figure 8.23 Code generated for the label L_Structure.



SUBSECTION 8.4.4

Implementing the cut mechanism

449


/* Clause routine for first_gp/2, whose body contains a cut.  The clause is
   translated into a chain of nested functions (GNU C extension).  The cut
   is translated as: run the rest of the body, then jump with a non-local
   goto to L_cut at the end of first_gp_2, so that control does not return
   into the parent_2 call that invoked goal_2.
   NOTE(review): the label L_fail is defined but not referenced in the code
   visible here. */
void first_gp_2(
Term *goal_arg1, Term *goal_arg2, Action goal_list_tail
) {
/* label declarations required by GNU C for non-local jump */
__label__ L_cut;
/* translation of first_gp(X,Z):-parent(X,Y),!,parent(Y,Z). */
void first_gp_2_clause_1(void) {
/* translation of X,Z):-parent(X,Y),!,parent(Y,Z). */
void first_gp_2_clause_1_arg_1(void) {
Term *X = put_variable("X");
/* translation of Z):-parent(X,Y),!,parent(Y,Z). */
void first_gp_2_clause_1_arg_2(void) {
Term *Z = put_variable("Z");
/* translation of body parent(X,Y),!,parent(Y,Z). */
void first_gp_2_clause_1_body(void) {
Term *Y = put_variable("Y");
/* translation of !,parent(Y,Z). */
void first_gp_2_clause_1_goal_2(void) {
/* translation of parent(Y,Z). */
void first_gp_2_clause_1_goal_3(void) {
parent_2(Y, goal_arg2, goal_list_tail);
}
/* translation of !, */
first_gp_2_clause_1_goal_3();
/* leave first_gp_2 altogether once the rest of the body has returned */
goto L_cut;
}
/* translation of parent(X,Y), */
parent_2(goal_arg1, Y, first_gp_2_clause_1_goal_2);
}
/* translation of head term Z):- */
unify_terms(goal_arg2, Z, first_gp_2_clause_1_body);
}
/* translation of head term X, */
unify_terms(goal_arg1, X, first_gp_2_clause_1_arg_2);
}
/* translation of first_gp( */
first_gp_2_clause_1_arg_1();
}
/* OR of all clauses for first_gp/2 */
first_gp_2_clause_1();
L_fail:;
L_cut:;
}
Figure 8.24 Clause routine generated for first_grandparent/2.



450

SECTION 8.4

The general implementation model, compiled

SUBSECTION 8.4.5

Implementing the predicates assert and retract

451


/* Outline of parent/2 with support for retracted and asserted clauses.
   Each compiled clause routine is called only if its retracted_... flag
   is not set.  At L_fail, if clauses have been added at the end, the goal
   arguments and the constant "parent/2" are stacked and interpret() is
   called with the goal list tail.  The "..." line is a placeholder for
   code shown elsewhere. */
void parent_2(
Term *goal_arg1,
Term *goal_arg2,
Action goal_list_tail
) {
... /* the local routines parent_2_clause_[1-5]() */
/* guarded OR of all clauses for parent/2 */
/* parent(arne, james). */
if (!retracted_parent_2_clause_1)
parent_2_clause_1();
/* parent(arne, sachiko). */
if (!retracted_parent_2_clause_2)
parent_2_clause_2();
/* parent(koos, rivka). */
if (!retracted_parent_2_clause_3)
parent_2_clause_3();
/* parent(sachiko, rivka). */
if (!retracted_parent_2_clause_4)
parent_2_clause_4();
/* parent(truitje, koos). */
if (!retracted_parent_2_clause_5)
parent_2_clause_5();
L_fail:
if (number_of_clauses_added_at_end_for_parent_2 > 0) {
/* arguments are stacked in the order goal_arg2, goal_arg1, relation name */
stack_argument(goal_arg2);
stack_argument(goal_arg1);
stack_argument(put_constant("parent/2"));
interpret(goal_list_tail);
}
L_cut:;
}
Figure 8.25 Translation of parent/2 with code for controlling asserted and retracted clauses.



452

SECTION 8.4

The general implementation model, compiled


Compiled code

Interpreter
stack

...
...
...
stack_argument(goal_arg2);
stack_argument(goal_arg1);
stack_argument(put_constant(...
interpret(goal_list_tail);
...
...
...
control

data

code
...
...
void interpret(...
...
...

Figure 8.26 Switch to interpretation of added clauses.




top

SUBSECTION 8.4.5

Implementing the predicates assert and retract

453


/* Dispatch routine: selects on the value entry() returns for the relation
   name.  For a compiled relation such as parent/2 the goal arguments are
   unstacked and the compiled routine is called; any other relation is
   handed to interpret_asserted_relation().  The "ENTRY_..." cases and
   "..." lines are placeholders for further relations.
   NOTE(review): goal_arg1 and goal_arg2 are not declared in the code
   visible here. */
void do_relation(const char *relation, Action goal_list_tail) {
switch (entry(relation, N, relation_table)) {
case ENTRY_...:
case ENTRY_parent_2:
/* goal_arg1 is unstacked first, then goal_arg2 */
goal_arg1 = unstack_argument();
goal_arg2 = unstack_argument();
parent_2(goal_arg1, goal_arg2, goal_list_tail);
break;
case ENTRY_grandparent_2:
...
break;
case ENTRY_...:
...
default:
interpret_asserted_relation(relation, goal_list_tail);
break;
}
}
Figure 8.27 Dispatch routine for goals in asserted clauses.



454

SECTION 8.4

The general implementation model, compiled


Compiled code

Interpreter
stack

data
...
...
...
...
void parent_2(...
control
...
...

top

code
...
...
goal_arg1 = unstack_argument();
goal_arg2 = unstack_argument();
parent_2(goal_arg1, goal_arg2, goal_list_tail);
...
...

Figure 8.28 Switch to execution of compiled clauses.




SECTION 8.5

Compiled code for unification


/* translation of parent(arne, sachiko). */
/* Clause routine using unify_terms() as a function that returns a success
   value: a trail mark is set first, both head arguments are unified, the
   body is run only if both unifications succeed, and finally all bindings
   made since the trail mark are restored.  goal_arg1, goal_arg2 and
   goal_list_tail are not declared here; presumably they come from an
   enclosing routine -- confirm. */
void parent_2_clause_2(void) {
Trail_Mark trail_mark;
set_trail_mark(&trail_mark);
/* translation of (arne, sachiko) */
if (unify_terms(goal_arg1, put_constant("arne"))
&& unify_terms(goal_arg2, put_constant("sachiko"))
) {
/* translation of . */
void parent_2_clause_2_body(void) {
goal_list_tail();
}
/* translation of . */
parent_2_clause_2_body();
}
/* executed whether or not the unifications succeeded */
restore_bindings_until_trail_mark(&trail_mark);
}
Figure 8.29 Clause routine with flat unification for parent(arne, sachiko).



455

456

SECTION 8.5

Compiled code for unification


/* optimized translation of parent(arne, sachiko). */
void parent_2_clause_2(void) {
Trail_Mark trail_mark;
set_trail_mark(&trail_mark);
/* optimized translation of (arne, sachiko) */
UNIFY_CONSTANT(goal_arg1, "arne");
/* macro */
UNIFY_CONSTANT(goal_arg2, "sachiko");
/* macro */
/* optimized translation of . */
goal_list_tail();
L_fail:
restore_bindings_until_trail_mark(&trail_mark);
}
Figure 8.30 Clause routine with compiled unification for parent(arne, sachiko).



SUBSECTION 8.5.1

Unification instructions in the WAM

457


/* Body of the instruction UNIFY_CONSTANT(goal_arg, head_text).
   The dereferenced goal argument is either a variable, which is recorded
   with trail_binding() and bound to a new constant made from head_text,
   or it must be a constant with the same text; otherwise control jumps
   to L_fail. */
g = deref(goal_arg);
if (g->type == Is_Variable) {
trail_binding(g);
g->term.variable.term = put_constant(head_text);
}
else {
if (g->type != Is_Constant) goto L_fail;
if (strcmp(g->term.constant, head_text) != 0) goto L_fail;
}
Figure 8.31 The unification instruction UNIFY_CONSTANT.



458

SECTION 8.5

Compiled code for unification


/* Body of the instruction UNIFY_VARIABLE(goal_arg, head_var).
   If the dereferenced head variable is still a variable it is bound to
   the dereferenced goal argument (unless they are the same term);
   otherwise the general routine unify_terms() is called and control jumps
   to L_fail when it reports failure. */
/* head_var may already be bound */
v = deref(head_var);
g = deref(goal_arg);
if (v->type == Is_Variable) {
/* binding a variable to itself is skipped */
if (g != v) {
trail_binding(v);
v->term.variable.term = g;
}
}
else {
/* no further compilation possible */
/* call interpreter */
if (!unify_terms(g, v)) goto L_fail;
}
Figure 8.32 The unification instruction UNIFY_VARIABLE.



SUBSECTION 8.5.2

Deriving a unification instruction by manual partial evaluation

459


/* General unification routine, the starting point (stage 1) of the
   derivation of UNIFY_CONSTANT.
   Returns 1 if goal_arg and head_arg unify and 0 if they do not.  When
   one of the dereferenced terms is a variable it is recorded with
   trail_binding() and bound to the other term.
   NOTE(review): if neither switch case applies, control reaches the end
   of the function without a return statement. */
int unify_terms(Term *goal_arg, Term *head_arg) {
/* Handle any bound variables: */
Term *goal = deref(goal_arg);
Term *head = deref(head_arg);
if (goal->type == Is_Variable || head->type == Is_Variable) {
/* Handle identical variables: */
if (goal == head) return 1;
/* Bind the unbound variable to the other term: */
if (head->type == Is_Variable) {
trail_binding(head);
head->term.variable.term = goal;
}
else {
trail_binding(goal);
goal->term.variable.term = head;
}
/* always succeeds */
return 1;
}
else {
/* Handle terms of different type: */
if (goal->type != head->type) return 0;
switch (goal->type) {
case Is_Constant:
/* both are constants */
return strcmp(goal->term.constant, head->term.constant) == 0;
case Is_Structure:
/* both are structures */
return unify_structures(
&goal->term.structure, &head->term.structure
);
}
}
}

Figure 8.33 Deriving the instruction UNIFY_CONSTANT, stage 1.



460

SECTION 8.5

Compiled code for unification


/* Stage 2 of the derivation of UNIFY_CONSTANT: the general unification
   code with every use of the head term written out as
   (put_constant(head_text)).  Nothing has been simplified yet, so the
   code is deliberately redundant. */
int unify_constant(Term *goal_arg, char *head_text) {
Term *goal;
/* Handle bound goal_arg: */
goal = deref(goal_arg);
if (goal->type == Is_Variable
|| (put_constant(head_text))->type == Is_Variable
) {
/* Handle identical variables: */
if (goal == (put_constant(head_text))) return 1;
/* Bind the unbound variable to the other term: */
if ((put_constant(head_text))->type == Is_Variable) {
trail_binding((put_constant(head_text)));
(put_constant(head_text))->term.variable.term = goal;
}
else {
trail_binding(goal);
goal->term.variable.term = (put_constant(head_text));
}
/* always succeeds */
return 1;
}
else {
/* Handle terms of different type: */
if (goal->type != (put_constant(head_text))->type) return 0;
switch (goal->type) {
case Is_Constant:
/* both are constants */
return
strcmp(
goal->term.constant,
(put_constant(head_text))->term.constant
) == 0;
case Is_Structure:
/* both are structures */
return unify_structures(
&goal->term.structure,
&(put_constant(head_text))->term.structure
);
}
}
}

Figure 8.34 Deriving the instruction UNIFY_CONSTANT, stage 2.



SUBSECTION 8.5.2

Deriving a unification instruction by manual partial evaluation

461


/* Stage 3 of the derivation of UNIFY_CONSTANT: the type of the head
   constant is written as Is_Constant and its text as head_text.
   ERRONEOUS appears to mark expressions that have no valid replacement
   because they would use the variable or structure field of a constant
   (reviewer's reading -- confirm); this stage is not meant to compile. */
int unify_constant(Term *goal_arg, char *head_text) {
Term *goal;
/* Handle bound goal_arg: */
goal = deref(goal_arg);
if (goal->type == Is_Variable || Is_Constant == Is_Variable) {
/* Handle identical variables: */
if (goal == (put_constant(head_text))) return 1;
/* Bind the unbound variable to the other term: */
if (Is_Constant == Is_Variable) {
trail_binding((put_constant(head_text)));
ERRONEOUS = goal;
}
else {
trail_binding(goal);
goal->term.variable.term = (put_constant(head_text));
}
/* always succeeds */
return 1;
}
else {
/* Handle terms of different type: */
if (goal->type != Is_Constant) return 0;
switch (goal->type) {
case Is_Constant:
/* both are constants */
return strcmp(goal->term.constant, head_text) == 0;
case Is_Structure:
/* both are structures */
return unify_structures(
&goal->term.structure, ERRONEOUS
);
}
}
}
Figure 8.35 Deriving the instruction UNIFY_CONSTANT, stage 3.



462

SECTION 8.5

Compiled code for unification


/* Stage 4 of the derivation of UNIFY_CONSTANT: conditions whose outcome is
   known have been replaced by the constant 0, and the switch now selects
   on the constant Is_Constant.  The branches guarded by 0 and the
   Is_Structure case are still present; ERRONEOUS occurs only inside
   them. */
int unify_constant(Term *goal_arg, char *head_text) {
Term *goal;
/* Handle bound goal_arg: */
goal = deref(goal_arg);
if (goal->type == Is_Variable || 0) {
/* Handle identical variables: */
if (0) return 1;
/* Bind the unbound variable to the other term: */
if (0) {
trail_binding((put_constant(head_text)));
ERRONEOUS = goal;
}
else {
trail_binding(goal);
goal->term.variable.term = (put_constant(head_text));
}
/* always succeeds */
return 1;
}
else {
/* Handle terms of different type: */
if (goal->type != Is_Constant) return 0;
switch (Is_Constant) {
case Is_Constant:
/* both are constants */
return strcmp(goal->term.constant, head_text) == 0;
case Is_Structure:
/* both are structures */
return unify_structures(
&goal->term.structure, ERRONEOUS
);
}
}
}
Figure 8.36 Deriving the instruction UNIFY_CONSTANT, stage 4.



SUBSECTION 8.5.2

Deriving a unification instruction by manual partial evaluation

463


/* Stage 5 of the derivation of UNIFY_CONSTANT: only the code that can
   still be reached remains.  Returns 1 after binding a variable goal to a
   new constant made from head_text, 0 if the goal is not a constant, and
   otherwise the result of comparing the goal's text with head_text. */
int unify_constant(Term *goal_arg, char *head_text) {
Term *goal;
/* Handle bound goal_arg: */
goal = deref(goal_arg);
if (goal->type == Is_Variable) {
trail_binding(goal);
goal->term.variable.term = (put_constant(head_text));
/* always succeeds */
return 1;
}
else {
if (goal->type != Is_Constant) return 0;
return strcmp(goal->term.constant, head_text) == 0;
}
}
Figure 8.37 Deriving the instruction UNIFY_CONSTANT, stage 5.



464

SECTION 8.5

Compiled code for unification


/* Final result of the derivation of UNIFY_CONSTANT, written with jumps to
   a failure label.  Returns 1 on success and 0 on failure.  A variable
   goal is recorded with trail_binding() and bound to a new constant made
   from head_text; any other goal must be a constant with the same text. */
int unify_constant(Term *goal_arg, char *head_text) {
Term *goal;
goal = deref(goal_arg);
if (goal->type == Is_Variable) {
trail_binding(goal);
goal->term.variable.term = put_constant(head_text);
}
else {
if (goal->type != Is_Constant) goto L_fail;
if (strcmp(goal->term.constant, head_text) != 0) goto L_fail;
}
/* success exit */
return 1;
/* failure exit */
L_fail:
return 0;
}

Figure 8.38 Deriving the instruction UNIFY_CONSTANT, final result.



SUBSECTION 8.5.3

Unification of structures in the WAM


/* Body of the instruction UNIFY_STRUCTURE(goal_arg, head_functor,
   head_arity).  A variable goal is recorded with trail_binding() and
   bound to a new structure with the given functor and arity, whose
   components are then set up by initialize_components().  Any other goal
   must be a structure with the same arity and functor; otherwise control
   jumps to L_fail. */
g = deref(goal_arg);
if (g->type == Is_Variable) {
trail_binding(g);
g->term.variable.term =
put_structure(head_functor, head_arity);
initialize_components(g->term.variable.term);
}
else {
if (g->type != Is_Structure) goto L_fail;
if (g->term.structure.arity != head_arity
|| strcmp(g->term.structure.functor, head_functor) != 0
) goto L_fail;
}
Figure 8.39 The unification instruction UNIFY_STRUCTURE.



465

466

SECTION 8.5

Compiled code for unification


/* match times(2, X) */
/* First the functor and arity are matched, then each component of the
   goal structure is matched in turn against the head's components. */
/* macro */
UNIFY_STRUCTURE(goal_arg, "times", 2);
{
/* match (2, X) */
Term **goal_comp = deref(goal_arg)->term.structure.components;
/* macro */
UNIFY_CONSTANT(goal_comp[0], "2");
/* macro */
UNIFY_VARIABLE(goal_comp[1], X);
}
Figure 8.40 Compiled code for the head times(2, X).



Unification of structures in the WAM

SUBSECTION 8.5.3


variable
goal:

structure

Y
"times"
anonymous
variables

2
components
_A01

_A02

Figure 8.41 The effect of the instruction UNIFY_STRUCTURE.




467

468

SECTION 8.5

Compiled code for unification


variable
goal:

structure

Y
"times"
anonymous
variables

2
components
_A01

constant
2

_A02

variable
X

Figure 8.42 The effect of the instructions of Figure 8.40.




. . .

SUBSECTION 8.5.4

An optimization: read/write mode


/* match times(2, X) */
/* Variant with read/write mode: the current mode is saved in mode_1
   before the structure is matched and put back afterwards.  The macros
   are given the addresses of the goal argument and of its components. */
/* save mode */
mode_1 = mode;
UNIFY_STRUCTURE(&goal_arg, "times", 2); /* macro */
/* saves read/write mode and may set it to Write_Mode */
{
/* match (2, X) */
register Term **goal_comp =
deref(goal_arg)->term.structure.components;
UNIFY_CONSTANT(&goal_comp[0], "2"); /* macro */
/* macro */
UNIFY_VARIABLE(&goal_comp[1], X);
}
/* restore the mode saved on entry */
mode = mode_1;
Figure 8.43 Compiled code with read/write mode for the head times(2, X).



469

470

SECTION 8.5

Compiled code for unification


/* Body of UNIFY_CONSTANT(goal_ptr, head_text) with read/write mode.
   In Read_Mode the term that goal_ptr points to is unified with the
   constant, jumping to L_fail on a mismatch.  In Write_Mode nothing is
   tested: the location goal_ptr points to is overwritten with a new
   constant made from head_text. */
if (mode == Read_Mode) {
g = deref(*goal_ptr);
if (g->type == Is_Variable) {
trail_binding(g);
g->term.variable.term = put_constant(head_text);
}
else {
if (g->type != Is_Constant) goto L_fail;
if (strcmp(g->term.constant, head_text) != 0) goto L_fail;
}
}
else { /* mode == Write_Mode */
*goal_ptr = put_constant(head_text);
}
Figure 8.44 The instruction UNIFY_CONSTANT with read/write mode.



SUBSECTION 8.5.4

An optimization: read/write mode

471


/* Body of UNIFY_STRUCTURE(goal_ptr, head_functor, head_arity) with
   read/write mode.  In Read_Mode a variable goal is bound to a new
   structure and the mode is switched to Write_Mode; a non-variable goal
   must be a structure with the same arity and functor, otherwise control
   jumps to L_fail.  In Write_Mode the location goal_ptr points to is
   overwritten with a new structure. */
if (mode == Read_Mode) {
g = deref(*goal_ptr);
if (g->type == Is_Variable) {
trail_binding(g);
g->term.variable.term =
put_structure(head_functor, head_arity);
/* signal uninitialized goals */
mode = Write_Mode;
}
else {
if (g->type != Is_Structure) goto L_fail;
if (g->term.structure.arity != head_arity
|| strcmp(g->term.structure.functor, head_functor) != 0
) goto L_fail;
}
}
else { /* mode == Write_Mode */
*goal_ptr = put_structure(head_functor, head_arity);
}
Figure 8.45 The instruction UNIFY_STRUCTURE with read/write mode.



472

SECTION 8.5

Compiled code for unification


variable
goal:

structure

Y
"times"
2
components
???
???

Figure 8.46 The effect of the instruction UNIFY_STRUCTURE with read/write mode.


An optimization: read/write mode

SUBSECTION 8.5.4


variable
goal:

structure

Y
"times"
2
components

constant
2
variable
X

. . .

Figure 8.47 The effect of the instructions of Figure 8.40 with read/write mode.


473



Parallel and distributed


programs

474

CHAPTER 9

Parallel and distributed programs


CPU

CPU

CPU

shared memory
(a) multiprocessor

mem

mem

mem

CPU

CPU

CPU

network
(b) multicomputer

Figure 9.1 Multiprocessors and multicomputers.




475

476

SECTION 9.1

Parallel programming models


monitor BinMonitor;
    bin: integer;
    occupied: Boolean := false;
    full, empty: Condition;

    operation put(x: integer);
    begin
        while occupied do          # wait if the bin already is occupied
            wait(empty);
        od;
        bin := x;                  # put the item in the bin
        occupied := true;
        signal(full);              # wake up a process blocked in get
    end;

    operation get(x: out integer);
    begin
        while not occupied do      # wait if the bin is empty
            wait(full);
        od;
        x := bin;                  # get the item from the bin
        occupied := false;
        signal(empty);             # wakeup a process blocked in put
    end;
end;
Figure 9.2 An example monitor.



SUBSECTION 9.1.5

Data-parallel languages


process 1

process 1

read

write

send
msg

msg

write

receive

send

shared variables
read

process 2

shared variables

message passing

IN

obj.

receive

process 2

process 1

process 1

obj.

process 2

parallel objects

array, part 1

pr. 1

OUT
Tuple Space

IN

OUT
process 2

pr. 2 array, part 2

Tuple Space

data-parallel

Figure 9.3 Summary of the five parallel programming models.




477

478

CHAPTER 9

Parallel and distributed programs


Status
Program counter
Stack pointer
Stack limits
Registers
Next-pointer

Figure 9.4 Structure of a thread control block.




SECTION 9.3

Shared variables


lock variable
(a)

(b)
Thread control blocks
not runnable

not runnable

(c)
Thread control blocks
not runnable

runnable

Figure 9.5 A lock variable containing a list of blocked threads.




479

480

SECTION 9.4

Message passing


name
age

"John"
0000001A

= 26

marshaling

00 00 00 09 J

n \0 00 00 00 1A

4-byte length
unmarshaling
name

age
1A000000

= 26

"John"

Figure 9.6 Marshaling.




SUBSECTION 9.5.1

Object location


CPU 4

CPU 7

X:
7 4000
CPU address
7, 4000,
Determine_Value,
"Diamond"
address 4000:
Y:

Figure 9.7 Using object identifiers for remote references.




481

482

SECTION 9.5

Parallel object-oriented languages


machine C

machine A
reference to

machine B

(a)

X:

reference to

(b)

forwarding
address

(c)

X:

X:
reference to

Figure 9.8 Pointer chasing.




SUBSECTION 9.6.1

Avoiding the overhead of associative addressing


int n, year;
float f;
...                                  /* code initializing n and f */

(1) in("Olympic year", 1928);
(2) in("Classic movie", "Way out West", 1937);
(3) in("Classic movie", "Sons of the desert", 1933);
(4) in("Classic movie", "Sons of the desert", ? &year);
(5) in("Popular number", 65536);
(6) in("Popular number", 3.14159);
(7) in("Popular number", n);
(8) in("Popular number", f);
Figure 9.9 A demo program in C/Linda.



483

484

SECTION 9.6

Tuple space


Partition 1:
(1) in("Olympic year", 1928);
Partition 2:
(2) in("Classic movie", "Way out West", 1937);
Partition 3:
(3) in("Classic movie", "Sons of the desert", 1933);
(4) in("Classic movie", "Sons of the desert", ? &year);
Partition 4:
(5) in("Popular number", 65536);
(7) in("Popular number", n);
Partition 5:
(6) in("Popular number", 3.14159);
(8) in("Popular number", f);
Figure 9.10 The partitions for the demo C/Linda program in Figure 9.9.



SUBSECTION 9.7.4

Automatic parallelization for distributed-memory machines


1

1000

1
CPU 0
50
51

CPU 1

100

451
CPU 10
500

Figure 9.11 An array distributed over 10 CPUs.




485

486

SECTION 9.7

Automatic parallelization


INPUT:
ID: number of this processor (ranging from 0 to P-1)
lower_bound: number of first row assigned to this processor
upper_bound: number of last row assigned to this processor
real A[lower_bound..upper_bound, 1000],
B[lower_bound-1 .. upper_bound+1, 1000];
if ID > 0 then send B[lower_bound, *] to processor ID-1;
if ID < P-1 then send B[upper_bound, *] to processor ID+1;
if ID > 0 then receive B[lower_bound-1, *] from processor ID-1;
if ID < P-1 then receive B[upper_bound+1, *] from processor ID+1;
for i := lower_bound to upper_bound do
for j := 1 to 1000 do
A[i, j] := B[i-1, j] + B[i+1, j];
od;
od;
Figure 9.12 Generated (pseudo-)code.



SECTION 9.8

Conclusion


/*
 * Collective maximum over all CPUs (numbered 1..P), written in C/Linda.
 * Every CPU calls Maximum() with its own value; all calls return the same
 * overall maximum.
 *
 * A single tuple ("max", running_maximum, k) travels along the CPUs, where
 * k is the number of the last CPU that has folded its value into
 * running_maximum.  CPU 1 creates the tuple with k = 1; CPU k (k > 1)
 * removes the tuple left by CPU k-1, updates the maximum, and puts the
 * tuple back with k as the sequence number.  When k reaches P every CPU
 * has contributed, and the non-destructive read() at the end lets all CPUs
 * pick up the result.
 *
 * The earlier version removed the tuple numbered mycpu and re-inserted it
 * as mycpu + 1.  The tuple numbered 1 was then never consumed, no tuple
 * numbered 2..P was ever produced, and every CPU blocked forever.
 *
 * mycpu, P and max are not declared here; they are expected to be provided
 * by the surrounding program.
 */
int Maximum(int value) {
    if (mycpu == 1) {
        /* the first CPU creates the tuple */
        out("max", value, 1);
    } else {
        /* other CPUs wait their turn: take over from the previous CPU */
        in("max", ? &max, mycpu - 1);
        if (value > max) max = value;
        out("max", max, mycpu);
    }
    /* wait until all CPUs are done */
    read("max", ? &max, P);
    return max;
}
Figure 9.13 C/Linda code for the operation Maximum.



487

Appendix A A simple object-oriented compiler/interpreter

488

Appendix A A simple object-oriented compiler/interpreter



declarations

simple
assignments
for-loops
expressions

lexical analysis

lexical analysis
of declarations

parsing

parsing of
simple
expressions

context handling

context handling
of declarations

optimization

optimization of
for-loops

code generation

code generation
for assignments

Figure A.1 Table of tasks in a compiler with some tasks filled in.


489

490

Appendix A A simple object-oriented compiler/interpreter


expression → digit | parenthesized_expression
digit → DIGIT
parenthesized_expression → '(' expression operator expression ')'
operator → '+' | '*'
Figure A.2 The grammar for simple fully parenthesized expressions in AND/OR form.



SECTION A.2

The simple object-oriented compiler

491


/**
 * Driver for the demo compiler: parses one expression, then prints it,
 * interprets it and generates code for it.
 */
class Demo_Compiler {
    public static void main(String[] args) {
        // Start the lexical analyser, then obtain the intermediate code
        Lex.Get_Token();
        final Expression tree = Expression.Parse_Or();

        if (Lex.Token_Class != Lex.EOF) {
            System.err.println("Garbage at end of program ignored");
        }
        if (tree == null) {
            System.err.println("No intermediate code produced");
            return;
        }
        process(tree);
    }

    /** Prints, interprets and generates code for the given expression. */
    private static void process(Expression tree) {
        // Print it
        System.out.print("Expression: ");
        tree.Print();
        System.out.println();

        // Interpret it
        System.out.print("Interpreted: ");
        System.out.println(tree.Interpret());

        // Generate code for it
        System.out.println("Code:");
        tree.Generate_Code();
        System.out.println("PRINT");
    }
}
Figure A.3 Class Demo_Compiler, driver for the object-oriented demo compiler.



492

Appendix A A simple object-oriented compiler/interpreter


import java.io.IOException;
/**
 * Lexical analyser for the demo compiler.  Each call of Get_Token() reads
 * the next non-whitespace character from standard input and classifies it;
 * the result is left in the static fields Token_Class and
 * Token_Char_Value.
 */
class Lex {
    /** Class of the current token: EOF, DIGIT, or the character itself. */
    public static int Token_Class;
    public static final int EOF = 256;
    public static final int DIGIT = 257;
    /** The digit character; set only when Token_Class is DIGIT. */
    public static char Token_Char_Value;

    /** Reads and classifies the next token. */
    public static void Get_Token() {
        char ch;

        // Get non-whitespace character
        do {
            int ch_or_EOF;
            try {ch_or_EOF = System.in.read();}
            // a read error is treated like end of input
            catch (IOException ex) {ch_or_EOF = -1;}
            if (ch_or_EOF < 0) {Token_Class = EOF; return;}
            ch = (char)ch_or_EOF;       // cast is safe
        } while (Character.isWhitespace(ch));

        // Classify it
        if (Character.isDigit(ch)) {
            Token_Class = DIGIT; Token_Char_Value = ch;
        }
        else {
            Token_Class = ch;
        }
    }
}
Figure A.4 Class Lex, lexical analyzer for the object-oriented demo compiler.



SECTION A.2

The simple object-oriented compiler

493


/**
 * Base class of all expression nodes.  Parse_Or() tries the alternatives
 * of an expression in turn; the abstract methods are the operations every
 * node has to provide.
 */
abstract class Expression {
    /**
     * Tries to parse a parenthesized expression and, if that fails, a
     * digit.  Returns the node that was parsed, or null if neither
     * alternative applies.
     */
    public static Expression Parse_Or() {
        final Parenthesized_Expression parenthesized =
            new Parenthesized_Expression();
        if (!parenthesized.Parse_And()) {
            // Not a parenthesized expression; fall back to a digit
            final Digit single = new Digit();
            return single.Parse_And() ? single : null;
        }
        return parenthesized;
    }

    abstract public void Print();
    abstract public int Interpret();
    abstract public void Generate_Code();
}
Figure A.5 Class Expression for the object-oriented demo compiler.



494

Appendix A A simple object-oriented compiler/interpreter


class Digit extends Expression {
private char digit = \0;
public boolean Parse_And() {
if (Lex.Token_Class == Lex.DIGIT) {
digit = Lex.Token_Char_Value;
Lex.Get_Token();
return true;
}
return false;
}
public void Print() {System.out.print(digit);}
public int Interpret() {return digit - 0;}
public void Generate_Code() {
System.out.print("PUSH " + digit + "\n");
}
}
Figure A.6 Class Digit for the object-oriented demo compiler.



SECTION A.2

The simple object-oriented compiler

495


class Parenthesized_Expression extends Expression {
private Expression left;
private Operator oper;
private Expression right;
public boolean Parse_And() {
if (Lex.Token_Class == () {
Lex.Get_Token();
if ((left = Expression.Parse_Or()) == null) {
System.err.println("Missing left expression");
return false;
}
if ((oper = Operator.Parse_Or()) == null) {
System.err.println("Missing operator");
return false;
}
if ((right = Expression.Parse_Or()) == null) {
System.err.println("Missing right expression");
return false;
}
if (Lex.Token_Class != )) {
System.err.println("Missing ) added");
}
else {Lex.Get_Token();}
return true;
}
return false;
}
public void Print() {
System.out.print(();
left.Print(); oper.Print(); right.Print();
System.out.print());
}
public int Interpret() {
return oper.Interpret(left.Interpret(), right.Interpret());
}
public void Generate_Code() {
left.Generate_Code();
right.Generate_Code();
oper.Generate_Code();
}
}
Figure A.7 Class Parenthesized_Expression for the object-oriented demo compiler.



496

Appendix A A simple object-oriented compiler/interpreter

SECTION A.2

The simple object-oriented compiler

497


abstract class Operator {
public static Operator Parse_Or() {
// Try to parse a +
Actual_Operator co_plus = new Actual_Operator(+);
if (co_plus.Parse_And()) return co_plus;
// Try to parse a *
Actual_Operator co_times = new Actual_Operator(*);
if (co_times.Parse_And()) return co_times;
return null;
}
abstract public void Print();
abstract public int Interpret(int e_left, int e_right);
abstract public void Generate_Code();
}
class Actual_Operator extends Operator {
private char oper;
public Actual_Operator(char op) {oper = op;}
public boolean Parse_And() {
if (Lex.Token_Class == oper) {Lex.Get_Token(); return true;}
return false;
}
public void Print() {System.out.print(oper);}
public int Interpret(int e_left, int e_right) {
switch (oper) {
case +: return e_left + e_right;
case *: return e_left * e_right;
}
return 0;
}
public void Generate_Code() {
switch (oper) {
case +: System.out.print("ADD\n"); break;
case *: System.out.print("MULT\n"); break;
}
}
}
Figure A.8 Class Operator for the object-oriented demo compiler.



498

Appendix A A simple object-oriented compiler/interpreter


// Template (not compilable as it stands: "..." marks omitted parts) for a
// non-terminal A whose single alternative is the sequence A1 A2 ... An.
// If A1 cannot be parsed, Parse_And() returns false; once A1 has been
// parsed, each missing later component is reported through error() and
// Parse_And() still returns true.
class A extends ... {
// fields for the components
private A1 a1;
private A2 a2;
...
private An an;
public boolean Parse_And() {
// Try to parse the alternative A1 A2 ... An:
if ((a1 = A1.Parse_Or()) != null) {
if ((a2 = A2.Parse_Or()) == null)
error("Missing A2");
...
if ((an = An.Parse_Or()) == null)
error("Missing An");
return true;
}
return false;
}
}
Figure A.9 The template for an AND non-terminal.



SECTION A.3

Object-oriented parsing


// Template (not compilable as it stands: "..." marks omitted parts) for a
// non-terminal A with the alternatives A1 ... An.  Parse_Or() creates an
// object for each alternative in turn and returns the first one whose
// Parse_And() succeeds, or null if none does.
abstract class A {
public static A Parse_Or() {
// Try to parse an A1:
A1 a1 = new A1();
if (a1.Parse_And()) return a1;
...
// Try to parse an An:
An an = new An();
if (an.Parse_And()) return an;
return null;
}
}
Figure A.10 The template for an OR non-terminal.



499

More answers to exercises

500

More answers to exercises


SET the flag There is a character _a_ buffered TO False;

PROCEDURE Accept filtered character Ch from previous module:
    IF There is a character _a_ buffered = True:
        // See if this is a second 'a':
        IF Ch = 'a':
            // We have 'aa':
            SET There is a character _a_ buffered TO False;
            Output character 'b' to next module;
        ELSE Ch /= 'a':
            SET There is a character _a_ buffered TO False;
            Output character 'a' to next module;
            Output character Ch to next module;
    ELSE IF Ch = 'a':
        SET There is a character _a_ buffered TO True;
    ELSE There is no character 'a' buffered AND Ch /= 'a':
        Output character Ch to next module;

PROCEDURE Flush:
    IF There is a character _a_ buffered:
        SET There is a character _a_ buffered TO False;
        Output character 'a' to next module;
    Flush next module;
Figure Answers.1 The filter aa → b as a post-main module.



501

502

More answers to exercises


/*
 * Skip any mixture of layout characters and comments at the current input
 * position.  On return input_char is neither a layout character nor a
 * comment starter.
 */
void skip_layout_and_comment(void) {
    for (;;) {
        /* skip a (possibly empty) run of layout */
        while (is_layout(input_char)) {
            next_char();
        }
        if (!is_comment_starter(input_char)) {
            break;
        }
        skip_comment();
    }
}
/*
 * Skip one comment, including any comments nested inside it.  Called with
 * input_char on the comment starter; returns with input_char just past the
 * matching comment stopper, or at the end of input if the comment is not
 * closed.
 */
void skip_comment(void) {
    /* step over the comment starter */
    next_char();
    for (;;) {
        if (is_comment_stopper(input_char)) {
            break;
        }
        if (is_end_of_input(input_char)) {
            /* unterminated comment: leave input_char at the end of input */
            return;
        }
        if (is_comment_starter(input_char)) {
            /* nested comment: skip it as a whole */
            skip_comment();
        } else {
            next_char();
        }
    }
    /* step over the comment stopper */
    next_char();
}
Figure Answers.2 Skipping layout and nested comment.



More answers to exercises


T (R 1 &R 2 &...&Rn)

T R 1 (R 2 &R 3 &...&Rn)
T R 2 (R 1 &R 3 &...&Rn)
...
T Rn (R 1 &R 2 &...&Rn 1 )

Figure Answers.3 move rule for the composition operator &.



503

504

More answers to exercises


0
1
2
3

(1, 0)

(1, 1)

(2, 0)
-

(2, 1)

(3, 2)
(3, 3)

(1, 0)
(1, 1)
(2, 0)
(2, 1)
(3, 2)
(3, 3)
-

Figure Answers.4 Fitting the strips with entries marked by state.



More answers to exercises

505


%Start  Comment

Layout              ([ \t])

AnyQuoted           (\\.)
StringChar          ([^"\n\\]|{AnyQuoted})
CharChar            ([^'\n\\]|{AnyQuoted})

StartComment        ("/*")
EndComment          ("*/")
SafeCommentChar     ([^*\n])
UnsafeCommentChar   ("*")

%%
\"{StringChar}*\"               {printf("%s", yytext);}  /* string */
\'{CharChar}\'                  {printf("%s", yytext);}  /* character */

{Layout}*{StartComment}         {BEGIN Comment;}
<Comment>{SafeCommentChar}+     {} /* safe comment chunk */
<Comment>{UnsafeCommentChar}    {} /* unsafe char, read one by one */
<Comment>"\n"                   {} /* to break up long comments */
<Comment>{EndComment}           {BEGIN INITIAL;}

Figure Answers.5 Lex filter for removing comments from C program text.



506

More answers to exercises


GENERIC PROCEDURE F1(Type)(parameters to F1):
SET the Type variable Var TO ...;
// some code using Type and Var
GENERIC PROCEDURE F2(Type)(parameters to F2):
FUNCTION F1_1(parameters to F1_1):
INSTANTIATE F1(Integer);
FUNCTION F1_2(parameters to F1_2):
INSTANTIATE F1(Real);
SET the Type variable Var TO ...;
// some code using F1_1, F1_2, Type and Var
GENERIC PROCEDURE F3(Type)(parameters to F3):
FUNCTION F2_1(parameters to F2_1):
INSTANTIATE F2(Integer);
FUNCTION F2_2(parameters to F2_2):
INSTANTIATE F2(Real);
SET the Type variable Var TO ...;
// some code using F2_1, F2_2, Type and Var
Figure Answers.6 An example of exponential generics by macro expansion.



More answers to exercises


/* Suffix grammar: input_suffix derives the suffixes of the sentences of
   the expression grammar.  The literal tokens '+', '(' and ')' are
   written as quoted character tokens. */
%token IDENTIFIER
%token EOF
%%
input_suffix :
    expression_suffix EOF | EOF ;
expression :
    term | expression '+' term ;
expression_suffix :
    term_suffix | expression_suffix '+' term | '+' term ;
term :
    IDENTIFIER | '(' expression ')' ;
term_suffix :
    expression ')' | expression_suffix ')' | ')' ;
Figure Answers.7 An LALR(1) suffix grammar for the grammar of Figure 2.84.



507

508

More answers to exercises


stack continuation
parenthesized_expression rest_expression EOF
( expression ) rest_expression EOF
expression ) rest_expression EOF
term rest_expression ) rest_expression EOF
IDENTIFIER rest_expression ) rest_expression EOF
rest_expression ) rest_expression EOF
) rest_expression EOF
rest_expression EOF
EOF

FIRST set
{ ( }
(
{ IDENTIFIER ( }
{ IDENTIFIER ( }
IDENTIFIER
{ + }
)
{ + }
EOF

Figure Answers.8 Stack continuations with their FIRST sets.



More answers to exercises


shift-reduce
S1

S0
S->.xSx{$}
S->.x{$}

S->x.Sx{x$}
S->x.{x$}
S->.xSx{x}
S->.x{x}
S
S2
S->xS.x{x$}

x
S3
S->xSx.{x$}

Figure Answers.9 The SLR(1) automaton for S → x S x | x.




509

510

More answers to exercises


shift-reduce

shift-reduce
S1

S0
S->.xSx{$}
S->.x{$}

S->x.Sx{$}
S->x.{$}
S->.xSx{x}
S->.x{x}
S

S4

S->x.Sx{x}
S->x.{x}
S->.xSx{x}
S->.x{x}
S

S2
S->xS.x{$}

S5
S->xS.x{x}

x
S3

S->xSx.{$}

S6
S->xSx.{x}

Figure Answers.10 The LR(1) automaton for S → x S x | x.




More answers to exercises


shift-reduce

shift-reduce
S4

S1

S0
S->.xSx{$}
S->.x{$}

S->x.Sx{$}
S->x.{$}
S->.xSx{x}
S->.x{x}
S

S->x.Sx{x}
S->x.{x}
S->.xSx{x}
S->.x{x}

S
S5

S2
S->xS.x{$}

S->xS.x{x}

x
S3

S->xSx.{$}

S6
S->xSx.{x}

Figure Answers.11 The LALR(1) automaton for S → x S x | x.




511

512

More answers to exercises


if_statement → short_if_statement | long_if_statement
short_if_statement → if ( expression ) statement
long_if_statement →
    if ( expression ) statement_but_not_short_if
    else statement
statement_but_not_short_if → long_if_statement | other_statement
statement → if_statement | other_statement
other_statement → ...
Figure Answers.12 An unambiguous grammar for the if-then-else statement.



More answers to exercises


S4

S 4a
A->ERR.{$}

A->B.{$}

ERR

B
S->.A{$}
S->.xb{$}
A->.aAb{$}
A->.B{$}
A->.ERR{$}
B->.x{$}
A

S0
a

x
S1

S->A.{$}

S2
S->x.b{$}
B->x.{$}

Figure Answers.13 State S 0 of the error-recovering parser.




513

514

More answers to exercises


FUNCTION Topological sort of (a set Set) RETURNING a list:
SET List TO Empty list;
SET Busy list TO Empty list;
WHILE there is a Node in Set but not in List:
Append Node and its predecessors to List;
RETURN List;
PROCEDURE Append Node and its predecessors to List:
// Check if Node is already (being) dealt with; if so, there
// is a cycle:
IF Node is in Busy list:
Panic with "Cycle detected";
RETURN;
Append Node to Busy list;
// Append the predecessors of Node:
FOR EACH Node_1 IN the Set of nodes that Node is dependent on:
IF Node_1 is not in List:
Append Node_1 and its predecessors to List;
Append Node to List;
Figure Answers.14 Outline code for topological sort with cycle detection.



More answers to exercises


S

i1

s1

i1

i2

s1

s2

i1

s1

i2

s2

Figure Answers.15 Dependency graphs for S, A, and B.




515

516

More answers to exercises


from S
i1

s1
from B

Figure Answers.16 IS-SI graph of A.




More answers to exercises


S

i1

i2

s1

s2

i1

i2

s1

s2

i1

i2

s1

Figure Answers.17 Dependency graphs for S and A.




s2

517

518

More answers to exercises


IS-SI graph set of A:

i1

i2

s1

s2

i1

i2

s1

merged IS-SI graph of A:

i1

i2

s1

s2

Figure Answers.18 IS-SI graph sets and IS-SI graph of A.




s2

More answers to exercises


i1

i1

i2

s1

s1

i1

s2

s1

Figure Answers.19 Dependency graph of S.




519

520

More answers to exercises


i1

i2

s1

s2

Figure Answers.20 IS-SI graph of S.




More answers to exercises



i1

i2

s1

s2

Figure Answers.21 IS-SI graph of S.




521

522

More answers to exercises


parent prepares S.i1 and S.i2:
PROCEDURE Visit to S (i1, i2, s1, s2):
    // S.i1 and S.i2 available
    Visit to T (-, T.cont_i);             // T.cont_i available
    SET U.i TO f2(S.i2);                  // U.i available
    Visit to U (U.i, U.s);                // U.s available
    SET T.i TO f1(S.i1, U.s);             // T.i available
    SET T.s TO Compute(T.cont_i, T.i);    // T.s available
    SET S.s1 TO f3(T.s);                  // S.s1 available
    SET S.s2 TO f4(U.s);                  // S.s2 available

Figure Answers.22 Visit to S().



More answers to exercises

523


Number(SYN value) →
    Digit_Seq Base_Tag
    ATTRIBUTE RULES
        SET Number .value TO Checked number value(
            Digit_Seq .repr,
            Base_Tag .base);
Digit_Seq(SYN repr) →
    Digit_Seq[1] Digit
    ATTRIBUTE RULES
        SET Digit_Seq .repr TO
            Concat(Digit_Seq[1] .repr , Digit .repr);
|
    Digit
    ATTRIBUTE RULES
        SET Digit_Seq .repr TO Digit .repr;
Digit(SYN repr) →
    Digit_Token
    ATTRIBUTE RULES
        SET Digit .repr TO Digit_Token .repr [0];
Base_Tag(SYN base) →
    B
    ATTRIBUTE RULES
        SET Base_Tag .base TO 8;
|
    D
    ATTRIBUTE RULES
        SET Base_Tag .base TO 10;
Figure Answers.23 An L-attributed grammar for Number.



524

More answers to exercises



While_statement

X
Condition

Statements

Figure Answers.24 Threaded AST of the while statement.




More answers to exercises



Repeat_statement

X
Statements

Condition

Figure Answers.25 Threaded AST of the repeat statement.




525

526

More answers to exercises


#include "parser.h"    /* for types AST_node and Expression */
#include "thread.h"    /* for self check */

/* PRIVATE */
/*
 * Threads the expression rooted at expr and returns a node of that
 * expression: for a D node the node itself, for a P node whatever
 * threading its left operand returns.
 *
 * In both cases expr->successor is set to succ.  For a P node the right
 * operand is threaded with expr as its successor, and the left operand
 * is threaded with the result of that as its successor.
 *
 * Node types other than D and P are not handled: control then reaches
 * the end of the function without a return value.
 */
static AST_node *Thread_expression(Expression *expr, AST_node *succ) {
    if (expr->type == D) {
        expr->successor = succ;
        return expr;
    }

    if (expr->type == P) {
        AST_node *right_entry;

        expr->successor = succ;
        /* thread the right operand first: its result is needed as the
           successor of the left operand */
        right_entry = Thread_expression(expr->right, expr);
        return Thread_expression(expr->left, right_entry);
    }
}
/* PUBLIC */
AST_node *Thread_start;
/*
 * Threads the AST icode with a null final successor and records the
 * node returned by Thread_expression() in the global Thread_start.
 */
void Thread_AST(AST_node *icode) {
    AST_node *entry = Thread_expression(icode, 0);

    Thread_start = entry;
}
Figure Answers.26 Alternative threading code for the demo compiler from Section 1.2.



More answers to exercises

527


CASE Operator type:
SET Left value TO Pop working stack ();
SET Right value TO Pop working stack ();
SELECT Active node .operator:
CASE +: Push working stack (Left value + Right value);
CASE *: ...
CASE ...
SET Active node TO Active node .successor;
Figure Answers.27 Iterative interpreter code for operators.



528

More answers to exercises


FUNCTION Weight of (Node) RETURNING an integer:
SELECT Node .type:
CASE Constant type: RETURN 1;
CASE Variable type: RETURN 1;
CASE ...
CASE Add type:
SET Required left TO Weight of (Node .left);
SET Required right TO Weight of (Node .right);
IF Required left > Required right: RETURN Required left;
IF Required left < Required right: RETURN Required right;
// Required left = Required right
RETURN Required left + 1;
CASE Sub type:
SET Required left TO Weight of (Node .left);
SET Required right TO Weight of (Node .right);
IF Required left > Required right: RETURN Required left;
RETURN Required right + 1;
CASE ...
Figure Answers.28 Adapted weight function for minimizing the stack height.



More answers to exercises

529


FUNCTION Weight of (Node, Left or right) RETURNING an integer:
SELECT Node .type:
CASE Constant type: RETURN 1;
CASE Variable type:
IF Left or right = Left: RETURN 1;
ELSE Left or right = Right: RETURN 0;
CASE ...
CASE Add type:
SET Required left TO Weight of (Node .left, Left);
SET Required right TO Weight of (Node .right, Right);
IF Required left > Required right:
RETURN Required left;
IF Required left < Required right:
RETURN Required right;
// Required left = Required right
RETURN Required left + 1;
CASE ...
Figure Answers.29 Revised weight function for register-memory operations.



530

More answers to exercises



        Equation        Value
a   =   1               1.0
b   =   0.7 a           0.7
c   =   0.3 a           0.3
d   =   b               0.7
e   =   0.1 (d+f)       0.7
f   =   g               6.3
g   =   0.9 (d+f)       6.3
h   =   c               0.3
i   =   0.4 h           0.12
j   =   0.4 h           0.12
k   =   0.2 h           0.06
l   =   i               0.12
m   =   j               0.12
n   =   k               0.06
o   =   e               0.7
p   =   l+m+n           0.30
q   =   o+p             1.0

Figure Answers.30 Traffic equations and their solution for Figure 4.97.



More answers to exercises


a

while
d

A
g
i

while

Figure Answers.31 Flow graph for static profiling of a nested while loop.


531

532

More answers to exercises



        Equation        Value
a   =   1               1.0
b   =   i               9.0
c   =   0.1 (a+b)       1.0
d   =   0.9 (a+b)       9.0
e   =   d               9.0
f   =   e               9.0
g   =   f               9.0
h   =   j               81.0
i   =   0.1 (g+h)       9.0
j   =   0.9 (g+h)       81.0

Figure Answers.32 Traffic equations and their solution for Figure Answers.31.



More answers to exercises


x

+
*

*
*

a *
2

b
b

*
*

a *
2

b
a

Figure Answers.33 The dependency graph before common subexpression elimination.




533

534

More answers to exercises



+
*
-

*
2

Figure Answers.34 The dependency graph after common subexpression elimination.




More answers to exercises



=:

*
+
p

p
1

Figure Answers.35 The dependency graph of the expression *p++.




535

536

More answers to exercises


position    triple
 1          a * a
 2          a * 2
 3          @2 * b
 4          @1 + @3
 5          b * b
 6          @4 + @5
 7          @6 =: x
 8          @1 - @3
 9          @8 + @5
10          @9 =: y

Figure Answers.36 The data dependency graph of Figure Answers.34 in triple representation.



More answers to exercises

537


y

R4
R2

R3

*
2

Figure Answers.37 The dependency graph of Figure Answers.34 with first ladder sequence
removed.


538

More answers to exercises


R4
R2

R3

*
2

Figure Answers.38 The dependency graph of Figure Answers.37 with second ladder sequence
removed.


More answers to exercises

539


R4
R2

Figure Answers.39 The dependency graph of Figure Answers.38 with third ladder sequence
removed.


540

More answers to exercises


R4
*
b

Figure Answers.40 The dependency graph of Figure Answers.39 with fourth ladder sequence
removed.


More answers to exercises



#3->reg
#3->reg
#4->reg
#7->reg

@26R1
@18R2
@13R2
@14R2

#5->reg @17R1
#5,9->mem @20R1
#8->reg @9R2
#8,9->mem @12R2

->mem @0R0
#2->reg @3R1

#5->reg @7R1
#5,9->mem @10R1
#6->reg @8R2

->cst @0R0

#1->reg @1R1
#1,9->mem @4R1

8
->cst @0R0
#1->reg @1R1
#1,9->mem @4R1

a
->mem @0R0
#2->reg @3R1

Figure Answers.41 Bottom-up pattern matching with costs and register usage.


541

542

More answers to exercises


Load_Const   8,R      ;  1 unit
Mult_Mem     a,R      ;  6 units
Store_Reg    R,tmp    ;  3 units
Load_Const   4,R      ;  1 unit
Mult_Mem     tmp,R    ;  6 units
Store_Reg    R,tmp    ;  3 units
Load_Mem     b,R      ;  3 units
Add_Mem      tmp,R    ;  3 units
Total                 = 26 units

Figure Answers.42 Code generated by bottom-up pattern matching for 1 register.



More answers to exercises


a

tmp_2ab

tmp_bb

tmp_aa

Figure Answers.43 The register interference graph for Exercise {#IntGr2}.




543

544

More answers to exercises


SET the polymorphic chunk pointer Scan pointer TO
Beginning of available memory;
FUNCTION Malloc (Block size) RETURNING a polymorphic block pointer:
SET Pointer TO Pointer to free block of size (Block size);
IF Pointer /= Null pointer: RETURN Pointer;
Perform the marking part of the garbage collector;
SET Scan pointer TO Beginning of available memory;
SET Pointer TO Pointer to free block of size (Block size);
IF Pointer /= Null pointer: RETURN Pointer;
RETURN Solution to out of memory condition (Block size);
FUNCTION Pointer to free block of size (Block size)
RETURNING a polymorphic block pointer:
// Note that this is not a pure function
SET Chunk pointer TO First_chunk pointer;
SET Requested chunk size TO Administration size + Block size;
WHILE Chunk pointer /= One past available memory:
IF Chunk pointer >= Scan pointer:
Scan chunk at (Chunk pointer);
IF Chunk pointer .free:
Coalesce with all following free chunks (Chunk pointer);
IF Chunk pointer .size - Requested chunk size >= 0:
// large enough chunk found:
Split chunk (Chunk pointer, Requested chunk size);
SET Chunk pointer .free TO False;
RETURN Chunk pointer + Administration size;
// try next chunk:
SET Chunk pointer TO Chunk pointer + Chunk pointer .size;
RETURN Null pointer;
Figure Answers.44 A Malloc() with incremental scanning.



More answers to exercises

545


PROCEDURE Scan chunk at (Chunk pointer):
IF Chunk pointer .marked = True:
SET Chunk pointer .marked TO False;
ELSE Chunk pointer .marked = False:
SET Chunk pointer .free TO True;
SET Scan pointer TO Chunk pointer + Chunk pointer .size;
PROCEDURE Coalesce with all following free chunks (Chunk pointer):
SET Next_chunk pointer TO Chunk pointer + Chunk pointer .size;
IF Next_chunk pointer >= Scan pointer:
Scan chunk at (Next_chunk pointer);
WHILE Next_chunk pointer /= One past available memory
AND Next_chunk pointer .free:
// Coalesce them:
SET Chunk pointer .size TO
Chunk pointer .size + Next_chunk pointer .size;
SET Next_chunk pointer TO Chunk pointer + Chunk pointer .size;
IF Next_chunk pointer >= Scan pointer:
Scan chunk at (Next_chunk pointer);
Figure Answers.45 Auxiliary routines for the Malloc() with incremental scanning.



546

More answers to exercises



$$\mathit{zeroth\_offset}(A) = -\,\bigl(LB_1 \times \mathit{LEN\_PRODUCT}_1 + LB_2 \times \mathit{LEN\_PRODUCT}_2 + \cdots + LB_n \times \mathit{LEN\_PRODUCT}_n\bigr)$$
Figure Answers.46 Formula for zeroth_offset(A).


More answers to exercises



$$\mathit{base}(A) + \mathit{zeroth\_offset}(A) + i_1 \times \mathit{LEN\_PRODUCT}_1 + \cdots + i_n \times \mathit{LEN\_PRODUCT}_n$$
Figure Answers.47 The address of $A[i_1, \ldots, i_n]$.


547

548

More answers to exercises


IsShape_Shape_Shape

IsShape_Shape_Shape

IsRectangle_Shape_Rectangle

IsRectangle_Shape_Rectangle

IsSquare_Shape_Shape

IsSquare_Shape_Square

SurfaceArea_Shape_Rectangle

SurfaceArea_Shape_Rectangle

method table for Rectangle

method table for Square

Figure Answers.48 Method tables for Rectangle and Square.




More answers to exercises


FUNCTION Instance of (Obj, Class) RETURNING a boolean:
SET Object Class TO Obj .Class;
WHILE Object Class /= No class:
IF Object Class = Class:
RETURN true;
ELSE Object Class /= Class:
SET Object Class TO Object Class .Parent;
RETURN false;
Figure Answers.49 Implementation of the instanceof operator.



549

550

More answers to exercises


/*
 * Reads integers with read_integer() until a 0 is read, keeping the
 * values read so far in a chain of nested accessor functions instead of
 * an array, and then prints the value at index (n-1)/2.
 *
 * n        number of values read before this call.
 * element  accessor for those values: element(i) yields value number i
 *          for the indexes handled by the callers up the chain.
 *
 * Each call that reads a non-zero value recurses with new_element, which
 * answers index n itself and passes every other index on to element.
 * Nested functions are a GNU C extension, not ISO C.
 */
void do_elements(int n, int element()) {
    int elem = read_integer();

    /* accessor extended with the value just read at index n */
    int new_element(int i) {
        return (i == n ? elem : element(i));
    }

    if (elem == 0) {
        /* end of input: report the middle element, newline-terminated */
        printf("median = %d\n", element((n-1)/2));
    }
    else {
        do_elements(n + 1, new_element);
    }
}
/*
 * Entry point: starts do_elements() with zero values read and an
 * accessor that rejects every index.
 */
void print_median(void) {
    /* accessor for the empty sequence: reports the index and aborts,
       so it never returns to its caller */
    int error(int i) {
        printf("There is no element number %d\n", i);
        abort();
    }

    do_elements(0, error);
}
Figure Answers.50 Code for implementing an array without using an array.



More answers to exercises


tmp_hash_entry := get_hash_entry(case expression);
IF tmp_hash_entry = NO_ENTRY THEN GOTO label_else;
GOTO tmp_hash_entry.label;
label_1:
code for statement sequence 1
GOTO label_next;
...
label_n:
code for statement sequence n
GOTO label_next;
label_else:
code for else-statement sequence
label_next:
Figure Answers.51 Intermediate code for a hash-table implementation of case statements.



551

552

More answers to exercises


expr1;
while (expr2) {
body;
forloop_continue_label:
expr3;
/* whileloop_continue_label: */
}
Figure Answers.52 A while loop that is exactly equivalent to the for-loop of Figure 6.41.



More answers to exercises

553


i := lower bound;
tmp_ub := upper bound - unrolling factor + 1;
IF i > tmp_ub THEN GOTO end_label;
tmp_loop_count := (tmp_ub - i) DIV unrolling factor + 1;
loop_label:
code for statement sequence
i := i + 1;
...
code for statement sequence
i := i + 1;
// these two lines unrolling factor times
tmp_loop_count := tmp_loop_count - 1;
IF tmp_loop_count = 0 THEN GOTO end_label;
GOTO loop_label;
end_label:
Figure Answers.53 Loop-unrolling code.



554

More answers to exercises


unique a1 = if (_type_constr a1 == Cons &&
_type_constr (_type_field 2 a1) == Cons) then
let
a = _type_field 1 a1
b = _type_field 1 (_type_field 2 a1)
cs = _type_field 2 (_type_field 2 a1)
in
if (a == b) then a : unique cs
else a : unique (b:cs)
else
a1
Figure Answers.54 A functional-core equivalent of the function unique.



More answers to exercises


qsort [ ]    = [ ]
qsort (x:xs) = qsort (mappend qleft xs) ++ [x]
               ++ qsort (mappend qright xs)
               where
                   qleft  y = if (y < x)  then [y] else [ ]
                   qright y = if (y >= x) then [y] else [ ]

Figure Answers.55 A functional-core equivalent of the function qsort.



555

556

More answers to exercises


qsort [ ]    = [ ]
qsort (x:xs) = qsort (mappend (qleft x) xs) ++ [x]
               ++ qsort (mappend (qright x) xs)
qleft  x y   = if (y < x)  then [y] else [ ]
qright x y   = if (y >= x) then [y] else [ ]


Figure Answers.56 A functional-core lambda-lifted equivalent of the function qsort.



More answers to exercises

557


WHILE Assignment set /= Empty assignment set:
    // Get a doable assignment:
    IF there is an Index SUCH THAT
            (P_new[Index] := E) is in the Assignment set AND
            P_old[Index] occurs at most in E:
        // P_new[Index] := E is doable:
        SET Next TO Index;
    ELSE there is no such Index:
        // Make room for a doable assignment:
        CHOOSE a (P_new[Index] := E) FROM the Assignment set;
        CHOOSE Scratch SUCH THAT
            Temp[Scratch] does not occur in the Assignment set;
        Generate code to move P_old[Next] to Temp[Scratch];
        Substitute Temp[Scratch] for P_old[Next]
            in the Assignment set;
        // Now P_new[Index] := E is doable:
        SET Next TO Index;
    Generate code for (P_new[Next] := E);
    Remove (P_new[Next] := E) from the Assignment set;
Figure Answers.57 Outline code for doing in situ assignments.



558

More answers to exercises


/*
 * Unifies two lists component by component.
 *
 * Returns 0 when the arities differ or when unify_terms() fails for any
 * pair of corresponding components (later pairs are then not tried);
 * returns 1 when every pair unifies.
 */
int unify_lists(struct list *l_goal, struct list *l_head) {
    int i = 0;

    if (l_goal->arity != l_head->arity) {
        return 0;
    }

    while (i < l_head->arity) {
        if (!unify_terms(l_goal->components[i], l_head->components[i])) {
            return 0;
        }
        i++;
    }
    return 1;
}
Figure Answers.58 C code for the unification of two lists.



More answers to exercises


/*
 * List procedure for the intersection of two integer lists.
 *
 * list1 and list2 are list procedures: each calls the action it is given
 * once per element.  Action is called with v for every pairing of an
 * element v produced by list1 with an equal element produced by list2,
 * in list1 order.
 *
 * Uses nested functions, a GNU C extension.
 */
void intersect(
    void list1(void (*Action)(int v)),
    void list2(void (*Action)(int v)),
    void (*Action)(int v)
) {
    /* called for each element of list1: look it up in list2 */
    void search_list2(int candidate) {
        /* called for each element of list2: report a match */
        void report_if_equal(int other) {
            if (candidate == other) {
                Action(candidate);
            }
        }

        list2(report_if_equal);
    }

    list1(search_list2);
}
Figure Answers.59 A list procedure that implements integer list intersection.



559

560

More answers to exercises


/*
 * Calls the continuation goal_list_tail only when the dereferenced
 * argument t has type Is_Variable; otherwise returns without calling it.
 */
void built_in_var(Term *t, Action goal_list_tail) {
    Term *arg = deref(t);

    if (arg->type != Is_Variable) {
        return;
    }
    goal_list_tail();
}
Figure Answers.60 A C routine for implementing built_in_var().



More answers to exercises

561


/*
 * t1 is dereferenced and t2 is evaluated with evaluate_expression().
 *
 * - If t1 dereferences to a variable, it is bound to the evaluated term
 *   for the duration of the continuation call and reset to 0 afterwards.
 * - If it dereferences to a constant, the continuation is called only
 *   when atoi() of that constant equals atoi() of the evaluated term's
 *   constant.
 * - For any other type the continuation is not called.
 */
void built_in_is(Term *t1, Term *t2, Action goal_list_tail) {
    Term *target = deref(t1);
    Term *value = evaluate_expression(t2);

    if (target->type == Is_Variable) {
        target->term.variable.term = value;
        goal_list_tail();
        /* undo the binding once the continuation has returned */
        target->term.variable.term = 0;
        return;
    }

    if (target->type != Is_Constant) {
        return;
    }
    if (atoi(target->term.constant) != atoi(value->term.constant)) {
        return;
    }
    goal_list_tail();
}
Figure Answers.61 A C routine for implementing built_in_is().



562

More answers to exercises


/*
 * t1 is dereferenced; t2 and t3 are evaluated with evaluate_expression()
 * and their constants converted with atoi() to the bounds of a range.
 *
 * - If t1 dereferences to a variable, it is bound in turn to each
 *   integer from the lower bound up to AND INCLUDING the upper bound;
 *   the continuation is called once per value and the binding is reset
 *   to 0 after each call.
 * - If it dereferences to a constant, the continuation is called once
 *   when lower <= constant <= upper.
 * - For any other type the continuation is not called.
 *
 * The enumeration bound is inclusive so that both branches agree on the
 * range: the constant test accepts the upper bound, so the variable
 * branch must generate it as well.
 */
void built_in_between(
    Term *t1, Term *t2, Term *t3,
    Action goal_list_tail
) {
    Term *arg = deref(t1);
    Term *expr1 = evaluate_expression(t2);
    Term *expr2 = evaluate_expression(t3);

    if (arg->type == Is_Variable) {
        int from = atoi(expr1->term.constant);
        int to = atoi(expr2->term.constant);
        int i;

        for (i = from; i <= to; i++) {
            arg->term.variable.term = put_integer(i);
            goal_list_tail();
            /* undo the binding before trying the next value */
            arg->term.variable.term = 0;
            /* stop before i++ could overflow when to is the largest int */
            if (i == to) break;
        }
    }
    else
    if (arg->type == Is_Constant
    &&  atoi(expr1->term.constant) <= atoi(arg->term.constant)
    &&  atoi(arg->term.constant) <= atoi(expr2->term.constant)
    ) {
        goal_list_tail();
    }
}
Figure Answers.62 A first implementation of a compiled between.



More answers to exercises

563

Das könnte Ihnen auch gefallen