Register without options:
Runtime simplification. Less pointer manipulation. There is no concept of stack overflow. Less code, less memory management - less space for critical errors.
The compilation complexity increases: a register allocation phase appears. In the case of execution on a virtual machine, the number of registers is not important for us, we can make enough of them in order not to do allocation at all, but simply map all parameters and variable functions to registers (see Lua). If the number of parameters exceeds the number of registers, it is possible to allocate a part of the activation record in the heap, but it is easier to make it so that the compiler would suggest the author of such a code to treat the head.
In any case, if there is a question of simplifying the runtime at the cost of complicating the compiler, this should be done.
Ability to optimize: mapping N registers of the virtual machine to the registers of the processor. On a stack machine, this is much more difficult.
jsk.h file: it will be required by both the compiler and the interpreter.typedef unsigned char regnum;
struct command {
enum opcodes {
hlt,
store, // dst>
jz, // dst>
echo, // dst>
mov, // >dst
load, // >dst
input, // >dst
add, // src>dst
sub, // src>dst
mul, // src>dst
div, // src>dst
eq, // src>dst
ne, // src>dst
ge, // src>dst
le, // src>dst
gt, // src>dst
lt // src>dst
};
opcodes opcode;
regnum dest;
union {
struct {
regnum src1, src2;
};
short imm;
};
command(opcodes opcode, regnum dest, regnum src1, regnum src2) :
opcode(opcode), dest(dest), src1(src1), src2(src2) {}
command(opcodes opcode, regnum dest, short imm) :
opcode(opcode), dest(dest), imm(imm) {}
};
-fshort-enums-fshort-enumsecho will be stored together with the program code, at the very end; combine the same strings so that only one copy is kept. To do this, we will store the map all lines, where the value is the "identifier" of the line (its sequence number in the program).map we will store the number of the register allocated to it by the name of a variable.print() method with a similar emit() , which glues the lists of commands of the child nodes into one large list.value node into three different types: a literal number, a literal string, and a reference to a variable. When formatting the code, the differences between them were insignificant, but during the generation it was already necessary to distinguish them.command structures. Actually, execution is a cycle of 4 lines, and the implementation of command functions; most of the code is auxiliary husk.#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include "jsk.h"
int pc = 0 ; // ( )
bool halted = false ;
int mem[ 1000 ]; // :
typedef int (*op)( int src1, int src2, int dest, int imm); //
const char * fdata = NULL ; // -
extern op ops[]; //
int main( int argc, char ** argv) {
if (argc!= 2 ) {
printf( "Missing pcode file name. \n " );
exit( 1 );
}
int fd = open(argv[ 1 ], O_RDONLY);
if (fd< 0 ) {
printf( "Cannot open pcode file. \n " );
exit( 1 );
}
struct stat finfo;
fstat(fd, &finfo);
fdata = ( const char *)mmap( 0 , finfo.st_size, PROT_READ, MAP_PRIVATE, fd, 0 );
if (!fdata) {
printf( "Cannot read pcode file. \n " );
exit( 1 );
}
const command* pcode = ( const command*) fdata;
int r[ 256 ] = { 0 }; // registers
while (!halted) {
command next = pcode[pc++];
r[next.dest] = ops[next.opcode](r[next.src1], r[next.src2], r[next.dest], next.imm);
}
munmap(( void *)fdata, finfo.st_size);
close(fd);
return 0 ;
}
int hlt( int src1, int src2, int dest, int imm) { halted = true ; return dest; }
int store( int src1, int src2, int dest, int imm) { mem[imm] = dest; return dest; }
int jz( int src1, int src2, int dest, int imm) { if (!dest) pc+=imm; return dest; }
int echo( int src1, int src2, int dest, int imm) { if (imm) printf( " %s " , fdata+imm); else printf( " %d " , dest); return dest; }
int mov( int src1, int src2, int dest, int imm) { return imm; }
int load( int src1, int src2, int dest, int imm) { return mem[imm]; }
int input( int src1, int src2, int dest, int imm) { int d; scanf( " %d " , &d); return d; }
int add( int src1, int src2, int dest, int imm) { return src1+src2; }
int sub( int src1, int src2, int dest, int imm) { return src1-src2; }
int mul( int src1, int src2, int dest, int imm) { return src1*src2; }
int div( int src1, int src2, int dest, int imm) { return src1/src2; }
int eq( int src1, int src2, int dest, int imm) { return src1==src2; }
int ne( int src1, int src2, int dest, int imm) { return src1!=src2; }
int ge( int src1, int src2, int dest, int imm) { return src1>=src2; }
int le( int src1, int src2, int dest, int imm) { return src1<=src2; }
int gt( int src1, int src2, int dest, int imm) { return src1>src2; }
int lt( int src1, int src2, int dest, int imm) { return src1<src2; }
op ops[] = {hlt, store, jz, echo, mov, load, input, add, sub, mul, div, eq, ne, ge, le, gt, lt};
[tyomitch@home ~]$ bison jsk.y
[tyomitch@home ~]$ c++ -fshort-enums jsk.tab.c lex.yy.c -o jskc
[tyomitch@home ~]$ c++ -fshort-enums jsk.c -o jsk
[tyomitch@home ~]$ ./jskc < test.jsk > pcode
[tyomitch@home ~]$ hexdump -C pcode
00000000 04 01 00 00 04 02 e8 03 03 00 26 01 03 01 00 00 |..........&.....|
00000010 03 00 a0 00 03 02 00 00 03 00 a7 00 0e 03 01 02 |................|
00000020 02 03 1d 00 07 04 01 02 04 05 02 00 0a 06 04 05 |................|
00000030 03 00 62 01 03 06 00 00 03 00 cc 00 06 07 00 00 |..b.............|
00000040 04 08 01 00 0b 09 07 08 02 09 04 00 04 0a 01 00 |................|
00000050 08 0b 06 0a 07 02 0b 00 02 00 0e 00 04 0c 02 00 |................|
00000060 0b 0d 07 0c 02 0d 04 00 04 0e 01 00 07 0f 06 0e |................|
00000070 07 01 0f 00 02 00 07 00 04 10 03 00 0b 11 07 10 |................|
00000080 02 11 03 00 03 00 46 01 00 00 00 00 02 00 01 00 |......F.........|
00000090 03 00 6a 01 02 00 e1 ff 03 00 ff 00 00 00 00 00 |..j.............|
000000a0 20 d0 b4 d0 be 20 00 2c 20 d0 b0 20 d1 8f 20 d0 | .... ., .. .. .|
000000b0 b1 d1 83 d0 b4 d1 83 20 d1 83 d0 b3 d0 b0 d0 b4 |....... ........|
000000c0 d1 8b d0 b2 d0 b0 d1 82 d1 8c 0a 00 3f 20 20 28 |............? (|
000000d0 31 3d d0 bc d0 b5 d0 bd d1 8c d1 88 d0 b5 2c 20 |1=............, |
000000e0 32 3d d0 b1 d0 be d0 bb d1 8c d1 88 d0 b5 2c 20 |2=............, |
000000f0 33 3d d0 bf d0 be d0 bf d0 b0 d0 bb 29 20 00 d0 |3=..........) ..|
00000100 92 d1 80 d1 91 d1 88 d1 8c 2c 20 d1 82 d0 b0 d0 |........., .....|
00000110 ba 20 d0 bd d0 b5 20 d0 b1 d1 8b d0 b2 d0 b0 d0 |. .... .........|
00000120 b5 d1 82 21 0a 00 d0 97 d0 b0 d0 b4 d1 83 d0 bc |...!............|
00000130 d0 b0 d0 b9 20 d1 87 d0 b8 d1 81 d0 bb d0 be 20 |.... .......... |
00000140 d0 be d1 82 20 00 d0 a3 d1 80 d0 b0 21 20 d0 af |.... .......! ..|
00000150 20 d0 bc d0 be d0 bb d0 be d0 b4 d0 b5 d1 86 21 | ..............!|
00000160 0a 00 d0 ad d1 82 d0 be 20 00 d0 af 20 d0 bd d0 |........ ... ...|
00000170 b8 d1 87 d0 b5 d0 b3 d0 be 20 d0 bd d0 b5 20 d0 |......... .... .|
00000180 bf d0 be d0 bd d1 8f d0 bb 21 0a 00 |.........!..|
0000018c
[tyomitch@home ~]$ ./jsk pcode
0 1000,
500? (1=, 2=, 3=) 2
750? (1=, 2=, 3=) 2
875? (1=, 2=, 3=) 2
938? (1=, 2=, 3=) 1
906? (1=, 2=, 3=) 1
890? (1=, 2=, 3=) 2
898? (1=, 2=, 3=) 2
902? (1=, 2=, 3=) 1
900? (1=, 2=, 3=) 1
899? (1=, 2=, 3=) 1
, !
0xa0 bytes are occupied by the p-code (fours of bytes), and the rest of the file is strings in UTF-8.emit involves combining commands from child nodes in the same order (left to right) in which these nodes were created during parsing. It is possible to save both memory for storing commands, and code for merging them, if all generated commands are immediately dumped into the result, and only “metainformation” such as register numbers is stored in characters.if and while , where, after checking the condition, you must insert a jump forward if the condition is not met; and until the end of the analysis of the structure it is not known how much it is necessary to jump. We'll have to leave the dummy team in place of the jump, and fill it in at the end of the analysis. Such a system of one-pass code generation with dummies, and their subsequent “patching”, is called backpatching . It is very versatile, and not only allows you to compile all the usual control structures, but also simplifies the implementation of break operators jumping forward an unknown distance.if and while , add a marker to the grammar - “empty character”: OP: 'if' '(' EXPR ')' M OP 'else' M OP;
| 'while' '(' EXPR ')' M OP;
M:;
M executed after the EXPR convolution (which generates the condition verification code) and before the OP convolution, so that we can add the generation of the dummy to the convolution code M When we collapse the entire structure, fill the dummy with a jump to the next dummy (after if ) or until the end of the generated code (after the else and while ).while know the distance to jump back at the end of the cycle? After all, if there are no lists of commands, then in the convolution code we will not be able to find out how many commands the whole construct took. We'll have to get another N marker, which does not generate the code, but simply remembers the address of the desired place: OP: 'while' N '(' EXPR ')' M OP;
M:;
N:;
%{
#include <iostream>
#include <vector>
#include <list>
#include <map>
extern int yylineno;
extern int yylex();
void yyerror( char *s) {
std::cerr << s << ", line " << yylineno << std::endl;
exit( 1 );
}
#include "jsk.h"
struct arg_t {
regnum dest; // if numeric
std::string str; // if literal
} ;
typedef struct {
std::string str; // tokens
regnum dest; // expr
arg_t arg;
std::list < arg_t > args;
int addr; // markers
char null; // op (no data)
} YYSTYPE;
#define YYSTYPE YYSTYPE
#define TOKENPASTE(x, y) x ## y
#define TOKENPASTE2(x, y) TOKENPASTE(x, y)
#define foreach(i, list) typedef typeof(list) TOKENPASTE2(T, __LINE__ ); \
for (TOKENPASTE2(T, __LINE__ )::iterator i = list.begin(); i != list.end(); i++)
template<typename L>
inline void append(L& list1, L& list2) {
list 1. splice(list1.end(), list2, list2.begin(), list2.end());
}
std::vector < command > pcode;
std::map<std::string,regnum> vars;
std::map<std::string,std::list < int > > strings;
regnum lastreg = 0 ;
inline regnum newreg() {
if (!++lastreg)
yyerror( "Too many registers used." );
return lastreg;
}
inline int emit( const command& cmd) {
pcode.push_back(cmd);
return pcode.size()- 1 ;
}
inline int emit(command::opcodes opcode, regnum dest, regnum src1, regnum src2) {
return emit(command(opcode, dest, src1, src2));
}
inline int emit(command::opcodes opcode, regnum dest, short imm) {
return emit(command(opcode, dest, imm));
}
inline void fix( int index, command::opcodes opcode, regnum dest, short imm) {
pcode[index] = command(opcode, dest, imm);
}
%}
%token IF ELSE WHILE EXIT
%token EQ LE GE NE
%token STRING NUM ID
%type < str > ID NUM STRING
%type < dest > EXPR EXPR1 EXPR2 TERM VAL
%type < arg > ARG
%type < args > ARGS
%type < null > OPS OP1 OP2 OP
%type < addr > MN
%%
PROGRAM: OPS { emit(command::hlt, 0 , 0 ); }
;
OPS: OP // no action
| OPS OP // no action
;
OP1: '{' OPS '}' {}
| EXPR ';' {}
| IF '(' EXPR ')' M OP1 ELSE M OP1 { fix( $5 , command::jz, $3 , $8 - $5 );
fix( $8 , command::jz, 0 , pcode.size()- 1 - $8 );
}
| WHILE N '(' EXPR ')' M OP1 { fix( $6 , command::jz, $4 , emit(command::jz, 0 , $2 -pcode.size()- 1 ) - $6 ); }
| EXIT ';' { emit(command::hlt, 0 , 0 ); }
;
OP2: IF '(' EXPR ')' M OP { fix( $5 , command::jz, $3 , pcode.size()- $5 ); }
| IF '(' EXPR ')' M OP1 ELSE M OP2 { fix( $5 , command::jz, $3 , $8 - $5 );
fix( $8 , command::jz, 0 , pcode.size()- 1 - $8 );
}
| WHILE N '(' EXPR ')' M OP2 { fix( $6 , command::jz, $4 , emit(command::jz, 0 , $2 -pcode.size()- 1 ) - $6 ); }
;
OP: OP1 | OP2 ; // no action
M: { $$ = emit(command::hlt, 0 , 0 ); } // dummy
;
N: { $$ = pcode.size(); }
;
EXPR: EXPR1 // inherit
| ID '=' EXPR { $$ = $3 ;
if (vars[ $1 ])
emit(command::add, vars[ $1 ], $3 , 0 );
else
vars[ $1 ] = $3 ; // new var
}
EXPR1: EXPR2 // inherit
| EXPR1 EQ EXPR2 { $$ = newreg(); emit(command::eq, $$ , $1 , $3 ); }
| EXPR1 LE EXPR2 { $$ = newreg(); emit(command::le, $$ , $1 , $3 ); }
| EXPR1 GE EXPR2 { $$ = newreg(); emit(command::ge, $$ , $1 , $3 ); }
| EXPR1 NE EXPR2 { $$ = newreg(); emit(command::ne, $$ , $1 , $3 ); }
| EXPR1 '>' EXPR2 { $$ = newreg(); emit(command::gt, $$ , $1 , $3 ); }
| EXPR1 '<' EXPR2 { $$ = newreg(); emit(command::lt, $$ , $1 , $3 ); }
;
EXPR2: TERM // inherit
| EXPR2 '+' TERM { $$ = newreg(); emit(command::add, $$ , $1 , $3 ); }
| EXPR2 '-' TERM { $$ = newreg(); emit(command::sub, $$ , $1 , $3 ); }
;
TERM: VAL // inherit
| TERM '*' VAL { $$ = newreg(); emit(command::mul, $$ , $1 , $3 ); }
| TERM '/' VAL { $$ = newreg(); emit(command::div, $$ , $1 , $3 ); }
;
VAL: NUM { $$ = newreg(); emit(command::mov, $$ , atoi( $1 .c_str())); }
| '-' VAL { $$ = newreg(); emit(command::sub, $$ , 0 , $2 ); }
| '!' VAL { $$ = newreg(); emit(command::eq, $$ , 0 , $2 ); }
| '(' EXPR ')' { $$ = $2 ; }
| ID { if (vars[ $1 ])
$$ = vars[ $1 ];
else
yyerror( "Undefined variable" );
}
| ID '(' ARGS ')' { if (! $1 .compare( "input" )) {
if ( $3 .size())
yyerror( "Input: too many arguments" );
$$ = newreg();
emit(command::input, $$ , 0 );
} else if (! $1 .compare( "echo" )) {
if (! $3 .size())
yyerror( "Input: too many arguments" );
$$ = 0 ;
foreach(i, $3 )
if (!i- > dest) // string
strings[i- > str].push_back(emit(command::echo, 0 , 0 ));
else
emit(command::echo, i- > dest, 0 );
} else
yyerror( "Undefined function" );
}
;
ARGS: { $$ .clear(); }
| ARG { $$ .clear(); $$ .push_back( $1 ); }
| ARGS ',' ARG { $$ = $1 ; $$ .push_back( $3 ); }
;
ARG: EXPR { $$ .dest = $1 ; }
| STRING { $$ .str= $1 ; $$ .dest= 0 ; }
;
%%
int main() {
yyparse();
int offset = pcode.size()* sizeof (command);
foreach(i, strings) {
foreach(j, i- > second) // all refs
pcode[*j].imm = offset;
offset += i- > first.length();
offset++;
}
foreach(i, pcode) // dump code
write( 1 , &*i, sizeof (*i));
foreach(i, strings) // dump strings
write( 1 , i- > first.c_str(), i- > first.length()+ 1 );
}
Source: https://habr.com/ru/post/99592/
All Articles