File names are infinite in length, where infinity is set to 255 characters.
--Peter Collinson: The Unix File System
00 mov r01, 0 01 mov r02, 0x3e8 02 echo 0x126 03 echo r01 04 echo 0xa0 05 echo r02 06 echo 0xa7 07 le r03, r01, r02 08 jz r03, + 0x1d (= 0x26) 09 add r04, r01, r02 0a mov r05,2 0b div r06, r04, r05 0c echo 0x162 0d echo r06 0e echo 0xcc 0f input r07 10 mov r08, 1 11 eq r09, r07, r08 12 jz r09, +4 (= 0x17) 13 mov r0a, 1 14 sub r0b, r06, r0a 15 add r02, r0b, 0 16 jz 0, + e (= 0x25) 17 mov r0c, 2 18 eq r0d, r07, r0c 19 jz r0d, +4 (= 0x1e) 1a mov r0e, 1 1b add r0f, r06, r0e 1c add r01, r0f, 0 1d jz 0, +7 (= 0x25) 1e mov r10, 3 1f eq r11, r07, r10 20 jz r11, +3 (= 0x24) 21 echo 0x146 22 hlt 23 jz 0, +1 (= 0x25) 24 echo 0x16a 25 jz 0, -0x1f (= 7) 26 echo 0xff 27 hlt
r03 used only in commands 07-08 , and r0f only in commands 1b-1c . It means that there are no obstacles to use the same physical register for the stored values: in the 07-08 commands it will be used for one value, and in 1b-1c - for another. Any two registers that are not used simultaneously for different values can be combined into one physical register. ...
10 mov r, 42
11 jz 0, 20
12 echo r
...
20 ...
...
30 jz 0, 12
...
r is mentioned only in commands 10–12, it is implicitly used in command 20, because after it, a jump back to 12 is possible; therefore, r cannot be combined with other registers used in command 20. ...
10 mov r, 42
11 echo r
...
20 ...
...
30 mov r, 24
31 echo r
...
r register is not actually used, although it is mentioned before this (10-11), and after that (30-31). We can store any other values ​​in it, because before the next use (31) it will be assigned a new value. In principle, we can even assign different physical registers for r in 10-11 and 31-31, because the values ​​will still be stored different. But for this you need to find out that r two independent “versions” and process them separately.iteration number: 1 (input / output) 2 (input / output) 3 (input / output) 4 (input / output) ... total (input / output) 00 mov r01, 0 / r01 01 mov r02, 0x3e8 / r01 r01 / r01, r02 02 echo 0x126 / r01 r01 / r01 r01 / r01 r01, r02 / r01, r02 03 echo r01 r01 / r01 / r01 / r01 / r02 r01, r02 / r01, r02 04 echo 0xa0 / r02 r02 / r02 r02 / r02 r01, r02 / r01, r02 05 echo r02 r02 / r02 / r02 / r02 / r01, r02 r01, r02 / r01, r02 06 echo 0xa7 / r01, r02 r01, r02 / r01, r02 r01, r02 / r01, r02 r01, r02 / r01, r02 07 le r03, r01, r02 r01, r02 / r01, r02 / r03 r01, r02 / r03 r01, r02 / r01, r02, r03 r01, r02 / r01, r02, r03 08 jz r03, + 0x1d (= 0x26) r03 / r03 / r01, r02 r01, r02, r03 / r01, r02 r01, r02, r03 / r01, r02 r01, r02, r03 / r01, r02 09 add r04, r01, r02 r01, r02 / r01, r02 / r01, r02 / r01, r02 / r04 r01, r02 / r01, r02, r04 0a mov r05,2 / r04, r05 r04 / r04, r05 r04 / r04, r05 r01, r02, r04 / r01, r02, r04, r05 0b div r06, r04, r05 r04, r05 / r04, r05 / r04, r05 / r04, r05 / r06 r01, r02, r04, r05 / r01, r02, r06 0c echo 0x162 / r06 r06 / r06 r06 / r06 r01, r02, r06 / r01, r02, r06 0d echo r06 r06 / r06 / r06 / r06 / r01, r02, r06 / r01, r02, r06 0e echo 0xcc r01, r02, r06 / r01, r02, r06 0f input r07 / r07 r01, r02, r06 / r01, r02, r06, r07 10 mov r08, 1 / r07, r08 r07 / r07, r08 r07 / r07, r08 r01, r02, r06, r07 / r01, r02, r06, r07, r08 11 eq r09, r07, r08 r07, r08 / r07, r08 / r09 r07, r08 / r09 r07, r08 / r09 r01, r02, r06, r07, r08 / r01, r02, r06, r07, r09 12 jz r09, +4 (= 0x17) r09 / r09 / r09 / r09 / r06, r07 r01, r02, r06, r07, r09 / r01, r02, r06, r07 13 mov r0a, 1 / r06, r0a r06 / r06, r0a r06 / r06, r0a r01, r06 / r01, r06, r0a 14 sub r0b, r06, r0a r06, r0a / r06, r0a / r0b r06, r0a / r0b r06, r0a / r0b r01, r06, r0a / r01, r0b 15 add r02, r0b, 0 r0b / r0b / r0b / r0b / r01, r0b / r01, r02 16 jz 0, + e (= 0x25) r01, r02 / r01, r02 17 mov r0c, 2 / r07, r0c r07 / r07, r0c r07 / r07, r0c r01, r02, r06, r07 / r01, r02, r06, r07, r0c 18 eq r0d, r07, r0c r07, r0c / r07, r0c / r0d r07, r0c / r0d r07, r0c / r0d r01, r02, r06, r07, r0c / r01, r02, r06, r07, r0d 19 jz r0d, +4 (= 0x1e) r0d / r0d / r0d / r0d / r06, r07 r01, r02, r06, r07, r0d / r01, r02, r06, r07 1a mov r0e, 1 / r06, r0e r06 / r06, r0e r06 / r06, r0e r02, r06 / r02, r06, r0e 1b add r0f, r06, r0e r06, r0e / r06, r0e / r0f r06, r0e / r0f r06, r0e / r0f r02, r06, r0e / r02, r0f 1c add r01, r0f, 0 r0f / r0f / r0f / r0f / r02, r0f / r01, r02 1d jz 0, +7 (= 0x25) / r01, r02 r01, r02 / r01, r02 1e mov r10, 3 / r07, r10 r07 / r07, r10 r07 / r07, r10 r01, r02, r07 / r01, r02, r07, r10 1f eq r11, r07, r10 r07, r10 / r07, r10 / r11 r07, r10 / r11 r07, r10 / r11 r01, r02, r07, r10 / r01, r02, r11 20 jz r11, +3 (= 0x24) r11 / r11 / r11 / r11 / r01, r02, r11 / r01, r02 21 echo 0x146 22 hlt 23 jz 0, +1 (= 0x25) / r01, r02 r01, r02 / r01, r02 24 echo 0x16a / r01, r02 r01, r02 / r01, r02 25 jz 0, -0x1f (= 7) / r01, r02 r01, r02 / r01, r02 r01, r02 / r01, r02 r01, r02 / r01, r02 26 echo 0xff 27 hltConcerning separate iterations:
typedef std::list< struct commandn>::iterator pcommandn;
struct commandn {
command cmd;
pcommandn tgt; //
commandn( const command& cmd) : cmd(cmd), tgt( NULL ) {}
};
std::list<commandn> pcode;
std::vector<pcommandn> pcodeidx;
inline int emit( const command& cmd) {
pcodeidx.push_back(pcode.insert(pcode.end(), commandn(cmd)));
return pcode.size()- 1 ;
}
inline int emit(command::opcodes opcode, regnum dest, regnum src1, regnum src2) {
return emit(command(opcode, dest, src1, src2));
}
inline int emit(command::opcodes opcode, regnum dest, short imm) {
return emit(command(opcode, dest, imm));
}
inline void fix( int index, command::opcodes opcode, regnum dest, short imm) {
*pcodeidx[index] = commandn(command(opcode, dest, imm));
}
int index; to the struct commandn int index;int main() {
yyparse();
//
for ( int i= 0 ; i<pcode.size(); i++)
if (pcodeidx[i]->cmd.opcode==command::jz)
pcodeidx[i]->tgt = pcodeidx[i+ 1 +pcodeidx[i]->cmd.imm];
// /
// ...
// /
int index = 0 ;
foreach(i, pcode) i->index = index++;
foreach(i, pcode)
if (i->cmd.opcode==command::jz)
i->cmd.imm = i->tgt->index-i->index- 1 ;
// -
// ...
}
struct commandn ) sets of living at the input and output registers: separately - registers of the p-code, to determine the lifetime; and separately - physical registers, to determine the compatibility with the actual appointment.typedef std::set<regnum> aliveset;
enum physreg { firstp = 1 , lastp = 4 };
typedef std::set<physreg> alivesetp;
struct commandn {
// ...
aliveset onenter, onexit; // liveness check
alivesetp onenterp, onexitp; // phys reg allocation
// ...
};
inline bool hasnext(pcommandn cmd) {
return (cmd->cmd.opcode!=command::hlt &&!
(cmd->cmd.opcode==command::jz && !cmd->cmd.dest));
}
inline bool has2src(pcommandn cmd) {
return cmd->cmd.opcode>=command::add;
}
inline bool writesdest(pcommandn cmd) {
return cmd->cmd.opcode>=command::mov;
}
inline bool readsdest(pcommandn cmd) {
return cmd->cmd.opcode>=command::store &&
cmd->cmd.opcode<=command::echo;
}
std::vector< int > liveness() { // returns usecount
std::vector< int > usecount(lastreg+ 1 );
bool changed = false ;
// rule 1
foreach(i, pcode) {
i->onenter = i->onexit = aliveset();
if (has2src(i)) {
if (i->cmd.src1) {
usecount[i->cmd.src1]++;
i->onenter.insert(i->cmd.src1);
}
if (i->cmd.src2) {
usecount[i->cmd.src2]++;
i->onenter.insert(i->cmd.src2);
}
} else if (readsdest(i) && i->cmd.dest) {
usecount[i->cmd.dest]++;
i->onenter.insert(i->cmd.dest);
}
if (i->onenter.size())
changed = true ;
}
while (changed) {
changed = false ;
foreach2(i, pcode, next) {
int oldsize = i->onenter.size()+i->onexit.size();
// rule 2 (next command)
if (hasnext(i))
i->onexit.insert(next->onenter.begin(), next->onenter.end());
// rule 2 (jmp target)
if (i->cmd.opcode==command::jz)
i->onexit.insert(i->tgt->onenter.begin(), i->tgt->onenter.end());
// rule 3
i->onenter.insert(i->onexit.begin(), i->onexit.end());
if (writesdest(i))
i->onenter.erase(i->cmd.dest);
if (i->onenter.size()+i->onexit.size() != oldsize)
changed = true ;
}
}
return usecount;
}
foreach2 macro foreach2 allows us to look both at the current and at the next element in the loop body:#define foreach2(i, list, next) typedef typeof(list) TOKENPASTE2(T, __LINE__ ); \
for (TOKENPASTE2(T, __LINE__ )::iterator next = list.begin(), i=next++; i != list.end(); i=next, next++)
test.jsk 35 iterations were enough, because jumping reduces the distance.// / :
while ( 1 ) { //
//
// ...
std::vector< int > usecount = liveness();
std::vector<physreg> physmap(lastreg+ 1 );
foreach(i, pcode)
i->onenterp = i->onexitp = alivesetp();
while ( 1 ) {
//
std::vector< int >::iterator max = std::max_element(usecount.begin(), usecount.end());
if (!*max) break ; //
*max = 0 ;
regnum r = max - usecount.begin();
// . - â„–r
std::vector< int > collisions(lastp); // 0-based
for (physreg p=firstp; p<=lastp; p=(physreg)(p+ 1 )) { // error: ISO C++ forbids incrementing an enum
foreach(i, pcode) {
if (contains(i->onenter, r) && contains(i->onenterp, p))
collisions[p- 1 ]++;
if (contains(i->onexit, r) && contains(i->onexitp, p))
collisions[p- 1 ]++;
}
if (collisions[p- 1 ]) continue ; // .
physmap[r] = p;
// - â„–r â„–p
foreach(i, pcode) {
if (contains(i->onenter, r))
i->onenterp.insert(p);
if (contains(i->onexit, r))
i->onexitp.insert(p);
}
break ; //
}
if (physmap[r]) continue ; //
// . - â„–r
// : ,
foreach2(i, pcode, next)
if ((i->cmd.opcode!=command::load && i->cmd.opcode!=command::store) &&
((contains(i->onenter, r) && i->onenterp.size()==lastp) ||
(contains(i->onexit, r) && i->onexitp.size()==lastp))) {
// , ,
// ...
goto afterspill;
}
// : ?
yyerror( "don't know how to proceed" );
}
// ;
foreach(i, pcode) {
i->cmd.dest = physmap[i->cmd.dest];
if (has2src(i)) {
i->cmd.src1 = physmap[i->cmd.src1];
i->cmd.src2 = physmap[i->cmd.src2];
}
}
break ;
afterspill : // - " "
}
...
10 need r1, r2, r3, r4
...
20 need r1, r3, r4, r5
...
30 need r2, r3, r4, r5
...
The r5 lacks the physical, because all the physical ones were spent on r1,r2,r3,r4 ; but there is not a single command where all five registers are needed at the same time.store/load pairs, and they need to be cleaned - otherwise the registers used in them create enough “noise” so that the registers are not enough for anything else.load and store , we will also remove other extra commands at the same time. Given that our p-code operations have no other side effects, other than calculating the register with the result, you can delete commands whose result is not alive at the output. Maybe the programmer assigned the value of an unused variable, or used an operator like (2+2);nop ; as it, let's assign an unconditional jump to the next command ( jz r00, +0 ). The benefit is that at the next iteration, the jump to nop shrink along the chain, and the nop itself will be cleaned up as an unreachable command.int reachcnt; field to the struct commandn int reachcnt;void markReachable(pcommandn cmd) {
while (!cmd->reachcnt++) {
if (cmd->cmd.opcode==command::jz)
markReachable(cmd->tgt);
if (hasnext(cmd))
cmd++;
else
break ;
}
}
void nopOut(pcommandn cmd) {
pcommandn next = cmd; next++;
cmd->cmd = command(command::jz, 0 , 0 );
cmd->tgt = next;
}
void simpleopt() {
//
foreach(i, pcode)
if (i->cmd.opcode==command::jz)
while (i->tgt->cmd.opcode==command::jz && !i->tgt->cmd.dest)
i->tgt = i->tgt->tgt;
//
foreach(i, pcode) i->reachcnt = 0 ;
markReachable(pcode.begin());
foreach2(i, pcode, next)
if (!i->reachcnt)
pcode.erase(i);
// nop:
foreach2(i, pcode, next)
if (i->cmd.opcode==command::jz &&
!i->cmd.dest && i->tgt==next)
pcode.erase(i);
}
// :
simpleopt();
//
liveness();
bool changed = false ;
foreach(i, pcode)
if (writesdest(i) && !contains(i->onexit, i->cmd.dest)) {
nopOut(i);
changed = true ;
}
if (changed) continue ;
//
// ...
struct commandn will require another field, the last for today: std::map<regnum,regnum> version;version , it means that the detour has not yet reached this command.reachcnt as in markReachable()std::map<regnum,regnum> renameto; //
void versionizeReg(pcommandn cmd, regnum r, regnum rv) { // r rv
while (!contains(cmd- > version, r) &&
(contains(cmd- > onenter, r) || contains(cmd- > onexit, r))) {
//
cmd- > version[r] = renameto[rv];
if (cmd- > cmd.dest==r) cmd- > cmd.dest=renameto[rv];
if (has2src(cmd)) {
if (cmd- > cmd.src1==r) cmd- > cmd.src 1 =renameto[rv];
if (cmd- > cmd.src2==r) cmd- > cmd.src 2 =renameto[rv];
}
if (!contains(cmd- > onexit,r)) return ; //
if (cmd- > cmd.opcode==command::jz &&
contains(cmd- > tgt- > onenter, r)) {
versionizeReg(cmd- > tgt, r, rv);
}
if (hasnext(cmd)) {
cmd++;
if (contains(cmd- > onenter, r))
continue ;
}
return ; //
}
// ?
if (contains(cmd- > version, r) && cmd- > version[r]!=renameto[rv]) {
// renameto[rv] cmd->version[r]
regnum from = std::max(cmd- > version[r], renameto[rv]),
to = std::min(cmd- > version[r], renameto[rv]);
renameto[from] = to;
foreach(i, pcode) {
if (i- > cmd.dest==from) i- > cmd.dest = to;
if (has2src(i)) {
if (i- > cmd.src1==from) i- > cmd.src 1 = to;
if (i- > cmd.src2==from) i- > cmd.src 2 = to;
}
if (contains(i- > version, r) && i- > version[r]==from)
i- > version[r] = to;
}
}
}
void versionize(pcommandn cmd) {
while (!cmd- > reachcnt++) {
// versionize registers that live on exit
foreach(r, cmd- > onexit)
if (!contains(cmd- > version, *r)) {
regnum rv = newreg();
renameto[rv] = rv;
versionizeReg(cmd, *r, rv);
}
if (cmd- > cmd.opcode==command::jz)
versionize(cmd- > tgt);
if (hasnext(cmd))
cmd++;
else
break ;
}
}
// :
foreach(i, pcode) {
i- > version.clear();
i- > reachcnt = 0 ;
}
renameto.clear();
int lastbasereg = lastreg;
versionize(pcode.begin());
// , 1
foreach(i, pcode) {
if (i- > cmd.dest) i- > cmd.dest-=lastbasereg;
if (has2src(i)) {
if (i- > cmd.src1) i- > cmd.src 1 -=lastbasereg;
if (i- > cmd.src2) i- > cmd.src 2 -=lastbasereg;
}
}
lastreg -= lastbasereg;
regnum spillable(pcommandn cmd) { // ,
aliveset alive;
alive.insert(cmd->onenter.begin(), cmd->onenter.end());
alive.insert(cmd->onexit.begin(), cmd->onexit.end());
alive.erase(cmd->cmd.dest);
if (has2src(cmd)) {
alive.erase(cmd->cmd.src1);
alive.erase(cmd->cmd.src2);
}
if (!alive.size()) return 0 ;
return *alive.begin();
}
map we will store by register number the “address” of its copy in memory so that all register outflows fall into the same place. The address is assigned to the register only at the moment of the first flood.// spill slots
std::map<regnum, int > spill;
int lastspill = 0 ;
void spillAt(pcommandn cmd, regnum victim, pcommandn next) {
// /
int spslot = spill[victim];
if (!spslot) {
spslot = ++lastspill;
spill[victim] = spslot;
}
// store load
pcommandn me = pcode.insert(next, *cmd);
cmd->cmd = command(command::store, victim, spslot);
commandn load(command(command::load, victim, spslot));
if (hasnext(me))
pcode.insert(next, load);
if (me->cmd.opcode==command::jz)
me->tgt = pcode.insert(me->tgt, load);
}
store-cmd-load troika, it is necessary that the existing jumps on the team jump to the store inserted in front of it. In order not to look for such jumps throughout the code, we insert the cmd copy with the following command, and then replace the original command with the store .load executed on both outcomes; therefore, we add it to the next command, and insert it in front of the goal of the jump.commandn to another, we will have to associate the lines for echo with the commands themselves by “identifier”, as in the first tree implementation: neither command indices nor pointers remain unchanged.)// . - №r
regnum victim;
// 1: ,
foreach2(i, pcode, next)
if ((i->cmd.opcode!=command::load && i->cmd.opcode!=command::store) &&
((contains(i->onenter, r) && i->onenterp.size()==lastp) ||
(contains(i->onexit, r) && i->onexitp.size()==lastp))) {
victim = spillable(i);
assert(victim);
spillAt(i, victim, next);
goto afterspill;
}
// 2: " "
physreg bestfit = (physreg)(std::max_element(collisions.begin(), collisions.end())-collisions.begin()+ 1 );
foreach2(i, pcode, next)
if ((i->cmd.opcode!=command::load && i->cmd.opcode!=command::store) &&
((contains(i->onenter, r) && contains(i->onenterp, bestfit)) ||
(contains(i->onexit, r) && contains(i->onexitp, bestfit))) &&
(victim = spillable(i))) {
spillAt(i, victim, next);
goto afterspill;
}
yyerror( "don't know how to proceed" );
spill .load and store is generated, and because of them there are no registers left for meaningful commands.load/store commands load/store//
std::vector< bool > used(lastreg+ 1 );
foreach(i, pcode) {
if (writesdest(i) && i->cmd.opcode!=command::load)
used[i->cmd.dest] = true ;
if (readsdest(i) && i->cmd.opcode!=command::store)
used[i->cmd.dest] = true ;
if (has2src(i)) {
used[i->cmd.src1] = true ;
used[i->cmd.src2] = true ;
}
}
foreach(i, pcode)
if ((i->cmd.opcode==command::load && !used[i->cmd.dest]) ||
(i->cmd.opcode==command::store && !used[i->cmd.dest]))
nopOut(i);
load , then everything except the last one will be deleted because the read register is not alive. But if we have chains of the same store , then to remove them you need to come up with something new. The algorithm is very similar to the iterative determination of the need for registers, only now we define “unpreserved” - in order to clear all the saved registers already saved.void unsaved() {
bool changed = false ;
// rule 1
foreach(i, pcode) {
i->onenter = i->onexit = aliveset();
if (writesdest(i)) {
i->onexit.insert(i->cmd.dest);
changed = true ;
}
}
while (changed) {
changed = false ;
foreach2(i, pcode, next) {
int oldsize = i->onenter.size()+i->onexit.size();
// rule 2 (next command)
if (hasnext(i))
next->onenter.insert(i->onexit.begin(), i->onexit.end());
// rule 2 (jmp target)
if (i->cmd.opcode==command::jz)
i->tgt->onenter.insert(i->onexit.begin(), i->onexit.end());
// rule 3
i->onexit.insert(i->onenter.begin(), i->onenter.end());
if (i->cmd.opcode==command::store)
i->onexit.erase(i->cmd.dest);
if (i->onenter.size()+i->onexit.size() != oldsize)
changed = true ;
}
}
}
afterspill : // - " "
unsaved();
foreach(i, pcode)
if (i->cmd.opcode==command::store && !contains(i->onenter, i->cmd.dest))
nopOut(i);
}
00 mov r01, 0 01 mov r02, 0x3e8 02 echo 0x12e 03 echo r01 04 echo 0xa8 05 echo r02 06 echo 0xaf 07 le r03, r01, r02 08 jz r03, + 0x1f (= 0x28) 09 add r03, r01, r02 0a mov r04,2 0b div r03, r03, r04 0c echo 0x16a 0d echo r03 0e echo 0xd4 0f input r04 10 store r01, 1 11 mov r01, 1 12 eq r01, r04, r01 13 jz r01, +5 (= 0x19) 14 load r01, 1 15 mov r02, 1 16 sub r02, r03, r02 17 add r02, r02, 0 18 jz 0, -0x12 (= 0x7) 19 mov r01,2 1a eq r01, r04, r01 1b jz r01, +4 (= 0x20) 1c mov r01, 1 1d add r01, r03, r01 1e add r01, r01, 0 1f jz 0, -0x19 (= 0x7) 20 load r01, 1 21 mov r03, 3 22 eq r03, r04, r03 23 jz r03, +2 (= 0x26) 24 echo 0x14e 25 hlt 26 echo 0x172 27 jz 0, -0x21 (= 7) 28 echo 0x107 29 hlt
Source: https://habr.com/ru/post/99595/
All Articles