DASM
|
00001 #!/usr/bin/perl -w 00002 ## 00003 # DASM DCPU-16 source assembler class. 00004 # 00005 # This class provides the implementation of the source code assembler 00006 # for DASM. 00007 # 00008 # The files we support end in '.dasm'. 00009 # 00010 # Source file format: 00011 # 00012 # @code 00013 # [<label>] <opcode> <operand a>, <operand b> 00014 # [<label>] <ext-opcode> <operand> 00015 # ; <comment> 00016 # @endcode 00017 # 00018 # 00019 # It also provides a disassembler which can produce source code which 00020 # can be passed back to the assembler. If the source comes from one 00021 # of the binary formats that doesn't offer relocations or symblols, 00022 # the content will be more difficult to parse interpret, but should still 00023 # work. 00024 # 00025 # The structures we use are intended to be able to support libraries of 00026 # relocatable, partially linked code. This has not yet been tested and 00027 # the lack of any indication of which symbols are local and which are 00028 # imported may be a problem in the future. 00029 # 00030 # In addition to the 1.1 DCPU-16 specification, the assembler supports: 00031 # 00032 # - Character literals for values: @n 00033 # `'X'` will evaluate to the number 88. 00034 # 00035 # - Expression calculations: @n 00036 # `1+4` will evaluate to the number 5. @n 00037 # Expression calculations are limited, and do not honour precedence 00038 # rules. Use brackets for clarity. 00039 # 00040 # - Symbols may be used in additive calculations: @n 00041 # `data + 4` will evaluate to the value of symbol '`data`' plus 4. @n 00042 # Symbols may (internally) be addresses or constants: 00043 # - Address symbols are created through the standard labels. 00044 # - Address symbols may (in future versions) be subject to relocations. 00045 # 00046 # - Symbols may be 'local' by preceding with a '`$`' character: @n 00047 # Any symbol preceded by a '`$`' will have the last label used prepended 00048 # to the symbol name. This allows labels and references to be used 00049 # within a routine without polluting the global namespace. 00050 # 00051 # - Additional instructions: 00052 # - `DAT <value[,<value> ]*` @n 00053 # ... will write literal values to the core. @n 00054 # The DAT instruction is supported in the assembler, and generated in 00055 # the disassembler for undefined instruction sequences. 00056 # - `CONST value` @n 00057 # ... will define a constant symbol with the name given in the label. 00058 # - `ADDRESS <value>` @n 00059 # ... will define an address symbol with the name given in the label. 00060 # 00061 # - Directives: @n 00062 # `.<directive> <parameters>` allows additional meta operations to be 00063 # performed. Directives supported: 00064 # - `.ORIGIN address` @n 00065 # ... Sets the address for subsequent compilation 00066 # - `.INCLUDE file` @n 00067 # ...Includes another file at the current location 00068 # 00069 # @note We should really consider changing the structure so that the 00070 # DASMIO objects inherit from a parser class, rather than having 00071 # to reference `$self->{'dasm'}` for parser operations like 00072 # error(). 00073 # 00074 # @file 00075 # @author Justin Fletcher 00076 # 00077 00078 package DASMIO::Source; 00079 00080 use DASM; 00081 use DCPU; 00082 00083 use Carp; 00084 00085 00086 ## Whether we're debugging the 'DAT' instruction 00087 my $debug_dat = 0; 00088 00089 00090 ## Whether we're debugging the symbol processing 00091 my $debug_sym = 0; 00092 00093 ## 00094 # Create an object upon which we can work. 00095 # 00096 # @param[in] $proto Prototype object, or this class's name 00097 # @param[in] $dasm DASM object we're working on 00098 # 00099 # @return new object 00100 sub new 00101 { 00102 my $proto = shift; 00103 my $dasm = shift; 00104 my $class = ref($proto) || $proto; 00105 my $self = { 00106 00107 # The last label we encountered, for local labelling 00108 'lastlabel' => undef, 00109 }; 00110 00111 bless $self, $class; 00112 00113 $self->{'dasm'} = $dasm; 00114 00115 return $self; 00116 } 00117 00118 00119 ## 00120 # Identify whether we can handle reading or writing a given file 00121 # by its filename. 00122 # 00123 # @param[in] $self Our object 00124 # @param[in] $filename Filename we're accessing 00125 # @param[in] $type 'read' or 'write' 00126 # 00127 # @retval 1 if we can access it 00128 # @retval 0 if we cannot access it 00129 sub identify 00130 { 00131 my ($self, $filename, $type) = @_; 00132 00133 if ($filename =~ /\.dasm$/) 00134 { 00135 return 1; 00136 } 00137 00138 return 0; 00139 } 00140 00141 00142 ## 00143 # List the extensions we support. 00144 # 00145 # @param[in] $self Our object 00146 # 00147 # @return arrayref of extensions 00148 sub extensions 00149 { 00150 return [ 'dasm' ]; 00151 } 00152 00153 00154 ## 00155 # Parse an instruction line. 00156 # 00157 # @param[in] $self IO object 00158 # @param[in] $line Line to process 00159 sub parseline 00160 { 00161 my ($self, $line) = @_; 00162 my $dasm = $self->{'dasm'}; 00163 00164 # Trim any trailing newline that might have been left 00165 chomp $line; 00166 00167 # Directives processed first - consider moving these to a callback. 00168 if ($line =~ s/^\.([A-Za-z]+)\s*//) 00169 { 00170 my $directive = uc $1; 00171 if ($directive eq 'ORIGIN') 00172 { 00173 my ($constant, $symname) = $self->parse_constant(\$line, 'constant'); 00174 $dasm->{'pc'} = $constant; 00175 } 00176 elsif ($directive eq 'INCLUDE') 00177 { 00178 if ($line =~ s/"(.*?)"//) 00179 { 00180 $dasm->readfile($1); 00181 } 00182 else 00183 { 00184 $dasm->error("Unrecognised INCLUDE '$line'"); 00185 } 00186 } 00187 else 00188 { 00189 $dasm->error("Unrecognised directive '$directive'"); 00190 } 00191 } 00192 00193 # We hold on to the label and PC location so that we can assign at the 00194 # end if they were set. 00195 my $label = undef; 00196 my $startpc = $dasm->{'pc'}; 00197 00198 # The initial part of the line may be a label, starting with a ':' 00199 # However, the label isn't always for this address; it may be a constant 00200 # label or some other type of label. So we merely remember it until we 00201 # know what sort of label it is. 00202 if ($line =~ s/^:([\w\$]+)//) 00203 { 00204 $label = $1; 00205 if ($label =~ /^\$/) 00206 { 00207 # A local label, so prepend the last label we saw 00208 if (!defined $self->{'lastlabel'}) 00209 { 00210 $dasm->error("Cannot use local label without a recent label"); 00211 } 00212 $label = $self->{'lastlabel'} . $label; 00213 } 00214 else 00215 { 00216 $self->{'lastlabel'} = $label; 00217 } 00218 } 00219 00220 # Skip any indentation 00221 $line =~ s/^\s*//; 00222 00223 # Skip any empty lines or comments 00224 if ($line eq '' || 00225 $line =~ s/^;//) 00226 { 00227 goto line_complete; 00228 } 00229 00230 # Extract the opcode, if there is one 00231 if ($line =~ s/^([a-zA-Z0-9_]+)\b\s*//) 00232 { 00233 my $opcode = uc $1; 00234 00235 # First check for magic opcodes that we introduced: 00236 if ($opcode eq 'DAT') 00237 { 00238 # Parse operands until we have none left 00239 while ($line !~ /^\s*$/ && 00240 $line !~ /^\s*;/) 00241 { 00242 # We want to allow string data as well, for simplicity. 00243 # We do NOT handle UTF-8 - everything is 8 bit. 00244 if ($line =~ s/^\s*"([^"]+)"// || # "d string 00245 $line =~ s/^\s*'([^']+)'//) # 'd string 00246 { 00247 print " string: '$1'\n" if ($debug_dat); 00248 for my $chr (unpack "c*", $1) 00249 { 00250 $dasm->storetopc($chr); 00251 } 00252 } 00253 else 00254 { 00255 my ($constant, $symname) = $self->parse_constant(\$line); 00256 if ($symname) 00257 { 00258 print " symbol $symname\n" if ($debug_dat); 00259 # Relocation symbol, update the table 00260 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $symname; 00261 } 00262 print " value $constant\n" if ($debug_dat); 00263 $dasm->storetopc($constant); 00264 } 00265 last if ($line !~ s/^\s*,\s*//); 00266 } 00267 if ($line !~ s/^\s*// && 00268 $line !~ s/^\s*;//) 00269 { 00270 $dasm->error("Garbage after operand to opcode '$opcode'"); 00271 } 00272 } 00273 elsif ($opcode eq 'CONST') 00274 { 00275 # Constant value symbol declaration 00276 if (defined $label) 00277 { 00278 my ($constant, $symname) = $self->parse_constant(\$line, 'constant'); 00279 $dasm->setsymbol($label, $constant, 'constant'); 00280 $label = undef; 00281 } 00282 else 00283 { 00284 $self->error("CONST must be supplied with a label"); 00285 } 00286 } 00287 elsif ($opcode eq 'ADDRESS') 00288 { 00289 # Address value symbol declaration 00290 if (defined $label) 00291 { 00292 my ($constant, $symname) = $self->parse_constant(\$line, 'constant'); 00293 $dasm->setsymbol($label, $constant, 'address'); 00294 $label = undef; 00295 } 00296 else 00297 { 00298 $self->error("ADDRESS must be supplied with a label"); 00299 } 00300 } 00301 00302 # Now the real opcodes 00303 elsif ($DCPU::BasicOps{$opcode}) 00304 { 00305 # We have a basic opcode 00306 my $opval = $DCPU::BasicOps{$opcode}; 00307 00308 # Parse each operand 00309 my $operand_a = $self->parse_operand(\$line); 00310 if ($line !~ s/^\s*,\s*//) 00311 { 00312 $dasm->error("Expected operand b to opcode '$opcode'"); 00313 } 00314 00315 my $operand_b = $self->parse_operand(\$line); 00316 if ($line !~ s/^\s*$// && 00317 $line !~ s/^\s*;//) 00318 { 00319 $dasm->error("Garbage after operand b to opcode '$opcode'"); 00320 } 00321 00322 if ($debug_asm) 00323 { 00324 printf "%04x : %-6s : %x\n", $dasm->{'pc'}, $opcode, $opval; 00325 printf " %02x : %-9s : %s\n", $operand_a->{'opval'}, 00326 $operand_a->{'type'}, 00327 ($operand_a->{'value'} // ''); 00328 printf " %02x : %-9s : %s\n", $operand_b->{'opval'}, 00329 $operand_b->{'type'}, 00330 ($operand_b->{'value'} // ''); 00331 } 00332 $dasm->storetopc($opval + 00333 ($operand_a->{'opval'} << 4) + 00334 ($operand_b->{'opval'} << 10)); 00335 00336 if (defined $operand_a->{'nextword'}) 00337 { 00338 if ($operand_a->{'nextword'}->{'symbol'}) 00339 { 00340 # Relocation symbol, update the table 00341 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'}; 00342 } 00343 $dasm->storetopc($operand_a->{'nextword'}->{'value'}); 00344 } 00345 00346 if (defined $operand_b->{'nextword'}) 00347 { 00348 if ($operand_b->{'nextword'}->{'symbol'}) 00349 { 00350 # Relocation symbol, update the table 00351 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_b->{'nextword'}->{'symbol'}; 00352 } 00353 $dasm->storetopc($operand_b->{'nextword'}->{'value'}); 00354 } 00355 } 00356 elsif ($DCPU::ExtendedOps{$opcode}) 00357 { 00358 # We have an extended opcode 00359 my $opval = $DCPU::ExtendedOps{$opcode}; 00360 00361 # Parse the operand 00362 my $operand_a = $self->parse_operand(\$line); 00363 if ($line !~ s/^\s*$// && 00364 $line !~ s/^\s*;//) 00365 { 00366 $dasm->error("Garbage after operand a to opcode '$opcode'"); 00367 } 00368 00369 $dasm->storetopc(($opval<<4) + 00370 ($operand_a->{'opval'}<<10)); 00371 00372 # Update the core with the following words 00373 if (defined $operand_a->{'nextword'}) 00374 { 00375 if ($operand_a->{'nextword'}->{'symbol'}) 00376 { 00377 # Relocation symbol, update the table 00378 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'}; 00379 } 00380 $dasm->storetopc($operand_a->{'nextword'}->{'value'}); 00381 } 00382 } 00383 else 00384 { 00385 $dasm->error("Bad opcode '$opcode'"); 00386 } 00387 } 00388 else 00389 { 00390 $dasm->error("Bad source '$line'"); 00391 } 00392 00393 line_complete: 00394 if (defined $label) 00395 { 00396 $dasm->label($startpc, $label); 00397 } 00398 } 00399 00400 00401 ## 00402 # Parse operands and return the values of them 00403 # 00404 # @param[in] $self IO object 00405 # @param[in,out] $lineref Reference to the line that we're parsing 00406 # 00407 # @return hashref of operand details: 00408 # 'type' => 'reg' (register value : register=>regname) 00409 # 'regaddr' (register address : register=>regname) 00410 # 'regindex' (registered indexed : register>regname) 00411 # 'pop' (stack pop) 00412 # 'peek' (stack peek) 00413 # 'push' (stack push) 00414 # 'sp' (stack pointer) 00415 # 'pc' (program counter) 00416 # 'o' (overflow) 00417 # 'indirect' (dereference next address) 00418 # 'load' (load next address) 00419 # 'loadshort' (load short value : value=>short value) 00420 # 'opval' => value in the operand register 00421 # 'register' => register, if applicable 00422 # 'nextword' => next word data, if applicable 00423 # 'value' => value, if applicable 00424 sub parse_operand 00425 { 00426 my ($self, $lineref) = @_; 00427 my $dasm = $self->{'dasm'}; 00428 00429 #0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order) 00430 if ($$lineref =~ s/^([ABCXYZIJ])\b//i) 00431 { 00432 return { 00433 'type' => 'reg', 00434 'register' => uc $1, 00435 'opval' => 0 + $DCPU::RegisterMap{uc $1} 00436 }; 00437 } 00438 #0x08-0x0f: [register] 00439 elsif ($$lineref =~ s/^\[\s*([ABCXYZIJ])\s*\]//i) 00440 { 00441 return { 00442 'type' => 'regaddr', 00443 'register' => uc $1, 00444 'opval' => 0x08 + $DCPU::RegisterMap{uc $1} 00445 }; 00446 } 00447 #0x10-0x17: [next word + register] 00448 # 0x1e: [next word] 00449 elsif ($$lineref =~ s/^\[\s*//) 00450 { 00451 # Variation on syntax, as part of the tests at: 00452 # http://0x10cwiki.com/wiki/Comparison_of_Developer_Tools 00453 # allows for [register + next word] as well. 00454 if ($$lineref =~ s/^([ABCXYZIJ])\s*\+\s*//i) 00455 { 00456 my $reg = uc $1; 00457 # This is the variation. 00458 my $symbol = $self->parse_symbol($lineref); 00459 if ($$lineref =~ s/^\s*\]//) 00460 { 00461 return { 00462 'type' => 'regindex', 00463 'register' => $reg, 00464 'nextword' => $symbol, 00465 'opval' => 0x10 + $DCPU::RegisterMap{$reg} 00466 }; 00467 00468 } 00469 else 00470 { 00471 $dasm->error("Garbage whilst parsing inverted indexed operand"); 00472 } 00473 } 00474 else 00475 { 00476 my $symbol = $self->parse_symbol($lineref); 00477 if ($$lineref =~ s/^\s*\+\s*([ABCXYZIJ])\s*\]//i) 00478 { 00479 return { 00480 'type' => 'regindex', 00481 'register' => uc $1, 00482 'nextword' => $symbol, 00483 'opval' => 0x10 + $DCPU::RegisterMap{uc $1} 00484 }; 00485 } 00486 elsif ($$lineref =~ s/^\s*\]//) 00487 { 00488 return { 00489 'type' => 'indirect', 00490 'nextword' => $symbol, 00491 'opval' => 0x1e 00492 }; 00493 } 00494 else 00495 { 00496 $dasm->error("Garbage whilst parsing referenced operand"); 00497 } 00498 } 00499 } 00500 # 0x18: POP / [SP++] 00501 elsif ($$lineref =~ s/^POP\b//) 00502 { 00503 return { 00504 'type' => 'pop', 00505 'opval' => 0x18 00506 }; 00507 } 00508 # 0x19: PEEK / [SP] 00509 elsif ($$lineref =~ s/^PEEK\b//) 00510 { 00511 return { 00512 'type' => 'peek', 00513 'opval' => 0x19 00514 }; 00515 } 00516 # 0x1a: PUSH / [--SP] 00517 elsif ($$lineref =~ s/^PUSH\b//i) 00518 { 00519 return { 00520 'type' => 'push', 00521 'opval' => 0x1a 00522 }; 00523 } 00524 # 0x1b: SP 00525 elsif ($$lineref =~ s/^SP\b//i) 00526 { 00527 return { 00528 'type' => 'sp', 00529 'opval' => 0x1b 00530 }; 00531 } 00532 # 0x1c: PC 00533 elsif ($$lineref =~ s/^PC\b//i) 00534 { 00535 return { 00536 'type' => 'pc', 00537 'opval' => 0x1c 00538 }; 00539 } 00540 # 0x1d: O 00541 elsif ($$lineref =~ s/^O\b//) 00542 { 00543 return { 00544 'type' => 'o', 00545 'opval' => 0x1d 00546 }; 00547 } 00548 # 0x1f: next word (literal) 00549 #0x20-0x3f: literal value 0x00-0x1f (literal) 00550 else 00551 { 00552 # Literal value, which could be a short-form literal value, 00553 # or a symbol 00554 my $symbol = $self->parse_symbol($lineref); 00555 if ($symbol->{'type'} eq 'constant') 00556 { 00557 if ($symbol->{'value'} >= 0 && 00558 $symbol->{'value'} < 0x20) 00559 { 00560 return { 00561 'type' => 'loadshort', 00562 'opval' => 0x20 + $symbol->{'value'}, 00563 'value' => $symbol->{'value'} 00564 }; 00565 } 00566 } 00567 00568 # Check for overflow 00569 if ($symbol->{'type'} eq 'constant' && 00570 $symbol->{'value'} > 0xffff) 00571 { 00572 $dasm->error( sprintf "Literal value (0x%x) too large", $symbol->{'value'} ); 00573 } 00574 # Check for underflow 00575 if ($symbol->{'type'} eq 'constant' && 00576 $symbol->{'value'} < 0) 00577 { 00578 # I think we'll allow negative numbers to wrap down, as that's 00579 # useful in the assembler. But those which wrap twice are too 00580 # small. 00581 if ($symbol->{'value'} < -0x10000) 00582 { 00583 $dasm->error( sprintf "Literal value (0x%x) too small", $symbol->{'value'} ); 00584 } 00585 $symbol->{'value'} += 0x10000; 00586 } 00587 return { 00588 'type' => 'load', 00589 'opval' => 0x1f, 00590 'nextword' => $symbol 00591 }; 00592 } 00593 } 00594 00595 00596 ## 00597 # Parse a symbol name or value. 00598 # 00599 # @param[in] $self DASM object 00600 # @param[in,out] $lineref Reference to the line that we're parsing 00601 # 00602 # @return hashref of symbol details: 00603 # 'type' => 'constant' : a constant value 00604 # 'address' : an address constant 00605 # 'value' => value 00606 # 'symbol' => any symbol name to use as relocation 00607 sub parse_symbol 00608 { 00609 my ($self, $lineref, $nonterm) = @_; 00610 my $dasm = $self->{'dasm'}; 00611 00612 my %symbol = ( 00613 'type' => 'unknown', 00614 'value' => 0, 00615 ); 00616 00617 if (!defined $lineref) 00618 { 00619 confess "parse_symbol called without a parameter"; 00620 } 00621 if (ref $lineref ne 'SCALAR') 00622 { 00623 confess "parse_symbol called with ".ref($lineref)." but expected SCALAR"; 00624 } 00625 00626 # Skip any leading spaces 00627 $$lineref =~ s/^\s*//; 00628 00629 print "parse_symbol: $$lineref\n" if ($debug_sym); 00630 00631 if ($$lineref =~ s/^\(//) 00632 { 00633 # Opening bracket for an expression, so parse inside the 00634 # brackets 00635 my $innersymbol = $self->parse_symbol($lineref); 00636 00637 %symbol = %$innersymbol; 00638 00639 # Check that we ended properly 00640 if ($$lineref !~ s/^\s*\)//) 00641 { 00642 $dasm->error("Missing closing bracket"); 00643 } 00644 } 00645 elsif ($$lineref =~ s/^([\$A-Za-z_][\w\$]*)//) 00646 { 00647 # Symbol reference 00648 my $symname = $1; 00649 00650 # Check for local symbols and redefine them for the local label 00651 if ($symname =~ /^\$/) 00652 { 00653 if (!defined $self->{'lastlabel'}) 00654 { 00655 $dasm->error("Cannot use a local label before a label has been set"); 00656 } 00657 $symname = $self->{'lastlabel'} . $symname; 00658 } 00659 00660 my $sym = $dasm->getsymbol($symname); 00661 # Hmm. Whilst decoding the symbol here makes some sense, we could 00662 # just leave it for the linkage stage. 00663 #if (defined $sym) 00664 #{ 00665 # # Symbol has been defined 00666 # %symbol = ( %$sym, 00667 # 'symbol' => $symname 00668 # ); 00669 #} 00670 #else 00671 { 00672 %symbol = ( 00673 'type' => 'address', 00674 'value' => 0, 00675 'symbol' => $symname 00676 ); 00677 } 00678 } 00679 elsif ($$lineref =~ s/^([+-]?0x[A-Fa-f0-9]+)// || 00680 $$lineref =~ s/^([+-]?\d+)//) 00681 { 00682 # Hex or decimal value 00683 my $value = eval $1; 00684 %symbol = ( 00685 'type' => 'constant', 00686 'value' => $value 00687 ); 00688 } 00689 elsif ($$lineref =~ s/^'(.)'//) 00690 { 00691 # character, so convert to an ordinal 00692 %symbol = ( 00693 'type' => 'constant', 00694 'value' => ord($1) 00695 ); 00696 } 00697 else 00698 { 00699 $dasm->error("Unrecognised symbol parsing '$$lineref'"); 00700 } 00701 00702 # We need to special case the syntax here, because the defined format 00703 # of the assembler allows for register names here (this could be 00704 # refined by having a more complex symbol definition). 00705 if ($$lineref =~ /\+\s*[ABCXYZIJ]\b/i) 00706 { 00707 # This is a relative reference, so don't try to perform arithmetic 00708 # here. 00709 return \%symbol; 00710 } 00711 00712 if ($$lineref =~ s/^\s*([\+\-\*\/\%\&\|\^]|<<|>>)//) 00713 { 00714 # Operators - note that this is wrong WRT precedence as it won't 00715 # make *, / and % higher priority than + and -. I'm not fussed 00716 # right now. FIXME 00717 my $operator = $1; 00718 my $second = $self->parse_symbol($lineref); 00719 if (defined $symbol{'symbol'} && 00720 $operator ne '+') 00721 { 00722 $dasm->error("Symbol '$symbol{'symbol'}' not yet defined in arithmetic operation (only additive operations allowed)"); 00723 } 00724 if (defined $symbol{'symbol'} && 00725 defined $second->{'symbol'}) 00726 { 00727 $dasm->error("Relocation by multiple symbols ('$symbol{'symbol'}' and '$second->{'symbol'}') not supported"); 00728 } 00729 00730 if (defined $second->{'symbol'}) 00731 { 00732 # Swap them around so that the symbol is in our local 00733 # This is safe because the only type of symbol operation we 00734 # support is additive. 00735 my %copy = %symbol; 00736 %symbol = %$second; 00737 $second = \%copy; 00738 } 00739 00740 if ($operator eq '+') 00741 { $symbol{'value'} += $second->{'value'}; } 00742 elsif ($operator eq '-') 00743 { $symbol{'value'} -= $second->{'value'}; } 00744 elsif ($operator eq '*') 00745 { $symbol{'value'} *= $second->{'value'}; } 00746 elsif ($operator eq '/') 00747 { $symbol{'value'} /= $second->{'value'}; } 00748 elsif ($operator eq '%') 00749 { $symbol{'value'} %= $second->{'value'}; } 00750 elsif ($operator eq '&') 00751 { $symbol{'value'} &= $second->{'value'}; } 00752 elsif ($operator eq '|') 00753 { $symbol{'value'} |= $second->{'value'}; } 00754 elsif ($operator eq '^') 00755 { $symbol{'value'} ^= $second->{'value'}; } 00756 elsif ($operator eq '<<') 00757 { $symbol{'value'} <<= $second->{'value'}; } 00758 elsif ($operator eq '>>') 00759 { $symbol{'value'} >>= $second->{'value'}; } 00760 else 00761 { 00762 die "Rogue operator $operator - should not happen"; 00763 } 00764 00765 if ($second->{'type'} eq 'address') 00766 { 00767 $symbol{'type'} = $second->{'type'}; 00768 } 00769 } 00770 00771 return \%symbol; 00772 } 00773 00774 ## 00775 # Obtain a constant from an input line. 00776 # 00777 # @param[in] $self DASMIO object 00778 # @param[in,out] $lineref reference to line to read from 00779 # @param[in] $symbol 'constant' : only a constant is acceptable 00780 # 'symbol' : symbol is acceptable 00781 # 00782 # @return $value The value to store 00783 # $symbol Relocation symbol to apply, or none if not found 00784 sub parse_constant 00785 { 00786 my ($self, $lineref, $symbol) = @_; 00787 00788 $symbol = 'constant' if (!defined $symbol); 00789 00790 print "Constant: $$lineref\n" if ($debug_dat); 00791 00792 my $operand = $self->parse_operand($lineref); 00793 print " type $operand->{'type'}\n" if ($debug_dat); 00794 00795 # The only types of operand we support here are the 00796 # nextword and constant value operands - 'load' and 'loadshort' 00797 my $sym = undef; 00798 if ($operand->{'type'} eq 'load') 00799 { 00800 $sym = $operand->{'nextword'}; 00801 } 00802 elsif ($operand->{'type'} eq 'loadshort') 00803 { 00804 $sym = { 00805 'type' => 'constant', 00806 'value' => $operand->{'value'} 00807 }; 00808 } 00809 else 00810 { 00811 $dasm->error("Constant operand expected, not type $operand->{'type'}"); 00812 } 00813 00814 if ($sym->{'symbol'} && 00815 $symbol eq 'constant') 00816 { 00817 # A symbol which we know about and which is a constant. 00818 my $symval = $dasm->getsymbol($symname); 00819 if (defined $symval && 00820 $symval->{'type'} eq 'constant') 00821 { 00822 # We can interpret it here immediately 00823 return ($sym->{'value'} + $symval->{'value'}, undef); 00824 } 00825 else 00826 { 00827 $dasm->error("Symbol operand not expected"); 00828 } 00829 } 00830 00831 return ($sym->{'value'}, $symval->{'symbol'}); 00832 } 00833 00834 00835 00836 ########################################################################### 00837 # Disassembly routines 00838 # (these are included in the source parser, so that we can decode the 00839 # compiled files to re-compilable files) 00840 00841 00842 ## 00843 # Write out a compilable source file based on what we have in the core. 00844 # 00845 # @param[in] $self Address object 00846 # 00847 # @return Content to write 00848 sub write 00849 { 00850 my ($self) = @_; 00851 my $dasm = $self->{'dasm'}; 00852 my $core = $dasm->{'core'}; 00853 00854 # Easier access to symbols 00855 my $symbols = $dasm->{'symbols'}; 00856 00857 # Collect a list of address symbols 00858 my %addressSymbols = map { 00859 $symbols->{$_}->{'value'} => $_ 00860 } grep { 00861 $symbols->{$_}->{'type'} eq 'address' 00862 } keys %$symbols; 00863 00864 # The lines we'll return 00865 my @lines; 00866 00867 # If we have any constants, we need to write them out first 00868 my @constants = grep { 00869 $symbols->{$_}->{'type'} eq 'constant' 00870 } keys %$symbols; 00871 if (scalar(@constants) != 0) 00872 { 00873 push @lines, "; Constants"; 00874 } 00875 for my $sym (@constants) 00876 { 00877 push @lines, sprintf "%-16s%-8s0x%04x", 00878 ":$sym ", 00879 "CONST", 00880 $symbols->{$sym}->{'value'}; 00881 } 00882 00883 my $lastaddress = -1; 00884 00885 # We have to deal with the addresses in order, because we might 00886 # have a corrupt file that ends before the data or which is 00887 # discontinuous. 00888 my @addresses = sort { $a <=> $b } keys %$core; 00889 if (scalar(@addresses) != 0) 00890 { 00891 push @lines, "; Core"; 00892 } 00893 while (defined (my $address = shift @addresses)) 00894 { 00895 my $value = $core->{$address}; 00896 00897 # We never write out hardware 00898 next if (ref $value ne ''); 00899 00900 if ($lastaddress+1 != $address) 00901 { 00902 # If we have discontinous core data, we need to change the 00903 # origin. 00904 push @lines, sprintf ".ORIGIN 0x%04x", $address; 00905 } 00906 00907 if (defined $addressSymbols{$address}) 00908 { 00909 push @lines, ":$addressSymbols{$address}"; 00910 } 00911 00912 my $opcode = $value & 15; 00913 my $opcodename; 00914 00915 # List of the memory locations associated with this instruction 00916 my @memdata = ($value); 00917 00918 # List of the decoded operands 00919 my @operands; 00920 if ($opcode !=0) 00921 { 00922 # Basic opcode 00923 $opcodename = $DCPU::InvBasicOps{$opcode}; 00924 my $operand_a = ($value>>4) & 63; 00925 my $operand_b = ($value>>10) & 63; 00926 00927 # Decode each operand into printable descriptions, pulling data 00928 # out of nextword as necessary. 00929 push @operands, $self->decode_operand($operand_a, 00930 $address + scalar(@memdata), 00931 \@memdata); 00932 00933 push @operands, $self->decode_operand($operand_b, 00934 $address + scalar(@memdata), 00935 \@memdata); 00936 00937 if (!defined $opcodename) 00938 { 00939 die "No such opcode: $opcode\n"; 00940 } 00941 } 00942 else 00943 { 00944 # Extended opcodes 00945 $opcode = ($value>>4) & 63; 00946 $opcodename = $DCPU::InvExtendedOps{$opcode}; 00947 if (!defined $opcodename) 00948 { 00949 # Invalid opcode, so we treat as data 00950 $opcodename = "DAT"; 00951 @operands = ($value); 00952 } 00953 else 00954 { 00955 # a valid extended opcode 00956 my $operand_a = ($value>>10) & 63; 00957 00958 push @operands, $self->decode_operand($operand_a, 00959 $address + 1, 00960 \@memdata); 00961 } 00962 } 00963 00964 # Convert the memdata into presentable hex values 00965 @memdata = map { defined $_ ? sprintf "%04x", $_ : '----' } @memdata; 00966 00967 # Construct the output line 00968 push @lines, sprintf "%-16s%-8s%-32s ; %04x: %s", 00969 "", 00970 $opcodename, 00971 (join ", ", @operands), 00972 $address, 00973 (join " ", @memdata); 00974 00975 # Skip the data we've just read off the address list 00976 for (2..scalar(@memdata)) 00977 { 00978 # We want to skip only the entries from the list that are actually 00979 # valid - the core might be discontinuous. 00980 if (defined $addresses[0] && 00981 $addresses[0] == $address+1) 00982 { 00983 shift @addresses; 00984 } 00985 $address++; 00986 } 00987 $lastaddress = $address; 00988 } 00989 00990 return join "\n", @lines, ""; 00991 } 00992 00993 00994 ## 00995 # Decode an operand into a string. 00996 # 00997 # @param[in] $self IO object 00998 # @param[in] $operand Operand value 00999 # @param[in] $address Address of 'nextword' 01000 # @param[in,out] $memdataref Reference to an address of data read from memory 01001 # which we will update if we read any data 01002 # 01003 # @return Operand definition string 01004 sub decode_operand 01005 { 01006 my ($self, $operand, $address, $memdataref) = @_; 01007 01008 # 0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order) 01009 if ($operand >= 0 && $operand <= 7) 01010 { 01011 return $DCPU::InvRegisterMap{ $operand & 7 }; 01012 } 01013 # 0x08-0x0f: [register] 01014 elsif ($operand >= 0x8 && $operand <= 0xf) 01015 { 01016 return '[' . $DCPU::InvRegisterMap{ $operand & 7 } . ']'; 01017 } 01018 # 0x10-0x17: [next word + register] 01019 elsif ($operand >= 0x10 && $operand <= 0x17) 01020 { 01021 my $nextword = $self->decode_nextword($address, $memdataref); 01022 return "[$nextword + " . $DCPU::InvRegisterMap{ $operand & 7 } . ']'; 01023 } 01024 # 0x18: POP / [SP++] 01025 elsif ($operand == 0x18) 01026 { 01027 return 'POP'; 01028 } 01029 # 0x19: PEEK / [SP] 01030 elsif ($operand == 0x19) 01031 { 01032 return 'PEEK'; 01033 } 01034 # 0x1a: PUSH / [--SP] 01035 elsif ($operand == 0x1a) 01036 { 01037 return 'PUSH'; 01038 } 01039 # 0x1b: SP 01040 elsif ($operand == 0x1b) 01041 { 01042 return 'SP'; 01043 } 01044 # 0x1c: PC 01045 elsif ($operand == 0x1c) 01046 { 01047 return 'PC'; 01048 } 01049 # 0x1d: O 01050 elsif ($operand == 0x1d) 01051 { 01052 return 'O'; 01053 } 01054 # 0x1e: [next word] 01055 elsif ($operand == 0x1e) 01056 { 01057 my $nextword = $self->decode_nextword($address, $memdataref); 01058 return "[$nextword]"; 01059 } 01060 # 0x1f: next word (literal) 01061 elsif ($operand == 0x1f) 01062 { 01063 my $nextword = $self->decode_nextword($address, $memdataref); 01064 return "$nextword"; 01065 } 01066 # 0x20-0x3f: literal value 0x00-0x1f (literal) 01067 elsif ($operand >= 0x20 && $operand <= 0x3f) 01068 { 01069 return $operand - 0x20; 01070 } 01071 else 01072 { 01073 return "!Invalid: $operand!"; 01074 } 01075 } 01076 01077 01078 ## 01079 # Read the next word from a memory address, whilst decoding operands. 01080 # Apply any relocations that we know about such that the decode comes 01081 # out as readable. 01082 # 01083 # The actual value in memory is appended to the memdataref list. 01084 # If the memory is not known, we return '?' and an undef entry in the array. 01085 # 01086 # @param[in] $self IO object 01087 # @param[in] $address Address of 'nextword' 01088 # @param[in,out] $memdataref Reference to an address of data read from memory 01089 # which we will update if we read any data 01090 # 01091 # @return extword string 01092 sub decode_nextword 01093 { 01094 my ($self, $address, $memdataref) = @_; 01095 my $dasm = $self->{'dasm'}; 01096 my $core = $dasm->{'core'}; 01097 01098 my $value = $dasm->{'core'}->{$address}; 01099 01100 # We're using it, so push it on to our stack 01101 push @$memdataref, $value; 01102 01103 if (!defined $value) 01104 { 01105 # No value, which means that the core is truncated or discontinuous 01106 # around an invalid location, so we return '?' 01107 return '?'; 01108 } 01109 01110 # Check whether we have a relocation at that address 01111 if (defined $dasm->{'relocations'}->{$address}) 01112 { 01113 # We do... which means that we have to perform report the symbol 01114 # of the relocation as part of a calculation. 01115 01116 if ($value == 0) 01117 { 01118 # However, the offset appears to be 0, so we can just use the 01119 # symbol name. 01120 return "$dasm->{'relocations'}->{$address}"; 01121 } 01122 else 01123 { 01124 return "($value + $dasm->{'relocations'}->{$address})"; 01125 } 01126 } 01127 01128 if ($value >= 256) 01129 { 01130 return sprintf "0x%04x", $value; 01131 } 01132 01133 return sprintf "0x%02x", $value; 01134 } 01135 01136 01137 # Must return true 01138 1;