DASM
|
00001 #!/usr/bin/perl -w 00002 ## 00003 # DASM DCPU-16 source assembler class. 00004 # 00005 # This class provides the implementation of the source code assembler 00006 # for DASM. 00007 # 00008 # The files we support end in '.dasm'. 00009 # 00010 # Source file format: 00011 # 00012 # @code 00013 # [<label>] <opcode> <operand a>, <operand b> 00014 # [<label>] <ext-opcode> <operand> 00015 # ; <comment> 00016 # @endcode 00017 # 00018 # 00019 # It also provides a disassembler which can produce source code which 00020 # can be passed back to the assembler. If the source comes from one 00021 # of the binary formats that doesn't offer relocations or symblols, 00022 # the content will be more difficult to parse interpret, but should still 00023 # work. 00024 # 00025 # The structures we use are intended to be able to support libraries of 00026 # relocatable, partially linked code. This has not yet been tested and 00027 # the lack of any indication of which symbols are local and which are 00028 # imported may be a problem in the future. 00029 # 00030 # In addition to the 1.1 DCPU-16 specification, the assembler supports: 00031 # 00032 # - Character literals for values: @n 00033 # `'X'` will evaluate to the number 88. 00034 # 00035 # - Expression calculations: @n 00036 # `1+4` will evaluate to the number 5. @n 00037 # Expression calculations are limited, and do not honour precedence 00038 # rules. Use brackets for clarity. 00039 # 00040 # - Symbols may be used in additive calculations: @n 00041 # `data + 4` will evaluate to the value of symbol '`data`' plus 4. @n 00042 # Symbols may (internally) be addresses or constants: 00043 # - Address symbols are created through the standard labels. 00044 # - Address symbols may (in future versions) be subject to relocations. 00045 # 00046 # - Symbols may be 'local' by preceding with a '`$`' character: @n 00047 # Any symbol preceded by a '`$`' will have the last label used prepended 00048 # to the symbol name. This allows labels and references to be used 00049 # within a routine without polluting the global namespace. 00050 # 00051 # - Additional instructions: 00052 # - `DAT <value[,<value> ]*` @n 00053 # ... will write literal values to the core. @n 00054 # The DAT instruction is supported in the assembler, and generated in 00055 # the disassembler for undefined instruction sequences. 00056 # - `RES <value>` @n 00057 # ... will reserve \<value\> words in the core. @n 00058 # - `CONST value` @n 00059 # ... will define a constant symbol with the name given in the label. 00060 # - `ADDRESS <value>` @n 00061 # ... will define an address symbol with the name given in the label. 00062 # - `RN <register>` @n 00063 # ... will define a register binding for the name given in the label. @n 00064 # Register bindings allow you to call a register by a different name, 00065 # which can make the code mode readable. 00066 # 00067 # - Directives: @n 00068 # `.<directive> <parameters>` allows additional meta operations to be 00069 # performed. Directives supported: 00070 # - `.ORIGIN address` @n 00071 # ... Sets the address for subsequent compilation 00072 # - `.INCLUDE file` @n 00073 # ...Includes another file at the current location 00074 # 00075 # @note We should really consider changing the structure so that the 00076 # DASMIO objects inherit from a parser class, rather than having 00077 # to reference `$self->{'dasm'}` for parser operations like 00078 # error(). 00079 # 00080 # @file 00081 # @author Justin Fletcher 00082 # 00083 00084 package DASMIO::Source; 00085 00086 use DASM; 00087 use DCPU; 00088 00089 use Carp; 00090 00091 00092 ## Whether we're debugging the 'DAT' instruction 00093 my $debug_dat = 0; 00094 00095 00096 ## Whether we're debugging the symbol processing 00097 my $debug_sym = 0; 00098 00099 00100 ## How much we reserve in our disassembly before changing to .ORIGIN 00101 my $disassemble_res_limit = 257; 00102 00103 ## 00104 # Create an object upon which we can work. 00105 # 00106 # @param[in] $proto Prototype object, or this class's name 00107 # @param[in] $dasm DASM object we're working on 00108 # 00109 # @return new object 00110 sub new 00111 { 00112 my $proto = shift; 00113 my $dasm = shift; 00114 my $class = ref($proto) || $proto; 00115 my $self = { 00116 00117 # The last label we encountered, for local labelling 00118 'lastlabel' => undef, 00119 00120 # Register mappings, new name to old name 00121 'regbinding' => {}, 00122 00123 # Register mapping regular expression (generics) 00124 'regbindingre_g' => '[ABCXYZIJ]', 00125 00126 # Register mapping regular expression (stack pointer) 00127 'regbindingre_sp' => '(?:SP)', 00128 00129 # Register mapping regular expression (overflow) 00130 'regbindingre_o' => '(?:O)', 00131 00132 # Register mapping regular expression (program counter) 00133 'regbindingre_pc' => '(?:PC)', 00134 }; 00135 00136 bless $self, $class; 00137 00138 $self->{'dasm'} = $dasm; 00139 00140 return $self; 00141 } 00142 00143 00144 ## 00145 # Identify whether we can handle reading or writing a given file 00146 # by its filename. 00147 # 00148 # @param[in] $self Our object 00149 # @param[in] $filename Filename we're accessing 00150 # @param[in] $type 'read' or 'write' 00151 # 00152 # @retval 1 if we can access it 00153 # @retval 0 if we cannot access it 00154 sub identify 00155 { 00156 my ($self, $filename, $type) = @_; 00157 00158 if ($filename =~ /\.dasm$/) 00159 { 00160 return 1; 00161 } 00162 00163 return 0; 00164 } 00165 00166 00167 ## 00168 # List the extensions we support. 00169 # 00170 # @param[in] $self Our object 00171 # 00172 # @return arrayref of extensions 00173 sub extensions 00174 { 00175 return [ 'dasm' ]; 00176 } 00177 00178 00179 ## 00180 # Parse an instruction line. 00181 # 00182 # @param[in] $self IO object 00183 # @param[in] $line Line to process 00184 sub parseline 00185 { 00186 my ($self, $line) = @_; 00187 my $dasm = $self->{'dasm'}; 00188 00189 # Trim any trailing newline that might have been left 00190 chomp $line; 00191 00192 # Directives processed first - consider moving these to a callback. 00193 if ($line =~ s/^\.([A-Za-z]+)\s*//) 00194 { 00195 my $directive = uc $1; 00196 00197 # Check if we understand the directive 00198 my $dirfunc = 'directive_' . $directive; 00199 if ($self->can($dirfunc)) 00200 { 00201 # If we can invoke it, do so, providing the line to parse 00202 $self->$dirfunc(\$line); 00203 00204 # Check for trailing whitespace/comments 00205 if ($line !~ s/^\s*$// && 00206 $line !~ s/^\s*;//) 00207 { 00208 $dasm->error("Garbage at end of '$directive' directive"); 00209 } 00210 } 00211 else 00212 { 00213 $dasm->error("Unrecognised directive '$directive'"); 00214 } 00215 00216 return; 00217 } 00218 00219 00220 # We hold on to the label and PC location so that we can assign at the 00221 # end if they were set. 00222 my $label = undef; 00223 my $startpc = $dasm->{'pc'}; 00224 00225 # The initial part of the line may be a label, starting with a ':' 00226 # However, the label isn't always for this address; it may be a constant 00227 # label or some other type of label. So we merely remember it until we 00228 # know what sort of label it is. 00229 if ($line =~ s/^:([\w\$]+)//) 00230 { 00231 $label = $1; 00232 if ($label =~ /^\$/) 00233 { 00234 # A local label, so prepend the last label we saw 00235 if (!defined $self->{'lastlabel'}) 00236 { 00237 $dasm->error("Cannot use local label without a recent label"); 00238 } 00239 $label = $self->{'lastlabel'} . $label; 00240 } 00241 else 00242 { 00243 $self->{'lastlabel'} = $label; 00244 } 00245 } 00246 00247 # Skip any indentation 00248 $line =~ s/^\s*//; 00249 00250 # Skip any empty lines or comments 00251 if ($line eq '' || 00252 $line =~ s/^;//) 00253 { 00254 goto line_complete; 00255 } 00256 00257 # Extract the opcode, if there is one 00258 if ($line =~ s/^([a-zA-Z0-9_]+)\b\s*//) 00259 { 00260 my $opcode = uc $1; 00261 00262 # First check for magic opcodes that we introduced: 00263 my $opfunc = 'op_' . $opcode; 00264 if ($self->can($opfunc)) 00265 { 00266 # If we can invoke it, do so, providing the line to parse 00267 $self->$opfunc(\$line, \$label); 00268 00269 # Check for trailing whitespace/comments 00270 if ($line !~ s/^\s*// && 00271 $line !~ s/^\s*;//) 00272 { 00273 $dasm->error("Garbage at end of '$opcode' line"); 00274 } 00275 } 00276 00277 # Now the real opcodes 00278 elsif ($DCPU::BasicOps{$opcode}) 00279 { 00280 # We have a basic opcode 00281 my $opval = $DCPU::BasicOps{$opcode}; 00282 00283 # Parse each operand 00284 my $operand_a = $self->parse_operand(\$line); 00285 if ($line !~ s/^\s*,\s*//) 00286 { 00287 $dasm->error("Expected operand b to opcode '$opcode'"); 00288 } 00289 00290 my $operand_b = $self->parse_operand(\$line); 00291 if ($line !~ s/^\s*$// && 00292 $line !~ s/^\s*;//) 00293 { 00294 $dasm->error("Garbage after operand b to opcode '$opcode'"); 00295 } 00296 00297 if ($debug_asm) 00298 { 00299 printf "%04x : %-6s : %x\n", $dasm->{'pc'}, $opcode, $opval; 00300 printf " %02x : %-9s : %s\n", $operand_a->{'opval'}, 00301 $operand_a->{'type'}, 00302 ($operand_a->{'value'} // ''); 00303 printf " %02x : %-9s : %s\n", $operand_b->{'opval'}, 00304 $operand_b->{'type'}, 00305 ($operand_b->{'value'} // ''); 00306 } 00307 $dasm->storetopc($opval + 00308 ($operand_a->{'opval'} << 4) + 00309 ($operand_b->{'opval'} << 10)); 00310 00311 if (defined $operand_a->{'nextword'}) 00312 { 00313 if ($operand_a->{'nextword'}->{'symbol'}) 00314 { 00315 # Relocation symbol, update the table 00316 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'}; 00317 } 00318 $dasm->storetopc($operand_a->{'nextword'}->{'value'}); 00319 } 00320 00321 if (defined $operand_b->{'nextword'}) 00322 { 00323 if ($operand_b->{'nextword'}->{'symbol'}) 00324 { 00325 # Relocation symbol, update the table 00326 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_b->{'nextword'}->{'symbol'}; 00327 } 00328 $dasm->storetopc($operand_b->{'nextword'}->{'value'}); 00329 } 00330 } 00331 elsif ($DCPU::ExtendedOps{$opcode}) 00332 { 00333 # We have an extended opcode 00334 my $opval = $DCPU::ExtendedOps{$opcode}; 00335 00336 # Parse the operand 00337 my $operand_a = $self->parse_operand(\$line); 00338 if ($line !~ s/^\s*$// && 00339 $line !~ s/^\s*;//) 00340 { 00341 $dasm->error("Garbage after operand a to opcode '$opcode'"); 00342 } 00343 00344 $dasm->storetopc(($opval<<4) + 00345 ($operand_a->{'opval'}<<10)); 00346 00347 # Update the core with the following words 00348 if (defined $operand_a->{'nextword'}) 00349 { 00350 if ($operand_a->{'nextword'}->{'symbol'}) 00351 { 00352 # Relocation symbol, update the table 00353 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'}; 00354 } 00355 $dasm->storetopc($operand_a->{'nextword'}->{'value'}); 00356 } 00357 } 00358 else 00359 { 00360 $dasm->error("Bad opcode '$opcode'"); 00361 } 00362 } 00363 else 00364 { 00365 $dasm->error("Bad source '$line'"); 00366 } 00367 00368 line_complete: 00369 if (defined $label) 00370 { 00371 $dasm->label($startpc, $label); 00372 } 00373 } 00374 00375 00376 ## 00377 # Bind register names, to give them other names than the default. 00378 # 00379 # @param[in] $self This source object 00380 # @param[in] $new New register name 00381 # @param[in] $old Old register name, or undef to remove mapping 00382 sub rebindregister 00383 { 00384 my ($self, $new, $old) = @_; 00385 00386 # Remember the mapping 00387 if (defined $old) 00388 { 00389 $self->{'regbinding'}->{uc $new} = uc $old; 00390 } 00391 else 00392 { 00393 delete $self->{'regbinding'}->{uc $new}; 00394 } 00395 00396 # Generate the regex to use for matching registers 00397 my $re = ""; 00398 $re = join "|", grep { $self->{'regbinding'}->{$_} ne 'O' && 00399 $self->{'regbinding'}->{$_} ne 'SP' && 00400 $self->{'regbinding'}->{$_} ne 'PC' } 00401 keys %{$self->{'regbinding'}}; 00402 if ($re eq '') 00403 { 00404 $re = "[ABCXYZIJ]"; 00405 } 00406 else 00407 { 00408 $re = "(?:[ABCXYZIJ]|$re)"; 00409 } 00410 $self->{'regbindingre_g'} = $re; 00411 00412 # Now the specific registers 00413 00414 # Overflow 00415 $re = join "|", 'O', grep { $self->{'regbinding'}->{$_} eq 'O' } 00416 keys %{$self->{'regbinding'}}; 00417 $self->{'regbindingre_o'} = "(?:$re)"; 00418 00419 # Stack pointer 00420 $re = join "|", 'SP', grep { $self->{'regbinding'}->{$_} eq 'SP' } 00421 keys %{$self->{'regbinding'}}; 00422 $self->{'regbindingre_sp'} = "(?:$re)"; 00423 00424 # Program counter 00425 $re = join "|", 'PC', grep { $self->{'regbinding'}->{$_} eq 'PC' } 00426 keys %{$self->{'regbinding'}}; 00427 $self->{'regbindingre_pc'} = "(?:$re)"; 00428 } 00429 00430 ## 00431 # Map a register name to a standard register name. 00432 # 00433 # @param[in] $self This object 00434 # @param[in] $reg Register to lookup 00435 sub regbinding 00436 { 00437 my ($self, $reg) = @_; 00438 $reg = uc $reg; 00439 if (defined $self->{'regbinding'}->{$reg}) 00440 { 00441 $reg = $self->{'regbinding'}->{$reg}; 00442 } 00443 00444 return $reg; 00445 } 00446 00447 00448 ## 00449 # Special opcode 'DAT' to define data in the core. 00450 # 00451 # @param[in] $self This source object 00452 # @param[in,out] $lineref Reference to the line end we are parsing, 00453 # updated to leave remaining text 00454 # @param[in,out] $labelref Reference to the label for this line, updated to 00455 # undef if we should not defined a label for this 00456 # initial address 00457 sub op_DAT 00458 { 00459 my ($self, $lineref, $labelref) = @_; 00460 my $dasm = $self->{'dasm'}; 00461 00462 # Parse operands until we cannot see another item 00463 while (1) 00464 { 00465 # We want to allow string data as well, for simplicity. 00466 # We do NOT handle UTF-8 - everything is 8 bit. 00467 if ($$lineref =~ s/^\s*"([^"]+)"// || # "d string 00468 $$lineref =~ s/^\s*'([^']+)'//) # 'd string 00469 { 00470 print " string: '$1'\n" if ($debug_dat); 00471 for my $chr (unpack "c*", $1) 00472 { 00473 $dasm->storetopc($chr); 00474 } 00475 } 00476 else 00477 { 00478 my ($constant, $symname) = $self->parse_constant($lineref); 00479 if ($symname) 00480 { 00481 print " symbol $symname\n" if ($debug_dat); 00482 # Relocation symbol, update the table 00483 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $symname; 00484 } 00485 print " value $constant\n" if ($debug_dat); 00486 $dasm->storetopc($constant); 00487 } 00488 00489 # Only continue if we have a comma followed by something to parse 00490 last if ($$lineref !~ s/^\s*,\s*//); 00491 } 00492 } 00493 00494 00495 ## 00496 # Special opcode 'RES' to reserve a block of memory in the core. 00497 # 00498 # @param[in] $self This source object 00499 # @param[in,out] $lineref Reference to the line end we are parsing, 00500 # updated to leave remaining text 00501 # @param[in,out] $labelref Reference to the label for this line, updated to 00502 # undef if we should not defined a label for this 00503 # initial address 00504 sub op_RES 00505 { 00506 my ($self, $lineref, $labelref) = @_; 00507 my $dasm = $self->{'dasm'}; 00508 00509 # Parse operands until we cannot see another item 00510 my ($constant, $symname) = $self->parse_constant($lineref, 'constant', 'nowrap'); 00511 00512 if ($constant < 0) 00513 { 00514 $dasm->error("Attempt to reserve negative space ($constant)"); 00515 } 00516 elsif ($constant >= 0x10000) 00517 { 00518 $dasm->error(sprintf "Attempt to reserve huge space (0x%x)", $constant); 00519 } 00520 00521 my $pc = $dasm->{'pc'} + $constant; 00522 if ($pc >= 0x10000) 00523 { 00524 $dasm->error(sprintf "Reservation would overflow core (%d words overflows to 0x%x)", $constant, $pc); 00525 } 00526 00527 $dasm->{'pc'} = $pc; 00528 } 00529 00530 00531 ## 00532 # Special opcode 'CONST' to define a constant symbol. 00533 # 00534 # @param[in] $self This source object 00535 # @param[in,out] $lineref Reference to the line end we are parsing, 00536 # updated to leave remaining text 00537 # @param[in,out] $labelref Reference to the label for this line, updated to 00538 # undef if we should not defined a label for this 00539 # initial address 00540 sub op_CONST 00541 { 00542 my ($self, $lineref, $labelref) = @_; 00543 my $dasm = $self->{'dasm'}; 00544 00545 # Constant value symbol declaration 00546 if (defined $$labelref) 00547 { 00548 my ($constant, $symname) = $self->parse_constant($lineref, 'constant'); 00549 $dasm->setsymbol($$labelref, $constant, 'constant'); 00550 $$labelref = undef; 00551 } 00552 else 00553 { 00554 $dasm->error("CONST must be supplied with a label"); 00555 } 00556 } 00557 00558 00559 ## 00560 # Special opcode 'ADDRESS' to define an address symbol. 00561 # 00562 # @param[in] $self This source object 00563 # @param[in,out] $lineref Reference to the line end we are parsing, 00564 # updated to leave remaining text 00565 # @param[in,out] $labelref Reference to the label for this line, updated to 00566 # undef if we should not defined a label for this 00567 # initial address 00568 sub op_ADDRESS 00569 { 00570 my ($self, $lineref, $labelref) = @_; 00571 my $dasm = $self->{'dasm'}; 00572 00573 # Address value symbol declaration 00574 if (defined $$labelref) 00575 { 00576 my ($constant, $symname) = $self->parse_constant($lineref, 'constant'); 00577 $dasm->setsymbol($$labelref, $constant, 'address'); 00578 $$labelref = undef; 00579 } 00580 else 00581 { 00582 $self->error("ADDRESS must be supplied with a label"); 00583 } 00584 } 00585 00586 00587 ## 00588 # Special opcode 'RN' to define a register name (binding) 00589 # 00590 # @param[in] $self This source object 00591 # @param[in,out] $lineref Reference to the line end we are parsing, 00592 # updated to leave remaining text 00593 # @param[in,out] $labelref Reference to the label for this line, updated to 00594 # undef if we should not defined a label for this 00595 # initial address 00596 sub op_RN 00597 { 00598 my ($self, $lineref, $labelref) = @_; 00599 my $dasm = $self->{'dasm'}; 00600 00601 # Constant value symbol declaration 00602 if (defined $$labelref) 00603 { 00604 my $binding; 00605 if ($$lineref =~ s/^([A-Za-z]\w*)//) 00606 { 00607 $binding = $1; 00608 } 00609 else 00610 { 00611 $dasm->error("Binding not specified for register '$$labelref'"); 00612 } 00613 00614 # Repeatedly dereference the binding until we have a final one, 00615 # but only allow a limited number of bindings, otherwise we might 00616 # be in a loop. 00617 my $count = 0; 00618 while (defined $self->{'regbinding'}->{uc $binding} && 00619 $count < 20) 00620 { 00621 $binding = $self->{'regbinding'}->{uc $binding}; 00622 $count++ 00623 } 00624 if ($count >= 20) 00625 { 00626 $dasm->error("Too many levels of binding for register '$$labelref'"); 00627 } 00628 00629 if ($$labelref =~ /^[ABCXYZIJ]|SP|PC|O$/i) 00630 { 00631 $dasm->error("Core register $$labelref cannot be rebound"); 00632 } 00633 00634 $self->rebindregister($$labelref, $binding); 00635 00636 $$labelref = undef; 00637 } 00638 else 00639 { 00640 $dasm->error("RN must be supplied with a register name to bind"); 00641 } 00642 } 00643 00644 00645 ## 00646 # Assembler directive 'ORIGIN' to change compilation address. 00647 # 00648 # @param[in] $self This source object 00649 # @param[in,out] $lineref Reference to the line end we are parsing, 00650 # updated to leave remaining text 00651 sub directive_ORIGIN 00652 { 00653 my ($self, $lineref) = @_; 00654 my $dasm = $self->{'dasm'}; 00655 00656 my ($constant, $symname) = $self->parse_constant($lineref, 'constant', 'nowrap'); 00657 00658 if ($constant < 0) 00659 { 00660 $dasm->error("ORIGIN must be positive (not $constant)"); 00661 } 00662 elsif ($constant >= 0x10000) 00663 { 00664 $dasm->error(sprintf "ORIGIN must be within core (0x%x > 0xFFFF)", $constant); 00665 } 00666 $dasm->{'pc'} = $constant; 00667 } 00668 00669 00670 ## 00671 # Assembler directive 'ORIGIN' to change compilation address. 00672 # 00673 # @param[in] $self This source object 00674 # @param[in,out] $lineref Reference to the line end we are parsing, 00675 # updated to leave remaining text 00676 sub directive_INCLUDE 00677 { 00678 my ($self, $lineref) = @_; 00679 my $dasm = $self->{'dasm'}; 00680 00681 if ($$lineref =~ s/"(.*?)"//) 00682 { 00683 $dasm->readfile($1); 00684 } 00685 else 00686 { 00687 $dasm->error("Unrecognised INCLUDE '$$lineref'"); 00688 } 00689 } 00690 00691 00692 ## 00693 # Parse operands and return the values of them 00694 # 00695 # @param[in] $self IO object 00696 # @param[in,out] $lineref Reference to the line that we're parsing 00697 # @param[in] $wrap 'nowrap' : do not wrap large/small numbers 00698 # 'wrap' : wrap numbers (default) 00699 # 00700 # @return hashref of operand details: 00701 # 'type' => 'reg' (register value : register=>regname) 00702 # 'regaddr' (register address : register=>regname) 00703 # 'regindex' (registered indexed : register>regname) 00704 # 'pop' (stack pop) 00705 # 'peek' (stack peek) 00706 # 'push' (stack push) 00707 # 'sp' (stack pointer) 00708 # 'pc' (program counter) 00709 # 'o' (overflow) 00710 # 'indirect' (dereference next address) 00711 # 'load' (load next address) 00712 # 'loadshort' (load short value : value=>short value) 00713 # 'opval' => value in the operand register 00714 # 'register' => register, if applicable 00715 # 'nextword' => next word data, if applicable 00716 # 'value' => value, if applicable 00717 sub parse_operand 00718 { 00719 my ($self, $lineref, $wrap) = @_; 00720 my $dasm = $self->{'dasm'}; 00721 00722 $wrap = 'wrap' if (!defined $wrap); 00723 00724 #0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order) 00725 if ($$lineref =~ s/^($self->{'regbindingre_g'})\b//i) 00726 { 00727 my $reg = $self->regbinding($1); 00728 return { 00729 'type' => 'reg', 00730 'register' => $reg, 00731 'opval' => 0 + $DCPU::RegisterMap{$reg} 00732 }; 00733 } 00734 #0x08-0x0f: [register] 00735 elsif ($$lineref =~ s/^\[\s*($self->{'regbindingre_g'})\s*\]//i) 00736 { 00737 my $reg = $self->regbinding($1); 00738 return { 00739 'type' => 'regaddr', 00740 'register' => $reg, 00741 'opval' => 0x08 + $DCPU::RegisterMap{$reg} 00742 }; 00743 } 00744 #0x10-0x17: [next word + register] 00745 # 0x1e: [next word] 00746 elsif ($$lineref =~ s/^\[\s*//) 00747 { 00748 # Variation on syntax, as part of the tests at: 00749 # http://0x10cwiki.com/wiki/Comparison_of_Developer_Tools 00750 # allows for [register + next word] as well. 00751 if ($$lineref =~ s/^($self->{'regbindingre_g'})\s*\+\s*//i) 00752 { 00753 my $reg = $self->regbinding($1); 00754 00755 # This is the variation. 00756 my $symbol = $self->parse_symbol($lineref); 00757 if ($$lineref =~ s/^\s*\]//) 00758 { 00759 return { 00760 'type' => 'regindex', 00761 'register' => $reg, 00762 'nextword' => $symbol, 00763 'opval' => 0x10 + $DCPU::RegisterMap{$reg} 00764 }; 00765 00766 } 00767 else 00768 { 00769 $dasm->error("Garbage whilst parsing inverted indexed operand"); 00770 } 00771 } 00772 else 00773 { 00774 my $symbol = $self->parse_symbol($lineref); 00775 if ($$lineref =~ s/^\s*\+\s*($self->{'regbindingre_g'})\s*\]//i) 00776 { 00777 my $reg = $self->regbinding($1); 00778 return { 00779 'type' => 'regindex', 00780 'register' => $reg, 00781 'nextword' => $symbol, 00782 'opval' => 0x10 + $DCPU::RegisterMap{$reg} 00783 }; 00784 } 00785 elsif ($$lineref =~ s/^\s*\]//) 00786 { 00787 return { 00788 'type' => 'indirect', 00789 'nextword' => $symbol, 00790 'opval' => 0x1e 00791 }; 00792 } 00793 else 00794 { 00795 $dasm->error("Garbage whilst parsing referenced operand"); 00796 } 00797 } 00798 } 00799 # 0x18: POP / [SP++] 00800 elsif ($$lineref =~ s/^POP\b//) 00801 { 00802 return { 00803 'type' => 'pop', 00804 'opval' => 0x18 00805 }; 00806 } 00807 # 0x19: PEEK / [SP] 00808 elsif ($$lineref =~ s/^PEEK\b//) 00809 { 00810 return { 00811 'type' => 'peek', 00812 'opval' => 0x19 00813 }; 00814 } 00815 # 0x1a: PUSH / [--SP] 00816 elsif ($$lineref =~ s/^PUSH\b//i) 00817 { 00818 return { 00819 'type' => 'push', 00820 'opval' => 0x1a 00821 }; 00822 } 00823 # 0x1b: SP 00824 elsif ($$lineref =~ s/^$self->{'regbindingre_sp'}\b//i) 00825 { 00826 return { 00827 'type' => 'sp', 00828 'opval' => 0x1b 00829 }; 00830 } 00831 # 0x1c: PC 00832 elsif ($$lineref =~ s/^$self->{'regbindingre_pc'}\b//i) 00833 { 00834 return { 00835 'type' => 'pc', 00836 'opval' => 0x1c 00837 }; 00838 } 00839 # 0x1d: O 00840 elsif ($$lineref =~ s/^$self->{'regbindingre_o'}\b//i) 00841 { 00842 return { 00843 'type' => 'o', 00844 'opval' => 0x1d 00845 }; 00846 } 00847 # 0x1f: next word (literal) 00848 #0x20-0x3f: literal value 0x00-0x1f (literal) 00849 else 00850 { 00851 # Literal value, which could be a short-form literal value, 00852 # or a symbol 00853 my $symbol = $self->parse_symbol($lineref); 00854 if ($symbol->{'type'} eq 'constant') 00855 { 00856 if ($symbol->{'value'} >= 0 && 00857 $symbol->{'value'} < 0x20) 00858 { 00859 return { 00860 'type' => 'loadshort', 00861 'opval' => 0x20 + $symbol->{'value'}, 00862 'value' => $symbol->{'value'} 00863 }; 00864 } 00865 } 00866 00867 # Check for overflow 00868 if ($symbol->{'type'} eq 'constant' && 00869 $symbol->{'value'} > 0xffff && 00870 $wrap eq 'wrap') 00871 { 00872 $dasm->error( sprintf "Literal value (0x%x) too large", $symbol->{'value'} ); 00873 } 00874 # Check for underflow 00875 if ($symbol->{'type'} eq 'constant' && 00876 $symbol->{'value'} < 0 && 00877 $wrap eq 'wrap') 00878 { 00879 # I think we'll allow negative numbers to wrap down, as that's 00880 # useful in the assembler. But those which wrap twice are too 00881 # small. 00882 if ($symbol->{'value'} < -0x10000) 00883 { 00884 $dasm->error( sprintf "Literal value (-0x%x) too small", -$symbol->{'value'} ); 00885 } 00886 $symbol->{'value'} += 0x10000; 00887 } 00888 return { 00889 'type' => 'load', 00890 'opval' => 0x1f, 00891 'nextword' => $symbol 00892 }; 00893 } 00894 } 00895 00896 00897 ## 00898 # Parse a symbol name or value. 00899 # 00900 # @param[in] $self DASM object 00901 # @param[in,out] $lineref Reference to the line that we're parsing 00902 # 00903 # @return hashref of symbol details: 00904 # 'type' => 'constant' : a constant value 00905 # 'address' : an address constant 00906 # 'value' => value 00907 # 'symbol' => any symbol name to use as relocation 00908 sub parse_symbol 00909 { 00910 my ($self, $lineref, $nonterm) = @_; 00911 my $dasm = $self->{'dasm'}; 00912 00913 my %symbol = ( 00914 'type' => 'unknown', 00915 'value' => 0, 00916 ); 00917 00918 if (!defined $lineref) 00919 { 00920 confess "parse_symbol called without a parameter"; 00921 } 00922 if (ref $lineref ne 'SCALAR') 00923 { 00924 confess "parse_symbol called with ".ref($lineref)." but expected SCALAR"; 00925 } 00926 00927 # Skip any leading spaces 00928 $$lineref =~ s/^\s*//; 00929 00930 print "parse_symbol: $$lineref\n" if ($debug_sym); 00931 00932 if ($$lineref =~ s/^\(//) 00933 { 00934 # Opening bracket for an expression, so parse inside the 00935 # brackets 00936 my $innersymbol = $self->parse_symbol($lineref); 00937 00938 %symbol = %$innersymbol; 00939 00940 # Check that we ended properly 00941 if ($$lineref !~ s/^\s*\)//) 00942 { 00943 $dasm->error("Missing closing bracket"); 00944 } 00945 } 00946 elsif ($$lineref =~ s/^([\$A-Za-z_][\w\$]*)//) 00947 { 00948 # Symbol reference 00949 my $symname = $1; 00950 00951 # Check for local symbols and redefine them for the local label 00952 if ($symname =~ /^\$/) 00953 { 00954 if (!defined $self->{'lastlabel'}) 00955 { 00956 $dasm->error("Cannot use a local label before a label has been set"); 00957 } 00958 $symname = $self->{'lastlabel'} . $symname; 00959 } 00960 00961 my $sym = $dasm->getsymbol($symname); 00962 # Hmm. Whilst decoding the symbol here makes some sense, we could 00963 # just leave it for the linkage stage. 00964 #if (defined $sym) 00965 #{ 00966 # # Symbol has been defined 00967 # %symbol = ( %$sym, 00968 # 'symbol' => $symname 00969 # ); 00970 #} 00971 #else 00972 { 00973 %symbol = ( 00974 'type' => 'address', 00975 'value' => 0, 00976 'symbol' => $symname 00977 ); 00978 } 00979 } 00980 elsif ($$lineref =~ s/^([+-]?0x[A-Fa-f0-9]+)// || 00981 $$lineref =~ s/^([+-]?\d+)//) 00982 { 00983 # Hex or decimal value 00984 my $value = eval $1; 00985 %symbol = ( 00986 'type' => 'constant', 00987 'value' => $value 00988 ); 00989 } 00990 elsif ($$lineref =~ s/^'(.)'//) 00991 { 00992 # character, so convert to an ordinal 00993 %symbol = ( 00994 'type' => 'constant', 00995 'value' => ord($1) 00996 ); 00997 } 00998 else 00999 { 01000 if ($$lineref eq '') 01001 { 01002 $dasm->error("No symbol supplied"); 01003 } 01004 $dasm->error("Unrecognised symbol parsing '$$lineref'"); 01005 } 01006 01007 # We need to special case the syntax here, because the defined format 01008 # of the assembler allows for register names here (this could be 01009 # refined by having a more complex symbol definition). 01010 if ($$lineref =~ /\+\s*$self->{'regbindingre_g'}\b/i) 01011 { 01012 # This is a relative reference, so don't try to perform arithmetic 01013 # here. 01014 return \%symbol; 01015 } 01016 01017 if ($$lineref =~ s/^\s*([\+\-\*\/\%\&\|\^]|<<|>>)//) 01018 { 01019 # Operators - note that this is wrong WRT precedence as it won't 01020 # make *, / and % higher priority than + and -. I'm not fussed 01021 # right now. FIXME 01022 my $operator = $1; 01023 my $second = $self->parse_symbol($lineref); 01024 if (defined $symbol{'symbol'} && 01025 $operator ne '+') 01026 { 01027 $dasm->error("Symbol '$symbol{'symbol'}' not yet defined in arithmetic operation (only additive operations allowed)"); 01028 } 01029 if (defined $symbol{'symbol'} && 01030 defined $second->{'symbol'}) 01031 { 01032 $dasm->error("Relocation by multiple symbols ('$symbol{'symbol'}' and '$second->{'symbol'}') not supported"); 01033 } 01034 01035 if (defined $second->{'symbol'}) 01036 { 01037 # Swap them around so that the symbol is in our local 01038 # This is safe because the only type of symbol operation we 01039 # support is additive. 01040 my %copy = %symbol; 01041 %symbol = %$second; 01042 $second = \%copy; 01043 } 01044 01045 if ($operator eq '+') 01046 { $symbol{'value'} += $second->{'value'}; } 01047 elsif ($operator eq '-') 01048 { $symbol{'value'} -= $second->{'value'}; } 01049 elsif ($operator eq '*') 01050 { $symbol{'value'} *= $second->{'value'}; } 01051 elsif ($operator eq '/') 01052 { $symbol{'value'} /= $second->{'value'}; } 01053 elsif ($operator eq '%') 01054 { $symbol{'value'} %= $second->{'value'}; } 01055 elsif ($operator eq '&') 01056 { $symbol{'value'} &= $second->{'value'}; } 01057 elsif ($operator eq '|') 01058 { $symbol{'value'} |= $second->{'value'}; } 01059 elsif ($operator eq '^') 01060 { $symbol{'value'} ^= $second->{'value'}; } 01061 elsif ($operator eq '<<') 01062 { $symbol{'value'} <<= $second->{'value'}; } 01063 elsif ($operator eq '>>') 01064 { $symbol{'value'} >>= $second->{'value'}; } 01065 else 01066 { 01067 die "Rogue operator $operator - should not happen"; 01068 } 01069 01070 if ($second->{'type'} eq 'address') 01071 { 01072 $symbol{'type'} = $second->{'type'}; 01073 } 01074 } 01075 01076 return \%symbol; 01077 } 01078 01079 ## 01080 # Obtain a constant from an input line. 01081 # 01082 # @param[in] $self DASMIO object 01083 # @param[in,out] $lineref reference to line to read from 01084 # @param[in] $symbol 'constant' : only a constant is acceptable 01085 # 'symbol' : symbol is acceptable (default) 01086 # @param[in] $wrap 'nowrap' : do not wrap large/small numbers 01087 # 'wrap' : wrap numbers (default) 01088 # 01089 # @return $value The value to store 01090 # $symbol Relocation symbol to apply, or none if not found 01091 sub parse_constant 01092 { 01093 my ($self, $lineref, $symbol, $wrap) = @_; 01094 my $dasm = $self->{'dasm'}; 01095 01096 $symbol = 'constant' if (!defined $symbol); 01097 $wrap = 'wrap' if (!defined $wrap); 01098 01099 print "Constant: $$lineref\n" if ($debug_dat); 01100 01101 my $operand = $self->parse_operand($lineref, $wrap); 01102 print " type $operand->{'type'}\n" if ($debug_dat); 01103 01104 # The only types of operand we support here are the 01105 # nextword and constant value operands - 'load' and 'loadshort' 01106 my $sym = undef; 01107 if ($operand->{'type'} eq 'load') 01108 { 01109 $sym = $operand->{'nextword'}; 01110 } 01111 elsif ($operand->{'type'} eq 'loadshort') 01112 { 01113 $sym = { 01114 'type' => 'constant', 01115 'value' => $operand->{'value'} 01116 }; 01117 } 01118 else 01119 { 01120 $dasm->error("Constant operand expected, not type $operand->{'type'}"); 01121 } 01122 01123 if ($sym->{'symbol'} && 01124 $symbol eq 'constant') 01125 { 01126 # A symbol which we know about and which is a constant. 01127 my $symname = $sym->{'symbol'}; 01128 my $symval = $dasm->getsymbol($symname); 01129 if (defined $symval && 01130 $symval->{'type'} eq 'constant') 01131 { 01132 # We can interpret it here immediately 01133 return ($sym->{'value'} + $symval->{'value'}, undef); 01134 } 01135 elsif (defined $symval) 01136 { 01137 $dasm->error("Symbol operand '$symname' (type=$symval->{'type'}) not expected"); 01138 } 01139 else 01140 { 01141 $dasm->error("Symbol operand '$symname' not expected"); 01142 } 01143 } 01144 01145 return ($sym->{'value'}, $symval->{'symbol'}); 01146 } 01147 01148 01149 01150 ########################################################################### 01151 # Disassembly routines 01152 # (these are included in the source parser, so that we can decode the 01153 # compiled files to re-compilable files) 01154 01155 01156 ## 01157 # Write out a compilable source file based on what we have in the core. 01158 # 01159 # @param[in] $self Address object 01160 # 01161 # @return Content to write 01162 sub write 01163 { 01164 my ($self) = @_; 01165 my $dasm = $self->{'dasm'}; 01166 my $core = $dasm->{'core'}; 01167 01168 # Easier access to symbols 01169 my $symbols = $dasm->{'symbols'}; 01170 01171 # Collect a list of address symbols 01172 my %addressSymbols = map { 01173 $symbols->{$_}->{'value'} => $_ 01174 } grep { 01175 $symbols->{$_}->{'type'} eq 'address' 01176 } keys %$symbols; 01177 01178 # The lines we'll return 01179 my @lines; 01180 01181 # If we have any constants, we need to write them out first 01182 my @constants = grep { 01183 $symbols->{$_}->{'type'} eq 'constant' 01184 } keys %$symbols; 01185 if (scalar(@constants) != 0) 01186 { 01187 push @lines, "; Constants"; 01188 } 01189 for my $sym (@constants) 01190 { 01191 push @lines, sprintf "%-16s%-8s0x%04x", 01192 ":$sym ", 01193 "CONST", 01194 $symbols->{$sym}->{'value'}; 01195 } 01196 01197 my $lastaddress = -1; 01198 01199 # We have to deal with the addresses in order, because we might 01200 # have a corrupt file that ends before the data or which is 01201 # discontinuous. 01202 my @addresses = sort { $a <=> $b } keys %$core; 01203 if (scalar(@addresses) != 0) 01204 { 01205 push @lines, "; Core"; 01206 } 01207 while (defined (my $address = shift @addresses)) 01208 { 01209 my $value = $core->{$address}; 01210 01211 # We never write out hardware 01212 next if (ref $value ne ''); 01213 01214 if ($lastaddress+1 != $address) 01215 { 01216 # If we have discontinous core data, we need to change the 01217 # origin. 01218 my $res = $address - ($lastaddress+1); 01219 if ($res < $disassemble_res_limit) 01220 { 01221 # Apply a heuristic for the amount of space between the 01222 # data in the core. If there's less than the limit then 01223 # we'll just use a RES statement to reserve the skipped 01224 # words. 01225 push @lines, sprintf "%-16s%-8s0x%-30x ; %04x - %04x", 01226 "", 01227 "RES", 01228 $res, 01229 $lastaddress+1, 01230 $address-1; 01231 } 01232 else 01233 { 01234 push @lines, sprintf ".ORIGIN 0x%04x", $address; 01235 } 01236 } 01237 01238 if (defined $addressSymbols{$address}) 01239 { 01240 push @lines, ":$addressSymbols{$address}"; 01241 } 01242 01243 my $opcode = $value & 15; 01244 my $opcodename; 01245 01246 # List of the memory locations associated with this instruction 01247 my @memdata = ($value); 01248 01249 # List of the decoded operands 01250 my @operands; 01251 if ($opcode !=0) 01252 { 01253 # Basic opcode 01254 $opcodename = $DCPU::InvBasicOps{$opcode}; 01255 my $operand_a = ($value>>4) & 63; 01256 my $operand_b = ($value>>10) & 63; 01257 01258 # Decode each operand into printable descriptions, pulling data 01259 # out of nextword as necessary. 01260 push @operands, $self->decode_operand($operand_a, 01261 $address + scalar(@memdata), 01262 \@memdata); 01263 01264 push @operands, $self->decode_operand($operand_b, 01265 $address + scalar(@memdata), 01266 \@memdata); 01267 01268 if (!defined $opcodename) 01269 { 01270 die "No such opcode: $opcode\n"; 01271 } 01272 } 01273 else 01274 { 01275 # Extended opcodes 01276 $opcode = ($value>>4) & 63; 01277 $opcodename = $DCPU::InvExtendedOps{$opcode}; 01278 if (!defined $opcodename) 01279 { 01280 # Invalid opcode, so we treat as data 01281 $opcodename = "DAT"; 01282 @operands = ($value); 01283 } 01284 else 01285 { 01286 # a valid extended opcode 01287 my $operand_a = ($value>>10) & 63; 01288 01289 push @operands, $self->decode_operand($operand_a, 01290 $address + 1, 01291 \@memdata); 01292 } 01293 } 01294 01295 # Convert the memdata into presentable hex values 01296 @memdata = map { defined $_ ? sprintf "%04x", $_ : '----' } @memdata; 01297 01298 # Construct the output line 01299 push @lines, sprintf "%-16s%-8s%-32s ; %04x: %s", 01300 "", 01301 $opcodename, 01302 (join ", ", @operands), 01303 $address, 01304 (join " ", @memdata); 01305 01306 # Skip the data we've just read off the address list 01307 for (2..scalar(@memdata)) 01308 { 01309 # We want to skip only the entries from the list that are actually 01310 # valid - the core might be discontinuous. 01311 if (defined $addresses[0] && 01312 $addresses[0] == $address+1) 01313 { 01314 shift @addresses; 01315 } 01316 $address++; 01317 } 01318 $lastaddress = $address; 01319 } 01320 01321 return join "\n", @lines, ""; 01322 } 01323 01324 01325 ## 01326 # Decode an operand into a string. 01327 # 01328 # @param[in] $self IO object 01329 # @param[in] $operand Operand value 01330 # @param[in] $address Address of 'nextword' 01331 # @param[in,out] $memdataref Reference to an address of data read from memory 01332 # which we will update if we read any data 01333 # 01334 # @return Operand definition string 01335 sub decode_operand 01336 { 01337 my ($self, $operand, $address, $memdataref) = @_; 01338 01339 # 0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order) 01340 if ($operand >= 0 && $operand <= 7) 01341 { 01342 return $DCPU::InvRegisterMap{ $operand & 7 }; 01343 } 01344 # 0x08-0x0f: [register] 01345 elsif ($operand >= 0x8 && $operand <= 0xf) 01346 { 01347 return '[' . $DCPU::InvRegisterMap{ $operand & 7 } . ']'; 01348 } 01349 # 0x10-0x17: [next word + register] 01350 elsif ($operand >= 0x10 && $operand <= 0x17) 01351 { 01352 my $nextword = $self->decode_nextword($address, $memdataref); 01353 return "[$nextword + " . $DCPU::InvRegisterMap{ $operand & 7 } . ']'; 01354 } 01355 # 0x18: POP / [SP++] 01356 elsif ($operand == 0x18) 01357 { 01358 return 'POP'; 01359 } 01360 # 0x19: PEEK / [SP] 01361 elsif ($operand == 0x19) 01362 { 01363 return 'PEEK'; 01364 } 01365 # 0x1a: PUSH / [--SP] 01366 elsif ($operand == 0x1a) 01367 { 01368 return 'PUSH'; 01369 } 01370 # 0x1b: SP 01371 elsif ($operand == 0x1b) 01372 { 01373 return 'SP'; 01374 } 01375 # 0x1c: PC 01376 elsif ($operand == 0x1c) 01377 { 01378 return 'PC'; 01379 } 01380 # 0x1d: O 01381 elsif ($operand == 0x1d) 01382 { 01383 return 'O'; 01384 } 01385 # 0x1e: [next word] 01386 elsif ($operand == 0x1e) 01387 { 01388 my $nextword = $self->decode_nextword($address, $memdataref); 01389 return "[$nextword]"; 01390 } 01391 # 0x1f: next word (literal) 01392 elsif ($operand == 0x1f) 01393 { 01394 my $nextword = $self->decode_nextword($address, $memdataref); 01395 return "$nextword"; 01396 } 01397 # 0x20-0x3f: literal value 0x00-0x1f (literal) 01398 elsif ($operand >= 0x20 && $operand <= 0x3f) 01399 { 01400 return $operand - 0x20; 01401 } 01402 else 01403 { 01404 return "!Invalid: $operand!"; 01405 } 01406 } 01407 01408 01409 ## 01410 # Read the next word from a memory address, whilst decoding operands. 01411 # Apply any relocations that we know about such that the decode comes 01412 # out as readable. 01413 # 01414 # The actual value in memory is appended to the memdataref list. 01415 # If the memory is not known, we return '?' and an undef entry in the array. 01416 # 01417 # @param[in] $self IO object 01418 # @param[in] $address Address of 'nextword' 01419 # @param[in,out] $memdataref Reference to an address of data read from memory 01420 # which we will update if we read any data 01421 # 01422 # @return extword string 01423 sub decode_nextword 01424 { 01425 my ($self, $address, $memdataref) = @_; 01426 my $dasm = $self->{'dasm'}; 01427 my $core = $dasm->{'core'}; 01428 01429 my $value = $dasm->{'core'}->{$address}; 01430 01431 # We're using it, so push it on to our stack 01432 push @$memdataref, $value; 01433 01434 if (!defined $value) 01435 { 01436 # No value, which means that the core is truncated or discontinuous 01437 # around an invalid location, so we return '?' 01438 return '?'; 01439 } 01440 01441 # Check whether we have a relocation at that address 01442 if (defined $dasm->{'relocations'}->{$address}) 01443 { 01444 # We do... which means that we have to perform report the symbol 01445 # of the relocation as part of a calculation. 01446 01447 if ($value == 0) 01448 { 01449 # However, the offset appears to be 0, so we can just use the 01450 # symbol name. 01451 return "$dasm->{'relocations'}->{$address}"; 01452 } 01453 else 01454 { 01455 return "($value + $dasm->{'relocations'}->{$address})"; 01456 } 01457 } 01458 01459 if ($value >= 256) 01460 { 01461 return sprintf "0x%04x", $value; 01462 } 01463 01464 return sprintf "0x%02x", $value; 01465 } 01466 01467 01468 # Must return true 01469 1;