DASM
DASMIO/Source.pm
Go to the documentation of this file.
00001 #!/usr/bin/perl -w
00002 ##
00003 # DASM DCPU-16 source assembler class.
00004 #
00005 # This class provides the implementation of the source code assembler
00006 # for DASM.
00007 #
00008 # The files we support end in '.dasm'.
00009 #
00010 # Source file format:
00011 #
00012 # @code
00013 # [<label>]    <opcode>       <operand a>, <operand b>
00014 # [<label>]    <ext-opcode>   <operand>
00015 # ; <comment>
00016 # @endcode
00017 #
00018 #
00019 # It also provides a disassembler which can produce source code which
00020 # can be passed back to the assembler. If the source comes from one
00021 # of the binary formats that doesn't offer relocations or symblols,
00022 # the content will be more difficult to parse interpret, but should still
00023 # work.
00024 # 
00025 # The structures we use are intended to be able to support libraries of
00026 # relocatable, partially linked code. This has not yet been tested and
00027 # the lack of any indication of which symbols are local and which are
00028 # imported may be a problem in the future.
00029 #
00030 # In addition to the 1.1 DCPU-16 specification, the assembler supports:
00031 # 
00032 # - Character literals for values: @n
00033 #   `'X'` will evaluate to the number 88.
00034 #
00035 # - Expression calculations: @n
00036 #   `1+4` will evaluate to the number 5. @n
00037 #   Expression calculations are limited, and do not honour precedence
00038 #   rules. Use brackets for clarity.
00039 #
00040 # - Symbols may be used in additive calculations: @n
00041 #   `data + 4` will evaluate to the value of symbol '`data`' plus 4. @n
00042 #   Symbols may (internally) be addresses or constants:
00043 #   - Address symbols are created through the standard labels.
00044 #   - Address symbols may (in future versions) be subject to relocations.
00045 #
00046 # - Symbols may be 'local' by preceding with a '`$`' character: @n
00047 #   Any symbol preceded by a '`$`' will have the last label used prepended
00048 #   to the symbol name. This allows labels and references to be used
00049 #   within a routine without polluting the global namespace.
00050 #
00051 # - Additional instructions:
00052 #   - `DAT <value[,<value> ]*` @n
00053 #     ... will write literal values to the core. @n
00054 #     The DAT instruction is supported in the assembler, and generated in
00055 #     the disassembler for undefined instruction sequences.
00056 #   - `RES <value>` @n
00057 #     ... will reserve \<value\> words in the core. @n
00058 #   - `CONST value` @n
00059 #     ... will define a constant symbol with the name given in the label.
00060 #   - `ADDRESS <value>` @n
00061 #     ...  will define an address symbol with the name given in the label.
00062 #   - `RN <register>` @n
00063 #     ...  will define a register binding for the name given in the label. @n
00064 #     Register bindings allow you to call a register by a different name,
00065 #     which can make the code mode readable.
00066 #
00067 # - Directives: @n
00068 #   `.<directive> <parameters>` allows additional meta operations to be
00069 #   performed. Directives supported:
00070 #   - `.ORIGIN address` @n
00071 #     ... Sets the address for subsequent compilation
00072 #   - `.INCLUDE file` @n
00073 #     ...Includes another file at the current location
00074 #
00075 # @note We should really consider changing the structure so that the
00076 #       DASMIO objects inherit from a parser class, rather than having
00077 #       to reference `$self->{'dasm'}` for parser operations like
00078 #       error().
00079 #
00080 # @file
00081 # @author Justin Fletcher
00082 #
00083 
00084 package DASMIO::Source;
00085 
00086 use DASM;
00087 use DCPU;
00088 
00089 use Carp;
00090 
00091 
00092 ## Whether we're debugging the 'DAT' instruction
00093 my $debug_dat = 0;
00094 
00095 
00096 ## Whether we're debugging the symbol processing
00097 my $debug_sym = 0;
00098 
00099 
00100 ## How much we reserve in our disassembly before changing to .ORIGIN
00101 my $disassemble_res_limit = 257;
00102 
00103 ##
00104 # Create an object upon which we can work.
00105 #
00106 # @param[in] $proto  Prototype object, or this class's name
00107 # @param[in] $dasm   DASM object we're working on
00108 #
00109 # @return new object
00110 sub new
00111 {
00112     my $proto = shift;
00113     my $dasm  = shift;
00114     my $class = ref($proto) || $proto;
00115     my $self  = {
00116 
00117             # The last label we encountered, for local labelling
00118             'lastlabel' => undef,
00119             
00120             # Register mappings, new name to old name
00121             'regbinding' => {},
00122             
00123             # Register mapping regular expression (generics)
00124             'regbindingre_g' => '[ABCXYZIJ]',
00125 
00126             # Register mapping regular expression (stack pointer)
00127             'regbindingre_sp' => '(?:SP)',
00128 
00129             # Register mapping regular expression (overflow)
00130             'regbindingre_o' => '(?:O)',
00131 
00132             # Register mapping regular expression (program counter)
00133             'regbindingre_pc' => '(?:PC)',
00134         };
00135 
00136     bless $self, $class;
00137     
00138     $self->{'dasm'} = $dasm;
00139     
00140     return $self;
00141 }
00142 
00143 
00144 ##
00145 # Identify whether we can handle reading or writing a given file
00146 # by its filename.
00147 #
00148 # @param[in]  $self     Our object
00149 # @param[in]  $filename Filename we're accessing
00150 # @param[in]  $type     'read' or 'write'
00151 #
00152 # @retval 1 if we can access it
00153 # @retval 0 if we cannot access it
00154 sub identify
00155 {
00156     my ($self, $filename, $type) = @_;
00157     
00158     if ($filename =~ /\.dasm$/)
00159     {
00160         return 1;
00161     }
00162     
00163     return 0;
00164 }
00165 
00166 
00167 ##
00168 # List the extensions we support.
00169 #
00170 # @param[in]  $self     Our object
00171 #
00172 # @return arrayref of extensions
00173 sub extensions
00174 {
00175     return [ 'dasm' ];
00176 }
00177 
00178 
00179 ##
00180 # Parse an instruction line.
00181 #
00182 # @param[in] $self    IO object
00183 # @param[in] $line    Line to process
00184 sub parseline
00185 {
00186     my ($self, $line) = @_;
00187     my $dasm = $self->{'dasm'};
00188     
00189     # Trim any trailing newline that might have been left
00190     chomp $line;
00191     
00192     # Directives processed first - consider moving these to a callback.
00193     if ($line =~ s/^\.([A-Za-z]+)\s*//)
00194     {
00195         my $directive = uc $1;
00196 
00197         # Check if we understand the directive
00198         my $dirfunc = 'directive_' . $directive;
00199         if ($self->can($dirfunc))
00200         {
00201             # If we can invoke it, do so, providing the line to parse
00202             $self->$dirfunc(\$line);
00203 
00204             # Check for trailing whitespace/comments
00205             if ($line !~ s/^\s*$// &&
00206                 $line !~ s/^\s*;//)
00207             {
00208                 $dasm->error("Garbage at end of '$directive' directive");
00209             }
00210         }
00211         else
00212         {
00213             $dasm->error("Unrecognised directive '$directive'");
00214         }
00215         
00216         return;
00217     }
00218 
00219     
00220     # We hold on to the label and PC location so that we can assign at the
00221     # end if they were set.
00222     my $label = undef;
00223     my $startpc = $dasm->{'pc'};
00224 
00225     # The initial part of the line may be a label, starting with a ':'
00226     # However, the label isn't always for this address; it may be a constant
00227     # label or some other type of label. So we merely remember it until we
00228     # know what sort of label it is.
00229     if ($line =~ s/^:([\w\$]+)//)
00230     {
00231         $label = $1;
00232         if ($label =~ /^\$/)
00233         {
00234             # A local label, so prepend the last label we saw
00235             if (!defined $self->{'lastlabel'})
00236             {
00237                 $dasm->error("Cannot use local label without a recent label");
00238             }
00239             $label = $self->{'lastlabel'} . $label;
00240         }
00241         else
00242         {
00243             $self->{'lastlabel'} = $label;
00244         }
00245     }
00246     
00247     # Skip any indentation
00248     $line =~ s/^\s*//;
00249     
00250     # Skip any empty lines or comments
00251     if ($line eq '' ||
00252         $line =~ s/^;//)
00253     {
00254         goto line_complete;
00255     }
00256     
00257     # Extract the opcode, if there is one
00258     if ($line =~ s/^([a-zA-Z0-9_]+)\b\s*//)
00259     {
00260         my $opcode = uc $1;
00261         
00262         # First check for magic opcodes that we introduced:
00263         my $opfunc = 'op_' . $opcode;
00264         if ($self->can($opfunc))
00265         {
00266             # If we can invoke it, do so, providing the line to parse
00267             $self->$opfunc(\$line, \$label);
00268             
00269             # Check for trailing whitespace/comments
00270             if ($line !~ s/^\s*// &&
00271                 $line !~ s/^\s*;//)
00272             {
00273                 $dasm->error("Garbage at end of '$opcode' line");
00274             }
00275         }
00276 
00277         # Now the real opcodes
00278         elsif ($DCPU::BasicOps{$opcode})
00279         {
00280             # We have a basic opcode
00281             my $opval = $DCPU::BasicOps{$opcode};
00282             
00283             # Parse each operand
00284             my $operand_a = $self->parse_operand(\$line);
00285             if ($line !~ s/^\s*,\s*//)
00286             {
00287                 $dasm->error("Expected operand b to opcode '$opcode'");
00288             }
00289 
00290             my $operand_b = $self->parse_operand(\$line);
00291             if ($line !~ s/^\s*$// &&
00292                 $line !~ s/^\s*;//)
00293             {
00294                 $dasm->error("Garbage after operand b to opcode '$opcode'");
00295             }
00296 
00297             if ($debug_asm)
00298             {
00299                 printf "%04x : %-6s : %x\n", $dasm->{'pc'}, $opcode, $opval;
00300                 printf "  %02x : %-9s : %s\n", $operand_a->{'opval'},
00301                                                $operand_a->{'type'},
00302                                                ($operand_a->{'value'} // '');
00303                 printf "  %02x : %-9s : %s\n", $operand_b->{'opval'},
00304                                                $operand_b->{'type'},
00305                                                ($operand_b->{'value'} // '');
00306             }
00307             $dasm->storetopc($opval +
00308                              ($operand_a->{'opval'} << 4) +
00309                              ($operand_b->{'opval'} << 10));
00310             
00311             if (defined $operand_a->{'nextword'})
00312             {
00313                 if ($operand_a->{'nextword'}->{'symbol'})
00314                 {
00315                     # Relocation symbol, update the table
00316                     $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'};
00317                 }
00318                 $dasm->storetopc($operand_a->{'nextword'}->{'value'});
00319             }
00320 
00321             if (defined $operand_b->{'nextword'})
00322             {
00323                 if ($operand_b->{'nextword'}->{'symbol'})
00324                 {
00325                     # Relocation symbol, update the table
00326                     $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_b->{'nextword'}->{'symbol'};
00327                 }
00328                 $dasm->storetopc($operand_b->{'nextword'}->{'value'});
00329             }
00330         }
00331         elsif ($DCPU::ExtendedOps{$opcode})
00332         {
00333             # We have an extended opcode
00334             my $opval = $DCPU::ExtendedOps{$opcode};
00335             
00336             # Parse the operand
00337             my $operand_a = $self->parse_operand(\$line);
00338             if ($line !~ s/^\s*$// &&
00339                 $line !~ s/^\s*;//)
00340             {
00341                 $dasm->error("Garbage after operand a to opcode '$opcode'");
00342             }
00343 
00344             $dasm->storetopc(($opval<<4) +
00345                              ($operand_a->{'opval'}<<10));
00346             
00347             # Update the core with the following words
00348             if (defined $operand_a->{'nextword'})
00349             {
00350                 if ($operand_a->{'nextword'}->{'symbol'})
00351                 {
00352                     # Relocation symbol, update the table
00353                     $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'};
00354                 }
00355                 $dasm->storetopc($operand_a->{'nextword'}->{'value'});
00356             }
00357         }
00358         else
00359         {
00360             $dasm->error("Bad opcode '$opcode'");
00361         }
00362     }
00363     else
00364     {
00365          $dasm->error("Bad source '$line'");
00366     }
00367 
00368 line_complete:
00369     if (defined $label)
00370     {
00371         $dasm->label($startpc, $label);
00372     }
00373 }
00374 
00375 
00376 ##
00377 # Bind register names, to give them other names than the default.
00378 #
00379 # @param[in]     $self      This source object
00380 # @param[in]     $new       New register name
00381 # @param[in]     $old       Old register name, or undef to remove mapping
00382 sub rebindregister
00383 {
00384     my ($self, $new, $old) = @_;
00385     
00386     # Remember the mapping
00387     if (defined $old)
00388     {
00389         $self->{'regbinding'}->{uc $new} = uc $old;
00390     }
00391     else
00392     {
00393         delete $self->{'regbinding'}->{uc $new};
00394     }
00395     
00396     # Generate the regex to use for matching registers
00397     my $re = "";
00398     $re = join "|", grep { $self->{'regbinding'}->{$_} ne 'O' &&
00399                            $self->{'regbinding'}->{$_} ne 'SP' &&
00400                            $self->{'regbinding'}->{$_} ne 'PC' }
00401                         keys %{$self->{'regbinding'}};
00402     if ($re eq '')
00403     {
00404         $re = "[ABCXYZIJ]";
00405     }
00406     else
00407     {
00408         $re = "(?:[ABCXYZIJ]|$re)";
00409     }
00410     $self->{'regbindingre_g'} = $re;
00411     
00412     # Now the specific registers
00413 
00414     # Overflow
00415     $re = join "|", 'O', grep { $self->{'regbinding'}->{$_} eq 'O' }
00416                             keys %{$self->{'regbinding'}};
00417     $self->{'regbindingre_o'} = "(?:$re)";
00418 
00419     # Stack pointer
00420     $re = join "|", 'SP', grep { $self->{'regbinding'}->{$_} eq 'SP' }
00421                             keys %{$self->{'regbinding'}};
00422     $self->{'regbindingre_sp'} = "(?:$re)";
00423 
00424     # Program counter
00425     $re = join "|", 'PC', grep { $self->{'regbinding'}->{$_} eq 'PC' }
00426                             keys %{$self->{'regbinding'}};
00427     $self->{'regbindingre_pc'} = "(?:$re)";
00428 }
00429 
00430 ##
00431 # Map a register name to a standard register name.
00432 #
00433 # @param[in]  $self     This object
00434 # @param[in]  $reg      Register to lookup
00435 sub regbinding
00436 {
00437     my ($self, $reg) = @_;
00438     $reg = uc $reg;
00439     if (defined $self->{'regbinding'}->{$reg})
00440     {
00441         $reg = $self->{'regbinding'}->{$reg};
00442     }
00443     
00444     return $reg;
00445 }
00446 
00447 
00448 ##
00449 # Special opcode 'DAT' to define data in the core.
00450 #
00451 # @param[in]     $self      This source object
00452 # @param[in,out] $lineref   Reference to the line end we are parsing,
00453 #                           updated to leave remaining text
00454 # @param[in,out] $labelref  Reference to the label for this line, updated to
00455 #                           undef if we should not defined a label for this
00456 #                           initial address
00457 sub op_DAT
00458 {
00459     my ($self, $lineref, $labelref) = @_;
00460     my $dasm = $self->{'dasm'};
00461 
00462     # Parse operands until we cannot see another item
00463     while (1)
00464     {
00465         # We want to allow string data as well, for simplicity.
00466         # We do NOT handle UTF-8 - everything is 8 bit.
00467         if ($$lineref =~ s/^\s*"([^"]+)"// || # "d string
00468             $$lineref =~ s/^\s*'([^']+)'//)   # 'd string
00469         {
00470             print "  string: '$1'\n" if ($debug_dat);
00471             for my $chr (unpack "c*", $1)
00472             {
00473                 $dasm->storetopc($chr);
00474             }
00475         }
00476         else
00477         {
00478             my ($constant, $symname) = $self->parse_constant($lineref);
00479             if ($symname)
00480             {
00481                 print "  symbol $symname\n" if ($debug_dat);
00482                 # Relocation symbol, update the table
00483                 $dasm->{'relocations'}->{ $dasm->{'pc'} } = $symname;
00484             }
00485             print "  value $constant\n" if ($debug_dat);
00486             $dasm->storetopc($constant);
00487         }
00488 
00489         # Only continue if we have a comma followed by something to parse
00490         last if ($$lineref !~ s/^\s*,\s*//);
00491     }
00492 }
00493 
00494 
00495 ##
00496 # Special opcode 'RES' to reserve a block of memory in the core.
00497 #
00498 # @param[in]     $self      This source object
00499 # @param[in,out] $lineref   Reference to the line end we are parsing,
00500 #                           updated to leave remaining text
00501 # @param[in,out] $labelref  Reference to the label for this line, updated to
00502 #                           undef if we should not defined a label for this
00503 #                           initial address
00504 sub op_RES
00505 {
00506     my ($self, $lineref, $labelref) = @_;
00507     my $dasm = $self->{'dasm'};
00508 
00509     # Parse operands until we cannot see another item
00510     my ($constant, $symname) = $self->parse_constant($lineref, 'constant', 'nowrap');
00511     
00512     if ($constant < 0)
00513     {
00514         $dasm->error("Attempt to reserve negative space ($constant)");
00515     }
00516     elsif ($constant >= 0x10000)
00517     {
00518         $dasm->error(sprintf "Attempt to reserve huge space (0x%x)", $constant);
00519     }
00520     
00521     my $pc = $dasm->{'pc'} + $constant;
00522     if ($pc >= 0x10000)
00523     {
00524         $dasm->error(sprintf "Reservation would overflow core (%d words overflows to 0x%x)", $constant, $pc);
00525     }
00526 
00527     $dasm->{'pc'} = $pc;
00528 }
00529 
00530 
00531 ##
00532 # Special opcode 'CONST' to define a constant symbol.
00533 #
00534 # @param[in]     $self      This source object
00535 # @param[in,out] $lineref   Reference to the line end we are parsing,
00536 #                           updated to leave remaining text
00537 # @param[in,out] $labelref  Reference to the label for this line, updated to
00538 #                           undef if we should not defined a label for this
00539 #                           initial address
00540 sub op_CONST
00541 {
00542     my ($self, $lineref, $labelref) = @_;
00543     my $dasm = $self->{'dasm'};
00544 
00545     # Constant value symbol declaration
00546     if (defined $$labelref)
00547     {
00548         my ($constant, $symname) = $self->parse_constant($lineref, 'constant');
00549         $dasm->setsymbol($$labelref, $constant, 'constant');
00550         $$labelref = undef;
00551     }
00552     else
00553     {
00554         $dasm->error("CONST must be supplied with a label");
00555     }
00556 }
00557 
00558 
00559 ##
00560 # Special opcode 'ADDRESS' to define an address symbol.
00561 #
00562 # @param[in]     $self      This source object
00563 # @param[in,out] $lineref   Reference to the line end we are parsing,
00564 #                           updated to leave remaining text
00565 # @param[in,out] $labelref  Reference to the label for this line, updated to
00566 #                           undef if we should not defined a label for this
00567 #                           initial address
00568 sub op_ADDRESS
00569 {
00570     my ($self, $lineref, $labelref) = @_;
00571     my $dasm = $self->{'dasm'};
00572 
00573     # Address value symbol declaration
00574     if (defined $$labelref)
00575     {
00576         my ($constant, $symname) = $self->parse_constant($lineref, 'constant');
00577         $dasm->setsymbol($$labelref, $constant, 'address');
00578         $$labelref = undef;
00579     }
00580     else
00581     {
00582         $self->error("ADDRESS must be supplied with a label");
00583     }
00584 }
00585 
00586 
00587 ##
00588 # Special opcode 'RN' to define a register name (binding)
00589 #
00590 # @param[in]     $self      This source object
00591 # @param[in,out] $lineref   Reference to the line end we are parsing,
00592 #                           updated to leave remaining text
00593 # @param[in,out] $labelref  Reference to the label for this line, updated to
00594 #                           undef if we should not defined a label for this
00595 #                           initial address
00596 sub op_RN
00597 {
00598     my ($self, $lineref, $labelref) = @_;
00599     my $dasm = $self->{'dasm'};
00600 
00601     # Constant value symbol declaration
00602     if (defined $$labelref)
00603     {
00604         my $binding;
00605         if ($$lineref =~ s/^([A-Za-z]\w*)//)
00606         {
00607             $binding = $1;
00608         }
00609         else
00610         {
00611             $dasm->error("Binding not specified for register '$$labelref'");
00612         }
00613 
00614         # Repeatedly dereference the binding until we have a final one,
00615         # but only allow a limited number of bindings, otherwise we might
00616         # be in a loop.
00617         my $count = 0;
00618         while (defined $self->{'regbinding'}->{uc $binding} &&
00619                $count < 20)
00620         {
00621             $binding = $self->{'regbinding'}->{uc $binding};
00622             $count++
00623         }
00624         if ($count >= 20)
00625         {
00626             $dasm->error("Too many levels of binding for register '$$labelref'");
00627         }
00628 
00629         if ($$labelref =~ /^[ABCXYZIJ]|SP|PC|O$/i)
00630         {
00631             $dasm->error("Core register $$labelref cannot be rebound");
00632         }
00633         
00634         $self->rebindregister($$labelref, $binding);
00635 
00636         $$labelref = undef;
00637     }
00638     else
00639     {
00640         $dasm->error("RN must be supplied with a register name to bind");
00641     }
00642 }
00643 
00644 
00645 ##
00646 # Assembler directive 'ORIGIN' to change compilation address.
00647 #
00648 # @param[in]     $self      This source object
00649 # @param[in,out] $lineref   Reference to the line end we are parsing,
00650 #                           updated to leave remaining text
00651 sub directive_ORIGIN
00652 {
00653     my ($self, $lineref) = @_;
00654     my $dasm = $self->{'dasm'};
00655 
00656     my ($constant, $symname) = $self->parse_constant($lineref, 'constant', 'nowrap');
00657     
00658     if ($constant < 0)
00659     {
00660         $dasm->error("ORIGIN must be positive (not $constant)");
00661     }
00662     elsif ($constant >= 0x10000)
00663     {
00664         $dasm->error(sprintf "ORIGIN must be within core (0x%x > 0xFFFF)", $constant);
00665     }
00666     $dasm->{'pc'} = $constant;
00667 }
00668 
00669 
00670 ##
00671 # Assembler directive 'ORIGIN' to change compilation address.
00672 #
00673 # @param[in]     $self      This source object
00674 # @param[in,out] $lineref   Reference to the line end we are parsing,
00675 #                           updated to leave remaining text
00676 sub directive_INCLUDE
00677 {
00678     my ($self, $lineref) = @_;
00679     my $dasm = $self->{'dasm'};
00680 
00681     if ($$lineref =~ s/"(.*?)"//)
00682     {
00683         $dasm->readfile($1);
00684     }
00685     else
00686     {
00687         $dasm->error("Unrecognised INCLUDE '$$lineref'");
00688     }
00689 }
00690 
00691 
00692 ##
00693 # Parse operands and return the values of them
00694 #
00695 # @param[in]     $self      IO object
00696 # @param[in,out] $lineref   Reference to the line that we're parsing
00697 # @param[in]     $wrap      'nowrap'   : do not wrap large/small numbers
00698 #                           'wrap'     : wrap numbers (default)
00699 #
00700 # @return hashref of operand details:
00701 #           'type'  =>  'reg'       (register value : register=>regname)
00702 #                       'regaddr'   (register address : register=>regname)
00703 #                       'regindex'  (registered indexed : register>regname)
00704 #                       'pop'       (stack pop)
00705 #                       'peek'      (stack peek)
00706 #                       'push'      (stack push)
00707 #                       'sp'        (stack pointer)
00708 #                       'pc'        (program counter)
00709 #                       'o'         (overflow)
00710 #                       'indirect'  (dereference next address)
00711 #                       'load'      (load next address)
00712 #                       'loadshort' (load short value : value=>short value)
00713 #           'opval' => value in the operand register
00714 #           'register' => register, if applicable
00715 #           'nextword' => next word data, if applicable
00716 #           'value'    => value, if applicable
00717 sub parse_operand
00718 {
00719     my ($self, $lineref, $wrap) = @_;
00720     my $dasm = $self->{'dasm'};
00721 
00722     $wrap = 'wrap' if (!defined $wrap);
00723     
00724     #0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order)
00725     if ($$lineref =~ s/^($self->{'regbindingre_g'})\b//i)
00726     {
00727         my $reg = $self->regbinding($1);
00728         return {
00729                 'type'     => 'reg',
00730                 'register' => $reg,
00731                 'opval'    => 0 + $DCPU::RegisterMap{$reg}
00732             };
00733     }
00734     #0x08-0x0f: [register]
00735     elsif ($$lineref =~ s/^\[\s*($self->{'regbindingre_g'})\s*\]//i)
00736     {
00737         my $reg = $self->regbinding($1);
00738         return {
00739                 'type'     => 'regaddr',
00740                 'register' => $reg,
00741                 'opval'    => 0x08 + $DCPU::RegisterMap{$reg}
00742             };
00743     }
00744     #0x10-0x17: [next word + register]
00745     #     0x1e: [next word]
00746     elsif ($$lineref =~ s/^\[\s*//)
00747     {
00748         # Variation on syntax, as part of the tests at:
00749         # http://0x10cwiki.com/wiki/Comparison_of_Developer_Tools
00750         # allows for [register + next word] as well.
00751         if ($$lineref =~ s/^($self->{'regbindingre_g'})\s*\+\s*//i)
00752         {
00753             my $reg = $self->regbinding($1);
00754 
00755             # This is the variation.
00756             my $symbol = $self->parse_symbol($lineref);
00757             if ($$lineref =~ s/^\s*\]//)
00758             {
00759                 return {
00760                         'type'     => 'regindex',
00761                         'register' => $reg,
00762                         'nextword' => $symbol,
00763                         'opval'    => 0x10 + $DCPU::RegisterMap{$reg}
00764                     };
00765                 
00766             }
00767             else
00768             {
00769                 $dasm->error("Garbage whilst parsing inverted indexed operand");
00770             }
00771         }
00772         else
00773         {
00774             my $symbol = $self->parse_symbol($lineref);
00775             if ($$lineref =~ s/^\s*\+\s*($self->{'regbindingre_g'})\s*\]//i)
00776             {
00777                 my $reg = $self->regbinding($1);
00778                 return {
00779                         'type'     => 'regindex',
00780                         'register' => $reg,
00781                         'nextword' => $symbol,
00782                         'opval'    => 0x10 + $DCPU::RegisterMap{$reg}
00783                     };
00784             }
00785             elsif ($$lineref =~ s/^\s*\]//)
00786             {
00787                 return {
00788                         'type'     => 'indirect',
00789                         'nextword' => $symbol,
00790                         'opval'    => 0x1e
00791                     };
00792             }
00793             else
00794             {
00795                 $dasm->error("Garbage whilst parsing referenced operand");
00796             }
00797         }
00798     }
00799     #     0x18: POP / [SP++]
00800     elsif ($$lineref =~ s/^POP\b//)
00801     {
00802         return {
00803                 'type'     => 'pop',
00804                 'opval'    => 0x18
00805             };
00806     }
00807     #     0x19: PEEK / [SP]
00808     elsif ($$lineref =~ s/^PEEK\b//)
00809     {
00810         return {
00811                 'type'     => 'peek',
00812                 'opval'    => 0x19
00813             };
00814     }
00815     #     0x1a: PUSH / [--SP]
00816     elsif ($$lineref =~ s/^PUSH\b//i)
00817     {
00818         return {
00819                 'type'     => 'push',
00820                 'opval'    => 0x1a
00821             };
00822     }
00823     #     0x1b: SP
00824     elsif ($$lineref =~ s/^$self->{'regbindingre_sp'}\b//i)
00825     {
00826         return {
00827                 'type'     => 'sp',
00828                 'opval'    => 0x1b
00829             };
00830     }
00831     #     0x1c: PC
00832     elsif ($$lineref =~ s/^$self->{'regbindingre_pc'}\b//i)
00833     {
00834         return {
00835                 'type'     => 'pc',
00836                 'opval'    => 0x1c
00837             };
00838     }
00839     #     0x1d: O
00840     elsif ($$lineref =~ s/^$self->{'regbindingre_o'}\b//i)
00841     {
00842         return {
00843                 'type'     => 'o',
00844                 'opval'    => 0x1d
00845             };
00846     }
00847     #     0x1f: next word (literal)
00848     #0x20-0x3f: literal value 0x00-0x1f (literal)
00849     else 
00850     {
00851         # Literal value, which could be a short-form literal value,
00852         # or a symbol
00853         my $symbol = $self->parse_symbol($lineref);
00854         if ($symbol->{'type'} eq 'constant')
00855         {
00856             if ($symbol->{'value'} >= 0 &&
00857                 $symbol->{'value'} < 0x20)
00858             {
00859                 return {
00860                         'type'  => 'loadshort',
00861                         'opval' => 0x20 + $symbol->{'value'},
00862                         'value' => $symbol->{'value'}
00863                     };
00864             }
00865         }
00866         
00867         # Check for overflow
00868         if ($symbol->{'type'} eq 'constant' &&
00869             $symbol->{'value'} > 0xffff &&
00870             $wrap eq 'wrap')
00871         {
00872             $dasm->error( sprintf "Literal value (0x%x) too large", $symbol->{'value'} );
00873         }
00874         # Check for underflow
00875         if ($symbol->{'type'} eq 'constant' &&
00876             $symbol->{'value'} < 0 &&
00877             $wrap eq 'wrap')
00878         {
00879             # I think we'll allow negative numbers to wrap down, as that's
00880             # useful in the assembler. But those which wrap twice are too
00881             # small.
00882             if ($symbol->{'value'} < -0x10000)
00883             {
00884                 $dasm->error( sprintf "Literal value (-0x%x) too small", -$symbol->{'value'} );
00885             }
00886             $symbol->{'value'} += 0x10000;
00887         }
00888         return {
00889                 'type'  => 'load',
00890                 'opval' => 0x1f,
00891                 'nextword' => $symbol
00892             };
00893     }
00894 }
00895 
00896 
00897 ##
00898 # Parse a symbol name or value.
00899 #
00900 # @param[in]     $self      DASM object
00901 # @param[in,out] $lineref   Reference to the line that we're parsing
00902 #
00903 # @return hashref of symbol details:
00904 #           'type'  =>  'constant' : a constant value
00905 #                       'address'  : an address constant
00906 #           'value' =>  value
00907 #           'symbol' => any symbol name to use as relocation
00908 sub parse_symbol
00909 {
00910     my ($self, $lineref, $nonterm) = @_;
00911     my $dasm = $self->{'dasm'};
00912 
00913     my %symbol = (
00914             'type'  => 'unknown',
00915             'value' => 0,
00916         );
00917     
00918     if (!defined $lineref)
00919     {
00920         confess "parse_symbol called without a parameter";
00921     }
00922     if (ref $lineref ne 'SCALAR')
00923     {
00924         confess "parse_symbol called with ".ref($lineref)." but expected SCALAR";
00925     }
00926 
00927     # Skip any leading spaces
00928     $$lineref =~ s/^\s*//;
00929     
00930     print "parse_symbol: $$lineref\n" if ($debug_sym);
00931     
00932     if ($$lineref =~ s/^\(//)
00933     {
00934         # Opening bracket for an expression, so parse inside the
00935         # brackets
00936         my $innersymbol = $self->parse_symbol($lineref);
00937         
00938         %symbol = %$innersymbol;
00939         
00940         # Check that we ended properly
00941         if ($$lineref !~ s/^\s*\)//)
00942         {
00943             $dasm->error("Missing closing bracket");
00944         }
00945     }
00946     elsif ($$lineref =~ s/^([\$A-Za-z_][\w\$]*)//)
00947     {
00948         # Symbol reference
00949         my $symname = $1;
00950 
00951         # Check for local symbols and redefine them for the local label
00952         if ($symname =~ /^\$/)
00953         {
00954             if (!defined $self->{'lastlabel'})
00955             {
00956                 $dasm->error("Cannot use a local label before a label has been set");
00957             }
00958             $symname = $self->{'lastlabel'} . $symname;
00959         }
00960 
00961         my $sym = $dasm->getsymbol($symname);
00962         # Hmm. Whilst decoding the symbol here makes some sense, we could
00963         # just leave it for the linkage stage.
00964         #if (defined $sym)
00965         #{
00966         #    # Symbol has been defined
00967         #    %symbol = ( %$sym,
00968         #                'symbol' => $symname
00969         #              );
00970         #}
00971         #else
00972         {
00973             %symbol = (
00974                     'type'   => 'address',
00975                     'value'  => 0,
00976                     'symbol' => $symname
00977                 );
00978         }
00979     }
00980     elsif ($$lineref =~ s/^([+-]?0x[A-Fa-f0-9]+)// ||
00981            $$lineref =~ s/^([+-]?\d+)//)
00982     {
00983         # Hex or decimal value
00984         my $value = eval $1;
00985         %symbol = (
00986                 'type' => 'constant',
00987                 'value' => $value
00988             );
00989     }
00990     elsif ($$lineref =~ s/^'(.)'//)
00991     {
00992         # character, so convert to an ordinal
00993         %symbol = (
00994                 'type' => 'constant',
00995                 'value' => ord($1)
00996             );
00997     }
00998     else
00999     {
01000         if ($$lineref eq '')
01001         {
01002             $dasm->error("No symbol supplied");
01003         }
01004         $dasm->error("Unrecognised symbol parsing '$$lineref'");
01005     }
01006     
01007     # We need to special case the syntax here, because the defined format
01008     # of the assembler allows for register names here (this could be
01009     # refined by having a more complex symbol definition).
01010     if ($$lineref =~ /\+\s*$self->{'regbindingre_g'}\b/i)
01011     {
01012         # This is a relative reference, so don't try to perform arithmetic
01013         # here.
01014         return \%symbol;
01015     }
01016     
01017     if ($$lineref =~ s/^\s*([\+\-\*\/\%\&\|\^]|<<|>>)//)
01018     {
01019         # Operators - note that this is wrong WRT precedence as it won't
01020         # make *, / and % higher priority than + and -. I'm not fussed
01021         # right now. FIXME
01022         my $operator = $1;
01023         my $second = $self->parse_symbol($lineref);
01024         if (defined $symbol{'symbol'} &&
01025             $operator ne '+')
01026         {
01027             $dasm->error("Symbol '$symbol{'symbol'}' not yet defined in arithmetic operation (only additive operations allowed)");
01028         }
01029         if (defined $symbol{'symbol'} &&
01030             defined $second->{'symbol'})
01031         {
01032             $dasm->error("Relocation by multiple symbols ('$symbol{'symbol'}' and '$second->{'symbol'}') not supported");
01033         }
01034         
01035         if (defined $second->{'symbol'})
01036         {
01037             # Swap them around so that the symbol is in our local
01038             # This is safe because the only type of symbol operation we
01039             # support is additive.
01040             my %copy = %symbol;
01041             %symbol = %$second;
01042             $second = \%copy;
01043         }
01044         
01045         if ($operator eq '+')
01046         { $symbol{'value'} += $second->{'value'}; }
01047         elsif ($operator eq '-')
01048         { $symbol{'value'} -= $second->{'value'}; }
01049         elsif ($operator eq '*')
01050         { $symbol{'value'} *= $second->{'value'}; }
01051         elsif ($operator eq '/')
01052         { $symbol{'value'} /= $second->{'value'}; }
01053         elsif ($operator eq '%')
01054         { $symbol{'value'} %= $second->{'value'}; }
01055         elsif ($operator eq '&')
01056         { $symbol{'value'} &= $second->{'value'}; }
01057         elsif ($operator eq '|')
01058         { $symbol{'value'} |= $second->{'value'}; }
01059         elsif ($operator eq '^')
01060         { $symbol{'value'} ^= $second->{'value'}; }
01061         elsif ($operator eq '<<')
01062         { $symbol{'value'} <<= $second->{'value'}; }
01063         elsif ($operator eq '>>')
01064         { $symbol{'value'} >>= $second->{'value'}; }
01065         else
01066         {
01067             die "Rogue operator $operator - should not happen";
01068         }
01069 
01070         if ($second->{'type'} eq 'address')
01071         {
01072             $symbol{'type'} = $second->{'type'};
01073         }
01074     }
01075     
01076     return \%symbol;
01077 }
01078 
01079 ##
01080 # Obtain a constant from an input line.
01081 #
01082 # @param[in]     $self      DASMIO object
01083 # @param[in,out] $lineref   reference to line to read from
01084 # @param[in]     $symbol    'constant' : only a constant is acceptable
01085 #                           'symbol'   : symbol is acceptable (default)
01086 # @param[in]     $wrap      'nowrap'   : do not wrap large/small numbers
01087 #                           'wrap'     : wrap numbers (default)
01088 #
01089 # @return $value    The value to store
01090 #         $symbol   Relocation symbol to apply, or none if not found
01091 sub parse_constant
01092 {
01093     my ($self, $lineref, $symbol, $wrap) = @_;
01094     my $dasm = $self->{'dasm'};
01095     
01096     $symbol = 'constant' if (!defined $symbol);
01097     $wrap   = 'wrap' if (!defined $wrap);
01098     
01099     print "Constant: $$lineref\n" if ($debug_dat);
01100 
01101     my $operand = $self->parse_operand($lineref, $wrap);
01102     print "  type $operand->{'type'}\n" if ($debug_dat);
01103 
01104     # The only types of operand we support here are the
01105     # nextword and constant value operands - 'load' and 'loadshort'
01106     my $sym = undef;
01107     if ($operand->{'type'} eq 'load')
01108     {
01109         $sym = $operand->{'nextword'};
01110     }
01111     elsif ($operand->{'type'} eq 'loadshort')
01112     {
01113         $sym = {
01114                 'type' => 'constant',
01115                 'value' => $operand->{'value'}
01116             };
01117     }
01118     else
01119     {
01120         $dasm->error("Constant operand expected, not type $operand->{'type'}");
01121     }
01122 
01123     if ($sym->{'symbol'} &&
01124         $symbol eq 'constant')
01125     {
01126         # A symbol which we know about and which is a constant.
01127         my $symname = $sym->{'symbol'};
01128         my $symval = $dasm->getsymbol($symname);
01129         if (defined $symval &&
01130             $symval->{'type'} eq 'constant')
01131         {
01132             # We can interpret it here immediately
01133             return ($sym->{'value'} + $symval->{'value'}, undef);
01134         }
01135         elsif (defined $symval)
01136         {
01137             $dasm->error("Symbol operand '$symname' (type=$symval->{'type'}) not expected");
01138         }
01139         else
01140         {
01141             $dasm->error("Symbol operand '$symname' not expected");
01142         }
01143     }
01144     
01145     return ($sym->{'value'}, $symval->{'symbol'});
01146 }
01147 
01148 
01149 
01150 ###########################################################################
01151 # Disassembly routines
01152 # (these are included in the source parser, so that we can decode the
01153 # compiled files to re-compilable files)
01154 
01155 
01156 ##
01157 # Write out a compilable source file based on what we have in the core.
01158 #
01159 # @param[in]  $self     Address object
01160 #
01161 # @return     Content to write
01162 sub write
01163 {
01164     my ($self) = @_;
01165     my $dasm = $self->{'dasm'};
01166     my $core = $dasm->{'core'};
01167 
01168     # Easier access to symbols
01169     my $symbols = $dasm->{'symbols'};
01170     
01171     # Collect a list of address symbols
01172     my %addressSymbols = map {
01173             $symbols->{$_}->{'value'} => $_
01174         } grep {
01175             $symbols->{$_}->{'type'} eq 'address'
01176         } keys %$symbols;
01177     
01178     # The lines we'll return
01179     my @lines;
01180     
01181     # If we have any constants, we need to write them out first
01182     my @constants = grep {
01183                         $symbols->{$_}->{'type'} eq 'constant'
01184                     } keys %$symbols;
01185     if (scalar(@constants) != 0)
01186     {
01187         push @lines, "; Constants";
01188     }
01189     for my $sym (@constants)
01190     {
01191         push @lines, sprintf "%-16s%-8s0x%04x",
01192                              ":$sym ",
01193                              "CONST",
01194                              $symbols->{$sym}->{'value'};
01195     }
01196 
01197     my $lastaddress = -1;
01198     
01199     # We have to deal with the addresses in order, because we might
01200     # have a corrupt file that ends before the data or which is
01201     # discontinuous.
01202     my @addresses = sort { $a <=> $b } keys %$core;
01203     if (scalar(@addresses) != 0)
01204     {
01205         push @lines, "; Core";
01206     }
01207     while (defined (my $address = shift @addresses))
01208     {
01209         my $value = $core->{$address};
01210         
01211         # We never write out hardware
01212         next if (ref $value ne '');
01213 
01214         if ($lastaddress+1 != $address)
01215         {
01216             # If we have discontinous core data, we need to change the
01217             # origin.
01218             my $res = $address - ($lastaddress+1);
01219             if ($res < $disassemble_res_limit)
01220             {
01221                 # Apply a heuristic for the amount of space between the
01222                 # data in the core. If there's less than the limit then
01223                 # we'll just use a RES statement to reserve the skipped
01224                 # words.
01225                 push @lines, sprintf "%-16s%-8s0x%-30x ; %04x - %04x",
01226                                  "",
01227                                  "RES",
01228                                  $res,
01229                                  $lastaddress+1,
01230                                  $address-1;
01231             }
01232             else
01233             {
01234                 push @lines, sprintf ".ORIGIN 0x%04x", $address;
01235             }
01236         }
01237         
01238         if (defined $addressSymbols{$address})
01239         {
01240             push @lines, ":$addressSymbols{$address}";
01241         }
01242         
01243         my $opcode = $value & 15;
01244         my $opcodename;
01245         
01246         # List of the memory locations associated with this instruction
01247         my @memdata = ($value);
01248         
01249         # List of the decoded operands
01250         my @operands; 
01251         if ($opcode !=0)
01252         {
01253             # Basic opcode
01254             $opcodename = $DCPU::InvBasicOps{$opcode};
01255             my $operand_a = ($value>>4) & 63;
01256             my $operand_b = ($value>>10) & 63;
01257             
01258             # Decode each operand into printable descriptions, pulling data
01259             # out of nextword as necessary.
01260             push @operands, $self->decode_operand($operand_a,
01261                                                   $address + scalar(@memdata),
01262                                                   \@memdata);
01263 
01264             push @operands, $self->decode_operand($operand_b,
01265                                                   $address + scalar(@memdata),
01266                                                   \@memdata);
01267             
01268             if (!defined $opcodename)
01269             {
01270                 die "No such opcode: $opcode\n";
01271             }
01272         }
01273         else
01274         {
01275             # Extended opcodes
01276             $opcode = ($value>>4) & 63;
01277             $opcodename = $DCPU::InvExtendedOps{$opcode};
01278             if (!defined $opcodename)
01279             {
01280                 # Invalid opcode, so we treat as data
01281                 $opcodename = "DAT";
01282                 @operands = ($value);
01283             }
01284             else
01285             {
01286                 # a valid extended opcode
01287                 my $operand_a = ($value>>10) & 63;
01288 
01289                 push @operands, $self->decode_operand($operand_a,
01290                                                       $address + 1,
01291                                                       \@memdata);
01292             }
01293         }
01294 
01295         # Convert the memdata into presentable hex values
01296         @memdata = map { defined $_ ? sprintf "%04x", $_ : '----' } @memdata;
01297 
01298         # Construct the output line
01299         push @lines, sprintf "%-16s%-8s%-32s ; %04x: %s",
01300                                  "",
01301                                  $opcodename,
01302                                  (join ", ", @operands),
01303                                  $address,
01304                                  (join " ", @memdata);
01305         
01306         # Skip the data we've just read off the address list
01307         for (2..scalar(@memdata))
01308         {
01309             # We want to skip only the entries from the list that are actually
01310             # valid - the core might be discontinuous.
01311             if (defined $addresses[0] &&
01312                 $addresses[0] == $address+1)
01313             {
01314                 shift @addresses;
01315             }
01316             $address++;
01317         }
01318         $lastaddress = $address;
01319     }
01320 
01321     return join "\n", @lines, "";
01322 }
01323 
01324 
01325 ##
01326 # Decode an operand into a string.
01327 #
01328 # @param[in] $self     IO object
01329 # @param[in] $operand  Operand value
01330 # @param[in] $address  Address of 'nextword'
01331 # @param[in,out] $memdataref    Reference to an address of data read from memory
01332 #                               which we will update if we read any data
01333 #
01334 # @return Operand definition string
01335 sub decode_operand
01336 {
01337     my ($self, $operand, $address, $memdataref) = @_;
01338     
01339     # 0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order)
01340     if ($operand >= 0 && $operand <= 7)
01341     {
01342         return $DCPU::InvRegisterMap{ $operand & 7 };
01343     }
01344     # 0x08-0x0f: [register]
01345     elsif ($operand >= 0x8 && $operand <= 0xf)
01346     {
01347         return '[' . $DCPU::InvRegisterMap{ $operand & 7 } . ']';
01348     }
01349     # 0x10-0x17: [next word + register]
01350     elsif ($operand >= 0x10 && $operand <= 0x17)
01351     {
01352         my $nextword = $self->decode_nextword($address, $memdataref);
01353         return "[$nextword + " . $DCPU::InvRegisterMap{ $operand & 7 } . ']';
01354     }
01355     #      0x18: POP / [SP++]
01356     elsif ($operand == 0x18)
01357     {
01358         return 'POP';
01359     }
01360     #      0x19: PEEK / [SP]
01361     elsif ($operand == 0x19)
01362     {
01363         return 'PEEK';
01364     }
01365     #      0x1a: PUSH / [--SP]
01366     elsif ($operand == 0x1a)
01367     {
01368         return 'PUSH';
01369     }
01370     #      0x1b: SP
01371     elsif ($operand == 0x1b)
01372     {
01373         return 'SP';
01374     }
01375     #      0x1c: PC
01376     elsif ($operand == 0x1c)
01377     {
01378         return 'PC';
01379     }
01380     #      0x1d: O
01381     elsif ($operand == 0x1d)
01382     {
01383         return 'O';
01384     }
01385     #      0x1e: [next word]
01386     elsif ($operand == 0x1e)
01387     {
01388         my $nextword = $self->decode_nextword($address, $memdataref);
01389         return "[$nextword]";
01390     }
01391     #      0x1f: next word (literal)
01392     elsif ($operand == 0x1f)
01393     {
01394         my $nextword = $self->decode_nextword($address, $memdataref);
01395         return "$nextword";
01396     }
01397     #  0x20-0x3f: literal value 0x00-0x1f (literal)
01398     elsif ($operand >= 0x20 && $operand <= 0x3f)
01399     {
01400         return $operand - 0x20;
01401     }
01402     else
01403     {
01404         return "!Invalid: $operand!";
01405     }
01406 }
01407 
01408 
01409 ##
01410 # Read the next word from a memory address, whilst decoding operands.
01411 # Apply any relocations that we know about such that the decode comes
01412 # out as readable.
01413 #
01414 # The actual value in memory is appended to the memdataref list.
01415 # If the memory is not known, we return '?' and an undef entry in the array.
01416 #
01417 # @param[in] $self     IO object
01418 # @param[in] $address  Address of 'nextword'
01419 # @param[in,out] $memdataref    Reference to an address of data read from memory
01420 #                               which we will update if we read any data
01421 #
01422 # @return extword string
01423 sub decode_nextword
01424 {
01425     my ($self, $address, $memdataref) = @_;
01426     my $dasm = $self->{'dasm'};
01427     my $core = $dasm->{'core'};
01428     
01429     my $value = $dasm->{'core'}->{$address};
01430 
01431     # We're using it, so push it on to our stack
01432     push @$memdataref, $value;
01433 
01434     if (!defined $value)
01435     {
01436         # No value, which means that the core is truncated or discontinuous
01437         # around an invalid location, so we return '?'
01438         return '?';
01439     }
01440 
01441     # Check whether we have a relocation at that address
01442     if (defined $dasm->{'relocations'}->{$address})
01443     {
01444         # We do... which means that we have to perform report the symbol
01445         # of the relocation as part of a calculation.
01446         
01447         if ($value == 0)
01448         {
01449             # However, the offset appears to be 0, so we can just use the
01450             # symbol name.
01451             return "$dasm->{'relocations'}->{$address}";
01452         }
01453         else
01454         {
01455             return "($value + $dasm->{'relocations'}->{$address})";
01456         }
01457     }
01458     
01459     if ($value >= 256)
01460     {
01461         return sprintf "0x%04x", $value;
01462     }
01463     
01464     return sprintf "0x%02x", $value;
01465 }
01466 
01467 
01468 # Must return true
01469 1;