DASM
DASMIO/Source.pm
Go to the documentation of this file.
00001 #!/usr/bin/perl -w
00002 ##
00003 # DASM DCPU-16 source assembler class.
00004 #
00005 # This class provides the implementation of the source code assembler
00006 # for DASM.
00007 #
00008 # The files we support end in '.dasm'.
00009 #
00010 # Source file format:
00011 #
00012 # @code
00013 # [<label>]    <opcode>       <operand a>, <operand b>
00014 # [<label>]    <ext-opcode>   <operand>
00015 # ; <comment>
00016 # @endcode
00017 #
00018 #
00019 # It also provides a disassembler which can produce source code which
00020 # can be passed back to the assembler. If the source comes from one
00021 # of the binary formats that doesn't offer relocations or symblols,
00022 # the content will be more difficult to parse interpret, but should still
00023 # work.
00024 # 
00025 # The structures we use are intended to be able to support libraries of
00026 # relocatable, partially linked code. This has not yet been tested and
00027 # the lack of any indication of which symbols are local and which are
00028 # imported may be a problem in the future.
00029 #
00030 # In addition to the 1.1 DCPU-16 specification, the assembler supports:
00031 # 
00032 # - Character literals for values: @n
00033 #   `'X'` will evaluate to the number 88.
00034 #
00035 # - Expression calculations: @n
00036 #   `1+4` will evaluate to the number 5. @n
00037 #   Expression calculations are limited, and do not honour precedence
00038 #   rules. Use brackets for clarity.
00039 #
00040 # - Symbols may be used in additive calculations: @n
00041 #   `data + 4` will evaluate to the value of symbol '`data`' plus 4. @n
00042 #   Symbols may (internally) be addresses or constants:
00043 #   - Address symbols are created through the standard labels.
00044 #   - Address symbols may (in future versions) be subject to relocations.
00045 #
00046 # - Symbols may be 'local' by preceding with a '`$`' character: @n
00047 #   Any symbol preceded by a '`$`' will have the last label used prepended
00048 #   to the symbol name. This allows labels and references to be used
00049 #   within a routine without polluting the global namespace.
00050 #
00051 # - Additional instructions:
00052 #   - `DAT <value[,<value> ]*` @n
00053 #     ... will write literal values to the core. @n
00054 #     The DAT instruction is supported in the assembler, and generated in
00055 #     the disassembler for undefined instruction sequences.
00056 #   - `CONST value` @n
00057 #     ... will define a constant symbol with the name given in the label.
00058 #   - `ADDRESS <value>` @n
00059 #     ...  will define an address symbol with the name given in the label.
00060 #
00061 # - Directives: @n
00062 #   `.<directive> <parameters>` allows additional meta operations to be
00063 #   performed. Directives supported:
00064 #   - `.ORIGIN address` @n
00065 #     ... Sets the address for subsequent compilation
00066 #   - `.INCLUDE file` @n
00067 #     ...Includes another file at the current location
00068 #
00069 # @note We should really consider changing the structure so that the
00070 #       DASMIO objects inherit from a parser class, rather than having
00071 #       to reference `$self->{'dasm'}` for parser operations like
00072 #       error().
00073 #
00074 # @file
00075 # @author Justin Fletcher
00076 #
00077 
00078 package DASMIO::Source;
00079 
00080 use DASM;
00081 use DCPU;
00082 
00083 use Carp;
00084 
00085 
00086 ## Whether we're debugging the 'DAT' instruction
00087 my $debug_dat = 0;
00088 
00089 
00090 ## Whether we're debugging the symbol processing
00091 my $debug_sym = 0;
00092 
00093 ##
00094 # Create an object upon which we can work.
00095 #
00096 # @param[in] $proto  Prototype object, or this class's name
00097 # @param[in] $dasm   DASM object we're working on
00098 #
00099 # @return new object
00100 sub new
00101 {
00102     my $proto = shift;
00103     my $dasm  = shift;
00104     my $class = ref($proto) || $proto;
00105     my $self  = {
00106 
00107             # The last label we encountered, for local labelling
00108             'lastlabel' => undef,
00109         };
00110 
00111     bless $self, $class;
00112     
00113     $self->{'dasm'} = $dasm;
00114     
00115     return $self;
00116 }
00117 
00118 
00119 ##
00120 # Identify whether we can handle reading or writing a given file
00121 # by its filename.
00122 #
00123 # @param[in]  $self     Our object
00124 # @param[in]  $filename Filename we're accessing
00125 # @param[in]  $type     'read' or 'write'
00126 #
00127 # @retval 1 if we can access it
00128 # @retval 0 if we cannot access it
00129 sub identify
00130 {
00131     my ($self, $filename, $type) = @_;
00132     
00133     if ($filename =~ /\.dasm$/)
00134     {
00135         return 1;
00136     }
00137     
00138     return 0;
00139 }
00140 
00141 
00142 ##
00143 # List the extensions we support.
00144 #
00145 # @param[in]  $self     Our object
00146 #
00147 # @return arrayref of extensions
00148 sub extensions
00149 {
00150     return [ 'dasm' ];
00151 }
00152 
00153 
00154 ##
00155 # Parse an instruction line.
00156 #
00157 # @param[in] $self    IO object
00158 # @param[in] $line    Line to process
00159 sub parseline
00160 {
00161     my ($self, $line) = @_;
00162     my $dasm = $self->{'dasm'};
00163     
00164     # Trim any trailing newline that might have been left
00165     chomp $line;
00166     
00167     # Directives processed first - consider moving these to a callback.
00168     if ($line =~ s/^\.([A-Za-z]+)\s*//)
00169     {
00170         my $directive = uc $1;
00171         if ($directive eq 'ORIGIN')
00172         {
00173             my ($constant, $symname) = $self->parse_constant(\$line, 'constant');
00174             $dasm->{'pc'} = $constant;
00175         }
00176         elsif ($directive eq 'INCLUDE')
00177         {
00178             if ($line =~ s/"(.*?)"//)
00179             {
00180                 $dasm->readfile($1);
00181             }
00182             else
00183             {
00184                 $dasm->error("Unrecognised INCLUDE '$line'");
00185             }
00186         }
00187         else
00188         {
00189             $dasm->error("Unrecognised directive '$directive'");
00190         }
00191     }
00192     
00193     # We hold on to the label and PC location so that we can assign at the
00194     # end if they were set.
00195     my $label = undef;
00196     my $startpc = $dasm->{'pc'};
00197 
00198     # The initial part of the line may be a label, starting with a ':'
00199     # However, the label isn't always for this address; it may be a constant
00200     # label or some other type of label. So we merely remember it until we
00201     # know what sort of label it is.
00202     if ($line =~ s/^:([\w\$]+)//)
00203     {
00204         $label = $1;
00205         if ($label =~ /^\$/)
00206         {
00207             # A local label, so prepend the last label we saw
00208             if (!defined $self->{'lastlabel'})
00209             {
00210                 $dasm->error("Cannot use local label without a recent label");
00211             }
00212             $label = $self->{'lastlabel'} . $label;
00213         }
00214         else
00215         {
00216             $self->{'lastlabel'} = $label;
00217         }
00218     }
00219     
00220     # Skip any indentation
00221     $line =~ s/^\s*//;
00222     
00223     # Skip any empty lines or comments
00224     if ($line eq '' ||
00225         $line =~ s/^;//)
00226     {
00227         goto line_complete;
00228     }
00229     
00230     # Extract the opcode, if there is one
00231     if ($line =~ s/^([a-zA-Z0-9_]+)\b\s*//)
00232     {
00233         my $opcode = uc $1;
00234         
00235         # First check for magic opcodes that we introduced:
00236         if ($opcode eq 'DAT')
00237         {
00238             # Parse operands until we have none left
00239             while ($line !~ /^\s*$/ &&
00240                    $line !~ /^\s*;/)
00241             {
00242                 # We want to allow string data as well, for simplicity.
00243                 # We do NOT handle UTF-8 - everything is 8 bit.
00244                 if ($line =~ s/^\s*"([^"]+)"// || # "d string
00245                     $line =~ s/^\s*'([^']+)'//)   # 'd string
00246                 {
00247                     print "  string: '$1'\n" if ($debug_dat);
00248                     for my $chr (unpack "c*", $1)
00249                     {
00250                         $dasm->storetopc($chr);
00251                     }
00252                 }
00253                 else
00254                 {
00255                     my ($constant, $symname) = $self->parse_constant(\$line);
00256                     if ($symname)
00257                     {
00258                         print "  symbol $symname\n" if ($debug_dat);
00259                         # Relocation symbol, update the table
00260                         $dasm->{'relocations'}->{ $dasm->{'pc'} } = $symname;
00261                     }
00262                     print "  value $constant\n" if ($debug_dat);
00263                     $dasm->storetopc($constant);
00264                 }
00265                 last if ($line !~ s/^\s*,\s*//);
00266             }
00267             if ($line !~ s/^\s*// &&
00268                 $line !~ s/^\s*;//)
00269             {
00270                 $dasm->error("Garbage after operand to opcode '$opcode'");
00271             }
00272         }
00273         elsif ($opcode eq 'CONST')
00274         {
00275             # Constant value symbol declaration
00276             if (defined $label)
00277             {
00278                 my ($constant, $symname) = $self->parse_constant(\$line, 'constant');
00279                 $dasm->setsymbol($label, $constant, 'constant');
00280                 $label = undef;
00281             }
00282             else
00283             {
00284                 $self->error("CONST must be supplied with a label");
00285             }
00286         }
00287         elsif ($opcode eq 'ADDRESS')
00288         {
00289             # Address value symbol declaration
00290             if (defined $label)
00291             {
00292                 my ($constant, $symname) = $self->parse_constant(\$line, 'constant');
00293                 $dasm->setsymbol($label, $constant, 'address');
00294                 $label = undef;
00295             }
00296             else
00297             {
00298                 $self->error("ADDRESS must be supplied with a label");
00299             }
00300         }
00301 
00302         # Now the real opcodes
00303         elsif ($DCPU::BasicOps{$opcode})
00304         {
00305             # We have a basic opcode
00306             my $opval = $DCPU::BasicOps{$opcode};
00307             
00308             # Parse each operand
00309             my $operand_a = $self->parse_operand(\$line);
00310             if ($line !~ s/^\s*,\s*//)
00311             {
00312                 $dasm->error("Expected operand b to opcode '$opcode'");
00313             }
00314 
00315             my $operand_b = $self->parse_operand(\$line);
00316             if ($line !~ s/^\s*$// &&
00317                 $line !~ s/^\s*;//)
00318             {
00319                 $dasm->error("Garbage after operand b to opcode '$opcode'");
00320             }
00321 
00322             if ($debug_asm)
00323             {
00324                 printf "%04x : %-6s : %x\n", $dasm->{'pc'}, $opcode, $opval;
00325                 printf "  %02x : %-9s : %s\n", $operand_a->{'opval'},
00326                                                $operand_a->{'type'},
00327                                                ($operand_a->{'value'} // '');
00328                 printf "  %02x : %-9s : %s\n", $operand_b->{'opval'},
00329                                                $operand_b->{'type'},
00330                                                ($operand_b->{'value'} // '');
00331             }
00332             $dasm->storetopc($opval +
00333                              ($operand_a->{'opval'} << 4) +
00334                              ($operand_b->{'opval'} << 10));
00335             
00336             if (defined $operand_a->{'nextword'})
00337             {
00338                 if ($operand_a->{'nextword'}->{'symbol'})
00339                 {
00340                     # Relocation symbol, update the table
00341                     $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'};
00342                 }
00343                 $dasm->storetopc($operand_a->{'nextword'}->{'value'});
00344             }
00345 
00346             if (defined $operand_b->{'nextword'})
00347             {
00348                 if ($operand_b->{'nextword'}->{'symbol'})
00349                 {
00350                     # Relocation symbol, update the table
00351                     $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_b->{'nextword'}->{'symbol'};
00352                 }
00353                 $dasm->storetopc($operand_b->{'nextword'}->{'value'});
00354             }
00355         }
00356         elsif ($DCPU::ExtendedOps{$opcode})
00357         {
00358             # We have an extended opcode
00359             my $opval = $DCPU::ExtendedOps{$opcode};
00360             
00361             # Parse the operand
00362             my $operand_a = $self->parse_operand(\$line);
00363             if ($line !~ s/^\s*$// &&
00364                 $line !~ s/^\s*;//)
00365             {
00366                 $dasm->error("Garbage after operand a to opcode '$opcode'");
00367             }
00368 
00369             $dasm->storetopc(($opval<<4) +
00370                              ($operand_a->{'opval'}<<10));
00371             
00372             # Update the core with the following words
00373             if (defined $operand_a->{'nextword'})
00374             {
00375                 if ($operand_a->{'nextword'}->{'symbol'})
00376                 {
00377                     # Relocation symbol, update the table
00378                     $dasm->{'relocations'}->{ $dasm->{'pc'} } = $operand_a->{'nextword'}->{'symbol'};
00379                 }
00380                 $dasm->storetopc($operand_a->{'nextword'}->{'value'});
00381             }
00382         }
00383         else
00384         {
00385             $dasm->error("Bad opcode '$opcode'");
00386         }
00387     }
00388     else
00389     {
00390          $dasm->error("Bad source '$line'");
00391     }
00392 
00393 line_complete:
00394     if (defined $label)
00395     {
00396         $dasm->label($startpc, $label);
00397     }
00398 }
00399 
00400 
00401 ##
00402 # Parse operands and return the values of them
00403 #
00404 # @param[in]     $self      IO object
00405 # @param[in,out] $lineref   Reference to the line that we're parsing
00406 #
00407 # @return hashref of operand details:
00408 #           'type'  =>  'reg'       (register value : register=>regname)
00409 #                       'regaddr'   (register address : register=>regname)
00410 #                       'regindex'  (registered indexed : register>regname)
00411 #                       'pop'       (stack pop)
00412 #                       'peek'      (stack peek)
00413 #                       'push'      (stack push)
00414 #                       'sp'        (stack pointer)
00415 #                       'pc'        (program counter)
00416 #                       'o'         (overflow)
00417 #                       'indirect'  (dereference next address)
00418 #                       'load'      (load next address)
00419 #                       'loadshort' (load short value : value=>short value)
00420 #           'opval' => value in the operand register
00421 #           'register' => register, if applicable
00422 #           'nextword' => next word data, if applicable
00423 #           'value'    => value, if applicable
00424 sub parse_operand
00425 {
00426     my ($self, $lineref) = @_;
00427     my $dasm = $self->{'dasm'};
00428     
00429     #0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order)
00430     if ($$lineref =~ s/^([ABCXYZIJ])\b//i)
00431     {
00432         return {
00433                 'type'     => 'reg',
00434                 'register' => uc $1,
00435                 'opval'    => 0 + $DCPU::RegisterMap{uc $1}
00436             };
00437     }
00438     #0x08-0x0f: [register]
00439     elsif ($$lineref =~ s/^\[\s*([ABCXYZIJ])\s*\]//i)
00440     {
00441         return {
00442                 'type'     => 'regaddr',
00443                 'register' => uc $1,
00444                 'opval'    => 0x08 + $DCPU::RegisterMap{uc $1}
00445             };
00446     }
00447     #0x10-0x17: [next word + register]
00448     #     0x1e: [next word]
00449     elsif ($$lineref =~ s/^\[\s*//)
00450     {
00451         # Variation on syntax, as part of the tests at:
00452         # http://0x10cwiki.com/wiki/Comparison_of_Developer_Tools
00453         # allows for [register + next word] as well.
00454         if ($$lineref =~ s/^([ABCXYZIJ])\s*\+\s*//i)
00455         {
00456             my $reg = uc $1;
00457             # This is the variation.
00458             my $symbol = $self->parse_symbol($lineref);
00459             if ($$lineref =~ s/^\s*\]//)
00460             {
00461                 return {
00462                         'type'     => 'regindex',
00463                         'register' => $reg,
00464                         'nextword' => $symbol,
00465                         'opval'    => 0x10 + $DCPU::RegisterMap{$reg}
00466                     };
00467                 
00468             }
00469             else
00470             {
00471                 $dasm->error("Garbage whilst parsing inverted indexed operand");
00472             }
00473         }
00474         else
00475         {
00476             my $symbol = $self->parse_symbol($lineref);
00477             if ($$lineref =~ s/^\s*\+\s*([ABCXYZIJ])\s*\]//i)
00478             {
00479                 return {
00480                         'type'     => 'regindex',
00481                         'register' => uc $1,
00482                         'nextword' => $symbol,
00483                         'opval'    => 0x10 + $DCPU::RegisterMap{uc $1}
00484                     };
00485             }
00486             elsif ($$lineref =~ s/^\s*\]//)
00487             {
00488                 return {
00489                         'type'     => 'indirect',
00490                         'nextword' => $symbol,
00491                         'opval'    => 0x1e
00492                     };
00493             }
00494             else
00495             {
00496                 $dasm->error("Garbage whilst parsing referenced operand");
00497             }
00498         }
00499     }
00500     #     0x18: POP / [SP++]
00501     elsif ($$lineref =~ s/^POP\b//)
00502     {
00503         return {
00504                 'type'     => 'pop',
00505                 'opval'    => 0x18
00506             };
00507     }
00508     #     0x19: PEEK / [SP]
00509     elsif ($$lineref =~ s/^PEEK\b//)
00510     {
00511         return {
00512                 'type'     => 'peek',
00513                 'opval'    => 0x19
00514             };
00515     }
00516     #     0x1a: PUSH / [--SP]
00517     elsif ($$lineref =~ s/^PUSH\b//i)
00518     {
00519         return {
00520                 'type'     => 'push',
00521                 'opval'    => 0x1a
00522             };
00523     }
00524     #     0x1b: SP
00525     elsif ($$lineref =~ s/^SP\b//i)
00526     {
00527         return {
00528                 'type'     => 'sp',
00529                 'opval'    => 0x1b
00530             };
00531     }
00532     #     0x1c: PC
00533     elsif ($$lineref =~ s/^PC\b//i)
00534     {
00535         return {
00536                 'type'     => 'pc',
00537                 'opval'    => 0x1c
00538             };
00539     }
00540     #     0x1d: O
00541     elsif ($$lineref =~ s/^O\b//)
00542     {
00543         return {
00544                 'type'     => 'o',
00545                 'opval'    => 0x1d
00546             };
00547     }
00548     #     0x1f: next word (literal)
00549     #0x20-0x3f: literal value 0x00-0x1f (literal)
00550     else 
00551     {
00552         # Literal value, which could be a short-form literal value,
00553         # or a symbol
00554         my $symbol = $self->parse_symbol($lineref);
00555         if ($symbol->{'type'} eq 'constant')
00556         {
00557             if ($symbol->{'value'} >= 0 &&
00558                 $symbol->{'value'} < 0x20)
00559             {
00560                 return {
00561                         'type'  => 'loadshort',
00562                         'opval' => 0x20 + $symbol->{'value'},
00563                         'value' => $symbol->{'value'}
00564                     };
00565             }
00566         }
00567         
00568         # Check for overflow
00569         if ($symbol->{'type'} eq 'constant' &&
00570             $symbol->{'value'} > 0xffff)
00571         {
00572             $dasm->error( sprintf "Literal value (0x%x) too large", $symbol->{'value'} );
00573         }
00574         # Check for underflow
00575         if ($symbol->{'type'} eq 'constant' &&
00576             $symbol->{'value'} < 0)
00577         {
00578             # I think we'll allow negative numbers to wrap down, as that's
00579             # useful in the assembler. But those which wrap twice are too
00580             # small.
00581             if ($symbol->{'value'} < -0x10000)
00582             {
00583                 $dasm->error( sprintf "Literal value (0x%x) too small", $symbol->{'value'} );
00584             }
00585             $symbol->{'value'} += 0x10000;
00586         }
00587         return {
00588                 'type'  => 'load',
00589                 'opval' => 0x1f,
00590                 'nextword' => $symbol
00591             };
00592     }
00593 }
00594 
00595 
00596 ##
00597 # Parse a symbol name or value.
00598 #
00599 # @param[in]     $self      DASM object
00600 # @param[in,out] $lineref   Reference to the line that we're parsing
00601 #
00602 # @return hashref of symbol details:
00603 #           'type'  =>  'constant' : a constant value
00604 #                       'address'  : an address constant
00605 #           'value' =>  value
00606 #           'symbol' => any symbol name to use as relocation
00607 sub parse_symbol
00608 {
00609     my ($self, $lineref, $nonterm) = @_;
00610     my $dasm = $self->{'dasm'};
00611 
00612     my %symbol = (
00613             'type'  => 'unknown',
00614             'value' => 0,
00615         );
00616     
00617     if (!defined $lineref)
00618     {
00619         confess "parse_symbol called without a parameter";
00620     }
00621     if (ref $lineref ne 'SCALAR')
00622     {
00623         confess "parse_symbol called with ".ref($lineref)." but expected SCALAR";
00624     }
00625 
00626     # Skip any leading spaces
00627     $$lineref =~ s/^\s*//;
00628     
00629     print "parse_symbol: $$lineref\n" if ($debug_sym);
00630     
00631     if ($$lineref =~ s/^\(//)
00632     {
00633         # Opening bracket for an expression, so parse inside the
00634         # brackets
00635         my $innersymbol = $self->parse_symbol($lineref);
00636         
00637         %symbol = %$innersymbol;
00638         
00639         # Check that we ended properly
00640         if ($$lineref !~ s/^\s*\)//)
00641         {
00642             $dasm->error("Missing closing bracket");
00643         }
00644     }
00645     elsif ($$lineref =~ s/^([\$A-Za-z_][\w\$]*)//)
00646     {
00647         # Symbol reference
00648         my $symname = $1;
00649 
00650         # Check for local symbols and redefine them for the local label
00651         if ($symname =~ /^\$/)
00652         {
00653             if (!defined $self->{'lastlabel'})
00654             {
00655                 $dasm->error("Cannot use a local label before a label has been set");
00656             }
00657             $symname = $self->{'lastlabel'} . $symname;
00658         }
00659 
00660         my $sym = $dasm->getsymbol($symname);
00661         # Hmm. Whilst decoding the symbol here makes some sense, we could
00662         # just leave it for the linkage stage.
00663         #if (defined $sym)
00664         #{
00665         #    # Symbol has been defined
00666         #    %symbol = ( %$sym,
00667         #                'symbol' => $symname
00668         #              );
00669         #}
00670         #else
00671         {
00672             %symbol = (
00673                     'type'   => 'address',
00674                     'value'  => 0,
00675                     'symbol' => $symname
00676                 );
00677         }
00678     }
00679     elsif ($$lineref =~ s/^([+-]?0x[A-Fa-f0-9]+)// ||
00680            $$lineref =~ s/^([+-]?\d+)//)
00681     {
00682         # Hex or decimal value
00683         my $value = eval $1;
00684         %symbol = (
00685                 'type' => 'constant',
00686                 'value' => $value
00687             );
00688     }
00689     elsif ($$lineref =~ s/^'(.)'//)
00690     {
00691         # character, so convert to an ordinal
00692         %symbol = (
00693                 'type' => 'constant',
00694                 'value' => ord($1)
00695             );
00696     }
00697     else
00698     {
00699         $dasm->error("Unrecognised symbol parsing '$$lineref'");
00700     }
00701     
00702     # We need to special case the syntax here, because the defined format
00703     # of the assembler allows for register names here (this could be
00704     # refined by having a more complex symbol definition).
00705     if ($$lineref =~ /\+\s*[ABCXYZIJ]\b/i)
00706     {
00707         # This is a relative reference, so don't try to perform arithmetic
00708         # here.
00709         return \%symbol;
00710     }
00711     
00712     if ($$lineref =~ s/^\s*([\+\-\*\/\%\&\|\^]|<<|>>)//)
00713     {
00714         # Operators - note that this is wrong WRT precedence as it won't
00715         # make *, / and % higher priority than + and -. I'm not fussed
00716         # right now. FIXME
00717         my $operator = $1;
00718         my $second = $self->parse_symbol($lineref);
00719         if (defined $symbol{'symbol'} &&
00720             $operator ne '+')
00721         {
00722             $dasm->error("Symbol '$symbol{'symbol'}' not yet defined in arithmetic operation (only additive operations allowed)");
00723         }
00724         if (defined $symbol{'symbol'} &&
00725             defined $second->{'symbol'})
00726         {
00727             $dasm->error("Relocation by multiple symbols ('$symbol{'symbol'}' and '$second->{'symbol'}') not supported");
00728         }
00729         
00730         if (defined $second->{'symbol'})
00731         {
00732             # Swap them around so that the symbol is in our local
00733             # This is safe because the only type of symbol operation we
00734             # support is additive.
00735             my %copy = %symbol;
00736             %symbol = %$second;
00737             $second = \%copy;
00738         }
00739         
00740         if ($operator eq '+')
00741         { $symbol{'value'} += $second->{'value'}; }
00742         elsif ($operator eq '-')
00743         { $symbol{'value'} -= $second->{'value'}; }
00744         elsif ($operator eq '*')
00745         { $symbol{'value'} *= $second->{'value'}; }
00746         elsif ($operator eq '/')
00747         { $symbol{'value'} /= $second->{'value'}; }
00748         elsif ($operator eq '%')
00749         { $symbol{'value'} %= $second->{'value'}; }
00750         elsif ($operator eq '&')
00751         { $symbol{'value'} &= $second->{'value'}; }
00752         elsif ($operator eq '|')
00753         { $symbol{'value'} |= $second->{'value'}; }
00754         elsif ($operator eq '^')
00755         { $symbol{'value'} ^= $second->{'value'}; }
00756         elsif ($operator eq '<<')
00757         { $symbol{'value'} <<= $second->{'value'}; }
00758         elsif ($operator eq '>>')
00759         { $symbol{'value'} >>= $second->{'value'}; }
00760         else
00761         {
00762             die "Rogue operator $operator - should not happen";
00763         }
00764 
00765         if ($second->{'type'} eq 'address')
00766         {
00767             $symbol{'type'} = $second->{'type'};
00768         }
00769     }
00770     
00771     return \%symbol;
00772 }
00773 
00774 ##
00775 # Obtain a constant from an input line.
00776 #
00777 # @param[in]     $self      DASMIO object
00778 # @param[in,out] $lineref   reference to line to read from
00779 # @param[in]     $symbol    'constant' : only a constant is acceptable
00780 #                           'symbol'   : symbol is acceptable
00781 #
00782 # @return $value    The value to store
00783 #         $symbol   Relocation symbol to apply, or none if not found
00784 sub parse_constant
00785 {
00786     my ($self, $lineref, $symbol) = @_;
00787     
00788     $symbol = 'constant' if (!defined $symbol);
00789     
00790     print "Constant: $$lineref\n" if ($debug_dat);
00791 
00792     my $operand = $self->parse_operand($lineref);
00793     print "  type $operand->{'type'}\n" if ($debug_dat);
00794 
00795     # The only types of operand we support here are the
00796     # nextword and constant value operands - 'load' and 'loadshort'
00797     my $sym = undef;
00798     if ($operand->{'type'} eq 'load')
00799     {
00800         $sym = $operand->{'nextword'};
00801     }
00802     elsif ($operand->{'type'} eq 'loadshort')
00803     {
00804         $sym = {
00805                 'type' => 'constant',
00806                 'value' => $operand->{'value'}
00807             };
00808     }
00809     else
00810     {
00811         $dasm->error("Constant operand expected, not type $operand->{'type'}");
00812     }
00813 
00814     if ($sym->{'symbol'} &&
00815         $symbol eq 'constant')
00816     {
00817         # A symbol which we know about and which is a constant.
00818         my $symval = $dasm->getsymbol($symname);
00819         if (defined $symval &&
00820             $symval->{'type'} eq 'constant')
00821         {
00822             # We can interpret it here immediately
00823             return ($sym->{'value'} + $symval->{'value'}, undef);
00824         }
00825         else
00826         {
00827             $dasm->error("Symbol operand not expected");
00828         }
00829     }
00830     
00831     return ($sym->{'value'}, $symval->{'symbol'});
00832 }
00833 
00834 
00835 
00836 ###########################################################################
00837 # Disassembly routines
00838 # (these are included in the source parser, so that we can decode the
00839 # compiled files to re-compilable files)
00840 
00841 
00842 ##
00843 # Write out a compilable source file based on what we have in the core.
00844 #
00845 # @param[in]  $self     Address object
00846 #
00847 # @return     Content to write
00848 sub write
00849 {
00850     my ($self) = @_;
00851     my $dasm = $self->{'dasm'};
00852     my $core = $dasm->{'core'};
00853 
00854     # Easier access to symbols
00855     my $symbols = $dasm->{'symbols'};
00856     
00857     # Collect a list of address symbols
00858     my %addressSymbols = map {
00859             $symbols->{$_}->{'value'} => $_
00860         } grep {
00861             $symbols->{$_}->{'type'} eq 'address'
00862         } keys %$symbols;
00863     
00864     # The lines we'll return
00865     my @lines;
00866     
00867     # If we have any constants, we need to write them out first
00868     my @constants = grep {
00869                         $symbols->{$_}->{'type'} eq 'constant'
00870                     } keys %$symbols;
00871     if (scalar(@constants) != 0)
00872     {
00873         push @lines, "; Constants";
00874     }
00875     for my $sym (@constants)
00876     {
00877         push @lines, sprintf "%-16s%-8s0x%04x",
00878                              ":$sym ",
00879                              "CONST",
00880                              $symbols->{$sym}->{'value'};
00881     }
00882 
00883     my $lastaddress = -1;
00884     
00885     # We have to deal with the addresses in order, because we might
00886     # have a corrupt file that ends before the data or which is
00887     # discontinuous.
00888     my @addresses = sort { $a <=> $b } keys %$core;
00889     if (scalar(@addresses) != 0)
00890     {
00891         push @lines, "; Core";
00892     }
00893     while (defined (my $address = shift @addresses))
00894     {
00895         my $value = $core->{$address};
00896         
00897         # We never write out hardware
00898         next if (ref $value ne '');
00899 
00900         if ($lastaddress+1 != $address)
00901         {
00902             # If we have discontinous core data, we need to change the
00903             # origin.
00904             push @lines, sprintf ".ORIGIN 0x%04x", $address;
00905         }
00906         
00907         if (defined $addressSymbols{$address})
00908         {
00909             push @lines, ":$addressSymbols{$address}";
00910         }
00911         
00912         my $opcode = $value & 15;
00913         my $opcodename;
00914         
00915         # List of the memory locations associated with this instruction
00916         my @memdata = ($value);
00917         
00918         # List of the decoded operands
00919         my @operands; 
00920         if ($opcode !=0)
00921         {
00922             # Basic opcode
00923             $opcodename = $DCPU::InvBasicOps{$opcode};
00924             my $operand_a = ($value>>4) & 63;
00925             my $operand_b = ($value>>10) & 63;
00926             
00927             # Decode each operand into printable descriptions, pulling data
00928             # out of nextword as necessary.
00929             push @operands, $self->decode_operand($operand_a,
00930                                                   $address + scalar(@memdata),
00931                                                   \@memdata);
00932 
00933             push @operands, $self->decode_operand($operand_b,
00934                                                   $address + scalar(@memdata),
00935                                                   \@memdata);
00936             
00937             if (!defined $opcodename)
00938             {
00939                 die "No such opcode: $opcode\n";
00940             }
00941         }
00942         else
00943         {
00944             # Extended opcodes
00945             $opcode = ($value>>4) & 63;
00946             $opcodename = $DCPU::InvExtendedOps{$opcode};
00947             if (!defined $opcodename)
00948             {
00949                 # Invalid opcode, so we treat as data
00950                 $opcodename = "DAT";
00951                 @operands = ($value);
00952             }
00953             else
00954             {
00955                 # a valid extended opcode
00956                 my $operand_a = ($value>>10) & 63;
00957 
00958                 push @operands, $self->decode_operand($operand_a,
00959                                                       $address + 1,
00960                                                       \@memdata);
00961             }
00962         }
00963 
00964         # Convert the memdata into presentable hex values
00965         @memdata = map { defined $_ ? sprintf "%04x", $_ : '----' } @memdata;
00966 
00967         # Construct the output line
00968         push @lines, sprintf "%-16s%-8s%-32s ; %04x: %s",
00969                                  "",
00970                                  $opcodename,
00971                                  (join ", ", @operands),
00972                                  $address,
00973                                  (join " ", @memdata);
00974         
00975         # Skip the data we've just read off the address list
00976         for (2..scalar(@memdata))
00977         {
00978             # We want to skip only the entries from the list that are actually
00979             # valid - the core might be discontinuous.
00980             if (defined $addresses[0] &&
00981                 $addresses[0] == $address+1)
00982             {
00983                 shift @addresses;
00984             }
00985             $address++;
00986         }
00987         $lastaddress = $address;
00988     }
00989 
00990     return join "\n", @lines, "";
00991 }
00992 
00993 
00994 ##
00995 # Decode an operand into a string.
00996 #
00997 # @param[in] $self     IO object
00998 # @param[in] $operand  Operand value
00999 # @param[in] $address  Address of 'nextword'
01000 # @param[in,out] $memdataref    Reference to an address of data read from memory
01001 #                               which we will update if we read any data
01002 #
01003 # @return Operand definition string
01004 sub decode_operand
01005 {
01006     my ($self, $operand, $address, $memdataref) = @_;
01007     
01008     # 0x00-0x07: register (A, B, C, X, Y, Z, I or J, in that order)
01009     if ($operand >= 0 && $operand <= 7)
01010     {
01011         return $DCPU::InvRegisterMap{ $operand & 7 };
01012     }
01013     # 0x08-0x0f: [register]
01014     elsif ($operand >= 0x8 && $operand <= 0xf)
01015     {
01016         return '[' . $DCPU::InvRegisterMap{ $operand & 7 } . ']';
01017     }
01018     # 0x10-0x17: [next word + register]
01019     elsif ($operand >= 0x10 && $operand <= 0x17)
01020     {
01021         my $nextword = $self->decode_nextword($address, $memdataref);
01022         return "[$nextword + " . $DCPU::InvRegisterMap{ $operand & 7 } . ']';
01023     }
01024     #      0x18: POP / [SP++]
01025     elsif ($operand == 0x18)
01026     {
01027         return 'POP';
01028     }
01029     #      0x19: PEEK / [SP]
01030     elsif ($operand == 0x19)
01031     {
01032         return 'PEEK';
01033     }
01034     #      0x1a: PUSH / [--SP]
01035     elsif ($operand == 0x1a)
01036     {
01037         return 'PUSH';
01038     }
01039     #      0x1b: SP
01040     elsif ($operand == 0x1b)
01041     {
01042         return 'SP';
01043     }
01044     #      0x1c: PC
01045     elsif ($operand == 0x1c)
01046     {
01047         return 'PC';
01048     }
01049     #      0x1d: O
01050     elsif ($operand == 0x1d)
01051     {
01052         return 'O';
01053     }
01054     #      0x1e: [next word]
01055     elsif ($operand == 0x1e)
01056     {
01057         my $nextword = $self->decode_nextword($address, $memdataref);
01058         return "[$nextword]";
01059     }
01060     #      0x1f: next word (literal)
01061     elsif ($operand == 0x1f)
01062     {
01063         my $nextword = $self->decode_nextword($address, $memdataref);
01064         return "$nextword";
01065     }
01066     #  0x20-0x3f: literal value 0x00-0x1f (literal)
01067     elsif ($operand >= 0x20 && $operand <= 0x3f)
01068     {
01069         return $operand - 0x20;
01070     }
01071     else
01072     {
01073         return "!Invalid: $operand!";
01074     }
01075 }
01076 
01077 
01078 ##
01079 # Read the next word from a memory address, whilst decoding operands.
01080 # Apply any relocations that we know about such that the decode comes
01081 # out as readable.
01082 #
01083 # The actual value in memory is appended to the memdataref list.
01084 # If the memory is not known, we return '?' and an undef entry in the array.
01085 #
01086 # @param[in] $self     IO object
01087 # @param[in] $address  Address of 'nextword'
01088 # @param[in,out] $memdataref    Reference to an address of data read from memory
01089 #                               which we will update if we read any data
01090 #
01091 # @return extword string
01092 sub decode_nextword
01093 {
01094     my ($self, $address, $memdataref) = @_;
01095     my $dasm = $self->{'dasm'};
01096     my $core = $dasm->{'core'};
01097     
01098     my $value = $dasm->{'core'}->{$address};
01099 
01100     # We're using it, so push it on to our stack
01101     push @$memdataref, $value;
01102 
01103     if (!defined $value)
01104     {
01105         # No value, which means that the core is truncated or discontinuous
01106         # around an invalid location, so we return '?'
01107         return '?';
01108     }
01109 
01110     # Check whether we have a relocation at that address
01111     if (defined $dasm->{'relocations'}->{$address})
01112     {
01113         # We do... which means that we have to perform report the symbol
01114         # of the relocation as part of a calculation.
01115         
01116         if ($value == 0)
01117         {
01118             # However, the offset appears to be 0, so we can just use the
01119             # symbol name.
01120             return "$dasm->{'relocations'}->{$address}";
01121         }
01122         else
01123         {
01124             return "($value + $dasm->{'relocations'}->{$address})";
01125         }
01126     }
01127     
01128     if ($value >= 256)
01129     {
01130         return sprintf "0x%04x", $value;
01131     }
01132     
01133     return sprintf "0x%02x", $value;
01134 }
01135 
01136 
01137 # Must return true
01138 1;