/* * va_optimizer3.c - Brute force search for optimum vertical adder * * T. Scott Dattalo * FEB03 * * This program exhaustively searchs a subset of PIC instructions * for the optimal solution of the Vertical Adder problem. * * License: * * Modified BSD. * * Copyright (c) 2003, T. Scott Dattalo * All rights reserved. * * Redistribution and use in source and binary forms, * with or without modification, are permitted provided * that the following conditions is met: * * * Redistributions of source code must retain the above * copyright notice, this list of conditions and the following disclaimer. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* Copied from the comments of the first hand-written version of this: ;********************************************************************* ;vertical_adder ; ; The purpose of this routine is to add two numbers that are ;oriented 'vertically' instead of horizontally. In other ;words, the least significant bit of each variable is in one file register, ;the next bit is in another register, and so on. This method of addition ;makes it possible to perform 8 simultaneous additions. ; ; The arithmetic operation performed given A and B are the two numbers ;being added together and C is the carry is: ; ; sum = A + B + C ; B+ = sum & 1 ;next state of B is the LSB of the sum of the 3 bits ; C+ = sum >> 1 ;The carry out is set if the sum is greater than 1 ; ; The truth table and boolean equations (along with their Karnaugh maps) ;are: ; ; Truth table: \BC \BC ; A \ 00 01 11 10 A \ 00 01 11 10 ; inputs | outputs +---+---+---+---+ +---+---+---+---+ ; A B C | B+ C+ 0 | | 1 | | 1 | 0 | | | 1 | | ; -------+--------- +---+---+---+---+ +---+---+---+---+ ; 0 0 0 | 0 0 1 | 1 | | 1 | | 1 | | 1 | 1 | 1 | ; 0 0 1 | 1 0 +---+---+---+---+ +---+---+---+---+ ; 0 1 0 | 1 0 B+ = A ^ B ^ C C+ = AB | BC | AC ; 0 1 1 | 0 1 ; 1 0 0 | 1 0 ; 1 0 1 | 0 1 ; 1 1 0 | 0 1 ; 1 1 1 | 1 1 ; ; Generating B+ is relatively straight forward. However, the carry out ;is somewhat difficult. The in-line comments describe the boolean equations ;at each step of the program. Though it's probably easier to follow the ;logic with Karnaugh maps. ; ; C+ = (~B+)&A | BC = (~(A^B^C)&A) | BC = AB | BC | AC ; ; \BC \BC \BC ; A \ 00 01 11 10 A \ 00 01 11 10 A \ 00 01 11 10 ; +---+---+---+---+ +---+---+---+---+ +---+---+---+---+ ; 0 | 1 | | 1 | | 0 | | | | | 0 | | | | | ; +---+---+---+---+ AND +---+---+---+---+ = +---+---+---+---+ ; 1 | | 1 | | 1 | 1 | 1 | 1 | 1 | 1 | 1 | | 1 | | 1 | ; +---+---+---+---+ +---+---+---+---+ +---+---+---+---+ ; ;followed by: ; ; ; \BC \BC \BC ; A \ 00 01 11 10 A \ 00 01 11 10 A \ 00 01 11 10 ; +---+---+---+---+ +---+---+---+---+ +---+---+---+---+ ; 0 | | | | | 0 | | | 1 | | 0 | | | 1 | | ; +---+---+---+---+ OR +---+---+---+---+ = +---+---+---+---+ ; 1 | | 1 | | 1 | 1 | | | 1 | | 1 | | 1 | 1 | 1 | ; +---+---+---+---+ +---+---+---+---+ +---+---+---+---+ ; ==================== ; ; INPUTS ; A0 - 8 bits of the first addend ; B0 - 8 bits of the second addend ; CARRY - 8 bits of the carry-in ; ; OUTPUTS: ; B0 - 8 bits of the result ; CARRY - 8 bits of the carry-out ; vertical_adder MOVF CARRY,W ;W = C XORWF A0,W ;W = A ^ C XORWF B0,F ;B+ = A ^ B ^ C XORWF B0,W ;W = (A^B^C) ^ (A^C) = B ANDWF CARRY,F ;C = B&C COMF B0,W ;W = ~(A^B^C) ANDWF A0,W ;W = ~(A^B^C) & A ; = A&~B&C | A&B&~C IORWF CARRY,F ;C = A&~B&C | A&B&~C | B&C ; = A&C | A&B | B&C Dmitry Kiryashov showed that this could be more efficiently written like: MOVWF temp ;Save carries XORWF A0,W ;W = A ^ C XORWF B0,F ;B+ = A ^ B ^ C IORWF temp,F ;C = (A ^ C) | C ANDWF B0,W ;W = (A ^ C) & (A ^ B ^ C) ; = (A ^ C ) & ~B XORWF temp,W ;Carry out = ((A ^ C) & ~B ) ^ ((A ^ C) | C) ; = (~A&~B&C | A&~B&~C) ^ (~A&C | A&~C |C) ; = (~A&~B&C | A&~B&~C) ^ (A | C) Note, we may not be interested in carry for the first bit. If that's the case then rewrite the Truth Table: ; ; Truth table: \B \B ; A \ 0 1 A \ 0 1 ;inputs | outputs +---+---+ +---+---+ ; A B | B+ C+ 0 | | 1 | 0 | | | ; -----+--------- +---+---+ +---+---+ ; 0 0 | 0 0 1 | 1 | | 1 | | 1 | ; 0 1 | 1 0 +---+---+ +---+---+ ; 1 0 | 1 0 B+ = A ^ B C+ = AB ; 1 1 | 0 1 There are many ways to implement this; here's one solution: MOVF A0,W ; XORWF B0,F ;B+ = A ^ B XORWF B0,W ;W = A ^ B ^ A = B ANDWF A0,W ;W = A & B Macros: ------- Here are PIC assembly Macros that implement the adder. VA_LSB macro A, B MOVF A,W ; XORWF B,F ;B+ = A ^ B XORWF B,W ;W = A ^ B ^ A = B ANDWF A,W ;W = A & B endm VA macro A,B,t MOVWF t ;Save carries XORWF A,W ;W = A ^ C XORWF B,F ;B+ = A ^ B ^ C IORWF t,F ;C = (A ^ C) | C ANDWF B,W ;W = (A ^ C) & (A ^ B ^ C) ; = (A ^ C ) & ~B XORWF t,W ;Carry out = ((A ^ C) & ~B ) ^ ((A ^ C) | C) ; = (~A&~B&C | A&~B&~C) ^ (~A&C | A&~C |C) ; = (~A&~B&C | A&~B&~C) ^ (A | C) endm To implement a 8, 4-bit vertical adders, then write: VA_LSB A0,B0 VA A1,B1,temp VA A2,B2,temp VA A3,B3,temp Note that this expands into 22 instructions (4 + 3*6). */ /* input variables */ unsigned int a, b, w, t, k; unsigned int i, looping, count; /* There's a function for instruction that is simulated. An instance the op_code_struct is created for each instruction, This structure contains a pointer to the function that simulates the instruction. */ struct op_code_struct { void (*pF_opcode) (void); /* pointer to the function that simulates this opcode */ char *mnemonic; /* disassembled mnemonic of the opcode */ }; /* Forward reference, function prototypes. */ void MOVWF_a(void); void MOVFW_a(void); void ANDWF_a(void); void ANDFW_a(void); void IORWF_a(void); void IORFW_a(void); void XORWF_a(void); void XORFW_a(void); void COMF_a(void); void COMF_aw(void); void MOVWF_b(void); void MOVFW_b(void); void ANDWF_b(void); void ANDFW_b(void); void IORWF_b(void); void IORFW_b(void); void XORWF_b(void); void XORFW_b(void); void COMF_b(void); void COMF_bw(void); void MOVWF_t(void); void MOVFW_t(void); void ANDWF_t(void); void ANDFW_t(void); void IORWF_t(void); void IORFW_t(void); void XORWF_t(void); void XORFW_t(void); void COMF_t(void); void COMF_tw(void); void MOVLW_k(void); void MOVLW_k_(void); void ANDLW_k(void); void ANDLW_k_(void); void IORLW_k(void); void IORLW_k_(void); void XORLW_k(void); void XORLW_k_(void); void CLRF_t(void); void XORLW_ff(void); void NOP(void); /* Declare the set of instructions we wish to test. */ struct op_code_struct instructions[] = { {NOP, "NOP"}, // The algorithm assumes that NOP is the first entry. {MOVWF_a, "MOVWF A0"}, {MOVFW_a, "MOVF A0,W"}, {ANDWF_a, "ANDWF A0,F"}, {ANDFW_a, "ANDWF A0,W"}, {XORWF_a, "XORWF A0,F"}, {XORFW_a, "XORWF A0,W"}, {IORWF_a, "IORWF A0,F"}, {IORFW_a, "IORWF A0,W"}, {COMF_a, "COMF A0,F"}, {COMF_aw, "COMF A0,W"}, {MOVWF_b, "MOVWF B0"}, {MOVFW_b, "MOVF B0,W"}, {ANDWF_b, "ANDWF B0,F"}, {ANDFW_b, "ANDWF B0,W"}, {XORWF_b, "XORWF B0,F"}, {XORFW_b, "XORWF B0,W"}, {IORWF_b, "IORWF B0,F"}, {IORFW_b, "IORWF B0,W"}, {COMF_b, "COMF B0,F"}, {COMF_bw, "COMF B0,W"}, {MOVWF_t, "MOVWF temp"}, {MOVFW_t, "MOVF temp,W"}, {ANDWF_t, "ANDWF temp,F"}, {ANDFW_t, "ANDWF temp,W"}, {XORWF_t, "XORWF temp,F"}, {XORFW_t, "XORWF temp,W"}, {IORWF_t, "IORWF temp,F"}, {IORFW_t, "IORWF temp,W"}, {COMF_t, "COMF temp,F"}, {COMF_tw, "COMF temp,W"}, //{MOVLW_k, "MOVLW 0xff"}, //{MOVLW_k_,"MOVLW 0x00"}, //{ANDLW_k, "ANDLW 0xff"}, //{ANDLW_k_,"ANDLW 0x00"}, //{XORLW_k, "XORLW 0xff"}, //{XORLW_k_,"XORLW 0x00"}, //{IORLW_k, "IORLW 0xff"}, //{IORLW_k_,"IORLW 0x00"}, {CLRF_t, "CLRF temp"}, {XORLW_ff,"XORLW 0xff"} // {nop,"nop"} }; #define TOTAL_INSTRUCTIONS (sizeof(instructions) / sizeof(struct op_code_struct)) void NOP(void) { } void MOVWF_a(void) { a = w; } void MOVFW_a(void) { w = a; } void ANDWF_a(void) { a &= w; } void ANDFW_a(void) { w &= a; } void XORWF_a(void) { a ^= w; } void XORFW_a(void) { w ^= a; } void IORWF_a(void) { a |= w; } void IORFW_a(void) { w |= a; } void COMF_a(void) { a = ~a; } void COMF_aw(void) { w = ~a; } void MOVWF_b(void) { b = w; } void MOVFW_b(void) { w = b; } void ANDWF_b(void) { b &= w; } void ANDFW_b(void) { w &= b; } void XORWF_b(void) { b ^= w; } void XORFW_b(void) { w ^= b; } void IORWF_b(void) { b |= w; } void IORFW_b(void) { w |= b; } void COMF_b(void) { b = ~b; } void COMF_bw(void) { w = ~b; } void MOVWF_t(void) { t = w; } void MOVFW_t(void) { w = t; } void ANDWF_t(void) { t &= w; } void ANDFW_t(void) { w &= t; } void XORWF_t(void) { t ^= w; } void XORFW_t(void) { w ^= t; } void IORWF_t(void) { t |= w; } void IORFW_t(void) { w |= t; } void COMF_t(void) { t = ~t; } void COMF_tw(void) { w = ~t; } void MOVLW_k(void) { w = k; } void MOVLW_k_(void) { w = ~k; } void ANDLW_k(void) { w &= k; } void ANDLW_k_(void) { w &= ~k; } void XORLW_k(void) { w ^= k; } void XORLW_k_(void) { w ^= ~k; } void IORLW_k(void) { w |= k; } void IORLW_k_(void) { w |= ~k; } void CLRF_t(void) { t = 0; } void XORLW_ff(void) { w = ~w; } //------------------------------------------------------------------ // If "LSB" is defined then we'll search for 4-instruction sequences // when there is no carry in. // //#define LSB #ifdef LSB #define PROGRAM_SIZE 4 #else #define PROGRAM_SIZE 6 #endif unsigned int program[PROGRAM_SIZE+1]; int main(int argc, char ** argv) { printf("vertical adder optimizer %d\n",TOTAL_INSTRUCTIONS); printf("%s\n",instructions[0].mnemonic); /* Initialize the program: */ for(i = 0; i= TOTAL_INSTRUCTIONS) { // If all instructions have been tested for slot 'i' // then increment the next slot program[i] = 1; program[i+1]++; } instructions[program[i]].pF_opcode(); } /* Check the Results The output results are checked against the truth table. See above. Since there are only 8 inputs/outputs we need to mask the upper bits. */ b &= 0xff; w &= 0xff; a &= 0xff; #ifdef LSB if((b == 0x06) && (w == 0x08) && (a==0x0C)) { #else if((b == 0x96) && (w == 0xe8) && (a==0xf0)) { #endif printf("Found a solution!\n"); for(i = 0; i