附3:软件驱动#include <stdio.h>#include "platform.h"#include "xbasic_types.h"#include "xparameters.h"#include "xil_io.h"#define test_speedint res[1000][1000];void delay() { int i, j, k; for (i = 0; i < 1000; i++) { for (j = 0; j < 1000; j++) { for (k = 0; k < 100; k++) ; } }}void show_reg() { int i; u32 result; printf("\n============SHOW REG ================\n"); for (i = 0; i < 9; i++) { result = Xil_In32(XPAR_CONV_0_S00_AXI_BASEADDR + 4 * i); printf("Reg %3d : %u\n", i, result); }}void load_kernel(int filter[3][3]) { UINTPTR kernel_addr = (UINTPTR) XPAR_CONV_0_S00_AXI_BASEADDR + 36; Xil_Out32(kernel_addr, filter[0][0]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[0][1]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[0][2]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[1][0]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[1][1]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[1][2]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[2][0]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[2][1]); kernel_addr = kernel_addr + 0x4; Xil_Out32(kernel_addr, filter[2][2]);}void test_set() { Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 8, 3); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 20, 22); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 32, 16); printf("1\n"); show_reg(); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 8, 21); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 20, 20); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 32, 14); printf("2\n"); show_reg(); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 8, 19); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 20, 13); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 32, 7); printf("3\n"); show_reg();}void Conv_SW(int filter[3][3], int arr[100][100], int arrW, int arrH) { int i, j; i = 2; j = 2; for (i = 2; i < arrH; i++) { for (j = 2; j < arrW;j++){ res[i][j] = 0; res[i][j] += filter[0][0] * arr[i - 1][j - 1]; res[i][j] += filter[0][1] * arr[i - 1][j]; res[i][j] += filter[0][2] * arr[i - 1][j + 1]; res[i][j] += filter[1][0] * arr[i][j - 1]; res[i][j] += filter[1][1] * arr[i][j]; res[i][j] += filter[1][2] * arr[i][j + 1]; res[i][j] += filter[2][0] * arr[i + 1][j - 1]; res[i][j] += filter[2][1] * arr[i + 1][j]; res[i][j] += filter[2][2] * arr[i + 1][j + 1]; } }}void Conv_HW(int filter[3][3], int arr[100][100], int arrW, int arrH) { int i, j; i = 2; j = 2; for (i = 2; i < arrH; i++) { //pre load Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 8, arr[i - 1][j - 1]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 20, arr[i][j - 1]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 32, arr[i + 1][j - 1]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 8, arr[i - 1][j]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 20, arr[i][j]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 32, arr[i + 1][j]); for (j = 2; j < arrW; j++) { Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 8, arr[i - 1][j + 1]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 20, arr[i][j + 1]); Xil_Out32(XPAR_CONV_0_S00_AXI_BASEADDR + 32, arr[i + 1][j + 1]); res[i][j] = Xil_In32(XPAR_CONV_0_S00_AXI_BASEADDR + 72); } }}int main() { printf("HELLO WORLD"); u32 result; int filterW = 3; int filterH = 3; int arrW = 5; int arrH = 5; int resW = filterW + arrW - 1; int resH = filterH + arrH - 1; int i, j; int pFilter[3][3]; int arr[100][100]; UINTPTR cur_addr = (UINTPTR) XPAR_CONV_0_S00_AXI_BASEADDR; pFilter[0][0] = 1; pFilter[0][1] = 3; pFilter[0][2] = 1; pFilter[1][0] = 0; pFilter[1][1] = 5; pFilter[1][2] = 0; pFilter[2][0] = 2; pFilter[2][1] = 1; pFilter[2][2] = 2; init_platform(); for (i = 0; i < 9; i++) { Xil_Out32(cur_addr, 0); cur_addr = cur_addr + 4; } load_kernel(pFilter); printf("Kernel Loaded\n");#ifdef test_single test_set(); result = Xil_In32(XPAR_CONV_0_S00_AXI_BASEADDR + 72); printf("Test Set Result %u", result); show_reg();#endif#ifdef test_func srand(10); arrW = 20; arrH = 20; resH = filterH + arrH - 1; resW = filterW + arrW - 1; for (i = 0; i < arrH; i++) { for (j = 0; j < arrW; j++) { arr[i][j] = rand() % 20; } } printf("*********************************************** \n"); printf("Filter: \n"); for (i = filterH - 1; i >= 0; i--) { for (j = filterW - 1; j >= 0; j--) { printf("%d ", pFilter[i][j]); } printf("\n"); } printf("*********************************************** \n"); printf("Matrix: \n"); for (i = 0; i < arrH; i++) { for (j = 0; j < arrW; j++) { printf("%4d ", arr[i][j]); } printf("\n"); } printf("*********************************************** \n"); printf("Software Start!\n"); Conv_SW(pFilter, arr, arrW, arrH); printf("\nSoftware end!\n"); printf("*********************************************** \n"); printf("Result1: \n"); for (i = 0; i < resH; i++) { for (j = 0; j < resW; j++) { printf("%5d ", res[i][j]); } printf("\n"); } for (i = 0; i < resH; i++) { for (j = 0; j < resW; j++) { res[i][j] = 0; } } printf("*********************************************** \n"); printf("HardWare Start!\n"); Conv_HW(pFilter, arr, arrW, arrH); printf("\nHardWare end!"); printf("Result2: \n"); for (i = 0; i < resH; i++) { for (j = 0; j < resW; j++) { printf("%5d ", res[i][j]); } printf("\n"); } printf("*********************************************** \n");#endif#ifdef test_speed arrW = 500; arrH = 500; resH = filterH + arrH - 1; resW = filterW + arrW - 1; printf("Software Start!\n"); for(i = 0; i< 200;i++) { Conv_SW(pFilter, arr, arrW, arrH); } printf("\nSoftware end!\n"); printf("HardWare Start!\n"); for(i = 0; i< 200;i++) { Conv_HW(pFilter, arr, arrW, arrH); } printf("\nHardWare end!"); cleanup_platform();#endif return 0;} |