Code Monkey home page Code Monkey logo

Comments (2)

ZhangGe6 avatar ZhangGe6 commented on August 21, 2024

The header files, utility functions, and makefile I use to compile are included here to help with debugging.

// MMult.h
// Three matrix-multiply entry points sharing one signature.
// NOTE(review): their definitions are not shown here. Presumably m, k and n are
// the problem dimensions, A, B and C are the operand/result buffers, and
// lda, ldb and ldc are the row strides used by the A(i, j)/B(i, j)/C(i, j)
// macros in params.h -- confirm against the MMult*.c sources.
void MMult_base(int m, int k, int n, double *A, double *B, double *C, int lda, int ldb, int ldc);
void MMult_unroll(int m, int k, int n, double *A, double *B, double *C, int lda, int ldb, int ldc);
void MMult_unroll_inner(int m, int k, int n, double *A, double *B, double *C, int lda, int ldb, int ldc);
// utils.h
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include "assert.h" 

// Matrix helpers defined in utils.c. Every matrix is addressed through the
// mat(i, j) macro from params.h, i.e. element (i, j) lives at mat[i*ldm + j],
// where ldm is the distance in elements between consecutive rows.

// Fills the m-by-n matrix; the current definition writes (double)(i + 1) into
// every element of row i (the drand48-based fill is commented out there).
void random_matrix( int m, int n, double *mat, int ldm);
// Sets every element of the m-by-n matrix to 0.
void zero_matrix( int m, int n, double *mat, int ldm);
// Prints the m-by-n matrix to stdout, one row per line, values tab-separated.
void print_matrix(int m, int n, double *mat, int ldm);
// Returns the largest per-element difference found between the two m-by-n
// matrices (see utils.c for how each difference is measured).
double compare_matrix(int m, int n, double *mat1, double *mat2, int ldm);
// utils.c
#include "params.h"
#include "utils.h"

/*
 * Fill an m-by-n row-major matrix with a deterministic test pattern.
 *
 * Despite the name, no random numbers are generated: every element of
 * row i is set to (double)(i + 1), which keeps runs reproducible while
 * debugging.
 *
 *   m, n - number of rows and columns to fill
 *   mat  - destination buffer; element (i, j) is stored at mat[i*ldm + j]
 *   ldm  - leading dimension: number of doubles between consecutive rows
 *
 * Elements outside the m-by-n window (row padding when ldm > n) are left
 * untouched.
 */
void random_matrix(int m, int n, double *mat, int ldm){
    for (int i = 0; i < m; ++i){
        for (int j = 0; j < n; ++j){
            mat[i * ldm + j] = (double) (i + 1);
        }
    }
}

/*
 * Clear an m-by-n row-major matrix: every element (row, col) stored at
 * mat[row*ldm + col] is overwritten with 0. Storage outside the m-by-n
 * window is not written.
 */
void zero_matrix(int m, int n, double *mat, int ldm){
    int row = 0;
    while (row < m){
        int col = 0;
        while (col < n){
            mat[row * ldm + col] = 0.0;
            ++col;
        }
        ++row;
    }
}

/*
 * Write an m-by-n row-major matrix to stdout.
 *
 * Each value is printed with "%f" followed by a tab, each row ends with a
 * newline, and one extra blank line follows the whole matrix. Element
 * (row, col) is read from mat[row*ldm + col].
 */
void print_matrix(int m, int n, double *mat, int ldm){
    for (int row = 0; row < m; ++row){
        for (int col = 0; col < n; ++col){
            double value = mat[row * ldm + col];
            printf("%f\t", value);
        }
        printf("\n");
    }
    printf("\n");
}

/*
 * Return the largest absolute element-wise difference between two m-by-n
 * row-major matrices that share the leading dimension ldm.
 *
 *   m, n      - number of rows and columns to compare
 *   mat, mat2 - the two matrices; element (i, j) is at index i*ldm + j
 *   ldm       - number of doubles between consecutive rows in both buffers
 *
 * Returns 0 when m or n is not positive (nothing is compared).
 */
double compare_matrix(int m, int n, double *mat, double *mat2, int ldm){
    double max_diff = 0.0;

    for (int i = 0; i < m; ++i){
        for (int j = 0; j < n; ++j){
            /* fabs, not abs: abs() takes an int, so the double difference
             * would be truncated toward zero and any error smaller than 1.0
             * would be reported as 0. */
            double diff = fabs(mat[i * ldm + j] - mat2[i * ldm + j]);
            if (diff > max_diff){
                max_diff = diff;
            }
        }
    }

    return max_diff;
}
# makefile

# Toolchain and flags. BLAS_LIB is not set here; it may be supplied on the
# command line or through the environment and expands to nothing otherwise.
CC         := gcc
CFLAGS     := -O2 -Wall -msse3
LDFLAGS    := -lm

# Object lists: utils.o, every MMult*.c found in this directory, and main.o.
UTIL       := utils.o
MulMethods := $(patsubst %.c,%.o,$(wildcard MMult*.c))
TEST_OBJS  := main.o $(MulMethods)

# These targets do not produce files of the same name.
.PHONY: all clean clean_tmp

# Compile a single C source into its object file.
%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

# Default target: rebuild from scratch, then remove the intermediate objects.
# $(MAKE) is used instead of a literal "make" so that flags given to the
# top-level invocation are passed on to the sub-makes.
all:
	$(MAKE) clean
	echo $(MulMethods)
	$(MAKE) test_MMult.x
	$(MAKE) clean_tmp

# The indexing macros live in params.h, so every object must be recompiled
# when that header changes (relinking alone would reuse stale objects).
$(TEST_OBJS) $(UTIL): params.h

# Link the test binary; libraries follow the objects that reference them.
test_MMult.x: $(TEST_OBJS) $(UTIL)
	$(CC) $(TEST_OBJS) $(UTIL) $(LDFLAGS) $(BLAS_LIB) -o $@

clean:
	rm -f *.o *.x
clean_tmp:
	rm -f *.o

from how-to-optimize-gemm.

ZhangGe6 avatar ZhangGe6 commented on August 21, 2024

Oops! The problem is that I defined the macro as

#define B(i, j) B[i*ldb + j]

So when I call B(p + 1, j), it expands to B[p + 1*ldb + j] rather than the B[(p + 1)*ldb + j] that I actually want, because the macro argument is substituted textually and * binds tighter than +.

So I changed the macro definitions from

// params.h
// Buggy version, kept for illustration: in A, B and C the row argument i is
// not parenthesized, so an argument such as p + 1 expands to p + 1*ld? and
// indexes the wrong element. mat and mat2 already parenthesize their arguments.
#define A(i, j) A[i*lda + j]
#define B(i, j) B[i*ldb + j]
#define C(i, j) C[i*ldc + j]
#define mat(i, j) mat[(i)*(ldm) + (j)]
#define mat2(i, j) mat2[(i)*(ldm) + (j)]

to

// params.h
// Row-major element accessors: X(i, j) is element (i, j) of the buffer X whose
// rows are ld? elements apart. Every macro argument and stride is wrapped in
// parentheses so that expression arguments (p + 1, a ? b : c, ...) keep their
// grouping after textual substitution.
#define A(i, j) A[(i)*(lda) + (j)]
#define B(i, j) B[(i)*(ldb) + (j)]
#define C(i, j) C[(i)*(ldc) + (j)]
#define mat(i, j) mat[(i)*(ldm) + (j)]
#define mat2(i, j) mat2[(i)*(ldm) + (j)]

Then the code gives me the right result!

from how-to-optimize-gemm.

Related Issues (17)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.