#include #include #include #include #include #include #include enum Error { SUCCESS, BAD_ARGUMENT }; using Vector = std::vector; class Matrix { std::vector _storage; size_t _nrows, _ncols; public: Matrix(size_t nrows = 0, size_t ncols = 0) : _storage(nrows * ncols), _nrows(nrows), _ncols(ncols) {} size_t nrows() const { return _nrows; } size_t ncols() const { return _ncols; } double &operator()(size_t i, size_t j) { return _storage[i * _ncols + j]; } double const &operator()(size_t i, size_t j) const { return _storage[i * _ncols + j]; } }; // Matrix vector product m * v in the given number of threads. Vector matrix_vector_product(Matrix const &m, Vector const &v, int num_threads = 1); // Function used to compute how many lines of a matrix goes to each thread. std::vector compute_partition(size_t N, size_t nthreads); int main(int argc, char *argv[]) { if (argc != 3) { std::cerr << "Please, give the number of elements and number of threads in " "the command line.\n"; return BAD_ARGUMENT; } int const N = std::stoi(argv[1]); if (N <= 0) { std::cerr << "Number of elements must be positive.\n"; return BAD_ARGUMENT; } int const num_threads = std::stoi(argv[2]); if (num_threads <= 0) { std::cerr << "Number of threads must be positive.\n"; return BAD_ARGUMENT; } // Initialize a vector with 1..N Vector v(N); std::iota(begin(v), end(v), 1.0); // Create an identity matrix. Matrix m(N, N); // Default-initalised to 0 for (int i = 0; i < N; ++i) { m(i, i) = 1.0; } auto t_start = std::chrono::high_resolution_clock::now(); // Compute m * v. Should give v. Vector vres = matrix_vector_product(m, v, num_threads); // Sum all values of vres. double s = std::accumulate(begin(vres), end(vres), 0.0); auto t_finish = std::chrono::high_resolution_clock::now(); std::cout << "The sum of the resulting vector is " << std::setprecision(15) << s << std::endl; std::cout << "The computation took " << (t_finish - t_start).count() / 1e9 << " seconds\n"; return SUCCESS; } // Matrix vector product m * v in the given number of threads. Vector matrix_vector_product(Matrix const &m, Vector const &v, int num_threads) { // The resulting vector is of the same size as the number of lines in m. Vector res(m.nrows()); // Function to compute the dot product of rows in a block with the vector. auto compute_row_block = [&m = std::as_const(m), &v = std::as_const(v), &res](size_t start, size_t finish) { for (size_t i = start; i < finish; ++i) { double s = 0.0; for (size_t j = 0; j < m.ncols(); j++) { s += m(i, j) * v[j]; } res[i] = s; } }; // Find number of elements and initial index for each thread. auto inits = compute_partition(m.nrows(), num_threads); std::vector threads; for (int t = 0; t < num_threads; ++t) { // Thread t is responsible for lines init[t] to init[t] + count[t] // (excluded). threads.emplace_back(compute_row_block, inits[t], inits[t + 1]); } for (auto &thd : threads) { thd.join(); } return res; } // Function used to compute how many lines of a matrix goes to each thread. std::vector compute_partition(size_t N, size_t p) { // Elements of thread t are from init[t] to init[t+1] std::vector inits(p + 1); // Quotient and remainder of the division of N to nthreads. auto q = N / p, r = N % p; // Threads before r get q+1 elements; from r onward the get q elements. inits[0] = 0; for (size_t i = 0; i < p; ++i) { inits[i + 1] = inits[i] + (i < r ? q + 1 : q); } return inits; }