#include #include #include #include #include #include #include #include #include #include #include #include #include //============================================================================= // // Definicao de tipos // // Codigos de erro do programa. enum Error { SUCCESS, BAD_ARGUMENT, BAD_FILE }; // Representacao de uma matriz. class Matrix { size_t _nrows, _ncols; std::vector _data; public: // Cria uma matriz com o numero de linhas e colunas especificado. Matrix(size_t nrows, size_t ncols) : _nrows(nrows), _ncols(ncols), _data(nrows * ncols) {} size_t num_rows() const { return _nrows; } size_t num_cols() const { return _ncols; } // Acessa elemento (indexacao) usando (i, j) ao inves de [i, j] double &operator()(size_t i, size_t j) { return _data[i * _ncols + j]; } double const &operator()(size_t i, size_t j) const { return _data[i * _ncols + j]; } // Troca o conteudo de duas matrizes. void swap(Matrix &m) { std::swap(_nrows, m._nrows); std::swap(_ncols, m._ncols); _data.swap(m._data); } }; // Estrutura para representar um pedaco 2D das matrizes, a ser // processado por uma thread. struct Slice { size_t start_row, end_row, start_col, end_col; }; //============================================================================= // // Prototipos de funcoes usada no main // // Leitura dos argumentos: // std::tuple read_args(int argc, char *argv[]); // Thread code to do computations for a slice of the matrix. void calc_slice(Matrix &u, Matrix &newu, Matrix const &f, Slice slice, double epsilon, int tid, int n_threads); // Função que calcula os elementos vão para cada thread. // Thread t fica com elementos de ret[t] a ret[t+1] std::vector compute_partition(size_t N, size_t p); // Escreve o conteudo da matriz no arquivo especificado. void write_to_file(std::string const &, Matrix const &); //============================================================================= // // main // int main(int argc, char *argv[]) { double const outside_temp = 0.; // Temperatura exterior. double const box_f_value = -100.; // Valor de f na caixa interna. auto [n_threads, N, epsilon, filename] = read_args(argc, argv); auto t_start = std::chrono::high_resolution_clock::now(); // Usa N+2 para ter celulas fantasma nas bordas. Matrix u(N + 2, N + 2); Matrix f(N + 2, N + 2); // Ajusta valores nas beiradas. for (size_t i = 0; i < N + 2; ++i) { u(0, i) = outside_temp; u(N + 1, i) = outside_temp; u(i, 0) = outside_temp; u(i, N + 1) = outside_temp; } // Calcula o tamanho da caixa interna (onde f e diferente de 0); auto box_first = 2 * N / 5 + 1; auto box_last = 3 * N / 5; // Ajusta valores da matriz de f. for (size_t i = box_first; i <= box_last; ++i) { for (size_t j = box_first; j <= box_last; ++j) { f(i, j) = box_f_value; } } // Computa divisao de n_threads em uma malha 2D. // Threads organizadas como: // 0 3 // 1 4 // 2 5 // dim_x divide as colunas; dim_y divide as linhas. auto dim_x = static_cast(round(sqrt(n_threads))); while (n_threads % dim_x != 0) { --dim_x; } auto dim_y = n_threads / dim_x; // Distribuicao nas linhas e colunas. auto start_col = compute_partition(N, dim_x); auto start_row = compute_partition(N, dim_y); // Matriz com os novos valores de u. Matrix newu{u}; // Cria e inicia as threads. std::vector threads; for (int i = 0; i < n_threads; ++i) { auto ix = i / dim_y; // x index of thread auto iy = i % dim_y; // y index of thread Slice slice{start_row[iy], start_row[iy + 1], start_col[ix], start_col[ix + 1]}; threads.emplace_back(calc_slice, std::ref(u), std::ref(newu), std::cref(f), slice, epsilon, i, n_threads); } // Espera todas as threads acabarem. for (auto &thd : threads) { thd.join(); } auto t_finish = std::chrono::high_resolution_clock::now(); std::cout << "The computation took " << (t_finish - t_start).count() / 1e9 << " seconds.\n"; // Escreve o resultado no arquivo. write_to_file(filename, u); return SUCCESS; } //============================================================================= // // Implementacao das funcoes auxiliares. // // Leitura dos argumentos: // std::tuple read_args(int argc, char *argv[]) { if (argc != 5) { std::cerr << "Use as: " << argv[0] << " \n"; std::exit(BAD_ARGUMENT); } size_t N = std::stoul(argv[1]); if (N < 1) { std::cerr << "Mesh size must be positive.\n"; std::exit(BAD_ARGUMENT); } auto epsilon = std::stod(argv[2]); if (epsilon <= 0) { std::cerr << "Tolerance must be positive.\n"; std::exit(BAD_ARGUMENT); } auto n_threads = std::stoi(argv[4]); if (n_threads <= 0) { std::cerr << "Number of threads must be positive.\n"; std::exit(BAD_ARGUMENT); } return {n_threads, N, epsilon, std::string(argv[3])}; } // Função que calcula os elementos vão para cada thread. std::vector compute_partition(size_t N, size_t p) { // Thread t recebe de init[t] até init[t+1] std::vector inits(p + 1); auto q = N / p, r = N % p; // Threads antes de r recebem q+1 elementos; de r para frente recebem q // elementos. inits[0] = 1; for (size_t i = 0; i < p; ++i) { inits[i + 1] = inits[i] + (i < r ? q + 1 : q); } return inits; } // Escreve o conteudo da matriz no arquivo especificado. void write_to_file(std::string const &filename, Matrix const &u) { std::ofstream file(filename); if (!file.is_open()) { std::cerr << "Error opening " << filename << std::endl; std::exit(BAD_FILE); } for (size_t i = 1; i < u.num_rows() - 1; ++i) { for (size_t j = 1; j < u.num_cols() - 1; ++j) { file << " " << std::setw(15) << u(i, j); } file << std::endl; } if (!file.good()) { std::cerr << "Error writing to " << filename << std::endl; std::exit(BAD_FILE); } } //----------------------------------------------------------------------------- // // Tipo auxiliar para implementar uma barreira // // Uma implementacao (provavelmente com bugs) de barreira. Uma // barreira espera todas as threads chegarem para liberar qualquer // thread. #include class Barrier { pthread_barrier_t _barrier; public: explicit Barrier(int n) { pthread_barrier_init(&_barrier, nullptr, n); } void wait_all() { pthread_barrier_wait(&_barrier); } ~Barrier() { pthread_barrier_destroy(&_barrier); } }; //----------------------------------------------------------------------------- // // Codigo executado pelas threads // // Thread code to do computations for a slice of the matrix. void calc_slice(Matrix &u, Matrix &newu, Matrix const &f, Slice slice, double epsilon, int tid, int n_threads) { // Variaveis estaticas sao compartilhadas. // Exclusao mutua no acesso a error. static std::mutex exclusion; // Barreiras para controle do loop. static Barrier step_barrier(n_threads), restart_barrier(n_threads); // Erro maximo. Inicia com valor alto static double error{0.}; static bool not_done{true}; // Calcula h e faixas de indices das matrizes. auto N = u.num_rows() - 2; auto h = 1. / N; while (not_done) { // Calcula novos valores. double partial_error{0.0}; for (size_t i = slice.start_row; i < slice.end_row; ++i) { for (size_t j = slice.start_col; j < slice.end_col; ++j) { newu(i, j) = 0.25 * (u(i, j - 1) + u(i, j + 1) + u(i - 1, j) + u(i + 1, j) - h * h * f(i, j)); if (auto this_error = std::fabs(newu(i, j) - u(i, j)); this_error > partial_error) { partial_error = this_error; } } } { // Atualiza no erro global compartilhado. Com exclusao. std::unique_lock lock(exclusion); if (error < partial_error) { error = partial_error; } } // Espera todas as threads terminarem os calculos delas. step_barrier.wait_all(); // Na thread 0, troca matrizes u e newu e zera o erro. // Tambem verifica se ja convergiu. if (tid == 0) { u.swap(newu); not_done = error > epsilon; error = 0; } // Todas as threads esperam a troca e calculo de convergencia. restart_barrier.wait_all(); } }