Computer Architecture and Organization
Computer Architecture and Organization
Computer Architecture and Organization
NAME : T. RAGHAVENDRA
SLOT : G1+TG1
CODE:
#include <cstdio>
int main() {
#ifdef __INTEL_COMPILER
#elif __GNUC__
// Only compiled with GNU Compiler
#endif
QUIZ : 2
CODE :
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <omp.h>
#include <mkl.h>
#include "distribution.h"
VSLStreamStatePtr rnStream);
VSLStreamStatePtr rnStream) {
int n_escaped=0;
float x = 0.0f;
float rn;
return n_escaped;
if(argc>1) {
alpha = atof(argv[1]);
if(argc>2) {
x_threshold = atof(argv[2]);
VSLStreamStatePtr rnStream;
//compute diffusion data using function defined in worker.cc and get the timing
} else {
QUIZ : 3
#include <cstdlib>
#include <cstdio>
#include <omp.h>
#include <mkl.h>
#include <vector>
#include <algorithm>
// Forward declaration of filter(); no definition appears in this listing.
// The parameter list matches filter_ref() below, which reads data as
// data[i*m+j] and appends row indices i to result_row_ind.
// NOTE(review): presumably this is the function the driver comment refers to
// as "defined in worker.cc" — confirm against the build.
void filter(const long n, const long m, float *data, const float threshold, std::vector<long>
&result_row_ind);
void filter_ref(const long n, const long m, float *data, const float threshold, std::vector<long>
&result_row_ind) {
float sum;
sum = 0.0f;
sum+=data[i*m+j];
}
result_row_ind.push_back(i);
std::sort(result_row_ind.begin(),result_row_ind.end());
if(argc < 2) {
threshold = 0.5;
} else {
threshold = atof(argv[1]);
VSLStreamStatePtr rnStream;
//initialize 2D data
std::vector<long> ref_result_row_ind;
//compute the reference data using the unoptimized reference function defined above
//compute actual data using the function defined in worker.cc and get the timing
std::vector<long> result_row_ind;
if(ref_result_row_ind.size() != result_row_ind.size()) {
printf("Error: The reference and result vectors have different sizes: %ld
%ld",ref_result_row_ind.size(), result_row_ind.size());
} else {
if(passed) {
// Printing perf
} else {
QUIZ : 4
CODE :
#include <cstdio>
#include <cstdlib>
#include <mkl.h>
#include <omp.h>
#include <hbwmalloc.h>
// Forward declaration of runFFTs(); its body is not part of this listing.
// The parameter list is identical to runFFTs_ref() declared below.
// NOTE(review): presumably the implementation under test whose output in
// `data` is compared against the reference copy — confirm.
void runFFTs( const size_t fft_size, const size_t num_fft, MKL_Complex8 *data,
DFTI_DESCRIPTOR_HANDLE *fftHandle);
// Do not modify.
//reference function
void runFFTs_ref( const size_t fft_size, const size_t num_fft, MKL_Complex8 *data,
DFTI_DESCRIPTOR_HANDLE *fftHandle) {
int main() {
ref_data[i].real = data[i].real;
ref_data[i].imag = data[i].imag;
DftiCommitDescriptor (*fftHandle);
*(data[i*fft_size+j].real-ref_data[i*fft_size+j].real)
+(data[i*fft_size+j].imag-ref_data[i*fft_size+j].imag)
*(data[i*fft_size+j].imag-ref_data[i*fft_size+j].imag))
< 1.0e-6;
if(within_tolerance) {
// Printing performance
} else {
// Verification failed
DftiFreeDescriptor (fftHandle);
_mm_free(ref_data);
_mm_free(data);
}
QUIZ : 5
CODE :
#include <cstdlib>
#include <cstdio>
#include <math.h>
#include <mpi.h>
#include <omp.h>
#include <assert.h>
#include "L.h"
// + 2.0f*(1.0f-L(x))*(d_(x,t))
// - d_(x, t-1)
// Forward declaration of simulate(); its body is not part of this listing.
// The signature is the same as simulate_ref() below, which returns a float*.
// NOTE(review): presumably the MPI-parallel counterpart of simulate_ref, with
// rank/world_size/segments_per_process describing the work split — confirm.
float * simulate(const float alpha, const long n_segments, const int n_steps, float *d_buf1, float
*d_buf2, const int rank, const int world_size, const long segments_per_process);
// Do not modify
float * simulate_ref(const float alpha, const long n_segments, const int n_steps, float *d_buf1, float
*d_buf2, const int rank, const int world_size, const long segments_per_process) {
+2.0f*(1.0f-L_x)*(d_t[i])
- d_t1[i]; // The algorithm calls for d(i, t-1) here, but that is currently contained in d_t1
return d_t;
void initialize_buffers(const float alpha, const long n_segments, float *d_buf1, float *d_buf2) {
d_buf1[i] = 100.0*sinf(3.14159*(float)i*dx);
if (ret != MPI_SUCCESS) {
printf("error: could not initialize MPI\n");
MPI_Abort(MPI_COMM_WORLD, ret);
float alpha;
if (argc < 2) {
alpha = 0.2;
} else {
alpha = atof(argv[1]);
MPI_Status stat;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
assert((n_segments-2L)%world_size == 0); // This will make MPI gather much easier to work with
if(rank == 0) {
initialize_buffers(alpha, n_segments, d_buf1, d_buf2);
if(rank == 0) {
d_ref[i] = d_ref_temp[i];
if(rank == 0) {
if(rank == 0) {
if(within_tolerance) {
} else {
// Verification failed
MPI_Finalize();