MPI_Bcast
#include "mpi.h"
#include <stdio.h>
#define N 3
int main(int argc, char *argv[])
{
int i, myrank, nprocs;
int buffer[N];
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
printf("myrank = %d\n before bcasting my buffer's data is: \n", myrank);
for (i = 0; i < N; i++)
{
buffer[i] = myrank + i;
printf("buffer[%d] = %d\n", i, buffer[i]);
}
printf("\n");
MPI_Bcast(buffer, N, MPI_INT, 0, MPI_COMM_WORLD);
/*result output*/
printf("after bcasting my buffer's data is:\n");
for (i = 0; i < N; i++)
printf("buffer[%d] = %d\n", i, buffer[i]);
printf("\n");
MPI_Finalize();
return 0;
}
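For intuition about what the single MPI_Bcast call does, here is a minimal sketch (not part of the original notes) that reproduces the same data movement with explicit MPI_Send/MPI_Recv from rank 0; a real MPI_Bcast typically uses a tree algorithm internally and should be preferred.

#include "mpi.h"
#include <stdio.h>
#define N 3

/* Hypothetical helper: emulate the broadcast with explicit point-to-point calls. */
static void bcast_by_hand(int *buffer, int n, int myrank, int nprocs)
{
    int dest;
    if (myrank == 0) {
        /* the root sends its buffer to every other rank, one message each */
        for (dest = 1; dest < nprocs; dest++)
            MPI_Send(buffer, n, MPI_INT, dest, 0, MPI_COMM_WORLD);
    } else {
        /* non-root ranks overwrite their buffer with the root's data */
        MPI_Recv(buffer, n, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
}

int main(int argc, char *argv[])
{
    int i, myrank, nprocs, buffer[N];
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    for (i = 0; i < N; i++)
        buffer[i] = myrank + i;
    bcast_by_hand(buffer, N, myrank, nprocs);
    printf("rank %d now holds %d %d %d\n", myrank, buffer[0], buffer[1], buffer[2]);
    MPI_Finalize();
    return 0;
}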
MPI_Scatter
#include "mpi.h"
#include "stdlib.h"
#include "stdio.h"
#define N 2
int main(int argc, char **argv)
{
int size, rank;
int* send, *recv;
int i = 0, j = 0;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
recv = (int *)malloc(N*sizeof(int));
for (; j<N; j++)
recv[j] = 0;
if (rank == 0)
{
send = (int *)malloc(size*N*sizeof(int));
for (; i < size*N; i++)
send[i] = i;
}
printf("-----------------------------\nrank = %d\n", rank);
MPI_Scatter(send, N, MPI_INT, recv, N, MPI_INT, 0, MPI_COMM_WORLD);
/* output result */
printf("-----------------------------\nrank = %d\n", rank);
for (j = 0; j<N; j++)
printf("传输后recv: recv_buffer[%d] = %d\n", j, recv[j]);
printf("-----------------------------\n");
MPI_Finalize();
return 0;
}
Point-to-point (MPI_Send / MPI_Recv)
#include <stdio.h>
#include "mpi.h"

int main(int argc, char **argv)
{
    int rank, size, i, sendData, recvData;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    recvData = 0;
    if (rank == 0)
    {
        /* rank 0 sends a distinct value to every other rank, tagged with the destination rank */
        for (i = 1; i < size; i++)
        {
            sendData = i + 1000;
            MPI_Send(&sendData, 1, MPI_INT, i, i, MPI_COMM_WORLD);
        }
    }
    else
    {
        /* every other rank receives one message from rank 0, using its own rank as the tag */
        MPI_Recv(&recvData, 1, MPI_INT, 0, rank, MPI_COMM_WORLD, &status);
        printf("Process %d received from %d, tag = %d, data = %d\n",
               rank, status.MPI_SOURCE, status.MPI_TAG, recvData);
    }

    MPI_Finalize();
    return 0;
}
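The example above fixes both the source and the tag on the receiving side. A closely related pattern, sketched below (not part of the original notes), reverses the direction and uses the wildcards MPI_ANY_SOURCE / MPI_ANY_TAG so the root can accept messages in whatever order they arrive and read the actual sender and tag from the status object.

#include "mpi.h"
#include <stdio.h>

/* Hypothetical variant: rank 0 collects one message from every other rank. */
int main(int argc, char **argv)
{
    int rank, size, i, data;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    if (rank == 0) {
        /* size-1 wildcard receives; status reports who actually sent each message */
        for (i = 1; i < size; i++) {
            MPI_Recv(&data, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG,
                     MPI_COMM_WORLD, &status);
            printf("got %d from rank %d (tag %d)\n",
                   data, status.MPI_SOURCE, status.MPI_TAG);
        }
    } else {
        data = rank + 2000;
        MPI_Send(&data, 1, MPI_INT, 0, rank, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}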
MPI_Reduce
1)
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char **argv)
{
int size, rank;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int send = 0, recv = 0;
send = rank + 1;
MPI_Reduce(&send, &recv, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
printf("The answer is : %d", recv);
MPI_Finalize();
return 0;
}
2)
#include <stdio.h>
#include "mpi.h"
#include <stdlib.h>

int main(int argc, char **argv)
{
    int myid, np, root, m, n, pair[2], answer[2];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &np);
    printf("Process %d of %d is running\n", myid, np);

    root = 0;

    /* plain MPI_MAX reduction: find the largest rank id */
    m = myid;
    MPI_Reduce(&m, &n, 1, MPI_INT, MPI_MAX, root, MPI_COMM_WORLD);
    if (myid == root) printf("The maximum value is %d\n", n);

    /* MPI_MAXLOC on an MPI_2INT pair: pair[0] is the value, pair[1] is its "location" (here the rank) */
    pair[0] = (myid + 1) % np;
    pair[1] = myid;
    MPI_Reduce(pair, answer, 1, MPI_2INT, MPI_MAXLOC, root, MPI_COMM_WORLD);
    if (myid == root)
        printf("The maximum value is %d on process %d\n", answer[0], answer[1]);

    MPI_Finalize();
    return 0;
}
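As a bridge to the Pi programs in the OpenMP part below, here is a minimal sketch (added here, not in the original notes) that evaluates the same midpoint-rule integral with MPI: each rank accumulates a partial sum over a cyclic slice of the iterations and MPI_Reduce adds the partial sums on rank 0.

#include "mpi.h"
#include <stdio.h>

/* Hypothetical sketch: Pi by the midpoint rule, combined with MPI_Reduce. */
int main(int argc, char **argv)
{
    int rank, size, i, num_steps = 100000;
    double step, x, local_sum = 0.0, pi = 0.0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    step = 1.0 / (double)num_steps;
    /* cyclic distribution of iterations over ranks */
    for (i = rank; i < num_steps; i += size) {
        x = (i + 0.5) * step;
        local_sum += 4.0 / (1.0 + x * x);
    }

    /* sum the per-rank partial sums onto rank 0 */
    MPI_Reduce(&local_sum, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (rank == 0)
        printf("pi = %.10f\n", pi * step);

    MPI_Finalize();
    return 0;
}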
MPI_Gather
#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>
int main(int argc, char **argv)
{
int size, rank;
int send[5];
int *recv, i = 0;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
/*fill data*/
for (;i<5;i++)
send[i] = i + rank * 10;
printf("------------------------------------------------------------\n");
printf("I'm process %d, and the data of my send buffer is:\n", rank);
for (i = 0; i < 5; i++)
printf("%d ", send[i]);
printf("\n");
if (rank == 0)
recv = (int *)malloc(size*5*sizeof(int));
MPI_Gather(send, 5, MPI_INT, recv, 5, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0)
{
printf("I'm root process, and the data that I receive is \n");
for (i = 0; i<size*5; i++)
printf("%d ", recv[i]);
}
printf("\n");
MPI_Finalize();
return 0;
}
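A closely related collective, not covered by the original notes, is MPI_Allgather: the same concatenation as above, but the result is delivered to every rank, so there is no root and every rank allocates the full buffer. A minimal sketch, reusing the setup of the gather example:

#include "mpi.h"
#include <stdlib.h>
#include <stdio.h>

/* Hypothetical variant of the gather example using MPI_Allgather. */
int main(int argc, char **argv)
{
    int size, rank, i;
    int send[5];
    int *recv;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    for (i = 0; i < 5; i++)
        send[i] = i + rank * 10;

    /* every rank receives the concatenation of all send buffers */
    recv = (int *)malloc(size * 5 * sizeof(int));
    MPI_Allgather(send, 5, MPI_INT, recv, 5, MPI_INT, MPI_COMM_WORLD);

    printf("rank %d sees: ", rank);
    for (i = 0; i < size * 5; i++)
        printf("%d ", recv[i]);
    printf("\n");

    free(recv);
    MPI_Finalize();
    return 0;
}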
OpenMP
static & dynamic
#include <omp.h>
#include <stdio.h>
#define COUNT 12

int main(int argc, char *argv[])
{
    /* use schedule(static) or schedule(dynamic) here to compare how
       iterations are handed out to threads */
    #pragma omp parallel for schedule(static)
    for (int i = 0; i < COUNT; i++)
    {
        printf("Thread: %d, Iteration: %d\n", omp_get_thread_num(), i);
    }
    return 0;
}
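For comparison, a minimal sketch (added here, not in the original) with an explicit chunk size: schedule(dynamic, 3) hands out blocks of 3 consecutive iterations to whichever thread is idle, so the thread-to-iteration mapping can change from run to run.

#include <omp.h>
#include <stdio.h>
#define COUNT 12

int main(void)
{
    /* dynamic scheduling with a chunk size of 3: blocks of 3 consecutive
       iterations are claimed by idle threads at run time */
    #pragma omp parallel for schedule(dynamic, 3)
    for (int i = 0; i < COUNT; i++)
    {
        printf("Thread: %d, Iteration: %d\n", omp_get_thread_num(), i);
    }
    return 0;
}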
for reduction
#include <stdio.h>
#include <omp.h>
#define NUM_THREADS 4

int main()
{
    omp_set_num_threads(NUM_THREADS);
    int sum = 0;

    /* each thread accumulates a private copy of sum; the copies are added together at the end */
    #pragma omp parallel for reduction(+:sum)
    for (int i = 1; i <= 16; i++)
    {
        sum += i;
        printf("i = %d, id = %d\n", i, omp_get_thread_num());
    }
    printf("Sum = %d\n", sum);
    return 0;
}
Pi (per-thread partial sum array)
#include <stdio.h>
#include <omp.h>
#define NUM_THREADS 4

double sum[NUM_THREADS]; /* one partial sum per thread (global, zero-initialized) */

int main()
{
    int n = 100000;
    double r_pi = 3.14159265358979323846;
    double pi = 0;
    double step = 1.0 / n;
    int i;

    omp_set_num_threads(NUM_THREADS);
    double start = omp_get_wtime();

    #pragma omp parallel private(i)
    {
        double x;
        int id = omp_get_thread_num();
        /* cyclic distribution: thread id handles iterations id, id+NUM_THREADS, ... */
        for (i = id; i < n; i += NUM_THREADS) {
            x = (i + 0.5) * step;
            sum[id] += 4.0 / (1.0 + x * x);
        }
        sum[id] *= step;
    }

    /* serial combine of the per-thread partial sums */
    for (int j = 0; j < NUM_THREADS; ++j) {
        pi += sum[j];
    }
    double end = omp_get_wtime();

    printf("N = %d\n", n);
    printf("PI = %.20f\n", pi);
    printf("PI-REAL_PI = %.20f\n", pi - r_pi);
    printf("Time = %lf\n", end - start);
    return 0;
}
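In the program above the per-thread slots of sum[] sit next to each other in memory, so different threads update the same cache line (false sharing), which can hurt scaling even though the result is correct. A common workaround, sketched below under the assumption of 64-byte cache lines, is to pad each slot so every thread writes to its own line; the reduction version further down avoids the issue entirely.

#include <stdio.h>
#include <omp.h>
#define NUM_THREADS 4
#define PAD 8   /* assumed: 64-byte cache lines, i.e. 8 doubles per line */

double sum[NUM_THREADS][PAD]; /* each thread writes only sum[id][0] */

int main()
{
    int n = 100000, i;
    double pi = 0.0, step = 1.0 / n;

    omp_set_num_threads(NUM_THREADS);
    #pragma omp parallel private(i)
    {
        double x;
        int id = omp_get_thread_num();
        for (i = id; i < n; i += NUM_THREADS) {
            x = (i + 0.5) * step;
            sum[id][0] += 4.0 / (1.0 + x * x);
        }
    }
    /* combine the padded partial sums serially */
    for (i = 0; i < NUM_THREADS; i++)
        pi += sum[i][0] * step;

    printf("PI = %.10f\n", pi);
    return 0;
}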
Pi_SPMD
#include <stdio.h>
#include <omp.h>

int num_steps = 10000;
double step;
#define NUM_THREADS 4

int main()
{
    int i;
    double pi, sum[NUM_THREADS];

    step = 1.0 / (double)num_steps;
    omp_set_num_threads(NUM_THREADS);

    #pragma omp parallel private(i)
    {
        double x;
        int id = omp_get_thread_num();
        printf("%d\n", id);
        /* SPMD style: each thread takes every NUM_THREADS-th iteration */
        for (i = id, sum[id] = 0.0; i < num_steps; i = i + NUM_THREADS)
        {
            x = (i + 0.5) * step;
            sum[id] += 4.0 / (1.0 + x * x);
        }
    }

    /* combine the partial sums serially */
    for (i = 0, pi = 0.0; i < NUM_THREADS; i++)
        pi += sum[i] * step;
    printf("%.10lf\n", pi);
    return 0;
}
Pi_reduction
#include <stdio.h>
#include <omp.h>

static int num_steps = 100000;
double step;
#define NUM_THREADS 2

int main()
{
    int i;
    double pi = 0.0;
    double sum = 0.0;
    double x = 0.0;

    step = 1.0 / (double)num_steps;
    omp_set_num_threads(NUM_THREADS);

    /* reduction(+:sum) gives each thread a private sum and adds them at the end;
       x must be private so threads do not overwrite each other's value */
    #pragma omp parallel for reduction(+:sum) private(x)
    for (i = 1; i <= num_steps; i++)
    {
        x = (i - 0.5) * step;
        sum += 4.0 / (1.0 + x * x);
    }

    pi = sum * step;
    printf("%.10lf\n", pi);
    return 0;
}
shell
run.sh
#!/bin/tcsh
#SBATCH -J wangyz
#SBATCH -p wzhctest
#SBATCH -n 8
#SBATCH --cpus-per-task 1
#SBATCH --time=30
#SBATCH -o %j.out
#SBATCH -e %j.err
module load apps/cp2k/9.1/impi-2020-plumed
mpirun -np 8 ./a.out