%
% This file was automatically produced at Feb 27 2003, 22:51:56 by
% c2latex -c mpithreads_mpi.c
%
\documentclass[11pt,a4paper]{article}
\setlength{\textwidth}{15cm}
\setlength{\textheight}{22.5cm}
\setlength{\hoffset}{-2cm}
\setlength{\voffset}{-2cm}
\begin{document}
\expandafter\ifx\csname indentation\endcsname\relax%
\newlength{\indentation}\fi
\setlength{\indentation}{0.5em}
\begin{flushleft}
{$/\ast$\it{}$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$\mbox{}\\
$\ast$ FILE: mpithreads\_mpi.c\mbox{}\\
$\ast$ DESCRIPTION:\mbox{}\\
$\ast$ This simple program illustrates the use of MPI in a program obtained \mbox{}\\
$\ast$ by modifying a serial code that performs a dot product. It is the third \mbox{}\\
$\ast$ of four codes used to show the progression from a serial program to a \mbox{}\\
$\ast$ hybrid MPI/Pthreads program. The other relevant codes are:\mbox{}\\
$\ast$ $-$ mpithreads\_serial.c $-$ The serial version\mbox{}\\
$\ast$ $-$ mpithreads\_threads.c $-$ A shared memory programming model using\mbox{}\\
$\ast$ Pthreads\mbox{}\\
$\ast$ $-$ mpithreads\_both.c $-$ A hybrid model that utilizes both MPI and\mbox{}\\
$\ast$ Pthreads to execute on systems composed of clusters\mbox{}\\
$\ast$ of SMPs.\mbox{}\\
$\ast$\mbox{}\\
$\ast$ Use of the SPMD model was chosen for convenience, with replication \mbox{}\\
$\ast$ of the main data on all nodes. A more memory-efficient implementation \mbox{}\\
$\ast$ would be advisable for larger data sets.\mbox{}\\
$\ast$\mbox{}\\
$\ast$ SOURCE: Vijay Sonnad, IBM\mbox{}\\
$\ast$ LAST REVISED: 10/8/99 Blaise Barney\mbox{}\\
$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast/$}\mbox{}\\
\mbox{}\\
{\tt \#include} $<${\tt{}mpi.h}$>$\mbox{}\\
{\tt \#include} $<${\tt{}stdio.h}$>$\mbox{}\\
{\tt \#include} $<${\tt{}stdlib.h}$>$\mbox{}\\
\mbox{}\\
{$/\ast$\it{} \mbox{}\\
The following structure contains the necessary information to allow the \mbox{}\\
function "dotprod" to access its input data and place its output into \mbox{}\\
the structure.
Note that this structure is unchanged from the sequential \mbox{}\\
version.\mbox{}\\
$\ast/$}\mbox{}\\
\mbox{}\\
{\bf typedef} {\bf struct} \mbox{}\\
\hspace*{1\indentation}\{\mbox{}\\
\hspace*{3\indentation}{\bf double} $\ast$a;\mbox{}\\
\hspace*{3\indentation}{\bf double} $\ast$b;\mbox{}\\
\hspace*{3\indentation}{\bf double} sum; \mbox{}\\
\hspace*{3\indentation}{\bf int} veclen; \mbox{}\\
\hspace*{1\indentation}\} DOTDATA;\mbox{}\\
\mbox{}\\
{$/\ast$\it{} Define globally accessible variables $\ast/$}\mbox{}\\
\mbox{}\\
{\tt \#define} VECLEN 100\mbox{}\\
DOTDATA dotstr; \mbox{}\\
\mbox{}\\
{$/\ast$\it{}\mbox{}\\
The function dotprod is very similar to the sequential version except that \mbox{}\\
we now have each node working on a different part of the data. As before, \mbox{}\\
all access to the input is through a structure of type DOTDATA and all \mbox{}\\
output from this function is written into this same structure. \mbox{}\\
$\ast/$}\mbox{}\\
\mbox{}\\
{\bf void} $\ast$dotprod()\mbox{}\\
\{\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Define and use local variables for convenience $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{\bf int} i, start, end, myid, len;\mbox{}\\
\hspace*{3\indentation}{\bf double} mysum, $\ast$x, $\ast$y;\mbox{}\\
\hspace*{3\indentation}\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Obtain rank of this node $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}MPI\_Comm\_rank (MPI\_COMM\_WORLD, \&myid);\mbox{}\\
\hspace*{5\indentation}\mbox{}\\
\hspace*{3\indentation}len = dotstr.veclen;\mbox{}\\
\hspace*{3\indentation}start = myid$\ast$len;\mbox{}\\
\hspace*{3\indentation}end = start + len;\mbox{}\\
\hspace*{3\indentation}x = dotstr.a;\mbox{}\\
\hspace*{3\indentation}y = dotstr.b;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{}\mbox{}\\
\hspace*{3\indentation}Perform the dot product and assign the result to the appropriate variable in \mbox{}\\
\hspace*{3\indentation}the structure. \mbox{}\\
\hspace*{3\indentation}$\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}mysum = 0;\mbox{}\\
\hspace*{3\indentation}{\bf for} (i=start; i$<$end; i++) \mbox{}\\
\hspace*{4\indentation}\{\mbox{}\\
\hspace*{6\indentation}mysum += (x[i] $\ast$ y[i]);\mbox{}\\
\hspace*{4\indentation}\}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}dotstr.sum += mysum;\mbox{}\\
\mbox{}\\
\}\mbox{}\\
\mbox{}\\
{$/\ast$\it{} \mbox{}\\
As before, the main program does very little computation. It does, however, make \mbox{}\\
all the calls to the MPI routines. This is not a master$-$worker arrangement, \mbox{}\\
and all nodes participate equally in the work.
\mbox{}\\
$\ast/$}\mbox{}\\
\mbox{}\\
{\bf int} main ({\bf int} argc, {\bf char}$\ast$ argv[])\mbox{}\\
\{\mbox{}\\
\hspace*{3\indentation}{\bf int} i, len=VECLEN;\mbox{}\\
\hspace*{3\indentation}{\bf int} myid, numprocs;\mbox{}\\
\hspace*{3\indentation}{\bf double} $\ast$a, $\ast$b;\mbox{}\\
\hspace*{3\indentation}{\bf double} mysum, allsum;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} MPI Initialization $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}MPI\_Init (\&argc, \&argv);\mbox{}\\
\hspace*{3\indentation}MPI\_Comm\_size (MPI\_COMM\_WORLD, \&numprocs);\mbox{}\\
\hspace*{3\indentation}MPI\_Comm\_rank (MPI\_COMM\_WORLD, \&myid);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Assign storage and initialize values $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}a = ({\bf double}$\ast$) malloc (numprocs$\ast$len$\ast${\bf sizeof}({\bf double}));\mbox{}\\
\hspace*{3\indentation}b = ({\bf double}$\ast$) malloc (numprocs$\ast$len$\ast${\bf sizeof}({\bf double}));\mbox{}\\
\hspace*{2\indentation}\mbox{}\\
\hspace*{3\indentation}{\bf for} (i=0; i$<$len$\ast$numprocs; i++)\mbox{}\\
\hspace*{4\indentation}\{\mbox{}\\
\hspace*{5\indentation}a[i]=1;\mbox{}\\
\hspace*{5\indentation}b[i]=a[i];\mbox{}\\
\hspace*{4\indentation}\}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}dotstr.veclen = len; \mbox{}\\
\hspace*{3\indentation}dotstr.a = a; \mbox{}\\
\hspace*{3\indentation}dotstr.b = b; \mbox{}\\
\hspace*{3\indentation}dotstr.sum = 0;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Call the dot product routine $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}dotprod();\mbox{}\\
\hspace*{3\indentation}mysum = dotstr.sum;\mbox{}\\
\hspace*{3\indentation}printf({\tt"Task \%d partial sum is \%f$\backslash$n"}, myid, mysum);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} After the dot product, reduce the partial sums from all nodes onto node 0 $\ast/$}\mbox{}\\
\hspace*{3\indentation}MPI\_Reduce (\&mysum, \&allsum, 1, MPI\_DOUBLE, MPI\_SUM, 0, MPI\_COMM\_WORLD);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{\bf if} (myid == 0) \mbox{}\\
\hspace*{5\indentation}printf ({\tt"Done. MPI version: sum $=$ \%f $\backslash$n"}, allsum);\mbox{}\\
\hspace*{3\indentation}free (a);\mbox{}\\
\hspace*{3\indentation}free (b);\mbox{}\\
\hspace*{3\indentation}MPI\_Finalize();\mbox{}\\
\} \mbox{}\\
\end{flushleft}
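\vspace{1em}
\noindent
To try the listing above, compile and launch it with the tools supplied by the
local MPI installation. A minimal sketch, assuming the commonly provided
{\tt mpicc} compiler wrapper and {\tt mpirun} launcher (names and options vary
between MPI implementations; {\tt mpiexec} or a batch launcher may be required
instead):
\begin{verbatim}
# Compile the program (wrapper name assumed; use your MPI
# implementation's own compiler wrapper).
mpicc -o mpithreads_mpi mpithreads_mpi.c

# Launch four MPI tasks.
mpirun -np 4 ./mpithreads_mpi
\end{verbatim}
With four tasks, each rank forms the partial dot product over its own
100-element slice (VECLEN) of the replicated vectors. Since every element of
{\tt a} and {\tt b} is initialized to 1, each task reports a partial sum of
$100.0$ and, after the {\tt MPI\_Reduce}, task 0 prints the total
$4 \times 100 = 400.0$.
\end{document}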