%
% This file was automatically produced at Feb 27 2003, 22:51:56 by
% c2latex -c mpithreads_threads.c
%
\documentclass[11pt,a4paper]{article}
\setlength{\textwidth}{15cm}
\setlength{\textheight}{22.5cm}
\setlength{\hoffset}{-2cm}
\setlength{\voffset}{-2cm}
\begin{document}
\expandafter\ifx\csname indentation\endcsname\relax%
\newlength{\indentation}\fi
\setlength{\indentation}{0.5em}
\begin{flushleft}
{$/\ast$\it{}$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$\mbox{}\\
$\ast$ FILE: mpithreads\_threads.c\mbox{}\\
$\ast$ DESCRIPTION:\mbox{}\\
$\ast$ This simple program illustrates the use of Pthreads in a program obtained\mbox{}\\
$\ast$ by modifying a serial code that performs a dot product. It is the second\mbox{}\\
$\ast$ of four codes used to show the progression from a serial program to a\mbox{}\\
$\ast$ hybrid MPI/Pthreads program. The other relevant codes are:\mbox{}\\
$\ast$ $-$ mpithreads\_serial.c $-$ The serial version\mbox{}\\
$\ast$ $-$ mpithreads\_mpi.c $-$ A distributed memory programming model with MPI\mbox{}\\
$\ast$ $-$ mpithreads\_both.c $-$ A hybrid model that uses both MPI and Pthreads\mbox{}\\
$\ast$ to execute on clusters of SMPs.\mbox{}\\
$\ast$\mbox{}\\
$\ast$ The main data is made available to all threads through a globally\mbox{}\\
$\ast$ accessible structure. Each thread works on a different part of the\mbox{}\\
$\ast$ data. The main thread waits for all the threads to complete their\mbox{}\\
$\ast$ computations, and then it prints the resulting sum.\mbox{}\\
$\ast$\mbox{}\\
$\ast$ SOURCE: Vijay Sonnad, IBM\mbox{}\\
$\ast$ LAST REVISED: 10/8/99 Blaise Barney\mbox{}\\
$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast$$\ast/$}\mbox{}\\
\mbox{}\\
{\tt \#include} $<${\tt{}pthread.h}$>$\mbox{}\\
{\tt \#include} $<${\tt{}stdio.h}$>$\mbox{}\\
{\tt \#include} $<${\tt{}stdlib.h}$>$\mbox{}\\
\mbox{}\\
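{\it A typical way to build this program, assuming GCC on a POSIX system (the command below is illustrative and not part of the original source; other compilers use their own flags for linking the Pthreads library):}\mbox{}\\
\mbox{}\\
{\tt gcc -o mpithreads\_threads mpithreads\_threads.c -lpthread}\mbox{}\\
\mbox{}\\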
{$/\ast$\it{} \mbox{}\\
The following structure contains the information needed to allow the\mbox{}\\
function "dotprod" to access its input data and place its output into\mbox{}\\
the structure. This structure is unchanged from the serial version.\mbox{}\\
$\ast/$}\mbox{}\\
\mbox{}\\
{\bf typedef} {\bf struct} \mbox{}\\
\hspace*{1\indentation}\{\mbox{}\\
\hspace*{3\indentation}{\bf double} $\ast$a;\mbox{}\\
\hspace*{3\indentation}{\bf double} $\ast$b;\mbox{}\\
\hspace*{3\indentation}{\bf double} sum;\mbox{}\\
\hspace*{3\indentation}{\bf int} veclen;\mbox{}\\
\hspace*{1\indentation}\} DOTDATA;\mbox{}\\
\mbox{}\\
{$/\ast$\it{} Define globally accessible variables and a mutex $\ast/$}\mbox{}\\
\mbox{}\\
{\tt \#define} MAXTHRDS 4\mbox{}\\
{\tt \#define} VECLEN 100\mbox{}\\
DOTDATA dotstr;\mbox{}\\
pthread\_t callThd[MAXTHRDS];\mbox{}\\
pthread\_mutex\_t mutexsum;\mbox{}\\
\mbox{}\\
{$/\ast$\it{}\mbox{}\\
The function dotprod is activated when the thread is created. As before,\mbox{}\\
all input to this routine is obtained from a structure of type DOTDATA and\mbox{}\\
all output is written back into that structure. The benefit of this approach\mbox{}\\
becomes apparent in the multi$-$threaded program: when a thread is created,\mbox{}\\
we pass a single argument to the activated function $-$ typically the thread\mbox{}\\
number. All other information required by the function is accessed through\mbox{}\\
the globally accessible structure.\mbox{}\\
$\ast/$}\mbox{}\\
\mbox{}\\
{\bf void} $\ast$dotprod({\bf void} $\ast$arg)\mbox{}\\
\{\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Define and use local variables for convenience $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{\bf int} i, start, end, len;\mbox{}\\
\hspace*{3\indentation}{\bf long} offset;\mbox{}\\
\hspace*{3\indentation}{\bf double} mysum, $\ast$x, $\ast$y;\mbox{}\\
\hspace*{3\indentation}offset = ({\bf long})arg;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}len = dotstr.veclen;\mbox{}\\
\hspace*{3\indentation}start = offset$\ast$len;\mbox{}\\
\hspace*{3\indentation}end = start + len;\mbox{}\\
\hspace*{3\indentation}x = dotstr.a;\mbox{}\\
\hspace*{3\indentation}y = dotstr.b;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{}\mbox{}\\
\hspace*{3\indentation}Perform the dot product and assign the result to the appropriate\mbox{}\\
\hspace*{3\indentation}variable in the structure.\mbox{}\\
\hspace*{3\indentation}$\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}mysum = 0;\mbox{}\\
\hspace*{3\indentation}{\bf for} (i=start; i$<$end; i++)\mbox{}\\
\hspace*{4\indentation}\{\mbox{}\\
\hspace*{6\indentation}mysum += (x[i] $\ast$ y[i]);\mbox{}\\
\hspace*{4\indentation}\}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{}\mbox{}\\
\hspace*{3\indentation}Lock the mutex before updating the value in the shared structure, and\mbox{}\\
\hspace*{3\indentation}unlock it after the update.\mbox{}\\
\hspace*{3\indentation}$\ast/$}\mbox{}\\
\hspace*{3\indentation}pthread\_mutex\_lock(\&mutexsum);\mbox{}\\
\hspace*{3\indentation}printf({\tt"Thread \%ld adding partial sum of \%f to global sum of \%f$\backslash$n"},\mbox{}\\
\hspace*{10\indentation}offset, mysum, dotstr.sum);\mbox{}\\
\hspace*{3\indentation}dotstr.sum += mysum;\mbox{}\\
\hspace*{3\indentation}pthread\_mutex\_unlock(\&mutexsum);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}pthread\_exit(({\bf void}$\ast$) 0);\mbox{}\\
\}\mbox{}\\
\mbox{}\\
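{\it To summarize the computation performed by {\tt dotprod}: writing $L$ for {\tt VECLEN}, the thread with offset $t$ accumulates the partial sum}
\[
  s_t = \sum_{i = tL}^{(t+1)L - 1} a_i \, b_i ,
\]
{\it and the mutex serializes the additions of the $s_t$ into {\tt dotstr.sum}, so that after all threads have been joined {\tt dotstr.sum} holds the full dot product $\sum_{t=0}^{\mathrm{MAXTHRDS}-1} s_t$.}\mbox{}\\
\mbox{}\\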
{$/\ast$\it{} \mbox{}\\
The main program creates the threads that do all the work and then prints\mbox{}\\
the result when they have completed. Before creating the threads, the input\mbox{}\\
data is created. Since all threads update the shared structure, we need a\mbox{}\\
mutex for mutual exclusion. The main thread must wait for all the threads to\mbox{}\\
complete, so it joins with each of them in turn. We specify a thread\mbox{}\\
attribute that allows the main thread to join with the threads it creates.\mbox{}\\
Note also that we free resources when they are no longer needed.\mbox{}\\
$\ast/$}\mbox{}\\
\mbox{}\\
{\bf int} main ({\bf int} argc, {\bf char} $\ast$argv[])\mbox{}\\
\{\mbox{}\\
\hspace*{3\indentation}{\bf long} i;\mbox{}\\
\hspace*{3\indentation}{\bf double} $\ast$a, $\ast$b;\mbox{}\\
\hspace*{3\indentation}{\bf void} $\ast$status;\mbox{}\\
\hspace*{3\indentation}pthread\_attr\_t attr;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Assign storage and initialize values $\ast/$}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}a = ({\bf double}$\ast$) malloc (MAXTHRDS$\ast$VECLEN$\ast${\bf sizeof}({\bf double}));\mbox{}\\
\hspace*{3\indentation}b = ({\bf double}$\ast$) malloc (MAXTHRDS$\ast$VECLEN$\ast${\bf sizeof}({\bf double}));\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{\bf for} (i=0; i$<$VECLEN$\ast$MAXTHRDS; i++)\mbox{}\\
\hspace*{4\indentation}\{\mbox{}\\
\hspace*{6\indentation}a[i]=1;\mbox{}\\
\hspace*{6\indentation}b[i]=a[i];\mbox{}\\
\hspace*{4\indentation}\}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}dotstr.veclen = VECLEN;\mbox{}\\
\hspace*{3\indentation}dotstr.a = a;\mbox{}\\
\hspace*{3\indentation}dotstr.b = b;\mbox{}\\
\hspace*{3\indentation}dotstr.sum = 0;\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}pthread\_mutex\_init(\&mutexsum, NULL);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Create threads to perform the dot product $\ast/$}\mbox{}\\
\hspace*{3\indentation}pthread\_attr\_init(\&attr);\mbox{}\\
\hspace*{3\indentation}pthread\_attr\_setdetachstate(\&attr, PTHREAD\_CREATE\_JOINABLE);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{\bf for} (i=0; i$<$MAXTHRDS; i++)\mbox{}\\
\hspace*{4\indentation}\{\mbox{}\\
\hspace*{6\indentation}{$/\ast$\it{}\mbox{}\\
\hspace*{6\indentation}Each thread works on a different set of data. The offset is\mbox{}\\
\hspace*{6\indentation}specified by 'i'. The size of the data for each thread is\mbox{}\\
\hspace*{6\indentation}indicated by VECLEN.\mbox{}\\
\hspace*{6\indentation}$\ast/$}\mbox{}\\
\hspace*{6\indentation}pthread\_create(\&callThd[i], \&attr, dotprod, ({\bf void} $\ast$)i);\mbox{}\\
\hspace*{4\indentation}\}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}pthread\_attr\_destroy(\&attr);\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} Wait on the other threads $\ast/$}\mbox{}\\
\hspace*{3\indentation}{\bf for} (i=0; i$<$MAXTHRDS; i++)\mbox{}\\
\hspace*{4\indentation}\{\mbox{}\\
\hspace*{6\indentation}pthread\_join(callThd[i], \&status);\mbox{}\\
\hspace*{4\indentation}\}\mbox{}\\
\mbox{}\\
\hspace*{3\indentation}{$/\ast$\it{} After joining, print out the result and clean up $\ast/$}\mbox{}\\
\hspace*{3\indentation}printf ({\tt"Done. Threaded version: sum $=$ \%f $\backslash$n"}, dotstr.sum);\mbox{}\\
\hspace*{3\indentation}free (a);\mbox{}\\
\hspace*{3\indentation}free (b);\mbox{}\\
\hspace*{3\indentation}pthread\_mutex\_destroy(\&mutexsum);\mbox{}\\
\hspace*{3\indentation}pthread\_exit(NULL);\mbox{}\\
\}\mbox{}\\
\mbox{}\\
\end{flushleft}
\end{document}