\documentclass[total,pdf]{prosper} \usepackage[toc,highlight,vsimple]{HA-prosper}% DO NOT EDIT---WILL BE OVERWRITTEN \usepackage{alltt,key,xr,cols,rcs,acro,nick,multicol,%optional,% graphicx,varioref,explanation,booktabs,xspace,psfrag,version} %\usepackage[nolineno,noindent]{lgrind} \usepackage[canUnderstandC]{optional} \includeversion{canUnderstandC} \excludeversion{Cchallenged} %\definecolor{green}{rgb}{0,1,0} \RCS $Revision: 1.5 $ % Copyright (c) 2004 by Nick Urbanik . % This material may be distributed only subject to the terms and % conditions set forth in the Open Publication License, v1.0 or later % (the latest version is presently available at % http://www.opencontent.org/openpub/). \renewcommand*{\bs}{\texttt{\char '134}} % Backslash `\' \newcommand*{\labTitle}{Processes and Threads} \newcommand*{\subject}{Operating Systems and Systems Integration} \newcommand*{\emphcolour}[1]{\emph{\red#1}} \providecommand*{\RPM}{\acro{RPM}\xspace} \providecommand*{\CD}{\acro{CD}\xspace} \providecommand*{\IPC}{\acro{IPC}\xspace} \providecommand*{\UID}{\acro{UID}\xspace} \providecommand*{\GID}{\acro{GID}\xspace} \providecommand*{\SMP}{\acro{SMP}\xspace} \providecommand*{\API}{\acro{API}\xspace} \providecommand*{\OK}{\acro{OK}\xspace} % Get rid of that horrible hash mark in slide numbers in ppr-prv.cls: \def\no{} \newcommand*{\link}[2]{\href{#2}{#1} \path{#2}} \newif\ifpprprvloaded \makeatletter% \@ifclassloaded{ppr-prv}{\pprprvloadedtrue}{\pprprvloadedfalse}% \makeatother% \newcommand{\positionPicture}{% \ifpprprvloaded \else \vspace*{-15mm}\par \hspace*{-13mm}% \fi } \newcommand{\positionPictureOpt}[2]{% \ifpprprvloaded \else \vspace*{#1}\par \hspace*{#2}% \fi } \title{Processes and Threads} \subtitle{What are processes?\\ How does the operating system manage them?} \author{Nick Urbanik\\ \email{nicku@vtc.edu.hk}\\ \institution{A computing department}\\ \footnotesize{}Copyright Conditions: Open Publication License\\ (see \url{http://www.opencontent.org/openpub/})}% %\institution{A computing department} \slideCaption{OSSI --- Processes and Threads --- ver. \RCSRevision} %\Logo{\includegraphics[width=15mm]{ict-logo-smaller}} \DefaultTransition{Wipe} \TitleSlideNav{FullScreen} \NormalSlideNav{ShowBookmarks} \LeftFoot{OSSI --- ver. \RCSRevision} \RightFoot{Processes} \begin{document} \maketitle \tsection{Introduction} \begin{slide}{What is a process?} \begin{itemize} \item A \emphcolour{process} is a program in execution \item Each process has a \emphcolour{process ID} \item In Linux, \begin{alltt} $ \textbf{ps ax} \end{alltt}%$ \item prints one line for each process. \item A program can be executed a number of times simultaneously. \begin{itemize} \item Each is a separate process. \end{itemize} \end{itemize} \end{slide} \begin{slide}{What is a process? --- 2} \label{sld:what-is-a-process-2} \begin{itemize} \item A process includes current values of: \begin{itemize} \item Program counter \item Registers \item Variables \end{itemize} \item A process also has: \begin{itemize} \item The program code \item It's own address space, independent of other processes \item A user that owns it \item A group owner \item An \emphcolour{environment} and a \emphcolour{command line} \end{itemize} \item This information is stored in a \emphcolour{process control block}, or \emphcolour{task descriptor} or \emphcolour{process descriptor} \begin{itemize} \item a data structure in the \OS, in the \emphcolour{process table} \item See slides starting at \S\ref{sld:process-control-blocks}. \end{itemize} \end{itemize} \end{slide} \begin{slide}{What is a thread?} \begin{itemize} \item A \emphcolour{thread} is a lightweight process \begin{itemize} \item Takes less \CPU power to start, stop \end{itemize} \item {\mbox{}\blue{}Part of a single process} \item {\mbox{}\blue{}Shares address space with other threads in the same process} \item Threads can share data more easily than processes \item Sharing data requires \emphcolour{synchronisation}, i.e., locking --- see slide~\ref{sld:intro-to-synchronisation}. \item This shared memory space can lead to complications in programming: \begin{quote}{\small ``Threads often prevent abstraction. In order to prevent deadlock. you often need to know how and if the library you are using uses threads in order to avoid deadlock problems. Similarly, the use of threads in a library could be affected by the use of threads at the application layer.'' \hfill-- \emph{David Korn}}\\ \hspace*{\fill}\tiny{}See page 180, ESR in references, \S\ref{sld:references}. \end{quote} \end{itemize} \end{slide} \begin{slide}{Program counter} \begin{itemize} \item The code of a process occupies memory \item The Program counter (PC) is a \CPU register \item PC holds a memory address\ldots \item \ldots of the next instruction to be fetched and executed \end{itemize} \end{slide} \begin{slide}{Environment of a process} \begin{itemize} \item The \emphcolour{environment} is a set of names and values \item Examples: \begin{alltt} PATH=/usr/bin:/bin:/usr/X11R6/bin HOME=/home/nicku SHELL=/bin/bash \end{alltt} \item In Linux shell, can see environment by typing: \begin{alltt} $ \textbf{set} \end{alltt}%$ \end{itemize} \end{slide} \begin{slide}{Permissions of a Process} \begin{itemize} \item A process executes with the permissions of its owner \begin{itemize} \item The owner is the user that starts the process \end{itemize} \item A Linux process can execute with permissions of another user or group \item If it executes as the owner of the program instead of the owner of the process, it is called \emphcolour{set user \ID} \item Similarly for \emphcolour{set group \ID} programs \end{itemize} \end{slide} \tsection{Multitasking} \begin{slide}{Multitasking} \begin{itemize} \item Our lab PCs have one main \CPU \begin{itemize} \item But multiprocessor machines are becoming increasingly common \item Linux 2.6.x kernel scales to 16 \CPU{}s \end{itemize} \item How execute many processes ``at the same time''? \end{itemize} \end{slide} \begin{slide}{Multitasking --- 2} \begin{itemize} \item CPU rapidly switches between processes that are ``ready to run'' \item Really: only one process runs at a time \item Change of process called a \emphcolour{context switch} \begin{itemize} \item See slide~\S\ref{sld:context-switch} \end{itemize} \item With Linux: see how many context switches/second using \texttt{vmstat} under ``\texttt{system}'' in column ``\texttt{cs}'' \end{itemize} \end{slide} \begin{slide}{Multitasking --- 3} \begin{itemize} \item This diagram shows how the scheduler gives a ``turn'' on the \CPU to each of four processes that are ready to run \end{itemize} \par\bigskip\par% \centering% \includegraphics[width=0.9\slideWidth]{multitasking} \end{slide} \tsection{Start of Process} \begin{slide}{Birth of a Process} \begin{itemize} \item In Linux, a process is born from a \texttt{\red{}fork()} system call \begin{itemize} \item A \emphcolour{system call} is a function call to an operating system service provided by the kernel \end{itemize} \item Each process has a \emphcolour{parent} \item The parent process calls \texttt{\red{}fork()} \item The child inherits (\emphcolour{but cannot change}) the parent environment, open files \item Child is \emphcolour{identical} to parent, except for return value of \texttt{fork()}. \begin{itemize} \item Parent gets child's process ID (\PID) \item Child gets 0 \end{itemize} \end{itemize} \end{slide} \begin{slide}{Process tree} \begin{itemize} \item Processes may have parents and children \item Gives a family tree \item In Linux, see this with commands: \begin{alltt} $ \textbf{pstree} \end{alltt}%$ or \begin{alltt} $ \textbf{ps axf} \end{alltt}%$ \end{itemize} \end{slide} \tsection{Scheduler} \begin{slide}{Scheduler} \begin{itemize} \item OS decides when to run each process that is ready to run (``runable'') \item The part of OS that decides this is the \emphcolour{scheduler} \item Scheduler aims to: \begin{itemize} \item Maximise CPU usage \item Maximise process completion \item Minimise process execution time \item Minimise waiting time for ready processes \item Minimise response time \end{itemize} \end{itemize} \end{slide} \begin{slide}{When to Switch Processes?} \begin{itemize} \item The scheduler may change a process between {\blue{}executing} (or {\blue{}running}) and {\blue{}ready to run} when any of these events happen: \begin{itemize} \item clock interrupt \item I/O interrupt \item Memory fault \item trap caused by error or exception \item system call \end{itemize} \item See slide~\S\ref{sld:process-states} showing the {\blue{}running} and {\blue{}ready to run} process states. \end{itemize} \end{slide} \begin{slide}{Scheduling statistics: \texttt{vmstat}} \begin{itemize} \item The ``\texttt{system}'' columns give statistics about \emphcolour{scheduling}: \begin{itemize} \item ``\texttt{cs}'' --- number of context switches per second \item ``\texttt{in}'' --- number of interrupts per second \end{itemize} \item See slide~\S\ref{sld:context-switch}, \texttt{man vmstat} \end{itemize} \end{slide} \begin{slide}{Interrupts} \begin{itemize} \item Will discuss interrupts in more detail when we cover I/O \item An \emphcolour{interrupt} is an event (usually) caused by hardware that causes: \begin{itemize} \item Saving some CPU registers \item Execution of \emphcolour{interrupt handler} \item Restoration of CPU registers \end{itemize} \item An opportunity for scheduling \end{itemize} \end{slide} \tsection{Process States} \begin{slide}{Process States} \label{sld:process-states} \centering% \includegraphics[width=\linewidth]{process-states} \end{slide} \begin{slide}{What is Most Common State?} \begin{itemize} \item Now, my computer has 160 processes. \item How many are running, how many are ready to run, how many are blocked? \item What do you expect is most common state? \end{itemize} \end{slide} \begin{slide}{Most Processes are Blocked} \label{sld:most-processes-are-blocked} \begin{alltt}{\scriptsize 9:41am up 44 days, 20:12, 1 user, load average: 2.02, 2.06, 2.13 160 processes: 145 sleeping, 2 running, 13 zombie, 0 stopped }\end{alltt} \begin{itemize} \item Here you see that most are sleeping, waiting for input! \item \mbox{}{\green{}Most processes are ``\emphcolour{I/O bound}\green{}''; they spend most time waiting for input or waiting for output to complete} \item With one \CPU, {\blue{}only one process can actually be running at one time} \item However, surprisingly few processes are ready to run \item The \emphcolour{load average} is the average number of processes that are in the ready to run state. \item In output from the top program above, see over last 60 seconds, there are 2.02 processes on average in \acro{RTR} state \end{itemize} \end{slide} \begin{slide}{Linux Process States} \par\vspace*{-4mm}\par \hspace*{-0.1\slideWidth}% \includegraphics[height=0.85\slideWidth]{linux-process-states} \end{slide} \begin{slide}{Linux Process States --- 2} \begin{itemize} \item \emphcolour{Running} --- actually contains two states: \begin{itemize} \item \emphcolour{executing}, or \item \emphcolour{ready to execute} \end{itemize} \item \emphcolour{Interruptable} --- a blocked state \begin{itemize} \item waiting for event, such as: \begin{itemize} \item end of an I/O operation, \item availability of a resource, or \item a signal from another process \end{itemize} \end{itemize} \item \emphcolour{Uninterruptable} --- another blocked state \begin{itemize} \item waiting directly on hardware conditions \item will not accept any signals (even \texttt{SIGKILL}) \end{itemize} \end{itemize} \end{slide} \begin{slide}{Linux Process States --- 3} \begin{itemize} \item \emphcolour{Stopped} --- process is halted \begin{itemize} \item can be restarted by another process \item e.g., a debugger can put a process into stopped state \end{itemize} \item \emphcolour{Zombie} --- a process has terminated \begin{itemize} \item but parent did not \texttt{wait()} for it %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% (see slide~\pageref{sld:wait}) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{itemize} \end{itemize} \end{slide} \begin{slide}{Process States: \texttt{vmstat}} \begin{itemize} \item The ``\texttt{procs}'' columns give info about process states: \item ``\texttt{r}'' --- number of processes that are in the \emphcolour{ready to run} state \item ``\texttt{b}'' --- number of processes that are in the \emphcolour{uninterruptable} blocked state %% \item ``\texttt{w}'' --- number of processes in ready state %%but swapped out (better if zero) \end{itemize} \end{slide} \begin{slide}{Tools for monitoring processes} \begin{itemize} \item Linux provides: \item \texttt{\red\textbf{vmstat}} \begin{itemize} \item Good to monitor over time: \begin{alltt} $ \textbf{vmstat 5} \end{alltt}%$ \end{itemize} \item \texttt{\red\textbf{procinfo}} \begin{itemize} \item Easier to understand than \texttt{vmstat} \item Monitor over time with \begin{alltt} $ \textbf{procinfo -f} \end{alltt}%$ \end{itemize} \item View processes with \texttt{\red\textbf{top}} --- see slides~\ref{sld:top} to \S\ref{sld:top-and-memory} \item The system monitor \texttt{\red\textbf{sar}} shows data collected over time: See \texttt{man~sar}; investigate \texttt{sar~-c} and \texttt{sar~-q} \item See the utilities in the \texttt{procps} software package. You can list them with \begin{alltt} $ \textbf{rpm -ql procps} \end{alltt}%$ \par\vspace*{-1ex}\par \begin{multicols}{5} \begin{alltt} \textbf{\red{}ps free pgrep pkill pmap skill slabtop snice tload top uptime vmstat w watch} \end{alltt}%$ \end{multicols} \end{itemize} \end{slide} \begin{slide}{Monitoring processes in Win 2000} \begin{itemize} \item Windows 2000 provides a tool: \item Start $\to$ Administrative Tools $\to$ Performance. \item Can use this to monitor various statistics \end{itemize} \end{slide} \tsectionandpart[toc=top,bm=top]{Process Monitoring with \texttt{top}} \begin{wideslide}{Process Monitoring --- \texttt{top}} \label{sld:top} \begin{alltt}\scriptsize 08:12:13 up 1 day, 13:34, 8 users, load average: 0.16, 0.24, 0.49 111 processes: 109 sleeping, 1 running, 1 zombie, 0 stopped CPU states: cpu user nice system irq softirq iowait idle total 0.0% 0.0% 3.8% 0.0% 0.0% 0.0% 96.1% Mem: 255608k av, 245064k used, 10544k free, 0k shrd, 17044k buff 152460k active, 63236k inactive Swap: 1024120k av, 144800k used, 879320k free 122560k cached \textbf{ PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME CPU COMMAND } 1253 root 15 0 73996 13M 11108 S 2.9 5.5 19:09 0 X 1769 nicku 16 0 2352 1588 1488 S 1.9 0.6 2:10 0 magicdev 23548 nicku 16 0 1256 1256 916 R 1.9 0.4 0:00 0 top 1 root 16 0 496 468 440 S 0.0 0.1 0:05 0 init 2 root 15 0 0 0 0 SW 0.0 0.0 0:00 0 keventd 3 root 15 0 0 0 0 SW 0.0 0.0 0:00 0 kapmd 4 root 34 19 0 0 0 SWN 0.0 0.0 0:00 0 ksoftirqd/0 6 root 15 0 0 0 0 SW 0.0 0.0 0:00 0 bdflush 5 root 15 0 0 0 0 SW 0.0 0.0 0:11 0 kswapd \end{alltt} \end{wideslide} \begin{slide}[toc=load average,bm=load average]{\texttt{top}: load average} \label{sld:top-load-average} \positionPictureOpt{0pt}{-15mm}% \begin{alltt}\scriptsize 08:12:13 up 1 day, 13:34, 8 users, \textbf{load average: 0.16, 0.24, 0.49} \end{alltt} \begin{itemize} \item \emphcolour{load average} is measured over the last minute, five minutes, fifteen minutes \item Over that time is the average number of processes that are \emphcolour{ready to run}, but which are \emphcolour{not executing} \item A measure of how ``busy'' a computer is. \end{itemize} \end{slide} \begin{slide}{\texttt{top}: process states} \label{sld:top-process-states} \begin{alltt}\scriptsize 111 processes: 109 sleeping, 1 running, 1 zombie, 0 stopped \end{alltt} \begin{description} \item[sleeping] Most processes (109/111) are sleeping, waiting for \IO \item[running] This is the number of processes that are both ready to run and are executing \item[zombie] There is one process here that has terminated, but its parent did not \texttt{wait()} for it. \begin{itemize} \item The \texttt{wait()} system calls are made by a parent process, to get the \texttt{exit()} status of its child(ren). \item This call removes the \emphcolour{process control block} from the \emphcolour{process table}, and the child process does not exist any more. (\S\ref{sld:process-control-blocks}) \end{itemize} \item[stopped] When you press \key{Control-z} in a shell, you will increase this number by 1 \end{description} \end{slide} \begin{slide}[toc=top and memory,bm=top and memory]% {\texttt{top}: Processes and Memory} \label{sld:top-and-memory} \positionPictureOpt{-5mm}{-15mm}% \begin{alltt}{\scriptsize \textbf{ PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME CPU COMMAND }1253 root 15 0 73996 13M 11108 S 2.9 5.5 19:09 0 X }\end{alltt} \begin{description} \item[SIZE] This column is the total size of the process, including the part which is swapped (paged out) out to the swap partition or swap file Here we see that the process X uses a total of 73{,}996\,Kb, i.e., $73{,}996 \times 1024$ bytes $\mbox{}\approx \mbox{}$72MB, where here $1\mbox{MB} = 2^{20}$ bytes. \item[RSS] The \emphcolour{resident set size} is the total amount of \RAM that a process uses, including memory shared with other processes. Here X uses a total of 13MB \RAM, including \RAM shared with other processes. \item[SHARE] The amount of \emphcolour{shared} memory is the amount of \RAM that this process shares with other processes. Here X shares 11,108\,KB with other processes. \end{description} We can see that the total amount of \RAM used exclusively by one process is {\red$\mbox{rss} - \mbox{share}$}. Here we see that X uses about $13 \times 2^{20} - 11{,}108 \times 2^{10} \approx 2\,\mbox{MB}$ \end{slide} \begin{slide}{Virtual Memory: suspended processes} \begin{itemize} \item With memory fully occupied by processes, could have all in blocked state! \item \CPU could be completely idle, but other processes waiting for \RAM \item Solution: \emphcolour{virtual memory} \begin{itemize} \item will discuss details of \acro{VM} in memory management lecture \end{itemize} \item Part or all of process may be saved to swap partition or swap file \end{itemize} \end{slide} \begin{slide}{Suspended Processes} \begin{itemize} \item Could add more states to process state table: \begin{itemize} \item ready and suspended \item blocked and suspended \end{itemize} \end{itemize} \end{slide} \tsectionandpart[toc=Process Control Blocks,bm=Process Control Blocks]% {Process Control Blocks\\[3ex]The Process Table\\[3ex]Data structure in OS to hold information about a process} \begin{slide}{OS Process Control Structures} \label{sld:process-control-blocks} \begin{itemize} \item Every \OS provides \emphcolour{process tables} to manage processes \item In this table, the entries are called \emphcolour{process control blocks} (\PCB{}s), \emphcolour{process descriptor}s or \emphcolour{task descriptor}s. We will use the abbreviation \PCB. \item There is one \PCB for each process%% from the time the \OS %% starts the process to the point where it has terminated and the %% parent process has checked the termination status of the process \item in Linux, \PCB is called \texttt{\red{}task\_struct}, defined in \texttt{include/linux/sched.h} \begin{itemize} \item In a Fedora Core or Red Hat system, you will find it in the file \path{/usr/src/linux-2.*/include/linux/sched.h} if you have installed the \texttt{kernel-source} software package \end{itemize} \end{itemize} \end{slide} \begin{slide}{What is in a PCB} \label{sld:what-is-in-a-pcb} \begin{itemize} \item In slide~\S\ref{sld:what-is-a-process-2}, we saw that a {\blue\PCB contains:} \begin{itemize} \item a process \ID ({\red\PID}) \item \emphcolour{process state} (i.e., executing, ready to run, sleeping waiting for input, stopped, zombie) \item \emphcolour{program counter}, the \CPU register that holds the address of the next instruction to be fetched and executed \item The value of other \emphcolour{CPU registers} the last time the program was switched out of executing by a \emphcolour{context switch} --- see slide~\S\ref{sld:context-switch} \item scheduling priority \item the \emphcolour{user} that owns the process \item the \emphcolour{group} that owns the process \item pointers to the \emphcolour{parent process}, and \emphcolour{child processes} \item Location of \emphcolour{process's data} and \emphcolour{program code} in memory \item List of \emphcolour{allocated resources} (including open \emphcolour{files}) \end{itemize} \item \PCB holds the {\blue{}values as they were when process was last switched out} of executing by a \emphcolour{context switch} --- see slide~\S\ref{sld:context-switch} \end{itemize} \end{slide} \begin{slide}{Context Switch} \label{sld:context-switch} \begin{itemize} \item \OS does a \emphcolour{context switch} when: \begin{itemize} \item stop current process from executing, and \item start the next {\green{}ready to run} process executing on \CPU \end{itemize} \item \OS saves the \emphcolour{execution context} (see \S\ref{sld:execution-context}) to its \PCB \item \OS loads the ready process's execution context from its \PCB \item \emphcolour{When} does a context switch occur? \begin{itemize} \item When a process \emphcolour{blocks}, i.e., goes to sleep, waiting for input or output (\IO), or \item When the scheduler decides the process has had its turn of the \CPU, and it's time to schedule another ready-to-run process \end{itemize} \item A context switch must be as \emphcolour{fast as possible}, or multitasking will be too slow \begin{itemize} \item Very fast in Linux \OS \end{itemize} \end{itemize} \end{slide} \begin{slide}{Execution Context} \label{sld:execution-context} \begin{itemize} \item Also called \emphcolour{state of the process} (but since this term has two meanings, we avoid that term here), \emphcolour{process context} or just \emphcolour{context} \item The \emphcolour{execution context} is all the data that the \OS must {\blue{}save} to stop one process from executing on a \CPU, and {\blue{}load} to start the next process running on a \CPU \item This includes the content of all the \CPU registers, the location of the code,\,\ldots \begin{itemize} \item Includes most of the contents of the process's \PCB. \end{itemize} \end{itemize} \end{slide} \begin{slide}{Program Counter in PCB} \label{sld:program-counter-in-pcb} \begin{itemize} \item What value is in the program counter in the \PCB? \item If it is \emphcolour{not} executing on the \CPU, \begin{itemize} \item The {\blue{}address of the next \CPU instruction} that \emphcolour{will be} fetched and executed the next time the program starts executing \end{itemize} \item If it \emphcolour{is} executing on the \CPU, \begin{itemize} \item The {\blue{}address of the first \CPU instruction} that \emphcolour{was} fetched and executed when the process began executing at the last context switch (\S\ref{sld:context-switch}) \end{itemize} \end{itemize} \end{slide} \begin{slide}[toc=PCB Example,bm=PCB Example]% {Process Control Blocks---Example} \label{sld:process-control-blocks-example-2} \begin{itemize} \item The diagram in slide~\S\ref{sld:process-control-blocks-example-diagram} shows three processes and their process control blocks. \item There are seven snapshots $t_0$, $t_1$, $t_2$, $t_3$, $t_4$, $t_5$ and $t_6$ at which the scheduler has changed process (there has been a context switch---\S\ref{sld:context-switch}) \item On this particular example \CPU, all \IO instructions are 2~bytes long \item The diagram also shows the queue of processes in the: \begin{itemize} \item \emphcolour{Ready queue} (processes that are ready to run, but do not have a \CPU to execute on yet) \item \emphcolour{Blocked}, or \emphcolour{Wait queue}, where the processes have been blocked because they are waiting for \IO to finish. \end{itemize} \end{itemize} \end{slide} \begin{wideslide}[toc=PCB Example Diagram,bm=PCB Example Diagram]% {PCB Example: Diagram} \label{sld:process-control-blocks-example-diagram} \positionPictureOpt{0pt}{-10mm}% \psfrag{t0}[bl][bl]{\huge$t_0$} \psfrag{t1}[bl][bl]{\huge$t_1$} \psfrag{t2}[bl][bl]{\huge$t_2$} \psfrag{t3}[bl][bl]{\huge$t_3$} \psfrag{t4}[bl][bl]{\huge$t_4$} \psfrag{t5}[bl][bl]{\huge$t_5$} \psfrag{t6}[bl][bl]{\huge$t_6$} \resizebox{1.1\slideWidth}{!}{\includegraphics{process-control-blocks}} \end{wideslide} \begin{slide}{PCB Example --- Continued} \begin{itemize} \item In slide \S\ref{sld:process-control-blocks-example-diagram}, \begin{itemize} \item The times $t_0$, $t_1$, $t_2$, $t_3$, $t_4$, $t_5$ and $t_6$ are when the scheduler has selected another process to run. \item Note that these time intervals are \emph{not equal}, they are just the points at which a scheduling change has occurred. \end{itemize} \item Each process has stopped at one stage to perform \IO \begin{itemize} \item That is why each one is put on the \emphcolour{wait queue} once during its execution. \end{itemize} \item Each process has performed \IO once \end{itemize} \end{slide} \begin{slide}[toc=Address of I/O instructions,bm=Address of I/O instructions]% {What is the address of I/O instructions?} \label{sld:address-of-io-instructions} \begin{itemize} \item We are given that all \IO instructions \emphcolour{in this particular example} are {\blue{}two bytes} long (slide~\S\ref{sld:process-control-blocks-example-2}) \begin{itemize} \item We can see that when the process is sleeping (i.e., blocked), then the program counter points to the instruction \emphcolour{after} the \IO instruction \item So for process P1, which blocks with program counter \PC{}\,=\,C0DE\hex, the \IO instruction is at address $\C0\D\E\hex - 2 = \C0\D\C\hex$ \item for process P2, which blocks with program counter \PC{}\,=\,FEED\hex, the \IO instruction is at address $\F\E\E\D\hex - 2 =\F\E\E\B\hex$ \item for process P3, which blocks with program counter \PC{}\,=\,D1CE\hex, the \IO instruction is at address $\D1\C\E\hex - 2 =\D1\C\C\hex$ \end{itemize} \end{itemize} \end{slide} \tsectionandpart[toc=System Calls,bm=System Calls]{Process System Calls\\[2ex]How the OS controls processes\\[2ex]How you use the OS to control processe} %% \begin{slide}{Process System Calls} %% \huge %% \begin{center} %% How the OS controls processes %% \end{center} %% \vfill %% \begin{center} %% How you use the OS to control processes %% \end{center} %% \end{slide} \begin{slide}[toc=System Calls,bm=System Calls]% %{{\red{}Major\textsuperscript{*}}\blue{} process Control System Calls} {Major process Control System Calls} \label{process-system-calls} \begin{itemize} \item \texttt{\textbf{\red{}fork()}} --- start a new process \item \texttt{\textbf{\red{}execve()}} --- replace calling process with machine code from another program file \item \texttt{\textbf{\red{}wait()}}, \texttt{\textbf{\red{}waitpid()}} --- parent process gets status of its' child after the child has terminated, and cleans up the process table entry for the child (stops it being a \emphcolour{zombie}) \item \texttt{\textbf{\red{}exit()}} --- terminate the current process \end{itemize} %{\red\tiny{}* perhaps these will \emph{definitely} be in the exam?} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{slide}{File I/O system calls: a sidetrack} \begin{itemize} \item[] \begin{alltt} #include ssize_t read( int filedes, void *buf, size_t nbytes ); \end{alltt} \item returns number of bytes read, 0 at end of file, $-1$ on error \begin{alltt} ssize_t write( int filedes, void *buf, size_t nbytes ); \end{alltt} \item returns number of bytes written, else $-1$ on error \item Note: these are \emphcolour{unbuffered}, that is, they have effect ``immediately''. \item This is different from \texttt{stdio.h} functions, which are buffered for efficiency. \end{itemize} \end{slide} \begin{slide}[toc=init,bm=init]% {Process IDs and \texttt{\red\textbf{init}}} \begin{itemize} \item Every process has a process \ID (\PID) \item process 0 is the scheduler, part of kernel \item \mbox{}{\red{}process 1} is {\red\texttt{\textbf{init}}}, the {\green{}parent of \emphcolour{all}\green{} other processes} \begin{itemize} \item a normal user process, not part of kernel \item program file is \texttt{\blue{}/sbin/init} \end{itemize} \item All other processes result from \texttt{init} calling the \texttt{\textbf{\red{}fork()}} system call \item This is the \emphcolour{only} way a new process is created by the kernel \end{itemize} \end{slide} \begin{slide}{SUID, SGID and IDs} \begin{itemize} \item Every process has \emphcolour{six} or more \ID{}s associated with it \item \UID and \GID of person who executes program file: \begin{itemize} \item real user \ID, real group \ID \end{itemize} \item \ID{}s used to calculate permissions: \begin{itemize} \item Effective \UID, Effective \GID \end{itemize} \item \ID{}s saved when use \texttt{exec()} system call: \begin{itemize} \item Saved set-user-ID, saved set-group-ID \item idea is can drop special privileges and return to executing with real \UID and real \GID when privilege is no longer required \end{itemize} \end{itemize} \end{slide} \begin{slide}{Other system calls: getting process info} \begin{itemize} \item[]\texttt{\#include } \item[] \texttt{\#include } \item \texttt{pid\_t getpid(void); \ }returns PID of calling process \item \texttt{pid\_t getppid(void); }returns PID of parent \item \texttt{uid\_t getuid(void); \ }returns real user ID of process \item \texttt{uid\_t geteuid(void); }returns effective user ID of process \item \texttt{gid\_t getgid(void); \ }returns real group ID of process \item \texttt{gid\_t getegid(void); }returns effective group ID of process \end{itemize} \end{slide} \begin{slide}{\texttt{fork()}: what it does} \begin{itemize} \item[] \begin{alltt} #include #include pid_t fork(void); \end{alltt} \item returns 0 in child \item returns \PID of child in parent \item returns $-1$ if error \end{itemize} \end{slide} \begin{slide}{Using \texttt{fork()}: pseudocode} \include{fork-pseudocode} %% \begin{alltt} %% if ( ( pid = fork() ) < 0 ) %% fork_error has happened %% else if ( pid == 0 ) /* I am the child */ %% do things the child process should do %% else /* I am the parent */ %% do things the parent should do %% \end{alltt} \end{slide} \begin{slide}{Simple \texttt{fork()} Example (no Checking)} \include{fork-simple} %% \begin{verbatim} %% #include %% #include %% int main() %% { %% int pid = fork(); %% printf( "PID is %d\n", pid ); %% if ( pid == 0 ) %% printf( "I'm the child\n" ); %% else %% printf( "I'm the parent\n" ); %% } %% \end{verbatim} \end{slide} \begin{slide}{An example using \texttt{fork()}} \footnotesize% \include{fork-example-1} %% \begin{verbatim} %% #include %% #include %% #include %% int global = 6; %% char buf[] = "a write to standard output\n"; %% int main( void ) %% { %% int var = 88; /* local variable on the stack */ %% pid_t pid; %% if ( write( STDOUT_FILENO, buf, sizeof (buf) - 1 ) %% != sizeof( buf ) - 1 ) { %% fprintf( stderr, "write error" ); %% exit( 1 ); %% } %% \end{verbatim} \end{slide} \begin{slide}{Example using \texttt{fork()}---(contd.)} \footnotesize% \include{fork-example-2} %% \begin{verbatim} %% printf( "before fork\n" ); /* not flushed */ %% if ( ( pid = fork() ) < 0 ) { %% fprintf( stderr, "fork error\n" ); %% exit( 1 ); %% } else if ( pid == 0 ) { /* child */ %% ++global; %% ++var; %% } else %% sleep( 2 ); /* parent */ %% printf( "pid = %d, global = %d, var = %d\n", %% getpid(), global, var ); %% exit( 0 ); %% } %% \end{verbatim} \end{slide} \begin{slide}{Output of \texttt{fork-example.c}:} \small% \begin{alltt} $ gcc -o fork-example fork-example.c $ ./fork-example a write to standard output before fork pid = 7118, global = 7, var = 89 {\tiny\emphcolour{child's vars changed}} pid = 7117, global = 6, var = 88 {\tiny\emphcolour{parent's copy not changed}} \end{alltt} \end{slide} \begin{slide}{Running \texttt{fork-example} again} \begin{alltt} $ ./fork-example > tmp.out $ cat tmp.out a write to standard output before fork pid = 7156, global = 7, var = 89 before fork pid = 7155, global = 6, var = 88 \end{alltt} \end{slide} \begin{slide}{Why two ``\texttt{before fork}'' messages? } \begin{itemize} \item \texttt{write()} system call not buffered \item \texttt{write()} called before \texttt{fork()}, so one output \item \texttt{printf()} is buffered \begin{itemize} \item line buffered if connected to terminal \item fully buffered otherwise; parent and child both have a copy of the unwritten buffer when redirected \end{itemize} \item \texttt{exit()} causes both parent and child buffers to flush \end{itemize} \end{slide} \begin{slide}{So what does this show?} \begin{itemize} \item It shows that the child is an exact copy of the parent, with all \item variable values, \item buffers, \item open files,\ldots \item All are inherited by the child \end{itemize} \end{slide} \begin{slide}{Running another program --- \texttt{exec()}} \begin{itemize} \item To run another program file \item first call \texttt{fork()} to create a child process \item child calls \texttt{exec()} to replace current copy of parent with a totally new program in execution \end{itemize} \end{slide} \begin{slide}{\texttt{execve()} system call} \begin{itemize} \item[] \input{execve} %% \begin{verbatim} %% #include %% int execve( const char *filename, %% char *const argv[], %% char *const envp[] ); %% \end{verbatim} \item executes the program filename, replaces current process \item Passes the command line in \texttt{argv[]} \item passes the environment variables in \texttt{envp[]} \item Does not return, unless error, when returns with $-1$ \item Usually called through library \texttt{exec*()} calls --- see \texttt{man 3 exec} \end{itemize} \end{slide} \begin{slide}{\texttt{fork()} --- \texttt{exec()} Example} \footnotesize% \include{fork-simple} %% \begin{verbatim} %% #include %% #include %% int main() %% { %% printf( "hello world\n" ); %% int pid = fork(); %% printf( "PID is % %% d\n", pid ); %% if ( pid == 0 ) %% execl( "/bin/ls", "ls", “-l”, NULL ); %% else %% printf( "I'm the parent\n" ); %% } %% \end{verbatim} \end{slide} \begin{slide}{Using \texttt{execl()}} \begin{itemize} \item[] \begin{alltt} int execl( const char *path, const char *arg, ... ); \end{alltt} \item Parameter number: \begin{enumerate} \item gives full path of the program file you want to execute \item gives name of the new process \item specifies the command line arguments you pass to the program \item last is a \texttt{NULL} pointer to end the parameter list. \end{enumerate} \item We must always put a \texttt{NULL} pointer at the end of this list. \end{itemize} \end{slide} \begin{slide}{\texttt{print.c}: a program we call} \include{print} %% \begin{verbatim} %% #include %% #include %% int main( int argc, char *argv[ ] ) %% { %% // argv[0] is the program name %% int num = atoi( argv[1] ); %% int loops = atoi( argv[2] ); %% int i; %% for ( i = 0; i < loops; ++i ) %% printf( "%d ", num ); %% } %% \end{verbatim} \end{slide} \begin{slide}{Calling \texttt{./print} using \texttt{execl()}} \begin{minipage}[t]{1.1\slideWidth} \footnotesize% \vspace*{-3ex}% \hspace*{-2ex}\include{call-print} \end{minipage} %% \begin{verbatim} %% #include %% #include %% int main() %% { %% printf( "hello world\n" ); %% int pid = fork(); %% printf( "PID is %d\n", pid ); %% if ( pid == 0 ) %% execl( "./print", "print", "1", "100", %% NULL ); %% else %% execl( "./print", "print", "2", "100", %% NULL ); %% } %% \end{verbatim} \end{slide} \begin{slide}{\texttt{vfork()} sytem call} \begin{itemize} \item A lightweight \texttt{fork()} \item Designed for running \texttt{execvp()} straight after \begin{itemize} \item modern Linux \texttt{fork()} is very efficient when call \texttt{exec*()} \end{itemize} \item Child does not contain an exact copy of parent address space; \item child calls \texttt{exec()} or \texttt{exit()} after \texttt{fork()} \item parent is suspended till child calls \texttt{fork()} or \texttt{exit()} \end{itemize} \end{slide} \begin{slide}{\texttt{wait()}, \texttt{waitpid()} system calls} \label{sld:wait} \begin{itemize} \item[] \begin{alltt} #include #include pid_t wait( int *status ); pid_t waitpid( pid_t pid, int *status, int options ); \end{alltt} \item return process \ID if \OK, 0, or $-1$ on error \end{itemize} \end{slide} \begin{slide}{\texttt{wait()}, \texttt{waitpid()} system calls} \begin{itemize} \item \texttt{wait()} can block caller until child process terminates \item \texttt{waitpid()} has option to prevent blocking \item \texttt{waitpid()} can wait for a specific child instead of the first child \item if child has terminated already (it's a \emphcolour{zombie}), wait returns immediately, cleaning up the process table data structures for the child \end{itemize} \end{slide} \tsection{A shell program} \begin{slide}{Part of Simple Shell Program} \include{simplesh-a} %% \begin{verbatim} %% int main( int argc, char **argv ) %% { %% char *prog_name = basename( *argv ); %% print_prompt( prog_name ); %% read_command(); %% for ( ;; ) { %% int pid = fork(); %% if ( pid == 0 ) { %% execvp( args[ 0 ], args ); %% } %% wait( NULL ); %% print_prompt( prog_name ); %% read_command(); %% } %% } %% \end{verbatim} \end{slide} \begin{slide}{Windows and Processes} \begin{itemize} \item Windows provides a Win32 API call to create a process: \texttt{CreateProcess()} \item Creates a new process, loads program into that process \item \texttt{CreateProcess()} takes \emphcolour{ten} parameters \end{itemize} \end{slide} \begin{slide}{Windows and Processes --- 2} \begin{itemize} \item Win32 uses \emphcolour{handles} for almost all objects such as files, pipes, sockets, processes and events \item handles can be inherited from parent \item No proper parent-child relationship \begin{itemize} \item caller of \texttt{CreateProcess()} could be considered as parent \item but child cannot determine it's parent \end{itemize} \end{itemize} \end{slide} \begin{wideslide}{\texttt{CreateProcess()} prototype} \begin{itemize} \item \texttt{CreateProcess()} is \emphcolour{much} more complicated than \begin{verbatim} pid_t fork( void ); \end{verbatim} \item Four of the parameters point to \texttt{struct}s, e.g., \begin{itemize} \item \texttt{LPSTARTUPINFO} points to a \texttt{struct} with 4 members \item \texttt{LPPROCESS\_INFORMATION} points to a \texttt{struct} with 18 members! \end{itemize} \end{itemize} \begin{minipage}[t]{1.2\slideWidth} \include{CreateProcess-syntax} %% \begin{alltt} %% #include %% BOOL CreateProcess ( %% LPCTSTR lpApplicationName, // pointer to executable module %% LPTSTR lpCommandLine, // pointer to command line string %% LPSECURITY_ATTRIBUTES lpProcessAttrib, // process security %% LPSECURITY_ATTRIBUTES lpThreadAttrib, // thread security %% BOOL bInheritHandles, // handle inheritance flag %% DWORD dwCreationFlags, // creation flags %% LPVOID lpEnvironment, // pointer to new environment block %% LPCTSTR lpCurrentDirectory, // pointer to current dir name %% LPSTARTUPINFO lpStartupInfo, // pointer to STARTUPINFO %% LPPROCESS_INFORMATION lpProcessInformation // pointer to %% // PROCESS_INFORMATION %% ); %% \end{alltt} \end{minipage} \end{wideslide} \begin{slide}{\texttt{CreateProcess()}} \begin{itemize} \item Can Specify Program in either 1st or 2nd parameter: \begin{itemize} \item first: location of program to execute \item second: command line to execute \end{itemize} \item Creation flags: \begin{itemize} \item if 0, runs in existing window \end{itemize} \end{itemize} \end{slide} \begin{slide}{Example: \texttt{CreateProcess()}} \begin{minipage}[t]{1.1\slideWidth} \scriptsize% \include{win-call-print} \end{minipage} %% \begin{verbatim} %% #include %% #include %% void main() { %% STARTUPINFO si; %% PROCESS_INFORMATION pi; %% memset( &si, 0, sizeof( si ) ); %% si.cb = sizeof(si); %% if ( ! CreateProcess( NULL, %% "..\\..\\print\\Debug\\print.exe 5 100", %% NUinLL, NULL, TRUE, 0, NULL, NULL, &si, &pi) ) %% fprintf( stderr, %% "CreateProcess failed with %d\n" %% GetLastError() ); %% WaitForSingleObject( pi.hProcess, INFINITE ); %% CloseHandle( pi.hProcess ); %% CloseHandle( pi.hThread ); %% } %% \end{verbatim} \end{slide} \begin{slide}{Processes in Linux, Unix, Windows} \begin{minipage}[t]{0.58\slideWidth} \begin{itemize} \item Linux often provides 2 or more processes per application \item Example: apache web server parent process watches for connections, one child process per client \item Linux processes have much less overhead than in Windows \item \texttt{fork()} --- \texttt{exec()} very efficient \item \POSIX threads are very efficient, and faster than \texttt{fork()} --- \texttt{exec()} \end{itemize} \end{minipage}% \hspace*{0.04\slideWidth}% \begin{minipage}[t]{0.38\slideWidth} \begin{itemize} \item Windows have one process per application, but often 2 or more threads \item Windows \texttt{CreateProcess()} takes more time than \texttt{fork()} --- \texttt{exec()} \item \texttt{CreateThread()} takes very much less time than \texttt{CreateProcess()} \end{itemize} \end{minipage} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \tsectionandpart[toc=IPC,bm=IPC]% {IPC\\[3ex]Inter Process Communication\\[3ex]How Processes can Talk to Each Other} \begin{slide}{Problem with Processes} \begin{itemize} \item Communication! \item Processes cannot see the same variables \item Must use \emphcolour{\textbf{I}nter \textbf{P}rocess \textbf{C}ommunication} (IPC) \item IPC Techniques include: \begin{itemize} \item pipes, and named pipes (FIFOs) \item sockets \item messages and message queues \item shared memory regions \end{itemize} \item All have some overhead \end{itemize} \end{slide} \begin{slide}{Interprocess Communication (IPC)} \begin{itemize} \item \emphcolour{Pipe} --- circular buffer, can be written by one process, read by another \begin{itemize} \item related processes can use unnamed pipes \begin{itemize} \item used in {\green{}shell programming}, e.g., the vertical bar `\textbar' in \begin{alltt} $ \textbf{find /etc | xargs file} \end{alltt}%$ \end{itemize} \item unrelated processes can use \emphcolour{named pipes} --- sometimes called {\red\FIFO{}}s \end{itemize} \item \emphcolour{Messages} --- \POSIX provides system calls \texttt{msgsnd()} and \texttt{msgrcv()} \begin{itemize} \item message is block of text with a type \item each process has a message queue, like a mailbox \item processes are suspended when attempt to read from empty queue, or write to full queue. \end{itemize} \end{itemize} \end{slide} \begin{slide}{IPC --- Shared Memory} \begin{itemize} \item \emphcolour{Shared Memory} --- a Common block of memory shared by many processes \item Fastest way of communicating \item Requires synchronisation (See slide~\pageref{sld:intro-to-synchronisation}) \end{itemize} \end{slide} \begin{slide}{IPC --- Signals} \positionPictureOpt{-2.5ex}{0pt}% \begin{itemize} \item Some \emphcolour{signals} can be generated from the keyboard, i.e., \key{Control-C} --- interrupt (\texttt{SIGINT}); \key{Control-\bs} --- quit (\texttt{SIGQUIT}), \key{Control-Z} --- stop (\texttt{SIGSTOP}) \item A process {\blue{}sends a signal} to another process using the \texttt{\red\textbf{kill()}} system call \item signals are implemented as single bits in a field in the \PCB, so cannot be queued \item A process may respond to a signal with: \begin{itemize} \item a \emphcolour{default action} (usually process terminates) \item a \emphcolour{signal handler} function (see \texttt{\red\textbf{trap}} in shell programming notes), or \item ignore the signal {\blue(unless it is \texttt{SIGKILL} or \texttt{SIGSTOP})} \end{itemize} \item A process \emphcolour{cannot ignore}, or handle a \texttt{\red{}SIGSTOP} or a \texttt{\red{}SIGKILL} signal. \begin{itemize} \item A \texttt{\red{}KILL} signal will \emphcolour{always terminate} a process (unless it is in interruptible sleep) \item A \texttt{\red{}SIGSTOP} signal will \emphcolour{always} send a process into the stopped state. \end{itemize} \end{itemize} \end{slide} \begin{slide}{Signals and the Shell} \begin{itemize} \item We can use the \texttt{\red\textbf{kill}} built in command to make the \texttt{\red\textbf{kill()}} system call to \emphcolour{send} a signal \item A shell script uses the \texttt{\red\textbf{trap}} built in command to \emphcolour{handle} a signal \item \emphcolour{Ignoring} the signals \texttt{SIGINT}, \texttt{SIGQUIT} and \texttt{SIGTERM}: \begin{verbatim} trap "" INT QUIT TERM \end{verbatim} \item \emphcolour{Handling} the same signals by printing a message then exiting: {\footnotesize \begin{verbatim} trap "echo 'Got a signal; exiting.';exit 1" INT QUIT TERM \end{verbatim}% } \item Handling the same signals with a function call: \begin{verbatim} signal_handler() { echo "Received a signal; terminating." rm -f $temp_file exit 1 } trap signal_handler INT QUIT TERM \end{verbatim}%$ \item \emphcolour{Sending} a \texttt{SIGKILL} signal to process with \PID 3233: \begin{alltt} $ \textbf{kill -KILL 3233} \end{alltt}%$ \end{itemize} \end{slide} \tsectionandpart[toc=Threads,bm=threads]% {Threads\\[3ex]Lightweight processes that can talk to each other easily} \begin{slide}{Threads and Processes} \begin{minipage}[t]{0.42\slideWidth} \begin{itemize} \item Threads in a process all share the same address space \item Communication easier \item Overhead less \item Problems of \emphcolour{locking} and \emphcolour{deadlock} a major issue \end{itemize} \end{minipage}% \hspace*{0.06\slideWidth}% \begin{minipage}[t]{0.52\slideWidth} \begin{itemize} \item Processes have separate address spaces \item Communication more indirect: \IPC (Inter Process Communication) \item Overhead higher \item Less problem with shared resources (since fewer resources to share!) \end{itemize} \end{minipage} \end{slide} \begin{slide}{Threads have own\ldots} \begin{itemize} \item stack pointer \item register values \item scheduling properties, such as policy or priority \item set of signals they can each block or receive \item own stack data (local variables are local to thread) \end{itemize} \end{slide} \begin{slide}{Threads share a lot} \begin{itemize} \item Changes made by one thread to shared system resources (such as closing a file) will be seen by all other threads. \item Two pointers having the same value point to the same data. \item A number of threads can read and write to the same memory locations, and so you need to explicitly \emphcolour{synchronise} access \end{itemize} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{slide}{Threads in Linux, Unix} \begin{itemize} \item \POSIX is a standard for \UNIX \item Linux implements \POSIX threads \item On Red Hat 8.x, documentation is at \begin{alltt} $ \textbf{info '(libc) POSIX Threads'} \end{alltt}%$ \begin{itemize} \item or in Emacs, \texttt{C-H m libc} then middle-click on \texttt{POSIX threads} \end{itemize} \item Provides: \begin{itemize} \item \emphcolour{semaphores}, \item \emphcolour{mutex}es and \item \emphcolour{condition variables} \end{itemize} for locking (synchronisation) \end{itemize} \end{slide} %% \begin{slide}{\texttt{hello.c}: a simple threaded program} %% \footnotesize% %% \begin{verbatim} %% #include %% #include %% #define NUM_THREADS 5 %% void *PrintHello( void *threadid ) %% { %% printf( "\n%d: Hello World!\n", threadid ); %% pthread_exit( NULL ); %% } %% \end{verbatim} %% \end{slide} \begin{slide}{\texttt{hello.c}: a simple threaded program} \label{sld:hello.c} \scriptsize% %\vspace*{-5ex} \begin{minipage}[t]{1.1\slideWidth} \include{hello} \end{minipage} %% \begin{verbatim} %% int main( int argc, char *argv[] ) %% { %% pthread_t threads[NUM_THREADS]; %% int rc, t; %% for ( t = 0; t < NUM_THREADS; ++t ) { %% printf( "Creating thread %d\n", t ); %% rc = pthread_create( &threads[t], NULL, %% PrintHello, ( void * ) t ); %% if ( rc ) { %% printf( "ERROR; return code from" %% "pthread_create() is %d\n", rc ); %% exit( -1 ); %% } %% } %% pthread_exit( NULL ); %% } %% \end{verbatim} \end{slide} \begin{slide}[toc=Compile POSIX Threads,bm=Compile POSIX Threads]% {How to Compile a POSIX Threads Program} \begin{itemize} \item Need to use the \texttt{libpthread} library \begin{itemize} \item Specify this with the option \texttt{-lpthread} \end{itemize} \item Need to tell the other libraries that they should be \emphcolour{reentrant} (or ``\emphcolour{thread safe}'') \begin{itemize} \item This means that the library uses no static variables that may be overwritten by another thread \item Specify this with the option \texttt{-D\_REENTRANT} \end{itemize} \item So, to compile the program \meta{program}.c, do: \begin{alltt}\small $ \textbf{gcc -D_REENTRANT -lpthread -o \meta{program} \meta{program}.c} \end{alltt}%$ \end{itemize} \end{slide} \begin{slide}{\texttt{pthread\_create()}} \label{sld:pthread_create} \begin{itemize} \item[] \begin{verbatim} #include void * pthread_create( pthread_t *thread, pthread_attr_t *attr, void *(*start_routine)(void *), void *arg ); \end{verbatim} \item returns: 0 if successfully creates thread \item returns error code otherwise \end{itemize} \end{slide} \begin{slide}{\texttt{pthread\_create()}} \label{sld:pthread_create2} \begin{itemize} \item Quite different from \texttt{fork()} \item Thread must always execute a \emphcolour{user-defined function} \item parameters: \begin{enumerate} \item pointer to thread identifier \item attributes for thread, including stack size \item user function to execute \item parameter passed to the user function \end{enumerate} \end{itemize} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{slide}{Problem with threads:} \begin{itemize} \item Avoid 2 or more threads writing or reading and writing same data at the same time \item Avoid \emphcolour{data corruption} \item Need to control access to data, devices, files \item Need \emphcolour{locking} \item Provide three methods of locking: \begin{itemize} \item mutex (\textbf{mut}ual \textbf{ex}clusion) \item semaphores \item condition variables \end{itemize} \end{itemize} \end{slide} \tsectionandpart{Race Condition} \begin{slide}{Race Conditions} \begin{itemize} \item \emphcolour{race condition} --- {\green{}where outcome of computation depends on sheduling} \item an error in coding \item Example: two threads both access same list with code like this: \end{itemize} \begin{verbatim} if ( list.numitems > 0 ) { // Oh, dear, better not change to // other thread here! remove_item( list ); // not here! // ...and not here either: --list.numitems; } \end{verbatim} \end{slide} \begin{slide}{Critical Sections} \begin{itemize} \item \emphcolour{critical resource} --- a device, file or piece of data that cannot be shared \item \emphcolour{critical section} --- part of program only one thread or process should access {\green{}contains a critical resource} \begin{itemize} \item i.e., you lock \emphcolour{data}, \emphcolour{not} {\blue{}code} \end{itemize} \item \mbox{}{\green{}All the code in the previous slide is a critical section} \item Consider the code: \begin{alltt} very_important_count++; \end{alltt} \item executed by two threads on a multiprocessor machine (\SMP= \textbf{s}ymmetric \textbf{m}ulti\textbf{p}rocessor) \end{itemize} \end{slide} \begin{slide}{Race Condition --- one possibility} \footnotesize% %\hspace*{-0.06\slideWidth}% %\begin{tabularx}{1.1\slideWidth}[t]{@{}YY@{}} \begin{tabularx}{\slideWidth}[t]{@{}YY@{}} \textbf{thread 1} & \textbf{thread 2}\\ \midrule% read~\texttt{very\_important\_count}~(5) & \\ add 1 (6) & \\ write~\texttt{very\_important\_count}~(6) & \\ & read~\texttt{very\_important\_count}~(6)\\ & add 1 (7)\\ & write~\texttt{very\_important\_count}~(7) \end{tabularx} \end{slide} \begin{slide}{Example --- another possibility} \footnotesize% %\hspace*{-0.05\slideWidth}% %\begin{tabularx}{1.1\slideWidth}[t]{@{}YY@{}} \begin{tabularx}{\slideWidth}[t]{@{}YY@{}} \textbf{thread 1} & \textbf{thread 2}\\ \midrule% read~\texttt{very\_important\_count}~(5) & \\ & read~\texttt{very\_important\_count}~(5)\\ add 1~(6) & \\ & add 1~(6)\\ write~\texttt{very\_important\_count}~(6) & \\ & write~\texttt{very\_important\_count}~(6) \end{tabularx} \end{slide} \begin{slide}{Solution: Synchronisation} \label{sld:intro-to-synchronisation} \begin{itemize} \item Solution is to {\blue{}recognise} \emphcolour{critical sections} \item use \emphcolour{synchronisation}, i.e., locking, to make sure only one thread or process can enter critical region at one time. \item Methods of synchronisation include: \begin{itemize} \item file locking \item semaphores \item monitors \item spinlocks \item mutexes \end{itemize} \end{itemize} \end{slide} \begin{slide}{File Locking} \label{sld:file-locking} \begin{itemize} \item For example, an \texttt{\textbf{flock()}} system call can be used to provide \emphcolour{exclusive access} to an open file \item The call is \emphcolour{atomic} \begin{itemize} \item It either: \begin{itemize} \item completely succeeds in locking access to the file, or \item it fails to lock access to the file, because another thread or process holds the lock \item No ``half-locked'' state \end{itemize} \item \emphcolour{No race condition} \end{itemize} \item Alternatives can result in race conditions; for example: \begin{itemize} \item thread/process 1 checks lockfile \item thread/process 2 checks lockfile a very short time later \item both processes think they have exclusive write access to the file \item file is corrupted by two threads/processes writing to it at the same time \end{itemize} \end{itemize} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \tsectionandpart[toc=Synchronisation,bm=Synchronisation]% {Methods of Synchronisation\\[2ex]What is it?\\[2ex]mutex, semaphore, condition variables, monitor, spinlock} %% \begin{slide}{Methods of Synchronisation} %% \centering% %% \huge% %% What is \emph{synchronisation}? %% mutex, semaphore, condition variables, monitor, spinlock %% \end{slide} \begin{slide}{Synchronisation} \label{sld:synchronisation}% \begin{itemize} \item \emphcolour{Synchronisation} is a facility that enforces \begin{itemize} \item mutual exclusion and \item event ordering \end{itemize} \item Required when multiple active processes or threads can access shared address spaces or shared \IO resources \item even more critical for \SMP (\emphcolour{S}ymmetric \emphcolour{M}ulti\emphcolour{p}rocessor) systems \begin{itemize} \item kernel can run on any processor \item all processors are of equal importance (there is no one \CPU that is the ``boss'') \item \SMP systems include \PC{}s with more than one \CPU, as you might find in the Golden Shopping Centre \end{itemize} \end{itemize} \end{slide} \begin{slide}{Semaphores} \label{sld:semaphore}% \begin{itemize} \item A variable with three opererations: \begin{itemize} \item \emphcolour{initialise} to non-negative value \item \emphcolour{down} (or \emph{wait}) operation: \begin{itemize} \item decrement variable \item if variable becomes negative, then process or thread executing the \emph{down} operation is blocked %\begin{itemize} \item has nothing to do with the \texttt{wait} system call for a parent process to get status of its child %\end{itemize} \end{itemize} \item \emphcolour{up} (or \emph{signal}) operation: \begin{itemize} \item increment the semaphore variable; \item if value is not positive, then a process or thread blocked by a \emph{down} operation is unblocked. \end{itemize} \end{itemize} \item A semaphore also has a \emphcolour{queue} to hold processes or threads waiting on the semaphore. \end{itemize} \end{slide} \begin{slide}{Semaphores --- 2} \begin{itemize} \item The \emph{up} and \emph{down} semaphore operations are \emphcolour{atomic} \begin{itemize} \item the \emph{up} and \emph{down} operations cannot be interrupted \item each routine is a single, indivisible step \end{itemize} \item Using semaphores---pseudocode \include{semaphore-example} \item \emphcolour{Initialise} semaphore to {\green{}number of processes allowed into critical section at one time} \end{itemize} \end{slide} \begin{slide}[toc=POSIX and Win32,bm=POSIX and Win32]% {{\red{}Mutex}\blue---\POSIX and Win32 Threads} \label{sld:mutex} \begin{itemize} \item \textbf{\emph{mut}}ual \textbf{\emph{ex}}clusion \item Easier to use than semaphores (see slide~\pageref{sld:semaphore}) \item When only one thread or process needs to write to a resource \begin{itemize} \item all other writers refused access \end{itemize} \item A {\green{}special form of the more general \blue{}\emph{semaphore}} \begin{itemize} \item \mbox{}{\red{}Can have only two values}; \item sometimes called \emphcolour{binary semaphores}. \end{itemize} \end{itemize} \end{slide} \begin{slide}[toc=Threads Example 1,bm=Threads Example 1]% {mutex --- \POSIX Threads Example (1)} \label{sld:mutex-example-1} \begin{itemize} \item It is \emphcolour{good practice} to {\green{}put the mutex together with the data it proects} \item I have {\blue{}removed the error checking} from this example to save space---in real code, {\green{}always check library calls for error conditions} \end{itemize} \include{mutex0} \end{slide} \begin{slide}[toc=Threads Example 2,bm=Threads Example 2]% {mutex --- \POSIX Threads Example (2)} \label{sld:mutex-example-2} \small% \include{mutex1} \end{slide} \begin{slide}[toc=Threads Example 3,bm=Threads Example 3]% {mutex --- \POSIX Threads Example (3)} \label{sld:mutex-example-3} \small% \include{mutex2} \end{slide} \begin{slide}[toc=Condition Variables,bm=Condition Variables]% {\POSIX Condition Variables} \begin{itemize} \item Lets threads sleep till a condition about shared data is true \item Basic operations: \begin{itemize} \item signal the condition (when condition is true) \item wait for the condition \begin{itemize} \item suspend the thread till another thread signals the condition \end{itemize} \end{itemize} \item Always associated with a mutex \item Very useful \item \href{http://www.cs.wustl.edu/~schmidt/win32-cv-1.html}{Missing from Windows:} See \path{http://www.cs.wustl.edu/~schmidt/win32-cv-1.html} \end{itemize} \end{slide} \begin{slide}{Monitors} \begin{itemize} \item A \emphcolour{higher level structure for synchronisation} \item Implemented in Java, and some libraries \item main characteristics: \begin{itemize} \item data in monitor is accessible only to procedures in monitor \item a process or thread enters monitor by executing one of its procedures \item Only one process or thread may be executing in the monitor at one time. \end{itemize} \item Can implement with mutexes and condition variables. \end{itemize} \end{slide} \begin{slide}{Spinlocks} \begin{itemize} \item Used in operating system kernels in \SMP systems \item Linux uses kernel spinlocks only for \SMP systems \item a very simple single-holder lock \item if can't get the spinlock, you keep trying (spinning) until you can. \item Spinlocks are: \begin{itemize} \item very small and fast, and \item can be used anywhere \end{itemize} \end{itemize} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \tsectionandpart{Summary and References} \begin{slide}{Summary --- Process States, Scheduling} \begin{itemize} \item Scheduler changes processes between ready to run and running states \begin{itemize} \item context switch: when scheduler changes process or thread \end{itemize} \item Most processes are \emphcolour{blocked}, i.e., sleeping: waiting for \IO \begin{itemize} \item understand the process states \item why a process moves from one state to another \end{itemize} \item Communication between processes is not trivial; {\red\IPC} methods include \vspace*{0.6ex} \begin{minipage}[t]{0.35\slideWidth} \begin{itemize} \item pipes \item messages \end{itemize} \end{minipage}% \hspace*{1ex}% \begin{minipage}[t]{0.35\slideWidth} \begin{itemize} \item shared memory \item signals \item semaphores \end{itemize} \end{minipage} \end{itemize} \end{slide} \begin{slide}{Summary --- Processes and Threads} \begin{itemize} \item With Linux and \UNIX, main {\green{}process system calls} are \textbf{\texttt{\red{}fork()}}, \textbf{\texttt{\red{}exec()}} and \textbf{\texttt{\red{}wait()}} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% --- understand the function of each of these \item Windows provides \textbf{\texttt{\red{}CreateProcess()}} and various \textbf{\texttt{\red{}WaitFor\ldots()}} Win32 \API calls \begin{itemize} \item The \textbf{\texttt{\red{}WaitFor\ldots()}} calls have a purpose similar to that of the \textbf{\texttt{\red{}wait()}} system call in Linux and \UNIX \end{itemize} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \item \emphcolour{Threads} are lightweight processes \begin{itemize} \item part of one process \item share address space \item can share data easily \item sharing data requires synchronisation, i.e., locking \end{itemize} \end{itemize} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{slide}{Summary --- Synchronisation} \begin{itemize} \item When two threads of execution can both write to same data or \IO, \begin{itemize} \item Need enforce discipline \item Use \emphcolour{synchronisation} \end{itemize} \item We looked at the following methods of synchronisation: \begin{itemize} \item semaphore \item mutex \item condition variable \item monitor \item spinlock \end{itemize} \item There are other methods we have not examined here. \end{itemize} \end{slide} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{slide}{References} \label{sld:references} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \opt{canUnderstandC}{\ptsize{9}\scriptsize}% \opt{Cchallenged}{\small}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% There are many good sources of information in the library and on the Web about processes and threads. Here are some I recommend: \begin{itemize} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \item A {\red{}good online tutorial} about \link{\POSIX threads:}{http://www.llnl.gov/computing/tutorials/workshops/workshop/pthreads/MAIN.html} \item \url{http://www.humanfactor.com/pthreads/} provides links to a lot of information about \POSIX threads \item The {\red{}best book} about \POSIX threads is \emphcolour{Programming with POSIX Threads}, David Butenhof, Addison-Wesley, May 1997. Even though it was written so long ago, David wrote much of the \POSIX threads standard, so it really is the definitive work. It made me laugh, too! %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \item \emph{Operating Systems: A Modern Perspective: Lab Update}, 2nd Edition, Gary Nutt, Addison-Wesley, 2002. A nice text book that emphasises the practical (like I do!) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \item Microsoft \acro{MSDN} provides details of Win32 \API calls and provides examples of code. %% It did not stop me spending 3 hours %% trying to figure out how to compile and run my programs once I had %% written them. I offered a {\red\$50 HK prize} for the first to show %% me how to compile my simple Win32 console application programs with %% the Microsoft command line compiler, ably won by Wendell Lam. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% \end{itemize} %% \end{slide} %% \begin{slide}{References --- 2} %% \tiny% %% \begin{itemize} \item William Stallings, \emph{Operating Systems}, Fourth Edition, Prentice Hall, 2001, chapters 3, 4 and 5 \item Deitel, Deitel and Choffnes, \emph{Operating Systems}, Third Edition, Prentice Hall, 2004, ISBN 0-13-1182827-4, chapters 3, 4 and 5 \item Paul Rusty Russell, \href{http://kernelnewbies.org/documents/kdoc/kernel-locking/lklockingguide.html}{\emph{Unreliable Guide To Locking}} \path{http://kernelnewbies.org/documents/kdoc/kernel-locking/lklockingguide.html} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \item W. Richard Stevens, \emph{Advanced Progamming in the UNIX Environment}, Addison-Wesley, 1992 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \end{canUnderstandC}% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \item Eric S. Raymond, \emph{The Art of UNIX Programming}, Addison-Wesley, 2004, ISBN 0-13-142901-9. \end{itemize} \end{slide} \end{document}