\input{gl2.slide-header-beamer}% \usepackage{longtable} \errorcontextlines=99 %% Subtopic Number = '1.111.5' %% Title from filename: 'Maintain an effective data backup strategy' %% Weight: 3 %% Description: %% %% Candidate should be able to plan a backup strategy and backup %% filesystems automatically to various media. Tasks include dumping a raw %% device to a file or vice versa, performing partial and manual backups, %% verifying the integrity of backup files and partially or fully restoring %% backups. %% Key files, terms, and utilities include: %% cpio %% dd %% dump %% restore %% tar \title{1.111.5\\Maintain an effective data backup strategy\\Weight 3} \author[Grant Parnell]{ Grant Parnell\\ \and Geoffrey Robertson \texttt{ge@ffrey.com}\\ \and Nick Urbanik \texttt{nicku@nicku.org}\\ {\tiny This document Licensed under GPL---see section~\ref{sec:license}}}% \subtitle{Linux Professional Institute Certification --- 102}% \mode
{\chead{1.111.5}}% % TODO Nick % This needs a complete makeover % Change to being a presentation rather than a bunch of text. % Much more about tar, with examples. \begin{document} \maketitle \mode
{\thispagestyle{empty}} \begin{frame} \frametitle{Outline} \mode {% %\footnotesize \begin{multicols}{2} \tableofcontents \end{multicols} % You might wish to add the option [pausesections] }% \mode
{% \tableofcontents }% \end{frame} \section{Context} \label{sec:context} \begin{frame} \frametitle{Topic 111 Administrative Tasks [21]}% \framesubtitle{Where we are up to}% \begin{description} \item[1.111.1] Manage users and group accounts and related system files [4] \item[1.111.2] Tune the user environment and system environment variables [3] \item[1.111.3] Configure and use system log files to meet administrative and security needs [3] \item[1.111.4] Automate system administration tasks by scheduling jobs to run in the future [4] % \uline depends on \usepackage[normalem]{ulem}: \item[1.111.5] \textbf{\uline{Maintain an effective data backup strategy [3]}} \item[1.111.6] Maintain system time [4] \end{description} \end{frame} \section{Objectives} \label{sec:objectives} \begin{frame} %% GENERATED SLIDE \frametitle{Description of Objective}% \framesubtitle{1.111.5\ \ Maintain an effective data backup strategy}% \mode{\Large}% Candidate should be able to plan a backup strategy and backup filesystems automatically to various media. Tasks include dumping a raw device to a file or vice versa, performing partial and manual backups, verifying the integrity of backup files and partially or fully restoring backups. \end{frame} \begin{frame}[fragile] \frametitle{Key files, terms, and utilities include:}% \framesubtitle{1.111.5\ \ Maintain an effective data backup strategy} \mode{\large}% \begin{description} \item[\texttt{cpio}] --- creates an archive of data \item[\texttt{dd}] --- can copy raw filesystems \item[\texttt{dump}] --- backs up ext2, ext3 systems \item[\texttt{restore}] --- restores files from \texttt{dump} backups \item[\texttt{tar}] --- creates ``tarballs'' and creates archives on tape \end{description} \end{frame} \section{Deciding what to back up} \label{sec:deciding-what-to-backup} \begin{frame} \frametitle{Backups}% Decide what data is important and how long you can do without it. 
\begin{itemize} % \pause % \begin{overlay}{1} \item Is this used 24$\times$7 or just business hours? %% \end{overlay} % \pause % \begin{overlay}{2} \item During business hours how long can you do without it? 4 hours, 30 minutes, 5 minutes? %% \end{overlay} % \pause % \begin{overlay}{3} \item How up-to-date must it be to get you running in an emergency? %% \end{overlay} % \pause % \begin{overlay}{4} \item Are you backing up for archival or high availability or espionage? %% \end{overlay} \end{itemize} \end{frame} \section{Types of data} \label{sec:type-of-data} \begin{frame} \frametitle{Examples of Data}% \framesubtitle{Static}% Configurations of running servers. You need these 24$\times$7 but they don't change much. \end{frame} \subsection{Databases} \label{sec:databases} \begin{frame}[fragile] \frametitle{Databases / Transactions --- financial \& otherwise}% These are updated frequently and need to balance. Associated with these are logs \& duplication \& other means of rollback \& integrity checking. With databases it's often a good idea to dump them in a good portable format, especially if the inbuilt format is not cross-platform or cross-version compatible. \par Example: \begin{semiverbatim} \cmd{mysqldump mydata >mydata.dump} \end{semiverbatim} This will give you a text file which can be used on most MySQL versions and possibly adapted to other database packages. \end{frame} \subsection{Log files} \label{sec:logs} \begin{frame}[fragile] \frametitle{Logs}% People don't tend to read them unless something goes wrong, in which case they're valuable. These need to be kept but don't need to be restored in a hurry. \end{frame} \subsection{Home directories} \label{sec:home-directories} \begin{frame}[fragile] \frametitle{Home directories}% This is a mixed bag of everything but some policies could be instituted to make the admin's life easier. \par For example, make specific sub-directories for things and assign them different backup/restore priorities. 
\par Often the existence of a home directory is more important than the rest of the contents, as a user may be unable to log in without it. \end{frame} \subsection{Code} \label{sec:code} \begin{frame} \frametitle{Code repositories}% Programmers should be accustomed to doing regular backups anyway; they often need to revert to an old version to figure out what they broke. \par Any tools used such as CVS that have a central repository should be backed up almost as often as programmers commit code, at least once a day, although programmers could probably cope with it being missing for half a day. \end{frame} \subsection{Web Sites} \label{sec:web-sites} \begin{frame} \frametitle{High availability --- read only}% Websites frequently used by your clients. \par They can contain dynamic data but customers don't update it. \par This sort of scenario lends itself to frequent replication to a backup server. \end{frame} \subsection{Web sites with volatile data} \label{sec:volatile-web-sites} \begin{frame}[fragile] \frametitle{High availability --- interactive}% Taking a website again, this one might allow the customer to do such things as place orders. The website maintains some state information to allow building of an order. This is the most difficult case; the state information can be stored in a replicated database. In the event of web server failure the other one comes into play and the customer may have to log in again but the information is kept. (Otherwise complex designs and expensive hardware can be used to seamlessly migrate the state to the other web server.) 
\end{frame} \section{Important Linux Directories} \label{sec:important-directories} \begin{frame}[fragile] \frametitle{Important Linux directories} \begin{tabularx}{\linewidth}{@{}>{\ttfamily}lY@{}} /var/spool/mail & daily backup\\ /var/lib/mysql & databases --- back up the dumps, and possibly the binary.\\ /var/log & from ``don't care'' to ``back up daily''\\ /etc & back up config changes\\ /home & be selective, but if you can't, back up daily.\\ /home/<user>/mail & contains the user's mail folders (may also be \texttt{Mail} or \texttt{Maildir})\\ /home/<user>/.ssh & If you log in using ssh keys only, this is a must-have.\\ /usr/local & locally installed apps \& data Application specifics \end{tabularx} \end{frame} \section{Methods of Backup and Restore} \label{sec:methods-of-backup-and-restore} \subsection{Another directory} \label{sec:another-directory} \begin{frame}[fragile] \frametitle{Backup \& Restore methods}% \framesubtitle{Copy the files to another directory}% This is the poor man's backup and does not offer much peace of mind. It does protect against accidental deletion \& corruption by users. One advantage is that it can be very quick for things such as log files. You can also keep multiple copies, one for every day of the week for example. See \texttt{/etc/logrotate.conf}. \end{frame} \subsection{Standby partition} \label{sec:partition} \begin{frame}[fragile] \frametitle{Backup to a standby partition}% This has about the same level of peace of mind as the above. The backup partition can be left un-mounted after the backup. The backup is slower than the above but the restore operation can be quick. See also the ``Broken Mirror'' method below. \end{frame} \subsection{Backup to tape} \label{sec:tape} \begin{frame}[fragile] \frametitle{Backup to tape}% This is probably the most common backup used in the commercial world. It's easy to back up the lot every day provided you have the tape capacity. If you don't, you become more selective as to what to back up. 
There's a variety of software to do this but there are three main basic systems: \texttt{tar}, \texttt{cpio} and \texttt{dump}. Often commercial software uses these basic systems and provides for labelling \& indexing as well as multi-server capability from a simple GUI. The reason for using the basic systems is you can restore from them if you have to. \end{frame} %----------------------------------------------------------- \subsection{Backup to standby disk} \label{sec:standby-disk} %============================================================================== \begin{frame}[fragile] \frametitle{Backup to standby disk}% This can offer peace of mind and a fairly cheap backup for people who don't require 24$\times$7 service. Basically a removable drive bay houses another hard disk of similar capacity and the entire system is backed up. This can be done partition by partition or file by file using \texttt{dd}, \texttt{cpio} or \texttt{rsync}. \mode<presentation>{\par\medskip}\par Additional steps can be taken to ensure that the backup is also bootable. The backup drive should be removed once done and treated like a tape. The disadvantage here is that you most likely will need to power down the system twice for one backup. Alternatively, if you have an external USB or FireWire storage medium it becomes possible to do this without downtime. \end{frame} %----------------------------------------------------------- \subsection{Backup to CDROM/DVD} \label{sec:CDR-DVD} %============================================================================== \begin{frame}[fragile] \frametitle{Backup to CDROM/DVD}% Under Linux (as far as I know) there's no software to directly write data without creating an image first. This means there must be sufficient space available. It would be possible to create a bootable CD with restore software and a compressed filesystem but I haven't seen this. It may be OK if you don't have a large filesystem or you have a DVD writer or you're not backing up everything. 
\end{frame} \section{RAID Systems} \label{sec:raid-systems} \begin{frame}[fragile] \frametitle{RAID System}% Not strictly a backup, but a RAID system can protect against hard drive failure by providing redundancy. Data is written simultaneously to 2 or more hard drives and can include parity information. It does not protect against corrupt databases and people removing files. It will corrupt \& remove files equally well on all disks. Linux can do RAID in software very well but the ideal is a hardware solution involving hot-swappable disks so they can be replaced while the system is fully running. A RAID system can mean the difference between going on-site at 3am and saying ``Oh dear, we'll replace that first thing in the morning''. Just ensure that you do have a replacement readily available and do not have to wait a week. \end{frame} \section{RAID tape array} \label{sec:raid-tape-array} \begin{frame}[fragile] \frametitle{RAID Tape array}% In a similar manner to RAID 5 disks, data is written in parallel to 5 tape drives which increases throughput and data integrity. \end{frame} \section{Backup Server} \label{sec:backup-server} \begin{frame}[fragile] \frametitle{Backup Server}% All of the methods discussed so far involve direct transfer from server to backup medium. If you have a number of servers it may not be practical to install backup devices on each. Another way is to remotely access the required medium directly (\texttt{/dev/rmt0}) but arbitration of access can be an issue. An increasingly popular way is to provide a super-server with a huge amount of disk space capable of holding everything required by the other servers. Transferring the data can happen at any time in either a batch or continuous process. A batch would be, say, backing up a whole directory at once whereas a continuous operation might be transmitting log information or database updates. 
The backup server itself may then employ any one or more methods to perform backups of itself, possibly based on some statistical analysis. An example of this is a system called ADSM which employs RAID arrays, multiple tape drives, a tape robot with barcode reader and intelligent software that tells the operators which tapes are to go off-site and which ones it wants back. It essentially is a huge cache that stores frequently changing data locally and stores old data off-site. \end{frame} \section{Broken Mirror} \label{sec:broken-mirror} \begin{frame}[fragile] \frametitle{Broken Mirror}% If you've got about 100GB of data on a mirrored pair of disks and only have a 10-minute backup window this may be for you. Basically you bring the system down, unhook one of the mirrors and replace it with another set of drives and bring the system up again. Mirroring starts from scratch during quiet time and should be finished before load picks up again. The drive set you just unhooked can then be loaded into the standby server and backed up to tape over the course of many hours. Some high-end servers can perform this operation without downtime as the hooking up can be done using inbuilt hardware or such things as dual-port FireWire drive bays. All that is required in this case is an application shutdown, sync, dismount, remount, application start type operation. \end{frame} \section{Command line tools} \label{sec:command-line-tools} \begin{frame}[fragile] \frametitle{Backup Software}% \framesubtitle{Command line tools}% \begin{description} \item[\opt{dd}] \textsf{C}opy and \textsf{C}onvert can be used to copy raw disk blocks, even to tape (yuk). \par Example: \begin{semiverbatim} dd if=/dev/hda1 of=/dev/hdb1 \end{semiverbatim} \newpage \item[\opt{tar }] Tape ARchive --- you all know how to unpack tgz files, and maybe even create them. Just remove the `\texttt{f}' option. It also can be an advantage not to use compression as some drives have this built in. 
Also, a portion of the tape being corrupt can ruin the rest of the data, whereas you can skip corrupt bits and pick up the next file if not compressed. \par Example: \begin{semiverbatim} tar -c /home cd /tmp; tar -x \end{semiverbatim} \end{description} \end{frame} \begin{frame}[fragile] \frametitle{Backup Software}% \framesubtitle{Command line tools --- continued}% \begin{description} \item[\opt{cpio}] CP I/O --- Similar capabilities to \texttt{tar} but different methodology. \par Example: \begin{semiverbatim} \$ find /home | cpio -oB >/dev/tape \$ cd /tmp; cpio -idB </dev/tape \end{semiverbatim} \end{description} \end{frame} \begin{frame}[fragile] \frametitle{Rotation \& off-site strategies --- 1}% \mode<presentation>{\medskip\par}% You could use this example strategy with any bulk medium but typically people refer to tapes or a set of tapes and for convenience I'll refer to a tape. If you can fit everything on one tape, good for you: life is easy, back up the lot daily. If you can't, you'll have to do an incremental backup (i.e.\ what's changed) daily and do a whole backup with multiple tapes weekly. Take the weekly backup off-site home from work or over to a trustworthy friend's place. Once a month take a weekly backup to long term storage and keep it for 7 years or something if it's got all your tax info on it. It goes without saying the tapes should be labelled full/incremental, with a date, hostname and their sequence number in the set. Daily backup tapes may be rotated once a week with a new tape supplied once a week for a specific day of the week. E.g., week1 will be all new tapes with one shipped off on Monday morning. In week2 it'll be a new tape for Sunday morning; in week3 it'll be Saturday morning's tape that's new. Alternatively, some people believe the weekly or monthly should be on a fresh tape that's never been used. \par\mode<presentation>{\medskip\par}% With this strategy you get reasonable rotation of the tapes keeping costs down and for archival purposes, if you keep at least a month's worth of data on the server you'll be able to go back to any point over the last few years and pull out a file. 
If you keep at least 3 months on hard disk you'll have 3 copies of this on 3 separate tapes because, believe it or not, they do fail and it will happen to you. To explain this more fully let's look at the following table and assume we have some wages data every week, the company's just started and there are 4 weeks per month. \end{frame} \begin{frame}[fragile] \frametitle{Rotation \& off-site strategies --- 2}% \setlength{\extrarowheight}{0pt}% {\mode
{\small}\mode{\footnotesize}% \noindent% % \begin{tabular}{@{}llll@{}} % \toprule% % & \textbf {server has} & \textbf{weekly tape has} & \textbf{monthly % tape has}\\ % \midrule% % wk1 & wk1 & wk1 & -- \\ % wk2 & wk1--2 & wk1--2 & -- \\ % wk3 & wk1--3 & wk1--3 & -- \\ % wk4 & wk1--4 & wk1--4, month1 & wk1--4, month1 \\ % wk5 & wk1--5 & wk1--5, month1 & -- \\ % wk6 & wk1--6 & wk1--6, month1 & -- \\ % wk7 & wk1--7 & wk1--7, month1 & -- \\ % wk8 & wk1--8 & wk1--8, month1--2 & wk1--8, month1--2 \\ % wk9 & wk1--9 & wk1--9, month1--2 & -- \\ % wk10 & wk1--10 & wk1--10, month1--2 & -- \\ % wk11 & wk1--11 & wk1--11, month1--2 & -- \\ % wk12 & wk1--12 & wk1--12, month1--3 & wk1--12, month1--3 \\ % wk13 & wk2--13 & wk2--13, month1--3 & -- \\ % wk14 & wk3--14 & wk3--14, month1--3 & -- \\ % wk15 & wk4--15 & wk4--15, month1--3 & -- \\ % wk16 & wk5--16 & wk5--16, month2--4 & wk5--16, month2--4 \\ % wk17 & wk6--17 & wk6--17, month2--4 & -- \\ % wk18 & wk7--18 & wk7--18, month2--4 & -- \\ % wk19 & wk8--19 & wk8--19, month2--4 & -- \\ % wk20 & wk9--20 & wk9--20, month3--5 & wk9--20, month3--5 \\ % wk21 & wk10--21 & wk10--21, month3--5 & -- \\ % \ldots & & & \\ % \bottomrule % \end{tabular} \begin{longtable}{@{}llll@{}} \toprule% \mbox{} & \textbf {server has} & \textbf{weekly tape has} & \textbf{monthly tape has}\\ \midrule% \endfirsthead% \toprule% & \textbf {server has} & \textbf{weekly tape has} & \textbf{monthly tape has}\\ \midrule% \endhead \bottomrule% \multicolumn{4}{r}{\emph{Continued\ldots}} \endfoot \bottomrule% \endlastfoot wk1 & wk1 & wk1 & -- \\ wk2 & wk1--2 & wk1--2 & -- \\ wk3 & wk1--3 & wk1--3 & -- \\ wk4 & wk1--4 & wk1--4, month1 & wk1--4, month1 \\ wk5 & wk1--5 & wk1--5, month1 & -- \\ wk6 & wk1--6 & wk1--6, month1 & -- \\ wk7 & wk1--7 & wk1--7, month1 & -- \\ wk8 & wk1--8 & wk1--8, month1--2 & wk1--8, month1--2 \\ wk9 & wk1--9 & wk1--9, month1--2 & -- \\ wk10 & wk1--10 & wk1--10, month1--2 & -- \\ wk11 & wk1--11 & wk1--11, month1--2 & -- \\ wk12 & wk1--12 & 
wk1--12, month1--3 & wk1--12, month1--3 \\ wk13 & wk2--13 & wk2--13, month1--3 & -- \\ wk14 & wk3--14 & wk3--14, month1--3 & -- \\ wk15 & wk4--15 & wk4--15, month1--3 & -- \\ wk16 & wk5--16 & wk5--16, month2--4 & wk5--16, month2--4 \\ wk17 & wk6--17 & wk6--17, month2--4 & -- \\ wk18 & wk7--18 & wk7--18, month2--4 & -- \\ wk19 & wk8--19 & wk8--19, month2--4 & -- \\ wk20 & wk9--20 & wk9--20, month3--5 & wk9--20, month3--5 \\ wk21 & wk10--21 & wk10--21, month3--5 & -- \\ \ldots & & & \\ \end{longtable} } \end{frame} \begin{frame} \frametitle{Rotation \& off-site strategies --- 3}% A complete backup and archive strategy should provide a means of going back to any point in time for critical data. Sometimes keeping the whole lot of data is not required. For example you could drop the weekly information and keep the monthly summary information and do a dedicated monthly backup for this data. The monthly data may be optimised and arranged for searching and an index provided but essentially contain all the information from the weekly data. \end{frame} \mode {% \begin{frame} \frametitle{Topics Covered} %\footnotesize \begin{multicols}{2} \tableofcontents[pausesections,pausesubsections] \end{multicols} % You might wish to add the option [pausesections] \end{frame} } \section{License of this document} \label{sec:license} \begin{frame} \frametitle{License Of This Document}% \raggedright% Copyright \copyright\ 2005, 2003 Grant Parnell, Geoffrey Robertson and Nick Urbanik . \par Permission is granted to make and distribute verbatim copies or modified versions of this document provided that this copyright notice and this permission notice are preserved on all copies under the terms of the GNU General Public License as published by the Free Software Foundation---either version 2 of the License or (at your option) any later version. \end{frame} \end{document}