%\documentclass[colorBG,slideColor,troispoints,pdf]{prosper} \documentclass[total,pdf]{prosper} \usepackage[toc,highlight,Tycja]{HA-prosper} \usepackage{alltt,key,xr,cols,rcs,acro,meta,cpp,% graphicx,explanation,multicol,textcomp} \usepackage[nolineno,noindent]{lgrind} %\definecolor{green}{rgb}{0,1,0} % Copyright (c) 2003 by Nick Urbanik . % This material may be distributed only subject to the terms and % conditions set forth in the Open Publication License, v1.0 or later % (the latest version is presently available at % http://www.opencontent.org/openpub/). \RCS $Revision: 1.7 $ \newcommand*{\bs}{\texttt{\char '134}} % Backslash `\' %\newcommand*{\labTitle}{LDAP Directories} \newcommand*{\subject}{Systems and Network Management} \newcommand*{\emphcolour}[1]{\emph{\red#1}} \providecommand*{\RPM}{\acro{RPM}\xspace} \providecommand*{\CD}{\acro{CD}\xspace} \providecommand*{\IPC}{\acro{IPC}\xspace} \providecommand*{\UID}{\acro{UID}\xspace} \providecommand*{\GID}{\acro{GID}\xspace} \providecommand*{\SMP}{\acro{SMP}\xspace} \providecommand*{\API}{\acro{API}\xspace} \providecommand*{\OK}{\acro{OK}\xspace} \providecommand*{\IETF}{\acro{IETF}\xspace} \providecommand*{\MS}{\acro{MS}\xspace} % TODO: % Write about how print "@array" and print @array work. % Write about sorting in some detail? % mention the string operations? % using open on a process? \title{Perl}% \subtitle{A language for Systems and Network Administration and Management} \author{Nick Urbanik\\ \email{nicku@vtc.edu.hk}\\ \institution{A computing department}\\ \footnotesize{}Copyright Conditions: Open Publication License\\ (see \url{http://www.opencontent.org/openpub/})}% %% \author{Nick Urbanik \texttt{ {\footnotesize\copyright{}} 2003}\\ %% \footnotesize{}Copyright Conditions: Open Publication License (see %% \url{http://www.opencontent.org/openpub/})}% %\institution{A computing department}% \slideCaption{SNM --- Perl --- ver. 
\RCSRevision} %%\Logo{\includegraphics[width=15mm]{ict-logo-smaller}} \DefaultTransition{Wipe} \TitleSlideNav{FullScreen} \NormalSlideNav{ShowBookmarks} \LeftFoot{SNM --- ver. \RCSRevision} \RightFoot{Perl} \begin{document} \maketitle \tsection{What is Perl?} \begin{slide}{What is Perl?} \begin{itemize} \item Perl is a programming language \item The best language for processing text \item Cross platform, free, open \item Microsoft have invested heavily in ActiveState to improve support for Windows in Perl \item Has excellent connection to the operating system \item Has enormous range of modules for thousands of application types \end{itemize} \end{slide} \begin{slide}{What is Perl? --- 2} \begin{itemize} \item Robust and reliable (has very few bugs) \item Supports object oriented programming \item Good for big projects as well as small \item Java 1.4 has borrowed one of Perl's best features: \emphcolour{regular expressions} \item Perl has garbage collection \item The ``duct tape of the Internet'' \item Easy to use, since it usually ``does the right thing'' \item Based on freedom of choice: ``There is more than one way to do it!'' --- \TIMTOWTDI{}${}^{\mbox{\texttrademark}}$ \end{itemize} \end{slide} \begin{slide}{Compiled and run each time} \begin{itemize} \item Perl is interpreted, but runs about as fast as a Java program \item Software development is very fast \item The Apache web server provides \texttt{mod\_perl}, which allows Perl applications to run very fast \item Used on some very large Internet sites: \begin{itemize} \item The Internet Movie Database \item Macromedia, Adobe, \url{http://slashdot.org/} \end{itemize} \end{itemize} \end{slide} \begin{slide}{Perl is Evolving} \begin{itemize} \item Perl 6 will introduce many great features to make Perl \begin{itemize} \item easier to use \item Even more widely usable for more purposes \item Even better for bigger projects \end{itemize} \end{itemize} \end{slide} \begin{slide}{Eclectic} \begin{itemize} \item Borrows 
ideas from many languages, including: \item C, \Cpp \item Shell \item Lisp \item \BASIC \item \ldots even Fortran \item Many others\ldots \end{itemize} \end{slide} \begin{slide}{Regular Expressions} \begin{itemize} \item One of the best features of Perl \item A new concept for most of you \item \ldots But very useful! \item Used to: \begin{itemize} \item extract information from text \item transform information \item You will spend much time in this topic learning about regular expressions --- see slide~\pageref{sld:regexp} \end{itemize} \end{itemize} \end{slide} \tsection{Example Problem} \begin{slide}{Why should I learn it?} \begin{itemize} \item It will be in the final exam! \begin{itemize} \item Okay, that's to get your attention, but\ldots \end{itemize} \item Consider a real-life sys-admin problem: \begin{itemize} \item You must make student accounts for 1500 students \item TEACHING BEGINS TOMORROW!!! \item The Computing Division has a multi-million dollar application to give you student enrollment data \item \ldots but it can only give you PDF files with a strange and irregular format for now (But Oh, it will be infinitely better in the future! Just wait a year or two\ldots) \end{itemize} \end{itemize} \end{slide} \begin{slide}{The available data} \begin{itemize} \item Has a variable number of lines before the student data begins \item Has a variable number of columns between different files \item Has many rows per enrolled student \item Goes on for dozens of pages, only 7 students per page!!!!!!! \item There are two formats, both equally peculiar!!!! 
\end{itemize} \end{slide} \begin{slide}{Sample data for new courses:} \par\vspace*{0.1\slideWidth}\par \hspace*{-0.03\slideWidth}% \begin{minipage}[t]{1.2\slideWidth} %% 1 N AU YEUNG Wing Fung M 030366771 Z343399(5) AU YEUNG PONG %% FATHER %% 21-AUG-03 21-AUG-03 26311085 %% 98395065 \begin{alltt}\tiny\bfseries 15 N CHAN Wai Yee F 993175560 H123456(5) 28210216 CHEUNG 10-SEP-01 10-SEP-01 21234567 WAI CHI SISTER 91234567 \end{alltt} \end{minipage} \end{slide} \begin{slide}{Problems} \begin{itemize} \item There is a different number of lines above the student records \item There is a different number of characters within each column from file to file \item There are many files \item The format can change any time the computing division determines necessary \end{itemize} \end{slide} \begin{slide}{Solution in Perl --- 1} \tiny \begin{verbatim} #! /usr/bin/perl -w use strict; my $course; my $year; while ( <> ) { chomp; if ( /^\s*Course :\s(\d+)\s/ ) { $course = $1; undef $year; next; } \end{verbatim}%$ \end{slide} \begin{slide}{Solution in Perl --- 2} \tiny\label{sld:extended-modifier-example}% %\vspace*{-3ex} \begin{verbatim} elsif ( m!^\s*Course :\s(\d+)/(\d)\s! ) { $course = $1; $year = $2; next; } if ( my ( $name, $gender, $student_id, $hk_id ) = m{ \s\s+ # at least 2 spaces ( # this matches $name [A-Z]+ # family name is upper case (?:\s[A-Z][a-z]*)+ # one or more given names ) \s\s+ # at least 2 spaces ([MF]) # gender \s+ # at least one space (\d{9}) # student id is 9 digits \s\s+ # at least 2 spaces ([a-zA-Z]\d{6}\([\dA-Z]\)) # HK ID }x ) \end{verbatim}%$ \end{slide} \begin{slide}{Solution in Perl --- 3} \tiny \begin{verbatim} { print "sex=$gender, student ID = $student_id, ", "hkID = $hk_id, course = $course, name=$name, ", defined $year ? 
"year = $year\n" : "\n"; next; } warn "POSSIBLE UNMATCHED STUDENT: $_\n" if m!^\s*\d+\s+!; } \end{verbatim} \end{slide} \begin{slide}{But I can use any other language!} \begin{itemize} \item I will give you HK\$200 if you are the first person to write a solution in another language in fewer keystrokes \item Note: the Perl solution given has: \begin{itemize} \item comments \item Plenty of space to show structure \item \ldots and handles exceptional situations (i.e., it is robust) \end{itemize} \item To claim your \$200 from Nick, your solution must have \begin{itemize} \item similar space for comments \item Similar readability and robustness \item Be written in a general purpose language using ordinary libraries \end{itemize} \end{itemize} \end{slide} \begin{slide}{Other Solutions may take Longer to Write } \begin{itemize} \item This program took a very short time to write \item It is very robust \item For problems like this, Perl is second to no other programming language. \end{itemize} \end{slide} \begin{slide}{The hello world program} \begin{verbatim} print "hello world\n" \end{verbatim} \end{slide} \tsection{Variables} \begin{slide}{Variables} \begin{itemize} \item There are three basic types of variable: \item \textbf{\emphcolour{Scalar}} (can be a number or string or\ldots) \item \textbf{\emphcolour{Array}} (an ordered array of scalars) \item \textbf{\emphcolour{Hash}} (an unordered array of scalars indexed by strings instead of numbers) \item Each type distinguished with a ``funny character'' \end{itemize} \end{slide} \begin{slide}{\$Scalars:} \begin{itemize} \item Start with a dollar sign \item Hold a single value, not a collection \item A string is a scalar, so is a number \item Since Perl is a \emphcolour{loosely typed language}, a scalar can be an integer, a floating point number, a character or a string. 
\begin{itemize} \item Note that later you will see that a scalar can also hold a \emphcolour{reference} to another piece of data, which may also be an array or hash. \end{itemize} \item Examples: \begin{verbatim} $apple = 2; $banana = "curly yellow fruit"; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{@Array} \label{arrayintro} \begin{itemize} \item Starts with a \texttt{@} \item Indexes start at 0, like in C or Java \item Each entry in an array is a scalar. \begin{itemize} \item Multidimensional arrays are made by entry of an array being a reference to another array. \end{itemize} \item See slide~\pageref{sld:arrays} \end{itemize} \end{slide} \begin{slide}{\%Hashes} \label{sld:hashintro} \begin{itemize} \item Unfamiliar concept to many of you \item Like an array, but indexed by a string \item A data structure like a database \item See slide~\pageref{sld:example-hash} \end{itemize} \end{slide} \begin{slide}{Conclusion} \begin{itemize} \item Perl is optimised for text and systems administration programming \item Has great portability \item Is strongly supported by Microsoft \item Has three main built-in data types: \item Scalar: starts with \texttt{\$} \item Array: starts with \texttt{@} \item Hash: starts with \texttt{\%} \end{itemize} \end{slide} \tsection{Perl Community} \begin{slide}{An Overview of Perl} \vspace*{0.1\slideWidth} \begin{center}\Large \mbox{}\blue{}A language for Systems and Network Administration and Management: \vspace*{0.05\slideWidth} An overview of the language \end{center} \end{slide} \begin{slide}{Where do I get Perl?} \begin{itemize} \item For Windows, go to \url{http://www.activestate.com}, download the installer \item For Linux: it will be already installed \item For other platforms: go to \url{http://www.perl.com} \item This is a good source of other information about Perl \end{itemize} \end{slide} \begin{slide}{Where do I get Info about Perl?---1} \begin{itemize} \item {\mbox{}\blue{}On your hard disk:} \begin{itemize} \item 
\begin{alltt} $ \textbf{perldoc -f \meta{function}} \end{alltt}%$ \begin{itemize} \item will look up the documentation for the built-in \meta{function} (from the documentation \texttt{perlfunc}) \end{itemize} \item\label{sld:faqs}% \begin{alltt} $ \textbf{perldoc -q \meta{word}} \end{alltt}%$ \begin{itemize} \item will look up \meta{word} in the headings of the \FAQ \end{itemize} \item \begin{alltt} $ \textbf{perldoc perl} \end{alltt}%$ \begin{itemize} \item shows a list of much of your locally installed documentation, divided into topics \end{itemize} \item ActiveState Perl provides a Programs menu item that links to online html documentation \end{itemize} \end{itemize} \end{slide} \begin{slide}{Where do I get Info about Perl?---2} %% \begin{minipage}[t]{1.1\slideWidth} %% \begin{multicols}{2} \begin{itemize} \item {\mbox{}\blue{}Web sites:} \begin{itemize} \item \url{http://www.perl.com} \item \url{http://www.activestate.com} \item \url{http://use.perl.org} \end{itemize} \item See slide~\pageref{sld:reference-books} for a list of books. \end{itemize} %% \end{multicols} %% \end{minipage} \end{slide} \begin{slide}{CPAN, PPM: Many Modules} \label{sld:cpan} \begin{itemize} \item A very strong feature of Perl is the community that supports it \item There are tens of thousands of third party modules for many, many purposes: \begin{itemize} \item Eg. \texttt{Net::LDAP} module supports all \LDAP operations, \texttt{Net::LWP} provides a comprehensive web client \end{itemize} \item Installation is easy: \begin{alltt} $ \textbf{sudo perl -MCPAN -e shell} cpan> \textbf{install Net::LDAP} \end{alltt}%$ \item Will check if a newer version is available on the Internet from \CPAN, and if so, download it, compile it, test it, and if it passes tests, install it. 
\end{itemize} \end{slide} \begin{slide}{PPM: Perl Package Manager} \begin{itemize} \item For Windows \item Avoids need for a C compiler, other development tools \item Download precompiled modules from ActiveState and other sites, and install them: \begin{alltt} C:\bs> \textbf{ppm install Net::LDAP} \end{alltt} \item See documentation with ActiveState Perl \end{itemize} \end{slide} \begin{slide}{Mailing Lists: help from experts} \begin{itemize} \item There are many mailing lists and newsgroups for Perl \item When subscribe to mailing list, receive all mail from list \item When send mail to list, all subscribers receive \item For Windows, many lists at \url{http://www.activestate.com} \end{itemize} \end{slide} \begin{slide}{How to ask Questions on a List} \begin{itemize} \item I receive many email questions from students about many topics \item Most questions are not clear enough to be able to answer in any way except, ``please tell me more about your problem'' \item Such questions sent to mailing lists are often unanswered \item Need to be concise, accurate, and clear \item see also Eric Raymond's \emphcolour{How to Ask Questions the Smart Way} at \url{http://catb.org/~esr/faqs/smart-questions.html} \item Search the \FAQ{}s first---see slide~\pageref{sld:faqs} \end{itemize} \end{slide} \tsection{The Shabang} \begin{slide}{Where is Perl on my system?} \begin{itemize} \item ActiveState Perl installs \texttt{perl.exe} in \texttt{C:\bs Perl\bs perl.exe} \item Linux systems have a standard location for perl at \texttt{/usr/bin/perl} \item On some \UNIX systems, it may be installed at \texttt{/usr/local/bin/perl} \end{itemize} \end{slide} \begin{slide}{How OS knows it's a Perl program---1} \begin{itemize} \item To run your Perl program, \OS needs to call perl \item How does \OS know when to call Perl? 
\item {\mbox{}\blue{}Linux, Unix:} \begin{itemize} \item programs have \emphcolour{execute} permission: \begin{alltt} $ \textbf{chmod +x \meta{program}} \end{alltt}%$ \begin{itemize} \item \OS reads first 2 bytes of program: if they are ``\texttt{\#!}'' then read to end of line, then use that as the interpreter \item \OS doesn't care what your program file is called \end{itemize} \item If program file is not in a directory on your \texttt{PATH}, call it like this: \begin{alltt} $ \textbf{./\meta{program}} \end{alltt}%$ \end{itemize} \end{itemize} \end{slide} \begin{slide}{How OS knows it's a Perl program---2} \begin{itemize} \item {\mbox{}\blue{}Windows:} \begin{itemize} \item \OS uses the extension of the file to decide what to do (e.g., \texttt{.bat}, \texttt{.exe}) \item Your program names end with \texttt{.pl} \end{itemize} \item {\mbox{}\blue{}For cross platform support:} \begin{itemize} \item Put this at the top of all your programs: \begin{verbatim} #! /usr/bin/perl -w \end{verbatim} \item Name your programs with an extension \texttt{.pl} \end{itemize} \end{itemize} \end{slide} \tsection{Language Overview} \begin{slide}{Language Overview} \label{sld:language-overview} %\begin{itemize} %\item Here we look at these topics: \begin{itemize} \item variables: scalars, arrays and hashes --- \S\pageref{sld:funny-characters}--\S\pageref{sld:examples-of-use-strict} \item compiler warnings, \texttt{use strict;} --- \S\pageref{sld:use-warnings}--\S\pageref{sld:examples-of-use-strict} \item operators, quoting --- \S\pageref{sld:operators-and-quoting}--\S\pageref{sld:quoting} \item input and output --- \S\pageref{sld:input-output} \item statements: --- \S\pageref{statements-for-looping-and-conditions} \begin{itemize} \item \texttt{if}\ldots\texttt{elsif}\ldots\texttt{else} and \texttt{unless} statements --- \S\pageref{sld:if-statements}--\S\pageref{sld:unless-statement} \item \texttt{while}, \texttt{for} and \texttt{foreach} loops --- 
\S\pageref{sld:while-loop}--\S\pageref{sld:foreach} \begin{itemize} \item iterating over arrays and hashes --- \S\pageref{sld:array-iterate}--\S \pageref{sld:iterating-over-hash-sorted} \end{itemize} \item Exit early from a loop with \texttt{last}, and \texttt{next} --- \S\pageref{sld:exit-a-loop-early} \item ``backwards'' statements --- \S\pageref{sld:backwards-statements}--\S \pageref{sld:backwards-statements-examples} \end{itemize} \end{itemize} %\end{itemize} \end{slide} \begin{slide}{Language Overview --- 2} \label{sld:language-overview-2} \begin{itemize} \item We also will examine: \begin{itemize} \item subroutines, parameters and \texttt{return} statement --- \S\pageref{sld:subroutines}--\S\pageref{sld:parameters-2} \item array operations --- \S\pageref{sld:pushd-pop}--\S\pageref{sec:split-and-join} \item Error reporting: \texttt{die} and \texttt{warn} --- \S\pageref{sld:die-warn} \item Opening files --- \S\pageref{sld:files-and-filehandles}--\S\pageref{sld:open-for-writing} \item executing external programs --- \S\pageref{sec:external-programs}--\S\pageref{sld:backticks} \item regular expressions --- \S\pageref{sld:regexp}--\S\pageref{sld:readable-regexp} \item Special input modes --- \S\pageref{sld:input-record-separator}--\S \pageref{sld:localising-input-record-separator} \item One line Perl programs --- \S\pageref{sld:one-liners} \end{itemize} \end{itemize} \end{slide} \tsection{Data Types} \begin{slide}{Funny Characters \$, @, \%} \label{sld:funny-characters} \begin{itemize} \item Variables in Perl start with a \emphcolour{funny character} \item Why? \item No problem with reserved words: \item can have a variable called \texttt{\$while}, and another variable called \texttt{@while}, and a third called \texttt{\%while}. 
\item Can \emphcolour{interpolate} value into a \emphcolour{Double-quoted} string (but not a single quoted string): \label{sld:interpolate} \begin{verbatim} my $string = "long"; my $number = 42.42; print "my string is $string ", "and my number is $number\n"; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Arrays} \label{sld:arrays} \begin{itemize} \item Define an array like this: \begin{verbatim} my @array = ( 1, 5, "fifteen" ); \end{verbatim} \item This is an array containing three elements \item The first can be accessed as \texttt{\$array[0]}, second as \texttt{\$array[1]}, the last as \texttt{\$array[2]} \item Note that since each element is a scalar, it has the \texttt{\$} funny character for a scalar variable \emphcolour{value} \item In Perl, \emphcolour{we seldom use an array with an index}---use list processing array operations: \texttt{push}, \texttt{pop}, \texttt{shift}, \texttt{unshift}, \texttt{split}, \texttt{grep}, \texttt{map} and iterate over arrays with the \texttt{foreach} statement---see slide \pageref{sld:foreach} \begin{itemize} \item higher level. \end{itemize} \end{itemize} \end{slide} \begin{slide}{Array Examples} \label{array-examples} \label{sld:qw1} \begin{itemize} \item Use the \texttt{qw//} ``quote words'' operator to help initialise arrays --- see slide~\pageref{sld:quoting} \item See slide~\pageref{sld:foreach} for how the \texttt{foreach} loop works. 
\begin{verbatim} my @fruit = qw( apple banana mandarin peach pear plum ); foreach my $fruit ( @fruit ) { print "$fruit\n"; } \end{verbatim} \item Note that these two are equivalent: \begin{verbatim} my @fruit = qw( apple banana mandarin peach pear plum ); my @fruit = ( "apple", "banana", "mandarin", "peach", "pear", "plum" ); \end{verbatim} \end{itemize} \end{slide} \begin{slide}{More About Arrays} \label{sld:more-about-arrays} \begin{itemize} \item Instead of initialising the array as in slide~\pageref{array-examples}, we can initialise the elements one by one: \begin{verbatim} my @fruit; $fruit[ 0 ] = "apple"; $fruit[ 1 ] = "banana"; # ... $fruit[ 5 ] = "plum"; \end{verbatim}%$ \item We can get a \emphcolour{slice} of an array: \begin{verbatim} my @favourite_fruit = @fruit[ 0, 3 ]; print "@favourite_fruit\n"; \end{verbatim} \begin{itemize} \item execute the program: \end{itemize}\par %\vspace*{-2ex}\par \begin{alltt} $ \textbf{./slice.pl} apple peach \end{alltt}%$ \end{itemize} \end{slide} \begin{slide}{List Assignment} \label{sld:list-assignment} \begin{itemize} \item We can use a list of scalars whenever it makes some sense, e.g., \begin{itemize} \item We can assign a list of values to a list of scalar variables \end{itemize} \item Examples: \begin{verbatim} my ( $a, $b, $c ) = ( 1, 2, 3 ); my @array = ( $a, $b, $c ); my ( $d, $e, $f ) = @array; \end{verbatim}%$ \end{itemize} \end{slide} \begin{slide}{Even More About Arrays} \label{sld:even-more-about-arrays} \begin{itemize} \item How many elements are in the array? See slide~\pageref{sld:scalar-list-context} \begin{verbatim} print scalar @fruit, "\n" \end{verbatim} \item Does the array contain any data? See slide~\pageref{sld:unless-statement} \begin{verbatim} print "empty\n" unless @fruit; \end{verbatim} \item Is there any data at the index \texttt{\$index}? 
\begin{verbatim} if ( defined $fruit[ $index ] and $fruit[ $index ] eq "apple" ) { print "found an apple.\n"; } \end{verbatim} \begin{itemize} \item See \texttt{perldoc -f defined}. Also see \texttt{perldoc -f exists}. \end{itemize} \end{itemize} \end{slide} \begin{slide}{Scalar, List Context} \label{sld:scalar-list-context} \begin{itemize} \item Each part of a program expects a value to be either \emphcolour{scalar} or \emphcolour{list} \item Example: \texttt{print} is a list operator, so if you \texttt{print} something, it is in \emphcolour{list context} \item If you look in the \emph{Perl Reference}, you will see \textsf{LIST} shown as a parameter to many functions. \begin{itemize} \item Any value there will be in a \emphcolour{list context} \end{itemize} \item Many built-in functions, and your own functions (see \texttt{perldoc~-f~wantarray}), can give a different result in a scalar or list context \item force scalar context with \texttt{scalar}, e.g., {\footnotesize \begin{verbatim} print "the time is now ", scalar localtime, "\n"; \end{verbatim} } \end{itemize} \end{slide} \begin{slide}{Hashes} \label{sld:example-hash} \begin{itemize} \item Hashes are probably new to you \item {\mbox{}\blue{}Like an array, but indexed by a string} \item Similar idea was implemented in \texttt{java.util.Hashtable} \item Perl hashes are easier to use \end{itemize} \end{slide} \begin{slide}{Initialising a Hash} \label{sld:initialising-hash} \begin{verbatim} my %hash = ( NL => 'Netherlands', BE => 'Belgium' ); \end{verbatim} \begin{itemize} \item This creates a hash with two elements \item one is \texttt{\$hash\{NL\}}, has value ``\texttt{Netherlands}''; \item the other is \texttt{\$hash\{BE\}} with value ``\texttt{Belgium}'' \item The ``\texttt{=>}'' is a ``\emphcolour{quoting comma}''. \begin{itemize} \item It is the same as a comma, but it also quotes the string on its left. 
\item So you can write the above like this: \begin{verbatim} my %hash = ( 'NL', 'Netherlands', 'BE', 'Belgium' ); \end{verbatim} but the ``\texttt{=>}'' operator makes it clearer which is the key and which is the value. \end{itemize} \end{itemize} \end{slide} \begin{slide}{Hash Examples --- 1} \label{sld:hash-example-1} \begin{itemize} \item As with arrays, you make a new element just by assigning to it: \begin{verbatim} my %fruit; $fruit{apple} = "crunchy"; $fruit{peach} = "soft"; \end{verbatim} \item Here, we made two hash elements. \begin{itemize} \item The keys were \texttt{"apple"} and \texttt{"peach"}. \item The corresponding values were \texttt{"crunchy"} and \texttt{"soft"}. \end{itemize} \item You could print the values like this: \begin{verbatim} print "$fruit{apple}, $fruit{peach}\n"; \end{verbatim} prints: \texttt{crunchy, soft} \end{itemize} \end{slide} \begin{slide}{Hash Examples --- 2} \label{sld:hash-example-2} \begin{itemize} \item How to see if a hash is empty? See slide~\pageref{sld:unless-statement} \begin{verbatim} print "empty\n" unless %fruit; \end{verbatim} \item How to delete a hash element? \begin{verbatim} delete $fruit{coconut}; \end{verbatim}%$ \item Hashes are often useful for storing counts (see slides~\pageref{sld:while-loop}--\pageref{sld:while-and-the-angle-operator} for more about \texttt{while} loops): \begin{verbatim} my %wordcounts; while ( <> ) { chomp; ++$wordcounts{$_}; } \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Hash slices} \begin{itemize} \item We can assign some values to part of a hash: \begin{verbatim} $score{fred} = 150; $score{barney} = 100; $score{dino} = 10; \end{verbatim}%$ \item We could use a \emphcolour{list assignment} (see \S\pageref{sld:list-assignment}): {\scriptsize \begin{verbatim} ( $score{fred}, $score{barney}, $score{dino} ) = ( 150, 100, 10 ); \end{verbatim}%$ } \ldots{} too long. 
A \emphcolour{hash slice} makes this easier: {\scriptsize \begin{verbatim} @score{ "fred", "barney", "dino" } = ( 150, 100, 10 ); \end{verbatim} } \item We can \emphcolour{interpolate} this too (see slides~\pageref{sld:interpolate} and~\pageref{sld:quoting}): {\footnotesize \begin{verbatim} my @players = qw( fred barney dino ); print "scores are @score{@players}\n"; \end{verbatim} } \end{itemize} \end{slide} \begin{slide}{Another Hash Example} \label{sld:another-hash-example} \begin{itemize} \item Often used to keep a count of the number of occurrences of data read in: {\tiny \begin{verbatim} #! /usr/bin/perl -w use strict; our %words; while ( <> ) { next unless /\S/; # Skip blank lines my @line = split; foreach my $word ( @line ) { ++$words{$word}; } } print "Words unsorted, in the order they come from the hash:\n\n"; foreach my $word ( keys %words ) { printf "%4d %s\n", $words{$word}, $word; } \end{verbatim}%$ } %\par\vspace*{-2ex}\par {\scriptsize \item see slide~\pageref{sld:while-loop} for \texttt{while} loop, slide~\pageref{sld:while-and-the-angle-operator} for \texttt{while~(~<>~)}, slide~\pageref{sld:foreach} for the \texttt{foreach} statement, slides~\pageref{sld:unless-statement} and~\pageref{sld:backwards-statements} for the \texttt{unless} statement } \end{itemize} \end{slide} \begin{slide}{Hashes are Not Ordered} \label{sld:hashes-not-ordered} \begin{itemize} \item A \emphcolour{big difference from arrays} is that hashes have \emphcolour{no order}. \item The data in a hash will be available in only an \emphcolour{unpredictable order}. 
\item See slide~\pageref{sld:hash-iterate} for how to \emphcolour{iterate} over hash elements \end{itemize} \end{slide} \tsection{Good Practice} \begin{slide}{Discipline---\texttt{use warnings}} \label{sld:use-warnings} \begin{itemize} \item Better to let compiler detect problems, not your customer \item Develop your program with all warnings enabled \item Either: \begin{itemize} \item put \texttt{-w} as an option to \texttt{perl} when execute the program, i.e., \begin{itemize} \item Make the first line of your program: \begin{verbatim} #! /usr/bin/perl -w \end{verbatim} \item Or better: put a line: \begin{verbatim} use warnings; \end{verbatim} near the top of your program. \end{itemize} \end{itemize} \end{itemize} \end{slide} \begin{slide}{\texttt{use strict} and Declaring Variables} \label{sld:use-strict-and-declaring-variables} \begin{itemize} \item All programs that are more than a few lines long should have the \emphcolour{pragma} \texttt{use~strict;} \item This turns on additional checking that all variables are declared, all subroutines are okay, and that references to variables are ``hard references'' --- see \texttt{perldoc~strict}. \item All variables that you use in your program need to be declared before they are used with either \texttt{\red{}my} or \texttt{\red{}our}. \item \texttt{\red{}my} defines a local variable that exists only in the scope of the current block, or outside of a block, in the file. \begin{itemize} \item See \texttt{perldoc~my}. \end{itemize} \item \texttt{\red{}our} defines a global variable. \begin{itemize} \item See \texttt{perldoc~our}. \end{itemize} \end{itemize} \end{slide} \begin{slide}{Examples of \texttt{use strict} and Variables} \label{sld:examples-of-use-strict} \begin{itemize} \item Without \texttt{use strict}, a variable just springs into life whenever you use it. \item \emphcolour{Problem}: a {\blue{}typing mistake} in a variable creates a \emphcolour{new variable} and a hard-to-find bug! 
\item \ldots so \emphcolour{always start your programs like this:} \begin{verbatim} #! /usr/bin/perl use warnings; use strict; \end{verbatim} \item \texttt{use warnings;} enables compile time warnings which help find bugs earlier---see \texttt{perldoc~warnings} \item After \texttt{use strict}, it will be an error to use a variable without declaring it with \texttt{\red{}my} or \texttt{\red{}our}. \begin{itemize} \item Most code examples in these notes define variables with \texttt{\red{}my} or \texttt{\red{}our} \end{itemize} \end{itemize} \end{slide} \tsection{Operators, Quoting} \begin{slide}{Operators and Quoting} \label{sld:operators-and-quoting} \begin{itemize} \item Perl has all the operators from C (and so Java), with the same precedence \item Has more operators for strings: \item Join strings with a dot, e.g. \begin{verbatim} print "The sum of 3 and 4 is " . ( 3 + 4 ) . "\n"; \end{verbatim} \item {\small{}Quote special characters with backslash, as in C or Java} \begin{verbatim} print "\$value = $value\n"; \end{verbatim}%$ \item Can quote \emphcolour{all} characters using single quotes: {\tiny \begin{verbatim} print 'output of \$perl = "rapid";print \$perl; is "rapid"'; \end{verbatim} } \item Note that double quotes are okay in single quotes, single quotes okay in double quotes. \item Documentation in \texttt{perldoc perlop}. 
\end{itemize} \end{slide} \begin{slide}{Quoting} \label{sld:quoting} \begin{itemize} \item Perl has lots of ways of quoting, too many to list here \setlength{\extrarowheight}{0pt} \begin{tabular}[t]{@{}>{\ttfamily}l>{\ttfamily}llll@{}} & & \textbf{Meaning} & \textbf{Interpolates} & \textbf{Slide}\\ '{}' & q// & Literal & No & \S\pageref{sld:operators-and-quoting}, \S\pageref{sld:interpolate} \\ "" & qq// & Literal & Yes & \S\pageref{sld:operators-and-quoting}, \S\pageref{sld:interpolate} \\ `{}` & qx// & Command & Yes & \S\pageref{sld:backticks} \\ () & qw// & quote word list & No & \S\pageref{sld:qw1},\S\pageref{sld:qw} \\ // & m// & Pattern match & Yes & \S\pageref{sld:match-operator} \\ s/// & s/// & Substitution & Yes & \S\pageref{sld:substitution-operator} \\ y/// & tr/// & Translation & No & \end{tabular} \begin{itemize} \item See slide~\pageref {sld:interpolate} for meaning of ``interpolate'' \end{itemize} \item \texttt{y///} or \texttt{tr///} works just like the \POSIX \texttt{tr} (translate) program in Linux. %% \item We discuss \texttt{m//} and \texttt{s///} when we get to %% regular expressions (slide~\pageref{sld:regexp} onwards) \end{itemize} \end{slide} \tsection{Input, Output} \begin{slide}{Input and Output} \label{sld:input-output} \begin{itemize} \item Read from standard input like this: \begin{verbatim} my $value = <STDIN>; \end{verbatim}%$ \item Note that there will be a newline character read at the end \begin{itemize} \item To remove trailing newline, use \texttt{chomp}: \begin{verbatim} chomp $value; \end{verbatim}%$ \item The word \texttt{STDIN} is a predefined \emphcolour{filehandle}. \begin{itemize} \item You can define your own filehandles with the \texttt{open} built-in function. 
\end{itemize} \end{itemize} \item write to standard output with the list operator \texttt{print} \begin{itemize} \item \texttt{print} takes a list of strings: \begin{verbatim} print "The product of $a and $b is ", $a * $b, "\n"; \end{verbatim} \end{itemize} \end{itemize} \end{slide} \begin{slide}{What is Truth?} \label{sld:what-is-truth} \begin{itemize} \item Anything that has the string value \texttt{""} or \texttt{"0"} is false \item Any other value is true. \item This means: \begin{itemize} \item No number is false except 0 \item any undefined value is false \item any reference is true (see \texttt{perldoc perlref}) \end{itemize} \item Examples: {\tiny \begin{verbatim} 0 # becomes the string "0", so false 1 # becomes the string "1", so true 0.00 # becomes 0, would convert to the string "0", so false "" # The null string, so false "0.00" # the string "0.00", neither empty nor "0", so true undef() # a function returning the undefined value, so false \end{verbatim} } \end{itemize} \end{slide} \tsection{Statements} \begin{slide}{Statements for Looping and Conditions} \label{statements-for-looping-and-conditions} \begin{itemize} \item We look at the following statements in the language: \begin{itemize} \item \texttt{if}\ldots \texttt{elsif}\ldots \texttt{else} statements --- \S\pageref{sld:if-statements} \begin{itemize} \item The \texttt{unless} statement is similar to the \texttt{if} statement --- \S\pageref{sld:unless-statement} \end{itemize} \item \texttt{while} loops --- \S\pageref{sld:while-loop} \begin{itemize} \item processing input using \texttt{while} \item The \texttt{<>} operator \end{itemize} \item \texttt{for} loops --- \S\pageref{sld:for-loop} \item \texttt{foreach} loops --- \S\pageref{sld:foreach} \begin{itemize} \item iterating over arrays and hashes with \texttt{foreach}, \texttt{while} --- \S\pageref{sld:array-iterate}--\S \pageref{sld:iterating-over-hash-sorted} \end{itemize} \item Exit early from a loop with \texttt{last}, and \texttt{next} --- 
\S\pageref{sld:exit-a-loop-early} \end{itemize} \item We will also look at ``\emphcolour{backwards statements}'' --- \S\pageref{sld:backwards-statements}--\S \pageref{sld:backwards-statements-examples} \end{itemize} \end{slide} \begin{slide}{\texttt{if} Statements} \label{sld:if-statements} \begin{itemize} \item \texttt{if} statements work as in C or Java, except: \begin{itemize} \item braces are required, not optional \item Use \texttt{elsif} instead of \texttt{else if} \end{itemize} \item Example: \begin{verbatim} if ( $age > $max ) { print "Too old\n"; } elsif ( $age < $min ) { print "Too young\n"; } else { print "Just right\n"; } \end{verbatim} \end{itemize} \end{slide} \begin{slide}{\texttt{unless} Statement} \label{sld:unless-statement} \begin{itemize} \item Same as \texttt{if} statement, \begin{itemize} \item except that the block is executed if the condition is \emph{false}: \end{itemize} {\footnotesize \begin{verbatim} unless ( $destination eq $home ) { print "I'm not going home.\n"; } \end{verbatim}% }% %\par\vspace*{-4ex}\par \begin{center} $\swarrow$\hspace*{2ex}corresponds to:\hspace*{2ex}$\searrow$ \end{center} \begin{minipage}[t]{0.5\slideWidth} \begin{alltt}\footnotesize unless ( \meta{condition} ) \{ \meta{statements\ldots}; \} \end{alltt}% \end{minipage}% \begin{minipage}[t]{0.5\slideWidth} \begin{alltt}\footnotesize if ( ! ( \meta{condition} ) ) \{ \meta{statements\ldots}; \} \end{alltt} \end{minipage} \item \texttt{else} works, but I suggest you don't use it \begin{itemize} \item Use \texttt{if}\ldots\texttt{else} instead \end{itemize} \end{itemize} \end{slide} \begin{slide}{\texttt{while} loop} \label{sld:while-loop} \begin{itemize} \item Just as in C or Java \begin{itemize} \item \ldots{}but braces are required: \end{itemize} {\footnotesize \begin{verbatim} while ( $tickets_sold < 1000 ) { $available = 1000 - $tickets_sold; print "$available tickets are available.
", "How many do you want: "; $purchase = <STDIN>; chomp $purchase; $tickets_sold += $purchase; } \end{verbatim} } \item \end{itemize} \end{slide} \begin{slide}{Input with \texttt{while}} \label{sld:input-with-while} \begin{itemize} \item Input is often done using \texttt{while}: \begin{alltt} while ( $line = <STDIN> ) \{ \meta{process this \textup{\texttt{$line}}{}} \} \end{alltt} \item This loop will iterate once for each line of input \item will terminate at end of file \end{itemize} \end{slide} \begin{slide}{The Special \texttt{\$\_} variable} \begin{itemize} \item \emphcolour{Nearly every built-in input function}, \emphcolour{many input operators}, \emphcolour{most statements with input} and \emphcolour{regular expressions} use a \emphcolour{special variable} \texttt{\red\$\_} \item If you don't specify a variable, \emphcolour{Perl uses} \texttt{\red\$\_} \item For example, this \texttt{while} loop reads one line from standard input at a time, and prints that line: \begin{verbatim} while ( <STDIN> ) { print; } \end{verbatim} \item \texttt{while} loop reads one line into \texttt{\$\_} at each iteration. \item \texttt{print} statement prints the value of \texttt{\$\_} if you do not tell it to print anything else. \item See the \emph{Perl Reference} on page 2 under \emph{Conventions} \end{itemize} \end{slide} \begin{slide}{\texttt{while} and the \texttt{<>} operator} \label{sld:while-and-the-angle-operator} \begin{itemize} \item Most input is done using the \texttt{<>} operator with a \texttt{while} loop \item The \texttt{<>} operator processes files named on the \emphcolour{command line} \begin{itemize} \item These are called \emphcolour{command line parameters} or \emphcolour{command line arguments} \item If you execute it like this: \begin{verbatim} angle-brackets.pl \end{verbatim} then you have no \emph{command line arguments} passed to the program.
\item But if you execute it like this: \begin{verbatim} angle-brackets.pl file_1 file_2 file_3 \end{verbatim} then the \emph{command line} has three \emph{arguments}, which here, happen to be the names of files. \end{itemize} \end{itemize} \end{slide} \begin{slide}{\texttt{while} and the \texttt{<>} operator --- 2} \begin{itemize} \item We most often use the \texttt{<>} operator like this: %\meta{process the line in \textup{\texttt{$_}}{}} \begin{alltt} while ( <> ) \{ \meta{statements\ldots} \} \end{alltt}%$ \item \emphcolour{This loop does a lot}. The pseudocode here shows what it does: \begin{alltt}\tiny if there are no command line arguments, while there are lines to read from standard input read next line into $_ execute \meta{statements\ldots} else for each command line argument open the file while there are lines to read read next line from the file into $_ execute \meta{statements\ldots} close the file \end{alltt} \end{itemize} \end{slide} \begin{slide}{\texttt{for} loop} \label{sld:for-loop} \begin{itemize} \item The \texttt{for} loop works as in C or Java, except that braces are required, not optional. \item Example: \begin{verbatim} for ( $i = 0; $i < $max; ++$i ) { $sum += $array[ $i ]; } \end{verbatim} \item Note that we rarely use this type of loop in Perl.\@ Instead, use the higher level \texttt{foreach} loop\ldots \end{itemize} \end{slide} \begin{slide}{\texttt{foreach} loop} \label{sld:foreach} \label{sld:array-iterate} \begin{itemize} \item The \texttt{foreach} loop iterates over an array or list. \item Most useful looping construct in Perl \item It is so good, that Java 1.5 has borrowed this type of loop to simplify iterators. \item An example: adds 1 to each element of an array: \begin{verbatim} foreach my $a ( @array ) { ++$a; } \end{verbatim} \item \texttt{\$a} here is a \emphcolour{reference} to each element of the array, so \item changing \texttt{\$a} actually changes the array element.
\item You can write ``\texttt{for}'' or ``\texttt{foreach}'', Perl won't mind. \end{itemize} \end{slide} %% \begin{slide}{Iterating over an Array} %% \label{sld:array-iterate} %% \begin{itemize} %% \item Process all elements in an array like this: %% \begin{alltt} %% foreach my $element ( @array ) \{ %% \meta{work with element} %% \} %% \end{alltt}%$ %% \item \texttt{\$element} is made as a \emph{reference} to the element %% of the array %% \begin{itemize} %% \item if you change \texttt{\$element}, you are actually changing %% the element in the array. %% \end{itemize} %% \item Start at beginning of array, iterate through to the end %% \item This is so easy, Sun has added this kind of syntax to Java 1.5. %% \end{itemize} %% \end{slide} \tsection{Iteration} \begin{slide}{Iterating over a Hash} \label{sld:hash-iterate} \begin{itemize} \item Referring to our example hash in slide~\pageref{sld:example-hash}, we can process each element like this: \begin{alltt} foreach my $key ( keys %hash ) \{ \meta{process \textup{\texttt{$hash\{$key\}{}}}} \} \end{alltt}%$ \begin{itemize} \item \texttt{keys} creates a temporary array of all the keys of the hash \item We then looped through that array with \texttt{foreach}. \end{itemize} \item More efficient is to use the \texttt{each} built in function, which truly iterates through the hash: \begin{alltt} while ( my ( $key, $value ) = each %hash ) \{ \meta{process \textup{\texttt{$key}} and \textup{\texttt{$value{}}}} \} \end{alltt} \end{itemize} \end{slide} \begin{slide}{Iterating over a Hash in Sorted Order} \begin{itemize} \item Did we process the contents of \texttt{\%hash} in alphabetical order in slide~\pageref{sld:hash-iterate}? \begin{itemize} \item No. \item So what do we do if we want to print the elements in order? \begin{itemize} \item In order of key by alphabet? Numerically? \item In order of element by alphabet? Numerically? 
\end{itemize} \end{itemize} \item Use built in \texttt{sort} function \item see \texttt{perldoc -f sort} \end{itemize} \end{slide} \begin{slide}{Iterating over a Hash in Sorted Order} \label{sld:iterating-over-hash-sorted} \begin{itemize} \item You \emphcolour{cannot sort a hash} \item \ldots{}but you can read all the keys, sort them, then process each element in that order: \begin{alltt} foreach my $key ( sort keys %hash ) \{ \meta{process \textup{\texttt{$hash\{$key\}{}}}} \} \end{alltt}%$ \begin{itemize} \item see \texttt{perldoc -f sort} \end{itemize} \item A reverse sort: \begin{alltt} foreach my $key ( reverse sort keys %hash ) \{ \meta{process \textup{\texttt{$hash\{$key\}}{}}} \} \end{alltt}%$ \begin{itemize} \item see \texttt{perldoc -f reverse} \end{itemize} \end{itemize} \end{slide} \tsection{Other Statements} \begin{slide}{Exit a Loop Early} \label{sld:exit-a-loop-early} \begin{itemize} \item Java and C provide \texttt{break} and \texttt{continue} \item Perl provides \texttt{\textbf{\blue{}last}} and \texttt{\textbf{\blue{}next}} \label{sld:qw} \begin{verbatim} my @super_people = qw( Superman Robin Wonder Woman Batman Superboy ); foreach my $person ( @super_people ) { next if $person eq "Robin"; print "$person\n"; last if $person eq "Batman"; } \end{verbatim} \item What do you think this program will print? \end{itemize} \end{slide} \begin{slide}{``Backwards'' Statements} \label{sld:backwards-statements} \begin{itemize} \item Put an \texttt{if}, \texttt{while} or \texttt{foreach} modifier \emphcolour{after a simple statement}.
\item You can put a simple statement (i.e., with no braces), and put one of these afterwards: \begin{verbatim} if EXPR unless EXPR while EXPR until EXPR foreach EXPR \end{verbatim} \end{itemize} \end{slide} \begin{slide}{``Backwards'' Statements---Examples} \label{sld:backwards-statements-examples} \begin{itemize} \item Examples: \begin{itemize} \item \begin{verbatim} print $1 if /(\d{9})/; \end{verbatim} is equivalent to: \begin{verbatim} if ( /(\d{9})/ ) { print $1; } \end{verbatim} \item \begin{verbatim} # print unless this is a blank line: print unless /^\s*$/; \end{verbatim}%$ is equivalent to \begin{verbatim} if ( ! /^\s*$/ ) { print; } \end{verbatim}%$ \end{itemize} \end{itemize} \end{slide} \tsection{List Operations} \begin{slide}{Array Operations---\texttt{push} and \texttt{pop}} \label{sld:pushd-pop} \begin{itemize} \item The documentation for these is in the very loo--oong document \texttt{perlfunc}, and is best read with \texttt{perldoc -f \meta{Function}} \begin{description} \item[\texttt{\blue{}push}] add a value at the end of an array, e.g., \begin{verbatim} my @array = ( 1, 2, 3 ); push @array, 4; # now @array contains ( 1, 2, 3, 4 ) \end{verbatim} \begin{itemize} \item Do \texttt{perldoc -f push} \end{itemize} \item[\texttt{\blue{}pop}] remove and return value from end of an array \begin{verbatim} my @array = ( 1, 2, 3 ); my $element = pop @array; # now @array contains ( 1, 2 ) # and $element contains 3 \end{verbatim} \begin{itemize} \item Do \texttt{perldoc -f pop} \end{itemize} \end{description} \end{itemize} \end{slide} \begin{slide}{Array Ops---\texttt{shift} and \texttt{unshift}} \label{sld:shift-unshift} \begin{description} \item[\texttt{\blue{}shift}] remove and return value from the beginning of an array, e.g., \begin{verbatim} my @array = ( 1, 2, 3 ); my $element = shift @array; # now @array contains ( 2, 3 ) # and $element contains 1 \end{verbatim} \begin{itemize} \item Do \texttt{perldoc -f shift} \end{itemize} 
\item[\texttt{\blue{}unshift}] add value to the beginning of an array, e.g., \begin{verbatim} my @array = ( 1, 2, 3 ); unshift @array, 4; # now @array contains ( 4, 1, 2, 3 ) \end{verbatim} \begin{itemize} \item Do \texttt{perldoc -f unshift} \end{itemize} \end{description} \end{slide} \begin{slide}{\texttt{split} and \texttt{join}} \label{sec:split-and-join} \begin{itemize} \item Do \texttt{perldoc -f split} and \texttt{perldoc -f join}. \item \texttt{\textbf{\blue{}split}} splits a string into an array: {\scriptsize \begin{verbatim} my $pwline = "nicku:x:500:500:Nick Urbanik:/home/nicku:/bin/bash"; my ( $userid, $pw, $userid_number, $group_id_number, $name, $home_dir, $shell ) = split /:/, $pwline; \end{verbatim}%$ } \item Another application is reading two or more values on the same input line: \begin{verbatim} my ( $a, $b ) = split ' ', <STDIN>; \end{verbatim} \item \texttt{\textbf{\blue{}join}} is the opposite of \texttt{split} and joins an array into a string: \begin{verbatim} my $pwline = join ':', @pwfields; \end{verbatim}%$ \end{itemize} \end{slide} \tsection{Subroutines} \begin{slide}{Subroutines} \label{sld:subroutines} \begin{itemize} \item See \texttt{perldoc perlsub} \item Syntax: \begin{alltt} sub \meta{subroutine_name} \{ \meta{statements\ldots} \} \end{alltt} \end{itemize} \end{slide} \begin{slide}{Parameters --- 1} \label{sld:parameters-1} \begin{itemize} \item Subroutine calls pass their parameters to the subroutine in a list named \texttt{@\_}. It is best to show with an example: \begin{verbatim} #! /usr/bin/perl -w use strict; sub product { my ( $a, $b ) = @_; return $a * $b; } print "enter two numbers on one line: a b "; my ( $x, $y ) = split ' ', <STDIN>; print "The product of $x and $y is ", product( $x, $y ), "\n"; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Parameters --- 2} \label{sld:parameters-2} \begin{itemize} \item parameters are passed in one list \texttt{@\_}.
\item If you are passing one parameter, then the builtin function \texttt{shift} will conveniently remove the first item from this list, e.g., \begin{verbatim} sub square { my $number = shift; return $number * $number; } \end{verbatim}%$ \end{itemize} \end{slide} \tsection{Error Handling} \begin{slide}{Checking for Errors: \texttt{die} and \texttt{warn}} \label{sld:die-warn} \begin{itemize} \item System calls can fail; examples: \begin{itemize} \item Attempt to read a file that doesn't exist \item Attempt to execute an external program that you do not have permission to execute \end{itemize} \item In Perl, use the \texttt{\textbf{\blue{}die}} built in function with the \texttt{or} operator to terminate (or raise an exception) on error: \begin{verbatim} chdir '/tmp' or die "can't cd to tmp: $!"; \end{verbatim}%$ \item \texttt{die} and \texttt{warn} both print a message to \texttt{STDERR}, but \texttt{die} will raise a fatal exception, \texttt{warn} will continue \item If no newline at the end of string, \texttt{die} and \texttt{warn} print the program name and line number where they were called \item \texttt{\$!} holds the value of the last system error message \end{itemize} \end{slide} \tsection{File and Process I/O} \begin{slide}{Files and Filehandles} \label{sld:files-and-filehandles} \begin{itemize} \item \texttt{STDIN}, \texttt{STDOUT} and \texttt{STDERR} are predefined filehandles \item You can define your own using the \texttt{open} built-in function \item Generally use all upper-case letters by convention \item Example: \texttt{open} for input: \begin{verbatim} use strict; open PASSWD, '<', "/etc/passwd" or die "unable to open passwd file: $!"; while ( <PASSWD> ) { my ( $user ) = split /:/; print "$user\n"; } close PASSWD; \end{verbatim}%$ \end{itemize} \end{slide} \begin{slide}{Open for Writing} \label{sld:open-for-writing} \begin{itemize} \item To create a new file for output, use ``\texttt{>}'' instead of ``\texttt{<}'' with the file name.
\begin{verbatim} use strict; open OUT, '>', "data.txt" or die "unable to open data.txt: $!"; for ( my $i = 0; $i < 10; ++$i ) { print OUT "Time is now ", scalar localtime, "\n"; } close OUT; \end{verbatim}%$ \item Note there is \emphcolour{no comma} after the filehandle in \texttt{print} \item To append to a file if it exists, or otherwise create a new file for output, use ``\texttt{>{}>}'' instead of ``\texttt{>}'' with the file name. \end{itemize} \end{slide} \begin{slide}{Executing External Programs} \label{sec:external-programs} \begin{itemize} \item Many ways of doing this: \begin{itemize} \item \texttt{system} built-in function \item backticks \item many other ways not covered here. \end{itemize} \end{itemize} \end{slide} \begin{slide}{\texttt{system}} \begin{itemize} \item Example: \begin{verbatim} my @cmd = ( 'useradd', '-c', "\"$name\"", '-p', $hashed_passwd, $id ); print "@cmd\n"; system @cmd; \end{verbatim}%$ \item This also works: {\scriptsize \begin{verbatim} system "useradd -c \"$name\" -p \"$hashed_passwd\" $id"; \end{verbatim}%$ } \item {\mbox{}\green{}difference:} second form is usually passed to a command shell (such as \texttt{/bin/sh} or \texttt{CMD.EXE}) to execute, whereas the first form is executed directly. \end{itemize} \end{slide} \begin{slide}{Was \texttt{system} Call Successful?} \begin{itemize} \item Check that the return value was zero: \end{itemize} {\scriptsize \begin{verbatim} if ( system( "useradd -c \"$name\" -p \"$hashed_passwd\" $id" ) != 0 ) { print "useradd failed"; exit; } \end{verbatim}%$ } \begin{itemize} \item This is usually written in Perl more simply using the built in function \texttt{\red{}die}, and the \texttt{or} operator: \end{itemize} {\scriptsize \begin{verbatim} system( "useradd -c \"$name\" -p \"$hashed_passwd\" $id" ) == 0 or die "useradd failed"; \end{verbatim}%$ } \end{slide} \begin{slide}{Was \texttt{system} Call Successful? 
--- 2} \begin{itemize} \item I usually prefer to call \texttt{system} like this: \begin{verbatim} my @cmd = ( 'useradd', '-c', "\"$name\"", '-p', $hashed_passwd, $id ); print "@cmd\n"; system( @cmd ) == 0 or die "Can't execute @cmd"; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Backticks: \texttt{`\ldots`} or \texttt{qx\{\ldots\}}} \label{sld:backticks} \begin{itemize} \item Perl provides \emphcolour{command substitution} \item Just like in shell programming, where the \item output of the program replaces the code that calls it: \begin{verbatim} print `ls -l`; \end{verbatim} \item Note that you can write \texttt{qx\{\ldots\}} instead: \begin{verbatim} print qx{df -h /}; \end{verbatim} \begin{itemize} \item \texttt{qx//} is mentioned in slide~\pageref{sld:quoting} \end{itemize} \end{itemize} \end{slide} \begin{slide}{See the perl summary} \begin{itemize} \item The Perl summary on the subject web site provides\ldots well, a good summary! \item Called \texttt{perl.pdf} \item Stored in same directory as these notes \end{itemize} \end{slide} \tsection{Regular Expressions} \begin{slide}{Regular Expressions} \label{sld:regexp} \vspace*{0.1\slideWidth} \begin{center} \mbox{}\blue{}Regular Expressions are available as part of the programming languages Java, JScript, Visual Basic and VBScript, JavaScript, C, \Cpp, C\#, elisp, Perl, Python, Ruby, PHP, sed, awk, and in many applications, such as editors, grep, egrep. \vspace*{0.05\slideWidth} \Large{}Regular Expressions help you master your data.\\ \tiny{}--- Sales Department. \end{center} \end{slide} \begin{slide}{What is a Regular Expression?} \begin{itemize} \item Powerful.
\item Low level description: \begin{itemize} \item Describes some text \item Can use to: \begin{itemize} \item Verify a user's input \item Sift through large amounts of data \end{itemize} \end{itemize} \item High level description: \begin{itemize} \item {\mbox{}\green{}Allow you to master your data} \end{itemize} \end{itemize} \end{slide} \begin{slide}{Regular Expressions as a language} \begin{itemize} \item Can consider regular expressions as a language \item Made of two types of characters: \begin{itemize} \item \emphcolour{Literal} characters \begin{itemize} \item Normal text characters \item Like words of the program \end{itemize} \item \emphcolour{Metacharacters} \begin{itemize} \item The special characters \texttt{+} \texttt{?} \texttt{.} \texttt{*} \texttt{\^{}} \texttt{\$} \texttt{(} \texttt{)} \texttt{[} \texttt{\{} \textbar{} \bs %} \item Act as the grammar that combines with the words according to a set of rules to create an expression that communicates an idea \end{itemize} \end{itemize} \end{itemize} \end{slide} \begin{slide}{How to use a Regular Expression} \vspace*{0.2\slideWidth} \begin{center}\Large \mbox{}\blue{}How to make a regular expression as part of your program \end{center} \end{slide} \begin{slide}{What do they look like?} \begin{itemize} \item In Perl, a regular expression begins and ends with `\texttt{/}', like this: \texttt{/abc/} \item \texttt{/abc/} matches the string ``\texttt{abc}'' \begin{itemize} \item Are these literal characters or metacharacters?
\end{itemize} \item Returns true if matches, so often use as condition in an \texttt{if} statement \end{itemize} \end{slide} \begin{slide}{Example: searching for ``\texttt{Course:}''} \begin{itemize} \item Problem: want to print all lines in all input files that contain the string ``\texttt{Course:}'' \begin{verbatim} while ( <> ) { my $line = $_; if ( $line =~ /Course:/ ) { print $line; } } \end{verbatim} \item Or more concisely: \begin{verbatim} while ( <> ) { print if $_ =~ /Course:/; } \end{verbatim}%$ \item or even: \begin{verbatim} /Course:/ and print while <>; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{The ``match operator'' \texttt{=$\sim$}} \label{sld:match-operator} \begin{itemize} \item If just use \texttt{/Course:/}, this returns true if \texttt{\$\_} contains the string ``\texttt{Course:}'' \item If want to test another string variable \texttt{\$var} to see if it contains the regular expression, use \item \texttt{\$var =\~{} /regular expression/} \item Under what condition is this true? \end{itemize} \end{slide} \begin{slide}{The ``match operator'' \texttt{=$\sim$} --- 2} \begin{verbatim} # sets the string to be searched: $_ = "perl for Win32"; # is 'perl' inside $_? if ( $_ =~ /perl/ ) { print "Found perl\n" }; # Same as the regex above.
# Don't need the =~ as we are testing $_: if ( /perl/ ) { print "Found perl\n" }; \end{verbatim}% \end{slide} \begin{slide}{\texttt{/i} --- Matching without case sensitivity} \footnotesize \begin{verbatim} $_ = "perl for Win32"; \end{verbatim} \begin{verbatim} # this will fail because the case doesn't match: if ( /PeRl/ ) { print "Found PeRl\n" }; \end{verbatim} \begin{verbatim} # this will match, because there is an 'er' in 'perl': if ( /er/ ) { print "Found er\n" }; \end{verbatim} \begin{verbatim} # this will match, because there is an 'n3' in 'Win32': if ( /n3/ ) { print "Found n3\n" }; \end{verbatim} \begin{verbatim} # this will fail because the case doesn't match: if ( /win32/ ) { print "Found win32\n" }; \end{verbatim} \begin{verbatim} # This matches because the /i at the end means # "match without case sensitivity": if ( /win32/i ) { print "Found win32 (i)\n" }; \end{verbatim} \end{slide} \begin{slide}{Using \texttt{!$\sim$} instead of \texttt{=$\sim$}} \begin{verbatim} # Looking for a space: print "Found!\n" if / /; # both these are the same, but reversing the logic with # unless and !~ print "Found!!\n" unless $_ !~ / /; print "Found!!\n" unless ! / /; \end{verbatim} \end{slide} \begin{slide}{Embedding variables in regexps} \begin{verbatim} # Create two variables containing # regular expressions to search for: my $find = 32; my $find2 = " for "; if ( /$find/ ) { print "Found '$find'\n" }; if ( /$find2/ ) { print "Found '$find2'\n" }; # different way to do the above: print "Found $find2\n" if /$find2/; \end{verbatim} \begin{itemize} \item This is the meaning of the ``Yes'' under ``Interpolates'' in the table on slide~\pageref{sld:quoting} on the row for \texttt{m//} \end{itemize} \end{slide} \begin{slide}{The Metacharacters} \vspace*{0.1\slideWidth} \begin{center}\Large \mbox{}\blue{}The funny characters \vspace*{0.05\slideWidth} What they do \vspace*{0.05\slideWidth} How to use them \end{center} \end{slide} \begin{slide}{Character Classes
\texttt{[...]}} \begin{verbatim} my @names = ( "Nick", "Albert", "Alex", "Pick" ); foreach my $name ( @names ) { if ( $name =~ /[NP]ick/ ) { print "$name: Out for a Pick Nick\n"; } else { print "$name is not Pick or Nick\n"; } } \end{verbatim} \begin{itemize} \item Square brackets \emphcolour{match \underline{one} single character} \end{itemize} \end{slide} \begin{slide}{Examples of use of \texttt{[...]}} \begin{itemize} \item Match a capital letter: \texttt{[ABCDEFGHIJKLMNOPQRSTUVWXYZ]} \item Same thing: \texttt{[A-Z]} \item Match a vowel: \texttt{[aeiou]} \item Match a letter or digit: \texttt{[A-Za-z0-9]} \end{itemize} \end{slide} \begin{slide}{Negated character class: \texttt{[\^{}...]}} \begin{itemize} \item Match any single character that is \emphcolour{not} a letter: \texttt{[\^{}A-Za-z]} \item Match any character that is not a space or a tab: \mbox{\texttt{[\^{} \bs t]}} \end{itemize} \end{slide} \begin{slide}{Example using \texttt{[\^{}...]}} \begin{itemize} \item This simple program prints only lines that contain characters that are not a space: \begin{verbatim} while ( <> ) { print $_ if /[^ ]/; } \end{verbatim}%$ \item This prints lines that \emphcolour{start with} a character that is not a space: \begin{verbatim} while ( <> ) { print if /^[^ ]/; } \end{verbatim} \item Notice that \texttt{\^{}} has two meanings: one inside \texttt{[...]}, the other outside.
\end{itemize} \end{slide} \begin{slide}{Shorthand: Common Character Classes} \begin{itemize} \item Since matching a digit is very common, Perl provides \texttt{\bs d} as a short way of writing \texttt{[0-9]} \item \texttt{\bs D} matches a non-digit: \texttt{[\^{}0-9]} \item \texttt{\bs s} matches any whitespace character; shorthand for \mbox{\texttt{[ \bs t\bs n\bs r\bs f]}} \item \texttt{\bs S} non-whitespace, \mbox{\texttt{[\^{} \bs t\bs n\bs r\bs f]}} \item \texttt{\bs w} word character, \texttt{[a-zA-Z0-9\_]} \item \texttt{\bs W} non-word character, \mbox{\texttt{[\^{}a-zA-Z0-9\_]}} \end{itemize} \end{slide} \begin{slide}{Matching any character} \begin{itemize} \item The dot matches any character except a newline \item This matches any line with \emphcolour{at least 5} characters before the newline: \begin{verbatim} print if /...../; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Matching the beginning or end} \begin{itemize} \item to match a line that contains \emphcolour{exactly} five characters before the newline: \begin{verbatim} print if /^.....$/; \end{verbatim}%$ \item the \texttt{\^{}} matches the beginning of the line. \item the \texttt{\$} matches at the end of the line \end{itemize} \end{slide} \begin{slide}{Matching Repetitions: \texttt{* + ? \{n,m\}}} \begin{itemize} \item To match {\blue{}zero or more}: \begin{itemize} \item \texttt{/a*/} will match zero or more letter `\texttt{a}', so matches ``\mbox{}'', ``\texttt{a}'', ``\texttt{aaaa}'', ``\texttt{qwereqwqwer}'', or the nothing in front of \emphcolour{anything}! \end{itemize} \item to match {\blue{}at least one}: \begin{itemize} \item \texttt{/a+/} matches at least one ``\texttt{a}'' \item \texttt{/a?/} matches zero or one ``\texttt{a}'' \item \texttt{/a\{3,5\}/} matches between 3 and 5 ``\texttt{a}''s. 
\end{itemize} \end{itemize} \end{slide} \begin{slide}{Example using \texttt{.*}} {\small \begin{verbatim} $_ = 'Nick Urbanik <nicku@vtc.edu.hk>'; print "found something in <>\n" if /<.*>/; # Find everything between quotes: $_ = 'He said, "Hi there!", and then "What\'s up?"'; print "quoted!\n" if /"[^"]*"/; print "too much!\n" if /".*"/; \end{verbatim} } \end{slide} \begin{slide}{Capturing the Match with \texttt{(...)}} \begin{itemize} \item Often want to scan large amounts of data, extracting important items \item Use parentheses and regular expressions \item Silly example of capturing an email address: \begin{verbatim} $_ = 'Nick Urbanik <nicku@vtc.edu.hk>'; print "found $1 in <>\n" if /<(.*)>/; \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Capturing the match: greediness} \begin{itemize} \item Look at this example: \end{itemize} {\small \begin{verbatim} $_ = 'He said, "Hi there!", and then "What\'s up?"'; print "$1\n" if /"([^"]*)"/; print "$1\n" if /"(.*)"/; \end{verbatim}%$ } \begin{itemize} \item What will each print? \item The first one works; the second one prints: \begin{verbatim} Hi there!", and then "What's up? \end{verbatim} \item Why? \item Because \texttt{*}, \texttt{?}, \texttt{+}, \texttt{\{m,n\}} are \emphcolour{greedy}! \item They match as much as they possibly can! \end{itemize} \end{slide} \begin{slide}{Being Stingy (not Greedy): \texttt{?}} \begin{itemize} \item Usually greedy matching is what we want, but not always \item How can we match as little as possible? \item Put a \texttt{?} after the quantifier: \begin{tabular}{@{}>{\ttfamily}ll@{}} *? & Match 0 or more times\\ +? & Match 1 or more times\\ ?? & Match 0 or 1 time\\ \{n,\}? & Match at least n times\\ \{n,m\}?
& Match at least n, but no more than m times \end{tabular} \end{itemize} \end{slide} \begin{slide}{Being Less Greedy: Example} \begin{itemize} \item We can solve the problem we saw earlier using non-greedy matching: \end{itemize} {\small \begin{verbatim} $_ = 'He said, "Hi there!", and then "What\'s up?"'; print "$1\n" if /"([^"]*)"/; print "$1\n" if /"(.*?)"/; \end{verbatim}%$ } \begin{itemize} \item These both work, and match only: \begin{verbatim} Hi there! \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Sifting through large amounts of data} \begin{itemize} \item Imagine you need to create computing accounts for thousands of students \item As input, you have data of the form: \begin{itemize} \item Some heading on the top of each page \item More headings with other content, including blank lines \item A tab character separates the columns \end{itemize} \begin{verbatim} 123456789 H123456(1) 234567890 I234567(2) 345678901 J345678(3) ... ... 987654321 A123456(1) \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Capturing the Match: \texttt{(...)}} \begin{verbatim} # useradd() is a function defined elsewhere # that creates a computer account with # username as first parameter, password as # the second parameter while ( <> ) { if ( /^(\d{9})\t([A-Z]\d{6}\([\dA]\))/ ) { my $student_id = $1; my $hk_id = $2; useradd( $student_id, $hk_id ); } } \end{verbatim} \end{slide} \begin{slide}{The Substitution Operator \texttt{s///}} \label{sld:substitution-operator} \begin{itemize} \item Sometimes want to \emphcolour{replace} one string with another (editing) \item Example: want to replace \texttt{Nicholas} with \texttt{Nick} on input files: \begin{verbatim} while ( <> ) { $_ =~ s/Nicholas/Nick/; print $_; } \end{verbatim} \end{itemize} \end{slide} \begin{slide}{Avoiding leaning toothpicks: \texttt{/\bs /\bs /}} \begin{itemize} \item Want to change a filename, edit the directory in the path from, say \texttt{/usr/local/bin/filename} to \texttt{/usr/bin/filename}
\item Could do it like this:
\begin{itemize}
\item \texttt{s/\bs /usr\bs /local\bs /bin\bs //\bs /usr\bs /bin\bs //;}
\item but this makes me dizzy!
\end{itemize}
\item We can do this instead:
\begin{itemize}
\item \texttt{s!/usr/local/bin/!/usr/bin/!;}
\end{itemize}
\item Can use any character instead of \texttt{/} in \texttt{s///}
\begin{itemize}
\item For \emphcolour{matches}, can put \texttt{m//}, and use any char
instead of~\texttt{/}
\item Can also use parentheses or braces:
\item \texttt{s\{...\}\{...\}} or \texttt{m\{...\}}
\end{itemize}
\end{itemize}
\end{slide}

\begin{slide}{Substitution and the \texttt{/g} modifier}
\begin{itemize}
\item If an input line contains:
\item Nicholas Urbanik read ``Nicholas Nickleby''
\item then the output is:
\item Nick Urbanik read ``Nicholas Nickleby''
\item How can we change every Nicholas in one line?
\item Use the \texttt{/g} (global) modifier:
\begin{verbatim}
while ( <> ) {
    $_ =~ s/Nicholas/Nick/g;
    print $_;
}
\end{verbatim}
\end{itemize}
\end{slide}

\begin{slide}{Readable regex: \texttt{/x} Modifier}
\label{sld:readable-regexp}
\begin{itemize}
\item Sometimes regular expressions can get long, and need comments
inside so others (or you later!) understand
\item Use \texttt{/x} at the end of \texttt{s///x} or \texttt{m//x}
\item Allows white space, newlines, comments
\item See example on slide~\pageref{sld:extended-modifier-example}
\end{itemize}
\end{slide}

\tsection{Other Topics}

\begin{slide}{Special Vars: Input Record Separator}
\label{sld:input-record-separator}
\begin{itemize}
\item When I described the \texttt{<>} operator, I lied a little
\item As \texttt{while ( <> ) \{ \ldots \}} executes, it
{\green{}iterates once per record}, \emphcolour{not} just
{\green{}once per line}.
\item What counts as a record is defined by a special built-in
variable, the \emphcolour{Input Record Separator} \texttt{\$/}
\begin{itemize}
\item default value is a newline, so by default read one line at a time
\end{itemize}
\item But useful alternatives are \emphcolour{paragraph mode} and
\emphcolour{whole-file mode}
\end{itemize}
\end{slide}

\begin{slide}{Paragraph, Whole-file Modes}
\label{sld:paragraph-whole-file-modes}
\begin{itemize}
\item To input in paragraph mode, put this line before you read input:
\begin{verbatim}
$/ = "";
\end{verbatim}%$
\item Then when you read input, it will be split at \emphcolour{two or
more newlines}
\begin{itemize}
% \item The newlines will not be part of the record
\item You could split the fields at the newlines
\end{itemize}
\item To slurp a whole file into one string, you can do:
\begin{verbatim}
undef $/;
$_ = <FH>;       # slurp whole file into $_
s/\n[ \t]+/ /g;  # fold indented lines
\end{verbatim}%$
\item See \texttt{perldoc -q paragraph}, \texttt{perldoc perlvar} and
\texttt{perldoc -f local} for \emphcolour{important} information on
how to localise the change to \texttt{\$/}.
\end{itemize}
\end{slide}

\begin{slide}{\texttt{local}ising Global Variables}
\label{sld:localising-input-record-separator}
\begin{itemize}
\item It is not a good idea to globally change \texttt{\$/} (or even
\texttt{\$\_})
\begin{itemize}
\item Your program may \texttt{use} other modules, and they may behave
differently if \texttt{\$/} is changed.
\item Best to \emphcolour{localise} the change to \texttt{\$/} (or
\texttt{\$\_},\,\ldots)
\end{itemize}
\item Example localising whole-file mode:
\begin{verbatim}
my $content;
open FH, "foo.txt" or die $!;
{
    local $/;
    $content = <FH>;
}
close FH;
\end{verbatim}
\item For paragraph mode, put: \texttt{local \$/ = "";}
\end{itemize}
\end{slide}

\begin{slide}{One Line Perl Programs}
\label{sld:one-liners}
\begin{itemize}
\item Called ``one liners''
\item Just execute on the command line
\item See \texttt{perldoc perlrun}
\item Example:
\item \begin{alltt}\scriptsize
$ \textbf{perl -pi.backup -e 's/Silly/Sensible/g' fileA fileB}
\end{alltt}%$
\begin{itemize}
\item edits the files \texttt{fileA} and \texttt{fileB}
\item makes backups of the original files in \texttt{fileA.backup} and
\texttt{fileB.backup}
\item replaces all instances of ``Silly'' with ``Sensible''.
\end{itemize}
\item Useful for editing configuration files in shell scripts,
automating tasks
\end{itemize}
\end{slide}

\begin{slide}{References}
\label{sld:reference-books}%
\tiny
\begin{itemize}
\item \emph{Learning Perl, 3rd Edition}, Randal L. Schwartz and Tom
Phoenix, ISBN 0-596-00132-0, O'Reilly, July 2001.
\begin{itemize}
\item The second edition is fine, too. Don't bother with the first
edition, it is too old.
\end{itemize}
\item \emph{Perl Reference Guide}, Johan Vromans, handed out to each one
of you, and \emphcolour{will be handed out in the final examination}.
{\green{}Become familiar with it.}
\item \emph{Perl for System Administration: Managing multi-platform
environments with Perl}, David N. Blank-Edelman, ISBN 1-56592-609-9,
O'Reilly, July 2000.
\item \emph{Perl Cookbook, 2nd Edition}, Tom Christiansen and Nathan
Torkington, ISBN 0-596-00313-7, O'Reilly, August 2003
\begin{itemize}
\item The first edition is fine, too.
\end{itemize}
\item Don't forget \texttt{perldoc} and all the other documentation on
your hard disk.
\item \emph{Object Oriented Perl}, Damian Conway, ISBN 1-884777-79-1, Manning, 2000. --- A more advanced book for those wanting to build bigger projects in Perl. \end{itemize} \end{slide} \end{document}