%% ================================================================================
%% This LaTeX file was created by AbiWord.
%% AbiWord is a free, Open Source word processor.
%% You may obtain more information about AbiWord at www.abisource.com
%% ================================================================================
% NOTE(review): the slide environment with a title argument used below is not
% defined by the article class; it looks like the prosper-style interface --
% confirm the intended document class.
\documentclass[12pt]{article}
\usepackage[T1]{fontenc}
\usepackage{calc}
\usepackage{hyperref}
\usepackage{setspace}
\usepackage{multicol}
\usepackage[normalem]{ulem}
\usepackage{color}
\setlength{\oddsidemargin}{1.250000in-1in}
\setlength{\textwidth}{\paperwidth - 1.250000in-1.250000in}
\begin{document}
\begin{slide}{Perl}
\begin{center}
A language for Systems and Network Administration and Management
\end{center}
\end{slide}
\begin{slide}{What is Perl?}
\begin{itemize}
\item Perl is a programming language
\item The best language for processing text
\item Cross-platform, free, open
\item Microsoft have invested heavily in ActiveState to improve support for Windows in Perl
\item Has an excellent connection to the operating system
\item Has an enormous range of modules for thousands of application types
\item
\end{itemize}
\end{slide}
\begin{slide}{What is Perl?
2} \begin{itemize} \item Robust and reliable (has very few bugs) \item Supports object-oriented programming \item Good for big projects as well as small \item Java 1.4 has borrowed one of Perl's best features: \textit{regular expressions} \item Perl has garbage collection \item The ``duct tape of the Internet'' \item Easy to use, since it usually ``does the right thing'' \item Based on freedom of choice: ``There is more than one way to do it!'' \item TIMTOWTDI \item \item \end{itemize} \end{slide} \begin{slide}{Compiled and run each time} \begin{itemize} \item Perl is interpreted, but runs about as fast as a Java program \item Software development is very fast \item The Apache web server provides mod\_perl, which allows Perl applications to run very fast \item Used on some very large Internet sites: \item The Internet Movie Database \item Macromedia, Adobe, \url{http://slashdot.org/} \item \end{itemize} \end{slide} \begin{slide}{Perl is Evolving} \begin{itemize} \item Perl 6 will introduce many great features to make Perl \item easier to use \item Even more widely usable for more purposes \item Even better for bigger projects \item \end{itemize} \end{slide} \begin{slide}{Eclectic} \begin{itemize} \item Borrows ideas from many languages, including: \item C, C++ \item Shell \item Lisp \item BASIC \item \ldots{} even Fortran \item Many others\ldots \item \end{itemize} \end{slide} \begin{slide}{Regular Expressions} \begin{itemize} \item One of the best features of Perl \item A new concept for most of you \item \ldots{} But very useful! \item Used to: \item extract information from text \item transform information \item You will spend much time in this topic learning about regular expressions \item \end{itemize} \end{slide} \begin{slide}{Why should I learn it?} \begin{itemize} \item It will be in the final exam!
\item Okay, that's to get your attention, but\ldots \item Consider a real-life sys-admin problem: \item You must make student accounts for 1500 students \item TEACHING BEGINS TOMORROW!!! \item The Computing Division has a multi-million-dollar application to give you student enrollment data \item \ldots{} but it can only give you PDF files with a strange and irregular format for now (But oh, it will be infinitely better in the future! Just wait a year or two\ldots) \item \end{itemize} \end{slide} \begin{slide}{The available data} \begin{itemize} \item Has a variable number of lines before the student data begins \item Has a variable number of columns between different files \item Has many rows per enrolled student \item Goes on for dozens of pages, only 7 students per page!!!!!!! \item There are two formats, both equally peculiar!!!! \item \end{itemize} \end{slide} \begin{slide}{Sample data for new courses:} \begin{itemize} \item 15 N CHAN Wai Yee F 993175560 H123456(5) 28210216 CHEUNG \item 10-SEP-01 10-SEP-01 21234567 WAI CHI SISTER 91234567 \item \item \end{itemize} \end{slide} \begin{slide}{Problems} \begin{itemize} \item There is a different number of lines above the student records \item There is a different number of characters within each column from file to file \item There are many files \item The format can change any time the Computing Division deems it necessary \item \end{itemize} \end{slide} \begin{slide}{Solution in Perl} \begin{itemize} \item \tiny{}\#!
/usr/bin/perl -w \item \item \tiny{}use strict; \item \item \tiny{}my \$course; \item \tiny{}my \$year; \item \item \tiny{}while ( <> ) \item \tiny{}\{ \item \tiny{} chomp; \item \tiny{} \item \tiny{} if ( /\^{}\ensuremath{\backslash}s*Course :\ensuremath{\backslash}s(\ensuremath{\backslash}d+)\ensuremath{\backslash}s/ ) \item \tiny{} \{ \item \tiny{} \$course = \$1; \item \tiny{} undef \$year; \item \tiny{} next; \item \tiny{} \} \item \tiny{} elsif ( m!\^{}\ensuremath{\backslash}s*Course :\ensuremath{\backslash}s(\ensuremath{\backslash}d+)/(\ensuremath{\backslash}d)\ensuremath{\backslash}s! ) \item \tiny{} \{ \item \tiny{} \$course = \$1; \item \tiny{} \$year = \$2; \item \tiny{} next; \item \tiny{} \} \item \tiny{} if ( \item \tiny{} my ( \$name, \$gender, \$student\_id, \$hk\_id ) \item \tiny{} \#= m!\ensuremath{\backslash}s\ensuremath{\backslash}s+([A-Z]+(?: [A-Z][a-z]*)+)\ensuremath{\backslash}s\ensuremath{\backslash}s+([MF])\ensuremath{\backslash}s+(\ensuremath{\backslash}d\{9\})\ensuremath{\backslash}s\ensuremath{\backslash}s+([a-zA-Z]\ensuremath{\backslash}d\{6\}\ensuremath{\backslash}([\ensuremath{\backslash}dA-Z]\ensuremath{\backslash}))! 
) \item \tiny{} = m\{ \item \tiny{} \ensuremath{\backslash}s\ensuremath{\backslash}s+ \# at least 2 spaces \item \tiny{} ( \# this matches \$name \item \tiny{} [A-Z]+ \# family name is upper case \item \tiny{} (?:\ensuremath{\backslash}s[A-Z][a-z]*)+ \# one or more given names \item \tiny{} ) \item \tiny{} \ensuremath{\backslash}s\ensuremath{\backslash}s+ \# at least 2 spaces \item \tiny{} ([MF]) \# gender \item \tiny{} \ensuremath{\backslash}s+ \# at least one space \item \tiny{} (\ensuremath{\backslash}d\{9\}) \# student id is 9 digits \item \tiny{} \ensuremath{\backslash}s\ensuremath{\backslash}s+ \# at least 2 spaces \item \tiny{} ([a-zA-Z]\ensuremath{\backslash}d\{6\}\ensuremath{\backslash}([\ensuremath{\backslash}dA-Z]\ensuremath{\backslash})) \# HK ID \item \tiny{} \}x \item \tiny{} ) \item \tiny{} \{ \item \tiny{} print "sex=\$gender, student ID = \$student\_id, ", \item \tiny{} "hkID = \$hk\_id, course = \$course, name=\$name, ", \item \tiny{} defined \$year ? "year = \$year\ensuremath{\backslash}n" : "\ensuremath{\backslash}n"; \item \tiny{} next; \item \tiny{} \} \item \tiny{} warn "POSSIBLE UNMATCHED STUDENT: \$\_\ensuremath{\backslash}n" if m!\^{}\ensuremath{\backslash}s*\ensuremath{\backslash}d+\ensuremath{\backslash}s+!; \item \tiny{}\} \item \end{itemize} \end{slide} \begin{slide}{But I can use any other language!} \begin{itemize} \item I will give you HK\$200 if you are the first person to write a solution in another language in fewer keystrokes \item Note: the Perl solution given has: \item comments \item Plenty of space to show structure \item \ldots{} and handles exceptional situations (i.e., it is robust) \item To claim your \$200 from Nick, your solution must have \item similar space for comments \item Similar readability and robustness \item Be written in a general-purpose language using ordinary libraries \item \end{itemize} \end{slide} \begin{slide}{Any other solution may take longer to write} \begin{itemize} \item This program took a very
short time to write \item It is very robust \item For problems like this, Perl is second to no other programming language. \item \end{itemize} \end{slide} \begin{slide}{The hello world program} \begin{itemize} \item print "hello world\ensuremath{\backslash}n" \item \end{itemize} \end{slide} \begin{slide}{Variables} \begin{itemize} \item There are three basic types of variable: \item \textbf{\textit{{Scalar}}}{ (can be a number or string or\ldots)} \item \textbf{\textit{{Array}}}{ (an ordered array of scalars)} \item \textbf{\textit{{Hash}}}{ (an unordered array of scalars indexed by strings instead of numbers)} \item Each type is distinguished by a ``funny character'' \item \end{itemize} \end{slide} \begin{slide}{\$Scalars:} \begin{itemize} \item Start with a dollar sign \item Hold a single value, not a collection \item A string is a scalar, so is a number \item Examples: \item \$apple = 2; \item \$banana = "curly yellow fruit"; \item \item \end{itemize} \end{slide} \begin{slide}{@Array} \begin{itemize} \item Starts with an @ \item Indexes start at 0, like in C \item \item \item \end{itemize} \end{slide} \begin{slide}{\%Hashes} \begin{itemize} \item Unfamiliar concept to many of you \item Like an array, but indexed by a string \item A data structure like a database \item \end{itemize} \end{slide} \begin{slide}{Conclusion} \begin{itemize} \item Perl is optimised for text and systems administration programming \item Has great portability \item Is strongly supported by Microsoft \item Has three main built-in data types: \item Scalar: starts with \$ \item Array: starts with @ \item Hash: starts with \% \item \end{itemize} \end{slide} \end{document}