#! /bin/sh

# Copyright (C) 2004  Nick Urbanik

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# Usage: this-script [DATA_FILE]
#
# Prints worked solutions to regular-expression exercise 10, parts (a)-(d),
# for DATA_FILE (default: artificial-student-data.txt in the current
# directory).  Each part shows the matching text followed by the `wc`
# counts for that same output.  Exits with status 1 if DATA_FILE cannot be
# read.

# ${1-...} keeps an explicitly empty argument empty, so it is rejected by
# the readability check below instead of silently falling back to the
# default file.
file=${1-artificial-student-data.txt}

if [ ! -r "$file" ]; then
  printf 'Need to be able to read %s\n' "$file" >&2
  exit 1
fi

# Horizontal rule printed around every section heading.
readonly RULE="================================================================="

#######################################
# Print one exercise section: heading, query output, word count, footer.
# Globals:   RULE (read)
# Arguments: $1 - heading text
#            $2 - number of closing rules; when greater than zero the
#                 rules are followed by one blank line, when zero no
#                 footer is printed at all
#            $3 - name of the function that prints the query output
# Outputs:   writes the section to stdout
# Returns:   0
#######################################
section() {
  # POSIX sh has no `local`; the sec_ prefix keeps these out of the way.
  sec_title=$1
  sec_rules=$2
  sec_query=$3

  printf '%s\n' "$RULE" "$sec_title" "$RULE"
  "$sec_query"
  echo
  echo "Show Word Count:"
  # The query is run a second time so that wc sees exactly what was shown.
  "$sec_query" | wc

  [ "$sec_rules" -gt 0 ] || return 0
  while [ "$sec_rules" -gt 0 ]; do
    printf '%s\n' "$RULE"
    sec_rules=$((sec_rules - 1))
  done
  echo
}

# 10. Download a copy of the bogus student registration data from
#     http://ictlab.tyict.vtc.edu.hk/snm/lab/regular-expressions/artificial-student-data.txt.
#     Use this for the following exercises, together with the grep program:

# (a) Search for all students with the name "CHAN"
# Whole records where "CHAN" starts at column 7 and is followed by a
# character that is neither a letter nor a digit.
records_chan() {
  grep -E -- '^.{6}CHAN[^A-Za-z0-9]' "$file"
}

# (b) Search for all students whose student number begins and ends
#     with 9, and with any other digits in between.
# Whole records containing a 9, seven digits, then a 9.
# NOTE(review): the pattern is not anchored to a column or word boundary,
# so it would also match inside a longer run of digits -- confirm against
# the data layout.
records_nine_to_nine() {
  grep -E -- '9[0-9]{7}9' "$file"
}

# (c) Search for all student records where the Hong Kong ID has a
#     letter, not a number, in the parentheses.
# Whole records containing letter + six digits + "(letter)".
records_hkid_letter() {
  grep -E -- '[A-Za-z][0-9]{6}\([A-Za-z]\)' "$file"
}

# (d) Do the same exercises, but display only the students' names,
#     or student number.  You will need a program such as awk (or even
#     cut) to select the appropriate columns from the output of grep.

# (d)(a): cut bytes 7-28 of each record, keep those containing "chan"
# (any case) followed by a non-lowercase-letter, then print only the
# run of space-separated words.
names_chan_cut() {
  cut -b7-28 -- "$file" \
    | grep -i 'chan[^a-z]' \
    | grep -E -o '([A-Za-z]+,? )+[A-Za-z]+'
}

# (d)(a) alternative: let grep -o select columns 7 onwards of the CHAN
# records, then extract the words; words followed by a space must have at
# least two letters.
names_chan_grep_only() {
  grep -E -o -- '^.{6}CHAN[^A-Za-z].{20}' "$file" \
    | grep -E -o '([A-Za-z]{2,},? )+[A-Za-z]+'
}

# (d)(a) second alternative: cut the same byte range, then strip the
# trailing spaces with sed instead of re-matching the words.
names_chan_trimmed() {
  cut -b7-28 -- "$file" \
    | grep -i 'chan[^a-z]' \
    | sed 's/ *$//'
}

# (d)(b): only the matching 9...9 digit strings.
numbers_nine_to_nine() {
  grep -E -o -- '9[0-9]{7}9' "$file"
}

# (d)(c): only the matching ID strings with a letter in the parentheses.
hkids_letter() {
  grep -E -o -- '[A-Za-z][0-9]{6}\([A-Za-z]\)' "$file"
}

section "Part (a):" 2 records_chan
section "Part (b):" 2 records_nine_to_nine
section "Part (c):" 2 records_hkid_letter
section "Part (d)(a):" 1 names_chan_cut
section "Part (d)(a) Alternative solution:" 1 names_chan_grep_only
section "Part (d)(a) Second alternative solution:" 2 names_chan_trimmed
section "Part(d)(b)" 2 numbers_nine_to_nine
# The final section has no closing rule.
section "Part(d)(c)" 0 hkids_letter