#! /usr/bin/env python
# 1) Regular Expressions Test
#
# Read a file of (extended per egrep) regular expressions (one per line),
# and apply those to all files whose names are listed on the command line.
# Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
# against a five /etc/termcap files. Tests using more elaborate patters
# would also be interesting. Your code should not break if given hundreds
# of regular expressions or binary files to scan.
# This implementation:
# - combines all patterns into a single one using ( ... | ... | ... )
# - reads patterns from stdin, scans files given as command line arguments
# - produces output in the format <file>:<lineno>:<line>
# - is only about 2.5 times as slow as egrep (though I couldn't run
# Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
import string
import sys
import re
def main():
pats = map(chomp, sys.stdin.readlines())
bigpat = '(' + '|'.join(pats) + ')'
prog = re.compile(bigpat)
for file in sys.argv[1:]:
try:
fp = open(file, 'r')
except IOError, msg:
print "%s: %s" % (file, msg)
continue
lineno = 0
while 1:
line = fp.readline()
if not line:
break
lineno = lineno + 1
if prog.search(line):
print "%s:%s:%s" % (file, lineno, line),
def chomp(s):
return s.rstrip('\n')
if __name__ == '__main__':
main()
|