ssoehdata
diff --git a/‎Python4Everybody/Ch_11_RegExp/assigndraft.py
Lines changed: 14 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/assigndraft.py
Lines changed: 14 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/assignment1.py
Lines changed: 31 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/assignment1.py
Lines changed: 31 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/charmatch.py
Lines changed: 13 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/charmatch.py
Lines changed: 13 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/example.py
Lines changed: 456 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/example.py
Lines changed: 456 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/findall.py
Lines changed: 13 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/findall.py
Lines changed: 13 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/findall2.py
Lines changed: 8 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/findall2.py
Lines changed: 8 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/findall3.py
Lines changed: 11 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/findall3.py
Lines changed: 11 additions & 0 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/greedy.py
100644100755
Lines changed: 68 additions & 10 deletions b/‎Python4Everybody/Ch_11_RegExp/greedy.py
100644100755
Lines changed: 68 additions & 10 deletions
diff --git a/‎Python4Everybody/Ch_11_RegExp/matchext.py
Lines changed: 7 additions & 0 deletions b/‎Python4Everybody/Ch_11_RegExp/matchext.py
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,14 @@
+# Draft: print the digit runs found on each line of sampledata.txt.
+# [x] extract the numbers in the file
+# [] cast as ints if necessary
+# [] compute the sum of the numbers
+
+import re
+
+# 'with' closes the file even if the loop raises.
+with open('sampledata.txt') as handle:
+    for line in handle:
+        # findall returns a list of strings, one per run of digits
+        x = re.findall(r'[0-9]+', line.rstrip())
+        if x:  # skip lines that contain no digits
+            print(x)
@@ -0,0 +1,31 @@
+#Finding Numbers in a Haystack
+
+#In this assignment you will read through and parse a file with text and numbers. You will extract all the numbers in the file and compute the sum of the numbers.
+#Data Files
+
+#We provide two files for this assignment. One is a sample file where we give you the sum for your testing and the other is the actual data you need to process for the assignment.
+
+    #Sample data: http://py4e-data.dr-chuck.net/regex_sum_42.txt (There are 90 values with a sum=445833)
+    #Actual data: http://py4e-data.dr-chuck.net/regex_sum_2003014.txt (There are 97 values and the sum ends with 724)
+
+#These links open in a new window. Make sure to save the file into the same folder as you will be writing your Python program. Note: Each student will have a distinct data file for the assignment - so only use your own data file for analysis.
+
+#Data Format
+
+#The file contains much of the text from the introduction of the textbook except that random numbers are inserted throughout the text. Here is a sample of the output you might see:
+
+#Why should you learn to write programs? 7746
+#12 1929 8827
+#Writing programs (or programming) is a very creative 
+#7 and rewarding activity.  You can write programs for 
+#many reasons, ranging from making your living to solving
+#8837 a difficult data analysis problem to having fun to helping 128
+#someone else solve a problem.  This book assumes that 
+#everyone needs to know how to program ...
+
+#The sum for the sample text above is 27486. The numbers can appear anywhere in the line. There can be any number of numbers in each line (including none).
+
+#Handling The Data
+
+#The basic outline of this problem is to read the file, look for integers using the re.findall(), looking for a regular expression of '[0-9]+' and then converting the extracted strings to integers and summing up the integers.
+
@@ -0,0 +1,13 @@
+# search for lines that start with 'F', followed by any
+# 2 characters, followed by 'm:'
+
+# each '.' in ^F..m: is a placeholder for exactly one
+# character (anything but a newline), e.g. Fxxm:, F12m:, F!@m:
+import re
+
+# 'with' closes the file once the loop finishes or raises
+with open('mbox-short.txt') as hand:
+    for line in hand:
+        line = line.rstrip()
+        if re.search(r'^F..m:', line):
+            print(line)
@@ -0,0 +1,13 @@
+# this code uses findall() to extract every email-like substring
+# (one or more addresses) from a single string.
+
+import re
+s = 'A message from csev@umich.edu to cwen@iupui.edu about meeting @2PM'
+# raw string: '\S' in a normal literal is an invalid escape sequence
+Ast = re.findall(r'\S+@\S+', s)
+print(Ast)  # ['csev@umich.edu', 'cwen@iupui.edu']
+
+# makes use of the '\S' two-character sequence, which matches
+# a single non-whitespace character.
+
+# \S+  matches as many non-whitespace characters as possible.
@@ -0,0 +1,8 @@
+# print the email-like substrings found on each line of mbox-short.txt
+import re
+with open('mbox-short.txt') as hand:  # closed automatically on exit
+    for line in hand:
+        # raw string avoids the invalid '\S' escape in a normal literal
+        x = re.findall(r'\S+@\S+', line.rstrip())
+        if x:  # findall returns [] when nothing matches
+            print(x)
@@ -0,0 +1,11 @@
+# search for substrings that have an at sign between characters;
+# each must start with a letter or digit and end with a letter
+
+import re
+# 'with' closes the file when the loop finishes or raises
+with open('mbox-short.txt') as hand:
+    for line in hand:
+        # raw string avoids the invalid '\S' escape in a normal literal
+        x = re.findall(r'[a-zA-Z0-9]\S*@\S*[a-zA-Z]', line.rstrip())
+        if x:
+            print(x)
@@ -1,10 +1,68 @@
-# greedy matching / when a string can match more than
-# one possible string it matches the largest found 
-
-import re 
-x = 'From: Using the : character'
-y = re.findall('^F.+:', x)
-print(y)
-
-# in the above, ' From Using the : ' is returned instead of merely 'From: ' as it
-# is longer 
+# demonstrates greedy matching
+# greedy matching returns the largest possible string
+
+# the repeat characters (*) and (+) push outward in both directions
+# to match the largest possible string
+
+# so, the code below returns not just 'From:' but 'From: Using the  :'
+
+import re
+x = 'From: Using the  : character'
+y = re.findall(r'^F.+:', x)
+print(y)  # ['From: Using the  :']
+
+
+# ^F == first character in the match is an F
+
+# .+ == one or more characters
+
+# :  == last character in the match is a colon
+
+
+#   Non-Greedy
+# the .+?  matches one or more characters, but as few as possible
+
+# the example below returns ['From:']
+
+# (re is already imported above; the same sample string is reused)
+x = 'From: Using the  : character'
+y = re.findall(r'^F.+?:', x)
+print(y)  # ['From:']
+
+
+# fine-tuning str extraction
+
+# \S  == exactly one non-whitespace character
+
+# \S+ == at least one non-whitespace character (one or more)
+
+# example below returns:
+#  ['stephen.marquard@uct.ac.za']
+
+# raw strings (r'...') keep '\S' from being read as a string escape
+U = 'From stephen.marquard@uct.ac.za  Sat Jan  5 09:14:16 2008'
+T = re.findall(r'\S+@\S+', U)
+print(T)
+
+# the following:
+# ^From \S+@\S+
+
+# would look for str starting with From followed by a space
+# then the rest of expression
+
+# returns ['From stephen.marquard@uct.ac.za'] (the whole match)
+U = 'From stephen.marquard@uct.ac.za  Sat Jan  5 09:14:16 2008'
+T = re.findall(r'^From \S+@\S+', U)
+print(T)
+
+
+# N.B.: adding ( ) as in the example below tells it to
+# return what is inside the ( ) , although you still match
+# the strings beginning with From
+#  i.e.  what you place inside the ( ) is what is returned
+
+
+# returns ['stephen.marquard@uct.ac.za'] (only the captured group)
+U = 'From stephen.marquard@uct.ac.za  Sat Jan  5 09:14:16 2008'
+T = re.findall(r'^From (\S+@\S+)', U)
+print(T)
@@ -0,0 +1,7 @@
+# Extract digit runs and upper-case vowel runs from a sample string.
+import re
+x = '2 numbers are 19 and 42'
+y = re.compile('[0-9]+').findall(x)    # ['2', '19', '42']
+z = re.compile('[AEIOU]+').findall(x)  # [] -- x has no upper-case vowels
+print(y, z, sep='\n')
+