Extract a string between 2 substrings (Python)

vegaseat 2 Tallied Votes 1K Views Share

A function to take a text and extract the string between two given substrings. Allows you to find the nth occurrence. Also safeguards against dead ends.

''' str_extract_between_nth.py
extract the text between two substrings using Python's split function
can start after the nth occurrence of substring1; the extraction ends at
the first occurrence of substring2 that follows it
extraction is case sensitive
Tested with Python27 and Python33  by vegaseat  04feb2013
'''

def extract_between(text, sub1, sub2, nth=1):
    """
    Extract the part of text that lies between two marker substrings.

    The extraction starts right after the nth occurrence of sub1 and
    stops right before the first occurrence of sub2 that follows it.
    Matching is case sensitive.

    Arguments:
        text: the string to search
        sub1: marker that precedes the wanted text
        sub2: marker that follows the wanted text
        nth:  which occurrence of sub1 to start from (1 = first);
              a negative value starts from the last occurrence

    Returns the extracted string (possibly empty), or None when text
    has fewer than nth occurrences of sub1, when nth is 0, or when sub2
    does not appear after the chosen occurrence of sub1.

    Raises ValueError if sub1 is an empty string, or if sub2 is empty
    once sub1 has been located (empty separators are rejected by
    str.split and str.partition).
    """
    pieces = text.split(sub1, nth)
    # Splitting at the nth occurrence yields nth + 1 pieces.  Fewer pieces
    # mean sub1 ran out early, so the last piece is NOT the text after the
    # requested occurrence.  max() also covers nth <= 0, where at least
    # one real split is still required.
    if len(pieces) <= max(nth, 1):
        return None
    # partition() reports through its middle element whether sub2 was
    # actually found, so a missing sub2 is not silently ignored
    before, found, _ = pieces[-1].partition(sub2)
    if not found:
        return None
    return before


# Demo 1: default nth=1 on a one-sentence sample
text = "The quick brown fox jumps over the lazy dog."

# upper case 'The' is followed by 'fox', so there is text to extract
after_capital = extract_between(text, 'The', 'fox')
# there is no 'fox' after lower case 'the', so nothing can be extracted
after_lower = extract_between(text, 'the', 'fox')

print(repr(after_capital))
print('-' * 20)
print(repr(after_lower))

# result ...
# ' quick brown '
# --------------------
# None

print('=' * 20)

# Demo 2: test nth occurrence of substrings
# (trailing blanks at the line ends are spelled out so they stay visible)
text2 = (
    "The quick brown fox jumps over the lazy dog. The rather sly fox \n"
    "laughed at the stupid dog.    \n"
)

# start after the second 'The', nth=2
second_capital = extract_between(text2, 'The', 'fox', nth=2)
# the second lower case 'the' is the one inside 'rather';
# search for 'the ' (with a trailing space) to match the stand-alone word
second_lower = extract_between(text2, 'the', 'fox', nth=2)

print(repr(second_capital))
print('-' * 20)
print(repr(second_lower))

# result ...
# ' rather sly '
# --------------------
# 'r sly '