import re
# Sample document: a <pre> block holding free-text header lines followed by
# rows made up only of digits and blanks.
data = """<html>
<pre>
A Short Study of Notation Efficiency
CACM August, 1960
Smith Jr., H. J.
CA600802 JB March 20, 1978 9:02 PM
205 4 164
210 4 164
214 4 164
642 4 164
1 5 164
</pre>
</html>
"""

# Capture everything after "<pre>" up to the point where only whole
# digit/blank lines remain before "</pre>".
#   * Raw string: "\d" and "\t" reach the regex engine untouched instead of
#     being treated as (invalid) string escapes.
#   * The lookahead consumes complete lines ("\n" followed by digits/blanks),
#     so digits that end a text line (e.g. "... 1978") are never mistaken
#     for part of the numeric tail.
#   * re.S lets "." cross newlines, since the header spans several lines.
expr = re.compile(r'(?<=<pre>).*?(?=(?:\n[\d \t\r]*)*</pre>)', re.S)


def extract_header(html):
    """Return the text header of the first ``<pre>`` block in *html*.

    The trailing lines of the block that contain only digits and blanks are
    dropped and the remaining text is stripped of surrounding whitespace.

    Returns ``None`` when *html* has no ``<pre>...</pre>`` block, and an
    empty string when the block holds nothing but numeric lines.
    """
    match = expr.search(html)
    if match is None:
        return None
    return match.group(0).strip()


header = extract_header(data)
if header is not None:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(header)