import urllib
from urllib.request import urlopen
address='http://www.iitb.ac.in/acadpublic/RunningCourses.jsp?deptcd=EE&year=2012&semester=1'
source= urlopen(address).read()
source=str(source)
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
def handle_data(self, data):
x=str(data)
if x != ('rntttt') or ('rnttttt') or ('rnrnttt'):
print("Encountered some data:",x)
parser = MyHTMLParser(strict=False)
parser.feed(source)
上面的代码不能工作。它仍然打印"rntttt"之类的东西。有什么建议吗?
if x != ('rntttt') or ('rnttttt') or ('rnrnttt')
应该if x not in ('rntttt', 'rnttttt', 'rnrnttt')
或更好:
if not x.isspace()
你的第一个代码被计算为:
if (x != ('rntttt')) or 'rnttttt' or 'rnrnttt'
注意最后的值被计算为它们自己!只有空字符串将计算False
,因此该条件将始终通过
可能是t和r等的数量在变化,试试这个:
if x.replace('r','').replace('n','').replace('t','').strip():
print("Encountered some data:",x)