使用 Python 解析网页内容

class GetWebResult(object):
    """Download job results from the job-results REST endpoint and export them.

    ``main`` fetches the response for ``self.jobs_url``, flattens every entry
    of the first job's ``results`` list into one row and hands the rows to the
    project's ``Excel`` writer, which saves ``JobResult.xls`` in the current
    working directory.

    NOTE(review): ``urllib2``, ``os`` and ``Excel`` are expected to be provided
    by the surrounding module, exactly as the original snippet assumed.
    """

    # Column order of the exported sheet; also the keys produced by
    # ``convert_result``.
    RESULT_COLUMNS = ['Name', 'Image', 'Subsystem', 'Functionality', 'Objective',
                      'Assumptions', 'Steps', 'Tickets', 'Metrics', 'Notes', 'Result']

    def __init__(self, jobs_id=139366):
        """Build the job-results URL.

        Args:
            jobs_id: Integer id of the job to query. Defaults to the id that
                used to be hard-coded, so ``GetWebResult()`` behaves as before.
        """
        self.jobs_url = 'http://cbsp-wm-bl01/icase/api/rest/test/jobresults/?ids=%d' % jobs_id

    @staticmethod
    def get_soup(url, timeout=10):
        """Return the raw response body of ``url``.

        Despite the name, nothing is parsed here; the body is returned exactly
        as read from the response.

        Args:
            url: Address to request.
            timeout: Timeout in seconds passed to ``urlopen``.

        Returns:
            The unparsed response body.
        """
        headers = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'}
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request, timeout=timeout)
        try:
            return response.read()
        finally:
            # Close the response even when read() fails.
            response.close()

    def main(self):
        """Fetch the results of ``self.jobs_url`` and write them to ``JobResult.xls``."""
        save_path = os.path.join(os.getcwd(), 'JobResult.xls')
        results = self._parse_results(self.get_soup(self.jobs_url))
        rows = [self.convert_result(result) for result in results]
        xls = Excel(save_path)
        # Pass a copy so the writer can never mutate the shared column list.
        xls.insert_sheet(sheet_name='results', list_dict=rows,
                         list_key=list(self.RESULT_COLUMNS))

    @staticmethod
    def convert_result(dict):
        """Flatten one result entry into a row keyed by ``RESULT_COLUMNS``.

        Args:
            dict: One entry of a job's ``results`` list. The name shadows the
                builtin but is kept so keyword callers keep working.

        Returns:
            A new dict with one value per column. Fields that are absent, or
            whose parent object is absent, are ``None`` instead of raising.
        """
        record = dict
        case = record.get('case')
        lookup = GetWebResult._nested_get
        as_text = GetWebResult._to_text
        return {
            'Tickets': record.get('tickets'),
            'Metrics': record.get('metrics'),
            'Notes': record.get('notes'),
            'Result': record.get('result'),
            'Name': lookup(case, 'name'),
            'Image': lookup(case, 'image', 'name'),
            'Subsystem': lookup(case, 'taxonomy', 'subsystem', 'name'),
            'Functionality': lookup(case, 'taxonomy', 'functional_area', 'name'),
            'Objective': as_text(lookup(case, 'objective')),
            'Assumptions': lookup(case, 'assumptions'),
            'Steps': as_text(lookup(case, 'steps')),
        }

    @staticmethod
    def _parse_results(raw):
        """Decode the response body and return the first job's result list.

        The body is parsed as JSON rather than evaluated as Python source:
        ``eval`` on a network response can execute arbitrary code, and the
        textual ``null``/``true``/``false`` substitution it needed corrupted
        any string that merely contained one of those words.

        Raises:
            ValueError: If ``raw`` is not valid JSON.
            IndexError: If the response contains no job at all.
        """
        import json  # local import: the file's import header is not visible here

        if isinstance(raw, bytes):
            raw = raw.decode('utf-8')
        jobs = json.loads(raw)
        results = jobs[0].get('results') or []
        return GetWebResult._stringify_literals(results)

    @staticmethod
    def _stringify_literals(value):
        """Recursively replace JSON null/true/false by 'null'/'true'/'false'.

        Keeps the cell values identical to what the previous eval-based
        parsing produced for those literals.
        """
        if value is None:
            return 'null'
        if value is True:
            return 'true'
        if value is False:
            return 'false'
        if isinstance(value, dict):
            return {key: GetWebResult._stringify_literals(item)
                    for key, item in value.items()}
        if isinstance(value, list):
            return [GetWebResult._stringify_literals(item) for item in value]
        return value

    @staticmethod
    def _nested_get(mapping, *keys):
        """Follow ``keys`` through nested dicts; return None once a level is not a dict."""
        current = mapping
        for key in keys:
            if not isinstance(current, dict):
                return None
            current = current.get(key)
        return current

    @staticmethod
    def _to_text(value):
        """Decode UTF-8 byte strings; return every other value unchanged."""
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

weixin
我的微信
有问题微信找我
DannyWu

发表评论

:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:

Protected with IP Blacklist Cloud