http://www.baidu.com/s?wd=python

wd 参数的值就是在百度搜索引擎里输入的关键字。

分析页面:

获取每一页的链接:翻页链接位于 id 为 page 的 div 下的 a 标签中。

代码:

root@kali:~/py# more table.py 

import urllib

import urllib2

from lxml import etree

# Search keyword to look up on Baidu.
text = "python"

# Percent-encode the keyword so that spaces, "&", "#" or non-ASCII bytes
# cannot break the query string; a plain ASCII word such as "python" is
# left unchanged by urllib.quote.
starurl = "http://www.baidu.com/s?wd=%s" % urllib.quote(text)

# Fetch the first result page; close the connection even if read() fails.
response = urllib.urlopen(starurl)
try:
    html = response.read()
finally:
    response.close()

# Parse the markup as received. It must NOT be lower-cased first: doing so
# also lower-cases every href value and corrupts case-sensitive query
# parameters (for example the rsv_t token) in the collected page links.
page = etree.HTML(html.decode('utf-8'))

# Collect the href attribute of every <a> found under the div whose id is
# "page". To get the link text instead, replace "@href" with "text()".
hrefs = page.xpath("//div[@id='page']//a/@href")

# Prefix the host to each extracted href to build the full page URL.
PageUrlList = ["http://www.baidu.com" + href for href in hrefs]

# Single-argument print(...) outputs the same text as the print statement.
print("list:")
print(PageUrlList)

运行结果

root@kali:~/py# python table.py 

list:

['http://www.baidu.com/s?wd=python&pn=10&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=20&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=30&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=40&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=50&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=60&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=70&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=80&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=90&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum', 'http://www.baidu.com/s?wd=python&pn=10&oq=python&ie=utf-8&usm=4&rsv_pq=897a8df20000da9b&rsv_t=075dbfoz2dplnlb7ts%2boyopf06je%2bi1j1whmgcrvjurdkieecwvsl%2bhdvum&rsv_page=1']