# Without further ado: this script exists purely for data analysis.
import time
import requests
import re
import xlwt
# URL of the article list on lygfdc.com (first page, no "&page=" suffix).
# NOTE(review): nothing in the visible code reads this module-level value --
# confirm it is unused elsewhere before removing it.
list_url='http://lygfdc.com/WebSite/Portal/List.aspx?nodeid=4278FFB377C0D3FD'
def get_lists(page_count=13, timeout=10):
    """Collect article links from the paginated list on lygfdc.com.

    Pages 1..page_count of the list are downloaded one after another. From
    each page the first ``<ul class="default ind_TopNewList">`` element is
    taken and every ``<li><a href=...>title</a><span class="time">`` entry in
    it is read. An entry whose title text was already seen (on this or an
    earlier page) is dropped.

    Args:
        page_count: Number of list pages to fetch, starting at page 1.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        list[str]: The ``href`` values of the kept entries, in first-seen
        order. They are returned exactly as they appear in the HTML.

    Raises:
        requests.RequestException: If a page cannot be downloaded.
    """
    item_pattern = re.compile(
        r'<li><a href="(?P<uuu>.*?)".*?>(?P<name>.*?)</a><span class="time">.*?</span></li>'
    )
    url = []
    seen_titles = set()  # set membership keeps de-duplication O(1) per entry
    for i in range(1, page_count + 1):
        page_url = f'http://lygfdc.com/WebSite/Portal/List.aspx?nodeid=4278FFB377C0D3FD&page={i}'
        res = requests.get(url=page_url, timeout=timeout).text
        blocks = re.findall('<ul class="default ind_TopNewList">(.*?)</ul>', res)
        if not blocks:
            # A page without the list (e.g. an index past the last page) must
            # not abort the whole crawl; report it and move on.
            print(page_url + '未找到文章列表,已跳过!')
            continue
        for it in item_pattern.finditer(blocks[0]):
            t = it.group('name')
            if t in seen_titles:
                continue
            seen_titles.add(t)
            url.append(it.group('uuu'))
        print(page_url + '链接获取完成!')
    return url
def get_shuju(area, url, g, timeout=10):
    """Download one article and extract the sales figures reported for *area*.

    Args:
        area: District keyword to locate in the article text. It is matched
            literally (regex metacharacters are escaped).
        url: Article link relative to ``http://lygfdc.com/WebSite/Portal/``.
        g: Unused by this function; kept so existing callers keep working.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        tuple[str, ...]: Five captured strings, in order: the ``h1title``
        heading, the total number of units sold, the total area sold, the
        number of residential units sold and the residential area sold.

    Raises:
        IndexError: If the article does not contain the expected sentence for
            *area* (same exception type the previous ``findall(...)[0]``
            produced, now with a descriptive message).
        requests.RequestException: If the page cannot be downloaded.
    """
    page_url = 'http://lygfdc.com/WebSite/Portal/' + url
    res = requests.get(page_url, timeout=timeout).text
    # NOTE(review): the punctuation inside this pattern is ASCII (",");
    # confirm it matches the punctuation the site actually serves.
    pattern = rf'<h1 class="h1title">(.*?)</h1>.*?{re.escape(area)}.*?房共成交(.*?)套,成交面积(.*?)平方.*?其中住宅成交(.*?)套,成交面积(.*?)平方米'
    # Only the first occurrence is needed, so stop there instead of
    # collecting every match in the page.
    found = re.search(pattern, res, re.S)
    if found is None:
        raise IndexError(f'no sales figures for {area!r} found in {page_url}')
    return found.groups()
# Menu number typed by the user -> district keyword searched for in articles.
_AREA_BY_CHOICE = {
    '1': '海州',
    '2': '连云区',
    '3': '赣榆',
    '4': '东海',
    '5': '灌云',
    '6': '灌南',
}

# Column titles written to row 0, starting at column 1 (column 0 holds the
# running row number and has no title).
_HEADERS = ('月份', '总销售', '总面积', '住宅套数', '住宅面积')


def main():
    """Interactively export per-district sales figures to ``<area>.xls``.

    The article links are collected once with ``get_lists()``. The user is
    then asked repeatedly for a district number (1-6); for each valid choice
    every article is fetched with ``get_shuju()`` and one row per article is
    written to a fresh workbook named after the district. The loop has no
    exit condition of its own; stop the script by interrupting it.
    """
    urls = get_lists()
    while True:
        print('输入你要获取地区编号:1-6'
              '\n1、海州区'
              '\n2、连云区'
              '\n3、赣榆区'
              '\n4、东海县'
              '\n5、灌云县'
              '\n6、灌南县')
        b = input('输入要获取区域编号:')
        area = _AREA_BY_CHOICE.get(b)
        if area is None:
            print('输入正确编号!')
            continue

        # Build the workbook only once the choice is known to be valid.
        new_xls = xlwt.Workbook(encoding='utf-8')
        new_sheet = new_xls.add_sheet('sheet1')
        for col, header in enumerate(_HEADERS, start=1):
            new_sheet.write(0, col, header)

        g = 1  # next free row; also the running number stored in column 0
        try:
            for url in urls:
                try:
                    shuju = get_shuju(area, url, g)
                except IndexError:
                    # This article has no matching sentence for the district;
                    # skip it instead of aborting the whole export.
                    print(url + '-未匹配到' + area + '数据,已跳过')
                    time.sleep(0.5)
                    continue
                heading, total_units, total_area, home_units, home_area = shuju
                title = heading.split(':')[-1]
                new_sheet.write(g, 0, g)
                new_sheet.write(g, 1, title)
                new_sheet.write(g, 2, total_units)
                new_sheet.write(g, 3, total_area)
                new_sheet.write(g, 4, home_units)
                new_sheet.write(g, 5, home_area)
                print(f'{g}、{title}-已获取--{area}共成交{total_units}套,成交面积{total_area}平方米,其中住宅成交{home_units}套,成交面积{home_area}平方米')
                time.sleep(0.5)  # pause between requests
                g += 1
        finally:
            # Written once instead of after every row; ``finally`` makes sure
            # the rows gathered so far reach disk even if a request fails or
            # the run is interrupted, and that the file exists when no row
            # matched.
            new_xls.save(f'{area}.xls')
        print(f'{area}.xls表格文件已保存,请查阅!'
              f'\n============================================================================================================')


if __name__ == '__main__':
    main()
# (Residue from the source web page: "0 评论" / "0 comments".)