连云港楼市月销量爬虫
话不多说,只是为了数据分析而已
import re
import time

import requests
import xlwt

# Address of the paginated report index; page N is reached with "&page=N".
list_url = 'http://lygfdc.com/WebSite/Portal/List.aspx?nodeid=4278FFB377C0D3FD'

# Prefix that the hrefs scraped from the index pages are appended to.
DETAIL_BASE_URL = 'http://lygfdc.com/WebSite/Portal/'

# Seconds to wait on a single HTTP request before giving up, so that one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 15

# Pause between two report downloads, in seconds.
REQUEST_DELAY = 0.5

# Menu number -> text searched for inside each report.
# NOTE(review): entry 2 keeps the "区" suffix while the others drop it,
# presumably because the bare "连云" would also match the city name
# "连云港" -- confirm before normalising.
AREA_NAMES = {
    '1': '海州',
    '2': '连云区',
    '3': '赣榆',
    '4': '东海',
    '5': '灌云',
    '6': '灌南',
}

# Column headers written to row 0, starting at column 1 (column 0 holds the
# running row number and has no header).
SHEET_HEADERS = ('月份', '总销售', '总面积', '住宅套数', '住宅面积')

# The <ul> that wraps the article links on an index page.
_LIST_BLOCK_RE = re.compile('<ul class="default ind_TopNewList">(.*?)</ul>')

# One <li> entry inside that <ul>: captures the href and the link text.
_LIST_ITEM_RE = re.compile(
    r'<li><a href="(?P<uuu>.*?)".*?>(?P<name>.*?)</a><span class="time">.*?</span></li>'
)


def get_lists(pages=13):
    """Collect the report links from the first *pages* index pages.

    Links are de-duplicated by their link text: only the first href seen for
    a given title is kept. An index page that cannot be downloaded, or that
    does not contain the expected ``<ul>`` container, is reported and skipped
    instead of aborting the whole run.

    Args:
        pages: Number of index pages to walk, starting at page 1.

    Returns:
        A list of href strings exactly as they appear in the index pages, in
        the order they were encountered.
    """
    urls = []
    seen_titles = set()
    for page in range(1, pages + 1):
        page_url = f'{list_url}&page={page}'
        try:
            html = requests.get(url=page_url, timeout=REQUEST_TIMEOUT).text
        except requests.RequestException as exc:
            print(f'{page_url}获取失败,已跳过: {exc}')
            continue

        block = _LIST_BLOCK_RE.search(html)
        if block is None:
            print(page_url + '未找到列表,已跳过')
            continue

        for item in _LIST_ITEM_RE.finditer(block.group(1)):
            title = item.group('name')
            if title in seen_titles:
                continue
            seen_titles.add(title)
            urls.append(item.group('uuu'))
        print(page_url + '链接获取完成!')
    return urls


def get_shuju(area, url, g):
    """Download one report page and extract the figures for *area*.

    Args:
        area: Text to look for in the report (one of the ``AREA_NAMES``
            values). It is escaped before being placed in the pattern.
        url: Href as returned by ``get_lists``; it is appended to
            ``DETAIL_BASE_URL``.
        g: Unused; kept so existing callers keep working.

    Returns:
        A 5-tuple of strings ``(heading, total_units, total_area,
        housing_units, housing_area)`` taken from the first match, or
        ``None`` when the page does not match the expected wording.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    res = requests.get(DETAIL_BASE_URL + url, timeout=REQUEST_TIMEOUT).text
    pattern = (
        rf'<h1 class="h1title">(.*?)</h1>.*?{re.escape(area)}.*?房共成交(.*?)套,成交面积(.*?)平方'
        r'.*?其中住宅成交(.*?)套,成交面积(.*?)平方米'
    )
    # re.S lets ".*?" run across line breaks between the heading and figures.
    match = re.search(pattern, res, re.S)
    return match.groups() if match else None


def _try_get_shuju(area, url, g):
    """Call ``get_shuju`` and turn a failure into a message plus ``None``."""
    try:
        shuju = get_shuju(area, url, g)
    except requests.RequestException as exc:
        print(f'{url}-获取失败,已跳过: {exc}')
        return None
    if shuju is None:
        print(f'{url}-未匹配到{area}数据,已跳过')
    return shuju


def _export_area(area, urls):
    """Scrape every report in *urls* for *area* and save them to ``<area>.xls``.

    Reports that cannot be fetched or parsed are skipped, and the row counter
    only advances on success so the sheet has no gaps. The workbook is written
    once, in a ``finally`` clause, so rows gathered before an interruption are
    still saved.
    """
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('sheet1')
    for col, header in enumerate(SHEET_HEADERS, start=1):
        sheet.write(0, col, header)

    g = 1
    try:
        for url in urls:
            shuju = _try_get_shuju(area, url, g)
            if shuju is not None:
                # Keep only what follows the last ':' in the page heading.
                title = shuju[0].split(':')[-1]
                sheet.write(g, 0, g)
                for col, value in enumerate((title,) + tuple(shuju[1:5]), start=1):
                    sheet.write(g, col, value)
                print(
                    f'{g}、{title}-已获取--{area}共成交{shuju[1]}套,成交面积{shuju[2]}'
                    f'平方米,其中住宅成交{shuju[3]}套,成交面积{shuju[4]}平方米'
                )
                g += 1
            # Pause between requests, whether or not the last one succeeded.
            time.sleep(REQUEST_DELAY)
    finally:
        book.save(f'{area}.xls')


def main():
    """Fetch the report links once, then export one area per menu choice.

    The menu repeats after each export; the loop ends when standard input is
    closed (EOF).
    """
    urls = get_lists()
    while True:
        print(r'输入你要获取地区编号:1-6' '\n1、海州区' '\n2、连云区' '\n3、赣榆区' '\n4、东海县' '\n5、灌云县' '\n6、灌南县')
        try:
            b = input('输入要获取区域编号:')
        except EOFError:
            break

        area = AREA_NAMES.get(b.strip())
        if area is None:
            print('输入正确编号!')
            continue

        _export_area(area, urls)
        print(f'{area}.xls表格文件已保存,请查阅!'
              f'\n============================================================================================================')


if __name__ == '__main__':
    main()
非特殊说明,本文版权归原作者所有,转载请注明出处
评论列表
发表评论