文章
位置: 首页 >文章
python 获取全国所有的省、市、县、镇、村
- 爬虫
- 2021-09-30
- 1630
- 0
获取全国所有的省、市、县、镇、村
from requests_html import HTMLSession
import requests
import time
import re
import datetime
import json
session = HTMLSession()
import mysql.connector
mydb = mysql.connector.connect(
host="localhost",
user="root",
passwd="123456",
database="test"
)
mycursor = mydb.cursor()
sql = "TRUNCATE TABLE area"
mycursor.execute(sql)
mydb.commit()
# 获取省与直辖市
def get_Province_list():
# 返回一个 response 对象
response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/index.html') # 单位秒数
content = response.html.find('table.provincetable', first=True)
li_list = content.find('a')
for li in li_list:
url = li.attrs['href']
code = re.findall("\d+",url)[0]
area_code = str(code)+'0000000000'
area_name=li.text
# SQL 插入语句
sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s, %s, %s, %s)"
val = (area_name, area_code, 0 ,0)
mycursor.execute(sql, val)
pid = mycursor.lastrowid
mydb.commit() # 数据表内容有更新,必须使用到该语句
get_City_list(code,area_code, pid)
#获取市
def get_City_list(code,parent_code, pid):
# 返回一个 response 对象
response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'+code+'.html') # 单位秒数
content = response.html.find('table.citytable', first=True)
citys = content.find('tr.citytr')
for city in citys:
td_list = city.find('td')
ycode = td_list[0].find('a')[0]
code1 = ycode.text[0:4]
area_name = td_list[1].find('a')[0].text
area_code = ycode.text[0:12]
# SQL 插入语句
sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s, %s, %s, %s)"
val = (area_name, area_code, parent_code, pid)
mycursor.execute(sql, val)
pid1 = mycursor.lastrowid
mydb.commit() # 数据表内容有更新,必须使用到该语句
get_County_list(code,code1,area_code,pid1)
#获取县级市
def get_County_list(code,code1,parent_code,pid1):
# 返回一个 response 对象
response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'+code+'/'+code1+'.html')
content = response.html.find('table.countytable', first=True)
if content:
citys = content.find('tr.countytr')
for city in citys:
td_list = city.find('td')
ycode=td_list[0].find('a')
if len(ycode) == 0:
area_code =td_list[0].text
area_name = td_list[1].text
else:
area_code = ycode[0].text[0:12]
area_name = td_list[1].find('a')[0].text
code2 = area_code[2:4]
code3 = ycode[0].text[0:6]
sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s,%s, %s, %s)"
val = (area_name, area_code, code, pid1)
mycursor.execute(sql, val)
pid2 = mycursor.lastrowid
mydb.commit() # 数据表内容有更新,必须使用到该语句
get_jd_data(code,code2,code3,area_code,pid2)
def get_jd_data(code,code2,code3,parent_code,pid2):
response = session.get('http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2020/' + code + '/' + code2 + '/' + code3 + '.html')
content = response.html.find('table.towntable', first=True)
if content:
citys = content.find('tr.towntr')
for city in citys:
td_list = city.find('td')
ycode = td_list[0].find('a')
if len(ycode) == 0:
area_code = td_list[0].text
area_name = td_list[1].text
else:
area_code = ycode[0].text[0:12]
area_name = td_list[1].find('a')[0].text
sql = "INSERT INTO area (area_name, area_code,parent_code,parent_id) VALUES (%s,%s, %s, %s)"
val = (area_name, area_code, parent_code, pid2)
mycursor.execute(sql, val)
mydb.commit() # 数据表内容有更新,必须使用到该语句
if __name__ == '__main__':
get_Province_list()
表结构:
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for area
-- ----------------------------
DROP TABLE IF EXISTS `area`;
CREATE TABLE `area` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`area_code` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区编码',
`area_name` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '地区名称',
`parent_code` varchar(100) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '上级编码',
`parent_id` bigint(20) NULL DEFAULT 0 COMMENT '上级ID',
`status` tinyint(1) NULL DEFAULT 1 COMMENT '状态 0 禁用 1 启用',
`create_time` timestamp(0) NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3184 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci COMMENT = '地区' ROW_FORMAT = Dynamic;
SET FOREIGN_KEY_CHECKS = 1;
转载:欢迎来到本站,转载请注明文章出处https://www.ormcc.com ,欢迎加入技术讨论群599606903
下一篇:CSS跳动的文字
文章评论
- 0
- 0
- 0
- 分享
- 打赏
ormcc
一个爱捣鼓的程序员
- 160
会员 - 87
今日访问 - 607
文章
IP访问121772次,运行1480天
评论排行
文章归档
- 还没有相关文章