Skip to content

Commit 5cf6e31

Browse files
committed
update
1 parent 5a8267d commit 5cf6e31

File tree

46,825 files changed

+47176
-3423
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46,825 files changed

+47176
-3423
lines changed

README.md

Lines changed: 261 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,276 @@
22

33
中华人民共和国行政区划:省级、地级、县级、乡级和村级
44

5+
> Gitee <https://gitee.com/netnr/zoning>
6+
7+
> GitHub <https://github.com/netnr/zoning>
8+
59
----------
6-
# 数据来源
10+
# 来源
711
<http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/>
812

913
<http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017>
1014

1115
----------
12-
# 使用方法
16+
# 使用
1317
- 打开页面 <http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html>
1418
- 打开浏览器控制台(推荐谷歌,请不要用IE系列,谢谢)
1519
- 拷贝脚本`zoning.js`的内容粘贴到控制台运行
1620

1721
----------
18-
# 注意事项
19-
初次抓取可能会出错,请求太频繁,浏览器可能会卡顿,也会出现网络错误等问题
22+
# 注意
23+
首次抓取会出现大量失败请求,再次抓取会从浏览器缓存获取,非常快
24+
25+
----------
26+
# 代码
27+
```
28+
/**
 * Browser-console crawler for the administrative-division code pages under
 * `config.urlprefix`. Paste into the console of a page on that site and call
 * `zoning.run()`; parsed pages accumulate in `zoning.matchdata` and are
 * finally offered as a zip download.
 *
 * Declared with `var` on purpose: the script is meant to be pasted into a
 * console, and `var` tolerates being pasted a second time.
 */
var zoning = {
    // Script version.
    version: "1.0.0",

    /**
     * Load an external script by appending a <script> element to <head>.
     *
     * @param {string} src URL of the script to load.
     * @param {Function} [success] Invoked at most once after the script loads.
     */
    getScript: function (src, success) {
        const ele = document.createElement("SCRIPT");
        ele.src = src;
        ele.type = "text/javascript";
        if (typeof success === "function") {
            let done = false;
            // The same handler is registered for both events, so guard
            // against it running the callback more than once.
            ele.onload = ele.onreadystatechange = function () {
                const state = this.readyState;
                if (done || (state && state !== "loaded" && state !== "complete")) {
                    return;
                }
                done = true;
                ele.onload = ele.onreadystatechange = null;
                success();
            };
        }
        // Handlers are attached before insertion so a load event cannot be missed.
        document.getElementsByTagName("HEAD")[0].appendChild(ele);
    },

    // Crawl configuration.
    config: {
        // JSZip CDN URL.
        urljszip: "https://lib.baomitu.com/jszip/3.1.4/jszip.min.js",
        // FileSaver CDN URL.
        urlfilesaver: "https://lib.baomitu.com/FileSaver.js/2014-11-29/FileSaver.min.js",
        // Base URL of the pages to crawl.
        urlprefix: "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/",
        // Depth the crawl starts at.
        deep: 1,
        // Maximum depth to crawl.
        //   5: village level, roughly 46800
        //   4: street/town level, roughly 3380
        deepmax: 5,
        // Item describing the first page to fetch.
        item: {
            // Parent code.
            id: "00",
            // Page address relative to urlprefix, without the ".html" suffix.
            href: "index"
        }
    },

    /**
     * Fetch one page, parse it with `matcharray`, and recurse into every
     * parsed child while `deep` is below `config.deepmax`.
     *
     * `taskcount` is incremented before the request and decremented exactly
     * once when the request settles, whether it succeeded or failed; children
     * are scheduled before the decrement so the counter cannot reach zero
     * while work is still pending.
     *
     * @param {string} urlprefix Base URL ending in "/".
     * @param {number} deep Depth of the page being fetched (1 = index page).
     * @param {{id: string, href: ?string}} item Entry whose page is fetched.
     * @returns {(boolean|undefined)} `false` when the item has no page to fetch.
     */
    grab: function (urlprefix, deep, item) {
        if (item.href == null) {
            return false;
        }

        // Pages at depth 4 and 5 live in sub-directories derived from the code.
        let url = urlprefix;
        if (deep === 4) {
            url += item.id.slice(0, 2) + "/";
        } else if (deep === 5) {
            url += item.id.slice(0, 2) + "/" + item.id.slice(2, 4) + "/";
        }
        url += item.href + ".html";

        zoning.taskcount += 1;

        fetch(url).then(function (res) {
            // An HTTP error page must not be parsed as if it were data.
            if (!res.ok) {
                throw new Error("HTTP " + res.status + " for " + url);
            }
            return res.arrayBuffer();
        }).then(function (buffer) {
            // The pages are decoded as GBK, same as the original FileReader call.
            const html = new TextDecoder("gbk").decode(buffer);
            const list = zoning.matcharray(html, item, deep);
            if (deep < zoning.config.deepmax) {
                list.forEach(function (child) {
                    zoning.grab(urlprefix, deep + 1, child);
                });
            }
        }).catch(function (e) {
            // Network, HTTP, decoding and parsing failures all end up here.
            zoning.catchdata.push({ item: item, url: url, error: e });
        }).then(function () {
            zoning.taskcount -= 1;
        });
    },

    // Number of requests currently in flight.
    taskcount: 0,
    // Number of pages parsed and recorded.
    matchcount: 0,
    // Failure records: { item, url, error }.
    catchdata: [],
    // Parsed results keyed by file name (code prefix).
    matchdata: {},

    /**
     * Parse one page into a list of `{ id, text, href }` entries and record
     * the list in `zoning.matchdata` under a key derived from `deep`/`item`.
     *
     * Link-less rows get `href: null` and are listed before linked rows.
     * Only relative links ending in ".html" are treated as child pages.
     *
     * @param {string} data Page HTML.
     * @param {{id: string}} item Entry the page belongs to.
     * @param {number} deep Depth of the page (1 = index … 5 = village list).
     * @returns {Array<{id: string, text: string, href: ?string}>} Parsed entries.
     */
    matcharray: function (data, item, deep) {
        const arr = [];
        let m;

        if (deep !== 5) {
            // Two-column rows without a link. The look-ahead rejects
            // three-column village rows, whose second cell is a class code
            // rather than a name.
            const plainRow = /<td>([0-9]{12})<\/td><td>([^<]*)<\/td>(?!\s*<td)/g;
            while ((m = plainRow.exec(data)) !== null) {
                arr.push({ href: null, id: m[1], text: m[2] });
            }
        }

        // Collect the anchors that can be followed as child pages.
        const links = [];
        const anchor = /<a\b[^>]*?\bhref\s*=\s*(["'])(.*?)\1[^>]*>([\s\S]*?)<\/a>/gi;
        while ((m = anchor.exec(data)) !== null) {
            const target = m[2];
            const isExternal = /^(?:[a-z][a-z0-9+.\-]*:|\/\/)/i.test(target);
            if (isExternal || !/\.html$/i.test(target)) {
                continue;
            }
            links.push({
                href: target.replace(/\.html$/i, ""),
                text: m[3].replace(/<[^>]*>/g, "").trim()
            });
        }

        if (deep === 1) {
            // Index page: one anchor per entry; the file name doubles as the code.
            links.forEach(function (link) {
                arr.push({ id: link.href, href: link.href, text: link.text });
            });
        } else if (deep >= 2 && deep <= 4) {
            // Each row carries two anchors: the 12-digit code, then the name.
            // Anchors that do not start such a pair are skipped instead of
            // being paired blindly.
            for (let i = 0; i < links.length; i++) {
                const code = links[i];
                const name = links[i + 1];
                if (name === undefined || !/^[0-9]{12}$/.test(code.text)) {
                    continue;
                }
                arr.push({ href: code.href, id: code.text, text: name.text });
                i += 1;
            }
        } else if (deep === 5) {
            // Village rows: code, 3-digit class code, name; never linked.
            const villageRow = /<td>([0-9]{12})<\/td><td>[0-9]{3}<\/td><td>(.*?)<\/td>/g;
            while ((m = villageRow.exec(data)) !== null) {
                arr.push({ href: null, id: m[1], text: m[2].split("<")[0] });
            }
        }

        // A page at depth 1-4 that yielded nothing is not recorded.
        if (arr.length === 0 && deep >= 1 && deep <= 4) {
            return [];
        }

        // File name (code prefix) derived from the depth.
        const prefixLength = { 3: 4, 4: 6, 5: 9 };
        let filename = "00";
        if (deep === 2) {
            filename = item.id;
        } else if (prefixLength[deep] !== undefined) {
            filename = item.id.slice(0, prefixLength[deep]);
        }

        zoning.matchdata[filename] = arr;
        // Count the recorded page.
        zoning.matchcount += 1;
        return arr;
    },

    /**
     * Public entry point: build and download the zip from the data collected so far.
     */
    zip: function () {
        zoning.ziping(zoning.matchdata, zoning.catchdata);
    },

    /**
     * Build `zoning.zip` (one JSON file per key, plus `all.json`, plus
     * `catch.json` when there are failures) and trigger the download.
     * JSZip and FileSaver are loaded from the configured CDNs only when
     * they are not already present on the page.
     *
     * The inputs are not modified; shortened copies of the rows are written.
     *
     * @param {Object<string, Array<{id: string, text: string}>>} matchdata Parsed rows per file name.
     * @param {Array<{item: Object, url: string, error: *}>} catchdata Failure records.
     */
    ziping: function (matchdata, catchdata) {
        // Key length -> number of leading id characters kept in that file.
        const idLengthByKeyLength = { 2: 4, 4: 6, 6: 9 };

        const build = function () {
            const zip = new JSZip();
            const all = {};
            Object.keys(matchdata).forEach(function (key) {
                const keep = idLengthByKeyLength[key.length];
                const rows = matchdata[key].map(function (row) {
                    return {
                        id: keep === undefined ? row.id : row.id.slice(0, keep),
                        text: row.text
                    };
                });
                all[key] = rows;
                zip.file(key + ".json", JSON.stringify(rows));
            });
            zip.file("all.json", JSON.stringify(all));
            if (catchdata.length) {
                // Error objects serialise to "{}", so store their text instead.
                const failures = catchdata.map(function (entry) {
                    return { item: entry.item, url: entry.url, error: String(entry.error) };
                });
                zip.file('catch.json', JSON.stringify(failures));
            }
            zip.generateAsync({ type: "blob" }).then(function (content) {
                saveAs(content, "zoning.zip");
            });
        };

        const loadIfMissing = function (missing, url, next) {
            if (missing) {
                zoning.getScript(url, next);
            } else {
                next();
            }
        };

        loadIfMissing(typeof JSZip === "undefined", zoning.config.urljszip, function () {
            loadIfMissing(typeof saveAs === "undefined", zoning.config.urlfilesaver, build);
        });
    },

    /**
     * Start crawling from `config.item` and poll every 4 seconds; once no
     * request is in flight, stop polling and download the zip.
     */
    run: function () {
        // Calling run() again must not leave an earlier poller behind.
        if (zoning.taskid != null) {
            clearInterval(zoning.taskid);
        }
        zoning.startTime = new Date().valueOf();
        zoning.taskid = setInterval(function () {
            console.log("count:" + zoning.matchcount, "taskcount:" + zoning.taskcount);
            if (zoning.taskcount === 0) {
                clearInterval(zoning.taskid);
                zoning.taskid = null;
                zoning.zip();
            }
        }, 1000 * 4);
        console.log('fetching ... please see the network tab');
        zoning.grab(zoning.config.urlprefix, zoning.config.deep, zoning.config.item);
    }
};
252+
253+
// Start the crawl; can also be invoked manually
254+
zoning.run();
255+
256+
// Download the zip once crawling has finished
257+
//zoning.zip();
258+
259+
/*
260+
* Note:
261+
*
262+
* The first crawl produces a large number of failed requests; a repeat crawl is served from the browser cache and is very fast.
263+
*
264+
* Files:
265+
* 00.json root data
266+
* 12.json level-2 data
267+
* 1234.json level-3 data
268+
* 123456.json level-4 data
269+
* 123456789.json level-5 data
270+
*
271+
* Other:
272+
* all.json all data
273+
* catch.json crawl failure records (present only when failures occurred; in testing 5 page requests failed)
274+
*/
275+
```
20276

21-
由于浏览器有缓存机制,第二次开始从缓存获取页面内容,很快能完成
277+
> [联系打赏](https://ss.netnr.com/contact)

dist/zoning/11.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{"id":"110100000000","text":"市辖区"}]
1+
[{"id":"1101","text":"市辖区"}]

dist/zoning/1101.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{"id":"110101000000","text":"东城区"},{"id":"110102000000","text":"西城区"},{"id":"110105000000","text":"朝阳区"},{"id":"110106000000","text":"丰台区"},{"id":"110107000000","text":"石景山区"},{"id":"110108000000","text":"海淀区"},{"id":"110109000000","text":"门头沟区"},{"id":"110111000000","text":"房山区"},{"id":"110112000000","text":"通州区"},{"id":"110113000000","text":"顺义区"},{"id":"110114000000","text":"昌平区"},{"id":"110115000000","text":"大兴区"},{"id":"110116000000","text":"怀柔区"},{"id":"110117000000","text":"平谷区"},{"id":"110118000000","text":"密云区"},{"id":"110119000000","text":"延庆区"}]
1+
[{"id":"110101","text":"东城区"},{"id":"110102","text":"西城区"},{"id":"110105","text":"朝阳区"},{"id":"110106","text":"丰台区"},{"id":"110107","text":"石景山区"},{"id":"110108","text":"海淀区"},{"id":"110109","text":"门头沟区"},{"id":"110111","text":"房山区"},{"id":"110112","text":"通州区"},{"id":"110113","text":"顺义区"},{"id":"110114","text":"昌平区"},{"id":"110115","text":"大兴区"},{"id":"110116","text":"怀柔区"},{"id":"110117","text":"平谷区"},{"id":"110118","text":"密云区"},{"id":"110119","text":"延庆区"}]

dist/zoning/110101.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{"id":"110101001000","text":"东华门街道办事处"},{"id":"110101002000","text":"景山街道办事处"},{"id":"110101003000","text":"交道口街道办事处"},{"id":"110101004000","text":"安定门街道办事处"},{"id":"110101005000","text":"北新桥街道办事处"},{"id":"110101006000","text":"东四街道办事处"},{"id":"110101007000","text":"朝阳门街道办事处"},{"id":"110101008000","text":"建国门街道办事处"},{"id":"110101009000","text":"东直门街道办事处"},{"id":"110101010000","text":"和平里街道办事处"},{"id":"110101011000","text":"前门街道办事处"},{"id":"110101012000","text":"崇文门外街道办事处"},{"id":"110101013000","text":"东花市街道办事处"},{"id":"110101014000","text":"龙潭街道办事处"},{"id":"110101015000","text":"体育馆路街道办事处"},{"id":"110101016000","text":"天坛街道办事处"},{"id":"110101017000","text":"永定门外街道办事处"}]
1+
[{"id":"110101001","text":"东华门街道办事处"},{"id":"110101002","text":"景山街道办事处"},{"id":"110101003","text":"交道口街道办事处"},{"id":"110101004","text":"安定门街道办事处"},{"id":"110101005","text":"北新桥街道办事处"},{"id":"110101006","text":"东四街道办事处"},{"id":"110101007","text":"朝阳门街道办事处"},{"id":"110101008","text":"建国门街道办事处"},{"id":"110101009","text":"东直门街道办事处"},{"id":"110101010","text":"和平里街道办事处"},{"id":"110101011","text":"前门街道办事处"},{"id":"110101012","text":"崇文门外街道办事处"},{"id":"110101013","text":"东花市街道办事处"},{"id":"110101014","text":"龙潭街道办事处"},{"id":"110101015","text":"体育馆路街道办事处"},{"id":"110101016","text":"天坛街道办事处"},{"id":"110101017","text":"永定门外街道办事处"}]

dist/zoning/110101001.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101001001","text":"多福巷社区居委会"},{"id":"110101001002","text":"银闸社区居委会"},{"id":"110101001005","text":"东厂社区居委会"},{"id":"110101001006","text":"智德社区居委会"},{"id":"110101001007","text":"南池子社区居委会"},{"id":"110101001008","text":"黄图岗社区居委会"},{"id":"110101001009","text":"灯市口社区居委会"},{"id":"110101001010","text":"正义路社区居委会"},{"id":"110101001011","text":"甘雨社区居委会"},{"id":"110101001013","text":"台基厂社区居委会"},{"id":"110101001014","text":"韶九社区居委会"},{"id":"110101001015","text":"王府井社区居委会"}]

dist/zoning/110101002.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101002001","text":"隆福寺社区居委会"},{"id":"110101002002","text":"吉祥社区居委会"},{"id":"110101002003","text":"黄化门社区居委会"},{"id":"110101002004","text":"钟鼓社区居委会"},{"id":"110101002005","text":"魏家社区居委会"},{"id":"110101002006","text":"汪芝麻社区居委会"},{"id":"110101002008","text":"景山东街社区居委会"},{"id":"110101002009","text":"皇城根北街社区居委会"}]

dist/zoning/110101003.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101003001","text":"交东社区居委会"},{"id":"110101003002","text":"福祥社区居委会"},{"id":"110101003003","text":"大兴社区居委会"},{"id":"110101003005","text":"府学社区居委会"},{"id":"110101003007","text":"鼓楼苑社区居委会"},{"id":"110101003008","text":"菊儿社区居委会"},{"id":"110101003009","text":"南锣鼓巷社区居委会"}]

dist/zoning/110101004.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101004001","text":"交北头条社区居委会"},{"id":"110101004002","text":"北锣鼓巷社区居委会"},{"id":"110101004003","text":"国子监社区居委会"},{"id":"110101004004","text":"钟楼湾社区居委会"},{"id":"110101004005","text":"宝钞南社区居委会"},{"id":"110101004006","text":"五道营社区居委会"},{"id":"110101004009","text":"分司厅社区居委会"},{"id":"110101004011","text":"国旺社区居委会"},{"id":"110101004012","text":"花园社区居委会"}]

dist/zoning/110101005.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101005001","text":"海运仓社区居委会"},{"id":"110101005002","text":"北新仓社区居委会"},{"id":"110101005004","text":"门楼社区居委会"},{"id":"110101005005","text":"十三条社区居委会"},{"id":"110101005006","text":"民安社区居委会"},{"id":"110101005008","text":"九道湾社区居委会"},{"id":"110101005009","text":"北官厅社区居委会"},{"id":"110101005010","text":"青龙社区居委会"},{"id":"110101005011","text":"小菊社区居委会"},{"id":"110101005012","text":"藏经馆社区居委会"},{"id":"110101005014","text":"草园社区居委会"},{"id":"110101005016","text":"前永康社区居委会"}]

dist/zoning/110101006.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101006001","text":"南门仓社区居委会"},{"id":"110101006002","text":"七条社区居委会"},{"id":"110101006003","text":"二条社区居委会"},{"id":"110101006006","text":"六条社区居委会"},{"id":"110101006007","text":"豆瓣社区居委会"},{"id":"110101006008","text":"八条社区居委会"},{"id":"110101006009","text":"总院社区居委会"}]

dist/zoning/110101007.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101007001","text":"史家社区居委会"},{"id":"110101007002","text":"内务社区居委会"},{"id":"110101007003","text":"演乐社区居委会"},{"id":"110101007005","text":"礼士社区居委会"},{"id":"110101007006","text":"朝内头条社区居委会"},{"id":"110101007007","text":"朝西社区居委会"},{"id":"110101007009","text":"竹杆社区居委会"},{"id":"110101007011","text":"大方家社区居委会"},{"id":"110101007013","text":"新鲜社区居委会"}]

dist/zoning/110101008.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101008005","text":"赵家楼社区居委会"},{"id":"110101008006","text":"西总布社区居委会"},{"id":"110101008007","text":"大雅宝社区居委会"},{"id":"110101008012","text":"苏州社区居委会"},{"id":"110101008014","text":"外交部街社区居委会"},{"id":"110101008015","text":"站东社区居委会"},{"id":"110101008016","text":"金宝街北社区居委会"}]

dist/zoning/110101009.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101009003","text":"胡家园社区居委会"},{"id":"110101009004","text":"新中街社区居委会"},{"id":"110101009005","text":"清水苑社区居委会"},{"id":"110101009006","text":"新中西里社区居委会"},{"id":"110101009008","text":"十字坡社区居委会"},{"id":"110101009010","text":"东外大街社区居委会"},{"id":"110101009012","text":"东环社区居委会"},{"id":"110101009013","text":"香河园北里社区居委会"},{"id":"110101009015","text":"工人体育馆社区居委会"},{"id":"110101009016","text":"东外大街北社区居委会"}]

dist/zoning/110101010.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101010001","text":"交通社区居委会"},{"id":"110101010002","text":"林调社区居委会"},{"id":"110101010003","text":"民旺社区居委会"},{"id":"110101010008","text":"安德路社区居委会"},{"id":"110101010009","text":"二区社区居委会"},{"id":"110101010011","text":"七区社区居委会"},{"id":"110101010013","text":"化工社区居委会"},{"id":"110101010014","text":"安德里社区居委会"},{"id":"110101010015","text":"兴化社区居委会"},{"id":"110101010016","text":"人定湖社区居委会"},{"id":"110101010017","text":"小黄庄社区居委会"},{"id":"110101010018","text":"总政社区居委会"},{"id":"110101010019","text":"安贞苑社区居委会"},{"id":"110101010020","text":"地坛社区居委会"},{"id":"110101010021","text":"黄寺社区居委会"},{"id":"110101010022","text":"新建路社区居委会"},{"id":"110101010023","text":"东河沿社区居委会"},{"id":"110101010024","text":"西河沿社区居委会"},{"id":"110101010026","text":"青年湖社区居委会"},{"id":"110101010027","text":"和平里社区居委会"}]

dist/zoning/110101011.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101011001","text":"前门东大街社区居委会"},{"id":"110101011006","text":"大江社区居委会"},{"id":"110101011008","text":"草厂西社区居委会"},{"id":"110101011009","text":"草厂东社区居委会"}]

dist/zoning/110101012.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"id":"110101012001","text":"兴隆都市馨园社区居委会"},{"id":"110101012002","text":"新世界家园社区居委会"},{"id":"110101012003","text":"崇文门东大街社区居委会"},{"id":"110101012004","text":"崇文门西大街社区居委会"},{"id":"110101012005","text":"西花市南里东区社区居委会"},{"id":"110101012006","text":"西花市南里西区社区居委会"},{"id":"110101012007","text":"大桥社区居委会"},{"id":"110101012008","text":"新怡家园社区居委会"},{"id":"110101012009","text":"西花市南里南区社区居委会"},{"id":"110101012010","text":"国瑞城西区社区居委会"},{"id":"110101012011","text":"国瑞城中区社区居委会"},{"id":"110101012012","text":"国瑞城东区社区居委会"}]

0 commit comments

Comments
 (0)