Skip to content

Commit

Permalink
发布2021.220321.220704版
Browse files Browse the repository at this point in the history
  • Loading branch information
xiangyuecn committed Sep 7, 2022
1 parent 4121d5f commit ba3854f
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 32 deletions.
28 changes: 14 additions & 14 deletions README.md

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions assets/AGT/WebviewSettingsCodes/exec/city_to_db.js

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions assets/AGT/WebviewSettingsCodes/exec/city_to_sql.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion assets/csv-ver-date.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
更新ok_data_level*.csv、ok_geo*.csv后,需要同时更新这里的数据时间
**************/
(function(){
var OpenVer="2021.220321.220704",OpenDate="2022-07-04";
var OpenVer="2021.220707.220907",OpenDate="2022-09-07";

var UrlRoot="https://xiangyuecn.gitee.io/areacity-jsspider-statsgov/";
var UrlTool=UrlRoot+"assets/AreaCity-Geo-Transform-Tools.html";
Expand Down
125 changes: 125 additions & 0 deletions assets/pinyin-python-server/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# -*- coding:utf-8 -*-
"""
GitHub: https://github.com/xiangyuecn/AreaCity-JsSpider-StatsGov/assets/pinyin-python-server
使用的HanLP (https://github.com/hankcs/HanLP) 语言处理库
【1】安装Miniconda
conda版本随意
【2】安装pyhanlp
https://github.com/hankcs/pyhanlp/wiki/Windows
测试发现python3.7.1 windows下ssl有问题无法安装,conda切换成python 3.6.4测试安装正常
安装好后运行一下hanlp命令,会提示下载,看第3步
【3】下载字典和jar
参考半自动配置: https://github.com/hankcs/pyhanlp/wiki/%E6%89%8B%E5%8A%A8%E9%85%8D%E7%BD%AE
字典和jar存放目录一般在Miniconda3[\envs\py36]\Lib\site-packages\pyhanlp\static
jar直接下载最新releases
字典最好直接clone仓库/data目录最新版本(用svn下载速度快很多,无需model数据),一样的在存储目录内放一个data文件夹,releases对bug处理稍微滞后一点。
另外需要修改hanlp.properties,给root赋值为当前目录完整路径。
svn: https://github.com/hankcs/HanLP/trunk/data
【4】运行
python server.py
【5】浏览器访问
http://127.0.0.1:9527/pinyin?txt=要拼的文字
"拼音。m" 返回结果 {c:0,m:"",v:["pin","yin","F。","Fm"]},c=0时代表正常,其他代表出错,m为错误原因,拼音如果是字母符号会用F打头
"""
import sys

# Abort early with a clear message when started under Python 2: the server
# depends on Python 3-only modules such as http.server.
# Index the version tuple instead of reading `.major`, so the guard itself
# also works on old interpreters where version_info is a plain tuple.
if sys.version_info[0] < 3:
    print("Require python3 environment!")
    # sys.exit is always available; the bare exit() helper is injected by the
    # `site` module and is not guaranteed to exist (e.g. with `python -S`).
    sys.exit(1)


from pyhanlp import *

import json
import time
import traceback
import urllib
import urllib.parse
from http.server import HTTPServer, BaseHTTPRequestHandler


class HttpHandler(BaseHTTPRequestHandler):
    """HTTP request handler for the local pinyin service.

    Routes:
        "/"        -- readiness probe, returns a fixed message.
        "/pinyin"  -- converts the ``txt`` argument into a list of pinyin.

    Every response body is a JSON object with the keys:
        c -- 0 on success, 404 for an unknown path, 1 for a handler error,
             2 when the result could not be serialised
        m -- error message (empty on success)
        v -- result value
        T -- request start time, milliseconds since the epoch
        D -- processing duration in milliseconds
    """

    def _pinyin(self, txt):
        """Convert ``txt`` to a list of pinyin strings using HanLP.

        Entries HanLP reports as ``Pinyin.none5`` (no pinyin available, e.g.
        letters and punctuation) are emitted as ``'F'`` followed by the
        original character; all others are the pinyin without tone.
        """
        pinyin_list = HanLP.convertToPinyinList(txt)
        Pinyin = JClass("com.hankcs.hanlp.dictionary.py.Pinyin")
        result = []
        for i in range(pinyin_list.size()):
            pinyin = pinyin_list[i]
            if pinyin == Pinyin.none5:
                # NOTE(review): assumes HanLP returns exactly one entry per
                # Python character of txt -- confirm for characters outside
                # the BMP, where Java and Python string indexes may differ.
                result.append('F' + txt[i])
            else:
                result.append(pinyin.getPinyinWithoutTone())
        return result

    def _response(self, path, args):
        """Dispatch ``path`` and write the JSON response.

        Args:
            path: request path without the query string.
            args: URL-encoded argument string (query string or POST body);
                any falsy value means "no arguments".
        """
        startTime = time.time()
        code = 200
        rtv = {'c': 0, 'm': '', 'v': ''}

        try:
            if args:
                # parse_qs maps each key to a list; keep the first value only.
                parsed = urllib.parse.parse_qs(args)
                args = dict((k, v[0]) for k, v in parsed.items())
            else:
                args = {}

            if path == "/":
                rtv["v"] = "服务器已准备好"
            elif path == "/pinyin":
                rtv["v"] = self._pinyin(args.get("txt", ""))
            else:
                code = 404
                rtv["c"] = 404
                rtv["m"] = "路径" + path + "不存在"
        except Exception as e:
            # Handler failures are reported inside the JSON body; the HTTP
            # status code is left unchanged.
            rtv["c"] = 1
            rtv["m"] = '服务器错误:' + str(e) + "\n" + traceback.format_exc()

        rtv["T"] = int(startTime * 1000)
        rtv["D"] = int((time.time() - startTime) * 1000)
        try:
            rtv = json.dumps(rtv, ensure_ascii=False)
        except Exception as e:
            # The result was not JSON-serialisable: fall back to an error
            # object built only from plain strings.
            rtv = {'c': 2, 'm': '服务器返回数据错误:' + str(e) + "\n" + traceback.format_exc(), 'v': ''}
            rtv = json.dumps(rtv, ensure_ascii=False)

        self.send_response(code)
        self.send_header('Content-type', 'text/json; charset=utf-8')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.send_header('Access-Control-Allow-Methods', '*')
        self.send_header('Access-Control-Allow-Headers', '*')
        self.send_header('Access-Control-Max-Age', '86400')
        self.end_headers()
        self.wfile.write(rtv.encode("utf-8"))

    def do_OPTIONS(self):
        """Answer CORS preflight requests with the readiness response."""
        self._response("/", "")

    def do_GET(self):
        """Handle GET: arguments come from the query string."""
        # Split on the FIRST '?': everything after it is the query, even when
        # the query itself contains an unescaped '?'. The deprecated
        # urllib.parse.splitquery split on the last one, which turned such
        # requests into a 404.
        path, _, query = self.path.partition("?")
        self._response(path, query)

    def do_POST(self):
        """Handle POST: arguments come from the URL-encoded request body."""
        try:
            length = int(self.headers.get('content-length') or 0)
        except (TypeError, ValueError):
            # Missing or malformed Content-Length: treat the body as empty
            # rather than crashing the handler before a response is sent.
            length = 0
        body = self.rfile.read(length) if length > 0 else b""
        # Undecodable bytes are replaced so a bad body still gets a response.
        args = body.decode("utf-8", errors="replace")
        self._response(self.path, args)


# Listen on the loopback interface only; port 9527 is the one documented in
# the usage notes at the top of this file.
httpd = HTTPServer(('127.0.0.1', 9527), HttpHandler)
try:
    httpd.serve_forever()
except KeyboardInterrupt:
    # Ctrl+C is the normal way to stop the server; exit without a traceback.
    pass
finally:
    # Release the listening socket so the port can be reused immediately.
    httpd.server_close()

8 changes: 5 additions & 3 deletions src/1_5_合并.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ var amapDifference={

//和高德对比完后qq地图数据替换处理
var fixQQmapReplaceFillAfterAmap={
//修正前缀和统计局一致
620200:{name:"嘉峪关市",level:3,replaceAs:{codePrefix:"620201",name:"市辖区"}}
//修正前缀和其他直筒子市一致 00结尾,统计是01结尾
620200:{name:"嘉峪关市",level:3,replaceAs:{codePrefix:"620200",name:"市辖区"}}
};

//qq地图数据和统计局+MCA前三级数据有效的差异 和处理方式
Expand Down Expand Up @@ -174,6 +174,8 @@ var gov3Difference={
,232718:{name:"加格达奇区",asID:"232761"}
,632825:{name:"大柴旦行政委员会",asID:"632857"}

,620200:{name:"市辖区",asID:"620201"} //嘉峪关市 使用00结尾 统计局是01

//MCA和qq id相同,但名称不同的,这里明确取qq的名称
//,431121:{name:"祁阳市",useQQ:true}

Expand Down Expand Up @@ -203,7 +205,7 @@ function SCode(itm,level){
level=1;
var cur=itm,p=cur.parent;
while(p){
if(p.code!=cur.code){
if(p.code!=cur.code || p.name!=cur.name){
level++;
};
cur=p;
Expand Down
8 changes: 6 additions & 2 deletions src/3_格式化.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ function add(txt){
"P2": val[4],

"ext_id": 0
,"ext_name": ""
,"ext_name": val[3]

,isExt:true
});
Expand All @@ -96,6 +96,10 @@ for(var i=0;i<pinyinList.length;i++){
console.error("存在重复ID",o);
throw new Error();
};
if(!/^[2469]$/.test((o.id+"").length)){
console.error("ID非预期",o);
throw new Error();
}
idMP[o.id]=o;
};

Expand All @@ -114,7 +118,7 @@ for(var i=0;i<pinyinList.length;i++){
idMP[o.pid].child.push(o);
};

o.ext_name=o.isExt?"":(o.ext_name||o.name);
o.ext_name=o.ext_name||o.name;
o.name2=o.name;
if(!o.isExt){
if(o.ext_id==0){
Expand Down
8 changes: 5 additions & 3 deletions src/坐标和边界/map_geo.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
注:本来想采百度的,但经过使用发现百度数据有严重问题,参考 肃宁县、路南区 边界,百度数据大量线段交叉的无效polygon(百度已更新,不能复现了),没有人工无法修正;并且高德对镂空性质(Hole)的地块处理比百度强,参考池州市对铜陵市的飞地处理。所以放弃使用百度地图数据。
附百度测试以防忘记:https://lbsyun.baidu.com/jsdemo.htm#c1_10 唐山在天津的飞地好像被百度干掉了就算了,观察铜陵市飞地处理(铜陵市、池州市、安庆市);比如池州市的两个polygon,如果不计算位置关系,是完全不知道应该进行Union操作还是Difference操作。
采集百度的源码于2019-10-19 20:07 SVN:71删除
在高德地图测试页面,选到iframe上下文中执行
在高德地图测试页面,选到demo_iframe上下文中执行
https://lbs.amap.com/api/javascript-api/example/district-search/draw-district-boundaries
加载数据
Expand All @@ -22,6 +23,7 @@ https://lbs.amap.com/api/javascript-api/example/district-search/draw-district-bo
var s=document.createElement("script");s.src=url+"Step2_2_Pinyin_WebApi.txt?t="+Date.now();document.body.appendChild(s)
然后再次运行本代码,如果中途因错误停止,根据提示重复运行
2022-9-7 此页面接口已出现滑动验证码,需要手动处理,似乎是500次弹一次验证码
*/
"use strict";
AMap.LngLat;
Expand All @@ -38,7 +40,7 @@ var Load_Thread_Count=4;//模拟线程数


var logX=top.document.createElement("div");
logX.innerHTML='<div class="LogX" style="position: fixed;bottom: 80px;right: 100px;padding: 50px;background: #0ca;color: #fff;font-size: 16px;width: 600px;z-index:9999999"></div>';
logX.innerHTML='<div class="LogX" style="position: fixed;top: 10px;right: 10px;padding: 50px;background: #0ca;color: #fff;font-size: 16px;width: 600px;z-index:9999999"></div>';
top.document.body.appendChild(logX);
logX=top.document.querySelectorAll(".LogX");
logX=logX[logX.length-1];
Expand All @@ -51,7 +53,7 @@ function LogX(txt){
}
if(!top.document.querySelector(".DataTxt")){
var div=top.document.createElement("div");
div.innerHTML=('<div style="position: fixed;bottom: 80px;left: 100px;padding: 20px;background: #0ca;z-index:9999999">输入'+PinyinWebApiSaveName+'.txt<textarea class="DataTxt"></textarea></div>');
div.innerHTML=('<div style="position: fixed;top: 10px;left: 100px;padding: 20px;background: #0ca;z-index:9999999">输入'+PinyinWebApiSaveName+'.txt<textarea class="DataTxt"></textarea></div>');
top.document.body.appendChild(div);
};

Expand Down
7 changes: 4 additions & 3 deletions src/坐标和边界/map_geo_格式化.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,11 @@ for(var k=0;k<DATA_GEO.length;k++){
};

//找到最小的一个坐标,环从这个坐标开始,免得每次采集起点不一样导致差异
var min=999.999999,idx=0;
var minX=999.999999,minY=minX,idx=0;
for(var i=0;i<arr.length;i++){
if(arr[i][0]<min){
min=arr[i][0];
var x=arr[i][0],y=arr[i][1];
if(x<minX || (x==minX && y<minY)){
minX=x;minY=y;
idx=i;
}
};
Expand Down

0 comments on commit ba3854f

Please sign in to comment.