forked from tumashu/pyim
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyim-dcache.el
273 lines (218 loc) · 10.5 KB
/
pyim-dcache.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
;;; pyim-dcache.el --- dcache tools for pyim. -*- lexical-binding: t; -*-
;; * Header
;; Copyright (C) 2021 Free Software Foundation, Inc.
;; Author: Feng Shu <[email protected]>
;; Maintainer: Feng Shu <[email protected]>
;; URL: https://github.com/tumashu/pyim
;; Keywords: convenience, Chinese, pinyin, input-method
;; This file is part of GNU Emacs.
;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; Code:
;; * 代码 :code:
(require 'cl-lib)
(require 'pyim-common)
(require 'pyim-pymap)
(require 'pyim-scheme)
(require 'url-util)
(defgroup pyim-dcache nil
  "Dictionary cache (dcache) support for pyim."
  :group 'pyim)
(defcustom pyim-dcache-directory (locate-user-emacs-file "pyim/dcache/")
  "Directory in which pyim stores the cache files built from its dictionaries."
  :group 'pyim
  :type 'directory)
(defcustom pyim-dcache-backend 'pyim-dhashcache
  "Dictionary backend engine used by pyim.

The backend caches the dictionaries and provides the algorithm used
to search them for entries.  Two backends are currently available:

1. `pyim-dhashcache'
2. `pyim-dregcache'

`pyim-dhashcache' is the default backend.  It is implemented with
hash tables: searching for entries is very fast, but it uses a lot
of memory.

`pyim-dregcache' uses little memory, but its search speed degrades
as the dictionaries grow.  It is still usable, and worth trying,
while the dictionaries are smaller than 100M.  Note that this
backend only supports the quanpin and shuangpin input methods and
does not support shape-code based input methods.  To use it, add
the form (require \\='pyim-dregcache) to the Emacs configuration."
  :type '(radio (const pyim-dhashcache)
                (const pyim-dregcache)))
(defvar pyim-dcache-auto-update t
  "Whether dcache files are created and updated automatically.

The default value is t: when the user adds a new dictionary file,
pyim generates the related dcache files automatically.

Setting this variable to nil is generally not recommended, except
in the following situations:

1. The user's dictionaries are already very stable and the user
   wants to reduce pyim's resource usage by disabling this feature.
2. The automatic update does not work properly and the user gets
   pyim working by manually copying dcache files from another
   machine.")
;; ** Dcache 变量初始化相关函数
(defmacro pyim-dcache-init-variable (variable &optional fallback-value)
  "Initialize VARIABLE unless it already holds a non-nil value.

VARIABLE is an unquoted symbol.  When its current value is nil it is
set to the first non-nil value among:

1. the value `pyim-dcache-get-value' returns for VARIABLE,
2. FALLBACK-VALUE, which is only evaluated when step 1 yields nil,
3. a new, empty hash table using `equal' as its test.

When VARIABLE is already non-nil the expansion does nothing."
  ;; No temporary bindings are introduced in the expansion, so it
  ;; cannot capture a VARIABLE that happens to share a local name.
  `(unless (or (not (symbolp (quote ,variable)))
               ,variable)
     (setq ,variable
           (or (pyim-dcache-get-value (quote ,variable))
               ,fallback-value
               (make-hash-table :test #'equal)))))
(defun pyim-dcache-get-value (variable)
  "Return the value saved for VARIABLE under `pyim-dcache-directory'.

The file name is the URL-hexified name of the symbol VARIABLE,
resolved relative to `pyim-dcache-directory'.  Reading is delegated
to `pyim-dcache-get-value-from-file'."
  (let* ((basename (url-hexify-string (symbol-name variable)))
         (cache-file (expand-file-name basename pyim-dcache-directory)))
    (pyim-dcache-get-value-from-file cache-file)))
(defun pyim-dcache-get-value-from-file (file)
  "Return the first Lisp object stored in FILE.

Return nil when FILE is nil or an empty string, when FILE does not
exist, or when its contents cannot be parsed by `read'."
  (unless (or (zerop (length file))
              (not (file-exists-p file)))
    (with-temp-buffer
      (insert-file-contents file)
      ;; A damaged or empty cache file is treated as "no value" rather
      ;; than as an error.
      (condition-case nil
          (read (current-buffer))
        (error nil)))))
;; ** Dcache 保存变量相关函数
(defun pyim-dcache-save-variable (variable value &optional auto-backup-threshold)
  "Save VALUE to the file for VARIABLE under `pyim-dcache-directory'.

The file name is the URL-hexified name of the symbol VARIABLE,
resolved relative to `pyim-dcache-directory'.  VALUE and
AUTO-BACKUP-THRESHOLD are passed unchanged to
`pyim-dcache-save-value-to-file', which does the actual writing."
  (let* ((basename (url-hexify-string (symbol-name variable)))
         (cache-file (expand-file-name basename pyim-dcache-directory)))
    (pyim-dcache-save-value-to-file value cache-file auto-backup-threshold)))
(defun pyim-dcache-save-value-to-file (value file &optional auto-backup-threshold)
  "Save VALUE to FILE as printed Lisp data.

The parent directory of FILE is created when it is missing.  When
VALUE is nil nothing is written and FILE is left untouched.
Otherwise FILE is overwritten with a short comment header, the
`prin1' representation of VALUE (printed with `print-circle' enabled
and without any length or depth limit) and a trailing local-variables
block that records the utf-8-unix coding system.  The buffer is
written with `pyim-dcache-write-file'.

AUTO-BACKUP-THRESHOLD is accepted for backward compatibility but is
currently ignored: the automatic backup of the previous contents of
FILE is disabled."
  ;; The automatic backup step is disabled, so the previous contents
  ;; of FILE are not read back and measured before saving.  Doing so
  ;; parsed the whole old cache on every save only to discard it.
  (ignore auto-backup-threshold)
  (make-directory (file-name-directory file) t)
  (when value
    (with-temp-buffer
      (insert ";; -*- lisp-data -*-\n")
      (insert ";; Auto generated by `pyim-dhashcache--save-variable-to-file', don't edit it by hand!\n")
      (insert (format ";; Build time: %s\n\n" (current-time-string)))
      ;; Print the complete value: no truncation, and shared or
      ;; circular structure is written with #N= / #N# labels.
      (insert (let ((print-level nil)
                    (print-circle t)
                    (print-length nil))
                (prin1-to-string value)))
      (insert "\n\n")
      (insert ";; Local\sVariables:\n") ;Use \s to avoid a false positive!
      (insert ";; coding: utf-8-unix\n")
      (insert ";; End:")
      (pyim-dcache-write-file file))))
(defun pyim-dcache--value-length (value)
  "Return a number that can serve as the length of VALUE.

For a hash table this is its entry count; for any other value it is
the result of `length'.  Return 0 when computing the length signals
an error, for example when VALUE is not a sequence."
  (condition-case nil
      (cond ((hash-table-p value) (hash-table-count value))
            (t (length value)))
    (error 0)))
(defun pyim-dcache-write-file (filename &optional confirm)
  "Write the accessible part of the current buffer to FILENAME.

The text is encoded as utf-8-unix and no lock file is created.  When
CONFIRM is non-nil and FILENAME already exists, ask before
overwriting it and signal a `user-error' if the user declines.
After writing, a message naming FILENAME is shown."
  (let ((create-lockfiles nil)
        (coding-system-for-write 'utf-8-unix))
    (when (and confirm
               (file-exists-p filename)
               ;; NS does its own confirm dialog.
               (not (and (eq (framep-on-display) 'ns)
                         (listp last-nonmenu-event)
                         use-dialog-box))
               (not (y-or-n-p (format-message
                               "File `%s' exists; overwrite? " filename))))
      (user-error "Canceled"))
    (write-region (point-min) (point-max) filename nil :silent)
    (message "Saving file %s..." filename)))
(defun pyim-dcache-create-files-md5 (files)
  "Return an MD5 string computed from FILES.

The digest covers, for each file in FILES and in order, a version
tag, the file name, and the sixth element of the file's
`file-attributes' (the modification time).  A file without
attributes, such as a missing one, contributes nil for that
element."
  ;; Change this string to force the dict caches to be rebuilt.
  (let ((version "v1")
        (entries nil))
    (dolist (file files)
      (push (list version file (nth 5 (file-attributes file 'string)))
            entries))
    (md5 (prin1-to-string (nreverse entries)))))
;; ** Dcache 重新加载变量相关函数
(defmacro pyim-dcache-reload-variable (variable)
  "Set VARIABLE again from its file under `pyim-dcache-directory'.

VARIABLE is an unquoted symbol.  It is set to the value returned by
`pyim-dcache-get-value' for it, or to a new, empty hash table using
`equal' as its test when that value is nil.  Any previous value of
VARIABLE is discarded."
  ;; No temporary bindings are introduced in the expansion, so it
  ;; cannot capture a VARIABLE that happens to share a local name.
  `(if (symbolp (quote ,variable))
       (setq ,variable
             (or (pyim-dcache-get-value (quote ,variable))
                 (make-hash-table :test #'equal)))))
;; ** Dcache 获取当前可用后端接口
(cl-defgeneric pyim-dcache-backend ()
  "Return the dcache backend that is currently usable.

This is the value of the variable `pyim-dcache-backend' when a
feature of that name has been provided, and the symbol
`pyim-dhashcache' otherwise."
  (cond ((featurep pyim-dcache-backend) pyim-dcache-backend)
        (t 'pyim-dhashcache)))
;; ** Dcache 初始化功能接口
(cl-defgeneric pyim-dcache-init-variables ()
  "Initialize the variables that hold the dcache caches.

No default implementation is provided here.")
;; ** Dcache 检索词条功能接口
(cl-defgeneric pyim-dcache-get (_key &optional _from)
  "Search for KEY in FROM and return the corresponding value.

FROM is a list made of the symbols below; each symbol stands for one
kind of dcache.

* code -> words
  1. code2word        look up words by code
  2. shortcode2word   look up words by short code
  3. icode2word       look up personal words by code
  4. ishortcode2word  look up personal words by short code

* word -> codes
  1. word2code        look up codes by word

* word -> count
  1. iword2count                  count of a personal word
  2. iword2count-recent-10-words  count among the 10 most recently
                                  entered words
  3. iword2count-recent-50-words  count among the 50 most recently
                                  entered words

If FROM is nil, fall back to \\='(icode2word code2word).

The default implementation defined here ignores its arguments and
returns nil."
  nil)
;; ** Dcache 加词功能接口
(cl-defgeneric pyim-dcache-insert-word (word code prepend)
  "Insert the entry WORD into the dcache.

If PREPEND is non-nil, WORD is placed in front of the entries
already associated with CODE.

No default implementation is provided here.")
;; ** Dcache 删词功能
(cl-defgeneric pyim-dcache-delete-word (word)
  "Delete the Chinese entry WORD from the personal dictionary.

No default implementation is provided here.")
;; ** Dcache 更新功能接口
(cl-defgeneric pyim-dcache-update (&optional force)
  "Read and load the dcache of every relevant dictionary.

If FORCE is non-nil, loading is forced.

No default implementation is provided here.")
;; ** Dcache 更新词频功能接口
(cl-defgeneric pyim-dcache-update-wordcount (word &optional wordcount-handler)
  "Update the usage count of WORD.

1. If WORDCOUNT-HANDLER is a function, it is called with the
   existing count and its return value is saved as the new count.
2. If WORDCOUNT-HANDLER is a number, that number is saved directly
   as the count.
3. For any other WORDCOUNT-HANDLER the count is left unchanged.

No default implementation is provided here.")
;; ** Dcache 升级功能接口
(cl-defgeneric pyim-dcache-upgrade ()
  "Upgrade the dictionary caches.

No default implementation is provided here.")
;; ** Dcache 排序功能接口
(cl-defgeneric pyim-dcache-sort-words (words)
  "Sort WORDS and return the result.

The default implementation defined here returns WORDS unchanged."
  words)
;; ** Dcache 保存功能接口
(cl-defgeneric pyim-dcache-save-caches ()
  "Save the dcache.

Write the cache built from the words the user has selected, and the
word-count cache, to their corresponding files.

No default implementation is provided here.")
;; ** Dcache 导出功能接口
(cl-defgeneric pyim-dcache-export-words-and-counts (file &optional confirm ignore-counts)
  "Export personal entries and their usage counts to FILE.

If FILE is nil, prompt the user for the export location.  If CONFIRM
is non-nil and the file already exists, ask the user whether to
overwrite it; by default the file is overwritten.

NOTE(review): IGNORE-COUNTS is not described by this declaration;
presumably a non-nil value leaves the counts out of the export.
Confirm against the backend methods.

No default implementation is provided here.")
(cl-defgeneric pyim-dcache-export-personal-words (file &optional confirm)
  "Export the user's personal entries as a pyim dictionary file.

If FILE is nil, prompt the user for the export location.  If CONFIRM
is non-nil and the file already exists, ask the user whether to
overwrite it; by default the file is overwritten.

No default implementation is provided here.")
;; * Footer
(provide 'pyim-dcache)
;;; pyim-dcache.el ends here