-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathgo.py
41 lines (34 loc) · 1.15 KB
/
go.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# -*- coding: utf-8 -*-
import os, sys
from word.prepro_file import prepro_file
from word.TF_IDF_Compute import TF_IDF_Compute
def curDir():
    """Return the directory part of the resolved path of ``sys.argv[0]``.

    ``sys.argv[0]`` is passed through ``os.path.realpath`` and the
    directory component of the result is taken. This is the script
    directory, as opposed to the working directory (``os.getcwd()``) or
    the script path itself (``sys.argv[0]``).

    The directory is also printed to stdout before being returned.

    Returns:
        str: directory component of ``os.path.realpath(sys.argv[0])``.
    """
    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    # Single-argument parenthesised print produces the same output on
    # Python 2 and Python 3, unlike the bare ``print x`` statement.
    print(script_dir)
    return script_dir
# Directory for the preprocessing output: "test" under the working directory
# at import time
PreprocessResultDir = os.path.join(os.getcwd(), "test")
# File name of the preprocessing output
PreprocessResultName = "pro_res.txt"
# Directory for the search-result file (same "test" directory as above)
ResultFileNameDir = os.path.join(os.getcwd(), "test")
# File name of the search-result file
ResultFileName = "result.txt"
def Preprocess(file_url):
    """Run ``prepro_file`` on ``file_url``.

    The second argument handed to ``prepro_file`` is the path
    ``PreprocessResultDir/PreprocessResultName``; presumably that is where
    the preprocessing result is written (confirm against ``prepro_file``).

    Args:
        file_url: path passed through unchanged as the first argument of
            ``prepro_file``.
    """
    prepro_file(
        file_url,
        os.path.join(PreprocessResultDir, PreprocessResultName)
    )
def TF_IDF(*words):
    """Call ``TF_IDF_Compute`` for the given keywords and return its result.

    ``TF_IDF_Compute`` receives the preprocessing-result path
    (``PreprocessResultDir/PreprocessResultName``), the result-file path
    (``ResultFileNameDir/ResultFileName``) and then every keyword as a
    separate positional argument.

    Args:
        *words: keywords forwarded unchanged to ``TF_IDF_Compute``.

    Returns:
        Whatever ``TF_IDF_Compute`` returns (presumably the names of the
        documents matching the keywords — confirm against that function).
    """
    return TF_IDF_Compute(
        os.path.join(PreprocessResultDir, PreprocessResultName),
        os.path.join(ResultFileNameDir, ResultFileName),
        *words
    )
# Input path handed to Preprocess when run as a script:
# "test/doc" under the working directory.
path = os.path.join(os.getcwd(), 'test', 'doc')

if __name__ == '__main__':
    Preprocess(path)
    # Look up the names of the documents that contain the keyword.
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(TF_IDF("JAVA"))