-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfilter.py
executable file
·196 lines (169 loc) · 7.2 KB
/
filter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import ast
import os
import re
import shlex
import shutil
import stat
import subprocess
from subprocess import Popen, PIPE, STDOUT

import pandas as pd

from classifier import classifier
# PROJECT_PATH = ".\\project"
# Directory that downloadProject() clones repositories into and later
# enters to fetch / reset them.
PROJECT_PATH = "./PythonC_Sample"
# Base URL of GitHub; presumably prefixed to "<owner>/<repo>.git" to build
# clone URLs -- no active code in this file uses it, verify before relying on it.
GITCLONE_URL = "https://github.com/"
def exe_command(command):
    """Run *command* through the shell, echoing and collecting its output.

    stderr is merged into stdout. Every output line is stripped, printed,
    and appended to the result followed by the separator ``"$#$"``.

    Args:
        command: Shell command line (executed with ``shell=True``).

    Returns:
        A tuple ``(result, process, exitcode)`` where ``result`` is the
        concatenation of all stripped output lines, each terminated by
        ``"$#$"``; ``process`` is the finished ``Popen`` object; and
        ``exitcode`` is the value returned by ``process.wait()``.
    """
    print("command: "+command)
    process = Popen(command, stdout=PIPE, stderr=STDOUT, shell=True)
    collected = []
    with process.stdout:
        for raw_line in iter(process.stdout.readline, b''):
            # Decode once; undecodable bytes become U+FFFD instead of the
            # whole line being silently dropped from the result.
            text = raw_line.decode(errors="replace").strip()
            collected.append(text + "$#$")
            try:
                print(text)
            except UnicodeEncodeError:
                # Echoing is best-effort: a console that cannot encode the
                # text must not abort the command or lose collected output.
                print(text.encode("ascii", "backslashreplace").decode("ascii"))
    exitcode = process.wait()
    return "".join(collected), process, exitcode
def loadCommitSHA_and_repoName(csv_path='./tagged_commit.csv'):
    """Load the commit SHAs and repository names listed in a CSV file.

    Args:
        csv_path: CSV file with ``commit_sha`` and ``repo_fullname``
            columns. Defaults to ``./tagged_commit.csv``, the path that was
            previously hard-coded.

    Returns:
        A tuple ``(sha_list, repo_list)`` of two lists holding the
        ``commit_sha`` and ``repo_fullname`` columns in file order.

    Raises:
        KeyError: if either column is missing from the CSV.
    """
    # NOTE: an earlier, commented-out variant concatenated the same two
    # columns from Cpp+Python_issues_closed.csv and Cpp+Python_issues_open.csv.
    # Read both columns as text so that a SHA made up only of digits is not
    # converted to an integer (which would also drop leading zeros).
    data = pd.read_csv(
        csv_path,
        dtype={'commit_sha': str, 'repo_fullname': str},
    )
    sha_list = list(data['commit_sha'])
    repo_list = list(data['repo_fullname'])
    return sha_list, repo_list
def findAllFile(path):
    """Lazily yield the joined path of every file found beneath *path*.

    The tree is traversed with ``os.walk``; directories themselves are not
    yielded, only the files they contain.
    """
    for dirpath, _subdirs, filenames in os.walk(path):
        yield from (os.path.join(dirpath, name) for name in filenames)
def downloadProject(git_url,repo_name,sha):
    """Clone a repository under PROJECT_PATH and check out one commit.

    The repository is cloned only when ``PROJECT_PATH/repo_name`` does not
    exist yet. Afterwards ``sha`` is fetched from ``origin`` and the working
    tree is hard-reset to it.

    Args:
        git_url: URL passed to ``git clone``.
        repo_name: Directory name of the checkout inside PROJECT_PATH.
        sha: Commit to fetch and reset to (must be a string).

    Returns:
        None. The exit codes of the git commands are not inspected.
    """
    # The three arguments typically originate from a CSV file, so quote them
    # before splicing them into a shell command line. shlex.quote leaves
    # ordinary URLs, repository names and SHAs untouched. PROJECT_PATH is a
    # trusted module constant and is deliberately left as written.
    safe_url = shlex.quote(git_url)
    safe_sha = shlex.quote(sha)
    repo_dir_arg = PROJECT_PATH + "/" + shlex.quote(repo_name)

    if not os.path.exists(PROJECT_PATH + "/" + repo_name):
        exe_command("cd " + PROJECT_PATH + " && git clone " + safe_url)
    exe_command("cd " + repo_dir_arg + " && git fetch origin " + safe_sha)
    exe_command("cd " + repo_dir_arg + " && git reset --hard " + safe_sha)
def remove_readonly(func, path, _):
    """Clear the readonly bit and reattempt the removal.

    The signature matches the error-handler callback of ``shutil.rmtree``,
    which is how rmdir() uses it.

    Args:
        func: The callable that failed; it is called again with ``path``.
        path: The filesystem path the failed call was operating on.
        _: Exception information supplied by the caller (unused).
    """
    # Relies on the module-level ``import stat``.
    os.chmod(path, stat.S_IWRITE)
    func(path)
def mkdir(path):
    """Create *path* (including missing parents) unless it already exists.

    The outcome is reported on stdout; nothing is returned.
    """
    if os.path.exists(path):
        print("folder already exist")
        return
    os.makedirs(path)
    print("folder create success")
def rmdir(path):
    """Recursively delete *path* and report on stdout whether it is gone.

    Removal errors are routed to remove_readonly(), which clears the
    read-only bit and retries the failed operation.

    Args:
        path: Directory tree to delete.
    """
    # Relies on the module-level ``import shutil``.
    # NOTE: ``onerror`` is deprecated since Python 3.12 in favour of
    # ``onexc``; it is kept so the code also runs on older interpreters.
    shutil.rmtree(path, onerror=remove_readonly)
    if os.path.exists(path):
        print("folder remove failed")
    else:
        print("folder removed")
_STATE_TO_INTER_TYPE = {
    "10": "cffi",
    "21": "ctypes",
    "60": "cython",
    "51": "SWIG",
    "41": "CE",   # C extension
    "31": "CPY",  # CPython
}


def _extract_method_table_entries(file):
    """Return ``(python_name, c_function, file)`` tuples found in *file*.

    The file is scanned for ``PyMethodDef <name>[] = { ... };`` tables, and
    for every table entry that starts with a quoted name the first two
    fields are captured. The second field is returned verbatim (whatever
    text precedes the next comma).
    """
    with open(file, 'r', encoding="utf8", errors="ignore") as f:
        file_content = f.read()

    # step1: find the PyMethodDef tables (initializer body up to the first "};")
    py_method_def_pattern = r'PyMethodDef\s+\w+\s*\[\s*\]\s*=\s*\{(.*?\};)'
    # step2: find the python-c function mapping inside each table
    ml_name_pattern = r'\{\s*"([^"]+)"\s*,\s*([^,]+),'

    entries = []
    for method_def in re.findall(py_method_def_pattern, file_content, re.DOTALL):
        entries.extend(
            pair + (file,) for pair in re.findall(ml_name_pattern, method_def)
        )
    return entries


def checkProject(path, force_state="41"):
    """Classify one source file and collect its Python-to-C method mappings.

    Args:
        path: Path of the file to inspect.
        force_state: Classifier state to assume for the file. The default
            ``"41"`` reproduces the previously hard-coded behaviour: the
            file is always treated as a C-extension source and the
            classifier verdict is ignored. Pass ``None`` to use the result
            of ``clf.Match`` instead.

    Returns:
        A tuple ``(c_func_list, inter_type_list)``. ``c_func_list`` holds
        ``(python_name, c_function, file)`` tuples and is only filled for
        state ``"41"``; ``inter_type_list`` holds at most one of "cffi",
        "ctypes", "cython", "SWIG", "CE" or "CPY".
    """
    # NOTE: an earlier, commented-out variant of this function downloaded
    # every repository from the commit CSV and walked all of its files.
    clf = classifier(".c .py .i .pyx")
    clf.creatCPYClassifier()

    file = path
    # The classifier is still run when a state is forced, exactly as before.
    is_inter, state = clf.Match(file)
    if force_state is not None:
        is_inter, state = True, force_state

    c_func_list = []
    inter_type_list = []
    if is_inter:
        inter_type = _STATE_TO_INTER_TYPE.get(state)
        if inter_type is not None:
            inter_type_list.append(inter_type)
        if state == "41":
            c_func_list.extend(_extract_method_table_entries(file))
    return c_func_list, inter_type_list
# def checkProject(file):
# c_func_list = []
# with open(file, 'r', encoding="utf8", errors="ignore") as f:
# file_content = f.read()
# #step1: find the PyMethodDef function
# py_method_def_pattern = r'PyMethodDef\s+\w+\s*\[\s*\]\s*=\s*\{(.*?\};)'
# py_method_defs = re.findall(py_method_def_pattern, file_content, re.DOTALL)
# #step2: find python-c function mapping
# ml_name_pattern = r'\{\s*"([^"]+)"\s*,\s*([^,]+),'
# ml_names = []
# for method_def in py_method_defs:
# ml_names.extend(re.findall(ml_name_pattern, method_def))
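# # Determine the mapped function's parameter types via PyArg_ParseTupleAndKeywords? Then simply look up the defined name in PyMethodDef?
# # First use PyMethodDef to identify the target functions defined in the current file, then use PyArg_ParseTupleAndKeywords or PyArg_ParseTuple to find each target function's parameters, which completes the information gathering; finally, match this against the C-language side (sentence left unfinished in the original)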
# func_mapping=[]
# for tup in ml_names:
# func_mapping.append(tup+(file,))
# para_type = ""
# map_func_name = func_mapping[0][1]
# func_name = func_mapping[0][0]
# method_pattern = re.escape(map_func_name) + r"([\s\S]*)"
# match = re.search(method_pattern, file_content)
# key = match.groups()[0]
# # type_pattern = r'PyArg_ParseTupleAndKeywords\(.*"(.*)".*\)'
# type_pattern = r'PyArg_ParseTuple\(.*"(.*)".*\)'
# match = re.search(type_pattern, key)
# para_type = match.groups()[0]
# print(para_type)
# c_func_list.extend(func_mapping)
# print(c_func_list)
# inter_type_list = []
# inter_type_list.append("CE")
# return c_func_list, inter_type_list