-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfilter_javac.py
executable file
·173 lines (143 loc) · 6.75 KB
/
filter_javac.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import pandas as pd
from classifier_javac import classifier
import os
import subprocess
from subprocess import Popen, PIPE, STDOUT
import ast
import re
# Alternative (Windows-style) location kept for reference:
# PROJECT_PATH = ".\\project"
# Directory under which downloadProject() clones and checks out repositories.
PROJECT_PATH = "./PythonC_Sample"
# Base URL for clone addresses; in this file it is only referenced by the
# commented-out download loop in checkProject().
GITCLONE_URL = "https://github.com/"
def exe_command(command):
    """Run *command* through the shell, echoing and collecting its output.

    stderr is merged into stdout. Every output line is decoded, stripped of
    surrounding whitespace, printed, and appended to the result followed by
    the separator ``"$#$"``.

    Args:
        command: Shell command line (executed with ``shell=True``, so it must
            come from a trusted source).

    Returns:
        A ``(result, process, exitcode)`` tuple: the concatenated output
        string, the finished ``Popen`` object, and the process exit status.
    """
    print("command: "+command)
    process = Popen(command, stdout=PIPE, stderr=STDOUT, shell=True)
    collected = []
    with process.stdout:
        for raw_line in iter(process.stdout.readline, b''):
            # Undecodable bytes become U+FFFD instead of making the whole
            # line vanish silently from the collected output.
            text = raw_line.decode(errors="replace").strip()
            collected.append(text + "$#$")
            try:
                print(text)
            except UnicodeEncodeError:
                # Echoing is best-effort: a console that cannot represent
                # the text must not abort the command run.
                print(text.encode("ascii", "backslashreplace").decode("ascii"))
    exitcode = process.wait()
    return "".join(collected), process, exitcode
def loadCommitSHA_and_repoName():
    """Load commit SHAs and repository names from ``./tagged_commit.csv``.

    Returns:
        A ``(sha_list, repo_list)`` pair holding the ``commit_sha`` and
        ``repo_fullname`` columns of the CSV as plain lists, in row order.
    """
    frame = pd.read_csv('./tagged_commit.csv')
    return list(frame['commit_sha']), list(frame['repo_fullname'])
def findAllFile(path):
    """Lazily yield the path of every file found beneath *path*.

    Each yielded value is the walked directory joined with the file name, so
    it carries the same prefix as *path*. Directories are not yielded.
    """
    for directory, _subdirs, filenames in os.walk(path):
        yield from (os.path.join(directory, name) for name in filenames)
def downloadProject(git_url,repo_name,sha):
    """Clone a repository if needed and hard-reset it to commit *sha*.

    Args:
        git_url: Clone URL handed to ``git clone``.
        repo_name: Name of the checkout directory below ``PROJECT_PATH``.
        sha: Commit to fetch from ``origin`` and reset the work tree to.
    """
    checkout_dir = PROJECT_PATH + "/" + repo_name
    if not os.path.exists(checkout_dir):
        exe_command("cd " + PROJECT_PATH + " && git clone " + git_url)
    # Fetch the commit explicitly, then move the work tree onto it.
    enter_repo = "cd " + checkout_dir
    for git_step in (" && git fetch origin ", "&& git reset --hard "):
        exe_command(enter_repo + git_step + sha)
def remove_readonly(func, path, _):
    """Clear the read-only bit on *path* and reattempt the removal.

    The signature matches the error-handler callback of ``shutil.rmtree``.

    Args:
        func: The function that failed; it is called again with *path*.
        path: File-system path whose removal failed.
        _: Exception information supplied by the caller (unused).
    """
    # Local import: the visible file-level import block does not import
    # ``stat``, so the bare name would raise NameError here.
    import stat

    os.chmod(path, stat.S_IWRITE)
    func(path)
def mkdir(path):
    """Create *path* (with any missing parents) unless it already exists.

    Prints a short status message in either case and returns ``None``.
    """
    if os.path.exists(path):
        print("folder already exist")
        return
    os.makedirs(path)
    print("folder create success")
def rmdir(path):
    """Recursively delete the directory tree at *path* and report the outcome.

    Entries whose removal fails are retried through ``remove_readonly``.
    Prints ``"folder removed"`` when *path* no longer exists afterwards and
    ``"folder remove failed"`` otherwise.
    """
    # Local import: the visible file-level import block does not import
    # ``shutil``, so the bare name would raise NameError here.
    import shutil

    shutil.rmtree(path, onerror=remove_readonly)
    if os.path.exists(path):
        print("folder remove failed")
    else:
        print("folder removed")
def find_nth_rindex(text, sub, n):
    """Return the index of the (n+1)-th occurrence of *sub* counted from the end.

    ``n == 0`` is the last occurrence, ``n == 1`` the one before it, and so
    on. Each search only considers matches that end before the previously
    found position.

    Args:
        text: String to search.
        sub: Substring to look for.
        n: Zero-based rank from the right.

    Returns:
        The start index of the match, or ``-1`` when *text* holds fewer than
        ``n + 1`` such occurrences. For negative *n* no search is performed
        and ``len(text)`` is returned.
    """
    current_pos = len(text)
    for _ in range(n + 1):
        current_pos = text.rfind(sub, 0, current_pos)
        if current_pos == -1:
            # Stop here: passing -1 as the end bound would be read as
            # "len(text) - 1" and restart the search from the right.
            break
    return current_pos
# Static binding: ``JNIEXPORT <type> JNICALL Java_<prefix>_<last_segment>``.
# Group 1 is greedy, so group 2 is the part after the final underscore.
_JNI_STATIC_RE = re.compile(
    r"JNIEXPORT\s+\w+\s+JNICALL\s+(Java_[\w_]+)_([\w_]+)", re.DOTALL
)
# Dynamic binding: initializer of a ``JNINativeMethod <name>[...] = {...};``
# table (presumably the one handed to RegisterNatives -- similar in
# principle to CPython's method tables).
_JNI_TABLE_RE = re.compile(
    r"JNINativeMethod\s+\w+_\w+\[[\d\s]*\]\s+=\s+\{(.*?\};)", re.DOTALL
)
# One table entry: {"<string>", "<string>", (<cast>) <identifier>}.
_JNI_ENTRY_RE = re.compile(r"\{\"(.*)\",\s*\"(.*)\",\s*\(.*?\)\s*(\w+)\}")


def _static_jni_bindings(file_content, file):
    """Return one mapping tuple per statically bound JNI export in *file_content*."""
    # File name up to its first dot. Working on the base name keeps dots in
    # directory parts (e.g. a leading "./") from producing an empty name.
    module = os.path.basename(file).split(".", 1)[0]
    bindings = []
    for match in _JNI_STATIC_RE.finditer(file_content):
        prefix, last_segment = match.group(1), match.group(2)
        # Drop everything through the first underscore ("Java_"), turn the
        # remaining underscores into dashes, then append ":<last_segment>".
        java_name = (
            prefix[prefix.index("_") + 1:].replace("_", "-") + ":" + last_segment
        )
        c_symbol = prefix + "_" + last_segment
        bindings.append((java_name, module + ":" + c_symbol, file))
    return bindings


def _dynamic_jni_bindings(file_content, file):
    """Return one mapping tuple per entry of the first JNINativeMethod table."""
    tables = _JNI_TABLE_RE.findall(file_content)
    if not tables:
        return []
    # NOTE(review): only the first table in a file is inspected -- confirm
    # whether files with several tables need all of them.
    return [
        (entry[0], entry[2], file) for entry in _JNI_ENTRY_RE.findall(tables[0])
    ]


def checkProject(path):
    """Collect JNI function bindings from the files below *path*.

    Every file yielded by ``findAllFile(path)`` is handed to a classifier
    built with ``".c .h"``. Files it matches with state ``"0"`` are read and
    scanned for statically bound exports (``JNIEXPORT ... JNICALL Java_...``)
    and for entries of a ``JNINativeMethod`` table.

    Args:
        path: Root directory to scan.

    Returns:
        A ``(c_func_list, inter_type_list)`` pair. ``c_func_list`` holds
        3-tuples: ``(java_name, "<module>:<c_symbol>", file)`` for static
        exports and ``(first_string, identifier, file)`` for table entries.
        ``inter_type_list`` gets one ``"JNI"`` entry per scanned file.
    """
    clf = classifier(".c .h")
    clf.creatCPYClassifier()
    c_func_list = []
    inter_type_list = []
    for file in findAllFile(path):
        is_inter, state = clf.Match(file)
        # Only state "0" is handled; it is labelled JNI below.
        if not is_inter or state != "0":
            continue
        inter_type_list.append("JNI")
        with open(file, 'r', encoding="utf8", errors="ignore") as f:
            file_content = f.read()
        c_func_list.extend(_static_jni_bindings(file_content, file))
        c_func_list.extend(_dynamic_jni_bindings(file_content, file))
    return c_func_list, inter_type_list