-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGMADCC.py
102 lines (82 loc) · 3.92 KB
/
GMADCC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#=============================================================================#
# GMADCC Algorithm #
# To solve random choice on disconnected software class graphs. #
# By: Masoud Azizi Email: [email protected] #
#=============================================================================#
import builtins
import inspect
import ntpath
import os
import random
import re
import string
from difflib import SequenceMatcher
from time import time
class GMADisconnectedClassClassifier:
    """Score pairwise similarity between source files listed in ``paths``.

    The score produced by :meth:`getSim` combines two kinds of evidence:
    the ``#include `` lines found in each file and the files' base names.
    All individual measures are percentages in the range 0-100.
    """

    # Sequence of file paths; indexed by the integers passed to getSim().
    paths = None
    # Not referenced by any method of this class; kept so that external
    # code reading or assigning this attribute keeps working.
    disconectedClassesIndex = None

    def __init__(self, paths):
        """Store *paths*, the indexable collection of source file paths."""
        self.paths = paths
        super().__init__()

    def longest_common_substring(self, s1, s2):
        """Return the longest contiguous run shared by *s1* and *s2*.

        The result is a slice of *s1*. When several runs tie for the
        longest length, the one that ends first in *s1* is returned.
        An empty slice is returned when nothing is shared.
        """
        longest, x_longest = 0, 0
        width = len(s2) + 1
        # Only the previous row of the dynamic-programming table is ever
        # read, so two rows are enough instead of the full matrix.
        previous = [0] * width
        for x, item1 in enumerate(s1, start=1):
            current = [0] * width
            for y, item2 in enumerate(s2, start=1):
                if item1 == item2:
                    current[y] = previous[y - 1] + 1
                    if current[y] > longest:
                        longest = current[y]
                        x_longest = x
            previous = current
        return s1[x_longest - longest:x_longest]

    def similarity(self, s1, s2):
        """Return the longest common substring as a percentage (0-100).

        The common length is doubled because the substring occurs in both
        inputs, then divided by their combined length. Returns 0.0 when
        both inputs are empty (previously a ``ZeroDivisionError``).
        """
        combined = len(s1) + len(s2)
        if combined == 0:
            return 0.0
        common = len(self.longest_common_substring(s1, s2))
        return (common * 2) / combined * 100

    def diff(self, str1, str2):
        """Return the ``SequenceMatcher`` ratio of the inputs as a percentage.

        Falsy inputs (``None``, ``""``, ``[]``) are treated as an empty
        string. A single space is registered as a junk element.
        """
        matcher = SequenceMatcher(lambda x: x == " ", str1 or "", str2 or "")
        return matcher.ratio() * 100

    def setMaxMatchFinder(self, set1, set2):
        """Return the share of distinct common items as a percentage.

        The number of distinct items present in both collections is
        divided by the length of the shorter collection. Returns 0.0 when
        either collection is empty (previously a ``ZeroDivisionError``).
        """
        smallest = min(len(set1), len(set2))
        if smallest == 0:
            return 0.0
        shared = len(set(set1) & set(set2))
        return 100 * shared / smallest

    def getFileContants(self, path):
        """Return the targets of the ``#include `` lines found in *path*.

        Backslashes in *path* are converted to forward slashes and ``+``
        characters are removed before the file is opened. Only lines that
        begin with ``#include `` (no leading whitespace) are kept; each is
        stripped, has its double quotes removed and loses the prefix.
        Undecodable bytes are ignored.

        Raises:
            OSError: if the file cannot be opened.
        """
        prefix = "#include "
        normalized = path.replace("\\\\", "/").replace("\\", "/").replace("+", "")
        # The context manager closes the file even if reading fails.
        with open(normalized, "r", errors='ignore') as handle:
            return [
                line.strip().replace("\"", "")[len(prefix):]
                for line in handle
                if line.startswith(prefix)
            ]

    def getFileName(self, path):
        """Return the final component of *path* (``ntpath.basename``)."""
        return ntpath.basename(path)

    def getSim(self, ci, oci):
        """Return the similarity score of ``paths[ci]`` and ``paths[oci]``.

        The score is the sum of four weighted percentages, rounded to two
        decimals. Because the weights add up to 0.75, the maximum is 75:

        * include overlap (``setMaxMatchFinder``) ........ weight 1/4
        * include list diff (``diff``) .................... weight 1/4
        * file name longest common substring .............. weight 1/8
        * file name diff (``diff``) ....................... weight 1/8

        Both files are read from disk on every call.
        """
        className = self.getFileName(self.paths[ci])
        classText = self.getFileContants(self.paths[ci])
        otherClassName = self.getFileName(self.paths[oci])
        otherClassText = self.getFileContants(self.paths[oci])

        # NOTE: a longest-common-substring comparison of the include lists
        # was disabled by the original author as too expensive.
        include_overlap = self.setMaxMatchFinder(classText, otherClassText) / 4
        include_diff = self.diff(classText, otherClassText) / 4
        name_substring = self.similarity(className, otherClassName) / 8
        name_diff = self.diff(className, otherClassName) / 8

        sim = round(
            include_overlap + include_diff + name_substring + name_diff, 2)
        return sim