-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbase_parser.py
More file actions
66 lines (59 loc) · 2.04 KB
/
base_parser.py
File metadata and controls
66 lines (59 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import csv
class BaseParser(object):
    """Base class for commit-log analysis.

    Holds the repository name and the list of commits in ``self.cmts``.
    The methods below read each commit through the ``'commitno'`` and
    ``'message'`` keys, so every entry must support those lookups.
    Filling ``self.cmts`` is presumably the job of a subclass's
    :meth:`parse` -- confirm against the concrete parsers.
    """

    def __init__(self, name):
        """Create a parser for the repository *name* with no commits yet."""
        self.repo_name = name
        self.cmts = []

    def parse(self, cmtlog_fp):
        """Split a commit log into individual commits.

        The base implementation is a placeholder: it ignores *cmtlog_fp*,
        leaves ``self.cmts`` untouched and returns ``None``.
        """
        pass

    def message2csv(self, outputfile):
        """Write each commit's number and message to a tab-separated file.

        Args:
            outputfile: Path of the file to create (or overwrite). One row
                ``commitno<TAB>message`` is written per commit, in the
                order of ``self.cmts``.
        """
        # Local import keeps this block self-contained.
        import sys

        # The csv module needs a binary file on Python 2, but a text file
        # opened with newline='' on Python 3.
        if sys.version_info[0] >= 3:
            f = open(outputfile, 'w', newline='', encoding='utf-8')
        else:
            f = open(outputfile, 'wb')
        with f:
            writer = csv.writer(f, delimiter='\t')
            # Iterate self.cmts: the bare name ``cmts`` is undefined here
            # and raised NameError on every call.
            writer.writerows(
                [cmt['commitno'], cmt['message']] for cmt in self.cmts
            )

    def latest_cmt(self):
        """Return the first entry of ``self.cmts``.

        NOTE(review): the name implies ``self.cmts`` is ordered newest
        first -- confirm against the concrete parsers.

        Raises:
            IndexError: If no commits have been loaded.
        """
        return self.cmts[0]

    def printN(self, N=2):
        """Print the commit count, the first *N* commits and the last one.

        Args:
            N: How many leading commits to show. Values larger than the
                number of commits are clamped; values <= 0 show none.
        """
        rule = '-----------------------------'
        total = len(self.cmts)
        print(rule)
        print('#commits: %d' % total)
        print(rule)
        # Slicing clamps N to the available commits, so a short list no
        # longer raises IndexError.
        for i, cmt in enumerate(self.cmts[:max(N, 0)]):
            print('%s %s' % (i, cmt))
        # There is a "last" commit to show only when the list is non-empty.
        if total:
            print('......')
            print('%s %s' % (total - 1, self.cmts[-1]))
        print(rule)

    def kwselect(self, kws, pop=0.1):
        """Return the commits whose message contains any of the keywords.

        Matching is a case-insensitive substring test against
        ``cmt['message']``. A summary is printed as a side effect: the
        keywords that matched at least ``pop * len(self.cmts)`` commits,
        and the number of commits selected.

        For more complicated filters the original author points to
        ``kwfilter`` (not defined in this class).

        Args:
            kws: Iterable of keyword strings.
            pop: Fraction of all commits at or above which a keyword is
                reported as "too popular".

        Returns:
            A list of the matching commits, in ``self.cmts`` order.
        """
        rule = '-----------------------------------------------------------------------------'
        # Lower-case every keyword once instead of once per commit. Building
        # a list also lets ``kws`` be a one-shot iterable.
        keywords = [(kw, kw.lower()) for kw in kws]

        res = []
        kwmap = {}  # keyword (as given) -> number of commits it matched
        for cmt in self.cmts:
            msg = cmt['message'].lower()
            matched = False
            for kw, kwl in keywords:
                if kwl in msg:
                    matched = True
                    kwmap[kw] = kwmap.get(kw, 0) + 1
            if matched:
                res.append(cmt)

        threshold = pop * len(self.cmts)
        print(rule)
        # NOTE: the literal "50" in this message does not track the actual
        # cut-off, which is ``threshold``; the text is kept verbatim so the
        # printed output stays unchanged.
        print('The too popular keywords (count >= 50): ')
        print('(perhaps we need to comment them out)')
        for kw in kwmap:
            if kwmap[kw] >= threshold:
                print('%s %s' % (kw, kwmap[kw]))
        print(rule)
        print('Number of commits we select out:')
        print(len(res))
        print(rule)
        return res