---
+++
@@ -27,12 +27,12 @@
class FactorizedLine:
def __init__(s, parent = None):
- s.parent = parent
+ s.parent = parent # Parent node
s.lines = [] # Non-factorized lines
- s.word_count = [] # Word count by columns
- s.factorized_lines = [] # List of FactorizedLine
- s.factorized_words = [] # List of FactorizedLine
- s.line_count = 0 # List of FactorizedLine
+ s.word_count = [] # Per-column word occurrence counts
+ s.factorized_lines = [] # List of FactorizedLine below this node
+ s.factorized_words = [] # Words composing this node
+ s.line_count = 0 # Number of lines below this node
s.folded = True
def get_level(s):
@@ -56,11 +56,13 @@
return lvl
+ # Add a new (plaintext) line
def add_line(s, line):
s.lines.append(line)
while len(s.word_count) < len(line.words):
s.word_count.append({})
+ # Update the per-column word occurrence statistics
for word_no, word in enumerate(line.words):
if not word.isCommon():
if not word in s.word_count[word_no]:
@@ -72,10 +74,10 @@
for word_no, word in enumerate(line.words):
if not word.isCommon():
s.word_count[word_no][word] -= 1
-
- def get_max(s):
- col_max = 0
- max = 0
+
+ def get_most_occuring_word(s):
+ col_max = 0 # Column where the most frequently occurring word is
+ max = 0 # Number of occurrences
word_max = ""
for col_no, col in enumerate(s.word_count):
@@ -87,19 +89,21 @@
return (col_max, word_max, max)
+ ## Factorize the lines whose word in column col_no equals word
def factorize(s, col_no, word):
factorized = FactorizedLine(s)
to_remove = []
- new_list = []
-
+ non_factorized_lines = []
+
+ # For each line
for line in s.lines:
if len(line.words) <= col_no:
- new_list.append(line)
+ non_factorized_lines.append(line)
continue
if line.words[col_no] == word:
to_remove.append(line)
else:
- new_list.append(line)
+ non_factorized_lines.append(line)
for line in to_remove:
s.remove_line(line)
@@ -108,16 +112,17 @@
factorized.build_common_words()
s.factorized_lines.append(factorized)
- s.lines = new_list
+ s.lines = non_factorized_lines
def build_common_words(s):
+ # Find the line with the fewest columns
min_line = s.lines[0].words
- min = len(min_line)
+ min_line_len = len(min_line)
for line in s.lines:
- if len(line.words) < min:
- min = len(line.words)
+ if len(line.words) < min_line_len:
min_line = line.words
+ min_line_len = len(line.words)
for word in min_line:
s.factorized_words.append(copy(word))
@@ -125,7 +130,7 @@
def build(s):
s.line_count = len(s.lines)
while len(s.lines) != 0:
- col_max, word_max, max = s.get_max()
+ col_max, word_max, max = s.get_most_occuring_word()
if max <= 1:
break
@@ -205,7 +210,7 @@
def isCommon(s):
return s.is_common
-class Log:
+class LogFile:
def __init__(s, file, opt):
with open(file, 'r') as input:
s.tree = FactorizedLine()
@@ -238,5 +243,6 @@
if len(args) == 0:
opt_parser.print_help()
exit(1)
- log = Log(args[0], opt)
+ log = LogFile(args[0], opt)
+ #log.display()
gui.start(log, opt)
Generated with KisssPM