Factorize
  • Overview
  • Repository
  • Tickets
  • Statistics
  • Projects

Repository

Add some comments, rename some variables for readability

Parent commits : 22efb65edd433a68df5294aadece16d80aa5a348,
Children commits : 2916ca1955bd7be7b04f9c7ab09f10f403ff406d,

By Laurent Defert on 2011-04-11 01:04:34
Add some comments, rename some variables for readability

Browse content
Difference with parent commit 22efb65edd433a68df5294aadece16d80aa5a348
Files modified:
factorize.py
--- 
+++ 
@@ -27,12 +27,12 @@
 
 class FactorizedLine:
 	def __init__(s, parent = None):
-		s.parent = parent
+		s.parent = parent		# Parent node
 		s.lines = []			# Non-factorized lines
-		s.word_count = []		# Word count by columns
-		s.factorized_lines = []		# List of FactorizedLine
-		s.factorized_words = []		# List of FactorizedLine
-		s.line_count = 0		# List of FactorizedLine
+		s.word_count = []		# Occurrence count by columns
+		s.factorized_lines = []	# List of FactorizedLine below this node
+		s.factorized_words = [] # Words composing this node
+		s.line_count = 0		# Number of lines below this node
 		s.folded = True
 
 	def get_level(s):
@@ -56,11 +56,13 @@
 
 		return lvl
 
+	# Add a new (plaintext) line
 	def add_line(s, line):
 		s.lines.append(line)
 		while len(s.word_count) < len(line.words):
 			s.word_count.append({})
 
+		# Update the statistics of word occurrence by column
 		for word_no, word in enumerate(line.words):
 			if not word.isCommon():
 				if not word in s.word_count[word_no]:
@@ -72,10 +74,10 @@
 		for word_no, word in enumerate(line.words):
 			if not word.isCommon():
 				s.word_count[word_no][word] -= 1
-
-	def get_max(s):
-		col_max = 0
-		max = 0
+            
+	def get_most_occuring_word(s):
+		col_max = 0  # Column where the most frequently occurring word is
+		max = 0  # Number of occurrences
 		word_max = ""
 
 		for col_no, col in enumerate(s.word_count):
@@ -87,19 +89,21 @@
 
 		return (col_max, word_max, max)
 
+	## Factorize all lines
 	def factorize(s, col_no, word):
 		factorized = FactorizedLine(s)
 		to_remove = []
-		new_list = []
-
+		non_factorized_lines = []
+
+		# For each line
 		for line in s.lines:
 			if len(line.words) <= col_no:
-				new_list.append(line)
+				non_factorized_lines.append(line)
 				continue
 			if line.words[col_no] == word:
 				to_remove.append(line)
 			else:
-				new_list.append(line)
+				non_factorized_lines.append(line)
 
 		for line in to_remove:
 			s.remove_line(line)
@@ -108,16 +112,17 @@
 
 		factorized.build_common_words()
 		s.factorized_lines.append(factorized)
-		s.lines = new_list
+		s.lines = non_factorized_lines
 
 	def build_common_words(s):
+		# Find the line with the fewest columns
 		min_line = s.lines[0].words
-		min = len(min_line)
+		min_line_len = len(min_line)
 
 		for line in s.lines:
-			if len(line.words) < min:
-				min = len(line.words)
+			if len(line.words) < min_line_len:
 				min_line = line.words
+				min_line_len = len(line.words)
 	
 		for word in min_line:
 			s.factorized_words.append(copy(word))
@@ -125,7 +130,7 @@
 	def build(s):
 		s.line_count = len(s.lines)
 		while len(s.lines) != 0:
-			col_max, word_max, max = s.get_max()
+			col_max, word_max, max = s.get_most_occuring_word()
 
 			if max <= 1:
 				break
@@ -205,7 +210,7 @@
 	def isCommon(s):
 		return s.is_common
 
-class Log:
+class LogFile:
 	def __init__(s, file, opt):
 		with open(file, 'r') as input:
 			s.tree = FactorizedLine()
@@ -238,5 +243,6 @@
 	if len(args) == 0:
 		opt_parser.print_help()
 		exit(1)
-	log = Log(args[0], opt)
+	log = LogFile(args[0], opt)
+	#log.display()
 	gui.start(log, opt)

Generated with KisssPM