KisssPM
  • Overview
  • Repository
  • Tickets
  • Statistics
  • Credits
  • Projects

Repository

Parse only text files

Parent commits : 2c0a99a18bb079e23dfa571b74bd675032948012,
Children commits : 5e72d6b322ad99ded3c34d6a4ac71b4c350880ad,

By Laurent Defert on 2015-04-19 16:53:26
Parse only text files

Pygments sometimes misdetects the mimetype of a file.
Its parser then gets stuck when processing that file.

Browse content
Difference with parent commit 2c0a99a18bb079e23dfa571b74bd675032948012
Files modified:
kissspm/lexer.py
--- 
+++ 
@@ -37,7 +37,7 @@
     if mime is None:
         mime = get_mime(content)
 
-    if mime in ['application/x-object', 'application/x-executable', 'application/x-archive']:
+    if mime_is_binary(mime):
         return None
 
     try:
@@ -65,3 +65,15 @@
     if mime == 'text/x-shellscript':
         mime = 'application/x-shellscript'
     return mime
+
+def mime_is_binary(mimetype):
+    txt_mime = mimetype.startswith('text/')
+    txt_mime |= mimetype in ('application/x-shellscript',
+                             'application/x-sh',
+                             'application/x-sql',
+                             'application/x-x509-ca-cert',
+                             'application/xml',
+                             'application/xml-dtd',
+                             'application/x-latex',
+                             'application/x-mpegURL')
+    return not txt_mime

kissspm/tabs/forge.py
--- 
+++ 
@@ -9,6 +9,7 @@
     from kissspm.ordereddict import OrderedDict
 
 from ..git import Git
+from ..lexer import mime_is_binary
 from ..pages.copy_page import CopyPage
 from ..pages.rst_page import rst_from_file
 from ..pages.paginate_tmpl import PaginateTmpl
@@ -75,7 +76,7 @@
                 commit_stats._build_tree_stats(repo.repo['refs/heads/master'].tree)
                 mimetype = ''
 
-                mimecount = [(mime, count) for mime, count in commit_stats.mime_type_count.iteritems() if (mime.startswith('text/x-') or mime.startswith('application/x-'))]
+                mimecount = [(mime, count) for mime, count in commit_stats.mime_type_count.iteritems() if not mime_is_binary(mime)]
                 mimecount.sort(key=lambda x: -x[1])
 
                 # TODO: Count the number of lines to determine the mimetype

kissspm/tabs/stats.py
--- 
+++ 
@@ -4,8 +4,8 @@
 from itertools import izip, chain
 
 from ..CairoPlot import bar_plot, dot_line_plot, pie_plot
-from ..magic import Magic
-
+
+from ..lexer import get_mime, mime_is_binary
 from ..pages.page import Page
 from ..pages.template_page import TemplatePage
 from .repository.repository import RepositoryTab
@@ -67,8 +67,7 @@
                 self._build_tree_stats(entry['sha'])
             else:
                 # Guess mimetype
-                m = Magic(mime=True)
-                mime = m.from_buffer(str(self.repo.repo[entry['sha']]))
+                mime = get_mime(str(self.repo.repo[entry['sha']]))
                 self.mime_type_count[mime] = self.mime_type_count.get(mime, 0) + 1
 
     def render_miniature_activity(self, output):
@@ -173,15 +172,14 @@
             else:
                 # Guess mimetype
                 self.current_commit_sha.add(entry['sha'])
-                m = Magic(mime=True)
                 content = str(self.repo.repo[entry['sha']])
-                mime = m.from_buffer(content)
-                if mime.startswith('text/'):
-                    lines = len(content.splitlines())
+                mime = get_mime(content)
+                if mime_is_binary(mime):
+                    self.sloc_cache[entry['sha']] = 0
+                else:
+                    lines = content.count('\n')
                     sloc += lines
                     self.sloc_cache[entry['sha']] = lines
-                else:
-                    self.sloc_cache[entry['sha']] = 0
 
         return sloc
 

Generated with KisssPM