httpserve
  • Overview
  • Repository
  • Tickets
  • Statistics

Repository

Add a separate method to do HEAD requests, remove global variables

Parent commit: 3a40f5ab2e84bdce5c7d83fbd6ebf892f219533e
Child commit: b5122219cdcf5b04146d05f3b1935b712ab54d45

By Laurent Defert on 2013-06-08 16:37:41
Add a separate method to do HEAD requests, remove global variables

Browse content
Difference with parent commit 3a40f5ab2e84bdce5c7d83fbd6ebf892f219533e
Files modified:
httpget.py
--- 
+++ 
@@ -26,8 +26,6 @@
 CONFIG_SECTION = 'client'
 IO_SIZE = 1024 * 1024
 CLIENT_AGENT = 'httpget/1.0'
-args = None
-conf = None
 
 def get_term_size():
     # From: http://bytes.com/topic/python/answers/837969-unable-see-os-environ-columns
@@ -199,7 +197,7 @@
             if self.scheme == 'https':
                 self.port = 443
 
-        url = url.replace('//', '/').strip('/')
+        url = url.replace('//', '/')
 
         if not '/' in url:
             self.host = url
@@ -328,44 +326,58 @@
         self.http_status = 0
 
 class Download(HTTPConnection):
-    def __init__(self, url, show_progress_bar=True):
+    def __init__(self, url, conf, show_progress_bar):
+        self.conf = conf
+        self.length = None
+        self.content_type = None
         self.parse_url(url)
         if show_progress_bar:
             self.progress_bar = ProgressBar(threading.Lock())
         else:
             self.progress_bar = VoidProgressBar()
 
+    def read_attrs(self):
+        while True:
+            headers = {'User-Agent:': CLIENT_AGENT}
+            head = self.get_connection()
+            head.request('HEAD', self.path, headers=headers)
+            head = head.getresponse()
+
+            if head.status in (301, 302, 307):  # Temporary redirect
+                url = head.msg.getheader('location')
+                self.parse_url(url)
+                print '[HTTP %i] Redirected to %s' % (head.status, self.url)
+                continue
+
+            if head.status != 200:  # Ok
+                print 'Invalid status code %s %s' % (self.url, head.status)
+                return
+            break
+
+        self.length = head.getheader('Content-Length', None)
+        if self.length is not None:
+            self.length = int(self.length)
+
+        print "headers:", head.getheaders()
+        self.content_type = head.getheader('Content-Type', None)
+
+        # Remove encoding type
+        if self.content_type is not None:
+            for separator in [',', ';']:
+                if separator in self.content_type:
+                    self.content_type = self.content_type.split(separator, 1)[0]
+        print 'content-type', self.content_type
+
     def download(self, hdd_filename):
-        headers = {'User-Agent:': CLIENT_AGENT}
-        head = self.get_connection()
-        head.request('HEAD', self.path, headers=headers)
-        head = head.getresponse()
-
-        if head.status in (301, 302, 307):  # Temporary redirect
-            url = head.msg.getheader('location')
-            self.parse_url(url)
-            print '[HTTP %i] Redirected to %s' % (head.status, self.url)
-            head = self.get_connection()
-            head.request('HEAD', self.url, headers=headers)
-            head = head.getresponse()
-
-        if head.status != 200:  # Ok
-            print 'Invalid status code %s %s' % (self.url, head.status)
-            return
-
-        length = head.getheader('Content-Length', None)
-        if length is not None:
-            length = int(length)
-
-        hdd_file = HDDFile(hdd_filename, length)
-
-        self.progress_bar.SetMax(length)
+        hdd_file = HDDFile(hdd_filename, self.length)
+
+        self.progress_bar.SetMax(self.length)
 
         urls = [self.url]
 
         # When the filesize is known, try downloading from the server pool
-        if length is not None and length > IO_SIZE:
-            for url in args.urls_list.split(','):
+        if self.length is not None and self.length > IO_SIZE:
+            for url in self.conf.urls_list.split(','):
                 urls += [url + self.path]
             chunk_no = 0
         else:
@@ -380,9 +392,9 @@
 
         processes = []
         for url_no, url in enumerate(urls):
-            if len(processes) == args.nconn:
+            if len(processes) == self.conf.nconn:
                break
-            get = HTTPGet(hdd_file, url, chunk_no, 1, length, self.progress_bar)
+            get = HTTPGet(hdd_file, url, chunk_no, 1, self.length, self.progress_bar)
             get.start()
             processes.append(get)
 
@@ -400,11 +412,11 @@
                             chk_no, chk_count = missing.pop(0)
                         else:
                             chk_no = chunk_no
-                            chk_count = length / IO_SIZE / len(processes) / 2
+                            chk_count = self.length / IO_SIZE / len(processes) / 2
                             if chk_count < 1:
                                 chk_count = 1
                             chunk_no += chk_count
-                        get = HTTPGet(hdd_file, get.url, chk_no, chk_count, length, self.progress_bar)
+                        get = HTTPGet(hdd_file, get.url, chk_no, chk_count, self.length, self.progress_bar)
                         get.start()
                         _processes.append(get)
                     else:
@@ -416,7 +428,7 @@
             sleep(0.1)
 
         print
-        hdd_file.close()
+        return hdd_file
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Serve a file over http.')
@@ -471,8 +483,10 @@
 
     for url in args.url:
         try:
-            d = Download(url)
-            d.download(d.filename)
+            d = Download(url, args, True)
+            d.read_attrs()
+            f = d.download(d.filename)
+            f.close()
         except KeyboardInterrupt:
             break
         except Exception: