Package magma :: Package magma :: Module sha1_gnutella
[hide private]
[frames] | no frames]

Source Code for Module magma.magma.sha1_gnutella

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3   
  4  # MAGnet MAnifest Management - Readout and create lists of magnets in yaml format.  
  5  #  
  6  # Copyright © 2008 Arne Babenhauserheide 
  7  #  
  8  # This program is free software: you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation, either version 3 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/> 
 20   
 21   
 22  """Sha1 Gnutella - Readout files and calculate the sha1-hashes. Output them base32 encoded.  
 23   
 24  Usage:  
 25   - sha1_gnutella.py <file1> <file2> ...  
 26   
 27   
 28  Output:  
 29  <sha1-hash1> <filename1> 
 30  <sha1-hash2> <filename2> 
 31  """  
 32   
 33  #### Background #### 
 34   
 35  __depends__ = 'hashlib, base64' 
 36   
 37  #### Background #### 
 38   
 39  #### Imports #### 
 40   
  41  # We need the sha1-hashing function from hashlib 
 42  from hashlib import sha1 
 43   
 44  # and for encoding the hash we need the base32 encoding function from base64 
 45  from base64 import b32encode 
 46   
 47  from os.path import isfile 
 48   
 49  #### Imports #### 
 50   
 51  #### Constants #### 
 52   
 53  #: The length of the segments to read in one go.  
 54  FILE_FRAGMENT_SIZE_BYTES =  10 * 1024 * 1024 # 10 MiB, because it doesn't really make a difference, but like this, the disk might be able to do something else for a moment, if the files are bigger than 10MiB, and smaller files get accessed only once.  
 55   
 56  #### Constants #### 
 57   
 58   
59 -def sha1_gnutella(filepath, fragment_size = FILE_FRAGMENT_SIZE_BYTES):
60 """Calculate the sha1 hash and output it base32 encoded as is required by magnet links. 61 62 @param filepath: The path to the file to hash. Must be a file! This function is too low level to add recursion into subdirectories. 63 @type filepath: String 64 65 @param fragment_size: Length of the fragment to read in bytes. 66 @type fragment_size: Int 67 68 @return: The sha1 hash, base32 encoded (String). 69 """ 70 71 # As really first part, we check, if the path leads to a file. If it doesn't we just return. 72 if not isfile(filepath): 73 raise Exception("Error when reading file. Does the path exist? " + filepath) 74 75 # First we create 76 #: The file-object 77 try: 78 file = open(filepath, "r") 79 except: 80 print "Error when reading file. Does the path exist?", filepath 81 raise Exception("Error when reading file. Does the path exist? " + filepath) 82 # Then we get 83 #: A line of data readout from the file. 84 fragment = file.read(fragment_size) 85 86 # so we can calculate arbitrary large files without running out of memory. 87 88 # Next we call in 89 #: The sha1-function 90 sha = sha1() 91 92 # Into which we can then route the data. 93 94 # If the fragment is smaller than the fragment_size, 95 if len(fragment) < fragment_size and False: 96 # we can close the file instantly 97 # and then get the sha1. 98 # This way we get rid uf uselessly open files at once. 99 file.close() 100 sha.update(fragment) 101 else: 102 # while the fragment isn't empty 103 # We put the fragments into sha1. 104 105 # We first update once 106 sha.update(fragment) 107 108 # And then only update as long as the length of the fragment is equal to the fragment size. 109 # If we'd test, if it's empty, we'd need one useless read. 110 while len(fragment) == fragment_size: 111 # Get the next fragment 112 fragment = file.read(fragment_size) 113 # And update the sha1 114 sha.update(fragment) 115 file.close() 116 # Via digest, this function can then output 117 # the sha1 hash as bytes. 
118 # we put it through bb32encode() to generate 119 #: The base32 encoded sha1-hash 120 xt_sha1 = b32encode(sha.digest()) 121 122 # At last return the base32 encoded sha1 hash of the file. 123 return xt_sha1 124 125 #### User-Input #### 126 127 # This sample input just calculates the sha1-hash of the script itself. 128 129 filepath = "sha1_gnutella.py" 130 # filepath = raw_input("Dateiname und Pfad eingeben: ") 131 132 #### User-Input #### 133 134 #### Self-Test #### 135
def test_speed(files, min=1024 * 1024, max=15 * 1024 * 1024, step=1024 * 1024):
    """Test different settings of the fragment_size.

    Hashes all given files once per fragment size and prints the time each
    pass took.

    Result: The fragment_size is almost irrelevant. Anything between 512kiB and 10MiB offers about the same speed.

    @param files: The files to hash.
    @type files: List

    @param min: The smallest fragment size to test, in bytes.
    @type min: Int

    @param max: The upper bound of the fragment sizes to test, in bytes (exclusive).
    @type max: Int

    @param step: The difference between two tested fragment sizes, in bytes.
    @type step: Int

    @return: None
    """
    from time import time

    # min and max shadow the builtins inside this function. The names are
    # kept because callers may pass them as keyword arguments.
    for fragment_size in range(min, max, step):
        start_time = time()
        for path in files:
            try:
                sha1_gnutella(path, fragment_size)
            except Exception:
                # Best effort: a file which cannot be hashed is skipped, so
                # the measurement continues. Only Exception is caught, so
                # Ctrl-C still interrupts the benchmark.
                pass
        stop_time = time()
        print("Segment_Size (MiB): %s time: %s" % (fragment_size / 1024.0 / 1024.0, stop_time - start_time))
156
def help():
    """Display the usage information.

    Prints the docstring of this module.

    @return: None
    """
    print(__doc__)


# If the script is called directly
# and given commandline arguments,
# hash the given files.

if __name__ == "__main__":
    from sys import argv, stderr
    if len(argv) < 2 or argv[1] in ["-h", "--help", "?"]:
        help()

    else:
        # Calculate a sha1 for all given files.
        for i in argv[1:]:
            try:
                print("%s %s" % (sha1_gnutella(i), i))
            except Exception as error:
                # Keep going with the remaining files, but say on stderr
                # why this one produced no hash instead of dropping it silently.
                stderr.write("%s\n" % error)

#### Self-Test ####