magma.magma.sha1

Source Code for Module magma.magma.sha1_gnutella

1 #!/usr/bin/env python 2 # encoding: utf-8 3 4 # MAGnet MAnifest Management - Readout and create lists of magnets in yaml format. 5 # 6 # Copyright © 2008 Arne Babenhauserheide 7 # 8 # This program is free software: you can redistribute it and/or modify 9 # it under the terms of the GNU General Public License as published by 10 # the Free Software Foundation, either version 3 of the License, or 11 # (at your option) any later version. 12 # 13 # This program is distributed in the hope that it will be useful, 14 # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 # GNU General Public License for more details. 17 # 18 # You should have received a copy of the GNU General Public License 19 # along with this program. If not, see <http://www.gnu.org/licenses/> 20 21 22 """Sha1 Gnutella - Readout files and calculate the sha1-hashes. Output them base32 encoded. 23 24 Usage: 25 - sha1_gnutella.py <file1> <file2> ... 26 27 28 Output: 29 <sha1-hash1> <filename1> 30 <sha1-hash2> <filename2> 31 """ 32 33 #### Background #### 34 35 __depends__ = 'hashlib, base64' 36 37 #### Background #### 38 39 #### Imports #### 40 41 # We need the sha1-hashing function from haslib 42 from hashlib import sha1 43 44 # and for encoding the hash we need the base32 encoding function from base64 45 from base64 import b32encode 46 47 from os.path import isfile 48 49 #### Imports #### 50 51 #### Constants #### 52 53 #: The length of the segments to read in one go. 54 FILE_FRAGMENT_SIZE_BYTES = 10 * 1024 * 1024 # 10 MiB, because it doesn't really make a difference, but like this, the disk might be able to do something else for a moment, if the files are bigger than 10MiB, and smaller files get accessed only once. 55 56 #### Constants #### 57 58

def sha1_gnutella(filepath, fragment_size=FILE_FRAGMENT_SIZE_BYTES):
    """Calculate the sha1 hash of a file and return it base32 encoded, as is required by magnet links.

    The file is opened in binary mode and fed to the hash fragment by
    fragment, so arbitrarily large files can be hashed without running out
    of memory.

    @param filepath: The path to the file to hash. Must be a file! This function is too low level to add recursion into subdirectories.
    @type filepath: String

    @param fragment_size: Length of the fragment to read in bytes. A negative value reads the whole file in one go.
    @type fragment_size: Int

    @return: The sha1 hash, base32 encoded (String).

    @raise ValueError: If fragment_size is 0 (no data could ever be read).
    @raise Exception: If filepath is not a regular file or cannot be opened.
    """
    # First of all we check whether the path leads to a file.
    if not isfile(filepath):
        raise Exception("Error when reading file. Does the path exist? " + filepath)

    # A fragment size of 0 would make every read return nothing, so the
    # file could never be consumed; reject it instead of looping forever.
    if fragment_size == 0:
        raise ValueError("fragment_size must not be 0")

    # Open in binary mode: the hash has to be computed over the raw bytes,
    # without newline translation or text decoding.
    try:
        source = open(filepath, "rb")
    except (IOError, OSError):
        # The message on stdout is kept for callers which swallow the exception.
        print("Error when reading file. Does the path exist? %s" % filepath)
        raise Exception("Error when reading file. Does the path exist? " + filepath)

    #: The sha1-function into which we route the data.
    sha = sha1()

    # try/finally guarantees that the file gets closed even if a read fails.
    try:
        while True:
            fragment = source.read(fragment_size)
            # An empty read is the only reliable end-of-file signal.
            if not fragment:
                break
            sha.update(fragment)
    finally:
        source.close()

    # Via digest() we get the sha1 hash as bytes;
    # we put it through b32encode() to generate
    #: The base32 encoded sha1-hash
    xt_sha1 = b32encode(sha.digest())

    # b32encode() returns a native string on Python 2 but bytes on Python 3;
    # normalise so that callers always receive a native string.
    if not isinstance(xt_sha1, str):
        xt_sha1 = xt_sha1.decode("ascii")

    # At last return the base32 encoded sha1 hash of the file.
    return xt_sha1


#### User-Input ####

# This sample input just calculates the sha1-hash of the script itself.

filepath = "sha1_gnutella.py"
# To ask the user for the path instead:
# filepath = raw_input("Enter file name and path: ")

#### User-Input ####

#### Self-Test ####


def test_speed(files, min=1024 * 1024, max=15*1024*1024, step=1024 * 1024):
    """Test different settings of the fragment_size.

    For every fragment size in range(min, max, step) all given files are
    hashed once with sha1_gnutella() and the elapsed time of that pass is
    printed.

    Result: The fragment_size is almost irrelevant. Anything between 512kiB and 10MiB offers about the same speed.

    @param files: The files to hash.
    @type files: List
    @param min: First fragment size to test, in bytes.
    @type min: Int
    @param max: Fragment size at which testing stops (exclusive), in bytes.
    @type max: Int
    @param step: Increment between two tested fragment sizes, in bytes.
    @type step: Int
    @return: None
    """
    from time import time

    for fragment_size in range(min, max, step):
        start_time = time()
        for path in files:
            try:
                sha1_gnutella(path, fragment_size)
            except Exception:
                # Best effort: files which cannot be hashed are skipped.
                # Only Exception is caught, so that Ctrl-C still aborts
                # the benchmark.
                pass
        stop_time = time()
        # A single formatted string gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("Segment_Size (MiB): %s time: %s"
              % (fragment_size / 1024.0 / 1024.0, stop_time - start_time))

156

def help():
    """Display the usage information (the module docstring)."""
    print(__doc__)


# If the script is called directly
# and given a commandline argument,
# parse that file.

if __name__ == "__main__":
    from sys import argv, stderr
    if len(argv) < 2 or argv[1] in ["-h", "--help", "?"]:
        help()

    else:
        # Calculate a sha1 for all given files.
        for i in argv[1:]:
            try:
                print("%s %s" % (sha1_gnutella(i), i))
            except Exception as error:
                # Keep going with the remaining files, but tell the user
                # which file failed instead of dropping it silently.
                # Only Exception is caught, so Ctrl-C still stops the run.
                stderr.write("sha1_gnutella: %s: %s\n" % (i, error))

#### Self-Test ####
