1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Sha1 Gnutella - Readout files and calculate the sha1-hashes. Output them base32 encoded.
23
24 Usage:
25 - sha1_gnutella.py <file1> <file2> ...
26
27
28 Output:
29 <sha1-hash1> <filename1>
30 <sha1-hash2> <filename2>
31 """
32
33
34
35 __depends__ = 'hashlib, base64'
36
37
38
39
40
41
42 from hashlib import sha1
43
44
45 from base64 import b32encode
46
47 from os.path import isfile
48
49
50
51
52
53
# Default number of bytes read per fragment when hashing a file (10 MiB).
FILE_FRAGMENT_SIZE_BYTES = 10 * 1024 * 1024


def sha1_gnutella(filepath, fragment_size=FILE_FRAGMENT_SIZE_BYTES):
    """Calculate the sha1 hash and output it base32 encoded as is required by magnet links.

    The file is opened in binary mode and fed to the hash fragment by
    fragment, so at most one fragment is held in memory at a time.

    @param filepath: The path to the file to hash. Must be a file! This function is too low level to add recursion into subdirectories.
    @type filepath: String

    @param fragment_size: Length of the fragment to read in bytes. Must not be 0.
    @type fragment_size: Int

    @return: The sha1 hash, base32 encoded (String).

    @raise ValueError: If fragment_size is 0.
    @raise IOError: If filepath is not a regular file or cannot be opened.
    """
    # A zero-sized read always returns an empty string, so no progress could
    # ever be made through the file.
    if fragment_size == 0:
        raise ValueError("fragment_size must not be 0")

    message = "Error when reading file. Does the path exist? %s" % (filepath,)

    if not isfile(filepath):
        raise IOError(message)

    try:
        # Binary mode: the hash must cover the exact bytes on disk, without
        # newline translation or text decoding.
        source = open(filepath, "rb")
    except (IOError, OSError):
        print(message)
        raise IOError(message)

    sha = sha1()
    # The with-block closes the file even if a read fails half-way through.
    with source:
        # Keep reading until read() reports end-of-file with an empty result.
        for fragment in iter(lambda: source.read(fragment_size), b""):
            sha.update(fragment)

    xt_sha1 = b32encode(sha.digest())
    # b32encode returns bytes on Python 3; hand back a text string either way.
    if not isinstance(xt_sha1, str):
        xt_sha1 = xt_sha1.decode("ascii")
    return xt_sha1
124
125
126
127
128
129 filepath = "sha1_gnutella.py"
130
131
132
133
134
135
136 -def test_speed(files, min=1024 * 1024, max=15*1024*1024, step=1024 * 1024):
137 """Test different settings of the fragment_size.
138
139 Result: The fragment_size is almost irrelevant. Anything between 512kiB and 10MiB offers about the same speed.
140
141 @param files: The files to hash.
142 @type files: List
143 @return: None
144 """
145 from time import time
146 start_time = 0
147 stop_time = 0
148 for i in range(min, max, step):
149 start_time = time()
150 for j in files:
151 try:
152 sha1_gnutella(j, i)
153 except: pass
154 stop_time = time()
155 print "Segment_Size (MiB):", i / 1024.0 / 1024.0, "time:", stop_time - start_time
156
def help():
    """Display the usage information.

    Prints this module's docstring to standard output. Within this module
    the name shadows the builtin help().

    @return: None
    """
    # Call form of print works identically on Python 2 and Python 3 here.
    print(__doc__)
160
161
162
163
164
if __name__ == "__main__":
    # Command-line entry point: print "<hash> <filename>" for every argument,
    # or the usage text when called without arguments or with a help flag.
    from sys import argv, stderr

    if len(argv) < 2 or argv[1] in ["-h", "--help", "?"]:
        help()
    else:
        for path in argv[1:]:
            try:
                print("%s %s" % (sha1_gnutella(path), path))
            except Exception as error:
                # Keep going with the remaining files, but tell the user
                # which one failed instead of dropping it silently.
                stderr.write("Skipping %s: %s\n" % (path, error))
176
177
178