Package magma :: Package magma :: Module magma_list
[hide private]
[frames | no frames]

Source Code for Module magma.magma.magma_list

  1  #!/usr/bin/env python 
  2  # encoding: utf-8 
  3   
  4  # MAGnet MAnifest Management - Readout and create lists of magnets in yaml format.  
  5  #  
  6  # Copyright © 2008 Arne Babenhauserheide 
  7  #  
  8  # This program is free software: you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation, either version 3 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program.  If not, see <http://www.gnu.org/licenses/> 
 20   
 21   
 22  """Tools for reading out magma lists.  
 23   
 24  Usage: magma_list.py CMD [OPT] 
 25   
 26  Commands:  
 27      - magnets_from_magma MAGMA_FILE 
 28      - magma_from_files MAGMA FILE1 FILE2 ...  
 29   
 30  Examples:  
 31      - magma_list.py magnets_from_magma example-0.4.magma 
 32      - magma_list.py magma_from_files example-0.4.magma blah.txt foo.txt gam.txt 
 33  """ 
 34   
 35  __depends__ = "yaml" 
 36   
 37  try: # local install 
 38          from create_simple_magma_list import FILE_LIST_NAME, URN_PARAMETER_NAME, MAGMA_0_4_HEADER, FILENAME_PARAMETER 
 39  except: # Installed version 
 40          from magma.create_simple_magma_list import FILE_LIST_NAME, URN_PARAMETER_NAME, MAGMA_0_4_HEADER, FILENAME_PARAMETER 
 41   
 42   
 43  # We need yaml loading and dumping as main component.  
 44  from yaml import load, dump 
 45   
 46  # Now we need the central magma class. This does most of the work.  
 47   
class Magma(object):
    """A Magma list: a dict of metadata plus a list of files.

    After construction the instance exposes:
      - data: the complete Magma data as a Python dict,
      - files: a list of MagmaFile objects built from the data,
      - magnets: the magnet link of every file,
      - metadata: the data without the list of files.
    """

    def __init__(self, magma_file=None, input_files=None, creator=None, data=None, yaml_data=None, *args, **kwds):
        """Create a Magma list from exactly one kind of input.

        The inputs are checked in the order data, yaml_data, magma_file,
        input_files; the first one which is not None is used.

        @param magma_file: The path to the magma file, either absolute or relative to the location this script is invoked from.
        @type magma_file: String

        @param input_files: A list of file_paths of input files which should be included in the magma list.
        @type input_files: List

        @param creator: An identifier for the creator of the Magma file.
        @type creator: String

        @param yaml_data: Raw file data as read from a Magma file.
        @type yaml_data: String

        @param data: A native Python dict with the data.
        @type data: Dict

        @raise Exception: If none of data, yaml_data, magma_file and input_files is given.
        """
        #: All data inside the Magma list
        self.data = None
        if data is not None:
            # Native data is used as it is.
            self.data = data
        elif yaml_data is not None:
            # Data in yaml format (String) has to be parsed.
            self.data = self.readout_yaml(yaml_data)
        elif magma_file is not None:
            # A magma file is read from disk and parsed.
            self.data = self.readout_magma_from_file(magma_file)
        elif input_files is not None:
            # Plain files are turned into fresh magma data.
            self.data = self.create_magma_from_files(file_paths=input_files)
        else:
            raise Exception("No input specified. Youl need to pass one of data=DATA, magma_file=PATH or input_files=[PATH1, PATH2, ...]")

        # If we get a creator name, also add that.
        if creator is not None:
            self.data["creator"] = creator

        #: All files inside the Magma list.
        # This instance attribute deliberately shadows the files() method:
        # after construction, magma.files is the list, not a callable.
        self.files = self.files()

        #: All magnets inside the Magma list.
        self.magnets = self.get_file_magnets()

        #: The metadata of this Magma list (excluding the files).
        self.metadata = self.get_magma_metadata()

    def __str__(self):
        """Return a human readable representation of the Magma list.

        @return: The Magma header followed by the yaml data, as produced by dump() with its default versions (String)."""
        return self.dump()

    def remove_empty_entries(self):
        """Remove empty entries in the Magma, for example empty alt-locs.

        The work is delegated to every file in self.files."""
        for magma_file in self.files:
            magma_file.remove_empty_entries()

    def create_magma_from_files(self, file_paths):
        """Create magma data from given file paths.

        @param file_paths: The paths of the files to include.
        @type file_paths: List
        @return: The Python dict for a Magma file - the magma data.
        """
        try:  # local install
            from create_simple_magma_list import Magma as SimpleMagma
        except ImportError:  # installed version
            from magma.create_simple_magma_list import Magma as SimpleMagma
        return SimpleMagma(file_paths).data

    def is_magma(self, yaml_data):
        """Check, if a given yaml data begins with the Magma header.

        @param yaml_data: Data in yaml format
        @type yaml_data: String
        @return: True or False
        """
        return yaml_data.startswith(MAGMA_0_4_HEADER)

    def readout_yaml(self, yaml_data):
        """Readout a magma list from data in yaml format.

        @param yaml_data: Data in yaml format.
        @type yaml_data: String
        @return: The Python dict for a Magma file - the magma data. An empty dict, if the yaml data contains nothing.
        @raise AssertionError: If the data doesn't begin with the Magma v0.4 header.
        """
        # Raised explicitly (instead of using an assert statement), so the
        # check is still done when Python runs with -O.
        if not self.is_magma(yaml_data):
            raise AssertionError("Doesn't begin with Magma v0.4 header " + MAGMA_0_4_HEADER)
        # NOTE(review): load() without an explicit Loader can build arbitrary
        # Python objects from untrusted input and is rejected by recent PyYAML
        # releases; consider switching to safe_load.
        data = load(yaml_data)
        # yaml returns None, if nothing is available. We need an empty dict though.
        if data is None:
            data = {}
        return data

    def readout_magma_from_file(self, file_path):
        """Readout a magma list from the file_path

        @param file_path: The path to the magma file, either absolute or relative to the location this script is invoked from.
        @type file_path: String
        @return: The Python dict for a Magma file - the magma data. """
        # open() cries out, if the file doesn't exist. The with block closes
        # the file even if reading fails.
        with open(file_path, "r") as file_handle:
            file_data = file_handle.read()
        return self.readout_yaml(file_data)

    def files(self):
        """Get the files from the magma data.

        @return: A list of all MagmaFile objects from the Magma. Empty, if the data has no file list.
        """
        # The files are held as simple list of dicts inside the magma data.
        # A missing or empty (None) entry counts as "no files".
        raw_files = self.data.get(FILE_LIST_NAME) or []
        return [MagmaFile(raw_file) for raw_file in raw_files]

    def get_file_magnets(self):
        """Get the magnet links of the files.

        @return: A list with the magnet link of every file in self.files.
        """
        return [magma_file.magnet for magma_file in self.files]

    def get_magma_metadata(self):
        """Get only the metadata, without the list of files.

        @return: A new dict which contains every top level entry of the data except the list of files.
        """
        return dict((key, value) for key, value in self.data.items() if key != FILE_LIST_NAME)

    def magma_v0_2_filelist_representation(self):
        """@return: A list of files which can be read by MAGMAv0.2 compliant programs (String)."""
        # One quoted magnet per line below the "list:" parameter.
        entries = [' - "' + magma_file.magnet + '"\n' for magma_file in self.files]
        return "list:\n" + "".join(entries)

    def magma_v0_2_data(self):
        """@return: Data in Magma v0.2 format: the v0.2 header line followed by the file list (String)."""
        return "#MAGMAv0.2\n" + self.magma_v0_2_filelist_representation()

    def save(self, path, magma_versions=(0.2, 0.4)):
        """Save the magma list to the path.

        If the folder to save in doesn't exist, just raise an Exception.

        TODO: Clean out empty entries in the metadata before saving.

        @param path: The path to the target file.
        @type path: String

        @param magma_versions: The versions of the magma specification to be supported.
        @type magma_versions: List of Floats

        @return: None
        @raise TypeError: If dump() produces no data for the given versions.
        """
        # First remove empty entries, so we get a nice looking file.
        self.remove_empty_entries()
        # Build the content before touching the target, so a failure
        # doesn't leave a truncated file behind.
        content = self.dump(magma_versions=magma_versions)
        if content is None:
            # Writing None raised a TypeError before; keep that type.
            raise TypeError("No data to write for magma_versions=%r" % (magma_versions,))
        with open(path, "w") as file_handle:
            file_handle.write(content)

    def print_data(self):
        """Print the magma file in the yaml format.

        @return: None
        """
        # Single-argument print(...) behaves the same in Python 2 and 3.
        print(self.__str__())

    def get_yaml_data(self):
        """Return the yaml representation of the data.

        @return: The yaml representation of the data in block style (String)."""
        return dump(self.data, default_flow_style=False)

    def dump(self, magma_versions=(0.2, 0.4)):
        """Return the yaml data with the Magma header.

        If both 0.2 and 0.4 are requested, a former "list" entry is removed
        from self.data and a Magma v0.2 compatibility section is appended.

        @param magma_versions: The versions of the magma specification to be supported.
        @type magma_versions: List of Floats

        @return: A string representation of the Magma in yaml format, or None if 0.4 is not among the versions.
        """
        if 0.4 not in magma_versions:
            return None
        if 0.2 not in magma_versions:
            return "#MAGMAv0.4\n" + self.get_yaml_data()
        # Remove the former 0.2 list. This can REMOVE DEPRECATED DATA.
        self.data.pop("list", None)
        # Render the yaml before building the magnet list, so the yaml part
        # reflects the data exactly as it is at this point.
        yaml_part = self.get_yaml_data()
        compat_part = self.magma_v0_2_filelist_representation()
        return "#MAGMAv0.4\n" + yaml_part + "\n# Magmav0.2 compatibility section\n" + compat_part
290 291 292 # Also we have files inside the magma list. They have a few extra attributes to be easier to use than raw dictionaries. 293
class MagmaFile(object):
    """A file inside the magma list.

    Provides some abstractions for files inside Magma objects.

    Creating a MagmaFile adds an empty "urls" list and an empty
    "gnutella" -> "alt-locs" list to the data, if they are missing.
    The magnet link is built with urlencode, so reserved characters
    like ":" are percent-encoded.

    >>> magma_file = MagmaFile({FILENAME_PARAMETER: "input_file.txt", "urn": {"sha1": "3UJCLAOIZVCNAIT7TQYFLAP7ZNFW6G2G"}})
    >>> print(magma_file.sha1_hash)
    3UJCLAOIZVCNAIT7TQYFLAP7ZNFW6G2G
    >>> print(magma_file.name)
    input_file.txt
    >>> print(magma_file.magnet)
    magnet:?xt=urn%3Asha1%3A3UJCLAOIZVCNAIT7TQYFLAP7ZNFW6G2G&dn=input_file.txt
    """

    # Key of the alt-locs list inside the "gnutella" entry of the data.
    _ALT_LOC_KEY = "alt-locs"

    def __init__(self, data, *args, **kwds):
        """A file inside the magma list.

        @param data: The data of this file. Needs a filename and a sha1 urn.
        @type data: Dict
        @raise KeyError: If the filename or the sha1 hash is missing.
        """
        #: All data of the file
        self.data = data
        #: The name of the file, so we can easily print it. Every MagmaFile needs a filename.
        self.name = self.data[FILENAME_PARAMETER]
        #: The sha1 hash of the file. Every MagmaFile needs a sha1 hash.
        self.sha1_hash = self.data[URN_PARAMETER_NAME]["sha1"]
        #: The list of alternate urls.
        self.urls = self.readout_urls()
        #: The Other Gnutella clients who have the file.
        self.Gnutella_alt_locs = self.readout_gnet_alt_locs()

    def __str__(self):
        """Show nicely readable information about the file.

        @return: The name and the sha1 hash, separated by a space (String). """
        return self.name + " " + self.sha1_hash

    def readout_urls(self):
        """Readout alternate urls from the data

        If the data has no "urls" entry yet, an empty list is stored there
        first, so urls added to the returned list get saved in the file.

        @return: A list of URLs which can be used as alternate fallback sources (which are unsafe, though, since it isn't sure that they will point to the same file when times change). """
        return self.data.setdefault("urls", [])

    def readout_gnet_alt_locs(self):
        """Readout the Alt-Locs from the data

        Missing "gnutella" and "alt-locs" entries are created empty, so
        alt-locs added to the returned list get saved in the file.

        @return: A list of IPs with Port numbers which can be used together with the sha1 hash to construct alt-locs for Gnutella. """
        gnutella = self.data.setdefault("gnutella", {})
        return gnutella.setdefault(self._ALT_LOC_KEY, [])

    def parse_magnet(self):
        """Parse the magnet link from the data inside the file.

        Reads the alt-locs without creating missing entries, so asking
        for the magnet never changes self.data.

        @return: A magnet link for the file, including one xs parameter per Gnutella alt-loc (String). """
        # urlencode moved to urllib.parse in Python 3.
        try:
            from urllib import urlencode
        except ImportError:
            from urllib.parse import urlencode

        # The sha1 hash becomes the xt, the name the dn.
        xt = "urn:sha1:" + self.sha1_hash
        params = [("xt", xt), ("dn", self.name)]

        # Every alt-loc is transformed to an xs parameter.
        alt_locs = self.data.get("gnutella", {}).get(self._ALT_LOC_KEY, [])
        for alt_loc in alt_locs:
            params.append(("xs", "http://" + str(alt_loc) + "/uri-res/N2R?" + xt))

        return "magnet:?" + urlencode(params)

    #: The magnet link of the file
    # NOTE(review): fset=setattr gets called as setattr(instance, value) and
    # so raises a TypeError on assignment; the property is effectively
    # read-only. Kept unchanged so assignment fails the same way as before.
    magnet = property(fget=parse_magnet, fset=setattr)

    def remove_empty_entries(self):
        """Remove empty entries from the file, for example empty alt-locs.

        First every empty value one level below a top level entry is
        removed, then every top level entry which is (or became) empty.
        Values without a length are treated as content and kept.

        TODO: Make this recursive, so we can clean arbitrary deeply nested structures.
        """
        # First check for empty subkeys. We iterate over copies of the keys,
        # because we delete from the dicts while walking through them.
        for key in list(self.data):
            entry = self.data[key]
            try:
                sub_keys = list(entry.keys())
            except AttributeError:
                # No keys: it is no dictionary, so it has no subkeys.
                continue
            for sub_key in sub_keys:
                try:
                    is_empty = len(entry[sub_key]) == 0
                except TypeError:
                    # No length: it seems to be data/content, not structure.
                    continue
                if is_empty:
                    del entry[sub_key]

        # Now check for empty keys.
        for key in list(self.data):
            try:
                is_empty = len(self.data[key]) == 0
            except TypeError:
                # No length: it seems to be data/content, not structure.
                continue
            if is_empty:
                del self.data[key]
def usage():
    """Usage instructions.

    Taken from the module docstring: everything after its first
    paragraph. If the docstring is not available (for example when
    Python runs with -OO), the instructions are empty.

    @return: Usage instructions (String)."""
    module_doc = __doc__ or ""
    # Paragraphs are separated by blank lines; the first one is only the title.
    paragraphs = module_doc.split("\n\n")[1:]
    return "\n\n".join(paragraphs)
def _test():
    """Do all doctests.

    @return: None"""
    from doctest import testmod
    testmod()


# If this gets called automatically, load the magma list and print the magnets, or create a magma list.
if __name__ == "__main__":
    # _test()
    from sys import argv
    # Every command needs at least one argument besides its own name.
    # Single-argument print(...) behaves the same in Python 2 and 3.
    if len(argv) < 3:
        print(usage())
    elif argv[1] == "magnets_from_magma":
        # magnets_from_magma MAGMA_FILE: print one magnet link per line.
        magma = Magma(magma_file=argv[2])
        for magnet in magma.magnets:
            print(magnet)
    elif argv[1] == "magma_from_files":
        # magma_from_files MAGMA FILE1 FILE2 ...: write the files into MAGMA.
        magma = Magma(input_files=argv[3:])
        magma.save(argv[2])
    else:
        print(usage())