F:/thesis_austausch/dissertation/code_docu_doxygen/GZ_Parser/gz_parser.py

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 '''
00004 This is the gz_parser.py module, which uses an external config file (e.g. config_gz_parser.ini) to parse through a directory with *.gz files. The server_classes.py is also needed.
00005 
00006 Reading University
00007 MSc in Network Centered Computing
00008 a.weise - a.weise@reading.ac.uk - December 2005
00009 '''
00010 
00011 import os, sys, string, re, stat
00012 from server_classes import LogFileParser
00013 import ConfigParser, getopt
00014 
gz_list = [] # (last modified time, file name) tuples of the *.gz log files collected by parse_directory()
00016   
00017 
def LoadConfig(file, config=None):
    """
    Read an ini-style configuration file into a flat dictionary.

    Every option becomes one entry: the key has the form <section>.<option>
    (both parts lower-cased) and the value is the option's text with the
    surrounding whitespace removed.

    file   -- path of the configuration file (ConfigParser.read() skips
              files it cannot open, so a missing file yields only the
              defaults)
    config -- optional dictionary with default entries; it is copied and
              never modified

    Returns the new dictionary.

    source: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65334
    """
    # work on a private dictionary so neither a caller's defaults nor a
    # shared default argument can be modified
    if config is None:
        config = {}
    else:
        config = config.copy()
    cp = ConfigParser.ConfigParser()
    cp.read(file)
    for sec in cp.sections():
        name = sec.lower()
        for opt in cp.options(sec):
            config[name + "." + opt.lower()] = cp.get(sec, opt).strip()
    return config
00033 
def parse_directory(arg, dirname, fnames):
    '''
    Callback for os.path.walk(path, parse_directory, arg): collect the srbLog*.gz files of one directory.

    A regular file is accepted when its name starts with "srbLog", continues with any mix of digits, "_", "." and "-" and ends in ".gz" (e.g. srbLog.20051003.gz). For each accepted file the tuple (last modified time, file name) is appended to the module level list gz_list.

    arg     -- not used by this function
    dirname -- directory that is currently visited
    fnames  -- names of the entries of dirname

    Returns -1 if dirname is not a directory, otherwise None.
    '''
    if not os.path.isdir(dirname):
        print("could not find directory \"%s\"" % dirname)
        return -1
    # the dot in front of "gz" is escaped and the pattern is anchored at the
    # end, so names like "srbLog.1.gz.bak" or "srbLogXgz" are not accepted
    log_name = re.compile(r'^srbLog[_0-9.-]*\.gz$')
    for f in fnames:
        # build the full path instead of changing the working directory, so
        # the process is never left in a foreign directory after an error
        full_name = os.path.join(dirname, f)
        if (not os.path.isfile(full_name)) or (log_name.search(f) is None):
            continue
        # NOTE(review): only the bare file name is stored, the directory is
        # lost - files found in sub directories cannot be located later
        gz_list.append((os.stat(full_name)[stat.ST_MTIME], f))
00058 
def get_keywords(filus):
    '''
    This function extracts the keywords from a given file.

    Blank lines and lines whose first non-blank character is "#" are ignored. Every other line is a comma separated list of entries; each entry is stripped of surrounding whitespace and split at ":".

    filus -- path of the keyword file

    Returns a list with one list per entry (e.g. "error:1, info" -> [["error", "1"], ["info"]]) or -1 if the file could not be opened.
    '''
    try:
        file_fd = open(filus, 'r')
    except IOError:
        print("Problem with keyword file ->  %s" % (sys.exc_info()[1],))
        return -1

    # make sure the file is closed even if reading fails
    try:
        content = file_fd.readlines() # 1 line == 1 entry
    finally:
        file_fd.close()

    keys = []
    for line in content:
        line = line.strip()
        # skip blank lines (also those containing only whitespace) and comments
        if line == "" or line.startswith("#"):
            continue
        for entry in line.split(","):
            entry = entry.strip()
            # a trailing or doubled comma must not produce an empty keyword
            if entry != "":
                keys.append(entry.split(":"))

    return keys
00090         
def remove_item(listus, item):
    '''
    This function removes every string that starts with "item" from a list.

    listus -- list of strings; it is modified in place
    item   -- prefix that marks an entry for removal (e.g. "#" for comment lines, "\n" for empty lines)

    Returns the same list object, now without the removed entries.
    '''
    # a single pass with slice assignment keeps the "in place" contract and
    # avoids one recursive call per removed entry
    listus[:] = [entry for entry in listus if not entry.startswith(item)]
    return listus
00107      
def gunzip(filus, name_temp_file="temp_srbLog"):
    '''
    This function unpacks a *.gz file into a temporary file and leaves the original *.gz file untouched.

    The gzip module is used instead of a shell command, so file names with blanks or shell meta characters are handled correctly and a failure is reported to the caller.

    filus          -- path of the *.gz file
    name_temp_file -- path of the file that receives the unpacked content

    Returns 0 on success and -1 if filus is not a file or could not be unpacked.
    '''
    import gzip
    import shutil
    import zlib

    if (not os.path.isfile(filus)):
        return -1
    try:
        packed = gzip.open(filus, 'rb')
        try:
            unpacked = open(name_temp_file, 'wb')
            try:
                shutil.copyfileobj(packed, unpacked)
            finally:
                unpacked.close()
        finally:
            packed.close()
    except (IOError, OSError, EOFError, zlib.error):
        print("could not unpack ->  %s (%s)" % (filus, sys.exc_info()[1]))
        return -1
    return 0
00118     
def delete_file(filus):
    '''
    This function deletes a given file.

    filus -- path of the file

    Returns 0 on success; if the file could not be removed a message is printed and -1 is returned.
    '''
    try:
        os.remove(filus)
        return 0
    except (OSError, TypeError, ValueError):
        # only failures of the removal itself are handled here; things like
        # KeyboardInterrupt are no longer swallowed
        print("could not delete ->  %s" % (filus,))
        return -1
00129 
def usage_exit(progname, msg=None):
    '''
    This function displays the usage of the program and terminates the script with exit status -1.

    progname -- program name shown in the usage line
    msg      -- optional message that is printed (followed by an empty line) in front of the usage line
    '''
    if msg:
        print(msg)
        print("")
    print("usage: %s -h|--help -c|--config -v|--verbose " % progname)
    # os._exit() ends the process without flushing the stdio buffers, so the
    # text above would be lost when the output is redirected
    sys.stdout.flush()
    os._exit(-1)
00139 
00140 ########################################################################
00141 
def _flush_and_exit(status, msg=None):
    '''
    This function prints an optional message, flushes the standard streams and terminates the process immediately with the given exit status.
    '''
    if msg is not None:
        print(msg)
    # os._exit() does not flush the stdio buffers on its own
    sys.stdout.flush()
    sys.stderr.flush()
    os._exit(status)

def start():
    '''
    This function starts the application.

    Steps:
    1. evaluate the command line (-c/--config, -v/--verbose, -h/--help)
    2. load the config file and check the configured paths
    3. read the keyword file and the list of error numbers to ignore
    4. collect all srbLog*.gz files below the archive path, unpack each one into "temp_srbLog" and hand it to the LogFileParser
    5. copy "temp_client_log.xml" into the xml path as gz_client_log.xml (or <n>_gz_client_log.xml if that name is taken) and remove the temporary file

    The process is terminated with exit status -1 on a usage, configuration, parsing or copy problem and with 0 if no log file was found.
    '''
    import shutil

    global gz_list
    gz_list = [] #save *.gz files

    configfile = ""
    verbose = 0

    # evaluate parameters
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'c:vh', ['config=', 'verbose', 'help'])
    except getopt.error:
        usage_exit(sys.argv[0], sys.exc_info()[1])

    for opt, value in opts:
        if opt in ('-h','--help'):
            msg = "Help:\n-c or --config\t->\tdefines config file, if no config file given, default values are used\n-v or --verbose\t->\tactivates printing of messages [debug option]\n-h or --help\t->\tprints this help"
            usage_exit(sys.argv[0], msg)
        elif opt in ('-c','--config'):
            # "-c=file" arrives as "=file": drop only the leading "=";
            # os.path.join keeps an absolute path as it is
            configfile = os.path.join(os.getcwd(), value.lstrip("="))
        elif opt in ('-v','--verbose'):
            verbose = 1
        else:
            usage_exit(sys.argv[0], "Wrong use of parameter")

    # load config file
    if (configfile == ""):
        usage_exit(sys.argv[0], "\nNo config file spezified !\n")
    if not os.path.isfile(configfile):
        _flush_and_exit(-1, "Sorry, a given file does NOT exist !\nPlease try again!\n\n")
    config = LoadConfig(configfile)

    print("\n\n------ GZ SRB LOG FILE PARSER ------")

    workingpath = os.getcwd()

    path_srb_gz = config.get("path.path_srb_gz")
    path_xml_file = config.get("path.path_xml_file")
    if path_srb_gz is None or path_xml_file is None:
        _flush_and_exit(-1, "The options path_srb_gz and path_xml_file are needed in section [path] !\nMaybe change configuration file and try again!\n\n")
    path_srb_gz = path_srb_gz.rstrip("/")
    path_xml_file = path_xml_file.rstrip("/")
    xml_file_name = "gz_client_log.xml"

    # check if the configuration is correct
    if not os.path.exists(path_srb_gz):
        _flush_and_exit(-1, "Could not locate log file archive path under %s !\nMaybe change configuration file and try again!\n\n" % path_srb_gz)

    if not os.path.exists(path_xml_file):
        _flush_and_exit(-1, "Could not locate xml path under %s !\nMaybe change configuration file and try again!\n\n" % path_xml_file)

    keyword = config.get("file.keyword")
    if keyword is None:
        _flush_and_exit(-1, "The option keyword is needed in section [file] !\nMaybe change configuration file and try again!\n\n")
    keyword = keyword.strip()

    keyword_path = config.get("path.path_keyword")
    if keyword_path is not None:
        keyword_path = keyword_path.rstrip("/")
    if not keyword_path:
        keyword_path = workingpath
    elif not keyword_path.startswith("/"):
        # a relative keyword path is relative to the working directory
        keyword_path = workingpath+"/"+keyword_path

    # NOTE(review): get_keywords() returns -1 if the file cannot be read and
    # that value is passed on to LogFileParser unchanged - confirm that
    # LogFileParser copes with it
    keyword_list = get_keywords(keyword_path+"/"+keyword)

    # a missing option is treated like an empty one
    ignore_error = config.get("misc.ignore_error")
    if ignore_error is None:
        ignore_error = ""
    if ("" != ignore_error):
        try:
            # empty entries (e.g. after a trailing comma) are skipped
            ignore_error = [int(number) for number in ignore_error.split(",") if number.strip() != ""]
        except ValueError:
            _flush_and_exit(-1, "The option ignore_error has to be a comma separated list of numbers !\nMaybe change configuration file and try again!\n\n")

    parserus = LogFileParser(path_srb_gz, keyword_list, ignore_error, os.getcwd(), "temp_client_log.xml", verbose)

    os.path.walk(path_srb_gz, parse_directory, gz_list)
    if not gz_list:
        _flush_and_exit(0, "Could not find any srbLog*.gz files!")

    os.chdir(path_srb_gz)
    try:
        for x, entry in enumerate(gz_list):
            name = entry[1]
            print("\n")
            print("%d . parsing -> \"%s\"\n" % (x+1, name))
            # never analyse a stale or incomplete temporary file
            if 0 != gunzip(name):
                raise IOError("could not unpack \"%s\"" % name)
            status = os.stat(name)
            parserus.analyse_log_file("temp_srbLog", file_time=status[stat.ST_MTIME])
            delete_file("temp_srbLog")
    except (Exception, KeyboardInterrupt):
        problem = sys.exc_info()[1]
        # clean up the temporary files, each one in the directory it lives in
        if os.path.exists("temp_srbLog"):
            delete_file("temp_srbLog")
        os.chdir(workingpath)
        if os.path.exists("temp_client_log.xml"):
            delete_file("temp_client_log.xml")
        print("%s" % (problem,))
        # a failed run must not report success to the caller
        _flush_and_exit(-1, "Problem parsing log files -> terminating !")

    os.chdir(workingpath)

    # check if a gz_client_log.xml already there, if yes change name
    test_file = "%s/%s" % (path_xml_file, xml_file_name)
    c = 1
    while os.path.exists(test_file):
        test_file = "%s/%d_%s" % (path_xml_file, c, xml_file_name)
        c += 1

    print("\ncopy xml file ...")
    try:
        shutil.copy("temp_client_log.xml", test_file)
    except (IOError, OSError):
        # keep the temporary xml file, it is the only copy of the result
        _flush_and_exit(-1, "Could not copy temp_client_log.xml to %s -> %s" % (test_file, sys.exc_info()[1]))

    # delete temporary xml file
    delete_file("temp_client_log.xml")

    print("\n\ndone ... \n\n")
00271 
# run the parser only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    start()

Generated on Sun Mar 5 18:04:16 2006 for GZ Parser by  doxygen 1.4.6-NO