00001
00002
00003 '''
00004 This is the gz_parser.py module, which uses an external config file (e.g. config_gz_parser.ini) to parse through a directory with *.gz files. The server_classes.py is also needed.
00005
00006 Reading University
00007 MSc in Network Centered Computing
00008 a.weise - a.weise@reading.ac.uk - December 2005
00009 '''
00010
00011 import os, sys, string, re, stat
00012 from server_classes import LogFileParser
00013 import ConfigParser, getopt
00014
00015 gz_list = []
00016
00017
def LoadConfig(file, config=None):
    """
    Read an ini-style configuration file into a flat dictionary.

    Every option is stored under the key <section>.<option> (section and
    option name lower-cased); the value is the option text with surrounding
    whitespace removed.

    file   -- name of the configuration file; it is handed to
              ConfigParser.read() unchanged
    config -- optional dictionary with preset entries; it is copied and
              never modified, entries read from the file override it

    Returns the resulting dictionary (only the preset entries when the file
    contains no sections).

    source: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/65334
    """
    # Work on a private dictionary: neither the caller's presets nor a shared
    # default object may be altered by this call.
    if config is None:
        config = {}
    else:
        config = config.copy()

    cp = ConfigParser.ConfigParser()
    cp.read(file)
    for sec in cp.sections():
        name = sec.lower()
        for opt in cp.options(sec):
            config[name + "." + opt.lower()] = cp.get(sec, opt).strip()
    return config
00033
def parse_directory(arg, dirname, fnames):
    '''
    Collect the srbLog*.gz files of one directory.

    The function is meant as the visitor of a directory walk
    (os.path.walk(path, parse_directory, arg)). For every regular file in
    "dirname" whose name matches srbLog[_0-9.-]*.gz a tuple
    (last modified time, file name) is appended to the result list.

    arg     -- list that receives the tuples; if it is not a list the module
               level gz_list is used instead
    dirname -- directory that is currently visited
    fnames  -- names of the entries inside dirname

    Returns -1 (after printing a message) if dirname is not a directory,
    otherwise None.
    '''
    if isinstance(arg, list):
        found = arg
    else:
        found = gz_list

    if not os.path.isdir(dirname):
        print("could not find directory \"%s\"" % dirname)
        return -1

    # The dot before "gz" is escaped and the pattern is anchored at the end,
    # so names such as "srbLog_1xgz" or "srbLog.gz.bak" are not accepted.
    pattern = re.compile(r'^srbLog[_0-9.-]*\.gz$')

    for f in fnames:
        # Build the path explicitly instead of changing the working
        # directory; the process-wide cwd is never touched.
        full_name = os.path.join(dirname, f)
        if not os.path.isfile(full_name) or pattern.search(f) is None:
            continue
        found.append((os.stat(full_name)[stat.ST_MTIME], f))
00058
def get_keywords(filus):
    '''
    Extract the keywords from a given keyword file.

    The file holds comma separated keywords, any number per line. Empty
    lines and lines starting with "#" are ignored. Every keyword is split at
    ":" so the result is a list of lists, e.g. the line "error:5, warning"
    yields [["error", "5"], ["warning"]].

    filus -- name of the keyword file

    Returns the list of keywords, or -1 (after printing a message) if the
    file could not be opened.
    '''
    try:
        file_fd = open(filus, 'r')
    except IOError as e:
        print("Problem with keyword file ->  %s" % e)
        return -1

    # Close the file even if reading fails.
    try:
        content = file_fd.readlines()
    finally:
        file_fd.close()

    keys = []
    for line in content:
        line = line.strip()
        # Skip blank lines and comments (also when they are indented).
        if not line or line.startswith("#"):
            continue
        for entry in line.rstrip(",").split(","):
            entry = entry.strip()
            # A stray comma must not produce an empty keyword.
            if entry:
                keys.append(entry.split(":"))

    return keys
00090
def remove_item(listus, item):
    '''
    Remove all entries from a list of strings whose first character
    matches "item".

    An entry is dropped when entry.find(item, 0, 1) succeeds, i.e. when
    "item" is found within the first character of the entry.

    listus -- list of strings; it is modified in place
    item   -- string to look for at the beginning of every entry

    Returns the same (now filtered) list object.
    '''
    # One pass with slice assignment keeps the in-place contract without the
    # former one-recursion-per-removed-entry scheme, which hit the recursion
    # limit on long lists.
    listus[:] = [entry for entry in listus if entry.find(item, 0, 1) == -1]
    return listus
00107
def gunzip(filus, name_temp_file="temp_srbLog"):
    '''
    Unpack a *.gz file into a temporary file.

    The original *.gz file is left untouched. The archive is read with the
    gzip module, so no shell is involved and file names containing blanks or
    shell special characters are handled correctly.

    filus          -- name of the *.gz file
    name_temp_file -- name of the file that receives the unpacked content

    Returns 0 on success and -1 if "filus" is not an existing file or could
    not be unpacked (a message is printed in the latter case).
    '''
    import gzip
    import shutil
    import zlib

    if not os.path.isfile(filus):
        return -1

    try:
        source = gzip.open(filus, 'rb')
        try:
            target = open(name_temp_file, 'wb')
            try:
                shutil.copyfileobj(source, target)
            finally:
                target.close()
        finally:
            source.close()
    except (IOError, OSError, EOFError, zlib.error) as e:
        # Damaged or truncated archive, or the temporary file is not writable.
        print("could not unpack ->  %s (%s)" % (filus, e))
        return -1

    return 0
00118
def delete_file(filus):
    '''
    Delete a given file.

    filus -- name of the file to delete

    Returns 0 on success; if the file could not be removed a message is
    printed and -1 is returned.
    '''
    try:
        os.remove(filus)
    except OSError:
        # Only file system errors are expected here; anything else is a
        # programming error and must not be hidden.
        print("could not delete ->  %s" % (filus,))
        return -1
    return 0
00129
def usage_exit(progname, msg=None):
    '''
    Display the usage of the program and terminate the script.

    progname -- name of the program shown in the usage line
    msg      -- optional message printed (followed by an empty line) in
                front of the usage line

    The function does not return; the script ends with exit status -1.
    '''
    if msg:
        print(msg)
        print("")
    print("usage: %s -h|--help -c|--config -v|--verbose " % progname)
    # sys.exit() flushes the output buffers on the way out; os._exit() does
    # not, so the text above could get lost when stdout is redirected.
    sys.exit(-1)
00139
00140
00141
def _terminate(message, status=-1):
    '''
    Print "message" and end the script with the given exit status.
    '''
    print(message)
    # sys.exit() (unlike os._exit()) flushes the buffered output first.
    sys.exit(status)


def _required_option(config, key):
    '''
    Return the value of "key" from the configuration dictionary or end the
    script with a message if the option is missing.
    '''
    value = config.get(key)
    if value is None:
        _terminate("Option \"%s\" is missing in the configuration file !\nMaybe change configuration file and try again!\n\n" % key)
    return value


def _remove_if_present(filus):
    '''
    Remove a file during clean-up; a file that is already gone is fine.
    '''
    try:
        os.remove(filus)
    except OSError:
        # Best effort only: the clean-up must never hide the original error.
        pass


def _next_free_path(directory, file_name):
    '''
    Return directory/file_name, or directory/<n>_file_name with the smallest
    n >= 1 that does not exist yet, so no earlier result is overwritten.
    '''
    candidate = "%s/%s" % (directory, file_name)
    counter = 1
    while os.path.exists(candidate):
        candidate = "%s/%d_%s" % (directory, counter, file_name)
        counter += 1
    return candidate


def start():
    '''
    This function starts the application.

    Steps:
    1. read the command line (-c/--config, -v/--verbose, -h/--help)
    2. load the configuration file and check the configured paths
    3. read the keyword file and the list of error numbers to ignore
    4. collect all srbLog*.gz files below the archive path
    5. unpack every file into "temp_srbLog" and hand it to the LogFileParser
    6. copy the resulting "temp_client_log.xml" into the xml path under a
       name that does not exist yet and remove the temporary file

    The script is terminated with exit status -1 on every error and with
    exit status 0 if no srbLog*.gz file was found.
    '''
    global gz_list
    gz_list = []

    configfile = ""
    verbose = 0

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'c:vh', ['config=', 'verbose', 'help'])
    except getopt.error as e:
        usage_exit(sys.argv[0], e)

    for opt, value in opts:
        if opt in ('-h', '--help'):
            msg = "Help:\n-c or --config\t->\tdefines config file, if no config file given, default values are used\n-v or --verbose\t->\tactivates printing of messages [debug option]\n-h or --help\t->\tprints this help"
            usage_exit(sys.argv[0], msg)
        elif opt in ('-c', '--config'):
            # "-c=file" leaves the "=" in the value
            value = value.replace("=", "")
            # os.path.join keeps an absolute path as it is and only puts the
            # working directory in front of a relative one.
            configfile = os.path.join(os.getcwd(), value)
        elif opt in ('-v', '--verbose'):
            verbose = 1
        else:
            usage_exit(sys.argv[0], "Wrong use of parameter")

    if configfile == "":
        usage_exit(sys.argv[0], "\nNo config file spezified !\n")
    if not os.path.exists(configfile):
        _terminate("Sorry, a given file does NOT exist !\nPlease try again!\n\n")
    config = LoadConfig(configfile)

    print("\n\n------ GZ SRB LOG FILE PARSER ------")

    workingpath = os.getcwd()

    path_srb_gz = _required_option(config, "path.path_srb_gz").rstrip("/")
    path_xml_file = _required_option(config, "path.path_xml_file").rstrip("/")
    xml_file_name = "gz_client_log.xml"

    if not os.path.exists(path_srb_gz):
        _terminate("Could not locate log file archive path under %s !\nMaybe change configuration file and try again!\n\n" % path_srb_gz)

    if not os.path.exists(path_xml_file):
        _terminate("Could not locate xml path under %s !\nMaybe change configuration file and try again!\n\n" % path_xml_file)

    # keyword file: the path is optional (default: working directory) and may
    # be absolute or relative to the working directory
    keyword = _required_option(config, "file.keyword").strip()
    keyword_path = (config.get("path.path_keyword") or "").rstrip("/")
    keyword_list = get_keywords(os.path.join(workingpath, keyword_path, keyword))
    if keyword_list == -1:
        # get_keywords has already printed the reason
        _terminate("Could not read keyword file -> terminating !")

    # error numbers to ignore: comma separated integers; an empty or missing
    # option is handed to the parser as empty string
    ignore_error = config.get("misc.ignore_error") or ""
    if ignore_error != "":
        try:
            ignore_error = [int(number) for number in ignore_error.split(",") if number.strip()]
        except ValueError:
            _terminate("Option \"misc.ignore_error\" must be a comma separated list of numbers !\n\n")

    parserus = LogFileParser(path_srb_gz, keyword_list, ignore_error, workingpath, "temp_client_log.xml", verbose)

    # parse_directory fills gz_list with (modification time, file name)
    for dirpath, dirnames, filenames in os.walk(path_srb_gz):
        parse_directory(gz_list, dirpath, dirnames + filenames)

    if not gz_list:
        _terminate("Could not find any srbLog*.gz files!", 0)

    # NOTE(review): gz_list holds plain file names, so files found in
    # sub-directories of path_srb_gz cannot be opened from here and end the
    # run -- confirm whether the archive is expected to be flat.
    os.chdir(path_srb_gz)
    try:
        for x in range(len(gz_list)):
            gz_name = gz_list[x][1]
            print("\n")
            print("%d . parsing -> \"%s\"\n" % (x + 1, gz_name))
            if gunzip(gz_name) != 0:
                raise IOError("could not unpack \"%s\"" % gz_name)
            status = os.stat(gz_name)
            parserus.analyse_log_file("temp_srbLog", file_time=status[stat.ST_MTIME])
            delete_file("temp_srbLog")
    except (KeyboardInterrupt, Exception) as e:
        # Also on Ctrl-C: remove the temporary files before terminating.
        _remove_if_present("temp_srbLog")
        os.chdir(workingpath)
        _remove_if_present("temp_client_log.xml")
        print("Problem parsing log files -> terminating !")
        _terminate("Reason: %r" % (e,))

    os.chdir(workingpath)

    test_file = _next_free_path(path_xml_file, xml_file_name)

    print("\ncopy xml file ...")
    import shutil
    try:
        shutil.copy("temp_client_log.xml", test_file)
    except (IOError, OSError) as e:
        # Keep the temporary file, it is the only copy of the result.
        _terminate("Could not copy xml file to %s -> %s" % (test_file, e))

    delete_file("temp_client_log.xml")

    print("\n\ndone ... \n\n")


if __name__ == '__main__':
    start()