import java.text.SimpleDateFormat
import java.util.Calendar
import java.util.TimeZone
import java.util.regex.Matcher

/* 34567890123456789012345678901234567890123456789012345678901234567890123456789
 * *****************************************************************************
 * This code is distributed under the MIT License, see
 * http://www.opensource.org/licenses/mit-license.php
 *
 * -----------------------------------------------------------------------------
 * The MIT License
 *
 * Copyright (c) 2011, M-PLIFY S.A.
 *                     21, rue Glesener
 *                     L-1631 LUXEMBOURG
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 * -----------------------------------------------------------------------------
 *
 * *******************************************************************************
 *******************************************************************************
 * A script to process raw Voxeo logfiles into a single logfile with higher
 * readability. Written in Groovy 1.8.
 *
 * How to install:
 *
 * - You need a JVM and the "Groovy" interpreter
 *   The JVM can be obtained from:
 *   http://www.oracle.com/technetwork/java/javase/downloads/index.html
 *   "Groovy" can be obtained from
 *   http://groovy.codehaus.org/
 *   See http://groovy.codehaus.org/Installing+Groovy for install instructions.
 *
 *
 * How to use:
 *
 * - Save this script as a file named "LogReader.groovy". Do not use the
 *   name "VoxeoLogReader.groovy" as that will clash with the class name of
 *   a class in the script.
 *
 * - Download the Voxeo log files you want to examine into a single
 *   directory, e.g. "VoxeoLogs"
 *
 * - If they are compressed, uncompress them. On Unix, this is done with:
 *   $ gunzip VoxeoLogs/*
 *
 * - Run this script, passing the list of all the log files on the command
 *   line. On Unix, this can be done using:
 *
 *   $ find VoxeoLogs -name '*.txt' | xargs groovy LogReader.groovy > output.txt
 *
 * - Open "output.txt" with your preferred text editor or pager
 *
 * 2011.07.06 - It's usable. Ship it!
 * 2011.07.11 - Removed dependence on "Joda Time"
 ******************************************************************************/

/**
 * ---------------------------------------------------------------
 * A class to hold common line info
 * ---------------------------------------------------------------
 */

abstract class Line {

    static enum Interpreter {
        CCX, MOT
    }

    final Date when         // logging time
    final String host       // hostname, may or may not be qualified
    final Interpreter intp  // what interpreter logged this
    final List text         // possibly multiline text

    // Shared formatter; it renders dates as "yyyy-MM-dd HH:mm:ss" using a UTC calendar.
    public final static SimpleDateFormat sdf

    static {
        sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
        sdf.setCalendar(Calendar.getInstance(TimeZone.getTimeZone("UTC")))
    }

    /**
     * Helper to map a String to an Interpreter value.
     * Throws IllegalArgumentException for anything other than 'CCX' or 'MOT'.
     */

    protected static Interpreter toInterpreter(String x) {
        switch (x) {
            case 'CCX':
                return Interpreter.CCX
            case 'MOT':
                return Interpreter.MOT
            default:
                throw new IllegalArgumentException("Unknown value ${x}")
        }
    }

    /**
     * Constructor. "when", "host" and "intp" must be set (Groovy truth);
     * "text" must not be null but may be an empty list.
     */

    Line(Date when, String host, String intp, List text) {
        assert when
        assert host
        assert intp
        assert text != null // could be empty
        this.when = when
        this.host = host
        this.intp = toInterpreter(intp)
        this.text = text
    }

    /**
     * Textable "when" (in UTC)
     */

    String getWhenAsStr() {
        return sdf.format(when)
    }

    /**
     * Write the "lead-in" for the line on output. It will be preceded by the "indent" String,
     * which is supposed to be an indentation. The "latest" date is used to decide whether a
     * new date should be printed out.
     * "w" is not typed and may be a "Writer" or a "PrintStream".
     * Return the new "latest", which is this.when.
     */

    protected Date writeLeadIn(w, Date latest, String indent) {
        assert w
        assert latest
        if (!indent) {
            indent = ''
        }
        // if the date changed, then write the new date onto a separate line
        if (latest != when) {
            w << indent << '------' << getWhenAsStr() << ' UTC ------\n'
        }
        // write the lead-in now: indentation and interpreter
        w << indent << '[' << intp << ']'
        return when
    }

    /**
     * Write the text. A single-element text is appended to the current output line;
     * any other size (including an empty list) terminates the current line and writes
     * each element on its own line, prefixed by "indent" and "extraIndent".
     */

    protected void writeText(w, String indent, String extraIndent) {
        if (text.size() == 1) {
            // just append to the current line
            w << text[0] << '\n'
        } else {
            // go to next line and write each line of text separately
            w << '\n'
            if (!extraIndent) {
                extraIndent = ''
            }
            text.each { it ->
                w << indent << extraIndent << it << '\n'
            }
        }
    }
}

/**
 * ---------------------------------------------------------------
 * A class to hold information about a line logged for the "session"
 * ---------------------------------------------------------------
 */

class SessionLine extends Line {

    final int x                 // Unsure what this is, always 1
    final int y                 // Unsure what this is, always 1
    final String callerId       // set if detected in message, otherwise null
    final String calledId       // set if detected in message, otherwise null
    final String sessionEvent   // set if detected in message (in that case, it has been cut from text)
    final String sessionAction  // set if detected in message (in that case, it has been cut from text)

    /**
     * Helper to re-multiline text for a session; it comes with explicitly escaped newlines
     * and carriage returns; returns a possibly empty "List of String".
     * Every resulting line is trimmed and empty lines are dropped.
     */

    static List cleanText(String text) {
        assert text != null
        text = text.replace('\\r\\n', '\n') // re-insert newline
        text = text.replace('\\n', '\n')    // fix stray \\n
        text = text.replace('\\r', '')      // fix stray \\r
        // split into lines, trim each one, and drop those that end up empty
        return text.readLines().collect { it.trim() }.findAll { it }
    }

    /**
     * Constructor. The raw "text" is passed through cleanText() before being stored.
     */

    SessionLine(Date when, String host, String intp, int x, int y, String callerId, String calledId, String sessionEvent, String sessionAction, String text) {
        super(when, host, intp, cleanText(text))
        this.x = x
        this.y = y
        this.callerId = callerId
        this.calledId = calledId
        this.sessionEvent = sessionEvent
        this.sessionAction = sessionAction
    }

    /**
     * Pretty-print this to "w", which is a "Writer" or a "PrintStream".
     * Output will be preceded by the "indent" String, which is supposed to be an indentation.
     * The "latest" date is used to decide whether a new date should be printed out.
     * Return the new "latest", which is this.when.
     */

    Date prettyPrint(w, Date latest, String indent) {
        assert w
        assert latest
        if (!indent) {
            indent = ''
        }
        Date newLatest = writeLeadIn(w, latest, indent)
        if (callerId) {
            w << "[callerId=" << callerId << "]"
        }
        if (calledId) {
            w << "[calledId=" << calledId << "]"
        }
        if (sessionEvent) {
            w << "[event=" << sessionEvent << "]"
        }
        if (sessionAction) {
            w << "[action=" << sessionAction << "]"
        }
        writeText(w, indent, ' ')
        return newLatest
    }
}

/**
 * ---------------------------------------------------------------
 * A class to hold information about a line logged for the "child session"
 * ---------------------------------------------------------------
 */

class ChildSessionLine extends Line {

    final int x                  // Unsure what this is, always 1
    final String logger          // similar to a class name
    final String callerId        // set if detected in message, otherwise null
    final String calledId        // set if detected in message, otherwise null
    final String childSessionId  // hexadecimal child session id string

    /**
     * Helper to re-multiline text for a child session; it comes with explicitly escaped newlines
     * and carriage returns; returns a possibly empty "List of String".
     * Text from a logger whose name ends in 'wireLog' is only split into lines;
     * everything else is delegated to SessionLine.cleanText().
     */

    private static List cleanText(String text, String logger) {
        assert text != null
        if (logger?.endsWith('wireLog')) {
            // do not trim (splitting "chomps" the newline away), do not drop empty lines
            return text.readLines()
        } else {
            return SessionLine.cleanText(text)
        }
    }

    /**
     * Constructor. "logger" and "childSessionId" must be set; the raw "text" is
     * passed through cleanText() before being stored.
     */

    ChildSessionLine(String childSessionId, Date when, String host, String intp, int x, String logger, String callerId, String calledId, String text) {
        super(when, host, intp, cleanText(text, logger))
        assert logger
        assert childSessionId
        this.childSessionId = childSessionId
        this.x = x
        this.logger = logger
        this.callerId = callerId
        this.calledId = calledId
    }

    /**
     * Pretty-print this to "w", which is a "Writer" or a "PrintStream".
     * Output will be preceded by the "indent" String, which is supposed to be an indentation.
     * The "latest" date is used to decide whether a new date should be printed out.
     * Return the new "latest", which is this.when.
     */

    Date prettyPrint(w, Date latest, String indent) {
        assert w
        assert latest
        if (!indent) {
            indent = ''
        }
        Date newLatest = writeLeadIn(w, latest, indent)
        w << "[" << childSessionId << "]"
        if (logger) {
            // only display the last path element of the logger
            List tokens = logger.tokenize('.')
            w << "[" << tokens[tokens.size() - 1] << "]"
        }
        if (callerId) {
            w << "[callerId=" << callerId << "]"
        }
        if (calledId) {
            w << "[calledId=" << calledId << "]"
        }
        writeText(w, indent, ' ')
        return newLatest
    }
}

/**
 * ---------------------------------------------------------------
 * Session collects the information about a Session, the common info and the logging lines
 * ---------------------------------------------------------------
 */

class Session {

    final String sessionId
    final int customerId
    final Date when
    final List lines = []

    /**
     * Constructor. All three arguments are checked using Groovy truth, so note
     * that a "customerId" of 0 fails the assertion.
     */

    Session(String sessionId, int customerId, Date when) {
        assert sessionId
        assert customerId
        assert when
        this.sessionId = sessionId
        this.customerId = customerId
        this.when = when
    }

    /**
     * Textable "when" (in UTC)
     */

    String getWhenAsStr() {
        return Line.sdf.format(when)
    }

    /**
     * Pretty-print this to "w", which is a "Writer" or a "PrintStream".
     * Writes a start marker, every collected line in insertion order, and an end
     * marker carrying the timestamp of the last line printed.
     */

    void prettyPrint(w) {
        assert w
        w.println(">>> Session ${sessionId} for customer ${customerId} starting at ${getWhenAsStr()} UTC")
        Date latest = when
        // Note that "prettyPrint" is dynamically resolved - it is not an abstract method on Line!
        lines.each { it ->
            latest = it.prettyPrint(w, latest, ' ')
        }
        w.println("<<< Session ${sessionId} ending at ${Line.sdf.format(latest)}")
    }
}

/**
 * ---------------------------------------------------------------
 * The main reader called by MAIN FUNCTION of the script
 * ---------------------------------------------------------------
 */

class VoxeoLogReader {

    // Various patterns

    private static final PAT1 = ~/^(\w+) (\d+) (\d+):(\d+):(\d+) (\w+) (\d+) (\w+(\.\w+)*) (\w+) (\d+) ([a-f0-9]{32}) (.*)$/
    private static final PAT2 = ~/^([a-f0-9]{32}) (\d+) (\w+(\.\w+)*) (.*)$/
    private static final PAT3 = ~/^(\d+) (\d+) (.*)$/
    private static final PAT_CALLERID = ~/\bcallerID=(\w+)\b/
    private static final PAT_CALLEDID = ~/\bcalledID=(\w+)\b/
    private static final PAT_SESSION_EVENT = ~/^Session event: (\w+(\.\w+)*) (.*)$/
    private static final PAT_SESSION_ACTION = ~/^Session action: (\w+(\.\w+)*) (.*)$/

    // The map of the month names

    private static final MONTH_MAP = [
        jan : 1,
        feb : 2,
        mar : 3,
        apr : 4,
        may : 5,
        jun : 6,
        jul : 7,
        aug : 8,
        sep : 9,
        oct : 10,
        nov : 11,
        dec : 12
    ]

    private static final JAVA_MONTH_MAP = [
        jan : Calendar.JANUARY,
        feb : Calendar.FEBRUARY,
        mar : Calendar.MARCH,
        apr : Calendar.APRIL,
        may : Calendar.MAY,
        jun : Calendar.JUNE,
        jul : Calendar.JULY,
        aug : Calendar.AUGUST,
        sep : Calendar.SEPTEMBER,
        oct : Calendar.OCTOBER,
        nov : Calendar.NOVEMBER,
        dec : Calendar.DECEMBER
    ]

    // The Gregorian GMT calendar can be reused, so create it once

    private static final Calendar GREGORIAN_GMT_CALENDAR = new GregorianCalendar(TimeZone.getTimeZone("GMT"));

    /**
     * A helper function that makes a "Date" out of a Matcher that matched against a line containing datetime info.
     * Groups 1 to 7 of the matcher are read as: month name, day of month, hour, minute,
     * second, time zone and year. Only the time zone 'GMT' is handled; any other value
     * fails an assertion. An unknown month name raises IllegalArgumentException.
     */

    private static Date makeDate(Matcher m) {
        def monthName = m.group(1)
        def dayOfMonth = (m.group(2)) as Integer
        def hour = (m.group(3)) as Integer
        def minute = (m.group(4)) as Integer
        def second = (m.group(5)) as Integer
        def tz = (m.group(6))
        def year = (m.group(7)) as Integer
        // any other time zone needs special mapping, i.e. add code as needed
        assert tz == 'GMT' : "Unsupported time zone '${tz}', only GMT is handled"
        // compare against null explicitly: Calendar.JANUARY is 0 and thus "false" in Groovy truth
        def javaMonth = JAVA_MONTH_MAP[monthName.toLowerCase()]
        if (javaMonth == null) {
            throw new IllegalArgumentException("Unknown month name '${monthName}'")
        }
        // every field that matters is overwritten, so the shared calendar carries no stale state
        Calendar cal = GREGORIAN_GMT_CALENDAR
        cal.set Calendar.YEAR, year
        cal.set Calendar.MONTH, javaMonth
        cal.set Calendar.DAY_OF_MONTH, dayOfMonth
        cal.set Calendar.HOUR_OF_DAY, hour
        cal.set Calendar.MINUTE, minute
        cal.set Calendar.SECOND, second
        cal.set Calendar.MILLISECOND, 0
        return cal.getTime()
    }

    /**
     * A helper function that extract callerId and calledId from text, if they exist.
     * Returns a map with the keys "callerId" and "calledId"; a value is null if the id
     * was not found or if it was the literal string 'null'.
     */

    private static Map extractCallIds(String text) {
        assert text != null
        def callerId
        def calledId
        def m1 = PAT_CALLERID.matcher(text)
        def m2 = PAT_CALLEDID.matcher(text)
        if (m1.find()) {
            callerId = m1.group(1)
            if (callerId == 'null') {
                callerId = null
            }
        }
        if (m2.find()) {
            calledId = m2.group(1)
            if (calledId == 'null') {
                calledId = null
            }
        }
        return [ callerId : callerId, calledId : calledId ]
    }

    /**
     * A helper function to extract session action and event.
     * Returns a map with the keys "event", "action" and "newText". If the text starts with
     * a "Session event: " or "Session action: " prefix, the corresponding name is returned
     * and "newText" is the remainder; otherwise "newText" is the unchanged text.
     */

    private static Map extractSessionActionAndEvent(String text) {
        def event
        def action
        def newText
        def mse = PAT_SESSION_EVENT.matcher(text)
        if (mse.find()) {
            event = mse.group(1)
            newText = mse.group(3)
        } else {
            def msa = PAT_SESSION_ACTION.matcher(text)
            if (msa.find()) {
                action = msa.group(1)
                newText = msa.group(3)
            } else {
                newText = text
            }
        }
        return [ event : event, action : action, newText : newText ]
    }

    /**
     * Handling of the "rest" of a single line, i.e. whatever follows the first session id.
     * Returns a map with the keys "parentSessionId", "childSessionLine" and "sessionLine"
     * (exactly one of the two lines is set), or null if the rest matches neither pattern.
     */

    private static Map processSingleLineRest(Map params, String rest) {
        def msub1 = PAT2.matcher(rest)
        if (msub1.find()) {
            // This is information about a child session!
            // Inverse the sessions ids here...
            def childSessionId = params['firstSessionId']
            def parentSessionId = (msub1.group(1))
            def x = (msub1.group(2)) as Integer // Unsure what this is, it's always 1
            def logger = (msub1.group(3))
            def text = (msub1.group(5)) // group 4 is the inner repetition group of the logger name
            def callIds = extractCallIds(text)
            def callerId = callIds['callerId']
            def calledId = callIds['calledId']
            return [
                parentSessionId : parentSessionId,
                childSessionLine : new ChildSessionLine(childSessionId, params['when'], params['host'], params['intp'], x, logger, callerId, calledId, text),
                sessionLine : null
            ]
        }
        def msub2 = PAT3.matcher(rest)
        if (msub2.find()) {
            // This is information about the current session
            def x = (msub2.group(1)) as Integer // Unsure what this is, it's always 1
            def y = (msub2.group(2)) as Integer // Unsure what this is, it's always 1
            def text = (msub2.group(3))
            def callIds = extractCallIds(text)
            def callerId = callIds['callerId']
            def calledId = callIds['calledId']
            def saes = extractSessionActionAndEvent(text)
            def event = saes['event']
            def action = saes['action']
            text = saes['newText'] // override text
            return [
                parentSessionId : params['firstSessionId'],
                childSessionLine : null,
                sessionLine : new SessionLine(params['when'], params['host'], params['intp'], x, y, callerId, calledId, event, action, text)
            ]
        }
        return null
    }

    /**
     * Single line handling. The parsed line is appended to its (possibly newly created)
     * session in "sessions", a map from session id to Session. Lines that do not match
     * are reported on stderr and dropped. If a session line ends its session, that session
     * is removed from the map and printed to stdout right away.
     */

    private static void processSingleLine(String line, Map sessions) {
        def mtop = PAT1.matcher(line)
        Session checkForThrowOut
        if (mtop.find()) {
            def params = [:]
            params['when'] = makeDate(mtop)
            params['host'] = (mtop.group(8))
            params['intp'] = (mtop.group(10))
            params['customerId'] = (mtop.group(11)) as Integer
            params['firstSessionId'] = (mtop.group(12)) // it is unsure whether this is the child or parent session id!
            def rest = (mtop.group(13))
            //
            // Check the "rest".
            //
            def paramsRest = processSingleLineRest(params, rest)
            if (!paramsRest) {
                System.err.println("Unmatched at second level: ${line}")
                // drop this line
            } else {
                def parentSessionId = paramsRest['parentSessionId']
                assert parentSessionId
                if (!sessions[parentSessionId]) {
                    def nSession = new Session(parentSessionId, params['customerId'], params['when'])
                    sessions[parentSessionId] = nSession
                }
                Session xSession = sessions[parentSessionId]
                assert params['customerId'] == xSession.customerId
                if (paramsRest['sessionLine']) {
                    xSession.lines << paramsRest['sessionLine']
                    // only a line of the session itself can end the session
                    checkForThrowOut = xSession
                } else {
                    assert paramsRest['childSessionLine']
                    xSession.lines << paramsRest['childSessionLine']
                }
            }
        } else {
            System.err.println("Unmatched at first level: ${line}")
            // drop this line
        }
        //
        // Can the session be thrown out?
        //
        if (checkForThrowOut) {
            assert checkForThrowOut.lines
            Line latestLine = checkForThrowOut.lines[-1]
            if (latestLine.intp == Line.Interpreter.CCX && latestLine.text && latestLine.text[-1].startsWith('Session SESSION_END')) {
                sessions.remove(checkForThrowOut.sessionId)
                System.err.println("Dumping session ${checkForThrowOut.sessionId} of ${checkForThrowOut.getWhenAsStr()}; there are now ${sessions.size()} sessions in the map")
                checkForThrowOut.prettyPrint(System.out)
            }
        }
    }

    /**
     * Read all the files passed in the "list". The passed list will be modified!
     * Files are processed in order of their name. An exception raised while reading a file
     * is reported on stderr and ends the processing of that file only. Sessions still
     * pending after the last file are printed to stdout, ordered by their start time.
     * Throws IllegalArgumentException if "files" or "encoding" is null or empty.
     */

    public static void readThem(List files, String encoding) {
        if (!files) {
            throw new IllegalArgumentException("You have to pass a list of files to read")
        }
        if (!encoding) {
            throw new IllegalArgumentException("You have to pass the encoding of the files that will be read")
        }
        //
        // Sort files trivially per name; this SHOULD order them by date as the date is
        // in their name and in a way that makes them sortable, but one should check later.
        //
        files.sort { File it -> it.getName() }
        //
        // Map to hold sessions being examined
        //
        def sessions = [:]
        //
        // Iteration over all files is go
        //
        files.each { File file ->
            System.err.println("Reading file ${file}")
            // TODO: What if this sees an enormous line?
            try {
                //
                // Iteration over all lines of a file is go; eachLine() closes the file in any case
                //
                file.eachLine(encoding) { String line ->
                    // >>>>>>>>>>>>>>
                    processSingleLine(line, sessions)
                    // <<<<<<<<<<<<<<
                }
            } catch (Exception exe) {
                System.err.println("Caught exception ${exe} while reading file ${file}")
                exe.printStackTrace()
            }
        }
        //
        // Dump session data
        //
        List sessionList = sessions.values() as List
        sessionList.sort { it.when }
        sessionList.each { Session it ->
            System.err.println("Dumping session ${it.sessionId} of ${it.getWhenAsStr()} ")
            it.prettyPrint(System.out)
        }
    }
}

/**
 * THE MAIN FUNCTION OF THE SCRIPT
 */

def fileList = []
args.each { String it ->
    fileList << new File(it)
}
VoxeoLogReader.readThem(fileList, 'UTF-8')