import java.text.SimpleDateFormat
import java.util.Calendar
import java.util.TimeZone
import java.util.regex.Matcher

/* 34567890123456789012345678901234567890123456789012345678901234567890123456789
 * *****************************************************************************
 * This code is distributed under the MIT License, see 
 * http://www.opensource.org/licenses/mit-license.php
 * 
 * -----------------------------------------------------------------------------
 * The MIT License
 *
 * Copyright (c) 2011, M-PLIFY S.A.
 *                     21, rue Glesener
 *                     L-1631 LUXEMBOURG
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to 
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 
 * sell copies of the Software, and to permit persons to whom the Software is 
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 * -----------------------------------------------------------------------------
 * 
 * 
 *******************************************************************************
 *******************************************************************************
 * A script to process raw Voxeo logfiles into a single logfile with higher
 * readability. Written in Groovy 1.8.
 * 
 * How to install:
 * 
 * - You need a JVM and the "Groovy" interpreter
 *   The JVM can be obtained from: 
 *     http://www.oracle.com/technetwork/java/javase/downloads/index.html
 *   "Groovy" can be obtained from
 *     http://groovy.codehaus.org/
 *     See http://groovy.codehaus.org/Installing+Groovy for install instructions.
 *   
 *    
 * How to use:
 * 
 * - Save this script as a file named "LogReader.groovy". Do not use the
 *   name "VoxeoLogReader.groovy" as that will clash with the name of
 *   a class in the script.
 *    
 * - Download the Voxeo log files you want to examine into a single 
 *   directory, e.g. "VoxeoLogs"
 *    
 * - If they are compressed, uncompress them. On Unix, this is done with:
 *   $ gunzip VoxeoLogs/*
 *   
 * - Run this script, passing the list of all the log files on the command
 *   line. On Unix, this can be done using:
 *   
 *   $ find VoxeoLogs -name '*.txt' | xargs groovy LogReader.groovy > output.txt
 *   
 * - Open "output.txt" with your preferred text editor or pager
 * 
 * 2011.07.06 - It's usable. Ship it!
 * 2011.07.11 - Removed dependence on "Joda Time"
 ******************************************************************************/

/** 
 * ---------------------------------------------------------------
 * A class to hold common line info
 * --------------------------------------------------------------- 
 */

abstract class Line {

    static enum Interpreter {
        CCX, MOT
    }

    final Date        when // moment at which the entry was logged
    final String      host // name of the logging host, qualified or not
    final Interpreter intp // interpreter that produced the entry
    final List        text // message text; may span several lines or be empty

    // Formatter shared by all lines and sessions; its calendar is set to UTC below.
    public final static SimpleDateFormat sdf

    static {
        def utcCalendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"))
        sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
        sdf.setCalendar(utcCalendar)
    }

    /**
     * Translate an interpreter name ('CCX' or 'MOT') into the matching
     * Interpreter constant. Any other value, including null, is rejected
     * with an IllegalArgumentException.
     */

    protected static Interpreter toInterpreter(String x) {
        if (x == 'CCX') {
            return Interpreter.CCX
        }
        if (x == 'MOT') {
            return Interpreter.MOT
        }
        throw new IllegalArgumentException("Unknown value ${x}")
    }

    /**
     * Constructor. "when", "host" and "intp" must be set (Groovy truth);
     * "text" must not be null but may be an empty list. "intp" is given
     * by name and converted through toInterpreter().
     */

    Line(Date when, String host, String intp, List text) {
        assert when
        assert host
        assert intp
        assert text != null // an empty list is acceptable
        this.text = text
        this.intp = toInterpreter(intp)
        this.host = host
        this.when = when
    }

    /**
     * The logging time rendered as "yyyy-MM-dd HH:mm:ss" in UTC.
     */

    String getWhenAsStr() {
        return sdf.format(this.when)
    }

    /**
     * Emit the start of an output entry to "w" (untyped: a "Writer" or a
     * "PrintStream"). When this line's time differs from "latest", a
     * separator line carrying the new time is written first. Then the
     * interpreter tag is written, without a trailing newline. Everything
     * written is prefixed by "indent"; a null or empty indent counts as no
     * indentation.
     * Returns this.when, to be used by the caller as the next "latest".
     */

    protected Date writeLeadIn(w, Date latest, String indent) {
        assert w
        assert latest
        String prefix = indent ?: ''
        boolean timeChanged = (latest != when)
        if (timeChanged) {
            // the timestamp goes onto a line of its own
            w << prefix << '------' << getWhenAsStr() << ' UTC ------\n'
        }
        // indentation followed by the interpreter tag
        w << prefix << '[' << intp << ']'
        return when
    }

    /**
     * Emit the message text to "w". A single-line text is appended to the
     * line currently being written. In every other case (several lines, or
     * none at all) the current line is terminated and each text line is
     * written on its own line, prefixed by "indent" and "extraIndent".
     */

    protected void writeText(w, String indent, String extraIndent) {
        if (text.size() == 1) {
            // finish the line that the lead-in started
            w << text[0] << '\n'
            return
        }
        // terminate the lead-in line, then one output line per text line
        w << '\n'
        String pad = extraIndent ?: ''
        for (entry in text) {
            w << indent << pad << entry << '\n'
        }
    }
}

/** 
 * ---------------------------------------------------------------
 * A class to hold information about a line logged for the "session"
 * ---------------------------------------------------------------
 */

class SessionLine extends Line {

    final int    x             // purpose unknown; reportedly always 1
    final int    y             // purpose unknown; reportedly always 1
    final String callerId      // caller id when one was found in the message, else null
    final String calledId      // called id when one was found in the message, else null
    final String sessionEvent  // session event name when one was found (it is then no longer part of the text)
    final String sessionAction // session action name when one was found (it is then no longer part of the text)

    /**
     * Turn a session message back into separate lines. The message carries
     * its line breaks as the escaped character pairs "\r\n", "\n" and "\r";
     * these are converted to real line breaks (a stray "\r" is removed).
     * Every resulting line is trimmed and blank lines are discarded.
     * Returns a "List of String", which may be empty.
     */

    static List cleanText(String text) {
        assert text != null
        String unescaped = text.replace('\\r\\n','\n') // escaped CR+LF becomes a newline
                               .replace('\\n','\n')    // so does a lone escaped LF
                               .replace('\\r','')      // a lone escaped CR is removed
        // readLines() closes the reader once all lines have been read
        return new StringReader(unescaped).readLines()
                                          .collect { String raw -> raw.trim() }
                                          .findAll { String trimmed -> trimmed } // keep non-empty lines only
    }

    /**
     * Constructor. "text" is the raw message; it is split into lines with
     * cleanText() before being handed to the superclass.
     */

    SessionLine(Date when, String host, String intp, int x, int y, String callerId, String calledId, String sessionEvent, String sessionAction, String text) {
        super(when, host, intp, cleanText(text))
        this.sessionAction = sessionAction
        this.sessionEvent  = sessionEvent
        this.calledId      = calledId
        this.callerId      = callerId
        this.y             = y
        this.x             = x
    }

    /**
     * Pretty-print this line to "w", which is a "Writer" or a "PrintStream".
     * The output consists of the lead-in, one bracketed tag for each of
     * caller id, called id, event and action that is set, and the text.
     * "indent" is put in front of the output; "latest" decides whether a
     * new time separator is printed first.
     * Returns the new "latest", which is this.when.
     */

    Date prettyPrint(w, Date latest, String indent) {
        assert w
        assert latest
        indent = indent ?: ''
        Date newLatest = writeLeadIn(w, latest, indent)
        // opening part of each tag paired with its value; unset values are skipped
        def tags = [
            [ "[callerId=", callerId ],
            [ "[calledId=", calledId ],
            [ "[event=",    sessionEvent ],
            [ "[action=",   sessionAction ]
        ]
        for (tag in tags) {
            if (tag[1]) {
                w  << tag[0] << tag[1] << "]"
            }
        }
        writeText(w, indent, '         ')
        return newLatest
    }

}

/**
 * --------------------------------------------------------------- 
 * A class to hold information about a line logged for the "child session"
 * --------------------------------------------------------------- 
 */

class ChildSessionLine extends Line  {

    final int     x              // purpose unknown; reportedly always 1
    final String  logger         // dotted name of the logger, similar to a class name
    final String  callerId       // caller id when one was found in the message, else null
    final String  calledId       // called id when one was found in the message, else null
    final String  childSessionId // child session id, a hexadecimal string

    /**
     * Split the message of a child session into lines. When the logger name
     * ends in "wireLog" the text is split at its real line breaks only:
     * nothing is unescaped, trimmed or dropped. For every other logger
     * (including a null one) the work is delegated to
     * SessionLine.cleanText(). Returns a possibly empty "List of String".
     */

    private static List cleanText(String text, String logger) {
        assert text != null
        if (!logger?.endsWith('wireLog')) {
            return SessionLine.cleanText(text)
        }
        // Wire log: keep each line exactly as read (the line terminator is
        // removed by the reader) and keep empty lines, too. readLines()
        // closes the reader when done.
        return new StringReader(text).readLines()
    }


    /**
     * Constructor. "logger" and "childSessionId" must be set. "text" is the
     * raw message; it is split into lines with cleanText() before being
     * handed to the superclass.
     */

    ChildSessionLine(String childSessionId, Date when, String host, String intp, int x, String logger, String callerId, String calledId, String text) {
        super(when, host, intp, cleanText(text,logger))
        assert logger
        assert childSessionId
        this.calledId       = calledId
        this.callerId       = callerId
        this.logger         = logger
        this.x              = x
        this.childSessionId = childSessionId
    }

    /**
     * Pretty-print this line to "w", which is a "Writer" or a "PrintStream".
     * The output consists of the lead-in, the child session id, the last
     * dot-separated element of the logger name, one bracketed tag for each
     * of caller id and called id that is set, and the text.
     * "indent" is put in front of the output; "latest" decides whether a
     * new time separator is printed first.
     * Returns the new "latest", which is this.when.
     */

    Date prettyPrint(w, Date latest, String indent) {
        assert w
        assert latest
        indent = indent ?: ''
        Date newLatest = writeLeadIn(w, latest, indent)
        w << "[" << childSessionId << "]"
        if (logger) {
            // show only the final path element of the logger name
            List pathElements = logger.tokenize('.')
            w  << "[" << pathElements[-1] << "]"
        }
        def tags = [
            [ "[callerId=", callerId ],
            [ "[calledId=", calledId ]
        ]
        for (tag in tags) {
            if (tag[1]) {
                w  << tag[0] << tag[1] << "]"
            }
        }
        writeText(w, indent, '         ')
        return newLatest
    }
}

/**
 * --------------------------------------------------------------- 
 * Session collects the information about a Session, the common info and the logging lines
 * --------------------------------------------------------------- 
 */

class Session {

    final String sessionId        // id under which the session is tracked
    final int    customerId       // numeric customer id taken from the log line
    final Date   when             // time of the first line seen for this session
    final List   lines = []       // the session's lines, in the order they were added

    /**
     * Constructor. All three arguments must be set according to Groovy
     * truth; note that this makes a customerId of 0 fail the assertion.
     */

    Session(String sessionId, int customerId, Date when) {
        assert sessionId
        assert customerId
        assert when
        this.when       = when
        this.customerId = customerId
        this.sessionId  = sessionId
    }

    /**
     * The session start time as text, formatted with the shared UTC
     * formatter of "Line".
     */

    String getWhenAsStr() {
        return Line.sdf.format(this.when)
    }

    /**
     * Pretty-print the whole session to "w", which is a "Writer" or a
     * "PrintStream": a header line, every collected line in order, and a
     * footer line showing the time of the last line printed.
     */

    void prettyPrint(w) {
        assert w
        w.println(">>> Session ${sessionId} for customer ${customerId} starting at ${getWhenAsStr()} UTC")
        // "prettyPrint" is resolved dynamically on each element; it is not declared on Line.
        // Each call receives the time returned by the previous one, starting with the session start time.
        Date latest = lines.inject(when) { Date soFar, entry -> entry.prettyPrint(w,soFar,'       ') }
        w.println("<<< Session ${sessionId} ending at ${Line.sdf.format(latest)}")
    }
}

/**
 * --------------------------------------------------------------- 
 * The main reader called by MAIN FUNCTION of the script
 * --------------------------------------------------------------- 
 */

class VoxeoLogReader {

    // Various patterns

    // A complete raw log line. Groups: 1=month name, 2=day, 3=hour, 4=minute, 5=second, 6=timezone,
    // 7=year, 8=host, 10=interpreter, 11=customer id, 12=session id, 13=rest of the line.
    private static final PAT1 = ~/^(\w+) (\d+) (\d+):(\d+):(\d+) (\w+) (\d+) (\w+(\.\w+)*) (\w+) (\d+) ([a-f0-9]{32}) (.*)$/
    // "Rest" of a child session line. Groups: 1=session id, 2=number, 3=logger, 5=text.
    private static final PAT2 = ~/^([a-f0-9]{32}) (\d+) (\w+(\.\w+)*) (.*)$/
    // "Rest" of a session line. Groups: 1=number, 2=number, 3=text.
    private static final PAT3 = ~/^(\d+) (\d+) (.*)$/
    private static final PAT_CALLERID = ~/\bcallerID=(\w+)\b/
    private static final PAT_CALLEDID = ~/\bcalledID=(\w+)\b/
    // Groups for both: 1=event/action name, 3=remaining text.
    private static final PAT_SESSION_EVENT  = ~/^Session event: (\w+(\.\w+)*) (.*)$/
    private static final PAT_SESSION_ACTION = ~/^Session action: (\w+(\.\w+)*) (.*)$/

    // The map of the month names
    // NOTE(review): MONTH_MAP (1-based) is not referenced anywhere in this class; only
    // JAVA_MONTH_MAP (java.util.Calendar constants) is used. Confirm before removing it.

    private static final MONTH_MAP = [ jan : 1, feb : 2, mar : 3, apr : 4, may : 5, jun : 6, jul : 7, aug : 8, sep : 9, oct : 10, nov : 11, dec : 12 ]
    private static final JAVA_MONTH_MAP = [ jan : Calendar.JANUARY,
                                            feb : Calendar.FEBRUARY,
                                            mar : Calendar.MARCH,
                                            apr : Calendar.APRIL,
                                            may : Calendar.MAY,
                                            jun : Calendar.JUNE,
                                            jul : Calendar.JULY,
                                            aug : Calendar.AUGUST,
                                            sep : Calendar.SEPTEMBER,
                                            oct : Calendar.OCTOBER,
                                            nov : Calendar.NOVEMBER,
                                            dec : Calendar.DECEMBER ]

    // The Gregorian GMT calendar can be reused, so create it once.
    // makeDate() overwrites all of its date and time fields on every call.

    private static final Calendar GREGORIAN_GMT_CALENDAR = new GregorianCalendar(TimeZone.getTimeZone("GMT"));

    /**
     * A helper function that makes a "Date" out of a Matcher that matched PAT1, using
     * groups 1 to 7 (month name, day, hour, minute, second, timezone, year).
     *
     * Only the timezone "GMT" is handled: any other timezone fails an assertion, as a
     * signal that a mapping must be added to the code. An unknown month name is rejected
     * with an IllegalArgumentException.
     */

    private static Date makeDate(Matcher m) {
        def monthName  = (m.group(1))
        def javaMonth  = JAVA_MONTH_MAP[monthName.toLowerCase()]
        def dayOfMonth = (m.group(2)) as Integer
        def hour       = (m.group(3)) as Integer
        def minute     = (m.group(4)) as Integer
        def second     = (m.group(5)) as Integer
        def tz         = (m.group(6))
        def year       = (m.group(7)) as Integer
        // need special mapping for other timezones, i.e. add code as needed
        assert tz == 'GMT' : "Unsupported timezone '${tz}' in log line; only GMT is handled"
        if (javaMonth == null) {
            // say which month name is unknown instead of failing inside Calendar.set()
            throw new IllegalArgumentException("Unknown month name '${monthName}' in log line")
        }
        Calendar cal = GREGORIAN_GMT_CALENDAR
        cal.set Calendar.YEAR, year
        cal.set Calendar.MONTH, javaMonth
        cal.set Calendar.DAY_OF_MONTH, dayOfMonth
        cal.set Calendar.HOUR_OF_DAY, hour
        cal.set Calendar.MINUTE, minute
        cal.set Calendar.SECOND, second
        cal.set Calendar.MILLISECOND, 0
        return cal.getTime()
    }

    /**
     * A helper function that returns capture group 1 of the first occurrence of "pattern"
     * in "text". Returns null when the pattern does not occur or when the captured value
     * is the literal string 'null'.
     */

    private static String firstIdOrNull(pattern, String text) {
        def m = pattern.matcher(text)
        if (!m.find()) {
            return null
        }
        def id = m.group(1)
        return (id == 'null') ? null : id
    }

    /**
     * A helper function that extracts callerId and calledId from text, if they exist.
     * Returns a map with the keys "callerId" and "calledId"; a value is null when the
     * id is missing from the text or is given as the literal 'null'.
     */

    private static Map extractCallIds(String text) {
        assert text!=null
        return [ callerId : firstIdOrNull(PAT_CALLERID, text),
                 calledId : firstIdOrNull(PAT_CALLEDID, text) ]
    }

    /**
     * A helper function to extract session action and event. Returns a map with the keys
     * "event", "action" and "newText". When the text starts with "Session event: " or
     * "Session action: " followed by a name, that name is returned under the matching key
     * and "newText" is what follows the name; otherwise "newText" is the unchanged text.
     * At most one of "event" and "action" is non-null.
     */

    private static Map extractSessionActionAndEvent(String text) {
        def event
        def action
        def newText = text
        def mse = PAT_SESSION_EVENT.matcher(text)
        if (mse) {
            event   = mse.group(1)
            newText = mse.group(3)
        }
        else {
            def msa = PAT_SESSION_ACTION.matcher(text)
            if (msa) {
                action  = msa.group(1)
                newText = msa.group(3)
            }
        }
        return [ event : event, action : action, newText : newText ]
    }


    /**
     * Single line handling, second level: interpret "rest", the part of the line that
     * follows the first session id. "params" holds the values already extracted from the
     * line (keys "when", "host", "intp", "customerId", "firstSessionId").
     *
     * Returns a map with the keys "parentSessionId", "childSessionLine" and "sessionLine",
     * exactly one of the two lines being non-null, or null if "rest" matches neither the
     * child session format nor the session format.
     */

    private static Map processSingleLineRest(Map params, String rest) {
        def msub1 = PAT2.matcher(rest)
        if (msub1) {
            // This is information about a child session!
            // The roles of the session ids are swapped here: the id at the start of the
            // line is the child's, the id found in "rest" is the parent's.
            def childSessionId  = params['firstSessionId']
            def parentSessionId = (msub1.group(1))
            def x           = (msub1.group(2)) as Integer // Unsure what this is, it's always 1
            def logger      = (msub1.group(3))
            def text        = (msub1.group(5))
            def callIds     = extractCallIds(text)
            return [ parentSessionId  : parentSessionId,
                childSessionLine : new ChildSessionLine(childSessionId, params['when'], params['host'], params['intp'], x, logger, callIds['callerId'], callIds['calledId'], text),
                sessionLine      : null ]
        }
        def msub2 = PAT3.matcher(rest)
        if (msub2) {
            // This is information about the current session
            def x    = (msub2.group(1)) as Integer // Unsure what this is, it's always 1
            def y    = (msub2.group(2)) as Integer // Unsure what this is, it's always 1
            def text = (msub2.group(3))
            // the call ids are looked up in the full text, before event/action is cut off
            def callIds = extractCallIds(text)
            def saes    = extractSessionActionAndEvent(text)
            return [ parentSessionId  : params['firstSessionId'],
                childSessionLine : null,
                sessionLine      : new SessionLine(params['when'], params['host'], params['intp'], x, y, callIds['callerId'], callIds['calledId'], saes['event'], saes['action'], saes['newText']) ]
        }
        return null
    }

    /**
     * Single line handling, first level: parse one raw log line and add the resulting
     * line object to its session in "sessions" (a map from session id to Session),
     * creating the session on first sight. Lines that match neither level of the
     * expected format are reported on stderr and dropped.
     *
     * When a session line logged by the CCX interpreter has a last text line starting with
     * 'Session SESSION_END', the session is removed from the map and printed to stdout
     * right away.
     */

    private static void processSingleLine(String line, Map sessions) {
        def mtop = PAT1.matcher(line)
        if (!mtop) {
            System.err.println("Unmatched at first level: ${line}")
            // drop this line
            return
        }
        def params = [:]
        params['when']           = makeDate(mtop)
        params['host']           = (mtop.group(8))
        params['intp']           = (mtop.group(10))
        params['customerId']     = (mtop.group(11)) as Integer
        params['firstSessionId'] = (mtop.group(12)) // it is unsure whether this is the child or parent session id!
        //
        // Check the "rest".
        //
        def paramsRest = processSingleLineRest(params, (mtop.group(13)))
        if (!paramsRest) {
            System.err.println("Unmatched at second level: ${line}")
            // drop this line
            return
        }
        def parentSessionId = paramsRest['parentSessionId']
        assert parentSessionId
        if (!sessions[parentSessionId]) {
            sessions[parentSessionId] = new Session(parentSessionId,  params['customerId'], params['when'])
        }
        Session xSession = sessions[parentSessionId]
        assert params['customerId'] == xSession.customerId
        if (!paramsRest['sessionLine']) {
            // child session lines are collected only; they never end the session
            assert paramsRest['childSessionLine']
            xSession.lines << paramsRest['childSessionLine']
            return
        }
        xSession.lines << paramsRest['sessionLine']
        //
        // Can the session be thrown out?
        //
        assert xSession.lines
        Line latestLine = xSession.lines[-1]
        if (latestLine.intp == Line.Interpreter.CCX && latestLine.text && latestLine.text[-1].startsWith('Session SESSION_END')) {
            sessions.remove(xSession.sessionId)
            System.err.println("Dumping session ${xSession.sessionId} of ${xSession.getWhenAsStr()}; there are now ${sessions.size()} sessions in the map")
            xSession.prettyPrint(System.out)
        }
    }

    /**
     * Read all the files passed in the "list". The passed list will be modified (it is
     * sorted in place by file name)!
     *
     * Every line of every file is processed in turn. Sessions whose end is seen are
     * printed to stdout as soon as they end; the sessions still open after the last file
     * are printed at the end, ordered by start time. Progress and problems are reported
     * on stderr.
     *
     * An exception raised while processing a single line is reported together with the
     * line number, and reading continues with the next line. An exception raised while
     * opening or reading a file is reported, and reading continues with the next file.
     * Failed assertions are errors, not exceptions, and are not caught here.
     *
     * Throws IllegalArgumentException if "files" is null or empty or if "encoding" is
     * null or empty.
     */

    public static void readThem(List files, String encoding) {
        if (!files) {
            throw new IllegalArgumentException("You have to pass a list of files to read")
        }
        if (!encoding) {
            throw new IllegalArgumentException("You have to pass the encoding of the files that will be read")
        }
        //
        // Sort files trivially per name; this SHOULD order them by date as the date is
        // in their name and in a way that makes them sortable, but one should check later.
        //
        files.sort { File it -> it.getName() }
        //
        // Map to hold sessions being examined
        //
        def sessions = [:]
        //
        // Iteration over all files is go
        //
        files.each { File file ->
            System.err.println("Reading file ${file}")
            LineNumberReader lnr // TODO: What if this sees an enormous line?
            try {
                lnr = new LineNumberReader(new InputStreamReader(new BufferedInputStream(new FileInputStream(file)), encoding))
                //
                // Iteration over all lines of a file is go
                //
                String line
                while ((line = lnr.readLine()) != null) {
                    try {
                        // >>>>>>>>>>>>>>
                        VoxeoLogReader.processSingleLine(line,sessions)
                        // <<<<<<<<<<<<<<
                    }
                    catch (Exception exe) {
                        // One bad line must not cost the rest of the file: report it and go on.
                        System.err.println("Caught exception ${exe} while processing line ${lnr.getLineNumber()} of file ${file}; dropping this line")
                        exe.printStackTrace()
                    }
                }
            }
            catch (Exception exe) {
                // Failure to open or read the file itself: give up on this file only.
                System.err.println("Caught exception ${exe} while reading file ${file}")
                exe.printStackTrace()
            }
            finally {
                if (lnr!=null) {
                    lnr.close()
                }
            }
        }
        //
        // Dump session data of the sessions that never saw their end
        //
        List sessionList = sessions.values() as List
        sessionList.sort { it.when }
        sessionList.each { Session it ->
            System.err.println("Dumping session ${it.sessionId} of ${it.getWhenAsStr()} ")
            it.prettyPrint(System.out)
        }
    }
}

/**
 * THE MAIN FUNCTION OF THE SCRIPT
 */

// Turn every command-line argument into a File, then let the reader process
// the resulting list, decoding the files as UTF-8.
List fileList = args.collect { String path -> new File(path) }
VoxeoLogReader.readThem(fileList, 'UTF-8')