#! /opt/imh-python/bin/python
""" Scan a URL, giving details about the process. """

# Author: Daniel K

import re
import sys
import sh
import os
import pwd
from sh import strace
import logging
import subprocess
from collections import defaultdict
from argparse import ArgumentParser
from rads.common import colors
from rads.common import setup_logging
from parse_url import parse_url


# Logger shared by the functions and classes defined in this module.
LOGGER = logging.getLogger(__name__)

def correctly_typecast_str(string):
    """Return `string` unchanged, or an empty string when it is None."""
    return '' if string is None else string


def colorize(text, color, end='none'):
    """Surround `text` with the `colors()` entries for `color` and `end`.

    `color` and `end` are keys into the mapping returned by `colors()`;
    a key that is missing raises whatever that mapping raises on lookup.
    """
    prefix = colors()[color]
    suffix = colors()[end]
    return '%s%s%s' % (prefix, text, suffix)


def set_logging(is_quiet, verbosity, output_file):
    """Pick a log level from the CLI flags and hand it to setup_logging().

    is_quiet    -- when true, only CRITICAL messages are logged and
                   `verbosity` is ignored.
    verbosity   -- None selects ERROR, 1 WARNING, 2 INFO; any other
                   value selects DEBUG.
    output_file -- '' sends logging to stderr; any other value is passed
                   to setup_logging() as the log file.
    """
    if is_quiet:
        level = logging.CRITICAL
    elif verbosity is None:
        level = logging.ERROR
    elif verbosity == 1:
        level = logging.WARNING
    elif verbosity == 2:
        level = logging.INFO
    else:
        level = logging.DEBUG

    if output_file == '':
        setup_logging(loglevel=level, print_out=sys.stderr)
        return

    setup_logging(logfile=output_file, loglevel=level, print_out=False)


def parse_args():
    """Parse the command line, configure logging, and return the URL.

    Recognised options are -v/--verbose (repeatable), -q/--quiet and
    -o/--output FILE; they are forwarded to set_logging().

    Returns the URL given as the positional argument.  If it is missing,
    "URL not given" is printed and the process exits with status 1.
    """
    parser = ArgumentParser(description=__doc__)

    parser.add_argument(
        "-v", "--verbose", action='count',
        help="Print verbose output. May be added multiple times."
    )

    parser.add_argument(
        "-q", "--quiet", action='store_true',
        help="Do not output logging. Overrides -v."
    )

    parser.add_argument(
        "-o", "--output", action='store', type=str, default='',
        help="Output logging to the specified file."
    )

    # nargs='?' keeps argparse from rejecting a missing URL itself, so the
    # explicit check below decides the message and the exit status.
    parser.add_argument(
        'url', metavar='URL', type=str, nargs='?',
        help=(
            "URL to scan. "
            "Required: the script exits with status 1 when it is omitted."
        )
    )

    args = parser.parse_args()

    if args.url is None:
        print("URL not given")
        sys.exit(1)

    set_logging(args.quiet, args.verbose, args.output)

    return args.url

class FileData(object):
    """Running byte counters for one file name seen in the trace."""

    # Class-level defaults; an instance gets its own value the first
    # time the attribute is assigned through `self`.
    filehandle = None
    data_read = 0
    data_written = 0

    def open(self, filehandle):
        """Remember the handle this file was opened with."""
        self.filehandle = filehandle

    def read(self, bytes):
        """Add `bytes` to the total read from this file."""
        self.data_read += bytes

    def write(self, bytes):
        """Add `bytes` to the total written to this file."""
        self.data_written += bytes


class FileHandle(object):
    """State of one file descriptor; cleared whenever it is (re)opened or closed."""

    # Class-level defaults; reset() restores the same values per instance.
    filename = None
    access = None
    is_open = False
    data_read = 0
    type = None

    def reset(self):
        """Return the handle to its initial, closed state."""
        self.filename = None
        self.access = None
        self.is_open = False
        self.data_read = 0
        self.type = None

    def open(self, filename, access):
        """Mark the handle as an open file called `filename`.

        `access` is stored as given.  Opening a handle that is already
        open is logged as an error and then overwrites the old state.
        """
        if self.is_open:
            LOGGER.error(
                "Opening an already open file '%s' to '%s'",
                self.filename,
                filename
            )

        self.reset()

        self.filename = filename
        self.access = access
        self.is_open = True
        self.type = 'file'

    def socket_open(self):
        """Mark the handle as an open socket (no file name is recorded).

        Opening a handle that is already open is logged as an error and
        then overwrites the old state.
        """
        if self.is_open:
            # A socket has no file name of its own, so only the name the
            # handle previously held can be reported here.
            LOGGER.error(
                "Opening a socket on an already open handle '%s'",
                self.filename
            )

        self.reset()
        self.is_open = True
        self.type = 'socket'

    def read(self, files, bytes):
        """Credit `bytes` read to this handle's entry in `files`.

        `files` maps file names to objects with a read(bytes) method.
        A read on an unopened handle is reported and booked under the
        name "Unknown".
        """
        if not self.is_open:
            LOGGER.error("Reading %d bytes into unopened handle", bytes)
            print(colorize("Reading %d bytes into unopened handle" % bytes, 'red'))
            self.filename = "Unknown"
            self.access = "unknown"

        files[self.filename].read(bytes)

    def write(self, files, bytes):
        """Credit `bytes` written to this handle's entry in `files`.

        `files` maps file names to objects with a write(bytes) method.
        A write on an unopened handle is reported and booked under the
        name "Unknown".
        """
        if not self.is_open:
            LOGGER.error("Writing %d bytes into unopened handle", bytes)
            print(colorize("Writeing %d bytes into unopened handle" % bytes, 'red'))
            self.filename = "Unknown"
            self.access = "unknown"

        files[self.filename].write(bytes)

    def close(self, files):
        """Close the handle, reporting a close on an unopened handle.

        `files` is accepted for symmetry with read()/write() but unused.
        """
        if not self.is_open:
            LOGGER.error("Closing unopened handle")
            print(colorize("Closing unopened handle", 'red'))

        self.reset()

class Handler(object):
    """Collect per-file, per-handle and per-command data for one trace.

    Attributes:
      files           -- file name -> FileData byte counters
      fh              -- handle number -> FileHandle state
      failed_files    -- names of files whose open failed
      commands        -- command name -> accumulated time
      user_time       -- running total maintained by the caller
      system_time     -- running total maintained by the caller
      unfinished_open -- file name/access of an open awaiting its
                         "resumed" line
    """

    def __init__(self):
        # All state is created per instance.  Keeping these containers as
        # class attributes would make every Handler share the same data.
        self.files = defaultdict(FileData)
        self.fh = defaultdict(FileHandle)
        self.failed_files = []
        self.commands = defaultdict(int)
        self.user_time = 0
        self.system_time = 0
        self.unfinished_open = FileHandle()

        # Handles 0-2 are registered up front under fixed names.
        self.open(0, 'STDIN', "unknown")
        self.open(1, 'STDOUT', "unknown")
        self.open(2, 'STDERR', "unknown")

    def failedopen(self, filename, access):
        """Record `filename` as a failed open; `access` is not used."""
        self.failed_files.append(filename)

    def open(self, handle, filename, access):
        """Register `filename` as open on handle number `handle`."""
        self.fh[handle].open(filename, access)

    def unfinished(self, filename, access):
        """Remember an open whose result has not been seen yet."""
        self.unfinished_open.filename = filename
        self.unfinished_open.access = access

    def resumed(self, handle):
        """Complete the remembered open on `handle` and forget it."""
        self.fh[handle].open(
            self.unfinished_open.filename,
            self.unfinished_open.access
        )
        self.unfinished_open.filename = None
        self.unfinished_open.access = None

    def failedresumed(self):
        """Record the remembered open as failed."""
        self.failed_files.append(self.unfinished_open.filename)

    def socket_open(self, handle):
        """Register a socket on handle number `handle`."""
        self.fh[handle].socket_open()

    def read(self, handle, bytes):
        """Credit `bytes` read to whatever is open on `handle`."""
        self.fh[handle].read(self.files, bytes)

    def write(self, handle, bytes):
        """Credit `bytes` written to whatever is open on `handle`."""
        self.fh[handle].write(self.files, bytes)

    def close(self, handle):
        """Close handle number `handle`."""
        self.fh[handle].close(self.files)

    def print_stats(self):
        """Print read/write totals, failed-open count and timing totals."""
        print("\nStats:\n")
        for name in self.files:
            if name is None:
                print("None file")
                continue
            # Only files whose name contains 'home' or 'STD' are listed.
            if 'home' in name or 'STD' in name:
                print("%s read %d bytes" % (name, self.files[name].data_read))

        for name in self.files:
            written = self.files[name].data_written
            if written != 0:
                print("%s wrote %d bytes" % (name, written))

        print("Failed files: %d " % len(self.failed_files))

        # Commands are listed in ascending order of accumulated time.
        for command in sorted(self.commands, key=self.commands.get):
            print("%s: %f" % (command, self.commands[command]))

        print("User time:   %f" % self.user_time)
        print("System time: %f" % self.system_time)

    def print_sockets(self):
        """Print the handles that are currently open with type 'file'."""
        # NOTE(review): despite the method name and the "Sockets:" label,
        # the filter selects handles of type 'file' -- confirm whether
        # 'socket' was intended.
        entries = []
        for key in sorted(self.fh):
            entry = self.fh[key]
            if entry.is_open and entry.type == 'file':
                entries.append("%d: '%s'" % (key, entry.filename))
        print("Sockets: " + ", ".join(entries) + "\n\n")


# The patterns are compiled once here rather than on every traced line.
# Shared prefix of an open()/openat() call: group 1 is the quoted path and
# group 2 the flag list.  openat's leading directory argument is skipped.
_OPEN_CALL = r'open(?:at)?\((?:(?:AT_FDCWD|[0-9]+), )?"([^"]+)", ([A-Z_|]+)'

_COMMAND_RE = re.compile(r'^[^(a-z]+([a-z_]+)\(')
_TIME_RE = re.compile(r'^\s*([0-9.]+).*<([0-9.]+)>$')
# "[^)]*" lets an optional mode argument (", 0666") precede the ")".
_OPEN_ERROR_RE = re.compile(_OPEN_CALL + r'[^)]*\)\s*=\s*-1')
_OPEN_RE = re.compile(_OPEN_CALL + r'[^)]*\)\s*=\s*([0-9]+)')
_OPEN_UNFINISHED_RE = re.compile(_OPEN_CALL + r'[^)]*<unfinished')
_OPEN_RESUMED_RE = re.compile(r'open(?:at)? resumed>\s*\)\s*=\s*([0-9]+)')
_OPEN_RESUMED_ERROR_RE = re.compile(r'open(?:at)? resumed>\s*\)\s*=\s*-1')
_SOCKET_RE = re.compile(r'socket\([^)]*\)\s*=\s*([0-9]+)')
_READ_RE = re.compile(r'read\(([0-9]+), [^)]*\)\s*=\s*([0-9]+)')
_WRITE_RE = re.compile(r'write\(([0-9]+), [^)]*\)\s*=\s*([0-9]+)')
_CLOSE_RE = re.compile(r'close\(([0-9]+)\)')

# Commands that are recognised but deliberately produce no output.
_IGNORED_COMMANDS = frozenset([
    "ioctl", "brk", "munmap", "mmap", "fstat", "shmdt", "shmctl", "lseek",
    "lstat", "mprotect", "smget", "smat", "futex", "stat", "fcntl",
    "execve", "access", "read", "arch_prctl", "set_tid_address",
    "set_robust_list", "rt_sigaction", "rt_sigprocmask", "getrlimit",
    "statfs", "getcwd", "clone", "unknown", "getdents", "connect", "uname",
    "alarm", "getuid", "shmget", "shmat", "unlink", "exit_group",
])


def process_data(line, handler):
    """Process one line of strace output and update `handler`.

    line    -- a single line of trace text.
    handler -- object providing the accumulators `user_time`,
               `system_time` and `commands`, plus the callbacks
               failedopen/open/unfinished/resumed/failedresumed/
               socket_open/read/write/close.

    Timing is accumulated first.  The line is then matched against the
    open variants (failed, successful, unfinished, resumed), then socket,
    read, write and close; the first match wins.  A line containing
    "open" that matches none of the open patterns is reported as a
    possible miss.  Any other command not in the ignore list is printed.
    """
    command_match = _COMMAND_RE.search(line)
    if command_match is not None:
        command = command_match.group(1)
    else:
        command = "unknown"

    # NOTE(review): the leading number is added to system_time and the
    # trailing <...> value to user_time.  With the "-Trf" flags used by
    # scan_url these are presumably strace's relative timestamp and the
    # syscall duration -- confirm the labels are the intended way round.
    leading_time = None
    time_match = _TIME_RE.search(line)
    if time_match is not None:
        leading_time = float(time_match.group(1))
        handler.user_time += float(time_match.group(2))
        handler.system_time += leading_time

    if leading_time:
        handler.commands[command] += leading_time

    match = _OPEN_ERROR_RE.search(line)
    if match is not None:
        handler.failedopen(match.group(1), match.group(2))
        return

    match = _OPEN_RE.search(line)
    if match is not None:
        handler.open(int(match.group(3)), match.group(1), match.group(2))
        return

    match = _OPEN_UNFINISHED_RE.search(line)
    if match is not None:
        handler.unfinished(match.group(1), match.group(2))
        return

    match = _OPEN_RESUMED_RE.search(line)
    if match is not None:
        handler.resumed(int(match.group(1)))
        return

    if _OPEN_RESUMED_ERROR_RE.search(line) is not None:
        handler.failedresumed()
        return

    # Anything else mentioning "open" is surfaced rather than silently
    # dropped; this also catches "open" inside paths or buffers.
    if "open" in line:
        print(colorize("Missed open? %s" % line, 'red'))
        return

    match = _SOCKET_RE.search(line)
    if match is not None:
        handler.socket_open(int(match.group(1)))
        return

    match = _READ_RE.search(line)
    if match is not None:
        handler.read(int(match.group(1)), int(match.group(2)))
        return

    match = _WRITE_RE.search(line)
    if match is not None:
        handler.write(int(match.group(1)), int(match.group(2)))
        return

    match = _CLOSE_RE.search(line)
    if match is not None:
        handler.close(int(match.group(1)))
        return

    if command not in _IGNORED_COMMANDS:
        print("Command: %s - line: %s " % (command, line))

    

def scan_url(php_binary, hostname, uri, file):
    """Run `php_binary file` under strace and yield the trace output.

    strace is invoked with "-Trf".  The child's environment consists of
    HTTP_HOST (set to `hostname`) and REQUEST_URI (set to `uri`).  Items
    produced by the strace iterator that are ints are skipped; every
    other item is yielded as-is.
    """
    environment = {
        "HTTP_HOST": hostname,
        "REQUEST_URI": uri
    }
    trace = strace("-Trf", php_binary, file, _iter="err", _env=environment)

    for item in trace:
        if not isinstance(item, int):
            yield item


def main():
    """Scan the URL given on the command line and print the statistics.

    The URL is resolved with parse_url(); the process then switches to
    the owning user's UID and to the directory given by parse_url(),
    traces the PHP run line by line, and prints the collected stats.
    Exits with status 1 when the user has no passwd entry.
    """
    url = parse_args()

    handler = Handler()

    url_data = parse_url(url)

    try:
        uid = pwd.getpwnam(url_data['user']).pw_uid
    except KeyError:
        # .get() keeps the message safe even if the 'user' key itself is
        # what was missing.
        LOGGER.error("No UID found for %s", url_data.get('user'))
        sys.exit(1)

    print("User %s has id %d" % (url_data['user'], uid))

    # NOTE(review): only the UID is changed; the group IDs of the calling
    # process are kept.  Consider whether the group should be switched as
    # well before running the user's code.
    os.setuid(uid)
    os.chdir(url_data['cwd'])

    php_binary = '/usr/local/bin/php'

    trace = scan_url(
        php_binary,
        url_data['hostname'],
        url_data['uri'],
        url_data['file']
    )
    for line in trace:
        process_data(line, handler)

    handler.print_stats()

# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
