#! /usr/bin/python
#
# GREATERSURGEON
#   this script decrypts and/or decompresses the log files that are generated by GREATERDOCTOR
#	modified from the GreaterSurgeon parser
#
# usage:
#   greatersurgeon.py [-i <FILE>] [-o <FILE>] [-d] [-x] [-e <DIRECTORY>] [-s] [-p <password>] [-z] [-n] [-u]
#
# options:
#   -i
#       path to the input file to decrypt/decompress
#   -o
#       path to the output file
#   -d
#       decompress the file (for the json output file only)
#   -x
#       decrypt the file
#   -e 
#       extract any embedded binaries to the specified directory (for the json output file only)
#   -s
#       write a summary of the JSON output file
#   -p
#       password to decrypt the file
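#   -z
#       decompress the file with zlib (used for NTFS MFT data)
#   -n
#       process the block-structured output of the NTFS module
#   -u
#       strip trailing null bytes from the output file (used for decrypted unicode log files)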
#
# example:
#
#   As an example of how this tool is used, assume that GREATERDOCTOR was executed
# with the following command line arguments:
#
#   vtuner.exe -v -system -l log.enc -o json.gz.enc -x -w mypassword
#
#   In this case, GREATERDOCTOR will perform a system scan with verbose output that
# is logged to a file called "log.enc" in the current working directory.  The "log.enc"
# file will be encrypted.  GREATERDOCTOR will also create a JSON output file called
# "json.gz.enc" in the current working directory.  The JSON output is first compressed
# using GZIP compression and then encrypted.  Once these files have been exfiltrated to
# a trusted host, the Python script "greatersurgeon.py" will be used to process these log files.
# Each file must be operated on individually.  Here is an example of how this is done:
#
#   python.exe greatersurgeon.py -i log.enc -o log.txt -x -p mypassword
#   python.exe greatersurgeon.py -i json.gz.enc -o json.txt -d -x -p mypassword -e C:\BINARIES
#
#   After executing the tool as described above, the log file will be decrypted and
# saved as "log.txt" in the current working directory.  The original encrypted file
# remains unchanged.  The JSON output file is first decrypted and then decompressed with
# the results being saved as "json.txt" in the current working directory.  In addition, any
# binaries contained within the JSON output file are extracted and saved within the
# "C:\BINARIES" directory.
#
# expected output:
#
#   In the example below this script is being used to decrypt and then decompress a JSON
# output file that was generated with GREATERDOCTOR. For more information regarding the
# "trying again" message below reference the comment block right before the decompression
# code block below.
#
# C:\TEST>python.exe greatersurgeon.py -i json.gz.enc -o json.txt -x -d -p mypassword
#
# GREATERSURGEON [ output parser ]
#
#  +decrypting 'json.gz.enc' [ this may take some time... ]
#   decryption complete
#
#  +decompressing 'json.gz.enc'
#   trying again with null byte appended to file due to malformed gzip
#   decompression complete
#
# +saving raw json to json.txt
#
# +parsing complete
#
#

import json, zlib, gzip, sys, binascii, base64, os, struct, hashlib
from optparse import OptionParser

class ProcessGreaterSurgeonJson:
    """Wrap a decoded GREATERDOCTOR JSON report and render text summaries.

    The report must contain a top-level ``system`` object holding
    ``processes`` (list), ``modules`` (dict keyed by module path) and
    ``drivers`` (list).  ``keylogger_hooks`` and
    ``keylogger_candidate_modules`` are optional and default to empty lists.

    When ``dirname`` is given, any module entry that carries an embedded
    ``file`` blob is written to that directory while the module is dumped.
    """

    # Entries with a (combined) score at or above this value are reported.
    SCORE_THRESHOLD = 50

    # (flag name, bit mask) pairs, kept as tuples so output order is stable.
    MEM_TYPE = (('MEM_IMAGE', 0x1000000),
                ('MEM_PRIVATE', 0x20000))
    MEM_PROTECT = (('PAGE_NOACCESS', 0x01),
                   ('PAGE_READONLY', 0x02),
                   ('PAGE_READWRITE', 0x04),
                   ('PAGE_WRITECOPY', 0x08),
                   ('PAGE_EXECUTE', 0x10),
                   ('PAGE_EXECUTE_READ', 0x20),
                   ('PAGE_EXECUTE_READWRITE', 0x40))

    # (output format, json key) for the plain module attributes, in the
    # order they are printed after the MD5/SHA1 lines.
    MODULE_FIELDS = (('Entropy:%d\n', 'entropy'),
                     ('Service:%s\n', 'service'),
                     ('Hijacked Service:%s\n', 'hijacked_service'),
                     ('Signed:%s\n', 'signed'),
                     ('Microsoft:%s\n', 'ms'),
                     ('Packed:%s\n', 'packed'),
                     ('PE Checksum:%s\n', 'pe_checksum'),
                     ('PE Header Size:%s\n', 'header_size'),
                     ('PE Section Ordering:%s\n', 'section_ordering'),
                     ('PE Size Of Code:%s\n', 'size_of_code'),
                     ('Cache Match:%s\n', 'cache_match'),
                     ('Linker:%s\n', 'linker'),
                     ('Registry Persistence:%s\n', 'reg_persist'),
                     ('Protected:%s\n', 'protected'),
                     ('Keylogger:%s\n', 'keylogger'))

    def __init__(self,data,dirname=None):
        """Parse ``data`` (JSON text) and cache the sections used later.

        On any parse/lookup failure the error type is printed and the
        process exits with a non-zero status.
        """
        try:
            self.dirname = dirname
            self.j = json.loads(data)
            self.system = self.j['system']
            self.processes = self.system['processes']
            self.modules = self.system['modules']
            self.drivers = self.system['drivers']

            # both keylogger sections are optional in the report
            self.keyloggerHooks = self.system.get('keylogger_hooks', [])
            self.keyloggerModules = self.system.get('keylogger_candidate_modules', [])
        except Exception:
            # Exception (not a bare except) so Ctrl-C is not swallowed
            print("Unexpected error: %s" % (sys.exc_info()[0],))
            sys.exit(1)

    @staticmethod
    def _only_key(mapping):
        """Return the first key of ``mapping`` (used for one-entry dicts)."""
        return next(iter(mapping))

    def dump(self):
        """Print the whole report as indented JSON."""
        print(self)

    def write_file(self,name,data):
        """Write ``data`` to ``<dirname>/<basename of name>.bin``.

        ``name`` is a module path taken from the report.  Only its final
        component is used, and it is split with Windows path rules so the
        result is the same whether this script runs on Windows or not.
        """
        import ntpath  # local import: splits on both '\\' and '/' on any host

        #create dir if needed
        if not os.path.isdir(self.dirname):
            os.makedirs(self.dirname)

        fileName = os.path.join(self.dirname, ntpath.basename(name) + '.bin')
        with open(fileName, 'wb') as f:
            f.write(data)
        print('  extracted file to %s' % (fileName,))

    def dump_module(self,m,name,indent=0):
        """Return a multi-line description of module dict ``m``.

        If ``m`` has a ``file`` entry and an extraction directory was
        configured, the embedded binary is written out as a side effect.
        """
        pad = ' ' * indent
        ret_str = [pad + 'Module Score:%d\n' % m['score']]
        for label, key in (('MD5:%s\n', 'md5'), ('SHA1:%s\n', 'sha1')):
            # hexlify returns bytes on Python 3; decode so '%s' stays clean
            digest = binascii.hexlify(self.decrypt_blob(m[key])).decode('ascii')
            ret_str.append(pad + label % (digest,))
        for label, key in self.MODULE_FIELDS:
            ret_str.append(pad + label % (m[key],))

        if 'file' in m and self.dirname is not None:
            self.write_file(name, self.decrypt_blob(m['file']))
        return ''.join(ret_str)

    def decrypt_blob(self,blob):
        """Decode a report blob: base64 text wrapping zlib-compressed bytes."""
        return zlib.decompress(base64.b64decode(blob))

    @staticmethod
    def _flag_names(value, table):
        """Return 'NAME ' for every (name, mask) in ``table`` fully set in ``value``."""
        return ''.join('%s ' % name for name, mask in table
                       if (value & mask) == mask)

    def dump_injected_thread(self,t,indent=0):
        """Return a description of injected-thread record ``t``.

        ``t`` is indexed positionally: start address, state, memory type
        flags and protection flags.
        """
        pad = ' ' * indent
        ret_str = [
            pad + 'Start Address:%08X\n' % t[0],
            pad + 'State:%08X\n' % t[1],
            pad + 'Type:%08X ( ' % t[2],
            self._flag_names(t[2], self.MEM_TYPE),
            ')\n',
            pad + 'Protect:%08X ( ' % t[3],
            self._flag_names(t[3], self.MEM_PROTECT),
            ')\n',
        ]
        return ''.join(ret_str)

    def dump_process(self,p,indent=0):
        """Return a description of process dict ``p``, its module and threads."""
        pad = ' ' * indent
        ret_str = [
            pad + 'Process Score:%d\n' % p['score'],
            pad + 'Executable:%s\n' % (p['exe'],),
            pad + 'Command Line:%s\n' % (p['cmdline'],),
            pad + 'Hidden:%s\n' % (p['hidden'],),
            pad + 'Suspended:%s\n' % (p['suspended'],),
            pad + 'GUI:%s\n' % (p['gui'],),
            pad + 'Reg Persistence:%s\n' % (p['run'],),
            pad + 'Service:%s\n' % (p['service'],),
            pad + 'Entry Point mismatch:%s\n' % (p['entrypoint'],),
        ]

        name = self._only_key(p['module'])
        ret_str.append(pad + 'Module:\n')
        ret_str.append(self.dump_module(p['module'][name],name,indent=indent+1))
        for t in p['injected_threads']:
            ret_str.append(pad + 'Injected Thread:\n')
            ret_str.append(self.dump_injected_thread(t,indent=indent+1))
        return ''.join(ret_str)

    def dump_driver(self,p,indent=0):
        """Return a description of driver dict ``p`` and its module."""
        pad = ' ' * indent
        name = self._only_key(p['module'])
        ret_str = [
            pad + 'Driver Score: %d\n' % p['score'],
            pad + 'Path: %s\n' % (name,),
            pad + 'Module:\n',
            self.dump_module(p['module'][name],name,indent=indent+1),
        ]
        return ''.join(ret_str)

    def print_summary(self,indent=0):
        """Return the summary text, or '' when nothing was flagged.

        Keylogger hooks and candidate modules are always listed when
        present.  Processes and drivers are listed when their own score
        plus their module's score reaches SCORE_THRESHOLD; standalone
        modules are listed when their score alone does.
        """
        pad = ' ' * indent
        ret_str = []

        if self.keyloggerHooks:
            ret_str.append(pad + 'Keylogger Hooks:\n')
            for h in self.keyloggerHooks:
                ret_str.append(pad + ' handle: 0x%x / type 0x%x / offset 0x%x\n' % (h[0], h[1], h[2],))

        if self.keyloggerModules:
            ret_str.append(pad + 'Keylogger Candidate Modules:\n')
            for k in self.keyloggerModules:
                ret_str.append(pad + '%s\n' % (k,))

        for p in self.processes:
            name = self._only_key(p['module'])
            if p['score'] + p['module'][name]['score'] >= self.SCORE_THRESHOLD:
                ret_str.append(pad + 'Process:\n')
                ret_str.append(self.dump_process(p,indent=indent+2))
                ret_str.append('\n\n')

        for name, m in self.modules.items():
            if m['score'] >= self.SCORE_THRESHOLD:
                ret_str.append(pad + 'Module:\n')
                ret_str.append(' '*(indent+1) + '%s \n' % (name,))
                ret_str.append(self.dump_module(m,name,indent=indent+2))
                ret_str.append('\n\n')

        for d in self.drivers:
            name = self._only_key(d['module'])
            if d['score'] + d['module'][name]['score'] >= self.SCORE_THRESHOLD:
                ret_str.append(pad + 'Driver:\n')
                ret_str.append(self.dump_driver(d,indent=indent+2))
                ret_str.append('\n\n')

        return ''.join(ret_str)

    def dump_summary(self):
        """Return the summary text, or a fixed notice when nothing was flagged."""
        t = self.print_summary()
        if len(t) == 0:
            #nothing malicious found
            t = "Scan did not flag anything malicious"
        return t

    def __str__(self):
        """Return the parsed report re-serialized as indented JSON."""
        return json.dumps(self.j,indent=4,ensure_ascii=False)
   
   
##############
# DECRYPTION #
##############



def decrypt(data,decryptkey):
    """Decrypt ``data`` in independent 512-byte blocks and return the result.

    Each block is treated as 128 unsigned 32-bit words and run through an
    XXTEA-style inverse mixing loop keyed by ``decryptkey``.

    data       -- byte string to decrypt; if its length is not a multiple
                  of 512 it is zero-padded up to the next multiple first.
    decryptkey -- indexable sequence of four 32-bit integers.

    Returns a byte string whose length is the padded input length (the
    padding is NOT removed; callers truncate if they know the real size).
    Words are packed/unpacked with struct's native "I" format, exactly as
    the rest of this script does.
    """
    mask = 0xffffffff
    delta = 0x9e3779b9
    blocksize = 512
    words = blocksize // 4
    fmt = "%dI" % words
    # the round count depends only on the (fixed) number of words per block
    rounds = 6 + 52 // words

    def mix(y, z, p, e, total):
        # the per-word mixing value; all inputs are passed explicitly
        return ((z >> 5 ^ y << 2) + (y >> 3 ^ z << 4)) ^ ((total ^ y) + (decryptkey[(p & 3) ^ e] ^ z))

    # pad once instead of one byte per loop iteration
    remainder = len(data) % blocksize
    if remainder:
        data += b'\x00' * (blocksize - remainder)

    chunks = []
    for offset in range(0, len(data), blocksize):
        v = list(struct.unpack(fmt, data[offset:offset + blocksize]))
        n = len(v)
        total = (rounds * delta) & mask
        y = v[0]

        # total steps down by delta each round and reaches 0 after `rounds`
        while total != 0:
            e = (total >> 2) & 3
            # walk the words from last to second, each using its left
            # neighbour (z) and the word just recovered on its right (y)
            for p in range(n - 1, 0, -1):
                z = v[p - 1]
                y = v[p] = (v[p] - mix(y, z, p, e, total)) & mask

            # word 0 wraps around and uses the last word as its neighbour
            z = v[n - 1]
            y = v[0] = (v[0] - mix(y, z, 0, e, total)) & mask
            total = (total - delta) & mask

        chunks.append(struct.pack(fmt, *v))

    # join once rather than growing a string block by block
    return b''.join(chunks)
	
		
# Size in bytes of the header that precedes every NTFS MFT data block.
_NTFS_HEADER_SIZE = 16


def _exit_with_error(message, parser=None):
    """Print ``message`` (plus the usage text if ``parser`` is given) and exit 1."""
    print(message)
    if parser is not None:
        parser.print_help()
    sys.exit(1)


def _build_option_parser():
    """Return the OptionParser describing this script's command line."""
    parser = OptionParser()
    parser.add_option("-i", "--inputFile", action="store", type="string", dest="inputFile",help="input file to parse (decrypt, decompress, etc...)")
    parser.add_option("-o", "--outputFile", action="store", type="string",dest="outputFile",help="output file")
    parser.add_option("-d", "--decompress", action="store_true", dest="decompress",help="decompress the file (json only)")
    parser.add_option("-x", "--decrypt", action="store_true", dest="decrypt",help="decrypt the file")
    parser.add_option("-e", "--extract",action="store", type="string", dest="extractDir",help="extract any embedded binaries to the specified directory (json only)")
    # store_true (was store_false): the flag now really is true when given
    parser.add_option("-s", "--summary",action="store_true",dest="summary",help="summarize the data (json only)")
    parser.add_option("-p", "--password",action="store", type="string", dest="password", help="Password for decrypting the GreaterDoctor files.")
    parser.add_option("-z", "--zlibdecompress",action="store_true", dest="zlibdecompress", help="Decompress using zlib, used from NTFSMFT data.")
    parser.add_option("-u", "--unicode",action="store_true", dest="unicode", help="Process using unicode, used for encrypted Log Data.")
    parser.add_option("-n", "--ntfsmft",action="store_true", dest="ntfsmft", help="Process ntfsmft encryptionblocks.")
    return parser


def _key_from_password(password):
    """Return the four-word decryption key: the MD5 digest of ``password``."""
    if not isinstance(password, bytes):
        # only reached on Python 3, where command line arguments are text
        password = password.encode('utf-8')
    return struct.unpack("4I", hashlib.md5(password).digest())


def _process_ntfs_mft(options):
    """Convert an NTFS MFT block file (-n) into its plain contents.

    The input is a sequence of blocks, each preceded by a 16-byte header:
    three little-endian 32-bit sizes (uncompressed, compressed, encrypted)
    followed by a one-byte compressed flag and a one-byte encrypted flag.
    A header whose uncompressed size is 0 terminates the stream.
    """
    decryptkey = None
    if options.password is not None:
        decryptkey = _key_from_password(options.password)

    blockcount = 0
    with open(options.inputFile, "rb") as fi:
        with open(options.outputFile, "wb") as fo:
            while True:
                header = fi.read(_NTFS_HEADER_SIZE)
                if not header:
                    # plain end of file without a terminating header
                    break
                if len(header) < _NTFS_HEADER_SIZE:
                    _exit_with_error("Error reading input data, block header was shorter than %d bytes" % (_NTFS_HEADER_SIZE))

                (uncompressedSize, compressedSize, encryptedSize,
                 compressedFlag, encryptedFlag) = struct.unpack("<IIIBB", header[:14])

                if uncompressedSize == 0:
                    print("Successfully processed %d NTFS MFT data blocks" % (blockcount))
                    break

                datablock = fi.read(encryptedSize)
                if len(datablock) < encryptedSize:
                    _exit_with_error("Error reading input data, encryption block was shorter than given length")

                if encryptedFlag > 0:
                    if not options.decrypt:
                        _exit_with_error("Error - file data is encrypted but decrypt option was not selected")
                    # decrypt() pads to 512 bytes; cut back to the real size
                    datablock = decrypt(datablock, decryptkey)[:compressedSize]

                if compressedFlag > 0:
                    datablock = zlib.decompress(datablock)

                fo.write(datablock)
                blockcount += 1


def _decrypt_file(inputPath, outputPath, password):
    """Decrypt ``inputPath`` into ``outputPath`` using the shared decrypt()."""
    try:
        # read everything before opening the output, so decrypting a file
        # onto itself does not truncate the data first
        with open(inputPath, "rb") as fi:
            data = fi.read()
        plain = decrypt(data, _key_from_password(password))
        with open(outputPath, "wb") as fo:
            fo.write(plain)
    except Exception:
        print("ERROR DECRYPTING: \n%s\n%s" % (sys.exc_info()[1], sys.exc_info()[2]))
        sys.exit(1)

    print("Finished decrypting.")


def _read_gzip_with_retries(gzipPath, attempts=4):
    """Return the decompressed contents of ``gzipPath`` or None on failure.

    A GZIP stream ends with a CRC and a 4-byte length field (RFC 1952).  If
    that trailer is cut short the gzip module raises struct.error
    ("unpack requires a string argument of length 4").  In that case one
    null byte is appended and the read is retried, up to ``attempts`` times.

    The padding is applied to an in-memory copy, so the file on disk (which
    is the user's input file when -x was not given) is never modified.
    """
    import io  # local import: only needed for the in-memory retry buffer

    with open(gzipPath, 'rb') as fh:
        raw = fh.read()

    for _ in range(attempts):
        try:
            gz = gzip.GzipFile(fileobj=io.BytesIO(raw), mode='rb')
            try:
                return gz.read()
            finally:
                gz.close()
        except struct.error:
            print('  trying again with null byte appended to file due to malformed gzip')
            raw += b'\x00'
        except Exception:
            # if we are here then we received an unexpected exception so we bail
            print('\n FATAL: unexpected exception: %s\n%s' % (sys.exc_info()[1], sys.exc_info()[2]))
            sys.exit(1)
    return None


def _decompress_json(options):
    """Handle -d: gunzip the JSON report, then write a summary or raw JSON."""
    # the decrypted data lives in the output file when -x was also given
    if options.decrypt:
        gzipPath = options.outputFile
    else:
        gzipPath = options.inputFile

    print(' +decompressing \'%s\'' % (gzipPath))

    uncompressedData = _read_gzip_with_retries(gzipPath)
    if uncompressedData is None:
        print('\n FATAL: could not uncompress GZIP file')
        sys.exit(1)

    print('  decompression complete')

    if options.summary:
        # summarize output - also extracts files if options.extractDir is set
        print('\n +saving summary of scanner results to \'%s\'' % (options.outputFile))
        p = ProcessGreaterSurgeonJson(uncompressedData,dirname=options.extractDir)
        output = p.dump_summary()
    else:
        print('\n +saving raw json to %s' % (options.outputFile))
        p = ProcessGreaterSurgeonJson(uncompressedData,dirname=options.extractDir)

        # building the summary is what triggers file extraction
        if options.extractDir is not None:
            p.dump_summary()
        output = p.__str__()

    with open(options.outputFile, 'wb') as fo:
        fo.write(output.encode('utf8'))


def main():
    """Command line entry point: validate options, then run each stage.

    Stages run in this order when selected: NTFS MFT block processing (-n,
    exclusive - nothing else runs afterwards), decryption (-x), gzip/JSON
    handling (-d), zlib decompression (-z) and trailing-null stripping (-u).
    Invalid option combinations print the usage text and exit with status 1.
    """
    parser = _build_option_parser()
    (options, args) = parser.parse_args()

    # make sure input/output files were provided
    if options.inputFile is None or options.outputFile is None:
        _exit_with_error('\nError: input/output file required\n', parser)

    # 'extract' / 'summary' should only be used with 'decompress' (json)
    if options.extractDir is not None or options.summary:
        if not options.decompress:
            _exit_with_error('\nError: compressed json file required for this option\n', parser)

    # -u on its own is deliberately not enough: it post-processes the output
    # of another stage
    if not (options.extractDir is not None or options.summary or options.decrypt
            or options.decompress or options.zlibdecompress or options.ntfsmft):
        _exit_with_error('\nError: at least one option [-d / -e / -s / -x / -z / -n] is required\n', parser)

    if options.decrypt and options.password is None:
        _exit_with_error('\nError: specified decryption but no password (-p) given.\n', parser)

    # print a quick heading to the screen
    print('\nGREATERSURGEON [ output parser ]\n')

    if options.ntfsmft:
        _process_ntfs_mft(options)
        return

    if options.decrypt:
        _decrypt_file(options.inputFile, options.outputFile, options.password)

    if options.decompress:
        _decompress_json(options)

    if options.zlibdecompress:
        print("Doing zlib decompression\n")
        sourcePath = options.outputFile if options.decrypt else options.inputFile
        with open(sourcePath, 'rb') as fi:
            data = fi.read()
        # decompress before reopening the output so a failure keeps it intact
        inflated = zlib.decompress(data)
        with open(options.outputFile, 'wb') as fo:
            fo.write(inflated)

    if options.unicode:
        print("Doing unicode modification\n")
        with open(options.outputFile, 'rb') as fi:
            data = fi.read()
        with open(options.outputFile, 'wb') as fo:
            fo.write(data.rstrip(b'\x00'))

    # echo that we are done
    print('\n +parsing complete')
    
if __name__ == "__main__":
    main()