#!/usr/bin/python -u

# **********************************************************
# Copyright 2010-2016 VMware, Inc.  All rights reserved. -- VMware Confidential
# **********************************************************

# vm-support: 1. Read manifest files
#             2. Execute specified commands
#             3. Archive commands' results

# Threading in vm-support
#
#    Action = Copy operation | Shell command
#             ( | Prune files | Censor files)
#
# o First, an action threadpool and a tar thread are created
#
# o Actions found in all manifests are inserted into one "action queue"
# o Action threads take their work from the action queue
#    - Copy actions will be handed over into the tar queue
#    - Command/Run actions will spawn a new process to handle/monitor the
#      execution
#
# o The actions' results are appended into one tar queue
# o Each queue provides a certain amount of space
#    - If a new element needs to be inserted into a full queue then the caller
#      will block
#
# o The tar thread takes its work from the tar queue
#
# o If the action queue and thereafter the tar queue is empty then all
#   actions were processed
#
#   Queue      Thread | Process   Queue     Thread            Tar
#
#
#                  A1 | P1
#
#  =======  /      A2 | P2      \  ====                    ---------
#  actionQ  -                   -  tarQ -     T1    ->    |final tar|
#  =======  \        ...        /  ====                    ---------
#
#                  AX | PX
#

from __future__ import print_function

import sys
import os
import logging
import datetime
import logging.handlers
import subprocess
import json
import glob
import signal
import time
import argparse

# make sure we can find the vmsupport stuff below
if os.name == 'posix':
   sys.path.append('/usr/lib/vmware/site-packages/')

from vmsupport.archiver    import TarArchiver
from vmsupport.archiver    import ZipArchiver
from vmsupport.manifest    import ManifestLoader
from vmsupport.vmutils     import listVMs
from vmsupport.vmutils     import getRegisteredVMs
from vmsupport.performance import gatherPerfSnapshots
from vmsupport.vmtar       import vmtar
from vmsupport.errlog      import DisplayErrors, DumpLogs, LogInit, RecordError
from vmsupport.reconstruct import extractBundle, reconstruct

from vmsupport.encryption  import cryptoUtilPasswordProlog
from vmsupport.encryption  import cryptoUtilProlog
from vmsupport.encryption  import cryptoUtilEpilog

### globals

# Version reported by --version, in the progress banner and in the README.
SCRIPT_VERSION = "v3.2"

# File name of this script, without its directory.
SCRIPT_NAME = os.path.basename(__file__)

# Logged by main() when needIncidentKey() reports a missing incident key.
WARNING_NO_INCIDENT_KEY = "\n".join((
   "Command did not succeed because encryption mode was enabled for the host, "
   "but the vm-support incident key is missing.",
   "To collect useful coredumps, perform these tasks:",
   "1. Generate a vm-support incident key by running:",
   "crypto-util keys vm-support --password prolog",
   "2. Run vm-support:",
   "vm-support [options]",
   "3. Perform cleanup:",
   "crypto-util keys vm-support epilog",
   ""))

# used for testing; handed to ManifestLoader as its fsRoot argument
fsRoot = ''

def loadCgiOptions(options):
   """ Read key/value pairs from the url, and append them to options object.

   Every field of the CGI request is stored on options under the field's
   name, using the first value given for it. 'duration', 'interval' and
   'loglevel' are converted to int; an invalid number is logged and the
   process exits with status 1.

   On ESXi the 'password' field is not stored on options. It is passed to
   cryptoUtilPasswordProlog(); without a password cryptoUtilProlog() is
   called instead.
   """
   import cgi
   import cgitb

   cgitb.enable()
   form = cgi.FieldStorage()

   # Work on a private list so that removing 'password' below does not
   # depend on the container type returned by FieldStorage.keys().
   keys = list(form.keys())

   # Hack to support encrypted coredumps to VC: PR1623936
   if isEsx():
      if 'password' in keys:
         cryptoUtilPasswordProlog(form.getfirst('password'))
         keys.remove('password')
      else:
         cryptoUtilProlog()

   for key in keys:
      value = form.getfirst(key)

      if key in ('duration', 'interval', 'loglevel'):
         try:
            value = int(value)
         except ValueError:
            # Report the expected type itself; type(int) would print 'type'.
            logging.critical("Invalid value for parameter %s: %s. "
                             "(Expected: %s, Got: %s)" %
                             (key, value, int, type(value)))
            sys.exit(1)
      setattr(options, key, value)
   del form

def GetSyslogRemoteHost():
   """ Query localcli to see where logs are going to.

   Returns the value of the 'Remote Host' entry of the syslog configuration.
   Returns None when that value is "<none>", or when the configuration
   could not be queried or parsed.
   """
   try:
      p = subprocess.Popen(['/sbin/localcli', '--formatter=json',
                            'system', 'syslog', 'config', 'get'],
                           shell=False, stdout=subprocess.PIPE,
                           universal_newlines=True)
      output = p.communicate()[0]
   except Exception as ex:
      # ugh, assume no remote
      logging.debug('Failed to get syslog config info: localcli failed %s' % ex)
      return None

   try:
      syslogConfig = json.loads(output)
   except ValueError as ex:
      # output wasn't json, assume no remote
      logging.debug('Failed to get syslog config info: %s' % ex)
      return None

   try:
      remoteHost = syslogConfig['Remote Host']
   except (KeyError, TypeError) as ex:
      # malformed json  / request.  Assume not logging remotely.
      # TypeError covers valid JSON that is not an object (list, string, ...).
      logging.debug('failed to get syslog info: lookup error on remote host:'
                    '%s' % ex)
      return None

   if remoteHost == "<none>":
      return None
   return remoteHost

def PrintSupportStatement(tarObj, options):
   """ Print the closing message shown after the bundle has been written.

   tarObj  -- bundle object; only its getFilePath() is used
   options -- parsed options; only options.quiet is read
   """
   bundlePath = tarObj.getFilePath()

   # Legacy code in vim/hostd/hostsvc/impl/DiagnosticManager.cpp looks
   # for the 'File:' line.
   if options.quiet:
      print("File: '%s'" % bundlePath)

   print("Please attach this file when submitting an incident report.")
   print("To file a support incident, go to "
         "http://www.vmware.com/support/sr/sr_login.jsp")

   # Remind the user of logs that were forwarded to another host.
   syslogTarget = GetSyslogRemoteHost()
   if syslogTarget:
      print("This host has been configured via syslog to send logs to %s. "
            % syslogTarget)
      print("The relevant logs from %s should also be manually "
            "added to the incident report.\n" % syslogTarget)

   print("To see the files collected, check '%s'" % bundlePath)


def GetOutputFilePrefix():
   """ Return the file name prefix for the support bundle.

   The prefix is the "bundlePrefix" entry of the product.cfg file found in
   the vm-support configuration directory. 'esx' is returned unless exactly
   one product.cfg is found and its prefix can be read.
   """
   if os.name == 'posix':
      cfgDir = '/etc/*/vm-support'
   else:
      cfgDir = os.path.join(os.environ['VMWARE_CFG_DIR'], 'vm-support')

   candidates = glob.glob(os.path.join(cfgDir, "product.cfg"))
   if len(candidates) != 1:
      return 'esx'

   try:
      with open(candidates[0]) as cfgFile:
         return json.load(cfgFile)["bundlePrefix"]
   except Exception as e:
      # Unreadable or incomplete product.cfg: fall back to the default.
      logging.debug("Could not read product.cfg: %s" % str(e))
   return 'esx'

def AddReadme(args, tarObj, parsedOptions):
   """ Add README to vm-support bundle.

   args          -- command line arguments (list of strings, without the
                    program name); joined with this file's name to form the
                    'Command line' entry
   tarObj        -- bundle object; getFilePath() supplies the bundle name and
                    addFromBuf() receives the finished README
   parsedOptions -- parsed options, written into the README as they format
   """
   readMe = '''Command line: {cmdLine}
Bundle name: {bundleName}
Captured on: {date}
vm-support version: {version}
Options: {options}

Files/Directories of Interest:
------------------------------

error.log: log containing errors that occurred while running vm-support
action.log: a log of all commands, and/or actions run during vm-support
commands: a directory containing output files for all commands run

All other directories and files should be mirrors of the ESXi system
vm-support was run on.
'''.format(cmdLine=' '.join([__file__] + args),
           bundleName=tarObj.getFilePath(),
           date=datetime.datetime.now(),
           version=SCRIPT_VERSION,
           options=parsedOptions)

   # Py3: Adding data from a buffer to the tar requires the data to be bytes
   if sys.version_info.major >= 3:
      tarObj.addFromBuf('README', readMe.encode())
   else:
      tarObj.addFromBuf('README', readMe)

def isEsx():
   """ Return True when running on a posix system whose uname is VMkernel """
   # os.uname() is only consulted on posix, where it is available.
   if os.name != 'posix':
      return False
   return os.uname()[0] == 'VMkernel'

def GetCommandlineOptions(args):
   """Configure and run the option parser.

   args -- list of command line arguments, without the program name.

   Return the parsed command line options (argparse namespace). The --vm and
   --listvms options are only defined when isEsx() is true, so the 'vm' and
   'listvms' attributes exist only in that case.
   """

   parser = argparse.ArgumentParser()

   # manifest options.  General note.  The long option name should be the same
   # as the destination variable name.  Otherwise the simplistic option
   # parsing in loadCgiOptions() doesn't work.

   inputGroup = parser.add_argument_group("Input")

   inputGroup.add_argument("-g", "--groups",
                           default="",
                           help="Specify manifest groups. See --listgroups.")

   inputGroup.add_argument("-a", "--manifests",
                           default="",
                           help="Specify manifest files. See --listmanifests.")

   inputGroup.add_argument("-e", "--excludemanifests",
                           default="",
                           help=("Exclude the listed manifests. See "
                                 "--listmanifests."))

   inputGroup.add_argument("--manifestdir", dest="manifestDir",
                           default=None,
                           help="Location to search for manifest files.")

   outputGroup = parser.add_argument_group("Output")

   outputGroup.add_argument("-w", "--workingdir",
                            default=None,
                            help="Archive destination.")

   outputGroup.add_argument("-s", "--stream",
                            action="store_true", default=False,
                            help="Stream data to stdout.")

   outputGroup.add_argument("-E", "--errorfile",
                            default="",
                            help=("Prints (non-fatal) errors to specified file "
                                  "(overrides --quiet and --stream)."))

   # Hidden option: not shown in --help.
   outputGroup.add_argument("--xmloutput",
                            action="store_true", default=False,
                            help=argparse.SUPPRESS)

   outputGroup.add_argument("-l", "--listfiles",
                            action="store_true", default=False,
                            help="At the end print all gathered files.")

   outputGroup.add_argument("-z", "--zip",
                            action="store_true", default=False,
                            help=("Archive bundle in zip format. Default is a "
                                  "gzipped tar archive."))

   outputGroup.add_argument("-o", "--obfuscate",
                            action="store_true", default=False,
                            help="Obfuscate hostname in archive path.")

   listGroup = parser.add_argument_group("List")

   listGroup.add_argument("--listmanifests",
                          action="store_true", default=False,
                          help="List available manifests.")

   listGroup.add_argument("-G", "--listgroups",
                          action="store_true", default=False,
                          help="List available manifest groups.")

   listGroup.add_argument("-t", "--listtags",
                          action="store_true", default=False,
                          help="List available manifest tags.")


   # perf options
   perfGroup = parser.add_argument_group("Performance measurement")

   perfGroup.add_argument("-p", "--performance",
                          action="store_true", default=False,
                          help="Gather performance data.")

   perfGroup.add_argument("-d", "--duration",
                          type=int, default=300,
                          help=("Duration of performance monitoring "
                                "(in seconds)."))

   perfGroup.add_argument("-i", "--interval",
                          type=int, default=15,
                          help=("Interval between performance snapshots "
                                "(in seconds)."))

   # VM options
   if isEsx():
      vmGroup = parser.add_argument_group("VM options")

      vmGroup.add_argument("-v", "--vm",
                           default=None,
                           help=("Gather detailed information about this "
                                 "specific VM (ie --vm <path to .vmx file>)"))

      vmGroup.add_argument("-V", "--listvms",
                           action="store_true", default=False,
                           help="List registered VMs.")

   threadingGroup = parser.add_argument_group("Parallel execution")

   threadingGroup.add_argument("--action-threads", dest="actionThreads",
                               type=int, default=4,
                               help=("Specify the number of actions that will "
                                     "run in parallel."))

   threadingGroup.add_argument("-L", "--legacy", dest="legacy",
                               action="store_true", default=False,
                               help=("Run in legacy mode (no parallel "
                                     "execution)."))

   miscGroup = parser.add_argument_group("Misc")

   miscGroup.add_argument("-D", "--dryrun",
                          action="store_true", default=False,
                          help=("Prints out the data that would have been "
                                "gathered."))

   miscGroup.add_argument("-q", "--quiet",
                          action="store_true", default=False,
                          help="Output only the location of the bundle.")

   miscGroup.add_argument("--loglevel",
                          type=int, default=logging.WARN,
                          help=("Set logging to specified level: 0-50 "
                                "(0=most verbose)."))

   miscGroup.add_argument("--version", dest="version",
                          action="store_true", default=False,
                          help="Display the version.")

   miscGroup.add_argument("--allow-infinite-actions",
                          dest="allowInfiniteActions",
                          action="store_true", default=False,
                          help=("Wait indefinitely for actions to finish. Use "
                                "with caution."))

   miscGroup.add_argument("--default-timeout", dest="defaultTimeout",
                          type=int, default=300,
                          help=("Set default timeout (300s) for actions "
                                "without a specified TIMEOUT option. It is "
                                "ignored when --allow-infinite-actions is "
                                "used."))

   miscGroup.add_argument("--ignore-timeouts", dest="ignore_timeouts",
                          action="store_true", default=False,
                          help=("Ignore TIMEOUTs as specified in manifest "
                                "files and use the default timeout."))

   extractGroup = parser.add_argument_group("Reconstruct vm-support bundle")

   extractGroup.add_argument("-r","--reconstruct", metavar="DIRECTORY",
                             default=None,
                             help="Unfragment a support bundle directory.")

   extractGroup.add_argument("-x","--extract", metavar=("BUNDLE", "DIRECTORY"),
                             nargs=2, default=None,
                             help="Extract and reconstruct support bundle.")

   return parser.parse_args(args)

def getArchiveConfig(options):
   """ Select the archive settings matching the requested bundle format.

   Returns a dict with three entries:
     'archiver'      -- archiver class: ZipArchiver if options.zip is set,
                        TarArchiver otherwise
     'fileExtension' -- extension of the bundle file ('zip' or 'tgz')
     'mode'          -- modes for opening the archive, keyed by 'stdout'
                        and 'file'
   """
   if options.zip:
      archiver, extension = ZipArchiver, 'zip'
      modes = {'stdout': 'w', 'file': 'w'}
   else:
      archiver, extension = TarArchiver, 'tgz'
      modes = {'stdout': 'w|gz', 'file': 'w:gz'}

   return {
      'archiver': archiver,
      'fileExtension': extension,
      'mode': modes,
   }

def setWorkingDir(options):
   """ Set default working directory if the user did not provide one.

   Nothing is changed when options.workingdir is already set. Otherwise a
   platform specific default is stored in options.workingdir:
     - Windows: the Desktop folder of the current user
     - ESXi: a mounted, writeable VMFS volume, with /var/tmp as fallback
     - otherwise (virtual appliance): /storage/log

   Raises OSError if no working directory could be determined.
   """
   if options.workingdir is not None:
      return

   workingDirDefault = None

   if os.name == 'nt':
      # The backslash must be escaped: "\D" is not a valid escape sequence.
      workingDirDefault = os.path.expanduser("~\\Desktop")

   elif isEsx():
      try:
         from esxclipy import EsxcliPy
         cmd = "storage filesystem list"
         (ret, string) = EsxcliPy().Execute(cmd.split())
         if ret == 0:
            # NOTE(review): eval() executes whatever text the command
            # returned. Presumably the output is a plain Python literal; if
            # so ast.literal_eval() would be the safer parser -- confirm.
            for entry in eval(string):
               if entry["Mounted"] and "VMFS" in entry["Type"]:
                  workingDir = os.path.join("/vmfs/volumes", entry["Volume Name"])
                  if os.access(workingDir, os.W_OK):
                     # No break: the last writeable volume listed is used.
                     workingDirDefault = workingDir
      except Exception as e:
         # This is just a best-effort approach to find a more suitable storage
         # location. Hence, we ignore all exceptions and fall back to /var/tmp
         # in case of an error.
         msg = "Exception raised while looking for VMFS partition: %s" % str(e)
         RecordError(msg, ignore=True)

      if not workingDirDefault:
         msg = "No VMFS partition found. Fallback to /var/tmp."
         RecordError(msg, ignore=True)

         tmp = "/var/tmp"
         if not os.access(tmp, os.W_OK):
            logging.critical("%s is not writeable. Please specify a "
                             "working directory.", tmp)
         else:
            workingDirDefault = tmp

   else:
      # Virtual Appliance case.
      # Using /storage/log rather than /var/log/vmware in case
      # the symlink has not been created.
      workingDirDefault = "/storage/log"

   if not workingDirDefault:
      DisplayErrors(options.errorfile)
      raise OSError("Failed to find a working directory.")

   options.workingdir = workingDirDefault

def needIncidentKey():
   """ Check if we might deal with encrypted coredumps which need to be
   re-encrypted with an incident key.

   Returns True when crypto-util finds a HostKey but no VmSupportKey, and
   False otherwise. The VmSupportKey lookup only runs if a HostKey exists.
   """
   def keyIsKnown(keyName):
      # A non-zero exit status of crypto-util means the key was not found.
      lookup = ["crypto-util", "keys", "getkidbyname", keyName]
      try:
         subprocess.check_output(lookup, stderr=subprocess.DEVNULL)
      except subprocess.CalledProcessError:
         return False
      return True

   # HostKey but no VmSupportKey? Problem. No HostKey? No problem.
   return keyIsKnown("HostKey") and not keyIsKnown("VmSupportKey")


def main(args):
   """ Run vm-support: parse the options, then list, extract or collect.

   args -- command line arguments, without the program name.

   Depending on the options this extracts/reconstructs an existing bundle,
   prints one of the listings or the version, or gathers a new support
   bundle. Exits with status 1 on invalid options. Exceptions raised while
   gathering are re-raised after the worker threads have been stopped.
   """
   LogInit()

   options = GetCommandlineOptions(args)

   # Running as CGI is detected by the extension of this file.
   isCgi = os.path.splitext(__file__)[1] == '.cgi'

   # The syslog log handler has some bugs: it does not prepend a date,
   # which means that what it is writing are not actually valid syslog
   # messages.  Further, it does not include an ident field, which
   # vmsyslogd expects.  This alteration fixes these issues.
   class FixedSysLogHandler(logging.handlers.SysLogHandler):
      def format(self, record):
         return "%s %s: %s" % (datetime.datetime.now().ctime()[4:19],
            "vm-support", logging.handlers.SysLogHandler.format(self, record))

   class FixedLogFileHandler(logging.FileHandler):
      def format(self, record):
         return "%s %s: %s" % (datetime.datetime.now().ctime()[4:19],
            "vm-support", logging.FileHandler.format(self, record))

   if isCgi or options.stream:
      # log only to syslog when running via cgi or streaming
      try:
         syslogHandler = FixedSysLogHandler('/dev/log')
      except Exception:
         try:
            sys.stderr.write("Failed to open syslog. Logging to "
                             "/var/log/vm-support.log\n")
            syslogHandler = FixedLogFileHandler('/var/log/vm-support.log')
         except Exception as e2:
            sys.stderr.write("%s\nLogging disabled.\n" % e2)
            # NullHandler appears to be unsupported in our version of Python.
            # MemoryHandler's capacity is a number of records, not bytes.
            syslogHandler =  logging.handlers.MemoryHandler(1024*1024)

      syslogHandler.setLevel(int(options.loglevel))
      logging.getLogger().addHandler(syslogHandler)

   if isCgi:
      options.stream = True
      loadCgiOptions(options)

   logging.getLogger('').setLevel(int(options.loglevel))

   if not options.stream:
      # Working directory is only needed for non CGI/streaming output
      setWorkingDir(options)

   if options.extract is not None:
      extractBundle(options.extract[0], options.extract[1])
      return
   elif options.reconstruct is not None:
      reconstruct(options.reconstruct)
      return

   if options.interval >= options.duration:
      logging.critical("Interval must be less than duration")
      sys.exit(1)

   if isEsx() and options.vm and options.vm not in getRegisteredVMs():
      logging.critical("Unknown VM %s.  Run %s --listvms to get list of VMs"
                       % (options.vm, SCRIPT_NAME))
      sys.exit(1)

   # Decide whether xml based processing is needed if either
   # a) invoked as a .cgi file
   # b) xmloutput option is specified
   handleXml = options.xmloutput or isCgi
   loader = ManifestLoader(options, isXml=handleXml, fsRoot=fsRoot)

   if options.listmanifests:
      loader.listManifests()
   elif options.listgroups:
      loader.listGroups()
   elif options.listtags:
      loader.listTags()
   elif isEsx() and options.listvms:
      listVMs()
   elif options.version:
      print("%s %s" % (SCRIPT_NAME, SCRIPT_VERSION))
   else:

      # Encrypted coredumps: Check for an incident key if a host key is present
      if isEsx() and not isCgi and sys.stdin.isatty():
         if needIncidentKey():
            logging.warning(WARNING_NO_INCIDENT_KEY)
            sys.exit(1)

      outfilePrefix = GetOutputFilePrefix()

      if options.dryrun or options.legacy:
         actionThreads = 1
      else:
         actionThreads = options.actionThreads

      if not options.quiet and not options.stream:
         print("vm-support %s: %s, action threads %d"
               % (SCRIPT_VERSION, time.strftime("%H:%M:%S"), actionThreads))

      archiveConfig = getArchiveConfig(options)

      tarObj = vmtar(options, archiveConfig, outfilePrefix)

      # Create tar thread and action threadpool
      tarObj.createTarThread()
      loader.createActionThreads(actionThreads)

      try:
         AddReadme(args, tarObj, options)

         if options.performance:
            gatherPerfSnapshots(loader, tarObj, int(options.interval),
                                int(options.duration))

         # Start executing manifests/actions and start separate thread that
         # blocks on the action queue
         finishEvent = loader.execute(tarObj, groups=options.groups,
                                      include=options.manifests,
                                      exclude=options.excludemanifests)

         # Wait for actions to finish
         # o We cannot use signal.pause() because it is not compatible with
         #   Windows
         # o Queue.join() doesn't provide a timeout parameter that's why this
         #   operation is executed in a separate thread
         # o finishEvent is set when Queue.join() returns
         # o Running Queue.join() or Event.wait() w/o timeout prevents signal
         #   handling (tested with CTRL-C)
         # o FYI: In python only the main thread can handle signals
         while not finishEvent.isSet():
            finishEvent.wait(1)

         # Stop action threads
         loader.stopActionThreads()

         DumpLogs(tarObj)

         # Close tar or stream
         # Stop and join tar thread
         tarObj.close()

      except KeyboardInterrupt:
         tarObj.reportProgress("Shut down threads and remove tgz file ... ",
                               newline=True)
         tarObj.stopRunningCommands()
         loader.stopActionThreads()
         tarObj.stopTarThread()

         tarObj.removeTar()
         # Bare raise: hand the interrupt on with its original traceback.
         raise

      except Exception:
         tarObj.reportProgress("Shut down threads ... ", newline=True)
         tarObj.stopRunningCommands()
         loader.stopActionThreads()
         tarObj.stopTarThread()
         # Bare raise: hand the error on with its original traceback.
         raise

      # Print action errors
      if not options.stream:
         if not options.quiet:
            DisplayErrors(options.errorfile)
            if options.listfiles:
               tarObj.PrintFiles()
         elif options.errorfile != '':
            DisplayErrors(options.errorfile)
         PrintSupportStatement(tarObj, options)
      elif options.errorfile != '':
         DisplayErrors(options.errorfile)

      if isEsx() and isCgi:
         # Hack to support encrypted coredumps to VC: PR1623936
         # NOTE(review): only reached when the bundle was gathered without
         # an exception; the prolog run by loadCgiOptions() is not undone
         # on the other paths -- confirm that this is intended.
         cryptoUtilEpilog()


if __name__ == "__main__":
   # Script entry point: run main() and turn any failure into exit status 1.
   if os.name == 'posix':
      # Suppress broken pipe errors if our output is piped into pagers.
      signal.signal(signal.SIGPIPE, signal.SIG_DFL)
   try:
      main(sys.argv[1:])
   except KeyboardInterrupt:
      logging.critical("%s interrupted", SCRIPT_NAME)
      sys.exit(1)
   except Exception as ex:
      # logging.exception() also records the traceback of the failure.
      logging.exception("%s encountered an exception: %s", SCRIPT_NAME, ex)
      sys.exit(1)
