#!/usr/local/groundwork/perl/bin/perl -w --
# sg_autocreate
#
# This script creates certain Service Groups in Foundation (the gwcollagedb
# database) based on the content of the Monarch (monarch) database for
# host/service associations, plus the content of an external config file
# naming the generic services for which said service groups are to be created.
# Copyright (c) 2016 GroundWork Open Source, Inc.
# The usual invocation of this script will be from a suitably customized
# copy of the submit() routine in MonarchCallOut.pm, at the end of a
# Monarch Commit operation.  The general idea is to:
#
# (*) Read a master config file (sg_autocreate.conf) that contains overall
#     customization, such as the setup for logging from this script, the
#     application type to be used for the service groups in Foundation, and
#     the location of the service-group definition config file (the next item).
#
# (*) As part of reading the master config file, read a secondary configuration
#     file (sg_autocreate_servicegroups.conf), containing information on what
#     service groups are to be created and maintained by this script.
#
# (*) If there is any servicegroup in Foundation owned by this same agent but
#     which is not currently configured in the secondary config file, remove
#     it from Foundation.  This covers all cases where a previously configured
#     servicegroup has either been renamed to something else in the secondary
#     config file, or been completely removed from the secondary config file.
#
# (*) For each named service, find all hosts in Monarch that have that service
#     attached, and analyze any service instances on those host services.
#
# (*) For each named service instance suffix, find all host services in
#     Monarch that have an active service instance with that suffix.
#
# (*) For each named service or service instance suffix, manage the associated
#     Service Group in Foundation, which means:
#
#     (+) If there is no such service or service instance suffix in Monarch,
#         or if there are no active hosts in Monarch with that service
#         attached or with an active service instance having that suffix,
#         delete the entire Service Group in Foundation if it already
#         exists there.
#
#     (+) Otherwise, if the Service Group does not already exist in
#         Foundation, create it there.
#
#     (+) Adjust the host/service membership of the Service Group in
#         Foundation to match the set of active host-service pairs or the
#         set of host-service-instance pairs present in Monarch.  This may
#         involve both deletions and additions.
#
# By "active", I mean that the host is not directly (or indirectly, via a
# hostgroup) assigned to a Monarch Group that is marked inactive; and that
# a host service either has no service instances or that its active service
# instances are used instead; and that a host-service-instance (when
# calculating membership for servicegroups associated with service instance
# suffixes) is not marked as inactive.
#
# The Foundation REST API, via the Perl GW::RAPID package, is to be used
# for all manipulations of the Service Groups in Foundation.
#
# Prerequisites:
#
# (*) The selected application type cannot be "NAGIOS", and must be placed
#     in the gwcollagedb applicationtype table by means outside of this
#     script, before this script is run.
#
# Follow-on:
#
# (*) The logfile should be put under log rotation control.
#
# (*) The MonarchCallOut.pm file must be modified to call the script, and
#     look at its output (if desired) and exit code.
#
=pod
rough strategy:
read the config files using our standard TypedConfig package
set up logging and the GW::RAPID package handle ($rest_api)
set up a dassmonarch connection to Monarch
foreach configured service:
    use the dassmonarch get_service_hostlist() routine to find all hosts having the service assigned
    call in scalar context, and pay attention to the exit code to check for errors or no-such-service
    use the Foundation REST API to check for the existence of the service group
    if the list of hosts was empty:
	if the service group exists, delete it
	you're done with this service
    if the list of hosts was non-empty:
	if the service group does not exist, create it (with no members)
	read the servicegroup host membership list from Foundation (with as little extra
	    data retrieved as possible, other than just the host/service name pairs)
	create a simple hash from the membership list, and a hash for the Monarch membership list
	if there are host/service pairs in Foundation but not in Monarch, delete them from the
	    servicegroup in Foundation
	if there are host/service pairs in Monarch but not in Foundation, add them to the
	    servicegroup in Foundation
close the dassmonarch connection
close the $rest_api connection
exit with appropriate return code reflecting possible errors
something similar happens with configured service-instance suffixes
NOTES:
* I generally like to do operations in bulk, as being more efficient.
  But servicegroup creation and deletion are likely to be rare events,
  so bulk operation won't save us much time.
=cut
use strict;
use warnings;
use Getopt::Std;
$Getopt::Std::STANDARD_HELP_VERSION = 1;
use Data::Dumper;
$Data::Dumper::Indent   = 1;
$Data::Dumper::Sortkeys = 1;
use TypedConfig;
use GW::RAPID;
use GW::Logger;
use MonarchStorProc;
use dassmonarch;
# ================================
# Script Parameters
# ================================
# Identification strings used in the version banner, usage text, and log messages.
my $PROGNAME       = "sg_autocreate";
my $VERSION        = "1.0.0";
my $COPYRIGHT_YEAR = "2016";
# $config_file is the master config file handed to read_config_file().
# $default_log_file is only the initial value of $logfile (below);
# read_config_file() replaces $logfile with the config file's "logfile" setting.
my $config_file      = '/usr/local/groundwork/scripts/sg_autocreate.conf';
my $default_log_file = '/usr/local/groundwork/foundation/container/logs/sg_autocreate.log';
# ================================
# Command-Line Parameters
# ================================
# In theory, these parameter settings could be overridden by command-line arguments.
# In practice, we don't currently support any such arguments; this script uses only
# a fixed set of arguments.
# (parse_command_line() assigns the -h, -v, -d, and -m flags below; the interactive
# and log-reflection settings are never changed from the defaults given here.)
my $debug_config          = 0;       # if set, spill out certain data about config-file processing to STDOUT
my $show_help             = 0;
my $show_version          = 0;
my $run_interactively     = 1;       # Default on in this program to force logging of all useful output.
my $reflect_log_to_stdout = 1;       # Default on in this program to force logging of all useful output.
my $manage_servicegroups  = undef;   # set from the -m option
# ================================
# Configuration Parameters
# ================================
# Parameters in the config file.
# debug_level is numeric; read_config_file() derives the $debug_* flags from it.
my $debug_level = undef;
my $logfile                = $default_log_file;
my $max_logfile_size       = undef;               # log rotate is handled externally, not here
my $max_logfiles_to_retain = undef;               # log rotate is handled externally, not here
# Foundation application type for the managed service groups.  read_config_file()
# rejects 'NAGIOS' and any name not found by fetch_valid_app_types().
my $application_type;
# Agent ID identifying the servicegroups this script owns; must not be an empty string.
my $agent_id;
# Boolean; when true, process_servicegroups() preloads host and service maps via StorProc.
my $use_bulk_monarch_queries;
# ------------------------------------------------------------------------
# Options for interacting with Foundation via the Foundation REST API.
# ------------------------------------------------------------------------
# The application name by which the sg_autocreate process will be known
# to the Foundation REST API.
my $rest_api_requestor = undef;
# Where to find credentials for accessing the Foundation REST API.
my $ws_client_config_file = undef;
# Application-level logging configuration, for that portion of the logging
# which is currently handled by the Log4perl package.
# (initialize_rest_api() treats a value with a leading slash as the path to a
# Log4perl config file, and any other value as the configuration text itself.)
my $log4perl_config = undef;
# ================================
# Working Variables
# ================================
# Derived flags, for easy testing.
# read_config_file() sets these from $debug_level, at thresholds 1, 2, 3, and 4 respectively.
my $debug_summary = undef;
my $debug_basic   = undef;
my $debug_debug   = undef;
my $debug_trace   = undef;
# GW::RAPID handle; created by initialize_rest_api() and released by terminate_rest_api().
my $rest_api;
# Set of application type names read from the Foundation applicationtype table
# by fetch_valid_app_types(); the keys are the names, and the values are all 1.
my %valid_app_type = ();
# True while the run is succeeding; main() clears it on the first failure and
# uses it to choose the final return status.
my $process_outcome = undef;
# The <service> and <service_instance> sections of the generated_service_groups
# configuration, as loaded and validated by read_config_file().
my %servicegroup_services          = ();
my %servicegroup_service_instances = ();
# Status values to be used when searching for a servicegroup in Foundation.
use constant NO_INFO_AVAILABLE    => 0;
use constant NO_SUCH_SERVICEGROUP => 1;
use constant THEIR_SERVICEGROUP   => 2;
use constant OUR_SERVICEGROUP     => 3;
# Return values for main().  Only ERROR_STATUS yields a non-zero exit code (see
# the exit statement below).  NOTE(review): RESTART_STATUS and CONTINUE_STATUS are
# not referenced in the portion of this file that was reviewed -- confirm whether
# they are still needed.
use constant ERROR_STATUS    => 0;
use constant STOP_STATUS     => 1;
use constant RESTART_STATUS  => 2;
use constant CONTINUE_STATUS => 3;
# ================================================================
# Program.
# ================================================================
# Run the script; the exit code is 1 if main() reports ERROR_STATUS, otherwise 0.
exit ((main() == ERROR_STATUS) ? 1 : 0);
# ================================================================
# Supporting subroutines.
# ================================================================
sub main {
    # Keep a copy of the command line as it was given, taken before any option
    # parsing happens, so the startup log message can show the real invocation.
    my @original_args = @ARGV;

    # Description of a failure, meant for delivery to Foundation (where it would show
    # up in the Event Console).  Such delivery depends on settings from the config file,
    # so this is only filled in for failures that happen after that file has been read.
    my $status_message = '';

    $process_outcome = 1;

    # Fold STDERR into STDOUT.  Joining the streams happens at the file-descriptor
    # level, not in Perl's i/o buffering, so buffered STDOUT output could still come
    # out interleaved in the wrong order with unbuffered STDERR output.  Flushing
    # STDOUT after every write keeps the two in step.  That is less efficient, but
    # this program is not expected to write much to STDOUT anyway.
    if ( not open( STDERR, '>>&STDOUT' ) ) {
        print "ERROR:  STDERR cannot be redirected to STDOUT!\n";
        $process_outcome = 0;
    }
    else {
        STDOUT->autoflush(1);
    }

    # ----------------------------------------------------------------
    # Setup phase:  options, application types, config file, logging.
    # ----------------------------------------------------------------
    if ($process_outcome) {
        if ( not parse_command_line() ) {
            spill_message("FATAL:  $PROGNAME either cannot understand its command-line parameters or cannot find its config file");
            exit 1;
        }

        print_version() if $show_version;
        print_usage()   if $show_help;
        exit 0 if $show_version || $show_help;

        eval { fetch_valid_app_types(); };
        if ($@) {
            ## We would like to send the process status to Foundation here, but without
            ## a clean read of the configuration file we have not yet learned where a
            ## status message should be sent.
            print $@;
            return ERROR_STATUS;
        }

        unless ( read_config_file( $config_file, $debug_config ) ) {
            spill_message("FATAL:  $PROGNAME cannot load configuration from $config_file");
            return ERROR_STATUS;
        }

        # A config-debugging run ends here.
        return STOP_STATUS if $debug_config;

        # Refuse to run as root (say, during a manual debugging run), so we never create
        # files or directories that the usual ordinary user ("nagios") cannot modify
        # later.  The test is deliberately made this late so that the version and usage
        # messages remain available to any user.
        if ( $> == 0 ) {
            ( my $script_name = $0 ) =~ s<.*/><>;
            print "ERROR:  You cannot run $script_name as root.\n";
            return ERROR_STATUS;
        }

        # No message prefix is used:  this is intended to be an interactive script, where
        # extra text on the terminal would only distract, and we do not expect concurrent
        # copies writing to the same log file that would need to be told apart.
        GW::Logger->new( $logfile, $run_interactively, $reflect_log_to_stdout, $max_logfile_size, $max_logfiles_to_retain, '' );

        if ( not open_logfile() ) {
            ## open_logfile() reports its own failure, so no message is printed here.
            $status_message  = 'cannot open log file';
            $process_outcome = 0;
        }
    }

    # ----------------------------------------------------------------
    # Processing phase:  announce the run, then manage the servicegroups.
    # ----------------------------------------------------------------
    if ($process_outcome) {
        ## A leading blank line makes the startup message easier to spot in the log.
        log_message('');
        log_timed_message("Service group auto-creation script (version $VERSION) is starting up.");
        ( my $script_name = $0 ) =~ s<.*/><>;
        log_message( "INFO:  Running with options:  $script_name " . join( ' ', @original_args ) );

        my $service_count      = scalar keys %servicegroup_services;
        my $instance_count     = scalar keys %servicegroup_service_instances;
        my $servicegroup_count = $service_count + $instance_count;

        if ( $servicegroup_count == 0 ) {
            log_message('NOTICE:  No servicegroups are configured for processing.');
        }
        else {
            ## Getting the singular/plural forms right makes the log much more readable.
            my @element_phrases = ();
            push @element_phrases, "$service_count <service> element" . ( $service_count == 1 ? '' : 's' )
              if $service_count;
            push @element_phrases, "$instance_count <service_instance> element" . ( $instance_count == 1 ? '' : 's' )
              if $instance_count;
            my $article = $servicegroup_count == 1 ? ' a' : '';
            my $plural  = $servicegroup_count == 1 ? ''   : 's';

            log_message( 'NOTICE:  Processing ' . join( ' and ', @element_phrases ) . " representing$article servicegroup$plural." );

            $process_outcome = process_servicegroups( \%servicegroup_services, \%servicegroup_service_instances );
            if ($process_outcome) {
                log_message("NOTICE:  Processed $servicegroup_count configured servicegroup$plural.");
            }
            else {
                $status_message = 'cannot process configured services or service instances';
            }
        }
    }

    ## Not yet implemented in this script.  See restore-old-status-markers.pl for a sample implementation.
    # send_outcome_to_foundation( $status_message, $process_outcome );

    close_logfile();

    # The overall success or failure becomes this routine's status, which the
    # caller turns into the script's exit code.
    return $process_outcome ? STOP_STATUS : ERROR_STATUS;
}
# "Getopt::Std"-compatible versions of these routines,
# to provide support for --version and --help options.
#
sub VERSION_MESSAGE {
    ## Hook looked up by name by Getopt::Std when --version is given.  Any
    ## arguments supplied to this hook are ignored; the version banner is
    ## printed by our own print_version() routine.
    print_version();
}
sub HELP_MESSAGE {
    ## Hook looked up by name by Getopt::Std when --help is given.  Any
    ## arguments supplied to this hook are ignored; the usage text is
    ## printed by our own print_usage() routine.
    print_usage();
}
sub print_version {
    ## Write the three-line version and copyright banner to the currently
    ## selected output handle, as a single print.
    print join(
        '',
        "$PROGNAME Version:  $VERSION\n",
        "Copyright $COPYRIGHT_YEAR GroundWork, Inc. (www.gwos.com).\n",
        "All rights reserved.\n"
    );
}
sub print_usage {
    ## Print the command-line usage summary.  Only the options accepted by
    ## parse_command_line() (-h, -v, -d, -m) are listed.  The text is emitted
    ## verbatim from the here-document below, with $PROGNAME interpolated.
    print <<EOF;
usage:  $PROGNAME -h
	$PROGNAME -v
	$PROGNAME -d
	$PROGNAME -m
where:  -h:  print this help message
	-v:  print the version number
	-d:  debug config file
	-m:  operate normally, managing servicegroups as needed
The usual invocation is:
    $PROGNAME -m
EOF
# Usage lines not printed because we hardcode the -i and -o options in this program.
# (They are parked in the POD section below, which Perl skips over, so they can be
# moved back into the here-document if those options are ever supported.)
=pod
	$PROGNAME -m [-c config_file] [-i] [-o]
	-i:  run interactively, not as a background process
	-o:  write log messages also to standard output
    $PROGNAME -i -o -S
The -o option is illegal unless -i is also specified.
=cut
}
# Parse the command-line options into the file-level option flags.
# Returns 1 if the options are acceptable.  Otherwise, prints the usage
# message and returns 0.
sub parse_command_line {
    ## Cleaning up the config-file path with realpath() before it might be shown
    ## in a usage message is intentionally not done here, because of potential
    ## working-directory issues with realpath().
    ##
    ## The -i and -o options are hardcoded on in this program, so they are not
    ## among the accepted switches, and $run_interactively and
    ## $reflect_log_to_stdout keep their initial values.
    my %switches;
    unless ( getopts( 'hvdm', \%switches ) ) {
        print_usage();
        return 0;
    }

    # Switches that were not given leave their flags undefined (i.e., false).
    ( $show_help, $show_version, $debug_config, $manage_servicegroups ) = @switches{qw(h v d m)};

    # This is not a full enforcement of the intended exclusivity of the major
    # mode options, but it does at least require one of -h, -v, -d, or -m.
    my $mode_selected = $show_version || $show_help || $debug_config || $manage_servicegroups;

    # Reflecting the log to standard output only makes sense when running interactively.
    my $reflecting_without_interactive = !$run_interactively && $reflect_log_to_stdout;

    if ( !$mode_selected || $reflecting_without_interactive ) {
        print_usage();
        return 0;
    }

    return 1;
}
# Load the names of all application types known to Foundation into the
# file-level %valid_app_type hash (name => 1), by querying the applicationtype
# table of the database described by the 'collage' GroundWork DB configuration.
#
# Takes no arguments.  Returns 1 on success.  Dies with an "ERROR:  ..." message
# (terminated by a newline) if the database parameters cannot be read, the
# connection fails, or the query cannot be prepared, executed, or fetched to
# completion.  The database handle is disconnected before dying.
#
# NOTE(review):  this routine relies on CollageQuery and DBI, neither of which is
# loaded explicitly at the top of this file; presumably one of the other modules
# used here loads them -- confirm.
sub fetch_valid_app_types {
    my ( $f_dbname, $f_dbhost, $f_dbuser, $f_dbpass, $f_dbtype ) = CollageQuery::readGroundworkDBConfig('collage');
    if ( !defined($f_dbname) or !defined($f_dbhost) or !defined($f_dbuser) or !defined($f_dbpass) ) {
        die "ERROR:  Cannot read Foundation database parameters;\n    perhaps you are not running as the nagios user?\n";
    }

    # Anything other than an explicit 'postgresql' database type is treated as MySQL.
    my $dsn =
      ( defined($f_dbtype) && $f_dbtype eq 'postgresql' )
      ? "DBI:Pg:dbname=$f_dbname;host=$f_dbhost"
      : "DBI:mysql:database=$f_dbname;host=$f_dbhost";

    my $f_dbh = DBI->connect( $dsn, $f_dbuser, $f_dbpass, { 'AutoCommit' => 1 } );
    if ( !$f_dbh ) {
        my $errstr = defined $DBI::errstr ? $DBI::errstr : 'unknown error';
        chomp $errstr;
        die "ERROR:  Cannot connect to database $f_dbname:\n$errstr\n";
    }

    my $f_query = "SELECT name FROM applicationtype";

    # prepare() returns undef on failure; check it so that we report the database's
    # own error and release the connection, instead of failing on a method call
    # against an undefined statement handle.
    my $f_sth = $f_dbh->prepare($f_query);
    if ( !$f_sth ) {
        my $errstr = defined $f_dbh->errstr ? $f_dbh->errstr : 'unknown error';
        chomp $errstr;
        $f_dbh->disconnect();
        die "ERROR:  Cannot read Foundation application type names:\n$errstr\n";
    }

    if ( !$f_sth->execute() ) {
        my $errstr = defined $f_sth->errstr ? $f_sth->errstr : 'unknown error';
        chomp $errstr;
        $f_sth->finish();
        $f_dbh->disconnect();
        die "ERROR:  Cannot read Foundation application type names:\n$errstr\n";
    }

    while ( my @row = $f_sth->fetchrow_array() ) {
        $valid_app_type{ $row[0] } = 1;
    }

    # fetchrow_array() returns an empty list both at end-of-data and on error, so the
    # statement's error state must be inspected to tell a complete list from a partial one.
    my $fetch_error = $f_sth->err ? $f_sth->errstr : undef;

    $f_sth->finish();
    $f_dbh->disconnect();

    if ( defined $fetch_error ) {
        chomp $fetch_error;
        die "ERROR:  Cannot read Foundation application type names:\n$fetch_error\n";
    }

    return 1;
}
# Read and validate the master config file, storing its settings in the
# file-level configuration variables.
#
# Arguments:
#     $config_file   path to the master config file
#     $config_debug  if true, TypedConfig is created in debug mode and the
#                    parsed service-group definitions are dumped to STDOUT
#
# Returns 1 on success.  On any failure, prints an "ERROR:  Cannot read config
# file ..." message naming the cause, and returns 0.
sub read_config_file {
    my $config_file  = shift;
    my $config_debug = shift;

    # Dump a data structure under the given label, but only in config-debug mode.
    my $dump_if_debugging = sub {
        my ( $data, $label ) = @_;
        return if not $config_debug;
        my $dumper = Data::Dumper->new( [$data], [$label] );
        $dumper->{xpad} = '    ';
        print $dumper->Dump;
        return;
    };

    # All the config-file processing is wrapped in an eval{}; because TypedConfig
    # throws exceptions when it cannot open the config file or finds bad config data.
    # The block ends with a true value, so that success is judged by the eval's own
    # result and not by the content of $@.
    my $config_loaded = eval {
        my $config = TypedConfig->new( $config_file, $config_debug );

        $debug_level   = $config->get_number('debug_level');
        $debug_summary = $debug_level >= 1;
        $debug_basic   = $debug_level >= 2;
        $debug_debug   = $debug_level >= 3;
        $debug_trace   = $debug_level >= 4;

        $logfile                  = $config->get_scalar('logfile');
        $application_type         = $config->get_scalar('application_type');
        $agent_id                 = $config->get_scalar('agent_id');
        $use_bulk_monarch_queries = $config->get_boolean('use_bulk_monarch_queries');

        # The application_type cannot be NAGIOS, because that would conflict with Monarch's own
        # management of the service groups we manage via this script.  More generally, we check
        # here up front to make sure the application_type value already exists in Foundation, so
        # we don't get some less-clear failure later on.
        if ( $application_type eq 'NAGIOS' ) {
            ## Name the offending setting, not its value.
            die "ERROR:  application_type cannot be configured as 'NAGIOS'\n";
        }
        if ( not $valid_app_type{$application_type} ) {
            die "ERROR:  $application_type is not a valid Foundation application type name\n";
        }

        # An empty agent ID looks like values sometimes defaulted to by other agents,
        # so we disallow that here in order to force our ability to distinguish any
        # servicegroups that we create from servicegroups that other agents created.
        if ( $agent_id eq '' ) {
            die "ERROR:  agent_id cannot be configured as an empty string\n";
        }

        my %generated_service_groups_hash = $config->get_hash('generated_service_groups');
        $dump_if_debugging->( \%generated_service_groups_hash, '\%generated_service_groups_hash' );

        # It is deliberately not treated as an error for both the <service> and the
        # <service_instance> sections to be absent; main() then simply reports that no
        # servicegroups are configured for processing.
        %servicegroup_services = %{ $generated_service_groups_hash{'service'} || {} };
        $dump_if_debugging->( \%servicegroup_services, '\%servicegroup_services' );

        %servicegroup_service_instances = %{ $generated_service_groups_hash{'service_instance'} || {} };
        $dump_if_debugging->( \%servicegroup_service_instances, '\%servicegroup_service_instances' );

        # Servicegroup names must be unique across both kinds of elements, so one
        # record of the names seen so far is shared by the two validation passes.
        my %servicegroup_names = ();
        _validate_servicegroup_elements( 'service',          \%servicegroup_services,          \%servicegroup_names );
        _validate_servicegroup_elements( 'service_instance', \%servicegroup_service_instances, \%servicegroup_names );

        $rest_api_requestor    = $config->get_scalar('rest_api_requestor');
        $ws_client_config_file = $config->get_scalar('ws_client_config_file');
        $log4perl_config       = $config->get_scalar('log4perl_config');

        1;
    };

    if ( not $config_loaded ) {
        my $reason = $@;
        $reason = 'unknown error' if !defined($reason) || $reason eq '';
        chomp $reason;
        ## Our own messages carry an "ERROR:" prefix; drop it so it is not repeated below.
        $reason =~ s/^ERROR:\s+//i;
        print "ERROR:  Cannot read config file $config_file\n  ($reason).\n";
        return 0;
    }

    return 1;
}

# Validate one kind of <generated_service_groups> element.
#
# Arguments:
#     $element_type             'service' or 'service_instance' (used in messages)
#     $elements                 hash ref:  element name => element definition
#     $seen_servicegroup_names  hash ref of servicegroup names already claimed;
#                               updated with the names found here
#
# Dies with an "ERROR:  ..." message on the first element that is duplicated,
# malformed, lacking a service_group or service_group_description, or that
# claims a servicegroup name already in use.
sub _validate_servicegroup_elements {
    my ( $element_type, $elements, $seen_servicegroup_names ) = @_;

    foreach my $element_name ( keys %$elements ) {
        my $definition = $elements->{$element_name};

        if ( ref $definition ne 'HASH' ) {
            ## An element that appears more than once shows up here as an array.
            if ( ref $definition eq 'ARRAY' ) {
                die "ERROR:  <$element_type $element_name> is specified more than once\n";
            }
            die "ERROR:  <$element_type $element_name> is misconfigured\n";
        }

        my $servicegroup_name = $definition->{service_group};
        if ( not $servicegroup_name ) {
            die "ERROR:  no service_group is specified for <$element_type $element_name>\n";
        }
        if ( not $definition->{service_group_description} ) {
            die "ERROR:  no service_group_description is specified for <$element_type $element_name>\n";
        }
        if ( exists $seen_servicegroup_names->{$servicegroup_name} ) {
            die "ERROR:  service_group \"$servicegroup_name\" is specified more than once\n";
        }
        $seen_servicegroup_names->{$servicegroup_name} = 1;
    }

    return;
}
# Log the outcome data of a failed operation, one "key => value" line per
# entry in sorted key order.  Nothing is logged unless trace-level debugging
# is enabled.
#
# Arguments:
#     $_[0]  the outcome, expected to be a hash ref; anything else (including
#            undef) is reported the same way as an empty outcome
#     $_[1]  text describing the operation, for use in the log messages
sub log_outcome {
    my $outcome = $_[0];
    my $context = $_[1];
    if ($debug_trace) {
        ## Checking the reference type first keeps a missing outcome from killing
        ## the script with a dereferencing error while it is only trying to log.
        if ( ref $outcome eq 'HASH' && %$outcome ) {
            log_message("ERROR:  Outcome of $context:");
            foreach my $key ( sort keys %$outcome ) {
                ## Undefined values are shown as empty, without an "uninitialized" warning.
                my $value = defined $outcome->{$key} ? $outcome->{$key} : '';
                log_message("    $key => $value");
            }
        }
        else {
            log_message("ERROR:  No outcome data returned for failed $context.");
        }
    }
}
# Log the results data of a failed operation.  Nothing is logged unless
# trace-level debugging is enabled.
#
# Arguments:
#     $_[0]  the results:  either a hash ref (nested hashes are expanded up to
#            four levels deep) or an array ref (each element that is a hash
#            has its top level listed; other elements are printed as-is)
#     $_[1]  text describing the operation, for use in the log messages
#
# Hash keys are always listed in sorted order, so the output is repeatable.
# Results that are neither a hash ref nor an array ref are reported as an
# internal programming error.
sub log_results {
    my $results = $_[0];
    my $context = $_[1];
    if ($debug_trace) {
        if ( ref $results eq 'HASH' ) {
            if (%$results) {
                log_message("ERROR:  Results of $context:");
                _log_hash_entries( $results, '', 4 );
            }
            else {
                log_message("ERROR:  No results data returned for failed $context.");
            }
        }
        elsif ( ref $results eq 'ARRAY' ) {
            if (@$results) {
                log_message("ERROR:  Results of $context:");
                my $i = 0;
                foreach my $result (@$results) {
                    if ( ref $result eq 'HASH' ) {
                        _log_hash_entries( $result, "result[$i]", 1 );
                    }
                    else {
                        log_message( "    result[$i]:  " . ( defined $result ? $result : '' ) );
                    }
                    ++$i;
                }
            }
            else {
                log_message("ERROR:  No results data returned for failed $context.");
            }
        }
        else {
            log_message( 'ERROR:  Internal programming error when displaying results (' . code_coordinates() . ").");
        }
    }
}

# Log the entries of a hash, one "label => 'value'" line per leaf, in sorted
# key order.
#
# Arguments:
#     $hash    hash ref whose entries are to be logged
#     $path    label prefix for this hash ('' at the top level); a nested key
#              is labeled as the prefix followed by the key in braces
#     $levels  how many levels of hash may still be expanded; at the last
#              level, every value is printed as-is, even if it is a hash ref
sub _log_hash_entries {
    my ( $hash, $path, $levels ) = @_;

    foreach my $key ( sort keys %$hash ) {
        my $label = $path eq '' ? $key : $path . '{' . $key . '}';
        my $value = $hash->{$key};
        if ( $levels > 1 && ref $value eq 'HASH' ) {
            _log_hash_entries( $value, $label, $levels - 1 );
        }
        else {
            ## Undefined values are shown as empty, without an "uninitialized" warning.
            $value = '' if not defined $value;
            log_message("    $label => '$value'");
        }
    }

    return;
}
# Set up Log::Log4perl logging and create the GW::RAPID handle ($rest_api).
# Returns 1 on success; logs an error message and returns 0 on failure.
sub initialize_rest_api {
    ## Basic security:  code embedded in the logging config data is not allowed.
    Log::Log4perl::Config->allow_code(0);

    # Log4perl predefines six levels:  FATAL, ERROR, WARN, INFO, DEBUG, and TRACE (in
    # descending priority).  NOTICE and STATS are added here, to form our full standard
    # complement:  FATAL, ERROR, WARN, NOTICE, STATS, INFO, DEBUG, and TRACE (leaving aside
    # NONE, though there is some hint in the Log4perl code that OFF is also supported).
    # These levels *must* be created before Log::Log4perl::init() is called.
    Log::Log4perl::Logger::create_custom_level( "NOTICE", "WARN" );
    Log::Log4perl::Logger::create_custom_level( "STATS",  "NOTICE" );

    # Should logging through a syslog appender or through Log::Dispatch ever be wanted,
    # code extensions along these lines would come in handy.  (It is unclear whether
    # Log4perl supports syslog logging other than via Log::Log4perl::JavaMap::SyslogAppender,
    # which just wraps Log::Dispatch::Syslog.)
    #
    # use Sys::Syslog qw(:macros);
    # use Log::Dispatch;
    # my $log_null = Log::Dispatch->new( outputs => [ [ 'Null', min_level => 'debug' ] ] );
    # Log::Log4perl::Logger::create_custom_level("NOTICE", "WARN", LOG_NOTICE, $log_null->_level_as_number('notice'));
    # Log::Log4perl::Logger::create_custom_level("STATS", "NOTICE", LOG_INFO, $log_null->_level_as_number('info'));

    # Log4perl initialization is global to the whole application, which is why it is
    # done here at the application level and not within some lower-level package.
    #
    # Though undocumented, Log::Log4perl::init() apparently always returns 1, even for
    # a garbage configuration given as a literal string, so its return value says nothing
    # about whether the configuration is good.  It does throw an (also undocumented)
    # exception when given the path to a missing config file, hence the eval{}.
    eval {
        ## A value with a leading slash is taken to be the absolute path of a file holding
        ## the logging configuration.  Any other value is taken to be the configuration data
        ## itself, and is passed by reference.
        my $log4perl_source = ( $log4perl_config =~ m{^/} ) ? $log4perl_config : \$log4perl_config;
        Log::Log4perl::init($log4perl_source);
    };
    if ($@) {
        chomp $@;
        log_message("ERROR:  Could not initialize Log::Log4perl logging:\n$@");
        return 0;
    }

    # Create the REST API object, giving it a logger handle and the location of the
    # credentials to be used for accessing Foundation.
    my $rapid_logger = Log::Log4perl::get_logger("Service.Group.AutoCreator.GW.RAPID");
    $rest_api = GW::RAPID->new(
        undef, undef, undef, undef,
        $rest_api_requestor,
        {
            logger => $rapid_logger,
            access => $ws_client_config_file,
        }
    );

    return 1 if defined $rest_api;

    ## The GW::RAPID constructor does not hand back the reason for a failure, but it will
    ## already have used the logger handle to write such detail into the logfile.
    log_timed_message("ERROR:  Could not create a GW::RAPID object.");
    return 0;
}
sub terminate_rest_api {
    ## Drop our handle to the REST API (if one was ever created), so that the object's
    ## destructor runs now.  The destructor attempts to log out, which needs resources
    ## that Perl's global destruction pass would otherwise wipe out first.
    undef $rest_api;
}
# Return false on failure, true on success.
sub process_servicegroups {
    my $services          = shift;
    my $service_instances = shift;
    local $_;
    my $status = 1;
    my $dm;
    eval {
	$dm = dassmonarch->new();
	## The messages from dassmonarch are not terribly useful.  They clutter the output,
	## and we generate our own messages here.  So we just disable the lower-level output.
	$dm->set_debuglevel( 'none' );
    };
    if ($@) {
	chomp $@;
	log_timed_message "ERROR:  $@";
	return 0;
    }
    # The dassmonarch package doesn't currently have any routines to handle the kinds of bulk
    # data retrieval we need, nor to handle service instances, so we have to dig deeper and
    # use lower-level calls.  Fortunately, having created a dassmonarch handle, we've already
    # opened a StorProc connection to the database that we can use now.
    #
    my %host_id_for_hostname                      = ();
    my %hostname_for_host_id                      = ();
    my %servicename_id_for_servicename            = ();
    my %servicename_for_servicename_id            = ();
    my %service_id_for_host_id_and_servicename_id = ();
    my %host_id_for_service_id                    = ();
    my %servicename_id_for_service_id             = ();
    my %hostnames_for_servicename                 = ();
    if ($use_bulk_monarch_queries) {
	eval {
	    ## This will retrieve data as:  $host_id_for_hostname{$hostname} = $this_host_id;
	    %host_id_for_hostname = StorProc->get_hosts();
	    ## This will retrieve data as:  $hostname_for_host_id{$host_id} = $this_hostname;
	    %hostname_for_host_id = StorProc->get_table_objects( 'hosts', 1 );
	    ## This will retrieve data as:  $servicename_id_for_servicename{$servicename} = $this_servicename_id;
	    %servicename_id_for_servicename = StorProc->get_table_objects( 'service_names', 0 );
	    ## This will retrieve data as:  $servicename_for_servicename_id{$servicename_id} = $this_servicename;
	    %servicename_for_servicename_id = StorProc->get_table_objects( 'service_names', 1 );
	    ## This will retrieve data as:  $service_id_for_host_id_and_servicename_id{$host_id}{$servicename_id} = $this_service_id;
	    %service_id_for_host_id_and_servicename_id = StorProc->get_hostid_servicenameid_serviceid();
	    ## This will map data as:  $host_id_for_service_id{$service_id} = $this_host_id;
	    ## This will map data as:  $servicename_id_for_service_id{$service_id} = $this_servicename_id;
	    ##
	    ## This will construct data as:
	    ## $hostnames_for_servicename{$servicename} = [$hostname1 $hostname2 ...];
	    ##
	    my $service_id_for_servicename_id_given_host_id;
	    my $service_id;
	    my $hostname;
	    foreach my $host_id ( keys %service_id_for_host_id_and_servicename_id ) {
		$hostname                                    = $hostname_for_host_id{$host_id};
		$service_id_for_servicename_id_given_host_id = $service_id_for_host_id_and_servicename_id{$host_id};
		foreach my $servicename_id ( keys %$service_id_for_servicename_id_given_host_id ) {
		    $service_id                                 = $service_id_for_servicename_id_given_host_id->{$servicename_id};
		    $host_id_for_service_id{$service_id}        = $host_id;
		    $servicename_id_for_service_id{$service_id} = $servicename_id;
		    push @{ $hostnames_for_servicename{ $servicename_for_servicename_id{$servicename_id} } }, $hostname;
		}
	    }
	};
	if ($@) {
	    my $exception = $@;
	    chomp $exception;
	    log_timed_message "ERROR:  Cannot probe the monarch database for host and service data:\n$exception";
	    return 0;
	}
    }
    # We deal not just with the configured service names, but also with any service instances of
    # those services which might be configured for individual hosts.
    #
    # There is also some special interpretation of service instances that we must take into account.
    # Service instances can be set active or inactive.  If any service instances are defined, the
    # base unsuffixed service does not appear in the configuration; only the active instances
    # appear.  If some of the instances for a service are set inactive, those particular instances
    # will be ignored, and any active instances will still be in play.  If all instances for a
    # service are set inactive, neither the instances nor the base unsuffixed service will appear in
    # the configuration.
    #
    # All the interpretation of service instances happens on a per-individual-host basis.  All of
    # that could play out in unexpected ways.  For instance, if we have only one host for a service,
    # and that host service has all inactive service instances, then the servicegroup should be
    # deleted because it will contain no service members.
    #
    my $all_instance_statuses;
    eval {
	## StorProc currently labels the get_service_instances_status_for_sync() routine "Unstable interface,
	## subject to change across releases.".  It's not intended for use in general code outside the Monarch
	## sync process itself, so we can be not afraid to modify it over time as necessary.  However, what
	## we're doing here in this script is indeed sync-related, and this call has been stable for a long
	## time, so since it's exactly what we need in the present circumstance, we use it.
	##
	## This will retrieve data as:  $all_instance_statuses->{$service_id}{$instancename} = $active;
	## where $active is a boolean indicating active or inactive status of this service instance.
	##
	$all_instance_statuses = StorProc->get_service_instances_status_for_sync();
    };
    if ($@) {
	my $exception = $@;
	chomp $exception;
	log_timed_message "ERROR:  Cannot probe the monarch database for service instances:\n$exception";
	return 0;
    }
    # The %$all_instance_statuses hash is useful when generating the servicegroups assigned to services.
    # But inverting the hash structure and filtering out the inactive instances will be more useful for
    # generating the servicegroups assigned to service instances.
    #
    my %all_active_instance_services = ();
    foreach my $service_id ( keys %$all_instance_statuses ) {
	foreach my $instancename ( keys %{ $all_instance_statuses->{$service_id} } ) {
	    $all_active_instance_services{$instancename}{$service_id} = 1
	      if $all_instance_statuses->{$service_id}{$instancename};
	}
    }
    # These hashes are supposed to reflect whether a given host should be treated as inactive
    # because it belongs directly or indirectly to a Monarch Group which has been marked as
    # inactive.  The interpretation here reflects how this information is handled elsewhere
    # within Monarch.  Specifically, hosts in an active Monarch Group but not otherwise part
    # of a parent Monarch Group which is marked as inactive will still be treated as active.
    #
    my %is_inactive_host_id   = ();
    my %is_inactive_host_name = ();
    if ($use_bulk_monarch_queries) {
	%is_inactive_host_id   = StorProc->get_inactive_hosts(0);
	%is_inactive_host_name = map { $_ => 1 } @hostname_for_host_id{ keys %is_inactive_host_id };
    }
    if ($debug_basic) {
	if ($debug_trace) {
	    my $dumper = Data::Dumper->new( [$all_instance_statuses], [qw($all_instance_statuses)] );
	    $dumper->{xpad} = '    ';
	    log_message $dumper->Dump;
	}
	else {
	    my $hostname;
	    my $servicename;
	    foreach my $service_id ( sort keys %$all_instance_statuses ) {
		## The service instances we refer to here might or might not be active.
		## Furthermore, for purposes of this debug output, we're not filtering
		## to ignore inactive hosts.
		if ( $debug_debug && $use_bulk_monarch_queries ) {
		    $hostname    = $hostname_for_host_id{ $host_id_for_service_id{$service_id} };
		    $servicename = $servicename_for_servicename_id{ $servicename_id_for_service_id{$service_id} };
		    log_message "DEBUG:  host $hostname service $servicename (service_id $service_id) has some service instances";
		}
		else {
		    log_message "DEBUG:  service_id $service_id has some service instances";
		}
	    }
	}
    }
    if (not initialize_rest_api()) {
	log_timed_message "ERROR:  Cannot connect to the Foundation REST API.";
	return 0;
    }
    # If there is any servicegroup in Foundation owned by this same agent but
    # which is not currently configured in the secondary config file, remove
    # it from Foundation.  This covers all cases where a previously configured
    # servicegroup has either been renamed to something else in the secondary
    # config file, or been completely removed from the secondary config file.
    # The point is, there is nobody around to do the cleanup except us, so we'd
    # better do it now so as not to leave seemingly orphaned servicegroups around.
    #
    my $all_my_servicegroups = list_all_my_servicegroups( $application_type, $agent_id );
    if ($all_my_servicegroups) {
	my %all_configured_servicegroups = map { $_ => 1 } (
	    ( map { $services->{$_}{service_group} } keys %$services ),
	    ( map { $service_instances->{$_}{service_group} } keys %$service_instances )
	);
	if ($debug_basic) {
	    log_message "DEBUG:  configured servicegroup:  $_" for sort keys %all_configured_servicegroups;
	}
	my @obsolete_servicegroups = grep { not exists $all_configured_servicegroups{$_} } @$all_my_servicegroups;
	if ($debug_basic) {
	    log_message "DEBUG:  Deleting obsolete service group:  $_" for sort @obsolete_servicegroups;
	}
	if ( @obsolete_servicegroups and not delete_service_groups( \@obsolete_servicegroups ) ) {
	    log_message "ERROR:  Cannot delete obsolete Foundation service groups.";
	    $status = 0;
	}
    }
    else {
	## We flag this as an error and inform the user about it, but while serious, this condition is not
	## bad enough on its own to block attempting to manage the rest of the configured service groups.
	log_message "ERROR:  Cannot list the existing Foundation service groups.";
	log_message "WARNING:  Any obsolete Foundation service groups have not been deleted.";
	$status = 0;
    }
    foreach my $servicename ( sort keys %$services ) {
	my $service_hosts =
	  $use_bulk_monarch_queries
	  ? ( $servicename_id_for_servicename{$servicename} ? ( $hostnames_for_servicename{$servicename} || [] ) : 0 )
	  : $dm->get_service_hostlist($servicename);
	if ( not defined $service_hosts ) {
	    if ($use_bulk_monarch_queries) {
		## This can't happen, by construction.
		log_message "ERROR:  Internal logic problem; cannot find hosts for service $servicename";
	    }
	    else {
		log_timed_message "ERROR:  Cannot access the monarch database when looking for service $servicename hosts.";
	    }
	    terminate_rest_api();
	    return 0;
	}
	elsif ( not $service_hosts ) {
	    log_message "ERROR:  Service \"$servicename\" does not exist in the monarch database.";
	    # We soldier on in spite of this failure.  One bad apple should not spoil the whole barrel.
	    # But we still report an overall failure at the end.
	    $status = 0;
	    # We don't just exit this iteration of the enclosing loop because if the servicegroup
	    # associated with this service in the config file already exists in Foundation, but
	    # the service does not exist in Monarch, that servicegroup should now be deleted.
	    # That will be done later on in the loop.
	}
	# Now we have the name of the service, along with the list of hostnames which have that service
	# attached.  But if we have service instances in play, that complicates the picture, because of
	# the rule that having all-inactive service instances means the service itself never appears on
	# that host.  So let's build a data structure that will let us see the situation in detail.
	my @host_services = ();
	if ( ref $service_hosts ) {
	    foreach my $hostname (@$service_hosts) {
		next if $is_inactive_host_name{$hostname};
		## $service_id will be >0 if the host service exists, as it should, 0 if not,
		## and undef if the search failed (e.g., due to database errors).
		my $service_id = $use_bulk_monarch_queries
		  ? $service_id_for_host_id_and_servicename_id
		      { $host_id_for_hostname{$hostname} || 0 }
		      { $servicename_id_for_servicename{$servicename} || 0 }
		  : $dm->get_host_serviceid( $hostname, $servicename );
		if ( not defined $service_id ) {
		    if ($use_bulk_monarch_queries) {
			log_message "ERROR:  Cannot find the service_id for host $hostname service $servicename";
		    }
		    else {
			log_timed_message "ERROR:  Cannot access the monarch database when looking for host $hostname service $servicename";
		    }
		    terminate_rest_api();
		    return 0;
		}
		elsif ( exists $all_instance_statuses->{$service_id} ) {
		    ## This service has some instances defined on this host.  Each service instance may
		    ## or may not be active.  We only deal with the active instances, and no longer with
		    ## the base service name even if all the instances are inactive.
		    foreach my $instancename ( keys %{ $all_instance_statuses->{$service_id} } ) {
			log_message "DEBUG:  $hostname service instance \"$servicename$instancename\" is "
			  . ( $all_instance_statuses->{$service_id}{$instancename} ? 'active' : 'inactive' )
			  if $debug_debug;
			# Monarch currently doesn't do a good job of disallowing leading and trailing
			# whitespace in a service instance suffix.  But we can at least block the serious
			# confusion that would result if we had an all-whitespace service instance suffix
			# (it would appear in the GUI as though it were the base service name).
			if ( $instancename =~ /^\s*$/ ) {
			    log_message "WARNING:  For host $hostname service $servicename,"
			      . " an empty or all-whitespace instance name suffix is being ignored.";
			}
			else {
			    ## We only include this service instance if it's active.
			    push @host_services, [ $hostname, $servicename . $instancename ]
			      if $all_instance_statuses->{$service_id}{$instancename};
			}
		    }
		}
		else {
		    ## This service has no instances (at all) defined on this host, active or not.
		    ## This is the most common case; we just use the base service name.
		    push @host_services, [ $hostname, $servicename ];
		}
	    }
	}
	# If the service exists in Monarch, it might or might not be effectively assigned to some hosts,
	# as reflected now in (scalar @host_services), which also takes into account service instances.
	# But all detail of how to handle the setup for the service group has now been collected, and we
	# can pass that down to a separate routine to handle possible servicegroup creation, deletion,
	# or membership adjustment.
	#
	$status &= update_service_group(
	    $services->{$servicename}{service_group},
	    $services->{$servicename}{service_group_description},
	    \@host_services
	);
    }
    my $host_id;
    my $you_have_been_warned = 0;
    foreach my $instancename ( sort keys %$service_instances ) {
	my @host_services = ();
	# Monarch currently doesn't do a good job of disallowing leading and trailing
	# whitespace in a service instance suffix.  But we can at least block the serious
	# confusion that would result if we had an all-whitespace service instance suffix
	# (it would appear in the GUI as though it were the base service name).  I don't
	# actually think it would be possible to configure such an instance name for use
	# by this script, so this logic is only here in sympathetic parallelism for how
	# we handle service instances for configured services.
	if ( $instancename =~ /^\s*$/ ) {
	    log_message "WARNING:  An empty or all-whitespace configured service instance name suffix is being ignored.";
	    $status = 0;
	}
	elsif ( exists $all_active_instance_services{$instancename} ) {
	    ## We haven't yet implemented code to handle service instances if $use_bulk_monarch_queries
	    ## is false, so we may as well tell the user if they stumble into that situation and it
	    ## actually matters.  That will cause less confusion all around.
	    if ( not $use_bulk_monarch_queries ) {
		if ( not $you_have_been_warned ) {
		    log_message "WARNING:  <service_instance> entries for active service instances are not processed unless use_bulk_monarch_queries is true.";
		    $you_have_been_warned = 1;
		}
		# That's only a warning, not an outright failure, but it's enough to declare
		# that the operation of this script was not completely successful.
		$status = 0;
		# This is a configuration/code mismatch, not a failure of the database content.
		# So we ignore this <service_instance> entry; we don't move on to delete an
		# already-existing servicegroup associated with this service instance suffix
		# just because we don't understand what its current membership ought to be.
		# The user can cure this condition either by not having any <service_instance>
		# entries configured, or by flipping the polarity of use_bulk_monarch_queries
		# in the config file.
		last;
	    }
	    foreach my $service_id ( keys %{ $all_active_instance_services{$instancename} } ) {
		$host_id = $host_id_for_service_id{$service_id};
		next if $is_inactive_host_id{$host_id};
		push @host_services,
		  [
		    $hostname_for_host_id{$host_id},
		    $servicename_for_servicename_id{ $servicename_id_for_service_id{$service_id} } . $instancename
		  ];
	    }
	}
	else {
	    ## If there are any service instances with this $instancename, none of them are active.
	    ## So effectively, this $instancename does not exist in the monarch database.  Since the
	    ## $instancename was configured, though, we can assume that the site expected that some
	    ## of those service instances would be active.  So we treat this as a soft failure.
	    log_message "ERROR:  Configured service instance \"$instancename\" has no active instances in the monarch database.";
	    # We soldier on in spite of this failure.  One bad apple should not spoil the whole barrel.
	    # But we still report an overall failure at the end.
	    $status = 0;
	    # We don't just exit this iteration of the enclosing loop because if the servicegroup
	    # associated with this service instance in the config file already exists in Foundation,
	    # but the service instance has no active instances in Monarch, that servicegroup should
	    # now be deleted.  That will be done later on in the loop.
	}
	# We might or might not have found any active instances for this service instance in Monarch,
	# as reflected now in (scalar @host_services).  But all detail of how to handle the setup for
	# the service group has now been collected, and we can pass that down to a separate routine
	# to handle possible servicegroup creation, deletion, or membership adjustment.
	#
	$status &= update_service_group(
	    $service_instances->{$instancename}{service_group},
	    $service_instances->{$instancename}{service_group_description},
	    \@host_services
	);
    }
    terminate_rest_api();
    return $status;
}
# Reconcile a single Foundation service group with the membership computed from Monarch.
#
# Arguments:
#     $service_group              name of the Foundation service group to manage
#     $service_group_description  description handed to create_service_group() if the group must be created
#     $host_services              arrayref of [ hostname, service description ] pairs that are to be the members
#
# Returns 1 if the group ended up in the desired state (created, updated, deleted, or
# rightly absent), and 0 on any failure.  Every failure is logged before returning.
#
sub update_service_group {
    my ( $service_group, $service_group_description, $host_services ) = @_;

    # Ask Foundation, through the REST API, whether the service group exists and who owns it.
    #
    # FIX LATER:  Apart from spotting servicegroups owned by somebody else, this probe could
    # probably be skipped whenever the earlier list_all_my_servicegroups() call came back with
    # a defined result, because it repeats one part of that larger query.  Doing so would mean
    # restructuring the tests below, so it is left alone for now.  (We may be pushed into it
    # anyway if pulling all the servicegroup data for a large site proves too heavyweight to
    # repeat.)  The better path is to have the REST API fixed so each step can fetch only the
    # object-specific detail it needs, with nothing about subsidiary objects.  Re-using the data
    # from the early probing buys at best a 2x speedup; not fetching the excess data in the
    # first place might buy 10x.  That is where optimization effort should be spent.
    #
    my $group_state = check_service_group_status( $service_group, $application_type, $agent_id );

    if ( $group_state == NO_INFO_AVAILABLE ) {
	## Foundation might be down; we cannot tell.  Log the problem and leave this group alone for now.
	log_message("ERROR:  Cannot probe Foundation for service group \"$service_group\" status.");
	return 0;
    }

    my $group_is_absent = ( $group_state == NO_SUCH_SERVICEGROUP );
    my $group_is_ours   = ( $group_state == OUR_SERVICEGROUP );

    # An absent group is no problem:  the name is free for us to use.  Any group
    # that does exist must belong to this agent before we are willing to touch it.
    if ( not $group_is_absent ) {
	if ( $group_state == THEIR_SERVICEGROUP ) {
	    log_message("ERROR:  Service group \"$service_group\" is owned by some other agent.");
	    return 0;
	}
	if ( not $group_is_ours ) {
	    ## An unrecognized status value should be impossible; give up on this servicegroup if we see one.
	    log_message("ERROR:  Internal logic problem; cannot probe Foundation for service group $service_group status");
	    return 0;
	}
    }

    # With no members at all, the group must not exist.  Delete it if we own it;
    # if it is already absent, there is nothing to do.
    if ( not @$host_services ) {
	return 1 if not $group_is_ours;
	return 1 if delete_service_groups( [$service_group] );
	log_message("ERROR:  Cannot delete Foundation service group $service_group");
	return 0;
    }

    # There are members to record, so the group has to exist before we can populate it.
    if ( $group_is_absent
	and not create_service_group( $service_group, $application_type, $service_group_description, $agent_id ) )
    {
	log_message("ERROR:  Cannot create Foundation service group $service_group");
	return 0;
    }

    if ($debug_debug) {
	## This listing is meant for people, so order it by hostname and then by service name.
	## A stable order also makes it much easier to compare the output of separate runs.
	my @ordered_members = sort { $a->[0] cmp $b->[0] || $a->[1] cmp $b->[1] } @$host_services;
	foreach my $member (@ordered_members) {
	    log_message("DEBUG:  servicegroup $service_group will contain host service $member->[0] \"$member->[1]\"");
	}
    }

    # When GW::RAPID was under active development, the understanding was that the REST API
    # could not remove individual host/services from a servicegroup, and that membership had
    # to be set as a whole.  So rather than computing lists of members to add and members to
    # delete, we simply send the complete desired membership.
    #
    # The REST API has since gained /addmembers and /deletemembers sub-calls for servicegroups,
    # but GW::RAPID has not yet been extended to support them.  It might be possible to fake
    # such calls by abusing the calls that do exist, but for the time being we won't go there.
    #
    # FIX LATER:  If the earlier check_service_group_status() call, or the even earlier call to
    # list_all_my_servicegroups(), unavoidably returned full detail about each servicegroup's
    # existing host/service membership (or we deliberately fetched just the first-level
    # membership data), that could be compared against @$host_services so this call is skipped
    # when nothing has changed.  If create_service_group() had to be called just above, though,
    # this call is needed regardless.  And for reasonable efficiency overall, it is better to
    # push for a REST API fix so the excessive detail is never pulled back in the first place.
    #
    return 1 if set_host_services_for_servicegroup( $service_group, $host_services );

    log_message("ERROR:  Cannot set host/service membership in Foundation service group $service_group");
    return 0;
}
# The Foundation REST API access routines are broken out into wrappers not because they
# necessarily need to be, but because it makes understanding process_servicegroups() much easier.
# This routine returns either undef (on failure to properly query for existing servicegroups),
# or an arrayref pointing to a possibly-empty array of all the names of the servicegroups
# owned by this agent.
#
# FIX LATER:  It may be the case that with the present Foundation REST API, there is no way to avoid
# dredging up lots of lower-level details about the servicegroups we find here.  If that is the
# case, we will end up spending a lot of time in Foundation marshalling and serializing data that we
# then immediately throw away here.  If that turns out to be an unavoidable heavyweight operation,
# we might wish to restructure this script to just use all the information retrieved here, and avoid
# calls to check_service_group_status() which will largely repeat the work done in the initial call
# to list_all_my_servicegroups().  But we're better off pushing to get the REST API fixed so we
# don't pull back all the excessive detail in the first place.
#
# Arguments:
#     $app_type  application type that a servicegroup must carry in Foundation to count as ours
#     $agentID   agent ID that a servicegroup must carry in Foundation to count as ours
#
# Log messages built in this routine substitute an explicit placeholder for any field that
# might be undefined, so an incomplete response is reported cleanly rather than triggering
# "Use of uninitialized value" warnings and producing garbled log lines.
#
sub list_all_my_servicegroups {
    my $app_type           = shift;
    my $agentID            = shift;
    my @servicegroup_names = ();
    my %outcome = ();
    my %results = ();
    # Retrieve all servicegroups owned by this agent.  Ideally, we would just retrieve top-level
    # attributes of the groups themselves, with no info about the host/service membership.  The
    # closer we can come to that, the better, unless we decide to use any extra information we
    # gather now in later processing so we don't need to probe again.  Even then, we would only
    # need a listing of the host/service members without any other detail at all about their
    # present state.
    #
    # FIX LATER:  Adding a "depth => 'simple'" option to this call has no useful effect.  That's
    # not at all surprising, because the doc indicates that such an option is not even supported
    # for this call.  But it means we're pulling a huge amount of subsidiary detail back from this
    # call, well beyond the data that pertains directly to the servicegroup itself, and diving down
    # into all of the individual service members and all of their associated properties.  This is
    # WAY more than we are looking for, but the Foundation REST API currently provides no means to
    # suppress all that unwanted information.
    #
    # The worrisome aspect of this is that the call might take quite a while to run while it spends
    # time marshalling and serializing tons of data we don't care about -- and then on our side, we
    # have to deserialize and unmarshall it all as we translate it into Perl structures that we can
    # access.  That's a pure waste of system resources, so we need to extend the REST API with some
    # other depth option, perhaps "depth => 'minimal'" or "depth => 'sync'", to not pull back any
    # information on the service membership.
    #
    if ( not $rest_api->get_servicegroups( [], { agentId => $agentID, appType => $app_type }, \%outcome, \%results ) ) {
	## Not-completely-successful call.  Unfortunately, we get this status not just
	## if the RAPID API had trouble talking to Foundation, but also if the query
	## does not match any existing servicegroups.
	log_outcome \%outcome, 'servicegroup enumeration';
	log_results \%results, 'servicegroup enumeration';
	if ( defined( $outcome{response_code} ) && $outcome{response_code} == 404 ) {
	    ## We had no trouble talking to Foundation, but empty results unfortunately
	    ## get turned into a failure status by the REST API instead of a success status
	    ## with empty results.  This is very much unlike how a database treats such a
	    ## query, so we need to recognize this situation for what it is.  We would
	    ## prefer that the REST call tag this instead as a "warning" outcome at worst.
	    return [];
	}
	elsif ( $outcome{failed} ) {
	    ## I haven't yet been able to trigger this failure mode in testing, so I don't know
	    ## the right form of user-friendly logging to generate in this situation.  But if
	    ## we turn up the debug level high enough, we can get the GW::RAPID %outcome and
	    ## %results spilled out just above, if we ever need to diagnose this case.
	    # Given that the REST API reports not-found results as just specified above,
	    # we don't know what a failed status would mean.  So we have to report that to
	    # the caller as undef, meaning not knowing what went wrong.
	    return undef;
	}
	else {
	    ## Worst-case scenario; we don't know what exactly happened, but we can at least
	    ## report whatever the server said.  In this branch we have no assurance that the
	    ## server said anything at all, so we guard against undefined response fields.
	    my $response_status = defined( $outcome{response_status} ) ? $outcome{response_status} : '(no response status)';
	    my $response_error  = defined( $outcome{response_error} )  ? $outcome{response_error}  : '(no response error)';
	    log_timed_message "ERROR:  While listing existing Foundation service groups, got $response_status:  $response_error";
	    return undef;
	}
    }
    if ($debug_debug) {
	foreach my $service_group ( sort keys %results ) {
	    ## The ownership checks further below explicitly allow for a missing appType,
	    ## so this listing must tolerate that case as well.
	    my $logged_app_type = defined( $results{$service_group}{appType} ) ? $results{$service_group}{appType} : '(undefined)';
	    log_message "DEBUG:  existing servicegroup $service_group: name=$results{$service_group}{name}";
	    log_message "DEBUG:  existing servicegroup $service_group: appType=$logged_app_type";
	    log_message "DEBUG:  existing servicegroup $service_group: id=$results{$service_group}{id}";
	    if ( exists $results{$service_group}{services} ) {
		foreach my $service ( sort { $a->{hostName} cmp $b->{hostName} || $a->{description} cmp $b->{description} } @{ $results{$service_group}{services} } ) {
		    log_message "DEBUG:  existing servicegroup $service_group service: host=$service->{hostName} service=$service->{description} id=$service->{id}";
		}
	    }
	}
    }
    # We check the appType and agentId fields, to verify that they are as expected.
    # We do these checks here in addition to in the query both to catch any problems
    # in the querying and in case the query gets changed as this code evolves.  For
    # instance, we might want to query on only one of these fields, and allow the
    # checking here to identify misconfigured servicegroups in the logging instead
    # of just having them be completely ignored.
    #
    foreach my $service_group (sort keys %results) {
	if ( !defined( $results{$service_group}{appType} ) ) {
	    ## This is apparently our servicegroup, but its app type has disappeared.
	    ## We therefore won't claim it as our own.
	    log_message "ERROR:  Service group \"$service_group\" has no defined application type in Foundation.";
	    next;
	}
	elsif ( $results{$service_group}{appType} ne $app_type ) {
	    ## This is apparently our servicegroup, but its app type is incorrect.
	    ## We therefore won't claim it as our own.
	    log_message "ERROR:  Service group \"$service_group\" has application type \"$results{$service_group}{appType}\" in Foundation";
	    next;
	}
	elsif ( !defined( $results{$service_group}{agentId} ) ) {
	    ## The query returned a bad servicegroup.  This can't happen, but
	    ## if it does, we just ignore it except for the error logging.
	    log_message "ERROR:  Service group \"$service_group\" has no defined agent ID in Foundation";
	    next;
	}
	elsif ( $results{$service_group}{agentId} ne $agentID ) {
	    ## The query returned a bad servicegroup.  This can't happen, but
	    ## if it does, we just ignore it except for the error logging.
	    log_message "ERROR:  Service group \"$service_group\" has agent ID \"$results{$service_group}{agentId}\" in Foundation";
	    next;
	}
	## Only a servicegroup that passed every ownership check is reported to the caller.
	push @servicegroup_names, $service_group;
    }
    return \@servicegroup_names;
}
# Unfortunately, this is a very clumsy and expensive check, via the current REST API.
# That's because the "usual" response to a query includes marshalling of a huge amount
# of data on the contained host/service member objects, when all we wanted was top-level
# attributes of the servicegroup itself.  Without testing, it's not clear that the
# "simple" depth limits this extra data that we basically don't care about in any way.
#
# FIX LATER:  Check with GroundWork Engineering to see when we might make improvements to
# the REST API to allow us to make the queries we really desire:  either top-level only,
# or membership-level as well but without all the extra current-status detail about every
# host/service member.
#
# There are multiple possible situations with checking for a servicegroup, and we need to
# distinguish all of them so the calling logic knows what to do in each case.
#
# NO_INFO_AVAILABLE:     Foundation access failed, and we have no information.
# NO_SUCH_SERVICEGROUP:  The servicegroup does not exist at all in Foundation.
# THEIR_SERVICEGROUP:    The servicegroup exists, but is clearly not owned by this agent.
# OUR_SERVICEGROUP:      The servicegroup exists, and is owned by this agent.
#
# To distinguish each of those cases, we return the indicated enumeration value instead of
# a boolean status.
#
sub check_service_group_status {
    my $service_group = shift;
    my $app_type      = shift;
    my $agentID       = shift;
    my %outcome = ();
    my %results = ();
    # Retrieve a single servicegroup.  Ideally, we would just retrieve either top-level attributes
    # of the group itself, with no info about the host/service membership, or at worst just a
    # listing of the host/service members without any other detail at all about their present state.
    # But testing against GWMEE 7.1.1 dev builds shows that it does pull back membership data in
    # great detail, and that neither "depth => 'simple'" nor "depth => 'sync'" has any effect here.
    # Also, the current doc makes it look like such an option is not even supported for this call.
    # So this operation ends up being much more heavyweight than we want it to be.
    #
    if ( not $rest_api->get_servicegroups( [$service_group], { }, \%outcome, \%results ) ) {
	## Not-completely-successful call.  Unfortunately, we get this status not just
	## if the RAPID API had trouble talking to Foundation, but also if the target
	## $service_group does not exist.
	log_outcome \%outcome, 'servicegroup status check';
	log_results \%results, 'servicegroup status check';
	if ( defined( $outcome{response_code} ) && $outcome{response_code} == 404 ) {
	    ## We had no trouble talking to Foundation, but empty results unfortunately
	    ## get turned into a failure status by the REST API instead of a success status
	    ## with empty results.  This is very much unlike how a database treats such a
	    ## query, so we need to recognize this situation for what it is.  We would
	    ## prefer that the REST call tag this instead as a "warning" outcome at worst.
	    return NO_SUCH_SERVICEGROUP;
	}
	elsif ( $outcome{failed} ) {
	    ## I haven't yet been able to trigger this failure mode in testing, so I don't know
	    ## the right form of user-friendly logging to generate in this situation.  But if
	    ## we turn up the debug level high enough, we can get the GW::RAPID %outcome and
	    ## %results spilled out just above, if we ever need to diagnose this case.
	    # Given that the REST API reports not-found results as just specified above,
	    # we don't know what a failed status would mean.  So we have to report that to
	    # the caller as not knowing what went wrong.
	    return NO_INFO_AVAILABLE;
	}
	else {
	    ## Worst-case scenario; we don't know what exactly happened, but we can at least
	    ## report whatever the server said.
	    log_timed_message "ERROR:  While checking status for servicegroup $service_group, got $outcome{response_status}:  $outcome{response_error}";
	    return NO_INFO_AVAILABLE;
	}
    }
    # We check the agentId field (eq $agentID) and the appType field (eq $app_type),
    # to verify that they both are as expected.  We do those checks here rather than
    # in the query because the REST API might have demanded a
    #
    #     query => "name = '$service_group'"
    #
    # option containing the $service_group name instead of just specifying the object name
    # in the standard objects array as we currently do, and that seems clumsy.
    if (%results) {
	if ( not $results{$service_group} ) {
	    log_timed_message "ERROR:  Cannot find service group $service_group in Foundation";
	    ## This would be a highly unexpected case, since we already checked above for the
	    ## standard form of returning an empty result.  If we successfully got back results
	    ## but not for the single servicegroup we asked for, what exactly DID we get back?
	    ## These log routines will tell us, if we crank up the debug level far enough.
	    log_outcome \%outcome, 'servicegroup status check';
	    log_results \%results, 'servicegroup status check';
	    return NO_INFO_AVAILABLE;
	}
	elsif ( !defined( $results{$service_group}{appType} ) ) {
	    log_message "ERROR:  Service group \"$service_group\" has no defined application type in Foundation.";
	    return THEIR_SERVICEGROUP;
	}
	elsif ( $results{$service_group}{appType} ne $app_type ) {
	    log_message "ERROR:  Service group \"$service_group\" has application type \"$results{$service_group}{appType}\" in Foundation";
	    return THEIR_SERVICEGROUP;
	}
	elsif ( !defined( $results{$service_group}{agentId} ) ) {
	    log_message "ERROR:  Service group \"$service_group\" has no defined agent ID in Foundation";
	    return THEIR_SERVICEGROUP;
	}
	elsif ( $results{$service_group}{agentId} ne $agentID ) {
	    log_message "ERROR:  Service group \"$service_group\" has agent ID \"$results{$service_group}{agentId}\" in Foundation";
	    return THEIR_SERVICEGROUP;
	}
    }
    else {
	## No such service found (we got empty results back).  This is not necessarily
	## an error; it might simply be a newly configured service to process.
	return NO_SUCH_SERVICEGROUP;
    }
    return OUR_SERVICEGROUP;
}
# Create (or update) one servicegroup in Foundation through the REST API.
#
# Arguments, in order:
#     $service_group    name of the servicegroup
#     $app_type         value sent as the servicegroup's appType
#     $description      optional; only sent when defined and non-empty
#     $agentID          value sent as the servicegroup's agentId
#
# Returns 1 if upsert_servicegroups() reported success, 0 otherwise.  On
# failure, an ERROR line is always written to the log:  the message from the
# first result object if there is one, otherwise whatever response status and
# error the REST call recorded in its outcome.
#
sub create_service_group {
    my $service_group = shift;
    my $app_type      = shift;
    my $description   = shift;
    my $agentID       = shift;
    my @servicegroups = ();
    my %outcome       = ();
    my @results       = ();
    my %sg_object     = ();
    $sg_object{name}        = $service_group;
    $sg_object{appType}     = $app_type;
    $sg_object{description} = $description if defined($description) && $description ne '';
    $sg_object{agentId}     = $agentID;
    push @servicegroups, \%sg_object;
    if ( $rest_api->upsert_servicegroups( \@servicegroups, {}, \%outcome, \@results ) ) {
	return 1;
    }
    log_outcome \%outcome, 'servicegroup addition';
    log_results \@results, 'servicegroup addition';
    ## Check that a first result actually exists before looking inside it, so we
    ## neither autovivify an element of @results nor miss the case of no results.
    if ( defined( $results[0] ) && defined( $results[0]{message} ) ) {
	log_timed_message "ERROR:  $results[0]{message}";
    }
    else {
	## The call failed without handing back a per-object message.  The detailed
	## outcome/results dumps above appear to be emitted only at elevated debug
	## levels, so report what the server said to keep the failure visible.
	my $response_status = defined( $outcome{response_status} ) ? $outcome{response_status} : 'no response status';
	my $response_error  = defined( $outcome{response_error} )  ? $outcome{response_error}  : 'no error detail available';
	log_timed_message "ERROR:  While creating servicegroup $service_group, got $response_status:  $response_error";
    }
    return 0;
}
# Delete a set of servicegroups from Foundation through the REST API.
#
# Takes a single argument, an arrayref of servicegroup names.  Returns 1 if
# delete_servicegroups() reported success, 0 otherwise.  On failure, every
# result that carries both a non-success status and a message is logged,
# tagged WARNING for a 'warning' status and ERROR for anything else.
#
sub delete_service_groups {
    my ($servicegroup_names) = @_;    # arrayref
    my ( %outcome, @results );
    return 1 if $rest_api->delete_servicegroups( $servicegroup_names, {}, \%outcome, \@results );
    log_outcome \%outcome, 'servicegroup deletion';
    log_results \@results, 'servicegroup deletion';
    for my $result (@results) {
	## Skip anything that is not a reportable problem.
	next if not defined( $result->{status} );
	next if $result->{status} eq 'success';
	next if not defined( $result->{message} );
	my $severity = $result->{status} eq 'warning' ? "WARNING" : "ERROR";
	log_timed_message( $severity . ":  $result->{message}" );
    }
    return 0;
}
# FIX LATER:  See if we need a $rest_api->clear_servicegroups() call here before we
# go adding host/service members to the servicegroup, given that we're currently
# using this in a set-all-members mode of operation in the calling code.  (However,
# such a call might not yet be supported in the GW::RAPID package, so we probably
# have no choice in the matter.  If this call doesn't work, we probably need to drop
# and re-create the entire servicegroup.  That could theoretically have unpleasant
# side effects in destroying any secondary associations the servicegroup might have
# with other objects in the system, so we want to avoid such action.)
#
# Send a list of host/service members for one servicegroup to Foundation.
#
# Arguments, in order:
#     $service_group    name of the servicegroup
#     $host_services    arrayref of [ host, service ] pairs
#
# Returns 1 on success and 0 if upsert_servicegroups() reported failure.  If
# the member list is empty, no REST call is made at all and 1 is returned, so
# this routine by itself never empties an existing servicegroup.  On failure,
# an ERROR line is always written to the log:  the message from the first
# result object if there is one, otherwise whatever response status and error
# the REST call recorded in its outcome.
#
sub set_host_services_for_servicegroup {
    my $service_group = shift;
    my $host_services = shift;
    my @servicegroups = ();
    my @members       = ();
    my %outcome       = ();
    my @results       = ();
    foreach my $host_service (@$host_services) {
	push @members, { host => $host_service->[0], service => $host_service->[1] };
    }
    ## Nothing to submit; treat that as success, as there is no work to fail at.
    return 1 if not @members;
    push @servicegroups, { name => $service_group, services => \@members };
    if ( $rest_api->upsert_servicegroups( \@servicegroups, {}, \%outcome, \@results ) ) {
	return 1;
    }
    log_outcome \%outcome, 'servicegroup member assignment';
    log_results \@results, 'servicegroup member assignment';
    ## Check that a first result actually exists before looking inside it, so we
    ## neither autovivify an element of @results nor miss the case of no results.
    if ( defined( $results[0] ) && defined( $results[0]{message} ) ) {
	log_timed_message "ERROR:  $results[0]{message}";
    }
    else {
	## The call failed without handing back a per-object message.  The detailed
	## outcome/results dumps above appear to be emitted only at elevated debug
	## levels, so report what the server said to keep the failure visible.
	my $response_status = defined( $outcome{response_status} ) ? $outcome{response_status} : 'no response status';
	my $response_error  = defined( $outcome{response_error} )  ? $outcome{response_error}  : 'no error detail available';
	log_timed_message "ERROR:  While assigning members to servicegroup $service_group, got $response_status:  $response_error";
    }
    return 0;
}