#!/usr/bin/env bash

# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.

# Print the optional error message held in the global OPT_ERR, followed by a
# short usage summary, all on STDERR, and then exit with status 1.
usage() {
   {
      if [ -n "${OPT_ERR}" ]; then
         echo "${OPT_ERR}"
      fi
      echo "Usage: pt-collect -d -g -i -o -s [OPTIONS] [-- MYSQL-OPTIONS]"
      echo "For more information, 'man pt-collect' or 'perldoc $0'."
   } >&2
   exit 1
}

# Make sure the disk isn't getting too full.
#
# Arguments:
#   $1  Percent-full limit; the check fails when the filesystem is at least
#       this full (default 100).
#   $2  Free-space limit in megabytes; the check fails when no more than this
#       many megabytes are available (default 0).
#   $3  A path on the filesystem to check.
#
# Globals written: PCT, MB, DEST.  DEST is deliberately left global because
# the strace step of the main collection block reads it.
#
# Returns 0 when there is enough room.  Returns 1, after printing the reason
# on STDOUT, when there is not enough room or when the free space cannot be
# determined (for example because df failed or the path does not exist).
check_disk_space() {
   PCT=${1:-"100"}
   MB=${2:-"0"}
   DEST="$3"
   local avail full

   # A single df call gives a consistent snapshot of both numbers.  With -P
   # every filesystem is reported on exactly one line, so the data is on line
   # 2 no matter what the device is called (overlay, tmpfs, host:/export, ...);
   # matching only names that begin with "/" would miss those.
   read -r avail full <<< "$(df -m -P "${DEST}" | awk 'NR == 2 { sub(/%/, "", $5); print $4, $5 }')"

   # Refuse to compare anything that is not a plain non-negative integer;
   # otherwise "[" fails with a syntax error and the check silently passes.
   case "${avail}" in ''|*[!0-9]*) avail='' ;; esac
   case "${full}"  in ''|*[!0-9]*) full=''  ;; esac
   if [ -z "${avail}" ] || [ -z "${full}" ]; then
      echo "Cannot determine free space for '${DEST}'"
      return 1
   fi

   if [ "${avail}" -le "${MB}" ] || [ "${full}" -ge "${PCT}" ]; then
      echo "Not enough free space (${full}% full, ${avail}MB free)"
      echo "Wanted less than ${PCT}% full and more than ${MB}MB"
      return 1
   fi
   return 0
}

# Parse pt-collect's own options from the given argument list into the OPT_*
# globals (OPT_d, OPT_f, OPT_g, OPT_i, OPT_m, OPT_o, OPT_p, OPT_s, OPT_t).
#
# Parsing stops at "--" (which is consumed) or at the first word that is not
# a recognized option (which is not consumed), so that everything left over
# can be handed to mysql and mysqladmin.  ARGS_CONSUMED is set to the number
# of leading arguments that were used up, for the caller to shift away.
#
# The arguments are walked with "while/shift" on the live argument list.
# Iterating over a snapshot of the list while shifting it falls out of step
# as soon as an unknown word appears, and then assigns the wrong values.
#
# "--help", or an option without its value, ends in usage(), which exits.
parse_options() {
   ARGS_CONSUMED=0
   while [ $# -gt 0 ]; do
      case "$1" in
         --)
            ARGS_CONSUMED=$((ARGS_CONSUMED + 1))
            return 0
            ;;
         --help)
            usage
            return 1
            ;;
         -[dfgimopst])
            if [ $# -lt 2 ]; then
               OPT_ERR="Option $1 requires a value."
               usage
               return 1
            fi
            case "$1" in
               -d) OPT_d="$2" ;;
               -f) OPT_f="$2" ;;
               -g) OPT_g="$2" ;;
               -i) OPT_i="$2" ;;
               -m) OPT_m="$2" ;;
               -o) OPT_o="$2" ;;
               -p) OPT_p="$2" ;;
               -s) OPT_s="$2" ;;
               -t) OPT_t="$2" ;;
            esac
            shift 2
            ARGS_CONSUMED=$((ARGS_CONSUMED + 2))
            ;;
         *)
            # Not one of ours: leave it and the rest for mysql/mysqladmin.
            return 0
            ;;
      esac
   done
   return 0
}

parse_options "$@"
# Drop what was parsed so that "$@" holds only the MYSQL-OPTIONS.
shift "${ARGS_CONSUMED}"


# -d, -g, -i, -o and -s are all required.
if [ -z "${OPT_d}" ] || [ -z "${OPT_i}" ] || [ -z "${OPT_o}" ] \
   || [ -z "${OPT_g}" ] || [ -z "${OPT_s}" ]; then
   OPT_ERR="Missing command-line argument."
   usage
fi

# The destination must already exist.  Catch a bad path here with a clear
# message; otherwise it only shows up later as a failed redirection.
if [ ! -d "${OPT_d}" ]; then
   OPT_ERR="Destination directory '${OPT_d}' does not exist."
   usage
fi

# Every output file name starts with this prefix: either the one given with
# -p, or a timestamp with ":" and "-" turned into "_".
if [ "${OPT_p}" ]; then
   d="${OPT_p}"
else
   d=$(date +%F-%T | tr :- _);
fi

# Check disk space up-front.
check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || exit 1

echo "Gathering info for $d"

# Make sure there's only one of me.  The redirection at the end of this
# subshell opens the lock file on file descriptor 200, and flock takes the
# lock on that descriptor.  Everything the subshell prints, on STDOUT or
# STDERR, is appended to the "$d-output" file in the destination directory.
(
   flock 200

   # Get pidof mysqld; pidof doesn't exist on some systems.  We try our best...
   p=$(pidof -s mysqld);
   if [ -z "${p}" ]; then
      p=$(pgrep -o -x mysqld);
   fi
   if [ -z "${p}" ]; then
      p=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1);
   fi

   # Get memory allocation info before anything else.
   if [ "${p}" ]; then
      if pmap --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then
         pmap -x "$p" > "$OPT_d/$d-pmap"
      else
         # Some pmap's apparently don't support -x (issue 116).
         pmap "$p" > "$OPT_d/$d-pmap"
      fi
   fi

   # Getting a GDB stacktrace can be an intensive operation, so do this only if
   # necessary.
   if [ "${OPT_g}" = "yes" ] && [ "${p}" ]; then
      gdb -ex "set pagination 0" -ex "thread apply all bt" --batch -p "$p" >> "$OPT_d/$d-stacktrace"
   else
      echo "GDB (-g) was not enabled" >> "$OPT_d/$d-stacktrace"
   fi

   # Get MySQL's variables if possible.  Then sleep long enough that we probably
   # complete SHOW VARIABLES if all's well.  (We don't want to run mysql in the
   # foreground, because it could hang.)
   mysql "$@" -e 'SHOW GLOBAL VARIABLES' >> "$OPT_d/$d-variables" 2>&1 &
   sleep .2

   # Is the server newer than 5.1?  That decides which mutex statement to use
   # below.  The major and minor numbers are compared numerically, because a
   # string comparison puts two-digit majors such as 10.x before 5.1.  The
   # value stays empty (meaning "no") when the version could not be read.
   newer_than_51="$(awk '/^version[^_]/{split($2, v, "."); if (v[1] + 0 > 5 || (v[1] + 0 == 5 && v[2] + 0 > 1)) print "yes"}' "$OPT_d/$d-variables")"

   # Is MySQL logging its errors to a file?  If so, tail that file.  Match the
   # variable name exactly; a substring match also picks up variables such as
   # binlog_error_action and log_error_verbosity.
   errfile="$(awk '$1 == "log_error" {print $2}' "$OPT_d/$d-variables")"
   if [ -z "${errfile}" ] && [ "${p}" ]; then
      # Try getting it from the open filehandle...
      errfile="$(ls -l "/proc/${p}/fd" | awk '/ 2 ->/{print $NF}')"
   fi

   if [ "${errfile}" ]; then
      echo "The error file seems to be ${errfile}"
      tail -f "${errfile}" >"$OPT_d/$d-log_error" 2>&1 &
      error_pid=$!
      # Send a mysqladmin debug to the server so we can potentially learn about
      # locking etc.
      mysqladmin debug "$@"
   else
      echo "Could not detect error file; will not tail MySQL's log file"
   fi

   # Get a sample of these right away, so we can get these without interaction
   # with the other commands we're about to run.
   INNOSTAT="SHOW /*!40100 ENGINE*/ INNODB STATUS\G"
   mysql "$@" -e "${INNOSTAT}"             >> "$OPT_d/$d-innodbstatus1" 2>&1 &
   mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist1"  2>&1 &
   mysql "$@" -e 'SHOW OPEN TABLES'        >> "$OPT_d/$d-opentables1"   2>&1 &
   if [ "${newer_than_51}" = "yes" ]; then
      mysql "$@" -e 'SHOW ENGINE INNODB MUTEX' >> "$OPT_d/$d-mutex-status1" 2>&1 &
   else
      mysql "$@" -e 'SHOW MUTEX STATUS'        >> "$OPT_d/$d-mutex-status1" 2>&1 &
   fi

   # If TCP dumping is specified, start that on the server's port.
   if [ "${OPT_t}" = "yes" ]; then
      port=$(awk '$1 == "port" {print $2}' "$OPT_d/$d-variables")
      if [ "${port}" ]; then
         tcpdump -i any -s 4096 -w "$OPT_d/$d-tcpdump" port "${port}" &
         tcpdump_pid=$!
      fi
   fi

   # Next, start oprofile gathering data during the whole rest of this process.
   # The --init should be a no-op if it has already been init-ed.
   if [ "${OPT_o}" = "yes" ]; then
      if opcontrol --init; then
         opcontrol --start --no-vmlinux
      else
         OPT_o="no"
      fi
   elif [ "${OPT_s}" = "yes" ]; then
      # Don't run oprofile and strace at the same time.  There is nothing to
      # attach to if mysqld's PID is unknown.  The output goes to the
      # destination directory like every other file.
      if [ "${p}" ]; then
         strace -T -s 0 -f -p "$p" > "$OPT_d/$d-strace" 2>&1 &
         strace_pid=$!
      fi
   fi

   # Grab a few general things first.  Background all of these so we can start
   # them all up as quickly as possible.  We use mysqladmin -c even though it is
   # buggy and won't stop on its own in 5.1 and newer, because there is a chance
   # that we will get and keep a connection to the database; in troubled times
   # the database tends to exceed max_connections, so reconnecting in the loop
   # tends not to work very well.
   ps -eaf                         >> "$OPT_d/$d-ps" 2>&1 &
   sysctl -a                       >> "$OPT_d/$d-sysctl" 2>&1 &
   top -bn1                        >> "$OPT_d/$d-top" 2>&1 &
   vmstat 1 "$OPT_i"               >> "$OPT_d/$d-vmstat" 2>&1 &
   vmstat "$OPT_i" 2               >> "$OPT_d/$d-vmstat-overall" 2>&1 &
   iostat -dx  1 "$OPT_i"          >> "$OPT_d/$d-iostat" 2>&1 &
   iostat -dx  "$OPT_i" 2          >> "$OPT_d/$d-iostat-overall" 2>&1 &
   mpstat -P ALL 1 "$OPT_i"        >> "$OPT_d/$d-mpstat" 2>&1 &
   mpstat -P ALL "$OPT_i" 1        >> "$OPT_d/$d-mpstat-overall" 2>&1 &
   if [ "${p}" ]; then
      lsof -nP -p "$p" -bw         >> "$OPT_d/$d-lsof" 2>&1 &
   fi
   mysqladmin "$@" ext -i1 -c"$OPT_i"  >> "$OPT_d/$d-mysqladmin" 2>&1 &
   mysqladmin_pid=$!

   # This loop gathers data for the rest of the duration, and defines the time
   # of the whole job.
   echo "Loop start: $(date +'TS %s.%N %F %T')"
   for a in $(seq 1 "$OPT_i"); do
      # We check the disk, but don't exit, because we need to stop jobs if we
      # need to exit.
      check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || break

      # Synchronize ourselves onto the clock tick, so the sleeps are 1-second
      sleep "$(date +%s.%N | awk '{print 1 - ($1 % 1)}')"
      ts="$(date +"TS %s.%N %F %T")"

      # Collect the stuff for this cycle
      (cat /proc/diskstats  2>&1; echo "$ts") >> "$OPT_d/$d-diskstats"   &
      (cat /proc/stat       2>&1; echo "$ts") >> "$OPT_d/$d-procstat"    &
      (cat /proc/vmstat     2>&1; echo "$ts") >> "$OPT_d/$d-procvmstat"  &
      (cat /proc/meminfo    2>&1; echo "$ts") >> "$OPT_d/$d-meminfo"     &
      (cat /proc/slabinfo   2>&1; echo "$ts") >> "$OPT_d/$d-slabinfo"    &
      (cat /proc/interrupts 2>&1; echo "$ts") >> "$OPT_d/$d-interrupts"  &
      (df -h                2>&1; echo "$ts") >> "$OPT_d/$d-df"          &
      (netstat -antp        2>&1; echo "$ts") >> "$OPT_d/$d-netstat"     &
      (netstat -s           2>&1; echo "$ts") >> "$OPT_d/$d-netstat_s"   &
   done
   echo "Loop end: $(date +'TS %s.%N %F %T')"

   if [ "${OPT_o}" = "yes" ]; then
      opcontrol --stop
      opcontrol --dump
      # pidof may report several PIDs, or none at all; only call kill when
      # there is something to kill.
      oprofiled_pids=$(pidof oprofiled)
      if [ "${oprofiled_pids}" ]; then
         kill ${oprofiled_pids}
      fi
      opcontrol --save="pt_collect_$d"

      # Attempt to generate a report; if this fails, then just tell the user how
      # to generate the report.
      path_to_binary=$(command -v mysqld);
      if [ "${path_to_binary}" ] && [ -f "${path_to_binary}" ]; then
         opreport --demangle=smart --symbols --merge tgid "session:pt_collect_$d" "${path_to_binary}" > "$OPT_d/$d-opreport"
      else
         echo "oprofile data saved to pt_collect_$d; you should now be able to get a report" > "$OPT_d/$d-opreport"
         echo "by running something like" >> "$OPT_d/$d-opreport"
         echo "opreport --demangle=smart --symbols --merge tgid session:pt_collect_$d /path/to/mysqld" >> "$OPT_d/$d-opreport"
      fi
   elif [ "${OPT_s}" = "yes" ]; then
      # strace was only started if oprofile was not requested and mysqld's PID
      # was known; this branch is also reached when "opcontrol --init" failed
      # and OPT_o was switched to "no" above, in which case there is no strace
      # to stop.
      if [ "${strace_pid}" ]; then
         kill -s INT "${strace_pid}"
         sleep 1
         kill -s TERM "${strace_pid}"
         # Sometimes strace leaves threads/processes in T status.
         kill -s CONT "$p"
      fi
   fi

   mysql "$@" -e "${INNOSTAT}"             >> "$OPT_d/$d-innodbstatus2" 2>&1 &
   mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist2"  2>&1 &
   mysql "$@" -e 'SHOW OPEN TABLES'        >> "$OPT_d/$d-opentables2"   2>&1 &
   if [ "${newer_than_51}" = "yes" ]; then
      mysql "$@" -e 'SHOW ENGINE INNODB MUTEX' >> "$OPT_d/$d-mutex-status2" 2>&1 &
   else
      mysql "$@" -e 'SHOW MUTEX STATUS'        >> "$OPT_d/$d-mutex-status2" 2>&1 &
   fi

   # Kill backgrounded tasks.
   kill "$mysqladmin_pid"
   [ "$error_pid" ] && kill "$error_pid"
   [ "$tcpdump_pid" ] && kill "$tcpdump_pid"

   # Finally, record what system we collected this data from.
   hostname > "$OPT_d/$d-hostname"
)200>/tmp/percona-toolkit-collect-lockfile >> "$OPT_d/$d-output" 2>&1

# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod

=head1 NAME

pt-collect - Collect information from a server for some period of time.

=head1 SYNOPSIS

Usage: pt-collect -d -g -i -o -s [OPTIONS] [-- MYSQL-OPTIONS]

The pt-collect tool gathers a variety of information about a system for a
period of time.  It is typically executed when the stalk tool detects a
condition and wants to collect information to assist in diagnosis.  Five
options must be specified on the command line: -d, -g, -i, -o, and -s.

=head1 RISKS

The following section is included to inform users about the potential risks,
whether known or unknown, of using this tool.  The two main categories of risks
are those created by the nature of the tool (e.g. read-only tools vs. read-write
tools) and those created by bugs.

pt-collect is a read-only tool.  It should be very low-risk.

At the time of this release, we know of no bugs that could cause serious harm
to users.

The authoritative source for updated information is always the online issue
tracking system.  Issues that affect this tool will be marked as such.  You can
see a list of such issues at the following URL:
L<http://www.percona.com/bugs/pt-collect>.

See also L<"BUGS"> for more information on filing bugs and getting help.

=head1 DESCRIPTION

pt-collect creates a lock to ensure that only one instance runs at a time,
and then saves a variety of performance and status data into files in the
configured directory.  Files are named with a timestamp so they can be
grouped together.  The tool is MySQL-centric by default, and gathers quite
a bit of diagnostic data that's useful for understanding the behavior of
a MySQL database server.

Options after C<--> are passed to C<mysql> and C<mysqladmin>.

=head1 OPTIONS

=over

=item -d DESTINATION (required)

Where to store the resulting data; must already exist.

=item -g <yes/no> (required)

Collect GDB stack traces.

=item -i INTERVAL (required)

How many seconds to collect data.

=item -o <yes/no> (required)

Collect oprofile data; disables -s.

=item -s <yes/no> (required)

Collect strace data.

=item -f PERCENT

Exit if the disk is more than this percent full (default 100).

=item -m MEGABYTES

Exit if there are less than this many megabytes free disk space (default 0).

=item -p PREFIX

Store the data into files with this prefix (optional).

=item -t <yes/no>

Collect tcpdump data.

=back

=head1 ENVIRONMENT

This tool does not use any environment variables.

=head1 SYSTEM REQUIREMENTS

This tool requires Bash v3 or newer and assumes that these programs
are installed, in the PATH, and executable: sysctl, top, vmstat, iostat,
mpstat, lsof, mysql, mysqladmin, df, netstat, pidof, flock, and others
depending on what command-line options are specified.  If some of those
programs are not available, the tool will still run but may print warnings.

=head1 BUGS

For a list of known bugs, see L<http://www.percona.com/bugs/pt-collect>.

Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:

=over

=item * Complete command-line used to run the tool

=item * Tool L<"--version">

=item * MySQL version of all servers involved

=item * Output from the tool including STDERR

=item * Input files (log/dump/config files, etc.)

=back

If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.

=head1 DOWNLOADING

Visit L<http://www.percona.com/software/percona-toolkit/> to download the
latest release of Percona Toolkit.  Or, get the latest release from the
command line:

   wget percona.com/get/percona-toolkit.tar.gz

   wget percona.com/get/percona-toolkit.rpm

   wget percona.com/get/percona-toolkit.deb

You can also get individual tools from the latest release:

   wget percona.com/get/TOOL

Replace C<TOOL> with the name of any tool.

=head1 AUTHORS

Baron Schwartz

=head1 ABOUT PERCONA TOOLKIT

This tool is part of Percona Toolkit, a collection of advanced command-line
tools developed by Percona for MySQL support and consulting.  Percona Toolkit
was forked from two projects in June, 2011: Maatkit and Aspersa.  Those
projects were created by Baron Schwartz and developed primarily by him and
Daniel Nichter, both of whom are employed by Percona.  Visit
L<http://www.percona.com/software/> for more software developed by Percona.

=head1 COPYRIGHT, LICENSE, AND WARRANTY

This program is copyright 2010-2011 Baron Schwartz, 2011-2012 Percona Inc.
Feedback and improvements are welcome.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA  02111-1307  USA.

=head1 VERSION

pt-collect 2.0.2

=cut

DOCUMENTATION
