/*  Xlunzip - Test tool for the lzip_decompress linux module
    Copyright (C) 2016-2018 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/*
    Exit status: 0 for a normal exit, 1 for environmental problems
    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
    corrupt or invalid input file, 3 for an internal consistency error
    (eg, bug) which caused xlunzip to panic.
*/

#define _FILE_OFFSET_BITS 64

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <utime.h>
#include <sys/stat.h>
#if defined(__MSVCRT__)
#include <io.h>
#define fchmod(x,y) 0
#define fchown(x,y,z) 0
#define SIGHUP SIGTERM
#define S_ISSOCK(x) 0
#ifndef S_IRGRP
#define S_IRGRP 0
#define S_IWGRP 0
#define S_IROTH 0
#define S_IWOTH 0
#endif
#endif
#if defined(__OS2__)
#include <io.h>
#endif

#include "carg_parser.h"
#include "linux_lzip.h"
#include "linux_lunzip.h"
#include "lzip.h"

#ifndef O_BINARY
#define O_BINARY 0
#endif

#if CHAR_BIT != 8
#error "Environments where CHAR_BIT != 8 are not supported."
#endif

/* Message level: main sets it to -1 for '--quiet' and raises it by one per
   '--verbose' (up to 4). Diagnostics are printed only when it is >= 0. */
int verbosity = 0;
/* Forward declaration; resize_buffer calls it before its definition. */
void cleanup_and_fail( const int retval );

const char * const Program_name = "Xlunzip";
const char * const program_name = "xlunzip";	/* prefix of diagnostics */
const char * const program_year = "2018";	/* printed by show_version */
const char * invocation_name = 0;		/* set to argv[0] by main */

/* Maps a known compressed-file suffix ('from') to the suffix that replaces
   it in the output file name ('to'). A null entry terminates the table. */
const struct { const char * from; const char * to; } known_extensions[] = {
  { ".lz",  ""     },
  { ".tlz", ".tar" },
  { 0,      0      } };

int infd = -1;			/* needed by the fill function */
/* Variables used in signal handler context.
   They are not declared volatile because the handler never returns. */
char * output_filename = 0;
int outfd = -1;
bool delete_output_on_interrupt = false;


/* Write the program description, the usage line, the option summary and
   the exit status table to standard output. */
static void show_help( void )
  {
  /* constant text needs no formatting; only the usage line is formatted */
  fputs( "Xlunzip is a test tool for the lzip decompression code of my lzip patch\n"
         "for linux. Xlunzip is similar to lunzip, but it uses the lzip_decompress\n"
         "linux module as a backend. Xlunzip tests the module for stream,\n"
         "buffer-to-buffer and mixed decompression modes, including in-place\n"
         "decompression (using the same buffer for input and output). You can use\n"
         "xlunzip to verify that the module produces correct results when\n"
         "decompressing single member files, multimember files, or the\n"
         "concatenation of two or more compressed files. Xlunzip can be used with\n"
         "unzcrash to test the robustness of the module to the decompression of\n"
         "corrupted data.\n"
         "\nNote that the in-place decompression of concatenated files can't be\n"
         "guaranteed to work because an arbitrarily low compression ratio of the\n"
         "last part of the data can be achieved by appending enough empty\n"
         "compressed members to a file.\n", stdout );
  printf( "\nUsage: %s [options] [files]\n", invocation_name );
  fputs( "\nOptions:\n"
         "  -h, --help                 display this help and exit\n"
         "  -V, --version              output version information and exit\n"
         "  -c, --stdout               write to standard output, keep input files\n"
         "  -d, --decompress           decompress (this is the default)\n"
         "  -f, --force                overwrite existing output files\n"
         "  -I, --in-place             decompress or test using only one buffer\n"
         "  -k, --keep                 keep (don't delete) input files\n"
         "  -o, --output=<file>        if reading standard input, write to <file>\n"
         "  -q, --quiet                suppress all messages\n"
         "  -t, --test                 test compressed file integrity\n"
         "  -v, --verbose              be verbose (a 2nd -v gives more)\n"
         "These options are ignored when --in-place is in effect:\n"
         "      --insize[=<size>]      pre-allocate and fill inbuf [default 16 KiB]\n"
         "      --outsize[=<size>]     pre-allocate outbuf [default 512 MiB]\n"
         "      --nofill               do not pass a fill function; requires --insize\n"
         "      --noflush              do not pass a flush function; requires --outsize\n"
         "If no file names are given, or if a file is '-', xlunzip decompresses\n"
         "from standard input to standard output.\n"
         "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
         "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
         "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
         "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
         "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
         "caused xlunzip to panic.\n"
         "\nReport bugs to lzip-bug@nongnu.org\n"
         "Xlunzip home page: http://www.nongnu.org/lzip/xlunzip.html\n", stdout );
  }


/* Write the program name, version, copyright year and license notice to
   standard output. */
static void show_version( void )
  {
  static const char license_text[] =
    "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
    "This is free software: you are free to change and redistribute it.\n"
    "There is NO WARRANTY, to the extent permitted by law.\n";

  printf( "%s %s\n", program_name, PROGVERSION );
  printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
  fputs( license_text, stdout );
  }


/* Return a buffer of at least 'min_size' bytes, reusing 'buf' when it is
   not null. On allocation failure an error is reported and the program
   terminates through cleanup_and_fail( 1 ). */
void * resize_buffer( void * buf, const unsigned min_size )
  {
  /* realloc on a null pointer behaves like malloc for the given size */
  void * const new_buf = realloc( buf, min_size );
  if( new_buf ) return new_buf;

  show_error( "Not enough memory.", 0, false );
  cleanup_and_fail( 1 );
  return 0;				/* not reached; cleanup_and_fail exits */
  }


/* Print to stderr the padded file name of 'pp' (only the first time it is
   called for that name) followed by 'msg' and a newline, if 'msg' is not
   null. Nothing is printed in quiet mode. */
void Pp_show_msg( struct Pretty_print * const pp, const char * const msg )
  {
  const bool have_msg = ( msg != 0 );

  if( verbosity < 0 ) return;
  if( pp->first_post )
    {
    pp->first_post = false;
    fputs( pp->padded_name, stderr );
    /* name alone: push it out now, since no message follows */
    if( !have_msg ) fflush( stderr );
    }
  if( have_msg ) fprintf( stderr, "%s\n", msg );
  }


/* Parse the number at 'ptr' (any base accepted by strtoul with base 0),
   optionally followed by a multiplier: k (10^3), Ki (2^10), or one of
   M G T P E Z Y, decimal by default and binary when followed by 'i'.
   The result must lie in [llimit, ulimit]. Any error is reported and
   terminates the program with status 1. */
static unsigned long getnum( const char * const ptr,
                             const unsigned long llimit,
                             const unsigned long ulimit )
  {
  char * tail;
  unsigned long result;

  errno = 0;
  result = strtoul( ptr, &tail, 0 );
  if( tail == ptr )
    {
    show_error( "Bad or missing numerical argument.", 0, true );
    exit( 1 );
    }

  if( errno == 0 && tail[0] != 0 )
    {
    /* position in this string + 1 is the power of 'factor' to apply */
    static const char prefixes[] = "kMGTPEZY";
    const bool binary = ( tail[1] == 'i' );
    const unsigned factor = binary ? 1024 : 1000;
    const char unit = tail[0];
    int exponent = 0;				/* 0 = bad multiplier */

    if( unit == 'K' ) { if( binary ) exponent = 1; }	/* only as "Ki" */
    else if( unit == 'k' ) { if( !binary ) exponent = 1; }	/* only decimal */
    else
      {
      const char * const found = strchr( prefixes + 1, unit );
      if( found ) exponent = (int)( found - prefixes ) + 1;
      }
    if( exponent <= 0 )
      {
      show_error( "Bad multiplier in numerical argument.", 0, true );
      exit( 1 );
      }
    for( ; exponent > 0; --exponent )
      {
      /* stop before the product would exceed ulimit */
      if( result > ulimit / factor ) { errno = ERANGE; break; }
      result *= factor;
      }
    }
  if( errno != 0 || result < llimit || result > ulimit )
    {
    show_error( "Numerical argument out of limits.", 0, false );
    exit( 1 );
    }
  return result;
  }


/* Return the index in known_extensions of the first entry whose 'from'
   suffix ends 'name' (the name must be longer than the suffix), or -1 if
   none matches. */
static int extension_index( const char * const name )
  {
  const unsigned name_len = strlen( name );
  int eindex = 0;

  while( known_extensions[eindex].from )
    {
    const char * const ext = known_extensions[eindex].from;
    const unsigned ext_len = strlen( ext );
    if( name_len > ext_len &&
        memcmp( name + ( name_len - ext_len ), ext, ext_len ) == 0 )
      return eindex;
    ++eindex;
    }
  return -1;
  }


/* Store in output_filename the name of the decompressed file for 'name'.
   If 'eindex' selects a known extension shorter than 'name', the 'from'
   suffix is replaced by the 'to' suffix. Otherwise ".out" is appended and,
   at verbosity >= 1, a notice is printed. */
static void set_d_outname( const char * const name, const int eindex )
  {
  const unsigned name_len = strlen( name );
  const char * const from = ( eindex >= 0 ) ? known_extensions[eindex].from : 0;
  const unsigned from_len = from ? strlen( from ) : 0;

  if( from && name_len > from_len )
    {
    const char * const to = known_extensions[eindex].to;
    const unsigned stem_len = name_len - from_len;
    output_filename = resize_buffer( output_filename,
                                     name_len + strlen( to ) + 1 );
    memcpy( output_filename, name, stem_len );	/* name without suffix */
    strcpy( output_filename + stem_len, to );
    return;
    }

  output_filename = resize_buffer( output_filename, name_len + 4 + 1 );
  memcpy( output_filename, name, name_len );
  strcpy( output_filename + name_len, ".out" );
  if( verbosity >= 1 )
    fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
             program_name, name, output_filename );
  }


/* Open 'name' for reading and store its status in '*in_statsp'.
   Regular files are always accepted. Block/character devices, FIFOs and
   sockets are accepted only when 'no_ofile' is true (no output file will
   be created). Returns the descriptor, or a negative value after printing
   a diagnostic. */
static int open_instream( const char * const name, struct stat * const in_statsp,
                          const bool no_ofile )
  {
  const int fd = open( name, O_RDONLY | O_BINARY );
  bool stat_ok, can_read;

  if( fd < 0 )
    { show_file_error( name, "Can't open input file", errno ); return fd; }

  stat_ok = ( fstat( fd, in_statsp ) == 0 );
  if( stat_ok && S_ISREG( in_statsp->st_mode ) ) return fd;

  can_read = stat_ok &&
             ( S_ISBLK( in_statsp->st_mode ) || S_ISCHR( in_statsp->st_mode ) ||
               S_ISFIFO( in_statsp->st_mode ) || S_ISSOCK( in_statsp->st_mode ) );
  if( can_read && no_ofile ) return fd;

  /* rejected: either fstat failed or the file type is not acceptable */
  if( verbosity >= 0 )
    fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
             program_name, name,
             ( can_read && !no_ofile ) ?
             ",\n         and '--stdout' was not specified" : "" );
  close( fd );
  return -1;
  }


/* Create the file named by output_filename and store its descriptor in
   outfd. With 'force' an existing file is truncated; otherwise an existing
   file makes the call fail. Files created from standard input get
   read/write permission bits for user, group and others; other files get
   them for the user only. On success the file is marked for deletion on
   interrupt. Returns true on success; on failure prints a diagnostic
   unless in quiet mode. */
static bool open_outstream( const bool force, const bool from_stdin )
  {
  const mode_t usr_rw = S_IRUSR | S_IWUSR;
  const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
  const int flags = O_APPEND | O_CREAT | O_RDWR | O_BINARY |
                    ( force ? O_TRUNC : O_EXCL );

  outfd = open( output_filename, flags, from_stdin ? all_rw : usr_rw );
  if( outfd >= 0 ) { delete_output_on_interrupt = true; return true; }

  if( verbosity >= 0 )
    {
    if( errno == EEXIST )
      fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
               program_name, output_filename );
    else
      fprintf( stderr, "%s: Can't create output file '%s': %s\n",
               program_name, output_filename, strerror( errno ) );
    }
  return false;
  }


/* Install 'action' as the disposition of SIGHUP, SIGINT and SIGTERM,
   in that order. */
static void set_signals( void (*action)(int) )
  {
  const int handled[] = { SIGHUP, SIGINT, SIGTERM };
  unsigned k;

  for( k = 0; k < sizeof handled / sizeof handled[0]; ++k )
    signal( handled[k], action );
  }


/* Terminate the program with status 'retval'. If an output file is marked
   for deletion, close it and remove it first. Signals are ignored while
   cleaning up. Never returns. */
void cleanup_and_fail( const int retval )
  {
  set_signals( SIG_IGN );			/* ignore signals */
  if( !delete_output_on_interrupt ) exit( retval );

  delete_output_on_interrupt = false;
  if( verbosity >= 0 )
    fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
             program_name, output_filename );
  if( outfd >= 0 )
    {
    close( outfd );
    outfd = -1;
    }
  /* a file that is already gone is not worth a warning */
  if( remove( output_filename ) != 0 && errno != ENOENT )
    show_error( "WARNING: deletion of output file (apparently) failed.", 0, false );
  exit( retval );
  }


/* Handler installed by main for the termination signals: report the
   interruption and exit with status 1 through cleanup_and_fail. */
void signal_handler( int sig )
  {
  (void)sig;				/* the signal number is not used */
  show_error( "Control-C or similar caught, quitting.", 0, false );
  cleanup_and_fail( 1 );
  }


     /* Set permissions, owner and times. */
static void close_and_set_permissions( const struct stat * const in_statsp )
  {
  /* If 'in_statsp' is not null, copy owner, group and mode from it to the
     output file before closing it, and the access and modification times
     after closing it. A failure to close is fatal; a failure to copy
     attributes only produces a warning at verbosity >= 1. */
  bool attr_failed = false;

  if( in_statsp )
    {
    const mode_t mode = in_statsp->st_mode;
    /* fchown will in many cases return with EPERM, which can be safely ignored. */
    if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) != 0 )
      {
      /* ownership not transferred: drop setuid, setgid and sticky bits */
      if( errno != EPERM ||
          fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
        attr_failed = true;
      }
    else if( fchmod( outfd, mode ) != 0 ) attr_failed = true;
    }

  if( close( outfd ) != 0 )
    {
    show_error( "Error closing output file", errno, false );
    cleanup_and_fail( 1 );
    }
  outfd = -1;
  delete_output_on_interrupt = false;		/* the output file is complete */

  if( in_statsp )
    {
    struct utimbuf times;
    times.actime = in_statsp->st_atime;
    times.modtime = in_statsp->st_mtime;
    if( utime( output_filename, &times ) != 0 ) attr_failed = true;
    }
  if( attr_failed && verbosity >= 1 )
    show_error( "Can't change output file attributes.", 0, false );
  }


/* Map a result code of the decompressor to the exit status of the program:
   0 for success, 1 for memory or write errors, 2 for corrupt or invalid
   input, 3 for any other (unexpected) code. */
int convert_retval( const int retval )
  {
  if( retval == 0 ) return 0;

  if( retval == LZIP_OOM_INBUF || retval == LZIP_OOM_OUTBUF ||
      retval == LZIP_WRITE_ERROR )
    return 1;

  if( retval == LZIP_HEADER1_EOF || retval == LZIP_HEADER2_EOF ||
      retval == LZIP_BAD_MAGIC1 || retval == LZIP_BAD_MAGIC2 ||
      retval == LZIP_BAD_VERSION || retval == LZIP_BAD_DICT_SIZE ||
      retval == LZIP_BAD_DATA || retval == LZIP_DATA_EOF ||
      retval == LZIP_BAD_CRC )
    return 2;

  return 3;
  }


/* Read up to 'size' bytes from infd into 'buf', in chunks of at most
   1 MiB, retrying reads interrupted by a signal. Returns the number of
   bytes read. A short count with errno == 0 means end of file; with
   errno != 0 it means a read error. */
static long fill( void * buf, unsigned long size )
  {
  uint8_t * const dest = (uint8_t *)buf;
  unsigned long filled = 0;

  errno = 0;
  while( filled < size )
    {
    const int n = read( infd, dest + filled, min( 1UL << 20, size - filled ) );
    if( n == 0 ) break;					/* EOF */
    if( n < 0 ) { if( errno != EINTR ) break; }		/* keep errno */
    else filled += n;
    errno = 0;
    }
  return filled;
  }

/* Write 'size' bytes from 'buf' to outfd, in chunks of at most 1 MiB,
   retrying writes interrupted by a signal. If outfd is negative the data
   are discarded and 'size' is returned. Returns the number of bytes
   written; a short count means a write error described by errno. */
long flush( void * buf, unsigned long size )
  {
  uint8_t * const src = (uint8_t *)buf;
  unsigned long written = 0;

  errno = 0;
  if( outfd < 0 ) return size;			/* no output wanted */
  while( written < size )
    {
    const int n = write( outfd, src + written, min( 1UL << 20, size - written ) );
    if( n > 0 ) written += n;
    else if( n < 0 && errno != EINTR ) break;	/* keep errno */
    errno = 0;
    }
  return written;
  }

const char * global_name;		/* copy of filename for 'error' */

/* Error callback handed to __lunzip by decompress: print message 'x'
   prefixed with the name of the file being processed. */
static void error(char *x)
  {
  show_file_error( global_name, x, 0 );
  }


static int decompress( struct Pretty_print * const pp, const long cl_insize,
                       const long cl_outsize, const bool nofill,
                       const bool noflush, const bool testing )
  {
  long in_len = cl_insize;
  uint8_t * const inbuf = (in_len > 0) ? malloc( in_len ) : 0;
  long out_size = cl_outsize;
  uint8_t * const outbuf = (out_size > 0) ? malloc( out_size ) : 0;
  long in_pos, out_pos;
  int retval;
  if( ( in_len > 0 && !inbuf ) || ( out_size > 0 && !outbuf ) )
    { show_error( "Not enough memory.", 0, false ); return 1; }

  if( inbuf )
    {
    const long len = fill( inbuf, in_len );
    if( len < in_len )
      { if( errno ) { show_file_error( pp->name, "Read error", errno ); return 1; }
        in_len = len; }
    }
  global_name = pp->name;
  retval = convert_retval( __lunzip( inbuf, in_len, nofill ? 0 : fill,
                                     noflush ? 0 : flush, outbuf, out_size,
                                     &in_pos, &out_pos, error ) );
  if( retval ) return retval;
  if( outbuf && noflush )
    {
    const long len = flush( outbuf, out_pos );
    if( len < out_pos )
      { show_file_error( pp->name, "Write error", errno ); return 1; }
    }
  if( verbosity >= 1 ) Pp_show_msg( pp, 0 );
  if( verbosity >= 2 )
    {
    if( out_pos <= 0 || in_pos <= 0 )
      fputs( "no data compressed.  ", stderr );
    else
      fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved.  ",
               (double)out_pos / in_pos,
               ( 100.0 * in_pos ) / out_pos,
               100.0 - ( ( 100.0 * in_pos ) / out_pos ) );
    if( verbosity >= 3 )
      fprintf( stderr, "decompressed %9lu, compressed %8lu.  ",
               out_pos, in_pos );
    }
  if( verbosity >= 1 )
    fputs( testing ? "ok\n" : "done\n", stderr );
  return 0;
  }


/* Print "<program_name>: <msg>" to stderr, followed by ": " and the text
   of 'errcode' when it is > 0. If 'help' is true, also print a hint to
   try '--help'. Nothing is printed in quiet mode. */
void show_error( const char * const msg, const int errcode, const bool help )
  {
  if( verbosity < 0 ) return;
  if( msg && msg[0] )
    {
    const bool with_code = ( errcode > 0 );
    const char * const separator = with_code ? ": " : "";
    const char * const detail = with_code ? strerror( errcode ) : "";
    fprintf( stderr, "%s: %s%s%s\n", program_name, msg, separator, detail );
    }
  if( help )
    fprintf( stderr, "Try '%s --help' for more information.\n",
             invocation_name );
  }


/* Print "<program_name>: <filename>: <msg>" to stderr, followed by ": "
   and the text of 'errcode' when it is > 0. Nothing is printed in quiet
   mode. */
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode )
  {
  const char * separator = "";
  const char * detail = "";

  if( verbosity < 0 ) return;
  if( errcode > 0 ) { separator = ": "; detail = strerror( errcode ); }
  fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
           separator, detail );
  }


/* Report an internal consistency error (unless in quiet mode) and
   terminate the program with exit status 3. Never returns. */
void internal_error( const char * const msg )
  {
  const bool quiet = ( verbosity < 0 );

  if( !quiet )
    fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
  exit( 3 );
  }


/* Program entry point: parse the command line, then decompress or test
   every file named on it (standard input if none is given or for "-").
   Returns the highest exit status seen: 0 success, 1 environmental
   problem, 2 corrupt input, 3 internal error. */
int main( const int argc, const char * const argv[] )
  {
  const char * default_output_filename = "";	/* set by '--output' */
  const char ** filenames = 0;
  long cl_insize = 0;			/* 0 = do not pre-allocate inbuf */
  long cl_outsize = 0;			/* 0 = do not pre-allocate outbuf */
  int num_filenames = 0;
  int argind = 0;
  int failed_tests = 0;
  int retval = 0;
  int i;
  bool filenames_given = false;		/* some name other than "-" given */
  bool force = false;
  bool in_place = false;
  bool keep_input_files = false;
  bool nofill = false;
  bool noflush = false;
  bool stdin_used = false;		/* standard input is read only once */
  bool testing = false;
  bool to_stdout = false;
  struct Pretty_print pp;

  /* codes above the char range for options without a short name */
  enum { opt_insize = 256, opt_outsize, opt_nofill, opt_noflush };
  const struct ap_Option options[] =
    {
    { 'c', "stdout",          ap_no  },
    { 'd', "decompress",      ap_no  },
    { 'f', "force",           ap_no  },
    { 'h', "help",            ap_no  },
    { 'I', "in-place",        ap_no  },
    { 'k', "keep",            ap_no  },
    { 'n', "threads",         ap_yes },
    { 'o', "output",          ap_yes },
    { 'q', "quiet",           ap_no  },
    { 't', "test",            ap_no  },
    { 'v', "verbose",         ap_no  },
    { 'V', "version",         ap_no  },
    { opt_insize, "insize",   ap_maybe  },
    { opt_outsize, "outsize", ap_maybe  },
    { opt_nofill, "nofill",   ap_no  },
    { opt_noflush, "noflush", ap_no  },
    {  0 ,  0,                ap_no  } };

  struct Arg_parser parser;
  invocation_name = argv[0];

  if( !ap_init( &parser, argc, argv, options, 0 ) )
    { show_error( "Not enough memory.", 0, false ); return 1; }
  if( ap_error( &parser ) )				/* bad option */
    { show_error( ap_error( &parser ), 0, true ); return 1; }

  for( ; argind < ap_arguments( &parser ); ++argind )
    {
    const int code = ap_code( &parser, argind );
    const char * const arg = ap_argument( &parser, argind );
    if( !code ) break;					/* no more options */
    switch( code )
      {
      case 'c': to_stdout = true; break;
      case 'd': testing = false; break;
      case 'f': force = true; break;
      case 'h': show_help(); return 0;
      case 'I': in_place = true; break;
      case 'k': keep_input_files = true; break;
      case 'n': break;			/* accepted, but its argument is ignored */
      case 'o': default_output_filename = arg; break;
      case 'q': verbosity = -1; break;
      case 't': testing = true; break;
      case 'v': if( verbosity < 4 ) ++verbosity; break;
      case 'V': show_version(); return 0;
      /* an empty argument selects the default size */
      case opt_insize:
        cl_insize = arg[0] ? getnum( arg, 1, LONG_MAX ) : 16384; break;
      case opt_outsize: cl_outsize = arg[0] ?
        getnum( arg, min_dictionary_size, LONG_MAX ) : max_dictionary_size;
        break;
      case opt_nofill: nofill = true; break;
      case opt_noflush: noflush = true; break;
      default : internal_error( "uncaught option." );
      }
    } /* end process options */

#if defined(__MSVCRT__) || defined(__OS2__)
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  /* build the list of input names; "-" (standard input) if none is given */
  num_filenames = max( 1, ap_arguments( &parser ) - argind );
  filenames = resize_buffer( filenames, num_filenames * sizeof filenames[0] );
  filenames[0] = "-";

  for( i = 0; argind + i < ap_arguments( &parser ); ++i )
    {
    filenames[i] = ap_argument( &parser, argind + i );
    if( strcmp( filenames[i], "-" ) != 0 ) filenames_given = true;
    }

  /* when testing nothing is written; flush discards data if outfd < 0 */
  if( testing )
    outfd = -1;

  /* install the handler only if an output file may be created */
  if( !to_stdout && !testing &&
      ( filenames_given || default_output_filename[0] ) )
    set_signals( signal_handler );

  Pp_init( &pp, filenames, num_filenames );

  output_filename = resize_buffer( output_filename, 1 );
  for( i = 0; i < num_filenames; ++i )
    {
    const char * input_filename = "";	/* stays empty for standard input */
    int tmp;
    struct stat in_stats;
    const struct stat * in_statsp;
    output_filename[0] = 0;

    if( !filenames[i][0] || strcmp( filenames[i], "-" ) == 0 )
      {
      /* input comes from standard input */
      if( stdin_used ) continue; else stdin_used = true;
      infd = STDIN_FILENO;
      if( !testing )
        {
        if( to_stdout || !default_output_filename[0] )
          outfd = STDOUT_FILENO;
        else
          {
          output_filename = resize_buffer( output_filename,
                              strlen( default_output_filename ) + 1 );
          strcpy( output_filename, default_output_filename );
          if( !open_outstream( force, true ) )
            {
            if( retval < 1 ) retval = 1;
            close( infd ); infd = -1;
            continue;
            }
          }
        }
      }
    else
      {
      /* input comes from a named file */
      input_filename = filenames[i];
      infd = open_instream( input_filename, &in_stats, to_stdout || testing );
      if( infd < 0 ) { if( retval < 1 ) retval = 1; continue; }
      if( !testing )
        {
        if( to_stdout ) outfd = STDOUT_FILENO;
        else
          {
          set_d_outname( input_filename, extension_index( input_filename ) );
          if( !open_outstream( force, false ) )
            {
            if( retval < 1 ) retval = 1;
            close( infd ); infd = -1;
            continue;
            }
          }
        }
      }

    Pp_set_name( &pp, input_filename );
    if( isatty( infd ) )
      {
      show_file_error( pp.name,
                       "I won't read compressed data from a terminal.", 0 );
      if( retval < 1 ) retval = 1;
      if( testing ) { close( infd ); infd = -1; continue; }
      cleanup_and_fail( retval );
      }

    /* attributes are copied to the output only from a named input file */
    in_statsp = input_filename[0] ? &in_stats : 0;
    if( in_place )
      tmp = decompress_in_place( infd, &pp, testing );
    else
      tmp = decompress( &pp, cl_insize, cl_outsize, nofill, noflush, testing );
    if( close( infd ) != 0 )
      {
      show_error( input_filename[0] ? "Error closing input file" :
                                      "Error closing stdin", errno, false );
      if( tmp < 1 ) tmp = 1;
      }
    infd = -1;
    if( tmp > retval ) retval = tmp;
    /* a failure is fatal unless testing, where it is only counted */
    if( tmp )
      { if( !testing ) cleanup_and_fail( retval );
        else ++failed_tests; }

    /* true only if open_outstream created an output file for this input */
    if( delete_output_on_interrupt )
      close_and_set_permissions( in_statsp );
    if( input_filename[0] )
      {
      if( !keep_input_files && !to_stdout && !testing )
        remove( input_filename );
      }
    }
  /* outfd still open here can only be standard output */
  if( outfd >= 0 && close( outfd ) != 0 )
    {
    show_error( "Error closing stdout", errno, false );
    if( retval < 1 ) retval = 1;
    }
  if( failed_tests > 0 && verbosity >= 1 && num_filenames > 1 )
    fprintf( stderr, "%s: warning: %d %s failed the test.\n",
             program_name, failed_tests,
             ( failed_tests == 1 ) ? "file" : "files" );
  free( output_filename );
  free( filenames );
  ap_free( &parser );
  return retval;
  }
