// Copyright 2007-2012 16Systems, LLC. All rights reserved.
// TCHunt is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// TCHunt is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with TCHunt. If not, see .
// Email: tchunt@16s.us
// 16 Systems, LLC
// P.O. Box 356
// Blacksburg, VA
// 24063
// Boost Includes
#include "boost/filesystem/operations.hpp"
#include "boost/program_options.hpp"
// Standard Includes
#include
#include
#include
#include
#include
bool VERBOSE = false;
// The modulo test.
bool modulo( const std::string& file_path, const uintmax_t file_size )
{
if ( file_size % 512 == 0 )
{
if ( VERBOSE == true )
{
std::cout << "modulo_true:\t" << file_path << "\t" << file_size << std::endl;
return true;
}
else
return true;
}
else
{
if ( VERBOSE == true )
{
std::cout << "modulo_false:\t" << file_path << "\t" << file_size << std::endl;
return false;
}
else
return false;
}
}
// The Chi Squared Distribution test.
bool X2( const std::string& file_path )
{
// sample_size = bytes read.
// possibilities = number of possible byte values.
static const int sample_size = 1024*13;
static const int possibilities = 256;
double chi = 0.0;
static const double min = 163.0;
static const double max = 373.0;
static const double expected = sample_size/possibilities;
std::vector observed_frequency;
std::vector sample_bytes;
std::ifstream fd;
// Open the file in binary mode for reading.
fd.open (file_path.c_str(), std::ios::binary);
int i = 0;
if ( fd.is_open() )
{
// Read bytes
while ( i < sample_size )
{
int c = fd.get();
sample_bytes.push_back( c );
++i;
}
// Close the file
fd.close();
//std::cout << sample_bytes.size() << std::endl;
}
else
{
if ( VERBOSE == true )
{
std::cout << "X2:\tcannot open file:\t" << file_path << std::endl;
return false;
}
else
return false;
}
for ( int x = 0; x < possibilities; ++x )
{
// Populate observed_frequency
double num_chars = count( sample_bytes.begin(), sample_bytes.end(), x );
observed_frequency.push_back(num_chars);
}
//std::cout << observed_frequency.size() << std::endl;
//~ The four chi squared calculations
//~ one = observed - expected
//~ two = one squared
//~ three = two/expected (this is individual chi)
//~ four = sum the individual chi results for one X2 score.
std::vector::iterator it;
for( it = observed_frequency.begin(); it != observed_frequency.end(); ++it )
{
double observed = *( it );
double one = observed - expected;
double two = one * one;
double three = two / expected;
chi += three;
}
if ( chi < min )
{
if ( VERBOSE == true )
{
std::cout << "X2_nandom:\t" << file_path << "\t" << chi << std::endl;
return false;
}
else
return false;
}
else if ( chi > max )
{
if ( VERBOSE == true )
{
std::cout << "X2_nandom:\t" << file_path << "\t" << chi << std::endl;
return false;
}
else
return false;
}
else
{
if ( VERBOSE == true )
{
std::cout << "X2_random:\t" << file_path << "\t" << chi << std::endl;
return true;
}
else
return true;
}
}
// Skip Certain files based on the file header (up to 4 bytes).
bool header( const std::string& file_path )
{
int i = 0;
std::vector header_bytes;
std::ifstream fd;
// Open the file in binary mode for reading.
fd.open (file_path.c_str(), std::ios::binary);
if (fd.is_open())
{
// Read First 4 bytes
while (i < 4)
{
header_bytes.push_back(fd.get());
++i;
}
// Close the file
fd.close();
}
else
{
if ( VERBOSE == true )
{
std::cout << "header:\tcannot open file:\t" << file_path << std::endl;
return false;
}
else
return false;
}
int byte1, byte2, byte3, byte4;
byte1 = header_bytes[0];
byte2 = header_bytes[1];
byte3 = header_bytes[2];
byte4 = header_bytes[3];
// Skip GZIP files
//
// Three Byte Header
// Hex(1f 8b 08) Dec(31 139 8)
if ( byte1 == 31 and byte2 == 139 and byte3 == 8 )
{
if ( VERBOSE == true )
{
std::cout << "header_GZIP:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip RAR files
//
// Four Byte Header
// Hex(52 61 72 21) Dec(82 97 114 33)
else if ( byte1 == 82 and byte2 == 97 and byte3 == 114 and byte4 == 33 )
{
if ( VERBOSE == true )
{
std::cout << "header_RAR:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip GIF files
//
// Three Byte Header
// Hex(47 49 46 38) Dec(71 73 70 56) GIF8
else if ( byte1 == 71 and byte2 == 73 and byte3 == 70 and byte4 == 56 )
{
if ( VERBOSE == true )
{
std::cout << "header_GIF:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip ZIP files
// 4 byte header Hex(50 4b 03 04) Dec(80 75 3 4)
else if ( byte1 == 80 and byte2 == 75 and byte3 == 3 and byte4 == 4 )
{
if ( VERBOSE == true )
{
std::cout << "header_ZIP:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip VDF files (Virus Database File from Avira)
// 4 byte header 'Viru' Hex(56 69 72 75) Dec(86 105 114 117)
else if ( byte1 == 86 and byte2 == 105 and byte3 == 114 and byte4 == 117 )
{
if ( VERBOSE == true )
{
std::cout << "header_ZIP:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip .ar | .deb files
// 4 byte header Hex(21 3c 61 72) Dec(33 60 97 114)
// Total header is "!"
else if ( byte1 == 33 and byte2 == 60 and byte3 == 97 and byte4 == 114 )
{
if ( VERBOSE == true )
{
std::cout << "header_AR:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip .cab files
// 4 byte header Dec(77 83 67 70)
else if ( byte1 == 77 and byte2 == 83 and byte3 == 67 and byte4 == 70 )
{
if ( VERBOSE == true )
{
std::cout << "header_CAB:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip .png files
// 4 byte header Dec(137 80 78 71)
else if ( byte1 == 137 and byte2 == 80 and byte3 == 78 and byte4 == 71 )
{
if ( VERBOSE == true )
{
std::cout << "header_PNG:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip .jpg files
// 4 byte header Dec(255 216 255 224)
else if ( byte1 == 255 and byte2 == 216 and byte3 == 255 and byte4 == 224 )
{
if ( VERBOSE == true )
{
std::cout << "header_JPG:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip .xex (xbox 360) files
// 4 byte header Dec(88 69 88 50)
else if ( byte1 == 88 and byte2 == 69 and byte3 == 88 and byte4 == 50 )
{
if ( VERBOSE == true )
{
std::cout << "header_XEX:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip ID3 files
// Dec(73 68 51 3)
else if ( byte1 == 73 and byte2 == 68 and byte3 == 51 and byte4 == 3 )
{
if ( VERBOSE == true )
{
std::cout << "header_ID3:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip RIFF
// Hex(52 49 46 46) Dec(82 73 70 70)
else if ( byte1 == 82 and byte2 == 73 and byte3 == 70 and byte4 == 70 )
{
if ( VERBOSE == true )
{
std::cout << "header_RIFF:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip PDF
// Hex(25 50 44 46) Dec(37 80 68 70)
else if ( byte1 == 37 and byte2 == 80 and byte3 == 68 and byte4 == 70 )
{
if ( VERBOSE == true )
{
std::cout << "header_PDF:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip BZ2
// Hex(42 5A 68 39) Dec(66 90 104 57)
else if ( byte1 == 66 and byte2 == 90 and byte3 == 104 and byte4 == 57 )
{
if ( VERBOSE == true )
{
std::cout << "header_BZ2:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip epi
// Hex(fd 86 a4 c9) Dec(253 134 164 201)
else if ( byte1 == 253 and byte2 == 134 and byte3 == 164 and byte4 == 201 )
{
if ( VERBOSE == true )
{
std::cout << "header_EPI:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// Skip MPQ (World of WarCraft data files)
// Hex(4d 50 51) Dec(77 80 81)
else if ( byte1 == 77 and byte2 == 80 and byte3 == 81 )
{
if ( VERBOSE == true )
{
std::cout << "header_MPQ:\t" << file_path << std::endl;
return true;
}
else
return true;
}
// File does not have a known file header.
else
{
if ( VERBOSE == true )
{
std::cout << "no_header:\t" << file_path << std::endl;
return false;
}
else
return false;
}
}
// Iterate through the filesystem applying tests.
void files( const boost::filesystem::path& dir_path, const unsigned int tc_min_file_size )
{
std::string str_path;
boost::filesystem::directory_iterator end_itr;
for ( boost::filesystem::directory_iterator itr(dir_path); itr != end_itr; ++itr )
{
str_path = itr->path().string();
try
{
// Folders
if ( boost::filesystem::is_directory( itr->status() ) and !boost::filesystem::symbolic_link_exists( itr->path() ) )
{
// Recursion
files( str_path, tc_min_file_size );
}
}
catch ( const boost::filesystem::filesystem_error& e )
{
std::cerr << e.what() << str_path << std::endl;
}
try
{
// Regular files
if ( boost::filesystem::is_regular( itr->status() ) and !boost::filesystem::symbolic_link_exists( itr->path() ) )
{
// Get file's size in bytes
const uintmax_t filesize = boost::filesystem::file_size( itr->path() );
// 1. If the file is above minimum file size, pass it on
if ( filesize >= tc_min_file_size )
{
// 2. If the file passes the modulo test, pass it on
if ( modulo( str_path, filesize ) == true )
{
// 3. If file passes X2 Test, pass it on
if ( X2( str_path ) == true )
{
// 4. If file has no known header, stop and report it
if ( header( str_path ) == false )
{
std::cout << "Suspect_File:\t" << str_path << std::endl;
}
}
}
}
else
{
if ( VERBOSE == true )
{
std::cout << "Too_Small:\t" << str_path << std::endl;
}
}
}
}
catch ( const boost::filesystem::filesystem_error& e )
{
std::cerr << e.what() << str_path << std::endl;
}
}
}
void version()
{
std::cout << "_/_/_/_/_/ _/_/_/ _/ _/ _/" << std::endl;
std::cout << " _/ _/ _/ _/ _/ _/ _/_/_/ _/_/_/_/" << std::endl;
std::cout << " _/ _/ _/_/_/_/ _/ _/ _/ _/ _/" << std::endl;
std::cout << " _/ _/ _/ _/ _/ _/ _/ _/ _/" << std::endl;
std::cout << "_/ _/_/_/ _/ _/ _/_/_/ _/ _/ _/_/ v1.6" << std::endl;
}
int main( int argc, char* argv[] )
{
const unsigned int tc_min_file_size = 15*(1024*1024); // 1024 * 19
std::string search_path;
boost::program_options::options_description d("\nAllowed options for TCHunt");
d.add_options()
("dir,d", boost::program_options::value(), "The directory to search (recursive).")
("help,h", "Print this message and exit.")
("verbose,v", "Print verbose output.")
;
boost::program_options::variables_map m;
try
{
boost::program_options::store( boost::program_options::parse_command_line( argc, argv, d ), m );
boost::program_options::notify(m);
}
catch ( boost::program_options::unknown_option )
{
std::cout << d << std::endl;
return 1;
}
if ( m.count("help") or argc == 1 )
{
version();
std::cout << d << std::endl;
return 0;
}
if ( m.count("verbose") )
{
VERBOSE = true;
}
if ( m.count("dir") )
{
search_path = m["dir"].as();
files( search_path, tc_min_file_size );
}
return 0;
}