// GMMM (C) 2020 Stephane Charette <stephanecharette@gmail.com>
// $Id: gmm-merge.cpp 3096 2020-11-23 07:31:01Z stephane $

// Ignore the localtime() warning in Windows.
#define _CRT_SECURE_NO_WARNINGS

#include <cmath>
#include <ctime>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <regex>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>
#include "MCFile.hpp"


// Keyword substitution values.  main() fills these in once at startup, and
// replace_keywords() substitutes them for "YEAR" and "TIMESTAMP".
std::string current_year;		// "2020"
std::string current_timestamp;	// "2020-11-22-15-45" (YYYY-mm-dd-HH-MM)

// Working directories taken from the command line (after keyword substitution).
// validate_directory() rewrites each one to its canonical form.
std::string input_dir;
std::string output_dir;
std::string backup_dir;			// optional -- left empty when no backup directory was given


// Summary files queued by merge_file() and rebuilt by merge_files().
// NOTE(review): SetMCFiles is not declared in this file -- presumably it comes from MCFile.hpp.
SetMCFiles summary_files_to_rebuild;

// Counters reported at the end of merge_files().
size_t number_of_files_moved = 0;
size_t number_of_data_files_processed = 0;
size_t number_of_board_moisture_values = 0;
size_t number_of_backup_files_copies = 0;


/** Validate a directory name and replace it with its canonical path.
 *
 * @param [in,out] dir Directory to validate.  On success it is overwritten
 * with the canonical form of the path.
 * @param [in] attempt_to_create When @p true and @p dir does not exist yet,
 * the directory (and any missing parents) is created before it is validated.
 *
 * @throws std::invalid_argument if @p dir does not exist or is not a directory.
 * @throws std::filesystem::filesystem_error if a filesystem call itself fails.
 */
void validate_directory(std::string & dir, const bool attempt_to_create)
{
	std::cout << "-> validating directory \"" << dir << "\"" << std::endl;

	// attempt to create the directory if it does not yet exist.
	if (attempt_to_create && std::filesystem::exists(dir) == false)
	{
		std::cout << "-> attempting to create new directory \"" << dir << "\"" << std::endl;
		std::filesystem::create_directories(dir);
	}

	// canonical() throws when the path does not exist, so the existence check
	// must come first or the friendlier error below can never be raised
	if (std::filesystem::exists(dir) == false)
	{
		throw std::invalid_argument("\"" + dir + "\" does not exist");
	}

	const auto path = std::filesystem::canonical(dir);

	if (std::filesystem::is_directory(path) == false)
	{
		throw std::invalid_argument("\"" + path.string() + "\" is not a directory");
	}

	dir = path.string();

	std::cout << "-> using path \"" << dir << "\"" << std::endl;

	return;
}


/** Validate the input, output and (optional) backup directories.
 *
 * Each directory name is passed through validate_directory(), which rewrites
 * it in place.  The output and backup directories are created on demand; the
 * input directory is not.
 *
 * @throws std::invalid_argument if any two of the directories in use turn out
 * to be the same, or if validate_directory() rejects one of them.
 */
void validate_directories()
{
	// only the input directory is required to exist beforehand
	validate_directory(input_dir, false);
	validate_directory(output_dir, true);

	const bool same_input_and_output = (input_dir == output_dir);
	if (same_input_and_output)
	{
		throw std::invalid_argument("must specify different input and output directories");
	}

	// no backup directory on the command line means there is nothing more to check
	const bool backup_requested = !backup_dir.empty();
	if (backup_requested)
	{
		validate_directory(backup_dir, true);

		const bool backup_clashes = (backup_dir == input_dir) || (backup_dir == output_dir);
		if (backup_clashes)
		{
			throw std::invalid_argument("the backup directory must be different than the input and output directories");
		}
	}

	std::cout << std::endl;
}


/** Substitute the "YEAR" and "TIMESTAMP" keywords in a string.
 *
 * Every occurrence of "YEAR" is replaced with @p current_year and every
 * occurrence of "TIMESTAMP" with @p current_timestamp.  Matching is case
 * sensitive.  Replacement text is never rescanned, so the function always
 * terminates even if a replacement value happens to contain its own keyword.
 *
 * @param [in] str Text to process (taken by value and modified locally).
 * @return The text with all keywords substituted.
 */
std::string replace_keywords(std::string str)
{
	// replace "YEAR" with "2020" and "TIMESTAMP" with "2020-11-22-15-32"

	const std::map<std::string, std::string> m =
	{
		{"YEAR"		, current_year		},
		{"TIMESTAMP", current_timestamp	}
	};

	for (const auto & [key, val] : m)
	{
		size_t pos = str.find(key);
		while (pos != std::string::npos)
		{
			str.replace(pos, key.size(), val);

			// resume after the text just inserted instead of rescanning from the start
			pos = str.find(key, pos + val.size());
		}
	}

	return str;
}


/** Process a single file found in the input directory.
 *
 * The file is first parsed with MCFile::init(); if that throws, the file is
 * skipped.  Otherwise the file is copied to the backup directory (when one was
 * given), moved to the output directory unless it is a summary file, and the
 * corresponding summary file is queued in @p summary_files_to_rebuild.
 *
 * @param [in] src Full path of the file to process.
 * @return @p true when the file was accepted, @p false when it was skipped
 * because MCFile::init() threw.
 */
bool merge_file(const std::filesystem::path & src)
{
	const std::filesystem::path name = src.filename();

	std::cout << std::endl << "-> found " << src.string() << std::endl;

	MCFile mc_file;
	try
	{
		mc_file.init(src);
	}
	catch (...)
	{
		// anything MCFile cannot parse is left alone
		std::cout << "-> skipping invalid file " << name << std::endl;
		return false;
	}

	std::cout << "-> " << mc_file.to_string() << std::endl;

	// optional backup copy, taken before the original is touched
	if (!backup_dir.empty())
	{
		const std::filesystem::path backup_copy = backup_dir / name;
		std::cout << "-> backup " << name.string() << " to " << backup_copy.string() << std::endl;
		std::filesystem::copy_file(src, backup_copy, std::filesystem::copy_options::update_existing);
		++number_of_backup_files_copies;
	}

	// summary files are not moved; only data files go to the output directory
	if (!mc_file.is_summary())
	{
		const std::filesystem::path destination = output_dir / name;
		std::cout << "-> moving " << name.string() << " to " << destination.string() << std::endl;
		std::filesystem::rename(src, destination);
		++number_of_files_moved;
	}

	MCFile summary_file = mc_file.get_summary_file();
	std::cout << "-> scheduling summary file " << summary_file.file.filename() << " to be rebuilt once all .mc files are copied" << std::endl;
	summary_files_to_rebuild.insert(summary_file);

	return true;
}


/** Rebuild one summary file from the data files in the output directory.
 *
 * Every entry in @p output_dir that can be parsed as an MCFile with the same
 * kiln and run as @p summary_file, but a different pkg, is read as a data file:
 * the first line is skipped and the remaining comma-separated values are taken
 * as moisture readings.  Entries that MCFile cannot parse are silently ignored.
 *
 * The summary written to @p summary_file.file is a single line containing the
 * number of boards, one zero-padded count per bucket (8% through 24%), two
 * placeholder fields, the standard deviation, the average, the sum of the
 * readings and the sum of their squares.
 *
 * The global counters @p number_of_data_files_processed and
 * @p number_of_board_moisture_values are updated along the way.
 *
 * @param [in] summary_file Identifies the kiln/run to summarize and the path
 * of the file to write.
 */
void rebuild_summary(MCFile & summary_file)
{
	std::cout << std::endl << "-> rebuilding " << summary_file.to_string() << " (" << summary_file.file.string() << ")" << std::endl;

	// We need to track the number of boards in each "bucket".  The buckets start at 8% and go to 24% (17 buckets).
	const size_t min_bucket							= 8;
	const size_t max_bucket							= 24;
	const size_t number_of_buckets					= 1 + max_bucket - min_bucket;
	size_t total_number_of_boards					= 0;
	float sum_of_all_moisture_readings				= 0.0f;
	float sum_of_square_of_all_moisture_readings	= 0.0f;
	std::vector<size_t> bucket(number_of_buckets, 0);
	std::vector<float> moisture_values;

	for (const auto & entry : std::filesystem::directory_iterator(output_dir))
	{
		try
		{
			MCFile mc_file(entry.path());
			if (mc_file.kiln == summary_file.kiln &&
				mc_file.run == summary_file.run &&
				mc_file.pkg != summary_file.pkg )
			{
				std::cout << "-> found " << mc_file.to_string() << " (" << mc_file.file.filename().string() << ")" << std::endl;

				number_of_data_files_processed ++;

				std::ifstream ifs(entry.path());
				if (ifs.good())
				{
					std::cout << "-> reading boards from " << entry.path().filename().string() << std::flush;
					size_t boards_in_this_file = 0;

					std::string line;
					std::getline(ifs, line); // 1st line is bucket info; we'll ignore all the values on this line

					// NOTE(review): values are split on ',' only, so a token that spans a line
					// break yields just its first number -- confirm data files keep all values on one line
					std::string token;
					try
					{
						while (std::getline(ifs, token, ','))
						{
							// a blank token (e.g. the newline after a trailing comma) is not a reading
							if (token.find_first_not_of(" \t\r\n") == std::string::npos)
							{
								continue;
							}

							const float value = std::stof(token);
							total_number_of_boards ++;
							boards_in_this_file ++;
							number_of_board_moisture_values ++;
							sum_of_all_moisture_readings += value;
							sum_of_square_of_all_moisture_readings += (value * value);
							moisture_values.push_back(value);

							// Clamp while still in floating point: converting a negative, huge
							// or NaN float directly to size_t is undefined behaviour.
							const float floored = std::floor(value);
							size_t bucket_index = 0;
							if (floored >= static_cast<float>(max_bucket))
							{
								bucket_index = number_of_buckets - 1;
							}
							else if (floored > static_cast<float>(min_bucket))
							{
								bucket_index = static_cast<size_t>(floored) - min_bucket;
							}

							bucket.at(bucket_index) ++;

							if (total_number_of_boards % 10 == 0)
							{
								std::cout << "." << std::flush;
							}
						}
					}
					catch (const std::exception & e)
					{
						// the readings collected so far are kept, but tell the user the file was cut short
						std::cout << std::endl << "-> WARNING: stopped reading " << entry.path().filename().string() << " at an invalid value (" << e.what() << ")";
					}

					std::cout << " [" << boards_in_this_file << "]" << std::endl;
				}
			}
		}
		catch(...)
		{
			// do nothing; move on to the next file
//			std::cout << "ERROR: failed to process " << entry.path() << std::endl;
		}
	}

	// we're done reading the .mc files -- now we need to create the summary file

	const float average = [&]()
	{
		float f = 0.0f;
		if (total_number_of_boards > 0)
		{
			f = sum_of_all_moisture_readings / static_cast<float>(total_number_of_boards);
		}
		return f;
	}();

	// population standard deviation (divides by N, not N-1)
	const float standard_deviation = [&]()
	{
		float f = 0.0f;
		if (total_number_of_boards > 0)
		{
			for (const auto & value : moisture_values)
			{
				const float delta = value - average;
				f += delta * delta;
			}
			f = f / static_cast<float>(total_number_of_boards);
			f = std::sqrt(f);
		}
		return f;
	}();

	std::ofstream ofs(summary_file.file.string());
	if (ofs.is_open())
	{
		ofs << total_number_of_boards << ",";
		for (const auto count : bucket)
		{
			// note that std::fixed and the '0' fill character stay in effect for the fields that follow
			ofs << std::fixed << std::setfill('0') << std::setw(4) << count << ",";
		}

		ofs	<< "000"							<< ","	// unknown field
			<< "000"							<< ","	// unknown field
			<< standard_deviation				<< ","
			<< average							<< ","
			<< sum_of_all_moisture_readings		<< ","
			<< sum_of_square_of_all_moisture_readings
			<< std::endl;
	}
	else
	{
		// keep going so the remaining summary files still get rebuilt
		std::cout << "-> ERROR: failed to open " << summary_file.file.string() << " for writing" << std::endl;
	}

	std::cout << "-> boards=" << total_number_of_boards << ", avg=" << average << ", sd=" << standard_deviation << std::endl;

	return;
}


/** Merge every regular file in the input directory into the output directory.
 *
 * Each regular file in @p input_dir is handed to merge_file().  Files that were
 * accepted but are still present in the input directory afterwards are removed,
 * every summary file queued in @p summary_files_to_rebuild is rebuilt in
 * @p output_dir, and a table of counters is printed.
 */
void merge_files()
{
	// Take a snapshot of the directory before touching it.  merge_file() moves files out
	// of input_dir, and whether a live directory_iterator observes such changes is unspecified.
	std::vector<std::filesystem::path> candidates;
	for (const auto & entry : std::filesystem::directory_iterator(input_dir))
	{
		if (entry.is_regular_file())
		{
			candidates.push_back(entry.path());
		}
	}

	std::set<std::filesystem::path> files_processed;
	for (const auto & candidate : candidates)
	{
		if (merge_file(candidate))
		{
			// if we get here then it means it was a proper .mc file that we
			// either moved or it was a summary file that needs to be rebuilt
			files_processed.insert(candidate);
		}
	}

	std::cout << std::endl << "-> looking for files to remove..." << std::endl;
	for (const auto & filename : files_processed)
	{
		if (std::filesystem::exists(filename))
		{
			std::cout << "-> removing " << filename.string() << std::endl;
			std::filesystem::remove(filename);
		}
	}

	std::cout << std::endl << "-> looking for summary files to rebuild..." << std::endl;
	for (auto entry : summary_files_to_rebuild)
	{
		// deliberately a copy: the path is rewritten to point into the output directory
		entry.file = output_dir / entry.file;
		rebuild_summary(entry);
	}

	std::cout
		<< std::endl
		<< "Number of summary files rebuilt ...... " << summary_files_to_rebuild.size()	<< std::endl
		<< "Number of .mc backup files created ... " << number_of_backup_files_copies	<< std::endl
		<< "Number of .mc files moved ............ " << number_of_files_moved			<< std::endl
		<< "Number of .mc data files processed ... " << number_of_data_files_processed	<< std::endl
		<< "Number of moisture board values ...... " << number_of_board_moisture_values	<< std::endl
		<< std::endl
		<< "Done!" << std::endl
		<< std::endl;

	return;
}


/** Program entry point.
 *
 * Expects 2 or 3 command-line parameters: the input directory, the output
 * directory and an optional backup directory.  The keywords "YEAR" and
 * "TIMESTAMP" in those names are substituted before the directories are
 * validated and the files merged.
 *
 * @return 0 on success, 1 when the usage text was shown because of a wrong
 * number of parameters, 2 when an exception was caught.
 */
int main(int argc, char *argv[])
{
	std::cout
		<< "Gorman Moisture Meter Merge" << std::endl
		<< "Built " << __DATE__ << " " << __TIME__ << "." << std::endl
		<< std::endl;

	try
	{
		// get the current year and remember it since it gets used in several places
		const std::time_t now = std::time(nullptr);
		const std::tm * timeinfo = std::localtime(&now);
		if (timeinfo == nullptr)
		{
			// strftime() must not be handed a null pointer
			throw std::runtime_error("failed to determine the local time");
		}

		char buffer[50] = "";

		std::strftime(buffer, sizeof(buffer), "%Y-%m-%d-%H-%M", timeinfo);
		current_timestamp	= buffer;
		current_year		= current_timestamp.substr(0, 4);

		if (argc < 3 || argc > 4)
		{
			std::cout
				<< "Requires 2 or 3 parameters:" << std::endl
				<< "" << std::endl
				<< "\t- input directory" << std::endl
				<< "\t- output directory" << std::endl
				<< "\t- backup directory (optional)" << std::endl
				<< std::endl
				<< "Examples:" << std::endl
				<< std::endl
				<< "\t" << argv[0] << " c:\\22m2k\\data\\board_oroville c:\\22m2k\\data\\YEAR" << std::endl
				<< "\t" << argv[0] << " c:\\22m2k\\data\\board_oroville c:\\22m2k\\data\\YEAR c:\\22m2k\\backup\\TIMESTAMP" << std::endl
				<< std::endl
				<< "If the word \"YEAR\" (case sensitive!) appears anywhere in a" << std::endl
				<< "directory name, it will be replaced with the current 4-digit year." << std::endl
				<< "So a directory path like this one:" << std::endl
				<< std::endl
				<< "\tC:\\22m2k\\data\\YEAR_test\\" << std::endl
				<< std::endl
				<< "would be interpreted as if it was:" << std::endl
				<< std::endl
				<< "\tC:\\22m2k\\data\\" << current_year << "_test\\" << std::endl
				<< std::endl
				<< "If the word \"TIMESTAMP\" (case sensitive!) appears anywhere in a" << std::endl
				<< "directory name, it will be replaced with the current date and time" << std::endl
				<< "(using a 24-hour clock).  So a directory path like this one:" << std::endl
				<< std::endl
				<< "\tC:\\22m2k\\backup\\TIMESTAMP\\" << std::endl
				<< std::endl
				<< "would be interpreted as if it was:" << std::endl
				<< std::endl
				<< "\tC:\\22m2k\\backup\\" << current_timestamp << "\\" << std::endl
				<< std::endl;

			return 1;
		}

		/* argv[0] is the .exe
		 * argv[1] is the input dir
		 * argv[2] is the output dir
		 * argv[3] is the backup dir, but this one is optional so check before using it!
		 */
		input_dir	= replace_keywords(argv[1]);
		output_dir	= replace_keywords(argv[2]);
		if (argc > 3)
		{
			backup_dir = replace_keywords(argv[3]);
		}

		std::cout
			<< "- input directory:\t"	<< input_dir	<< std::endl
			<< "- output directory:\t"	<< output_dir	<< std::endl
			<< "- backup directory:\t"	<< backup_dir	<< std::endl
			<< std::endl;

		validate_directories();

		std::cout << "Reading from \"" << input_dir << "\" and merging into \"" << output_dir << "\"..." << std::endl;

		merge_files();
	}
	catch (const std::exception & e)
	{
		std::cout << "Error occured: " << e.what() << std::endl;
		return 2;
	}

	return 0;
}
