diff --git a/CMakeLists.txt b/CMakeLists.txt index e8fa11a..e344acd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,4 +4,4 @@ project(Tarstats__) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror -std=c++20") -add_executable(tarstats_pp main.cpp) +add_executable(tarstats_pp main.cpp tarconst.h tarfunc.cpp tarfunc.h) diff --git a/main.cpp b/main.cpp index 5d18a5e..5baaa9c 100644 --- a/main.cpp +++ b/main.cpp @@ -1,95 +1,30 @@ +/* + * by Marcel Nowicki (github@epenis.de | github.com/blindi0815) + */ + #include #include #include #include -#include #include #include - -// global itemtype definition -std::string const typeFile = "FILES"; -std::string const typeDir = "DIRECTORIES"; -std::string const typeSym = "SYMLINKS"; -std::string const typeHard = "HARDLINKS"; -std::string const typeOther = "OTHER"; - -// checks if a valid modern tar file - ustar -bool validTar (std::istream & file) { - file.seekg(257); - char* buffer = new char [6]; - file.read(buffer, 6); - std::string magicfield(&buffer[0], 5); - delete[] buffer; - file.seekg(0); - return magicfield == "ustar" ? true: false; -} - -// checks if a 512byte block consist only of 0 or \0 -bool eof (const char* buf) { - std::string test(&buf[0]); - std::erase(test, '\0'); - std::erase(test, '0'); - if (test.size() == 0) - return 1; - return 0; -} - -// gets size of an item in bytes. assumes octal encoding. 
-uint64_t getitemsize(const char* buf){ - std::string asciisize(&buf[124], 11); // at offset 124 because first byte marks octal or base256 coding - uint64_t itemsize{}; - uint8_t power = 10; - for (auto i : asciisize) { - uint8_t value = i - '0'; - itemsize += static_cast(value * std::pow(8, power)); - power--; - } - return itemsize; -} - -// gets type of an item -std::string getitemtype(char &n) { - // read itemtype - switch (n){ - case '0': case '\0': - return typeFile; - case '1': - return typeHard; - case '2': - return typeSym; - case '5': - return typeDir; - default: - return typeOther; - } -} - - +#include "tarconst.h" +#include "tarfunc.h" int main(int argc, char** argv) { // GENERAL VARIABLES //count the types of all items std::map typecount{ - {typeFile, 0}, {typeDir, 0}, {typeSym, 0}, - {typeHard, 0}, {typeOther, 0} + {tarconstant::typeFile, 0}, {tarconstant::typeDir, 0}, {tarconstant::typeSym, 0}, + {tarconstant::typeHard, 0}, {tarconstant::typeOther, 0} }; uint64_t sizeof_allfiles{}; // total size of all files in the archive - std::string helptext - {"usage: tarstats-pp [-h] [-j] [-f] tarfile.\n" - "A tool to calculate basics statistics on tarball. Shamelessly inspired by github.com/isotopp/tarstats!\n\n" - "mandatory argument:\n" - "tarfile a tarfile to calculate stats on\n\n" - "optional arguments\n" - "-h this helptext\n" - "-j stats in JSON format printed to stdout\n" - "-f print stats to file .txt or .json if -j invoked"}; - // Trivial check for arguments. Errorprone and has to be changed. if (argc < 2) { - std::cout << helptext << '\n'; + std::cout << tarconstant::helptext << '\n'; return 9; } // Getting name from argument lists on startup. Trivial and errorprone. Placeholder for now. @@ -99,39 +34,36 @@ int main(int argc, char** argv) { std::ifstream file(archiveFilename, std::ios::binary); if(!file) { std::cout << "Error opening file!" 
<< '\n' << '\n'; - std::cout << helptext << '\n'; + std::cout << tarconstant::helptext << '\n'; return 9; } - if (!validTar(file)) { + if (!tar::validTar(file)) { std::cout << archiveFilename << " is not a valid tar file for tarstats-pp" << '\n'; return 9; } - // Tar spec is working with continous 512 byte size blocks. Header is 512 bytes. - int buffersize = 512; while (file) { - //Read header of first item in tar archive - char *headbuffer = new char[buffersize]; - file.read(headbuffer, buffersize); + //Read header of next item in tar archive + char *headbuffer = new char[tarconstant::blocksize]; + file.read(headbuffer, tarconstant::blocksize); // tar file ends with 2 512byte blocks of 0. As no block should ever be 0 unless at the end, we check only once. - if (eof(headbuffer)) { + if (tar::eof(headbuffer)) { break; } // Read type of item - std::string itemtype{}; - itemtype = getitemtype(headbuffer[156]); + std::string itemtype = tar::getitemtype(headbuffer[tarconstant::itemtypeByte]); typecount[itemtype] += 1; - // read itemsize and add to total - sizeof_allfiles += getitemsize(headbuffer); + sizeof_allfiles += tar::getitemsize(headbuffer); // ignore file content. we want to get to the next header. 
item types != FILE have no content blocks (0 byte)
-        if (getitemsize(headbuffer)!=0) {
-            file.ignore((getitemsize(headbuffer) / 512) * 512 + 512);
+        if (const uint64_t itemsize = tar::getitemsize(headbuffer); itemsize != 0) {
+            // content is padded to whole blocks: round up, so an exact multiple of the blocksize does not swallow the next header
+            file.ignore(static_cast<std::streamsize>(((itemsize + tarconstant::blocksize - 1) / tarconstant::blocksize) * tarconstant::blocksize));
         }
 
         // empty the headerbuffer
diff --git a/tarconst.h b/tarconst.h
new file mode 100644
index 0000000..d2e208b
--- /dev/null
+++ b/tarconst.h
@@ -0,0 +1,41 @@
+/* this headerfile contains global constant variables needed during parsing of a tar file
+ *
+ * by Marcel Nowicki (github@epenis.de | github.com/blindi0815)
+ */
+
+#ifndef TARSTATS___TARCONST_H
+#define TARSTATS___TARCONST_H
+
+#include <cstdint>
+#include <string>
+#include <utility>
+
+namespace tarconstant {
+    // global itemtype definition (used as keys when counting the items of an archive)
+    inline std::string const typeFile = "FILES";
+    inline std::string const typeDir = "DIRECTORIES";
+    inline std::string const typeSym = "SYMLINKS";
+    inline std::string const typeHard = "HARDLINKS";
+    inline std::string const typeOther = "OTHER";
+
+    // valid magicfield
+    inline std::string const mgcfield = "ustar";
+
+    // Byte offsets. constexpr: fixed by the tar format, nothing may reassign them at runtime
+    inline constexpr uint16_t blocksize = 512; // tar consists of continuous 512 Byte data blocks
+    inline constexpr uint8_t itemtypeByte = 156;
+    inline constexpr std::pair<uint16_t, uint8_t> mgcfieldByte {257,6}; // { offset , length }
+    inline constexpr std::pair<uint16_t, uint8_t> itemoctlnByte {124,11}; // { offset , length } of the octal size digits;
+                                                  // NOTE(review): GNU base256 sizes (high bit of first byte set) are not decoded
+    inline std::string const helptext
+            {"usage: tarstats-pp [-h] [-j] [-f] tarfile.\n"
+             "A tool to calculate basics statistics on tarball. 
Shamelessly inspired by github.com/isotopp/tarstats!\n\n"
+             "mandatory argument:\n"
+             "tarfile a tarfile to calculate stats on\n\n"
+             "optional arguments\n"
+             "-h this helptext\n"
+             "-j stats in JSON format printed to stdout\n"
+             "-f print stats to file .txt or .json if -j invoked"};
+}
+
+#endif //TARSTATS___TARCONST_H
diff --git a/tarfunc.cpp b/tarfunc.cpp
new file mode 100644
index 0000000..0c15b95
--- /dev/null
+++ b/tarfunc.cpp
@@ -0,0 +1,60 @@
+/* this file contains function definitions for parsing a tar file.
+ *
+ * by Marcel Nowicki (github@epenis.de | github.com/blindi0815)
+ */
+
+#include "tarfunc.h"
+#include "tarconst.h"
+#include <algorithm>
+#include <cmath>
+#include <string>
+
+// checks if a valid modern tar file - ustar. always rewinds the stream to offset 0 before returning
+bool tar::validTar(std::istream &file) {
+    // pre-filled with \0, so a short read never leaves indeterminate bytes to compare
+    std::string magicfield(static_cast<std::size_t>(tarconstant::mgcfieldByte.second), '\0');
+    file.seekg(tarconstant::mgcfieldByte.first);
+    file.read(magicfield.data(), static_cast<std::streamsize>(magicfield.size()));
+    const bool complete = file.gcount() == static_cast<std::streamsize>(magicfield.size());
+    // a file shorter than the magic field sets eof/fail; clear it or the rewind below is a no-op
+    file.clear();
+    file.seekg(0);
+    return complete && magicfield.compare(0, tarconstant::mgcfield.size(), tarconstant::mgcfield) == 0;
+}
+
+// checks if a 512byte block consists only of 0 or \0. buf must point to one full block
+bool tar::eof (const char* buf) {
+    // scan exactly one block: a C-string copy stops at the first \0 and may run past the block end
+    return std::all_of(buf, buf + tarconstant::blocksize,
+                       [](char c) { return c == '\0' || c == '0'; });
+}
+
+// gets size of an item in bytes. assumes octal encoding. 
+uint64_t tar::getitemsize(const char* buf){
+    // integer-only octal parse (std::pow is double math): skips leading blanks, stops at the ' ' or \0 terminator
+    const char* digit = &buf[tarconstant::itemoctlnByte.first];
+    const char* const end = digit + tarconstant::itemoctlnByte.second;
+    while (digit != end && *digit == ' ')
+        ++digit;
+    uint64_t itemsize{};
+    for (; digit != end && *digit >= '0' && *digit <= '7'; ++digit)
+        itemsize = itemsize * 8 + static_cast<uint64_t>(*digit - '0');
+    return itemsize;
+}
+
+// gets type of an item from the typeflag byte of its header
+std::string tar::getitemtype(char n) {
+    // '\0' is the pre-ustar marker for a regular file, everything unknown is counted as OTHER
+    switch (n){
+        case '0': case '\0':
+            return tarconstant::typeFile;
+        case '1':
+            return tarconstant::typeHard;
+        case '2':
+            return tarconstant::typeSym;
+        case '5':
+            return tarconstant::typeDir;
+        default:
+            return tarconstant::typeOther;
+    }
+}
\ No newline at end of file
diff --git a/tarfunc.h b/tarfunc.h
new file mode 100644
index 0000000..bd43a40
--- /dev/null
+++ b/tarfunc.h
@@ -0,0 +1,29 @@
+/* this header file contains function declarations used for parsing a tar file
+ *
+ * by Marcel Nowicki (github@epenis.de | github.com/blindi0815)
+ */
+
+#ifndef TARSTATS___TARFUNC_H
+#define TARSTATS___TARFUNC_H
+
+#include <cstdint>
+#include <iostream>
+#include <string>
+
+namespace tar {
+// checks if a valid modern tar file - ustar
+    bool validTar(std::istream &file);
+
+// checks if a 512byte block consists only of 0 or \0
+    bool eof(const char *buf);
+
+// gets size of an item in bytes. assumes octal encoding.
+// buf points to the header block of the item; leading blanks of the size field are skipped
+// and parsing stops at the first character that is not an octal digit.
+    uint64_t getitemsize(const char *buf);
+
+// gets type of an item: maps the typeflag byte n to one of the tarconstant::type* labels
+    std::string getitemtype(char n);
+}
+
+#endif //TARSTATS___TARFUNC_H