this successfully implemented gz parsing.

This commit is contained in:
Marcel Nowicki
2022-02-12 00:29:59 +01:00
parent 692ab2ff85
commit 0b0759271a
4 changed files with 165 additions and 74 deletions

178
main.cpp
View File

@@ -19,15 +19,23 @@ int main(int argc, char** argv) {
} }
// GENERAL VARIABLES // GENERAL VARIABLES
bool toFile = false;
bool toJSON = false;
uintmax_t global_sizeofall{};
std::map<std::string, uintmax_t> globaltypecount{
{tarconstant::typeFile, 0},
{tarconstant::typeDir, 0},
{tarconstant::typeSym, 0},
{tarconstant::typeHard, 0},
{tarconstant::typeOther, 0}
};
// Load command line parameters into vector, read them and set variables // Load command line parameters into vector, read them and set variables
std::vector<std::string> cmdparam{}; std::vector<std::string> cmdparam{};
for (int i = 1; i < argc; i++){ for (int i = 1; i < argc; i++){
cmdparam.emplace_back(argv[i]); cmdparam.emplace_back(argv[i]);
} }
bool toFile = false;
bool toJSON = false;
bool g = false;
std::vector<std::string> archiveFilename {}; std::vector<std::string> archiveFilename {};
for (auto i : cmdparam) { for (auto i : cmdparam) {
@@ -46,9 +54,6 @@ int main(int argc, char** argv) {
case 'h': case 'h':
tar::printhelp(); tar::printhelp();
return 9; return 9;
case 'g':
g = true;
break;
case '-': case '-':
break; break;
default: default:
@@ -60,79 +65,116 @@ int main(int argc, char** argv) {
} }
} }
if (g) {
std::ifstream test("test4.tar.gz", std::ios::binary);
char x;
test.seekg(1);
test.read((&x), 1);
test.close();
if (x == '\213')
std::cout << "success" << std::endl;
gzFile gzIn = gzopen("test4.tar.gz", "rb");
gzbuffer(gzIn, 512);
char* testbuf = new char[512];
gzread(gzIn, testbuf, 512);
std::string teststr (&testbuf[0], 100);
std::cout << teststr << '\n';
return 0;
}
for (auto &archiveName : archiveFilename) { for (auto &archiveName : archiveFilename) {
//count the types of all items
std::map<std::string, uintmax_t> typecount{
{tarconstant::typeFile, 0}, {tarconstant::typeDir, 0}, {tarconstant::typeSym, 0},
{tarconstant::typeHard, 0}, {tarconstant::typeOther, 0}
};
uint64_t sizeof_allfiles{}; // total size of all files in the archive if (!tar::fileOpen(archiveName)) {
//Open tar File.
std::ifstream file(archiveName, std::ios::binary);
if (!file) {
std::cout << "Error opening file " << archiveName << "!" << '\n' << '\n'; std::cout << "Error opening file " << archiveName << "!" << '\n' << '\n';
tar::printhelp(); tar::printhelp();
return 9; return 9;
} }
if (!tar::validTar(file)) { //count the types of all items
std::cout << "!!!!!!!" << '\n'; std::map<std::string, uintmax_t> typecount{
std::cout << archiveName << " is not a valid tar file for tarstats-pp" << '\n'; {tarconstant::typeFile, 0},
std::cout << "!!!!!!!" << '\n' << '\n'; {tarconstant::typeDir, 0},
} {tarconstant::typeSym, 0},
else { {tarconstant::typeHard, 0},
while (file) { {tarconstant::typeOther, 0}
//Read header of next item in tar archive };
char *headbuffer = new char[tarconstant::blocksize];
file.read(headbuffer, tarconstant::blocksize);
// tar file ends with 2 512byte blocks of 0. As no block should ever be 0 unless at the end, we check only once. uint64_t sizeof_allfiles{}; // total size of all files in the archive
if (tar::eof(headbuffer)) { bool isGz = tar::validGzip(archiveName);
break;
}
// Read type of item if (isGz) {
std::string itemtype = tar::getitemtype(headbuffer[tarconstant::itemtypeByte]); if (!tar::gzValidTar(archiveName)) {
typecount[itemtype] += 1; std::cout << "!!!!!!!" << '\n';
std::cout << archiveName << " is not a valid tar file for tarstats-pp" << '\n';
// read itemsize and add to total std::cout << "!!!!!!!" << '\n' << '\n';
sizeof_allfiles += tar::getitemsize(headbuffer);
// ignore file content. we want to get to the next header. item types != FILE have no content blocks (0 byte)
if (tar::getitemsize(headbuffer) != 0) {
file.ignore((tar::getitemsize(headbuffer) / tarconstant::blocksize)
* tarconstant::blocksize + tarconstant::blocksize);
}
// empty the headerbuffer
delete[] headbuffer;
} }
file.close(); else {
gzFile gzIn = gzopen(archiveName.c_str(), "r");
gzbuffer(gzIn, 8192);
while (!gzeof(gzIn)) {
std::cout << toJSON << '\n'; char headbuffer[512] = {0};
tar::consolestats(typecount, std::filesystem::file_size(archiveName), sizeof_allfiles); uint64_t unzippedbytes = gzread(gzIn, headbuffer, sizeof(headbuffer));
if (toFile) { if (unzippedbytes == 0)
tar::txtfilestats(typecount, std::filesystem::file_size(archiveName), sizeof_allfiles, break;
archiveName);
// tar file ends with 2 512byte blocks of 0. As no block should ever be 0 unless at the end, we check only once.
if (tar::eof(headbuffer)) {
break;
}
// Read type of item
std::string itemtype = tar::getitemtype(headbuffer[tarconstant::itemtypeByte]);
typecount[itemtype] += 1;
// read itemsize and add to total
sizeof_allfiles += tar::getitemsize(headbuffer);
// ignore file content. we want to get to the next header. item types != FILE have no content blocks (0 byte)
uintmax_t help = (tar::getitemsize(headbuffer) / tarconstant::blocksize)
* tarconstant::blocksize + tarconstant::blocksize;
char dump[help] = {0};
if (tar::getitemsize(headbuffer) != 0) {
gzread(gzIn, dump, (tar::getitemsize(headbuffer) / tarconstant::blocksize)
* tarconstant::blocksize + tarconstant::blocksize);
}
}
gzclose(gzIn);
tar::consolestats(typecount, std::filesystem::file_size(archiveName), sizeof_allfiles);
if (toFile) {
tar::txtfilestats(typecount, std::filesystem::file_size(archiveName),
sizeof_allfiles, archiveName);
}
}
}
if (!isGz) {
//Open tar File.
std::ifstream file(archiveName, std::ios::binary);
if (!tar::validTar(file)) {
std::cout << "!!!!!!!" << '\n';
std::cout << archiveName << " is not a valid tar file for tarstats-pp" << '\n';
std::cout << "!!!!!!!" << '\n' << '\n';
} else {
while (file) {
//Read header of next item in tar archive
char *headbuffer = new char[tarconstant::blocksize];
file.read(headbuffer, tarconstant::blocksize);
// tar file ends with 2 512byte blocks of 0. As no block should ever be 0 unless at the end, we check only once.
if (tar::eof(headbuffer)) {
break;
}
// Read type of item
std::string itemtype = tar::getitemtype(headbuffer[tarconstant::itemtypeByte]);
typecount[itemtype] += 1;
// read itemsize and add to total
sizeof_allfiles += tar::getitemsize(headbuffer);
// ignore file content. we want to get to the next header. item types != FILE have no content blocks (0 byte)
if (tar::getitemsize(headbuffer) != 0) {
file.ignore((tar::getitemsize(headbuffer) / tarconstant::blocksize)
* tarconstant::blocksize + tarconstant::blocksize);
}
// empty the headerbuffer
delete[] headbuffer;
}
file.close();
std::cout << toJSON << '\n';
tar::consolestats(typecount, std::filesystem::file_size(archiveName), sizeof_allfiles);
if (toFile) {
tar::txtfilestats(typecount, std::filesystem::file_size(archiveName), sizeof_allfiles,
archiveName);
}
} }
} }
} }

View File

@@ -17,8 +17,9 @@ namespace tarconstant {
inline std::string const typeHard = "HARDLINKS"; inline std::string const typeHard = "HARDLINKS";
inline std::string const typeOther = "OTHER"; inline std::string const typeOther = "OTHER";
// valid magicfield // valid magicfield tar
inline std::string const mgcfield = "ustar"; inline std::string const mgctar = "ustar";
inline constexpr char mgcgzip = '\213';
// Byte offsets // Byte offsets
inline uint16_t blocksize = 512; // tar consists of continuous 512 Byte data blocks inline uint16_t blocksize = 512; // tar consists of continuous 512 Byte data blocks

View File

@@ -9,6 +9,7 @@
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include "zlib.h"
// checks if a valid modern tar file - ustar // checks if a valid modern tar file - ustar
bool tar::validTar(std::istream &file) { bool tar::validTar(std::istream &file) {
@@ -18,7 +19,20 @@ bool tar::validTar(std::istream &file) {
std::string magicfield(&buffer[0], 5); std::string magicfield(&buffer[0], 5);
delete[] buffer; delete[] buffer;
file.seekg(0); file.seekg(0);
return magicfield == tarconstant::mgcfield ? true: false; return magicfield == tarconstant::mgctar ? true : false;
}
// check if tar in gzfile is valid
bool tar::gzValidTar(std::string filename) {
auto gzIn = ::gzopen(filename.c_str(), "r");
gzbuffer(gzIn, 8192);
char headbuffer[512] = {0};
gzread( gzIn, headbuffer, sizeof(headbuffer));
std::string magicfield (&headbuffer[tarconstant::mgcfieldByte.first], 5);
gzclose(gzIn);
if (magicfield == tarconstant::mgctar)
return true;
return false;
} }
// checks if a 512byte block consist only of 0 or \0 // checks if a 512byte block consist only of 0 or \0
@@ -87,4 +101,29 @@ void tar::txtfilestats (std::map<std::string, uintmax_t> &typecount, uintmax_t t
// print out helpertext // print out helpertext
void tar::printhelp(){ void tar::printhelp(){
std::cout << tarconstant::helptext << '\n' << '\n'; std::cout << tarconstant::helptext << '\n' << '\n';
}
// check if valid GNU ZIP file via its two leading magic bytes (0x1f 0x8b).
// Returns false when the file cannot be opened or is shorter than two bytes,
// instead of comparing a byte that was never read.
bool tar::validGzip(std::string &filename) {
std::ifstream file(filename, std::ios::binary);
// value-initialised so a failed read can never leave indeterminate data
char magic[2] = {0, 0};
// read() sets failbit on a short read, which also covers an unopened stream
if (!file.read(magic, sizeof(magic)))
return false;
// first ID byte is 0x1f ('\037'); the second is the project constant 0x8b.
// Checking both avoids treating a plain tar whose first entry name happens
// to have 0x8b as its second byte as gzip data.
return magic[0] == '\037' && magic[1] == tarconstant::mgcgzip;
}
// check if file can be opened
bool tar::fileOpen(std::string &filename) {
std::ifstream file(filename);
if(file) {
file.close();
return true;
}
file.close();
return false;
} }

View File

@@ -15,6 +15,9 @@ namespace tar {
// checks if a valid modern tar file - ustar // checks if a valid modern tar file - ustar
bool validTar(std::istream &file); bool validTar(std::istream &file);
// check if tar in gzfile is valid
bool gzValidTar(std::string filename);
// checks if a 512byte block consist only of 0 or \0 // checks if a 512byte block consist only of 0 or \0
bool eof(const char *buf); bool eof(const char *buf);
@@ -25,13 +28,19 @@ namespace tar {
std::string getitemtype(char &n); std::string getitemtype(char &n);
// to write stats to console in default style // to write stats to console in default style
void consolestats (std::map<std::string, uintmax_t> &typecount, uintmax_t tarfilesize, uintmax_t sizeofall); void consolestats(std::map<std::string, uintmax_t> &typecount, uintmax_t tarfilesize, uintmax_t sizeofall);
// to write default console output to txt file // to write default console output to txt file
void txtfilestats (std::map<std::string, uintmax_t> &typecount, uintmax_t tarfilesize, uintmax_t sizeofall, void txtfilestats(std::map<std::string, uintmax_t> &typecount, uintmax_t tarfilesize, uintmax_t sizeofall,
std::string archiveName); std::string archiveName);
// print out helpertext // print out helpertext
void printhelp(); void printhelp();
// check if valid GNU ZIP file via magic byte.
bool validGzip(std::string &filename);
// check if file can be opened
bool fileOpen(std::string &filename);
} }