mirror of
https://gitea.home.endeavr.de/Marcel/Tarstats-pp.git
synced 2025-12-10 00:39:01 +01:00
Tarstats-pp #1 Prototyping: Parse all items of a tarball
This commit is contained in:
157
main.cpp
157
main.cpp
@@ -3,12 +3,67 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
// Checks whether a 512-byte block consists only of NUL ('\0') or ASCII '0'
// bytes, which is how the caller recognises the end of the archive.
//
// buf must point to at least 512 readable bytes.
// Returns true only if no other byte value occurs anywhere in the block.
bool eof (const char* buf) {
    // Walk the whole block explicitly. Building a std::string from buf would
    // stop at the first NUL (so every block starting with NUL looked empty)
    // and would read past the block if it contained no NUL at all.
    for (int i = 0; i < 512; ++i) {
        if (buf[i] != '\0' && buf[i] != '0')
            return false;
    }
    return true;
}
|
||||||
|
|
||||||
|
// Gets the name of an item (for test purposes).
//
// The name is read from the first 100 bytes of the header block, so buf must
// point to at least 100 readable bytes.
// Returns the bytes up to (not including) the first NUL, or all 100 bytes if
// the field contains no NUL.
std::string getitemname (const char* buf) {
    std::string name(buf, 100);
    // Cut at the terminator instead of deleting every NUL: bytes that follow
    // the first NUL are not part of the name and must not be appended to it.
    const auto terminator = name.find('\0');
    if (terminator != std::string::npos)
        name.erase(terminator);
    return name;
}
|
||||||
|
|
||||||
|
// Gets the size of an item in bytes. Assumes octal ASCII encoding.
//
// The size field is the 12-byte field starting at offset 124 of the header
// block, so buf must point to at least 136 readable bytes.
// Leading spaces are skipped and parsing stops at the first byte that is not
// an octal digit (normally the NUL or space terminator).
// Returns the decoded value, or 0 if the field holds no octal digits. A
// base-256 encoded field (high bit set in the first byte) is not decoded and
// therefore also yields 0.
uint64_t getitemsize(const char* buf){
    constexpr int kSizeOffset = 124;
    constexpr int kSizeLength = 12;
    const char* field = buf + kSizeOffset;

    // Some writers pad the number with spaces instead of leading zeros.
    int pos = 0;
    while (pos < kSizeLength && field[pos] == ' ')
        ++pos;

    // Pure integer arithmetic: 12 octal digits need at most 36 bits, so the
    // accumulator cannot overflow and no floating-point rounding is involved.
    uint64_t itemsize{};
    for (; pos < kSizeLength; ++pos) {
        const char c = field[pos];
        if (c < '0' || c > '7')
            break;
        itemsize = (itemsize << 3) | static_cast<uint64_t>(c - '0');
    }
    return itemsize;
}
|
||||||
|
|
||||||
|
// Gets the type of an item from its type flag.
//
// n is the type flag byte of a header (main passes header byte 156). It is
// taken by value because it is only read; this also allows const buffers and
// character literals to be passed.
// Returns "FILE" for '0' or NUL, "HARDLINK" for '1', "SYMLINK" for '2',
// "DIRECTORY" for '5' and "OTHER" for every other value.
std::string getitemtype(char n) {
    switch (n) {
        case '0':
        case '\0':
            return "FILE";
        case '1':
            return "HARDLINK";
        case '2':
            return "SYMLINK";
        case '5':
            return "DIRECTORY";
        default:
            return "OTHER";
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
|
// set general variables
|
||||||
|
std::map <std::string, uint> typecount{
|
||||||
|
{"FILE", 0}, {"HARDLINK", 0}, {"SYMLINK", 0},
|
||||||
|
{"DIRECTORY", 0}, {"OTHER", 0}
|
||||||
|
}; // count the types of all items.
|
||||||
|
|
||||||
|
uint64_t sizeof_allfiles{};
|
||||||
|
|
||||||
// Trivial check for arguments. Errorprone and has to be changed.
|
// Trivial check for arguments. Errorprone and has to be changed.
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
std::cout << "Please enter filename" << std::endl;
|
std::cout << "Please enter filename" << std::endl;
|
||||||
@@ -18,102 +73,46 @@ int main(int argc, char** argv) {
|
|||||||
std::string archiveFilename(argv[1]);
|
std::string archiveFilename(argv[1]);
|
||||||
|
|
||||||
//Open tar File.
|
//Open tar File.
|
||||||
std::ifstream datei(archiveFilename, std::ios::binary);
|
std::ifstream file(archiveFilename, std::ios::binary);
|
||||||
if(!datei)
|
if(!file)
|
||||||
std::cout << "Error opening file" << std::endl;
|
std::cout << "Error opening file" << std::endl;
|
||||||
|
|
||||||
// The tar spec works with continuous 512-byte blocks. A header is 512 bytes.
|
// The tar spec works with continuous 512-byte blocks. A header is 512 bytes.
|
||||||
int buffersize = 512;
|
int buffersize = 512;
|
||||||
|
while (file) {
|
||||||
//Read header of first item in tar archive
|
//Read header of first item in tar archive
|
||||||
char *headbuffer = new char[buffersize];
|
char *headbuffer = new char[buffersize];
|
||||||
datei.read(headbuffer, buffersize);
|
file.read(headbuffer, buffersize);
|
||||||
|
|
||||||
// Read name of the next item
|
// tar file ends with 2 512byte blocks of 0. As no block should ever be 0 unless at the end, we check only once.
|
||||||
std::string itemname(&headbuffer[0], 100);
|
if (eof(headbuffer)) {
|
||||||
std::erase(itemname, '\0');
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Read type of item
|
// Read type of item
|
||||||
std::string itemtype{};
|
std::string itemtype{};
|
||||||
switch (headbuffer[156]){
|
itemtype = getitemtype(headbuffer[156]);
|
||||||
case '0': case '\0':
|
typecount[itemtype] += 1;
|
||||||
itemtype = "FILE";
|
|
||||||
break;
|
|
||||||
case '1':
|
// read itemsize and add to total
|
||||||
itemtype = "HARDLINK";
|
sizeof_allfiles += getitemsize(headbuffer);
|
||||||
break;
|
|
||||||
case '2':
|
// ignore file content. we want to get to the next header. item types != FILE have no content blocks (0 byte)
|
||||||
itemtype = "SYMLINK";
|
if (getitemsize(headbuffer)!=0) {
|
||||||
break;
|
file.ignore((getitemsize(headbuffer) / 512) * 512 + 512);
|
||||||
case '5':
|
|
||||||
itemtype = "DIRECTORY";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
itemtype = "OTHER";
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// empty the headerbuffer
|
||||||
|
|
||||||
|
|
||||||
// Read size of the next file
|
|
||||||
long double itemsize{};
|
|
||||||
int power = 10;
|
|
||||||
std::string asciifilesize(&headbuffer[124], 11);
|
|
||||||
for (auto i : asciifilesize) {
|
|
||||||
int value = i - '0';
|
|
||||||
itemsize += value * std::pow(8, power);
|
|
||||||
power--;
|
|
||||||
}
|
|
||||||
|
|
||||||
// uint64_t locationtonextfile{};
|
|
||||||
/* find out how often 512 bytes have to be read until the next header begins. Itemcontents are filled with '0' to
|
|
||||||
* fill the next 512 byte block
|
|
||||||
*/
|
|
||||||
int filereadcount = (itemsize / buffersize) + 1;
|
|
||||||
char* itembuffer = new char[buffersize];
|
|
||||||
|
|
||||||
while (filereadcount >= 0){
|
|
||||||
datei.read(itembuffer, buffersize);
|
|
||||||
filereadcount--;
|
|
||||||
}
|
|
||||||
delete[] itembuffer;
|
|
||||||
delete[] headbuffer;
|
delete[] headbuffer;
|
||||||
|
|
||||||
|
|
||||||
headbuffer = new char[buffersize];
|
|
||||||
datei.read(headbuffer, buffersize);
|
|
||||||
std::string itemname2 (&headbuffer[0], 100);
|
|
||||||
std::erase(itemname2, '\0');
|
|
||||||
|
|
||||||
std::string itemtype2{};
|
|
||||||
switch (headbuffer[156]){
|
|
||||||
case '0': case '\0':
|
|
||||||
itemtype2 = "FILE";
|
|
||||||
break;
|
|
||||||
case '1':
|
|
||||||
itemtype2 = "HARDLINK";
|
|
||||||
break;
|
|
||||||
case '2':
|
|
||||||
itemtype2 = "SYMLINK";
|
|
||||||
break;
|
|
||||||
case '5':
|
|
||||||
itemtype2 = "DIRECTORY";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
itemtype2 = "OTHER";
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cout << sizeof_allfiles << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//Printing to stdout
|
|
||||||
std::cout << itemname << std::endl;
|
|
||||||
std::cout << itemtype << std::endl;
|
|
||||||
std::cout << itemsize << " Bytes" << std::endl;
|
|
||||||
|
|
||||||
//Second item
|
|
||||||
std::cout << itemname2 << std::endl;
|
|
||||||
std::cout << itemtype2 << std::endl;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user