Tarstats-pp #1 Prototyping: Parse all items of a tarball

2026-07-16 03:54:48 +02:00 · 2022-02-09 21:15:15 +01:00
parent 53eac32e70
commit 3c3306f395
1 changed files with 81 additions and 82 deletions
@@ -3,12 +3,67 @@
 #include <string>
 #include <vector>
 #include <cmath>
+#include <map>

+// Returns true when the 512-byte block at buf holds only NUL bytes (the tar end-of-archive filler).
+// Scans exactly 512 bytes, so buf need not be NUL-terminated; an ASCII '0' counts as ordinary data.
+bool eof (const char* buf) {
+    for (int i = 0; i < 512; ++i) {
+        if (buf[i] != '\0')
+            return false;
+    }
+    return true;
+}
+
+// gets name of an item: the 100-byte name field at the start of the header, cut at its first NUL (for test purposes)
+std::string getitemname (const char* buf) {
+    const std::string field(buf, 100);        // the field carries no terminator when all 100 bytes are used
+    const auto end = field.find('\0');        // bytes behind the terminator are not part of the name
+    return field.substr(0, end);              // npos (no NUL found) keeps the whole field
+}
+
+// gets size of an item in bytes from the 12-byte size field at offset 124. assumes octal encoding: skips leading
+// spaces and stops at the first non-octal byte (NUL/space terminator). base-256 coded sizes are not decoded (yield 0).
+uint64_t getitemsize(const char* buf){
+    const char* field = buf + 124;
+    int pos = 0;
+    while (pos < 12 && field[pos] == ' ')
+        pos++;
+    uint64_t itemsize{};
+    for (; pos < 12 && field[pos] >= '0' && field[pos] <= '7'; pos++)
+        itemsize = itemsize * 8 + static_cast<uint64_t>(field[pos] - '0');  // exact integer math instead of std::pow
+    return itemsize;
+}
+
+// Translates the type flag byte of a header into a category name.
+// '0' and NUL -> "FILE", '1' -> "HARDLINK", '2' -> "SYMLINK", '5' -> "DIRECTORY";
+// any other flag -> "OTHER".
+std::string getitemtype(char &n) {
+    const char flag = n;
+    if (flag == '0' || flag == '\0')
+        return "FILE";
+    if (flag == '1')
+        return "HARDLINK";
+    if (flag == '2')
+        return "SYMLINK";
+    if (flag == '5')
+        return "DIRECTORY";
+    // every flag not listed above falls into one shared bucket
+    return "OTHER";
+}



 int main(int argc, char** argv) {

+    // set general variables
+    std::map <std::string, uint> typecount{
+        {"FILE", 0}, {"HARDLINK", 0}, {"SYMLINK", 0},
+        {"DIRECTORY", 0}, {"OTHER", 0}
+    }; // count the types of all items.
+
+    uint64_t sizeof_allfiles{};
+
    // Trivial check for arguments. Errorprone and has to be changed.
    if (argc < 2) {
        std::cout << "Please enter filename" << std::endl;
@@ -18,102 +73,46 @@ int main(int argc, char** argv) {
    std::string archiveFilename(argv[1]);

    //Open tar File.
-    std::ifstream datei(archiveFilename, std::ios::binary);
-    if(!datei)
+    std::ifstream file(archiveFilename, std::ios::binary);
+    if(!file)
        std::cout << "Error opening file" << std::endl;

    // Tar spec is working with continous 512 byte size blocks. Header is 512 bytes.
    int buffersize = 512;
+    while (file) {
+        //Read header of first item in tar archive
+        char *headbuffer = new char[buffersize];
+        file.read(headbuffer, buffersize);

-    //Read header of first item in tar archive
-    char* headbuffer = new char[buffersize];
-    datei.read(headbuffer, buffersize);
+        // tar file ends with 2 512byte blocks of 0. As no block should ever be 0 unless at the end, we check only once.
+        if (eof(headbuffer)) {
+            break;
+        }

-    // Read name of the next item
-    std::string itemname(&headbuffer[0], 100);
-    std::erase(itemname, '\0');
+        // Read type of item
+        std::string itemtype{};
+        itemtype = getitemtype(headbuffer[156]);
+        typecount[itemtype] += 1;

-    // Read type of item
-    std::string itemtype{};
-    switch (headbuffer[156]){
-        case '0': case '\0':
-            itemtype = "FILE";
-            break;
-        case '1':
-            itemtype = "HARDLINK";
-            break;
-        case '2':
-            itemtype = "SYMLINK";
-            break;
-        case '5':
-            itemtype = "DIRECTORY";
-            break;
-        default:
-            itemtype = "OTHER";
-            break;
+
+        // read itemsize and add to total
+        sizeof_allfiles += getitemsize(headbuffer);
+
+        // ignore file content. we want to get to the next header. item types != FILE have no content blocks (0 byte)
+        if (getitemsize(headbuffer)!=0) {
+            file.ignore((getitemsize(headbuffer) / 512) * 512 + 512);
+        }
+
+        // empty the headerbuffer
+        delete[] headbuffer;
    }

+    std::cout << sizeof_allfiles << std::endl;



-    // Read size of the next file
-    long double itemsize{};
-    int power = 10;
-    std::string asciifilesize(&headbuffer[124], 11);
-    for (auto i : asciifilesize) {
-        int value = i - '0';
-        itemsize += value * std::pow(8, power);
-        power--;
-    }
-
-    // uint64_t locationtonextfile{};
-    /* find out how often 512 bytes have to be read until the next header begins. Itemcontents are filled with '0' to
-     * fill the next 512 byte block
-     */
-    int filereadcount = (itemsize / buffersize) + 1;
-    char* itembuffer = new char[buffersize];
-
-    while (filereadcount >= 0){
-        datei.read(itembuffer, buffersize);
-        filereadcount--;
-    }
-    delete[] itembuffer;
-    delete[] headbuffer;


-    headbuffer = new char[buffersize];
-    datei.read(headbuffer, buffersize);
-    std::string itemname2 (&headbuffer[0], 100);
-    std::erase(itemname2, '\0');
-
-    std::string itemtype2{};
-    switch (headbuffer[156]){
-        case '0': case '\0':
-            itemtype2 = "FILE";
-            break;
-        case '1':
-            itemtype2 = "HARDLINK";
-            break;
-        case '2':
-            itemtype2 = "SYMLINK";
-            break;
-        case '5':
-            itemtype2 = "DIRECTORY";
-            break;
-        default:
-            itemtype2 = "OTHER";
-            break;
-    }
-
-
-    //Printing to stdout
-    std::cout << itemname << std::endl;
-    std::cout << itemtype << std::endl;
-    std::cout << itemsize << " Bytes" << std::endl;
-
-    //Second item
-    std::cout << itemname2 << std::endl;
-    std::cout << itemtype2 << std::endl;

    return 0;
 }