Code to read and write CSV files












4












$begingroup$


Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



#include <string>    // std::string
#include <fstream> // std::ofstream, std::ifstream
#include <vector> // std::vector
#include <utility> // std::pair
#include <stdexcept> // std::runtime_error
#include <sstream> // std::stringstream
#include <iostream> // std::cout, std::cin

void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
// Make a CSV file with one or more columns of integer values
// Each column of data is represented by the pair <column name, column data>
// as std::pair<std::string, std::vector<int>>
// The dataset is represented as a vector of these columns
// Note that all columns should be the same size

// Create an output filestream object
std::ofstream myFile(filename);

// Send column names to the stream
for(int j = 0; j < dataset.size(); ++j)
{
myFile << dataset.at(j).first;
if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
}
myFile << "n";

// Send data to the stream
for(int i = 0; i < dataset.at(0).second.size(); ++i)
{
for(int j = 0; j < dataset.size(); ++j)
{
myFile << dataset.at(j).second.at(i);
if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
}
myFile << "n";
}

// Close the file
myFile.close();
}

std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
// Reads a CSV file into a vector of <string, vector<int>> pairs where
// each pair represents <column name, column values>

// Create a vector of <string, int vector> pairs to store the result
std::vector<std::pair<std::string, std::vector<int>>> result;

// Create an input filestream
std::ifstream myFile(filename);

// Make sure the file is open
if(!myFile.is_open()) throw std::runtime_error("Could not open file");

// Helper vars
std::string line, colname;
int val;

// Read the column names
if(myFile.good())
{
// Extract the first line in the file
std::getline(myFile, line);

// Create a stringstream from line
std::stringstream ss(line);

// Extract each column name
while(std::getline(ss, colname, ',')){

// Initialize and add <colname, int vector> pairs to result
result.push_back({colname, std::vector<int> {}});
}
}

// Read data, line by line
while(std::getline(myFile, line))
{
// Create a stringstream of the current line
std::stringstream ss(line);

// Keep track of the current column index
int colIdx = 0;

// Extract each integer
while(ss >> val){

// Add the current integer to the 'colIdx' column's values vector
result.at(colIdx).second.push_back(val);

// If the next token is a comma, ignore it and move on
if(ss.peek() == ',') ss.ignore();

// Increment the column index
colIdx++;
}
}

// Close file
myFile.close();

return result;
}

int main() {
// // Make three vectors, each of length N filled with 1s, 2s, and 3s
int N = 1000;
std::vector<int> vec1(N, 1);
std::vector<int> vec2(N, 2);
std::vector<int> vec3(N, 3);

// Wrap into a vector
std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

// Write the vector to CSV
write_csv("three_cols.csv", vals);

// Read three_cols.csv
std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

// Print row and column counts to check if this was successful
std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

return 0;
}









share|improve this question









New contributor




Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.







$endgroup$

















    4












    $begingroup$


    Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



    Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



    #include <string>    // std::string
    #include <fstream> // std::ofstream, std::ifstream
    #include <vector> // std::vector
    #include <utility> // std::pair
    #include <stdexcept> // std::runtime_error
    #include <sstream> // std::stringstream
    #include <iostream> // std::cout, std::cin

    void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
    // Make a CSV file with one or more columns of integer values
    // Each column of data is represented by the pair <column name, column data>
    // as std::pair<std::string, std::vector<int>>
    // The dataset is represented as a vector of these columns
    // Note that all columns should be the same size

    // Create an output filestream object
    std::ofstream myFile(filename);

    // Send column names to the stream
    for(int j = 0; j < dataset.size(); ++j)
    {
    myFile << dataset.at(j).first;
    if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
    }
    myFile << "n";

    // Send data to the stream
    for(int i = 0; i < dataset.at(0).second.size(); ++i)
    {
    for(int j = 0; j < dataset.size(); ++j)
    {
    myFile << dataset.at(j).second.at(i);
    if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
    }
    myFile << "n";
    }

    // Close the file
    myFile.close();
    }

    std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
    // Reads a CSV file into a vector of <string, vector<int>> pairs where
    // each pair represents <column name, column values>

    // Create a vector of <string, int vector> pairs to store the result
    std::vector<std::pair<std::string, std::vector<int>>> result;

    // Create an input filestream
    std::ifstream myFile(filename);

    // Make sure the file is open
    if(!myFile.is_open()) throw std::runtime_error("Could not open file");

    // Helper vars
    std::string line, colname;
    int val;

    // Read the column names
    if(myFile.good())
    {
    // Extract the first line in the file
    std::getline(myFile, line);

    // Create a stringstream from line
    std::stringstream ss(line);

    // Extract each column name
    while(std::getline(ss, colname, ',')){

    // Initialize and add <colname, int vector> pairs to result
    result.push_back({colname, std::vector<int> {}});
    }
    }

    // Read data, line by line
    while(std::getline(myFile, line))
    {
    // Create a stringstream of the current line
    std::stringstream ss(line);

    // Keep track of the current column index
    int colIdx = 0;

    // Extract each integer
    while(ss >> val){

    // Add the current integer to the 'colIdx' column's values vector
    result.at(colIdx).second.push_back(val);

    // If the next token is a comma, ignore it and move on
    if(ss.peek() == ',') ss.ignore();

    // Increment the column index
    colIdx++;
    }
    }

    // Close file
    myFile.close();

    return result;
    }

    int main() {
    // // Make three vectors, each of length N filled with 1s, 2s, and 3s
    int N = 1000;
    std::vector<int> vec1(N, 1);
    std::vector<int> vec2(N, 2);
    std::vector<int> vec3(N, 3);

    // Wrap into a vector
    std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

    // Write the vector to CSV
    write_csv("three_cols.csv", vals);

    // Read three_cols.csv
    std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

    // Print row and column counts to check if this was successful
    std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

    return 0;
    }









    share|improve this question









    New contributor




    Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
    Check out our Code of Conduct.







    $endgroup$















      4












      4








      4





      $begingroup$


      Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



      Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



      #include <string>    // std::string
      #include <fstream> // std::ofstream, std::ifstream
      #include <vector> // std::vector
      #include <utility> // std::pair
      #include <stdexcept> // std::runtime_error
      #include <sstream> // std::stringstream
      #include <iostream> // std::cout, std::cin

      void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
      // Make a CSV file with one or more columns of integer values
      // Each column of data is represented by the pair <column name, column data>
      // as std::pair<std::string, std::vector<int>>
      // The dataset is represented as a vector of these columns
      // Note that all columns should be the same size

      // Create an output filestream object
      std::ofstream myFile(filename);

      // Send column names to the stream
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).first;
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";

      // Send data to the stream
      for(int i = 0; i < dataset.at(0).second.size(); ++i)
      {
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).second.at(i);
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";
      }

      // Close the file
      myFile.close();
      }

      std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
      // Reads a CSV file into a vector of <string, vector<int>> pairs where
      // each pair represents <column name, column values>

      // Create a vector of <string, int vector> pairs to store the result
      std::vector<std::pair<std::string, std::vector<int>>> result;

      // Create an input filestream
      std::ifstream myFile(filename);

      // Make sure the file is open
      if(!myFile.is_open()) throw std::runtime_error("Could not open file");

      // Helper vars
      std::string line, colname;
      int val;

      // Read the column names
      if(myFile.good())
      {
      // Extract the first line in the file
      std::getline(myFile, line);

      // Create a stringstream from line
      std::stringstream ss(line);

      // Extract each column name
      while(std::getline(ss, colname, ',')){

      // Initialize and add <colname, int vector> pairs to result
      result.push_back({colname, std::vector<int> {}});
      }
      }

      // Read data, line by line
      while(std::getline(myFile, line))
      {
      // Create a stringstream of the current line
      std::stringstream ss(line);

      // Keep track of the current column index
      int colIdx = 0;

      // Extract each integer
      while(ss >> val){

      // Add the current integer to the 'colIdx' column's values vector
      result.at(colIdx).second.push_back(val);

      // If the next token is a comma, ignore it and move on
      if(ss.peek() == ',') ss.ignore();

      // Increment the column index
      colIdx++;
      }
      }

      // Close file
      myFile.close();

      return result;
      }

      int main() {
      // // Make three vectors, each of length N filled with 1s, 2s, and 3s
      int N = 1000;
      std::vector<int> vec1(N, 1);
      std::vector<int> vec2(N, 2);
      std::vector<int> vec3(N, 3);

      // Wrap into a vector
      std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

      // Write the vector to CSV
      write_csv("three_cols.csv", vals);

      // Read three_cols.csv
      std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

      // Print row and column counts to check if this was successful
      std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

      return 0;
      }









      share|improve this question









      New contributor




      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.







      $endgroup$




      Below is my code that 1) writes a CSV file with three columns of integer data (plus column names on the first line) and 2) reads the CSV file. I'm new to C++ and would appreciate some code review.



      Also, I'm bothered by the fact that my technique requires all of the data to be integers. I'd appreciate advice on how I could generalize this code to write/read data with a mix of integers, doubles, strings, etc.



      #include <string>    // std::string
      #include <fstream> // std::ofstream, std::ifstream
      #include <vector> // std::vector
      #include <utility> // std::pair
      #include <stdexcept> // std::runtime_error
      #include <sstream> // std::stringstream
      #include <iostream> // std::cout, std::cin

      void write_csv(std::string filename, std::vector<std::pair<std::string, std::vector<int>>> dataset){
      // Make a CSV file with one or more columns of integer values
      // Each column of data is represented by the pair <column name, column data>
      // as std::pair<std::string, std::vector<int>>
      // The dataset is represented as a vector of these columns
      // Note that all columns should be the same size

      // Create an output filestream object
      std::ofstream myFile(filename);

      // Send column names to the stream
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).first;
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";

      // Send data to the stream
      for(int i = 0; i < dataset.at(0).second.size(); ++i)
      {
      for(int j = 0; j < dataset.size(); ++j)
      {
      myFile << dataset.at(j).second.at(i);
      if(j != dataset.size() - 1) myFile << ","; // No comma at end of line
      }
      myFile << "n";
      }

      // Close the file
      myFile.close();
      }

      std::vector<std::pair<std::string, std::vector<int>>> read_csv(std::string filename){
      // Reads a CSV file into a vector of <string, vector<int>> pairs where
      // each pair represents <column name, column values>

      // Create a vector of <string, int vector> pairs to store the result
      std::vector<std::pair<std::string, std::vector<int>>> result;

      // Create an input filestream
      std::ifstream myFile(filename);

      // Make sure the file is open
      if(!myFile.is_open()) throw std::runtime_error("Could not open file");

      // Helper vars
      std::string line, colname;
      int val;

      // Read the column names
      if(myFile.good())
      {
      // Extract the first line in the file
      std::getline(myFile, line);

      // Create a stringstream from line
      std::stringstream ss(line);

      // Extract each column name
      while(std::getline(ss, colname, ',')){

      // Initialize and add <colname, int vector> pairs to result
      result.push_back({colname, std::vector<int> {}});
      }
      }

      // Read data, line by line
      while(std::getline(myFile, line))
      {
      // Create a stringstream of the current line
      std::stringstream ss(line);

      // Keep track of the current column index
      int colIdx = 0;

      // Extract each integer
      while(ss >> val){

      // Add the current integer to the 'colIdx' column's values vector
      result.at(colIdx).second.push_back(val);

      // If the next token is a comma, ignore it and move on
      if(ss.peek() == ',') ss.ignore();

      // Increment the column index
      colIdx++;
      }
      }

      // Close file
      myFile.close();

      return result;
      }

      int main() {
      // // Make three vectors, each of length N filled with 1s, 2s, and 3s
      int N = 1000;
      std::vector<int> vec1(N, 1);
      std::vector<int> vec2(N, 2);
      std::vector<int> vec3(N, 3);

      // Wrap into a vector
      std::vector<std::pair<std::string, std::vector<int>>> vals = {{"One", vec1}, {"Two", vec2}, {"Three", vec3}};

      // Write the vector to CSV
      write_csv("three_cols.csv", vals);

      // Read three_cols.csv
      std::vector<std::pair<std::string, std::vector<int>>> three_cols = read_csv("three_cols.csv");

      // Print row and column counts to check if this was successful
      std::cout << "Rows: " << three_cols.at(0).second.size() << ", Columns: " << three_cols.size() << std::endl;

      return 0;
      }






      c++ beginner csv






      share|improve this question









      New contributor




      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.











      share|improve this question









      New contributor




      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.









      share|improve this question




      share|improve this question








      edited 3 hours ago









      200_success

      129k15152415




      129k15152415






      New contributor




      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.









      asked 3 hours ago









      BenBen

      1212




      1212




      New contributor




      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.





      New contributor





      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.






      Ben is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
      Check out our Code of Conduct.






















          2 Answers
          2






          active

          oldest

          votes


















          2












          $begingroup$

          Good documentation



          Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



          Pass by const reference for only read purposes



          Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



          Check if file is opened



          The check is performed when reading, but not performed when writing.



          Do not use at() unless exception-throwing version is desired



          .at() incurs overhead by performing in-range check, and also throws if out of range.



          Use emplace back to construct and push in place



          result.push_back({colname, std::vector<int> {}});


          This could be rewritten as



          result.emplace_back(colname, std::vector<int> {});


          From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



          Improve printing algorithm



          This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



          Do not explicitly close file



          Destructor of std::ifstream and of it's twin automatically close the file.



          Create type alias where useful



          using column = std::pair<std::string, std::vector<int>>;


          would save users a lot of typing.



          Use locales for reading from csv



          Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



          Unreliable reading algorithm



          I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





          Architecture for generic reading/writing



          I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





          • Upstream and downstream invariances



            Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




          • Data layout



            This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








          share|improve this answer









          $endgroup$





















            0












            $begingroup$

            My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



            Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






            share|improve this answer









            $endgroup$









            • 1




              $begingroup$
              I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
              $endgroup$
              – Incomputable
              2 hours ago











            Your Answer





            StackExchange.ifUsing("editor", function () {
            return StackExchange.using("mathjaxEditing", function () {
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
            });
            });
            }, "mathjax-editing");

            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "196"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });






            Ben is a new contributor. Be nice, and check out our Code of Conduct.










            draft saved

            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f211826%2fcode-to-read-and-write-csv-files%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            2 Answers
            2






            active

            oldest

            votes








            2 Answers
            2






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes









            2












            $begingroup$

            Good documentation



            Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



            Pass by const reference for only read purposes



            Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



            Check if file is opened



            The check is performed when reading, but not performed when writing.



            Do not use at() unless exception-throwing version is desired



            .at() incurs overhead by performing in-range check, and also throws if out of range.



            Use emplace back to construct and push in place



            result.push_back({colname, std::vector<int> {}});


            This could be rewritten as



            result.emplace_back(colname, std::vector<int> {});


            From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



            Improve printing algorithm



            This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



            Do not explicitly close file



            Destructor of std::ifstream and of it's twin automatically close the file.



            Create type alias where useful



            using column = std::pair<std::string, std::vector<int>>;


            would save users a lot of typing.



            Use locales for reading from csv



            Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



            Unreliable reading algorithm



            I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





            Architecture for generic reading/writing



            I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





            • Upstream and downstream invariances



              Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




            • Data layout



              This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








            share|improve this answer









            $endgroup$


















              2












              $begingroup$

              Good documentation



              Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



              Pass by const reference for only read purposes



              Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



              Check if file is opened



              The check is performed when reading, but not performed when writing.



              Do not use at() unless exception-throwing version is desired



              .at() incurs overhead by performing in-range check, and also throws if out of range.



              Use emplace back to construct and push in place



              result.push_back({colname, std::vector<int> {}});


              This could be rewritten as



              result.emplace_back(colname, std::vector<int> {});


              From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



              Improve printing algorithm



              This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



              Do not explicitly close file



              Destructor of std::ifstream and of it's twin automatically close the file.



              Create type alias where useful



              using column = std::pair<std::string, std::vector<int>>;


              would save users a lot of typing.



              Use locales for reading from csv



              Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



              Unreliable reading algorithm



              I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





              Architecture for generic reading/writing



              I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





              • Upstream and downstream invariances



                Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




              • Data layout



                This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








              share|improve this answer









              $endgroup$
















                2












                2








                2





                $begingroup$

                Good documentation



                Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



                Pass by const reference for only read purposes



                Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



                Check if file is opened



                The check is performed when reading, but not performed when writing.



                Do not use at() unless exception-throwing version is desired



                .at() incurs overhead by performing in-range check, and also throws if out of range.



                Use emplace back to construct and push in place



                result.push_back({colname, std::vector<int> {}});


                This could be rewritten as



                result.emplace_back(colname, std::vector<int> {});


                From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



                Improve printing algorithm



                This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



                Do not explicitly close file



                Destructor of std::ifstream and of it's twin automatically close the file.



                Create type alias where useful



                using column = std::pair<std::string, std::vector<int>>;


                would save users a lot of typing.



                Use locales for reading from csv



                Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



                Unreliable reading algorithm



                I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





                Architecture for generic reading/writing



                I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





                • Upstream and downstream invariances



                  Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




                • Data layout



                  This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.








                share|improve this answer









                $endgroup$



                Good documentation



                Every time I wanted to say "but it behaves badly in case ...", I found specification disallowing that case. One nitpick might be that "with one or more columns" could be explicitly written as "requires at least one column". The only thing left is to pray that people will read it.



                Pass by const reference for only read purposes



                Copying non-scalar and heavy data structures (strings, vectors) might incur significant overhead. Prefer to pass by const reference.



                Check if file is opened



                The check is performed when reading, but not performed when writing.



                Do not use at() unless exception-throwing version is desired



                .at() incurs overhead by performing in-range check, and also throws if out of range.



                Use emplace back to construct and push in place



                result.push_back({colname, std::vector<int> {}});


                This could be rewritten as



                result.emplace_back(colname, std::vector<int> {});


                From C++17, if I'm not mistaken, the two are equivalent due to copy elision, but emplace version is a bit clearer.



                Improve printing algorithm



                This is a general problem of string joining. This answer shows a great implementation for a simple case. One can remove templates from them if needed.



                Do not explicitly close file



                Destructor of std::ifstream and of it's twin automatically close the file.



                Create type alias where useful



                using column = std::pair<std::string, std::vector<int>>;


                would save users a lot of typing.



                Use locales for reading from csv



                Whenever I want casual reading of csv files I just copy/paste from this asnwer and march on.



                Unreliable reading algorithm



                I would be a bit worried to use it as is. It tends to assume the layout to be the same as in writing, but I'm afraid edge cases slip in and hit like a truck. This is one of the rare cases when enabling exceptions on the stream might be a good idea.





                Architecture for generic reading/writing



                I don't think implementing reading algorithm is a viable option, because it involves extreme amounts of error checking to be useful. As for writing:





                • Upstream and downstream invariances



                  Upstream invariance in this case is CSV format, which the final output has to obey. Downstream invariance is requirements on the result of object getting streamed (file << myObject). One will need to specify the requirements on data type very clearly and with great scrutiny. For example, if one wants to accept std::string as data type, the user has to override default streaming for the type, which pulls them into realm of undefined behavior. This functionality clearly requires a lot of thought put into it in order to be correct and robust.




                • Data layout



                  This is one is just a speculation, but the way data is stored might be problematic in terms of performance. It would be better to create a data structure that stores headers, and then stores values row-wise. Access could be done by using header value as first subscript, and row index as second subscript. Horizontal offset can be saved for each header value to access the right cell in a row. This also would be a great learning exercise.









                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered 2 hours ago









                IncomputableIncomputable

                6,63021653




                6,63021653

























                    0












                    $begingroup$

                    My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



                    Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






                    share|improve this answer









                    $endgroup$









                    • 1




                      $begingroup$
                      I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
                      $endgroup$
                      – Incomputable
                      2 hours ago
















                    0












                    $begingroup$

                    My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



                    Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






                    share|improve this answer









                    $endgroup$









                    • 1




                      $begingroup$
                      I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
                      $endgroup$
                      – Incomputable
                      2 hours ago














                    0












                    0








                    0





                    $begingroup$

                    My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



                    Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.






                    share|improve this answer









                    $endgroup$



                    My first observation is that your object model is quite confusing; you have a vector of pairs with vectors, and it is very difficult to keep track of what is what. If I'm reading this code correctly, you should consider extracting this pair into a column class, giving you std::vector<column>.



                    Once you have this column class, you can add additional properties to it, such as what type of data it contains, and a void* to the data in each cell.







                    share|improve this answer












                    share|improve this answer



                    share|improve this answer










                    answered 2 hours ago









                    Joe CJoe C

                    60429




                    60429








                    • 1




                      $begingroup$
                      I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
                      $endgroup$
                      – Incomputable
                      2 hours ago














                    • 1




                      $begingroup$
                      I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
                      $endgroup$
                      – Incomputable
                      2 hours ago








                    1




                    1




                    $begingroup$
                    I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
                    $endgroup$
                    – Incomputable
                    2 hours ago




                    $begingroup$
                    I would strongly advise against void*. Some template metaprogramming might have higher development cost, but it will sure payoff in learning and correctness of user code.
                    $endgroup$
                    – Incomputable
                    2 hours ago










                    Ben is a new contributor. Be nice, and check out our Code of Conduct.










                    draft saved

                    draft discarded


















                    Ben is a new contributor. Be nice, and check out our Code of Conduct.













                    Ben is a new contributor. Be nice, and check out our Code of Conduct.












                    Ben is a new contributor. Be nice, and check out our Code of Conduct.
















                    Thanks for contributing an answer to Code Review Stack Exchange!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    Use MathJax to format equations. MathJax reference.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f211826%2fcode-to-read-and-write-csv-files%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    How to label and detect the document text images

                    Vallis Paradisi

                    Tabula Rosettana