Skip to content

Commit

Permalink
columns order is ok now
Browse files Browse the repository at this point in the history
  • Loading branch information
SermetPekin committed Dec 8, 2024
1 parent c30fd6a commit 23df435
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 34 deletions.
114 changes: 95 additions & 19 deletions include/dataframe.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,19 @@ namespace microgradCpp
std::unordered_map<std::string, std::optional<std::type_index>> column_types;
std::unordered_map<std::string, std::unordered_map<std::string, int>> encoding_mappings;

std::vector<std::string> column_order;

// ............................................................. get_column_names
/// Returns the DataFrame's column names in the order recorded in
/// `column_order`.
///
/// The names are deliberately NOT collected by iterating `columns`: the
/// iteration order of that container does not reflect the order in which
/// columns were added (presumably it is an unordered map like its sibling
/// members — confirm against the member declaration), so callers indexing
/// the result positionally would see an arbitrary order.
///
/// @return A copy of `column_order`.
std::vector<std::string> get_column_names() const
{
    return column_order;
}

void print_shape() const
Expand Down Expand Up @@ -326,7 +330,11 @@ namespace microgradCpp
return "unknown";
}

// Rocking star print method
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <variant>

void rocking_star_print(size_t n = 10) const
{
std::cout << "\n🚀 DataFrame Overview 🚀\n";
Expand All @@ -343,7 +351,7 @@ namespace microgradCpp
// Display column names and types
std::cout << "\n🧩 Columns and Data Types:\n";
std::cout << "---------------------------\n";
for (const auto &[name, col] : columns)
for (const auto &name : column_order)
{
std::cout << "🔹 " << std::setw(15) << std::left << name << " | [" << get_type_string(name) << "]\n";
}
Expand All @@ -352,35 +360,36 @@ namespace microgradCpp
std::cout << "\n🔍 First " << n << " Rows:\n";
std::cout << "---------------------------\n";

// Print column headers
for (const auto &[name, _] : columns)
// Print column headers in the correct order
for (const auto &name : column_order)
{
std::cout << std::setw(15) << std::left << name;
}
std::cout << "\n";

// Print separator line
for (size_t i = 0; i < columns.size(); ++i)
for (size_t i = 0; i < column_order.size(); ++i)
{
std::cout << std::setw(15) << std::setfill('-') << "" << std::setfill(' ');
}
std::cout << "\n";

// Print rows
// Print rows in the correct order
for (size_t row = 0; row < std::min(n, num_rows); ++row)
{
for (const auto &[_, col] : columns)
for (const auto &name : column_order)
{
const auto &col = columns.at(name);
if (row < col.size())
{
std::visit([](const auto &value)
{
using T = std::decay_t<decltype(value)>;
if constexpr (std::is_same_v<T, std::monostate>) {
std::cout << std::setw(15) << "NaN";
} else {
std::cout << std::setw(15) << value;
} }, col[row]);
using T = std::decay_t<decltype(value)>;
if constexpr (std::is_same_v<T, std::monostate>) {
std::cout << std::setw(15) << "NaN";
} else {
std::cout << std::setw(15) << value;
} }, col[row]);
}
else
{
Expand All @@ -393,6 +402,73 @@ namespace microgradCpp
std::cout << "========================\n\n";
}

// Rocking star print method
// void rocking_star_printBackup(size_t n = 10) const
// {
// std::cout << "\n🚀 DataFrame Overview 🚀\n";
// std::cout << "========================\n";

// // Display shape
// size_t num_rows = 0;
// if (!columns.empty())
// {
// num_rows = columns.begin()->second.size();
// }
// std::cout << "📝 Shape: (" << num_rows << " rows, " << columns.size() << " columns)\n";

// // Display column names and types
// std::cout << "\n🧩 Columns and Data Types:\n";
// std::cout << "---------------------------\n";
// for (const auto &[name, col] : columns)
// {
// std::cout << "🔹 " << std::setw(15) << std::left << name << " | [" << get_type_string(name) << "]\n";
// }

// // Display first 'n' rows
// std::cout << "\n🔍 First " << n << " Rows:\n";
// std::cout << "---------------------------\n";

// // Print column headers
// for (const auto &[name, _] : columns)
// {
// std::cout << std::setw(15) << std::left << name;
// }
// std::cout << "\n";

// // Print separator line
// for (size_t i = 0; i < columns.size(); ++i)
// {
// std::cout << std::setw(15) << std::setfill('-') << "" << std::setfill(' ');
// }
// std::cout << "\n";

// // Print rows
// for (size_t row = 0; row < std::min(n, num_rows); ++row)
// {
// for (const auto &[_, col] : columns)
// {
// if (row < col.size())
// {
// std::visit([](const auto &value)
// {
// using T = std::decay_t<decltype(value)>;
// if constexpr (std::is_same_v<T, std::monostate>) {
// std::cout << std::setw(15) << "NaN";
// } else {
// std::cout << std::setw(15) << value;
// } }, col[row]);
// }
// else
// {
// std::cout << std::setw(15) << "NaN";
// }
// }
// std::cout << "\n";
// }

// std::cout << "========================\n\n";
// }

private:
void m_save_csv(const std::string &file_name, std::optional<char> delimiter = std::nullopt)
{
Expand Down
30 changes: 17 additions & 13 deletions include/dataframe_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ namespace microgradCpp
std::cout << "CSV file saved as: " << filename << std::endl;
}


inline void DataFrame::from_csv(const std::string &filename, bool has_header, char delimiter) // = true, ','
{
std::ifstream file(filename);
Expand All @@ -153,7 +152,7 @@ namespace microgradCpp
}

std::string line;
std::vector<std::string> column_names;
// std::vector<std::string> column_names;
bool is_first_line = true;

while (std::getline(file, line))
Expand All @@ -169,12 +168,12 @@ namespace microgradCpp

if (is_first_line && has_header)
{
column_names = cells;
for (auto &col : column_names)
column_order = cells;
for (auto &col : column_order)
{

col = trim(col); // TODO
col = trim(col); // TODO

columns[col] = Column();
column_types[col] = std::nullopt; // Initialize types as unknown
}
Expand All @@ -187,16 +186,22 @@ namespace microgradCpp
// If no header, create generic column names
for (size_t i = 0; i < cells.size(); ++i)
{
column_names.push_back("column_" + std::to_string(i));
columns[column_names[i]] = Column();
column_types[column_names[i]] = std::nullopt;

std::string col_name = "column_" + std::to_string(i);
column_order.push_back(col_name);
columns[col_name] = Column();
column_types[col_name] = std::nullopt;

// column_names.push_back("column_" + std::to_string(i));
// columns[column_names[i]] = Column();
// column_types[column_names[i]] = std::nullopt;
}
is_first_line = false;
}

for (size_t i = 0; i < cells.size(); ++i)
{
const auto &col_name = column_names[i];
const auto &col_name = column_order[i];
const std::string &value = cells[i];

if (is_numeric(value))
Expand Down Expand Up @@ -226,7 +231,6 @@ namespace microgradCpp
file.close();
}


// inline void DataFrame::from_csvBackup(const std::string &filename, bool has_header, char delimiter) // = true, ','
// {
// std::ifstream file(filename);
Expand Down Expand Up @@ -256,8 +260,8 @@ namespace microgradCpp
// for (auto &col : column_names)
// {

// col = trim(col); // TODO
// col = trim(col); // TODO

// columns[col] = Column();
// column_types[col] = std::nullopt; // Initialize types as unknown
// }
Expand Down
4 changes: 2 additions & 2 deletions tests/test_dataframe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ TEST(DataFrameTest, LoadCSVParts)

ASSERT_TRUE(sptest::contains(columns, "sepal_length")) << "'sepal_length' not found in columns";

// EXPECT_EQ(columns[0], "sepal_length");
// EXPECT_EQ(columns[4], "species");
EXPECT_EQ(columns[0], "sepal_length");
EXPECT_EQ(columns[4], "species");

// std::remove(temp_file.c_str());
}
Expand Down

0 comments on commit 23df435

Please sign in to comment.