#include "csv_reader.h"

#include <algorithm>
#include <cerrno>
#include <climits>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>

using namespace std;

// Describes one field of a row as a [start, start + length) slice of the row
// string. hasQuotes records whether the quote character occurred in the slice.
CSVField::CSVField(int start, int length, bool hasQuotes):
    start(start),
    length(length),
    hasQuotes(hasQuotes)
{
}

// Stores the given pointers (the row text and the optional file path are not
// copied) and immediately splits the row into fields.
CSVRow::CSVRow(string *row, const std::string * const filePath, long lineNumber, char delimiter, char quote):
    m_row(row),
    m_filePath(filePath),
    m_lineNumber(lineNumber),
    m_quote(quote)
{
    parse(delimiter, quote);
}

// Returns a multi-line, human-readable dump of the row: line number (only if
// positive), file path (only if set), the raw row text and every field as
// returned by getFieldAsString().
string CSVRow::toString() const
{
    stringstream ss;
    ss << "CSVRow (" << '\n';
    if (m_lineNumber > 0) {
        ss << " line #: \"" << m_lineNumber << "\"" << '\n';
    }
    if (m_filePath) {
        ss << " path: \"" << *m_filePath << "\"" << '\n';
    }
    ss << " value: \"" << *m_row << "\"" << '\n';
    for (unsigned int i = 0; i < m_fields.size(); i++) {
        string value;
        getFieldAsString(i, &value);
        ss << " field " << i << ": '" << value << "'" << '\n';
    }
    ss << ")";
    return ss.str();
}

// Converts the NUL-terminated text beginning at `start` to a long (base 10).
// Succeeds only if strtol consumed at least one character, reported no error
// through errno, and stopped exactly at `end`. *out is written only on success.
static bool cstring_to_long(const char *start, const char *end, long *out)
{
    char *parse_end = NULL;
    errno = 0;
    const long value = strtol(start, &parse_end, 10);
    if (parse_end == start) {
        // Nothing was converted. For an empty range parse_end would also equal
        // `end`, and not every C library sets errno in that case, so reject it
        // explicitly instead of reporting 0 as a successful conversion.
        return false;
    }
    if (errno != 0 || parse_end != end) {
        return false;
    }
    *out = value;
    return true;
}

// Same contract as cstring_to_long(), applied to the whole of `s`.
static bool string_to_long(const string &s, long *out)
{
    const char *cstr = s.c_str();
    return cstring_to_long(cstr, cstr + s.size(), out);
}

// Converts the NUL-terminated text beginning at `start` to a double.
// Succeeds only if strtod consumed at least one character, reported no error
// through errno, and stopped exactly at `end`. *out is written only on success.
static bool cstring_to_double(const char *start, const char *end, double *out)
{
    char *parse_end = NULL;
    errno = 0;
    const double value = strtod(start, &parse_end);
    if (parse_end == start) {
        // Nothing was converted (e.g. empty range); see cstring_to_long().
        return false;
    }
    if (errno != 0 || parse_end != end) {
        return false;
    }
    *out = value;
    return true;
}

// Same contract as cstring_to_double(), applied to the whole of `s`.
static bool string_to_double(const string &s, double *out)
{
    const char *cstr = s.c_str();
    return cstring_to_double(cstr, cstr + s.size(), out);
}

// Runs `parser` over the slice [start, start + length) of `row` without
// copying the slice. strtol/strtod need NUL-terminated input, so the character
// following the slice is temporarily overwritten with '\0' and restored before
// returning. When the slice ends at the end of the string the terminator is
// already in place and the row is left untouched.
template <typename T>
static bool parse_field_in_place(string &row, int start, int length,
                                 bool (*parser)(const char *, const char *, T *),
                                 T *out)
{
    const string::size_type end_index = static_cast<string::size_type>(start) + length;
    const bool needs_terminator = end_index < row.size();
    char saved = '\0';
    if (needs_terminator) {
        saved = row[end_index];
        row[end_index] = '\0';
    }
    const char *base = row.c_str();
    const bool ok = parser(base + start, base + end_index, out);
    if (needs_terminator) {
        row[end_index] = saved;
    }
    return ok;
}

// Copies the text of field `fieldIndex` into *out and returns true, or returns
// false (leaving *out untouched) if the index is out of range. Fields that
// contain the quote character are passed through unquote(); '\n' and '\r'
// characters are stripped from the result.
bool CSVRow::getFieldAsString(unsigned int fieldIndex, string *out) const
{
    if (fieldIndex >= m_fields.size()) {
        return false;
    }

    const CSVField &field = m_fields[fieldIndex];
    if (field.length == 0) {
        out->clear();
        return true;
    }

    string s = m_row->substr(field.start, field.length);
    if (field.hasQuotes) {
        s = unquote(s, m_quote);
    }
    // remove newlines
    s.erase(std::remove(s.begin(), s.end(), '\n'), s.end());
    s.erase(std::remove(s.begin(), s.end(), '\r'), s.end());
    *out = s;
    return true;
}

// Parses field `fieldIndex` as a base-10 long. Returns false if the index is
// out of range or the whole field is not a valid number (an empty field is not
// a valid number). *out is written only on success.
bool CSVRow::getFieldAsLong(unsigned int fieldIndex, long *out) const
{
    if (fieldIndex >= m_fields.size()) {
        return false;
    }

    const CSVField &field = m_fields[fieldIndex];
    if (field.hasQuotes) {
        // Quoted fields have to be unquoted first, which requires a copy.
        string text;
        if (!getFieldAsString(fieldIndex, &text)) {
            return false;
        }
        return string_to_long(text, out);
    }
    // Unquoted fields are parsed directly inside the row buffer.
    return parse_field_in_place(*m_row, field.start, field.length, cstring_to_long, out);
}

// Parses field `fieldIndex` as an int. In addition to the failure cases of
// getFieldAsLong(), returns false if the value does not fit into an int.
bool CSVRow::getFieldAsInt(unsigned int fieldIndex, int *out) const
{
    long value;
    if (!getFieldAsLong(fieldIndex, &value)) {
        return false;
    }
    if (value < INT_MIN || value > INT_MAX) {
        // underflow or overflow
        return false;
    }
    *out = static_cast<int>(value);
    return true;
}

// Parses field `fieldIndex` as a double. Returns false if the index is out of
// range or the whole field is not accepted by strtod (an empty field is not a
// valid number). *out is written only on success.
bool CSVRow::getFieldAsDouble(unsigned int fieldIndex, double *out) const
{
    if (fieldIndex >= m_fields.size()) {
        return false;
    }

    const CSVField &field = m_fields[fieldIndex];
    if (field.hasQuotes) {
        // Quoted fields have to be unquoted first, which requires a copy.
        string text;
        if (!getFieldAsString(fieldIndex, &text)) {
            return false;
        }
        return string_to_double(text, out);
    }
    // Unquoted fields are parsed directly inside the row buffer.
    return parse_field_in_place(*m_row, field.start, field.length, cstring_to_double, out);
}

// Returns a copy of the file path given at construction, or an empty string
// if no path was given.
string CSVRow::getFilePath() const
{
    if (m_filePath) {
        return string(*m_filePath);
    }
    return "";
}

// Returns the line number given at construction.
long CSVRow::getLineNumber() const
{
    return m_lineNumber;
}

// Returns the number of fields found by parse().
unsigned int CSVRow::getNumberOfFields() const
{
    return static_cast<unsigned int>(m_fields.size());
}

// Splits the row into fields at every `delimiter` that is not enclosed in
// `quote` characters. Trailing '\n' / '\r' characters are not part of any
// field. A completely empty row produces no fields; a row ending in a
// delimiter produces a final empty field.
void CSVRow::parse(char delimiter, char quote)
{
    m_fields.clear();
    //TODO: properly handle unicode strings (could use QString::fromUtf8 and iterate over that)

    int start = 0;
    int length = -1; // -1 means "no character seen yet for the current field"
    bool in_quotes = false;
    bool has_quotes = false;

    unsigned int size = m_row->size();
    // handle trailing \n and \r chars
    while (size > 0 && ((*m_row)[size - 1] == '\n' || (*m_row)[size - 1] == '\r')) {
        size--;
    }

    for (unsigned int i = 0; i < size; i++) {
        length++;
        const char c = (*m_row)[i];
        if (c == delimiter && !in_quotes) {
            // At this point `length` counts the characters before the delimiter.
            m_fields.push_back(CSVField(start, length, has_quotes));
            start = static_cast<int>(i + 1);
            length = -1;
            has_quotes = false;
        }
        // Deliberately not an else-branch: both tests are evaluated for every
        // character.
        if (c == quote) {
            has_quotes = true;
            in_quotes = !in_quotes;
        }
    }

    // Emit the last field unless the row was empty.
    if (!(start == 0 && length == -1)) {
        length++;
        m_fields.push_back(CSVField(start, length, has_quotes));
    }
}

// Returns the content of the quoted sections of `fieldStr`. A quote character
// that directly follows a closing quote (i.e. a doubled quote) yields one
// literal quote character. Characters outside of quoted sections are dropped.
string CSVRow::unquote(const string &fieldStr, char quoteChar)
{
    string ret;
    bool in_quotes = false;
    bool last_char_was_quote = false;
    //TODO: properly handle unicode strings here too
    for (unsigned int i = 0; i < fieldStr.size(); i++) {
        const char c = fieldStr[i];
        if (in_quotes) {
            if (c == quoteChar) {
                in_quotes = false;
                last_char_was_quote = true;
            } else {
                ret.push_back(c);
                last_char_was_quote = false;
            }
        } else {
            if (c == quoteChar) {
                in_quotes = true;
                if (last_char_was_quote) {
                    // Doubled quote: the previous quote closed the section,
                    // this one reopens it and stands for a literal quote.
                    ret.push_back(quoteChar);
                }
            }
            last_char_was_quote = false;
        }
    }
    return ret;
}

// Returns a copy of `fieldStr` in which every occurrence of `quoteChar` is
// doubled. No enclosing quote characters are added.
string CSVRow::quote(const string &fieldStr, char quoteChar)
{
    string ret;
    //TODO: properly handle unicode strings here too
    //TODO: optimize using string::find
    for (unsigned int i = 0; i < fieldStr.size(); i++) {
        const char c = fieldStr[i];
        if (c == quoteChar) {
            ret.append(2, quoteChar);
        } else {
            ret.push_back(c);
        }
    }
    return ret;
}

// Opens `filePath` and feeds it to readFromStream(). Returns false and prints
// a message to stderr if the file cannot be opened; otherwise returns the
// result of readFromStream().
bool CSVReader::readFromFile(const std::string filePath, CSVReader::RowCallback callback, void *userData, char delimiter, char quote)
{
    ifstream stream;
    stream.open(filePath.c_str(), ios::in);
    if (!stream) {
        cerr << "Error: Cannot read file \"" << filePath << "\"" << endl;
        return false;
    }

    const bool ok = CSVReader::readFromStream(stream, callback, userData, &filePath, delimiter, quote);
    stream.close();
    return ok;
}

// Reads `stream` line by line ('\n' separated) and invokes `callback` with a
// CSVRow for every non-empty line. Line numbers start at 1 and also count the
// skipped empty lines. Stops and returns false as soon as the callback
// returns false; returns true once the stream is exhausted.
bool CSVReader::readFromStream(std::istream &stream, CSVReader::RowCallback callback, void *userData, const string * const filePath, char delimiter, char quote)
{
    string line;
    long nr = 0;
    while (getline(stream, line, '\n')) {
        nr++;
        if (line.empty()) {
            continue;
        }
        CSVRow row(&line, filePath, nr, delimiter, quote);
        if (!callback(row, userData)) {
            return false;
        }
    }
    return true;
}