| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382 |
- #include "csv_reader.h"
-
- #include <errno.h>
- #include <limits.h>
- #include <stdlib.h>
-
- #include <algorithm>
- #include <fstream>
- #include <iostream>
- #include <sstream>
-
- using namespace std;
-
-
- CSVField::CSVField(int start, int length, bool hasQuotes): start(start),
- length(length),
- hasQuotes(hasQuotes)
- {
- }
-
- CSVRow::CSVRow(string *row,
- const std::string * const filePath,
- long lineNumber,
- char delimiter,
- char quote): m_row(row),
- m_filePath(filePath),
- m_lineNumber(lineNumber),
- m_quote(quote)
- {
- parse(delimiter, quote);
- }
-
- string CSVRow::toString() const {
- stringstream ss;
-
- ss << "CSVRow (" << endl;
-
- if (m_lineNumber > 0) {
- ss << " line #: \"" << m_lineNumber << "\"" << endl;
- }
-
- if (m_filePath) {
- ss << " path: \"" << *m_filePath << "\"" << endl;
- }
-
- ss << " value: \"" << *m_row << "\"" << endl;
-
- for (unsigned int i = 0; i < m_fields.size(); i++) {
- string row;
- getFieldAsString(i, &row);
- ss << " field " << i << ": '" << row << "'" << endl;
- }
-
- ss << ")";
- return ss.str();
- }
-
// Converts the whole of `s` to a base-10 long using strtol.
//
// Returns true and writes the value to *out only when strtol reported no
// error through errno and stopped exactly at the end of the string.
// Otherwise returns false and leaves *out untouched.
//
// errno is reset to 0 before the conversion. Note that an empty string is
// accepted and yields 0, because strtol then stops where it started, which
// is also the end of the string.
static bool string_to_long(const std::string &s, long *out) {
    const char *const first = s.c_str();
    const char *const expectedStop = first + s.size();
    char *stop = NULL;

    errno = 0;
    const long parsed = strtol(first, &stop, 10);

    const bool ok = (errno == 0) && (stop == expectedStop);
    if (ok) {
        *out = parsed;
    }
    return ok;
}
-
// Converts the characters beginning at `start` to a base-10 long using
// strtol, and requires the conversion to stop exactly at `end`.
//
// Returns true and writes the value to *out on success; returns false and
// leaves *out untouched when strtol set errno or stopped anywhere other than
// `end`. errno is reset to 0 before the conversion. strtol itself decides
// where to stop, so the text must be terminated (by NUL or by a character
// that is not part of a number) at or before `end`.
static bool cstring_to_long(const char *start, const char *end, long *out) {
    char *stop = NULL;

    errno = 0;
    const long parsed = strtol(start, &stop, 10);

    const bool ok = (errno == 0) && (stop == end);
    if (ok) {
        *out = parsed;
    }
    return ok;
}
-
// Converts the whole of `s` to a double using strtod.
//
// Returns true and writes the value to *out only when strtod reported no
// error through errno and stopped exactly at the end of the string.
// Otherwise returns false and leaves *out untouched.
//
// errno is reset to 0 before the conversion. Note that an empty string is
// accepted and yields 0, because strtod then stops where it started, which
// is also the end of the string.
static bool string_to_double(const std::string &s, double *out) {
    const char *const first = s.c_str();
    const char *const expectedStop = first + s.size();
    char *stop = NULL;

    errno = 0;
    const double parsed = strtod(first, &stop);

    const bool ok = (errno == 0) && (stop == expectedStop);
    if (ok) {
        *out = parsed;
    }
    return ok;
}
-
// Converts the characters beginning at `start` to a double using strtod, and
// requires the conversion to stop exactly at `end`.
//
// Returns true and writes the value to *out on success; returns false and
// leaves *out untouched when strtod set errno or stopped anywhere other than
// `end`. errno is reset to 0 before the conversion. strtod itself decides
// where to stop, so the text must be terminated (by NUL or by a character
// that is not part of a number) at or before `end`.
static bool cstring_to_double(const char *start, const char *end, double *out) {
    char *stop = NULL;

    errno = 0;
    const double parsed = strtod(start, &stop);

    const bool ok = (errno == 0) && (stop == end);
    if (ok) {
        *out = parsed;
    }
    return ok;
}
-
- bool CSVRow::getFieldAsString(unsigned int fieldIndex, string *out) const {
- if (fieldIndex < m_fields.size()) {
- CSVField field = m_fields[fieldIndex];
-
- if (field.length == 0) {
- *out = "";
- } else {
- string s;
-
- if (field.hasQuotes) {
- s = m_row->substr(field.start, field.length);
- s = unquote(s, m_quote);
- } else {
- s = m_row->substr(field.start, field.length);
- }
-
- // remove newlines
- s.erase(std::remove(s.begin(), s.end(), '\n'), s.end());
- s.erase(std::remove(s.begin(), s.end(), '\r'), s.end());
-
- *out = s;
- }
-
- return true;
- } else {
- return false;
- }
- }
-
- bool CSVRow::getFieldAsLong(unsigned int fieldIndex, long *out) const {
- if (fieldIndex >= m_fields.size()) {
- return false;
- }
-
- if (m_fields[fieldIndex].hasQuotes) {
- string field;
- if (!getFieldAsString(fieldIndex, &field)) {
- return false;
- }
-
- return string_to_long(field, out);
- } else {
- const CSVField *field = &m_fields[fieldIndex];
- int sep_index = field->start + field->length;
- char sep = (*m_row)[sep_index];
- (*m_row)[sep_index] = '\0';
-
- bool ok = cstring_to_long(m_row->c_str() + field->start, m_row->c_str() + sep_index, out);
-
- (*m_row)[sep_index] = sep;
-
- return ok;
- }
- }
-
- bool CSVRow::getFieldAsInt(unsigned int fieldIndex, int *out) const {
- if (fieldIndex >= m_fields.size()) {
- return false;
- }
-
- long x;
- if (!getFieldAsLong(fieldIndex, &x)) {
- return false;
- }
-
- if (x < INT_MIN || x > INT_MAX) {
- // underflow or overflow
- return false;
- }
-
- *out = (int)x;
-
- return true;
- }
-
- bool CSVRow::getFieldAsDouble(unsigned int fieldIndex, double *out) const {
- if (fieldIndex >= m_fields.size()) {
- return false;
- }
-
- if (m_fields[fieldIndex].hasQuotes) {
- string field;
- if (!getFieldAsString(fieldIndex, &field)) {
- return false;
- }
-
- return string_to_double(field, out);
- } else {
- const CSVField *field = &m_fields[fieldIndex];
- int sep_index = field->start + field->length;
- char sep = (*m_row)[sep_index];
- (*m_row)[sep_index] = '\0';
-
- bool ok = cstring_to_double(m_row->c_str() + field->start, m_row->c_str() + sep_index, out);
-
- (*m_row)[sep_index] = sep;
-
- return ok;
- }
- }
-
- string CSVRow::getFilePath() const {
- if (m_filePath) {
- return string(*m_filePath);
- } else {
- return "";
- }
- }
-
- long CSVRow::getLineNumber() const {
- return m_lineNumber;
- }
-
- unsigned int CSVRow::getNumberOfFields() const {
- return m_fields.size();
- }
-
- void CSVRow::parse(char delimiter, char quote) {
- m_fields.clear();
-
- //TODO: properly handle unicode strings (could use QString::fromUtf8 and iterate over that)
- int start = 0;
- int length = -1;
- bool in_quotes = false;
- bool has_quotes = false;
-
- unsigned int size = m_row->size();
-
- // handle trailing \n and \r chars
- while (size > 0 && ((*m_row)[size - 1] == '\n' || (*m_row)[size - 1] == '\r')) {
- size--;
- }
-
- for (unsigned int i = 0; i < size; i++) {
- length++;
- char c = (*m_row)[i];
-
- if (c == delimiter && !in_quotes) {
- m_fields.push_back(CSVField(start, length, has_quotes));
- start = i + 1;
- length = -1;
- has_quotes = false;
- }
-
- if (c == quote) {
- has_quotes = true;
- if (in_quotes) {
- in_quotes = false;
- } else {
- in_quotes = true;
- }
- }
- }
- if (!(start == 0 && length == -1)) {
- length++;
- m_fields.push_back(CSVField(start, length, has_quotes));
- }
- }
-
- string CSVRow::unquote(const string &fieldStr, char quoteChar) {
- string ret;
- bool in_quotes = false;
- bool last_char_was_quote = false;
-
- //TODO: properly handle unicode strings here too
- for (unsigned int i = 0; i < fieldStr.size(); i++) {
- char c = fieldStr[i];
-
- if (in_quotes) {
- if (c == quoteChar) {
- in_quotes = false;
- last_char_was_quote = true;
- } else {
- ret.push_back(c);
- last_char_was_quote = false;
- }
- } else {
- if (c == quoteChar) {
- in_quotes = true;
- if (last_char_was_quote) {
- ret.push_back(quoteChar);
- }
- }
- last_char_was_quote = false;
- }
- }
-
- return ret;
- }
-
- string CSVRow::quote(const string &fieldStr, char quoteChar) {
- string ret;
-
- //TODO: properly handle unicode strings here too
- //TODO: optimize using string::find
- for (unsigned int i = 0; i < fieldStr.size(); i++) {
- char c = fieldStr[i];
-
- if (c == quoteChar) {
- ret.append(2, quoteChar);
- } else {
- ret.push_back(c);
- }
- }
-
- return ret;
- }
-
- bool CSVReader::readFromFile(const std::string filePath,
- CSVReader::RowCallback callback,
- void *userData,
- char delimiter,
- char quote)
- {
- ifstream stream;
- stream.open(filePath.c_str(), ios::in);
- if (stream) {
- bool ok = CSVReader::readFromStream(stream,
- callback,
- userData,
- &filePath,
- delimiter,
- quote);
- stream.close();
- return ok;
- } else {
- cerr << "Error: Cannot read file \"" << filePath << "\"" << endl;
- return false;
- }
-
- return true;
- }
-
-
- bool CSVReader::readFromStream(std::istream &stream,
- CSVReader::RowCallback callback,
- void *userData,
- const string * const filePath,
- char delimiter,
- char quote)
- {
- string line = "";
- long nr = 0;
- while (getline(stream, line, '\n')) {
- nr++;
- if (line.size() > 0) {
- CSVRow row(&line, filePath, nr, delimiter, quote);
- if (!callback(row, userData)) {
- return false;
- }
- }
- }
-
- return true;
- }
|