summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 41d9c65)
raw | patch | inline | side by side (parent: 41d9c65)
author | Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> | |
Sun, 13 Nov 2011 20:03:38 +0000 (20:03 +0000) | ||
committer | Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> | |
Sun, 13 Nov 2011 20:03:38 +0000 (20:03 +0000) |
odcread.cc | patch | blob | history | |
textmodel.cc | patch | blob | history | |
textmodel.h | patch | blob | history |
diff --git a/odcread.cc b/odcread.cc
index 858028e6babf5e252ede8ade0c0f71d2946ceb4e..caa87e7713f0dc937cd24c717c54dbbc2add4160 100644 (file)
--- a/odcread.cc
+++ b/odcread.cc
#include <textmodel.h>
#include <visitor.h>
+// Character encoding conversions
+#include <locale.h>
+#include <iconv.h>
+#include <errno.h>
+#include <string.h>
+
namespace odc {
class Context {
public:
virtual void foldRight() { // visitor callback: delegates entirely to terminateContext()
terminateContext();
}
+ char *getCharSet() { // Returns the name of a character set; always "UTF-8" for now. NOTE(review): presumably meant as the iconv target charset, but nothing visible calls it yet -- confirm.
+ return "UTF-8"; // FIXME: derive from setlocale(LC_CTYPE, 0) + processing. NOTE(review): a string literal is returned as non-const char*; callers must not write through it.
+ }
virtual void textShortPiece(const ShortPiece *piece) {
- std::string text = piece->getText();
- d_context.top()->addPiece(text);
+ // Converts the piece's 8-bit Latin-1 text to UTF-8, maps '\r' to '\n' and
+ // adds the result to the context on top of the stack.
+ // Throws a const char* message if iconv cannot be initialised or the
+ // conversion fails.
+
+ // The message lives in static storage so that the thrown pointer is still
+ // valid after stack unwinding (a pointer into a local string would dangle).
+ static std::string error;
+ iconv_t conv = iconv_open("UTF-8", "ISO-8859-1");
+ if (conv == (iconv_t)-1) {
+     error = std::string("iconv initialization error: ") + strerror(errno);
+     throw error.c_str();
+ }
+ size_t bytesIn = piece->size() + 1; // + 1: also convert the terminating null character
+ SHORTCHAR *in = piece->getBuffer();
+ // A Latin-1 byte becomes at most two UTF-8 bytes, so twice the input size
+ // is always enough; std::string owns the buffer, so nothing leaks.
+ std::string out(2 * bytesIn, '\0');
+ char *outPtr = &out[0];
+ size_t bytesOut = out.size();
+ size_t rval = iconv(conv, &in, &bytesIn, &outPtr, &bytesOut);
+ if (rval == (size_t)-1) {
+     // Build the message before iconv_close() gets a chance to change errno.
+     error = std::string("iconv error: ") + strerror(errno);
+     iconv_close(conv);
+     throw error.c_str();
+ }
+ iconv_close(conv);
+ std::string str(out.c_str()); // keep only the text before the first null character
+ for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
+     if (*it == '\r') *it = '\n';
+ }
+ d_context.top()->addPiece(str);
}
virtual void textLongPiece(const LongPiece *piece) { // visitor callback: adds the LongPiece's text to the context on top of the stack
- throw "Long Piece not handled";
- //std::string text = piece->getText();
- //d_context.top()->addPiece(text);
+ char *out = (char*)piece->getBuffer(); // views the CHAR buffer as plain bytes; no character-set conversion is done
+ std::string str(out); // NOTE(review): this constructor stops at the first zero byte, so 16-bit text is presumably truncated -- confirm
+ d_context.top()->addPiece(str);
+ //d_convLong = iconv_open(setlocale(LC_CTYPE, 0), "UCS-2");
+ /* Disabled draft of an iconv-based conversion (as written it converts UTF-8 to UTF-8):
+ iconv_t conv = iconv_open("UTF-8", "UTF-8");
+ if (conv == (iconv_t)-1) {
+ std::string str("iconv initialization error: ");
+ str += strerror(errno);
+ throw str.c_str();
+ }
+ size_t bytesIn = piece->size() + 1;
+ char *in = (char*)piece->getBuffer();
+ size_t bytesOut = bytesIn; // FIXME probably not safe.
+ char *out = new char[bytesIn];
+ char *outPtr = out;
+ size_t rval = iconv(conv, &in, &bytesIn, &outPtr, &bytesOut);
+ if (rval == (size_t)-1) {
+ std::string str("iconv error: ");
+ str += strerror(errno);
+ throw str.c_str();
+ }
+ iconv_close(conv);
+ std::string str(out);
+ for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
+ if (*it == '\r') *it = '\n';
+ }
+ d_context.top()->addPiece(str);*/
}
};
if (argc < 2) {
return 1;
}
+
+ // Set the locale according to the terminal's environment
+ setlocale(LC_ALL, "");
+
std::ifstream in(argv[1], std::ios::in | std::ios::binary);
odc::Store* s;
diff --git a/textmodel.cc b/textmodel.cc
index dd982dc490873b612a2377269d35fa3b30e0edac..7afc34c7983ccaf56ee3cb37ca05a92a1cb0de4a 100644 (file)
--- a/textmodel.cc
+++ b/textmodel.cc
TextPiece::TextPiece(size_t len): d_len(len) {} // stores len in d_len, which size() reports
+unsigned TextPiece::size() const { // returns d_len, the length given to the constructor
+ return d_len; // NOTE(review): the constructor takes a size_t but this returns unsigned; the value may narrow where size_t is wider
+}
+
LongPiece::LongPiece(size_t len): TextPiece(len * 2) {} // passes len * 2 to TextPiece; presumably a count of 16-bit characters converted to bytes -- confirm
LongPiece::~LongPiece() {
return std::string("LongPiece(FIXME)");
}
-std::wstring LongPiece::getText() const {
- return std::wstring((wchar_t*)d_buf);
+CHAR* LongPiece::getBuffer() const { // gives callers direct access to the piece's character buffer
+ return d_buf; // the d_buf pointer itself, not a copy of the data
}
void LongPiece::accept(Visitor &visitor) const {
return std::string("ShortPiece(") + std::string(d_buf) + std::string(")");
}
-std::string ShortPiece::getText() const {
- std::string str(d_buf);
- for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
- if (*it == '\r') *it = '\n';
- }
- return str;
+SHORTCHAR* ShortPiece::getBuffer() const { // gives callers direct access to the piece's character buffer
+ return d_buf; // the d_buf pointer itself, not a copy; unlike the removed getText(), no '\r' to '\n' mapping happens here
}
void ShortPiece::accept(Visitor &visitor) const {
diff --git a/textmodel.h b/textmodel.h
index 8eb26eb47d419c2a2d2b7d5e30765161e4898d91..e6faa6ec8ee2a25d792f6fd451ca2f29d61fd43e 100644 (file)
--- a/textmodel.h
+++ b/textmodel.h
virtual void read(Reader &reader) = 0;
virtual std::string toString() const = 0;
virtual void accept(Visitor &visitor) const = 0;
+ /**
+ * Size in bytes, excluding the null-character that terminates the string (i.e. the size that is read from file).
+ */
+ unsigned size() const;
};
/**
- * TextPiece consisting of 16-bit characters.
- * Not sure of the encoding.
+ * TextPiece consisting of 16-bit unicode characters.
+ * Not sure if the encoding is UCS-2 or UTF-16.
*/
class LongPiece : public TextPiece {
private:
~LongPiece();
virtual void read(Reader &reader);
virtual std::string toString() const;
+ virtual void accept(Visitor &visitor) const;
+
/**
- * Return the text contained in this piece.
- * Currently just casting the buffer to wchar_t* and hoping for the best.
+ * Get the buffer contents as 16-bit Unicode code units (whether the encoding is UCS-2 or UTF-16 is not yet known).
*/
- virtual std::wstring getText() const;
- virtual void accept(Visitor &visitor) const;
+ CHAR *getBuffer() const;
};
/**
- * TextPiece consisting of 8-bit characters.
+ * TextPiece consisting of 8-bit characters in the Latin-1 extension of ASCII.
*/
class ShortPiece : public TextPiece {
private:
~ShortPiece();
virtual void read(Reader &reader);
virtual std::string toString() const;
- virtual std::string getText() const;
virtual void accept(Visitor &visitor) const;
+
+ /**
+ * Get the buffer contents as 8-bit (Latin-1) characters.
+ */
+ SHORTCHAR *getBuffer() const;
};
/**