#include <textmodel.h>
#include <visitor.h>
+// Character encoding conversions
+#include <locale.h>
+#include <iconv.h>
+#include <errno.h>
+#include <string.h>
+
namespace odc {
class Context {
public:
virtual void foldRight() {
terminateContext();
}
+ char *getCharSet() { // Name of a character set; presumably the intended output encoding (not called in the visible code) — TODO confirm
+ return "UTF-8"; // FIXME: hard-coded; should be derived from setlocale(LC_CTYPE, 0) plus processing of its result
+ }
virtual void textShortPiece(const ShortPiece *piece) {
- std::string text = piece->getText();
- d_context.top()->addPiece(text);
+ iconv_t conv = iconv_open("UTF-8", "ISO-8859-1");
+ if (conv == (iconv_t)-1) {
+ std::string str("iconv initialization error: ");
+ str += strerror(errno);
+ throw str.c_str();
+ }
+ size_t bytesIn = piece->size() + 1;
+ SHORTCHAR *in = piece->getBuffer();
+ size_t bytesOut = bytesIn; // FIXME probably not safe.
+ char *out = new char[bytesIn];
+ char *outPtr = out;
+ size_t rval = iconv(conv, &in, &bytesIn, &outPtr, &bytesOut);
+ if (rval == (size_t)-1) {
+ std::string str("iconv error: ");
+ str += strerror(errno);
+ throw str.c_str();
+ }
+ iconv_close(conv);
+ std::string str(out);
+ for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
+ if (*it == '\r') *it = '\n';
+ }
+ d_context.top()->addPiece(str);
}
virtual void textLongPiece(const LongPiece *piece) {
- throw "Long Piece not handled";
- //std::string text = piece->getText();
- //d_context.top()->addPiece(text);
+ char *out = (char*)piece->getBuffer(); // view the 16-bit buffer as raw bytes; no conversion is performed
+ std::string str(out); // NOTE(review): copies only up to the first zero byte, so 16-bit text is probably truncated — confirm
+ d_context.top()->addPiece(str); // append the unconverted bytes to the context on top of the stack
+ // TODO: convert with iconv instead, e.g. d_convLong = iconv_open(setlocale(LC_CTYPE, 0), "UCS-2");
+ /* Disabled draft of an iconv-based conversion, kept for reference:
+ iconv_t conv = iconv_open("UTF-8", "UTF-8");
+ if (conv == (iconv_t)-1) {
+ std::string str("iconv initialization error: ");
+ str += strerror(errno);
+ throw str.c_str();
+ }
+ size_t bytesIn = piece->size() + 1;
+ char *in = (char*)piece->getBuffer();
+ size_t bytesOut = bytesIn; // FIXME probably not safe.
+ char *out = new char[bytesIn];
+ char *outPtr = out;
+ size_t rval = iconv(conv, &in, &bytesIn, &outPtr, &bytesOut);
+ if (rval == (size_t)-1) {
+ std::string str("iconv error: ");
+ str += strerror(errno);
+ throw str.c_str();
+ }
+ iconv_close(conv);
+ std::string str(out);
+ for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
+ if (*it == '\r') *it = '\n';
+ }
+ d_context.top()->addPiece(str);*/
}
};
if (argc < 2) {
return 1;
}
+
+ // Set the locale according to the terminal's environment
+ setlocale(LC_ALL, "");
+
std::ifstream in(argv[1], std::ios::in | std::ios::binary);
odc::Store* s;
TextPiece::TextPiece(size_t len): d_len(len) {}
+unsigned TextPiece::size() const { // Length stored by the constructor (d_len), returned as unsigned.
+ return d_len;
+}
+
LongPiece::LongPiece(size_t len): TextPiece(len * 2) {}
LongPiece::~LongPiece() {
return std::string("LongPiece(FIXME)");
}
-std::wstring LongPiece::getText() const {
- return std::wstring((wchar_t*)d_buf);
+CHAR* LongPiece::getBuffer() const { // Hands out the internal buffer pointer itself; nothing is copied or converted.
+ return d_buf;
}
void LongPiece::accept(Visitor &visitor) const {
return std::string("ShortPiece(") + std::string(d_buf) + std::string(")");
}
-std::string ShortPiece::getText() const {
- std::string str(d_buf);
- for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
- if (*it == '\r') *it = '\n';
- }
- return str;
+SHORTCHAR* ShortPiece::getBuffer() const { // Hands out the internal buffer pointer itself; '\r' is no longer replaced here.
+ return d_buf;
}
void ShortPiece::accept(Visitor &visitor) const {
virtual void read(Reader &reader) = 0;
virtual std::string toString() const = 0;
virtual void accept(Visitor &visitor) const = 0;
+ /**
+ * Size in bytes, excluding the null-character that terminates the string (i.e. the size that is read from file).
+ */
+ unsigned size() const;
};
/**
- * TextPiece consisting of 16-bit characters.
- * Not sure of the encoding.
+ * TextPiece consisting of 16-bit Unicode characters.
+ * It is not known whether the encoding is UCS-2 or UTF-16.
*/
class LongPiece : public TextPiece {
private:
~LongPiece();
virtual void read(Reader &reader);
virtual std::string toString() const;
+ virtual void accept(Visitor &visitor) const;
+
/**
- * Return the text contained in this piece.
- * Currently just casting the buffer to wchar_t* and hoping for the best.
+ * Return the piece's buffer of 16-bit Unicode characters (whether the encoding is UCS-2 or UTF-16 is not known).
*/
- virtual std::wstring getText() const;
- virtual void accept(Visitor &visitor) const;
+ CHAR *getBuffer() const;
};
/**
- * TextPiece consisting of 8-bit characters.
+ * TextPiece consisting of 8-bit characters in the Latin-1 extension of ASCII.
*/
class ShortPiece : public TextPiece {
private:
~ShortPiece();
virtual void read(Reader &reader);
virtual std::string toString() const;
- virtual std::string getText() const;
virtual void accept(Visitor &visitor) const;
+
+ /**
+ * Return the piece's buffer of 8-bit (Latin-1) characters.
+ */
+ SHORTCHAR *getBuffer() const;
};
/**