summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 4016b9a)
author | Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> | Mon, 14 Nov 2011 12:06:20 +0000 (12:06 +0000)
committer | Gert van Valkenhoef <g.h.m.van.valkenhoef@rug.nl> | Mon, 14 Nov 2011 12:06:20 +0000 (12:06 +0000)
odcread.cc | patch | blob | history |
diff --git a/odcread.cc b/odcread.cc
index 891ea990102ba11cd63da7e952d66b4c2a3c379f..d2aa025e12713d516fa5355d7d4751a03c5a1a50 100644 (file)
--- a/odcread.cc
+++ b/odcread.cc
return nl_langinfo(CODESET);
}
virtual void textShortPiece(const ShortPiece *piece) {
- iconv_t conv = iconv_open(getCharSet(), "ISO-8859-1");
- if (conv == (iconv_t)-1) {
- std::string str("iconv initialization error: ");
- str += strerror(errno);
- throw str.c_str();
- }
- size_t bytesIn = piece->size() + 1;
- SHORTCHAR *in = piece->getBuffer();
- size_t bytesOut = bytesIn; // FIXME probably not safe.
- char *out = new char[bytesIn];
- char *outPtr = out;
- size_t rval = iconv(conv, &in, &bytesIn, &outPtr, &bytesOut);
- if (rval == (size_t)-1) {
- std::string str("iconv error: ");
- str += strerror(errno);
- throw str.c_str();
- }
- iconv_close(conv);
- std::string str(out);
- for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
- if (*it == '\r') *it = '\n';
- }
+ std::string str = convert((char *)piece->getBuffer(), piece->size() + 1, (char *)"ISO-8859-1", 1);
d_context.top()->addPiece(str);
}
virtual void textLongPiece(const LongPiece *piece) {
- /*
- char *out = (char*)piece->getBuffer();
- std::string str(out);
+ std::string str = convert((char *)piece->getBuffer(), piece->size() + 2, (char *)"UCS-2", 2);
d_context.top()->addPiece(str);
- */
- //d_convLong = iconv_open(setlocale(LC_CTYPE, 0), "UCS-2");
- iconv_t conv = iconv_open(getCharSet(), "UCS-2");
+ }
+
+ /**
+ * Convert an input character buffer in the given encoding to the
+ * locale's encoding.
+ */
+ std::string convert(char *in, size_t bytesIn, char *encodingIn, size_t inBytesPerChar) {
+ // Convert from the input encoding to the locale's encoding
+ iconv_t conv = iconv_open(getCharSet(), encodingIn);
+
+ // Handle errors by throwing a readable message
if (conv == (iconv_t)-1) {
std::string str("iconv initialization error: ");
str += strerror(errno);
throw str.c_str();
}
- size_t bytesIn = piece->size() + 2;
- char *in = (char*)piece->getBuffer();
- size_t bytesOut = bytesIn; // FIXME probably not safe.
- char *out = new char[bytesIn];
+
+ // Assume at most 4 bytes per character are needed
+ size_t bytesOut = 4 * bytesIn / inBytesPerChar;
+
+ // Allocate the output buffer
+ char *out = new char[bytesOut];
char *outPtr = out;
+
+ // Perform conversion
size_t rval = iconv(conv, &in, &bytesIn, &outPtr, &bytesOut);
if (rval == (size_t)-1) {
std::string str("iconv error: ");
str += strerror(errno);
throw str.c_str();
}
+
+ // Free the iconv state
iconv_close(conv);
+
+ // Copy result into a std::string
std::string str(out);
+ delete out;
+
+ // Convert newlines
for (std::string::iterator it = str.begin(); it < str.end(); ++it) {
if (*it == '\r') *it = '\n';
}
- d_context.top()->addPiece(str);
+
+ return str;
}
};