当前位置：首页 > 编程开发

FLAC 编码code节选

webgou16年前 (2010-12-03)编程开发94

FLAC库编码code节选 charset.h [CODE_LITE]/* * Copyright (C) 2001 Edmund Grimley Evans * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include /* * These functions are like the C library's mbtowc() and wctomb(), * but instead of depending on the locale they always work in UTF-8, * and they use int instead of wchar_t. */ int utf8_mbtowc(int *pwc, const char *s, size_t n); int utf8_wctomb(char *s, int wc); /* * This is an object-oriented version of mbtowc() and wctomb(). * The caller first uses charset_find() to get a pointer to struct * charset, then uses the mbtowc() and wctomb() methods on it. * The function charset_max() gives the maximum length of a * multibyte character in that encoding. * This API is only appropriate for stateless encodings like UTF-8 * or ISO-8859-3, but I have no intention of implementing anything * other than UTF-8 and 8-bit encodings. * * MINOR BUG: If there is no memory charset_find() may return 0 and * there is no way to distinguish this case from an unknown encoding. */ struct charset; struct charset *charset_find(const char *code); int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n); int charset_wctomb(struct charset *charset, char *s, int wc); int charset_max(struct charset *charset); /* * Function to convert a buffer from one encoding to another. * Invalid bytes are replaced by '#', and characters that are * not available in the target encoding are replaced by '?'. * Each of TO and TOLEN may be zero if the result is not wanted. * The input or output may contain null bytes, but the output * buffer is also null-terminated, so it is all right to * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0). * * Return value: * * -2 : memory allocation failed * -1 : unknown encoding * 0 : data was converted exactly * 1 : valid data was converted approximately (using '?') * 2 : input was invalid (but still converted, using '#') */ int charset_convert(const char *fromcode, const char *tocode, const char *from, size_t fromlen, char **to, size_t *tolen); [/CODE_LITE] charset.c [CODE_LITE]/* * Copyright (C) 2001 Edmund Grimley Evans * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* * See the corresponding header file for a description of the functions * that this file provides. * * This was first written for Ogg Vorbis but could be of general use. * * The only deliberate assumption about data sizes is that a short has * at least 16 bits, but this code has only been tested on systems with * 8-bit char, 16-bit short and 32-bit int. */ #ifdef HAVE_CONFIG_H #include #endif #ifndef HAVE_ICONV /* should be ifdef USE_CHARSET_CONVERT */ #include #include "charset.h" #include "charmaps.h" /* * This is like the standard strcasecmp, but it does not depend * on the locale. Locale-dependent functions can be dangerous: * we once had a bug involving strcasecmp("iso", "ISO") in a * Turkish locale! * * (I'm not really sure what the official standard says * about the sign of strcasecmp("Z", "["), but usually * we're only interested in whether it's zero.) */ static int ascii_strcasecmp(const char *s1, const char *s2) { char c1, c2; for (;; s1++, s2++) { if (!*s1 || !*s1) break; if (*s1 == *s2) continue; c1 = *s1; if ('a' <= c1 && c1 <= 'z') c1 += 'A' - 'a'; c2 = *s2; if ('a' <= c2 && c2 <= 'z') c2 += 'A' - 'a'; if (c1 != c2) break; } return (unsigned char)*s1 - (unsigned char)*s2; } /* * UTF-8 equivalents of the C library's wctomb() and mbtowc(). */ int utf8_mbtowc(int *pwc, const char *s, size_t n) { unsigned char c; int wc, i, k; if (!n || !s) return 0; c = *s; if (c < 0x80) { if (pwc) *pwc = c; return c ? 1 : 0; } else if (c < 0xc2) return -1; else if (c < 0xe0) { if (n >= 2 && (s[1] & 0xc0) == 0x80) { if (pwc) *pwc = ((c & 0x1f) << 6) | (s[1] & 0x3f); return 2; } else return -1; } else if (c < 0xf0) k = 3; else if (c < 0xf8) k = 4; else if (c < 0xfc) k = 5; else if (c < 0xfe) k = 6; else return -1; if (n < (size_t)k) return -1; wc = *s++ & ((1 << (7 - k)) - 1); for (i = 1; i < k; i++) { if ((*s & 0xc0) != 0x80) return -1; wc = (wc << 6) | (*s++ & 0x3f); } if (wc < (1 << (5 * k - 4))) return -1; if (pwc) *pwc = wc; return k; } int utf8_wctomb(char *s, int wc1) { unsigned int wc = wc1; if (!s) return 0; if (wc < (1u << 7)) { *s++ = wc; return 1; } else if (wc < (1u << 11)) { *s++ = 0xc0 | (wc >> 6); *s++ = 0x80 | (wc & 0x3f); return 2; } else if (wc < (1u << 16)) { *s++ = 0xe0 | (wc >> 12); *s++ = 0x80 | ((wc >> 6) & 0x3f); *s++ = 0x80 | (wc & 0x3f); return 3; } else if (wc < (1u << 21)) { *s++ = 0xf0 | (wc >> 18); *s++ = 0x80 | ((wc >> 12) & 0x3f); *s++ = 0x80 | ((wc >> 6) & 0x3f); *s++ = 0x80 | (wc & 0x3f); return 4; } else if (wc < (1u << 26)) { *s++ = 0xf8 | (wc >> 24); *s++ = 0x80 | ((wc >> 18) & 0x3f); *s++ = 0x80 | ((wc >> 12) & 0x3f); *s++ = 0x80 | ((wc >> 6) & 0x3f); *s++ = 0x80 | (wc & 0x3f); return 5; } else if (wc < (1u << 31)) { *s++ = 0xfc | (wc >> 30); *s++ = 0x80 | ((wc >> 24) & 0x3f); *s++ = 0x80 | ((wc >> 18) & 0x3f); *s++ = 0x80 | ((wc >> 12) & 0x3f); *s++ = 0x80 | ((wc >> 6) & 0x3f); *s++ = 0x80 | (wc & 0x3f); return 6; } else return -1; } /* * The charset "object" and methods. */ struct charset { int max; int (*mbtowc)(void *table, int *pwc, const char *s, size_t n); int (*wctomb)(void *table, char *s, int wc); void *map; }; int charset_mbtowc(struct charset *charset, int *pwc, const char *s, size_t n) { return (*charset->mbtowc)(charset->map, pwc, s, n); } int charset_wctomb(struct charset *charset, char *s, int wc) { return (*charset->wctomb)(charset->map, s, wc); } int charset_max(struct charset *charset) { return charset->max; } /* * Implementation of UTF-8. */ static int mbtowc_utf8(void *map, int *pwc, const char *s, size_t n) { (void)map; return utf8_mbtowc(pwc, s, n); } static int wctomb_utf8(void *map, char *s, int wc) { (void)map; return utf8_wctomb(s, wc); } /* * Implementation of US-ASCII. * Probably on most architectures this compiles to less than 256 bytes * of code, so we can save space by not having a table for this one. */ static int mbtowc_ascii(void *map, int *pwc, const char *s, size_t n) { int wc; (void)map; if (!n || !s) return 0; wc = (unsigned char)*s; if (wc & ~0x7f) return -1; if (pwc) *pwc = wc; return wc ? 1 : 0; } static int wctomb_ascii(void *map, char *s, int wc) { (void)map; if (!s) return 0; if (wc & ~0x7f) return -1; *s = wc; return 1; } /* * Implementation of ISO-8859-1. * Probably on most architectures this compiles to less than 256 bytes * of code, so we can save space by not having a table for this one. */ static int mbtowc_iso1(void *map, int *pwc, const char *s, size_t n) { int wc; (void)map; if (!n || !s) return 0; wc = (unsigned char)*s; if (wc & ~0xff) return -1; if (pwc) *pwc = wc; return wc ? 1 : 0; } static int wctomb_iso1(void *map, char *s, int wc) { (void)map; if (!s) return 0; if (wc & ~0xff) return -1; *s = wc; return 1; } /* * Implementation of any 8-bit charset. */ struct map { const unsigned short *from; struct inverse_map *to; }; static int mbtowc_8bit(void *map1, int *pwc, const char *s, size_t n) { struct map *map = map1; unsigned short wc; if (!n || !s) return 0; wc = map->from[(unsigned char)*s]; if (wc == 0xffff) return -1; if (pwc) *pwc = (int)wc; return wc ? 1 : 0; } /* * For the inverse map we use a hash table, which has the advantages * of small constant memory requirement and simple memory allocation, * but the disadvantage of slow conversion in the worst case. * If you need real-time performance while letting a potentially * malicious user define their own map, then the method used in * linux/drivers/char/consolemap.c would be more appropriate. */ struct inverse_map { unsigned char first[256]; unsigned char next[256]; }; /* * The simple hash is good enough for this application. * Use the alternative trivial hashes for testing. */ #define HASH(i) ((i) & 0xff) /* #define HASH(i) 0 */ /* #define HASH(i) 99 */ static struct inverse_map *make_inverse_map(const unsigned short *from) { struct inverse_map *to; char used[256]; int i, j, k; to = (struct inverse_map *)malloc(sizeof(struct inverse_map)); if (!to) return 0; for (i = 0; i < 256; i++) to->first[i] = to->next[i] = used[i] = 0; for (i = 255; i >= 0; i--) if (from[i] != 0xffff) { k = HASH(from[i]); to->next[i] = to->first[k]; to->first[k] = i; used[k] = 1; } /* Point the empty buckets at an empty list. */ for (i = 0; i < 256; i++) if (!to->next[i]) break; if (i < 256) for (j = 0; j < 256; j++) if (!used[j]) to->first[j] = i; return to; } int wctomb_8bit(void *map1, char *s, int wc1) { struct map *map = map1; unsigned short wc = wc1; int i; if (!s) return 0; if (wc1 & ~0xffff) return -1; if (1) /* Change 1 to 0 to test the case where malloc fails. */ if (!map->to) map->to = make_inverse_map(map->from); if (map->to) { /* Use the inverse map. */ i = map->to->first[HASH(wc)]; for (;;) { if (map->from[i] == wc) { *s = i; return 1; } if (!(i = map->to->next[i])) break; } } else { /* We don't have an inverse map, so do a linear search. */ for (i = 0; i < 256; i++) if (map->from[i] == wc) { *s = i; return 1; } } return -1; } /* * The "constructor" charset_find(). */ struct charset charset_utf8 = { 6, &mbtowc_utf8, &wctomb_utf8, 0 }; struct charset charset_iso1 = { 1, &mbtowc_iso1, &wctomb_iso1, 0 }; struct charset charset_ascii = { 1, &mbtowc_ascii, &wctomb_ascii, 0 }; struct charset *charset_find(const char *code) { int i; /* Find good (MIME) name. */ for (i = 0; names[i].bad; i++) if (!ascii_strcasecmp(code, names[i].bad)) { code = names[i].good; break; } /* Recognise some charsets for which we avoid using a table. */ if (!ascii_strcasecmp(code, "UTF-8")) return &charset_utf8; if (!ascii_strcasecmp(code, "US-ASCII")) return &charset_ascii; if (!ascii_strcasecmp(code, "ISO-8859-1")) return &charset_iso1; /* Look for a mapping for a simple 8-bit encoding. */ for (i = 0; maps[i].name; i++) if (!ascii_strcasecmp(code, maps[i].name)) { if (!maps[i].charset) { maps[i].charset = (struct charset *)malloc(sizeof(struct charset)); if (maps[i].charset) { struct map *map = (struct map *)malloc(sizeof(struct map)); if (!map) { free(maps[i].charset); maps[i].charset = 0; } else { maps[i].charset->max = 1; maps[i].charset->mbtowc = &mbtowc_8bit; maps[i].charset->wctomb = &wctomb_8bit; maps[i].charset->map = map; map->from = maps[i].map; map->to = 0; /* inverse mapping is created when required */ } } } return maps[i].charset; } return 0; } /* * Function to convert a buffer from one encoding to another. * Invalid bytes are replaced by '#', and characters that are * not available in the target encoding are replaced by '?'. * Each of TO and TOLEN may be zero, if the result is not needed. * The output buffer is null-terminated, so it is all right to * use charset_convert(fromcode, tocode, s, strlen(s), &t, 0). */ int charset_convert(const char *fromcode, const char *tocode, const char *from, size_t fromlen, char **to, size_t *tolen) { int ret = 0; struct charset *charset1, *charset2; char *tobuf, *p, *newbuf; int i, j, wc; charset1 = charset_find(fromcode); charset2 = charset_find(tocode); if (!charset1 || !charset2 ) return -1; tobuf = (char *)malloc(fromlen * charset2->max + 1); if (!tobuf) return -2; for (p = tobuf; fromlen; from += i, fromlen -= i, p += j) { i = charset_mbtowc(charset1, &wc, from, fromlen); if (!i) i = 1; else if (i == -1) { i = 1; wc = '#'; ret = 2; } j = charset_wctomb(charset2, p, wc); if (j == -1) { if (!ret) ret = 1; j = charset_wctomb(charset2, p, '?'); if (j == -1) j = 0; } } if (tolen) *tolen = p - tobuf; *p++ = '\0'; if (to) { newbuf = realloc(tobuf, p - tobuf); *to = newbuf ? newbuf : tobuf; } else free(tobuf); return ret; } #endif /* USE_CHARSET_ICONV */ [/CODE_LITE] icovert.c [CODE_LITE]/* * Copyright (C) 2001 Edmund Grimley Evans * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifdef HAVE_CONFIG_H #include #endif #ifdef HAVE_ICONV #include #include #include #include #include /* * Convert data from one encoding to another. Return: * * -2 : memory allocation failed * -1 : unknown encoding * 0 : data was converted exactly * 1 : data was converted inexactly * 2 : data was invalid (but still converted) * * We convert in two steps, via UTF-8, as this is the only * reliable way of distinguishing between invalid input * and valid input which iconv refuses to transliterate. * We convert from UTF-8 twice, because we have no way of * knowing whether the conversion was exact if iconv returns * E2BIG (due to a bug in the specification of iconv). * An alternative approach is to assume that the output of * iconv is never more than 4 times as long as the input, * but I prefer to avoid that assumption if possible. */ int iconvert(const char *fromcode, const char *tocode, const char *from, size_t fromlen, char **to, size_t *tolen) { int ret = 0; iconv_t cd1, cd2; char *ib; char *ob; char *utfbuf = 0, *outbuf, *newbuf; size_t utflen, outlen, ibl, obl, k; char tbuf[2048]; cd1 = iconv_open("UTF-8", fromcode); if (cd1 == (iconv_t)(-1)) return -1; cd2 = (iconv_t)(-1); /* Don't use strcasecmp() as it's locale-dependent. */ if (!strchr("Uu", tocode[0]) || !strchr("Tt", tocode[1]) || !strchr("Ff", tocode[2]) || tocode[3] != '-' || tocode[4] != '8' || tocode[5] != '\0') { char *tocode1; /* * Try using this non-standard feature of glibc and libiconv. * This is deliberately not a config option as people often * change their iconv library without rebuilding applications. */ tocode1 = (char *)malloc(strlen(tocode) + 11); if (!tocode1) goto fail; strcpy(tocode1, tocode); strcat(tocode1, "//TRANSLIT"); cd2 = iconv_open(tocode1, "UTF-8"); free(tocode1); if (cd2 == (iconv_t)(-1)) cd2 = iconv_open(tocode, fromcode); if (cd2 == (iconv_t)(-1)) { iconv_close(cd1); return -1; } } utflen = 1; /*fromlen * 2 + 1; XXX */ utfbuf = (char *)malloc(utflen); if (!utfbuf) goto fail; /* Convert to UTF-8 */ ib = (char *)from; ibl = fromlen; ob = utfbuf; obl = utflen; for (;;) { k = iconv(cd1, &ib, &ibl, &ob, &obl); assert((!k && !ibl) || (k == (size_t)(-1) && errno == E2BIG && ibl && obl < 6) || (k == (size_t)(-1) && (errno == EILSEQ || errno == EINVAL) && ibl)); if (!ibl) break; if (obl < 6) { /* Enlarge the buffer */ utflen *= 2; newbuf = (char *)realloc(utfbuf, utflen); if (!newbuf) goto fail; ob = (ob - utfbuf) + newbuf; obl = utflen - (ob - newbuf); utfbuf = newbuf; } else { /* Invalid input */ ib++, ibl--; *ob++ = '#', obl--; ret = 2; iconv(cd1, 0, 0, 0, 0); } } if (cd2 == (iconv_t)(-1)) { /* The target encoding was UTF-8 */ if (tolen) *tolen = ob - utfbuf; if (!to) { free(utfbuf); iconv_close(cd1); return ret; } newbuf = (char *)realloc(utfbuf, (ob - utfbuf) + 1); if (!newbuf) goto fail; ob = (ob - utfbuf) + newbuf; *ob = '\0'; *to = newbuf; iconv_close(cd1); return ret; } /* Truncate the buffer to be tidy */ utflen = ob - utfbuf; newbuf = (char *)realloc(utfbuf, utflen); if (!newbuf) goto fail; utfbuf = newbuf; /* Convert from UTF-8 to discover how long the output is */ outlen = 0; ib = utfbuf; ibl = utflen; while (ibl) { ob = tbuf; obl = sizeof(tbuf); k = iconv(cd2, &ib, &ibl, &ob, &obl); assert((k != (size_t)(-1) && !ibl) || (k == (size_t)(-1) && errno == E2BIG && ibl) || (k == (size_t)(-1) && errno == EILSEQ && ibl)); if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) { /* Replace one character */ char *tb = "?"; size_t tbl = 1; outlen += ob - tbuf; ob = tbuf; obl = sizeof(tbuf); k = iconv(cd2, &tb, &tbl, &ob, &obl); assert((!k && !tbl) || (k == (size_t)(-1) && errno == EILSEQ && tbl)); for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--) ; } outlen += ob - tbuf; } ob = tbuf; obl = sizeof(tbuf); k = iconv(cd2, 0, 0, &ob, &obl); assert(!k); outlen += ob - tbuf; /* Convert from UTF-8 for real */ outbuf = (char *)malloc(outlen + 1); if (!outbuf) goto fail; ib = utfbuf; ibl = utflen; ob = outbuf; obl = outlen; while (ibl) { k = iconv(cd2, &ib, &ibl, &ob, &obl); assert((k != (size_t)(-1) && !ibl) || (k == (size_t)(-1) && errno == EILSEQ && ibl)); if (k && !ret) ret = 1; if (ibl && !(k == (size_t)(-1) && errno == E2BIG)) { /* Replace one character */ char *tb = "?"; size_t tbl = 1; k = iconv(cd2, &tb, &tbl, &ob, &obl); assert((!k && !tbl) || (k == (size_t)(-1) && errno == EILSEQ && tbl)); for (++ib, --ibl; ibl && (*ib & 0x80); ib++, ibl--) ; } } k = iconv(cd2, 0, 0, &ob, &obl); assert(!k); assert(!obl); *ob = '\0'; free(utfbuf); iconv_close(cd1); iconv_close(cd2); if (tolen) *tolen = outlen; if (!to) { free(outbuf); return ret; } *to = outbuf; return ret; fail: if(0 != utfbuf) free(utfbuf); iconv_close(cd1); if (cd2 != (iconv_t)(-1)) iconv_close(cd2); return -2; } #endif /* HAVE_ICONV */ [/CODE_LITE]

扫描二维码推送至手机访问。

本文链接：https://www.webgou.info/?id=407

标签: Unicode UTF-8 编码 flac

分享给朋友：

FLAC 编码code节选

“FLAC 编码code节选” 的相关文章

vs2005运行tcpmp遇到"SignTool Error: No certificates were found that met all the given criteria."

mysql 远程连接方法

Linux下修改PATH的方法

android编译全过程

UTF-8, Unicode, GB2312格式串转换之C语言版

wince 串口驱动分析

发表评论

Copyright know blog. Some Rights Reserved.站长:webmaster#webgou.info(#换成@) 粤ICP备09183716号

Powered By Z-BlogPHP

FLAC 编码code节选

“FLAC 编码code节选” 的相关文章

vs2005运行tcpmp遇到"SignTool Error: No certificates were found that met all the given criteria."

mysql 远程连接方法

Linux下修改PATH的方法

android编译全过程

UTF-8, Unicode, GB2312格式串转换之C语言版

wince 串口驱动分析

发表评论取消回复

Copyright know blog. Some Rights Reserved.站长:webmaster#webgou.info(#换成@) 粤ICP备09183716号

Powered By Z-BlogPHP

发表评论