dotemacs

My Emacs configuration
git clone git://git.entf.net/dotemacs
Log | Files | Refs | LICENSE

utf8.c (2198B)


      1 #include "utf8.h"
      2 
      3 size_t codepoint_to_utf8(const uint32_t codepoint, unsigned char buffer[4]) {
      4   if (codepoint <= 0x7F) {
      5     buffer[0] = codepoint;
      6     return 1;
      7   }
      8   if (codepoint >= 0x80 && codepoint <= 0x07FF) {
      9     buffer[0] = 0xC0 | (codepoint >> 6);
     10     buffer[1] = 0x80 | (codepoint & 0x3F);
     11     return 2;
     12   }
     13   if (codepoint >= 0x0800 && codepoint <= 0xFFFF) {
     14     buffer[0] = 0xE0 | (codepoint >> 12);
     15     buffer[1] = 0x80 | ((codepoint >> 6) & 0x3F);
     16     buffer[2] = 0x80 | (codepoint & 0x3F);
     17     return 3;
     18   }
     19 
     20   if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) {
     21     buffer[0] = 0xF0 | (codepoint >> 18);
     22     buffer[1] = 0x80 | ((codepoint >> 12) & 0x3F);
     23     buffer[2] = 0x80 | ((codepoint >> 6) & 0x3F);
     24     buffer[3] = 0x80 | (codepoint & 0x3F);
     25     return 4;
     26   }
     27   return 0;
     28 }
     29 
     30 bool utf8_to_codepoint(const unsigned char buffer[4], const size_t len,
     31                        uint32_t *codepoint) {
     32   *codepoint = 0;
     33   if (len == 1 && buffer[0] <= 0x7F) {
     34     *codepoint = buffer[0];
     35     return true;
     36   }
     37   if (len == 2 && (buffer[0] >= 0xC0 && buffer[0] <= 0xDF) &&
     38       (buffer[1] >= 0x80 && buffer[1] <= 0xBF)) {
     39     *codepoint = buffer[0] & 0x1F;
     40     *codepoint = *codepoint << 6;
     41     *codepoint = *codepoint | (buffer[1] & 0x3F);
     42     return true;
     43   }
     44   if (len == 3 && (buffer[0] >= 0xE0 && buffer[0] <= 0xEF) &&
     45       (buffer[1] >= 0x80 && buffer[1] <= 0xBF) &&
     46       (buffer[2] >= 0x80 && buffer[2] <= 0xBF)) {
     47     *codepoint = buffer[0] & 0xF;
     48     *codepoint = *codepoint << 6;
     49     *codepoint = *codepoint | (buffer[1] & 0x3F);
     50     *codepoint = *codepoint << 6;
     51     *codepoint = *codepoint | (buffer[2] & 0x3F);
     52     return true;
     53   }
     54   if (len == 4 && (buffer[0] >= 0xF0 && buffer[0] <= 0xF7) &&
     55       (buffer[1] >= 0x80 && buffer[1] <= 0xBF) &&
     56       (buffer[2] >= 0x80 && buffer[2] <= 0xBF) &&
     57       (buffer[3] >= 0x80 && buffer[3] <= 0xBF)) {
     58     *codepoint = buffer[0] & 7;
     59     *codepoint = *codepoint << 6;
     60     *codepoint = *codepoint | (buffer[1] & 0x3F);
     61     *codepoint = *codepoint << 6;
     62     *codepoint = *codepoint | (buffer[2] & 0x3F);
     63     *codepoint = *codepoint << 6;
     64     *codepoint = *codepoint | (buffer[3] & 0x3F);
     65     return true;
     66   }
     67 
     68   return false;
     69 }