#include <string.h>

#include <apr.h>
#include <apr_pools.h>

/* According to the official BitTorrent specification, all strings in
 * metainfo files should be UTF-8 encoded. However, there are non-compliant
 * torrents around. If we encounter a byte that does not start a structurally
 * valid UTF-8 sequence, we assume it is ISO 8859-1 and convert it to UTF-8.
 *
 * Returns a new NUL-terminated string, allocated from `pool`, holding `s`
 * followed by the (possibly converted) contents of `append`. `s` is copied
 * verbatim; only `append` is inspected and converted. Neither input string
 * is modified. Returns NULL if the pool allocation returns NULL.
 *
 * Validation is structural only: a lead byte is accepted when it is followed
 * by the right number of continuation bytes (10xxxxxx). Overlong encodings
 * and out-of-range code points are not rejected.
 */
char* bt_strcat_utf8(apr_pool_t* pool, char* s, char* append) {
    const unsigned char* p;
    size_t s_len = strlen(s);
    size_t append_len = strlen(append);
    /* Worst case every appended byte is ISO 8859-1 and expands to two
     * bytes; the extra byte is for the terminating NUL.
     */
    char* rv = apr_palloc(pool, s_len + (append_len * 2) + 1);
    char* rvp;

    if(rv == NULL)
        return NULL;

    /* Copy the prefix and position the write cursor right behind it. The
     * pool memory is not zeroed, so we must not scan it for a terminator.
     */
    memcpy(rv, s, s_len);
    rvp = rv + s_len;

    /* Now start appending, converting on the fly if necessary. Bytes are
     * read as unsigned char so the bit tests do not depend on the
     * signedness of plain char.
     */
    for(p = (const unsigned char*)append; *p;) {
        /* Length of the UTF-8 sequence starting at p, or 0 if the byte is
         * to be treated as ISO 8859-1. The && chains short-circuit on the
         * terminating NUL (it is never a continuation byte), so we never
         * read past the end of `append`.
         */
        size_t n;

        if(!(p[0] & 0x80)) {
            /* ASCII character */
            n = 1;
        } else if((p[0] & 0xE0) == 0xC0 && (p[1] & 0xC0) == 0x80) {
            /* 2 byte UTF-8 character */
            n = 2;
        } else if(
            (p[0] & 0xF0) == 0xE0 && (p[1] & 0xC0) == 0x80 &&
            (p[2] & 0xC0) == 0x80
        ) {
            /* 3 byte UTF-8 character */
            n = 3;
        } else if(
            (p[0] & 0xF8) == 0xF0 && (p[1] & 0xC0) == 0x80 &&
            (p[2] & 0xC0) == 0x80 && (p[3] & 0xC0) == 0x80
        ) {
            /* 4 byte UTF-8 character */
            n = 4;
        } else {
            n = 0;
        }

        if(n) {
            memcpy(rvp, p, n);
            rvp += n;
            p += n;
            continue;
        }

        /* ISO 8859-1 -> UTF-8 conversion: code points 0x80..0xFF become
         * the two byte sequence 110000xx 10xxxxxx.
         */
        *(rvp++) = (char)(0xC0 | (p[0] >> 6));
        *(rvp++) = (char)(0x80 | (p[0] & 0x3F));
        p++;
    }

    *rvp = 0;

    return rv;
}
