#include "cx/string.h"
#include "cx/utils.h"
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#ifndef _WIN32
#include <strings.h>
#endif
/**
 * Wraps a zero-terminated C string in a mutable UCX string.
 *
 * No memory is copied; the length is determined with strlen().
 *
 * @param cstring the zero-terminated string to wrap (passed to strlen(),
 *                so it must not be NULL)
 * @return a cxmutstr that refers to the same memory
 */
cxmutstr cx_mutstr(char *cstring) {
    size_t const measured = strlen(cstring);
    return (cxmutstr) {.ptr = cstring, .length = measured};
}
/**
 * Wraps a character buffer of known length in a mutable UCX string.
 *
 * No memory is copied and the buffer is not inspected.
 *
 * @param cstring pointer to the first character
 * @param length  number of characters the string shall span
 * @return a cxmutstr that refers to the same memory
 */
cxmutstr cx_mutstrn(char *cstring, size_t length) {
    return (cxmutstr) {.ptr = cstring, .length = length};
}
/**
 * Wraps a zero-terminated C string in an immutable UCX string.
 *
 * No memory is copied; the length is determined with strlen().
 *
 * @param cstring the zero-terminated string to wrap (passed to strlen(),
 *                so it must not be NULL)
 * @return a cxstring that refers to the same memory
 */
cxstring cx_str(const char *cstring) {
    size_t const measured = strlen(cstring);
    return (cxstring) {.ptr = cstring, .length = measured};
}
/**
 * Wraps a character buffer of known length in an immutable UCX string.
 *
 * No memory is copied and the buffer is not inspected.
 *
 * @param cstring pointer to the first character
 * @param length  number of characters the string shall span
 * @return a cxstring that refers to the same memory
 */
cxstring cx_strn(const char *cstring, size_t length) {
    return (cxstring) {.ptr = cstring, .length = length};
}
/**
 * Converts a mutable string to an immutable view of the same memory.
 *
 * @param str the mutable string
 * @return a cxstring with the same pointer and length
 */
cxstring cx_strcast(cxmutstr str) {
    return (cxstring) {.ptr = str.ptr, .length = str.length};
}
/**
 * Releases the memory of a string with free() and resets the string.
 *
 * After the call the pointer is NULL and the length is zero.
 *
 * @param str the string to free
 */
void cx_strfree(cxmutstr *str) {
    char *owned = str->ptr;
    // reset the handle first, then hand the memory back
    str->length = 0;
    str->ptr = NULL;
    free(owned);
}
/**
 * Releases the memory of a string with cxFree() and resets the string.
 *
 * After the call the pointer is NULL and the length is zero.
 *
 * @param alloc the allocator that is passed to cxFree()
 * @param str   the string to free
 */
void cx_strfree_a(CxAllocator const *alloc, cxmutstr *str) {
    cxFree(alloc, str->ptr);
    *str = (cxmutstr) {.ptr = NULL, .length = 0};
}
size_t cx_strlen(
size_t count,
...
) {
if (count ==
0)
return 0;
va_list ap;
va_start(ap, count);
size_t size =
0;
cx_for_n(i, count) {
cxstring str = va_arg(ap, cxstring);
size += str.length;
}
va_end(ap);
return size;
}
/**
 * Appends a variable number of strings to an existing (or empty) string.
 *
 * The buffer of str is grown with cxRealloc(), or freshly obtained with
 * cxMalloc() when str.ptr is NULL. The result is always zero-terminated.
 *
 * The variadic arguments are traversed twice (once to sum the lengths and
 * once to copy), so no temporary array has to be allocated. Empty strings
 * are skipped when copying, which avoids handing a NULL source pointer to
 * memcpy().
 *
 * @param alloc the allocator used for the result buffer
 * @param str   the string to append to; returned unchanged when count is zero
 * @param count the number of variadic arguments
 * @param ...   exactly count arguments of type cxstring
 * @return the concatenated string; the process is aborted when the
 *         allocation of the result buffer fails
 */
cxmutstr cx_strcat_ma(
        CxAllocator const *alloc,
        cxmutstr str,
        size_t count,
        ...
) {
    if (count == 0) {
        return str;
    }

    va_list ap;

    // first pass: determine the total length
    size_t slen = str.length;
    va_start(ap, count);
    cx_for_n(i, count) {
        cxstring s = va_arg(ap, cxstring);
        slen += s.length;
    }
    va_end(ap);

    // obtain a buffer that holds everything plus the terminator
    if (str.ptr == NULL) {
        str.ptr = cxMalloc(alloc, slen + 1);
    } else {
        str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
    }
    if (str.ptr == NULL) {
        abort();
    }

    // second pass: copy the pieces behind the existing content
    size_t pos = str.length;
    va_start(ap, count);
    cx_for_n(i, count) {
        cxstring s = va_arg(ap, cxstring);
        if (s.length > 0) {
            memcpy(str.ptr + pos, s.ptr, s.length);
            pos += s.length;
        }
    }
    va_end(ap);

    str.length = slen;
    str.ptr[str.length] = '\0';
    return str;
}
/**
 * Returns the part of a string that begins at the given position.
 *
 * The work is delegated to cx_strsubsl(), which receives
 * string.length - start as the requested length.
 *
 * @param string the source string
 * @param start  index of the first character of the substring
 * @return the result of cx_strsubsl() for the remaining length
 */
cxstring cx_strsubs(cxstring string, size_t start) {
    size_t const remaining = string.length - start;
    return cx_strsubsl(string, start, remaining);
}
/**
 * Mutable variant of cx_strsubs(): returns the part of a string that
 * begins at the given position.
 *
 * The work is delegated to cx_strsubsl_m(), which receives
 * string.length - start as the requested length.
 *
 * @param string the source string
 * @param start  index of the first character of the substring
 * @return the result of cx_strsubsl_m() for the remaining length
 */
cxmutstr cx_strsubs_m(cxmutstr string, size_t start) {
    size_t const remaining = string.length - start;
    return cx_strsubsl_m(string, start, remaining);
}
/**
 * Returns a substring with a maximum length.
 *
 * @param string the source string
 * @param start  index of the first character of the substring
 * @param length the maximum length; clamped to the characters that are
 *               available behind start
 * @return a view into string, or a string with NULL pointer and zero
 *         length when start lies beyond the end of string
 */
cxstring cx_strsubsl(cxstring string, size_t start, size_t length) {
    if (start <= string.length) {
        size_t const available = string.length - start;
        size_t const taken = length < available ? length : available;
        return (cxstring) {string.ptr + start, taken};
    }
    return (cxstring) {NULL, 0};
}
/**
 * Mutable variant of cx_strsubsl().
 *
 * @param string the source string
 * @param start  index of the first character of the substring
 * @param length the maximum length of the substring
 * @return the result of cx_strsubsl() with the const qualifier removed
 */
cxmutstr cx_strsubsl_m(cxmutstr string, size_t start, size_t length) {
    cxstring view = cx_strsubsl(cx_strcast(string), start, length);
    cxmutstr writable;
    writable.ptr = (char *) view.ptr;
    writable.length = view.length;
    return writable;
}
/**
 * Finds the first occurrence of a character.
 *
 * The search is done with memchr(), which interprets both chr and the
 * string contents as unsigned char. Comparing a plain (possibly signed)
 * char against a value in the range 0..255 would never match bytes
 * >= 0x80 on platforms where char is signed.
 *
 * @param string the string to search
 * @param chr    the character to look for (converted to unsigned char)
 * @return the part of string starting at the first occurrence, or a string
 *         with NULL pointer and zero length when chr does not occur
 */
cxstring cx_strchr(cxstring string, int chr) {
    // never hand a possibly NULL pointer of an empty string to memchr()
    if (string.length == 0) {
        return (cxstring) {NULL, 0};
    }

    char const *hit = memchr(string.ptr, chr, string.length);
    if (hit == NULL) {
        return (cxstring) {NULL, 0};
    }

    size_t const offset = (size_t) (hit - string.ptr);
    return (cxstring) {hit, string.length - offset};
}
/**
 * Mutable variant of cx_strchr().
 *
 * @param string the string to search
 * @param chr    the character to look for
 * @return the result of cx_strchr() with the const qualifier removed
 */
cxmutstr cx_strchr_m(cxmutstr string, int chr) {
    cxstring view = cx_strchr(cx_strcast(string), chr);
    cxmutstr writable;
    writable.ptr = (char *) view.ptr;
    writable.length = view.length;
    return writable;
}
/**
 * Finds the last occurrence of a character.
 *
 * Both the wanted character and the string contents are compared as
 * unsigned char. Comparing a plain (possibly signed) char against a value
 * in the range 0..255 would never match bytes >= 0x80 on platforms where
 * char is signed.
 *
 * @param string the string to search
 * @param chr    the character to look for (converted to unsigned char)
 * @return the part of string starting at the last occurrence, or a string
 *         with NULL pointer and zero length when chr does not occur
 */
cxstring cx_strrchr(cxstring string, int chr) {
    unsigned char const wanted = (unsigned char) chr;

    // walk backwards; i is decremented before use so it never underflows
    size_t i = string.length;
    while (i > 0) {
        i--;
        if ((unsigned char) string.ptr[i] == wanted) {
            return cx_strsubs(string, i);
        }
    }
    return (cxstring) {NULL, 0};
}
/**
 * Mutable variant of cx_strrchr().
 *
 * @param string the string to search
 * @param chr    the character to look for
 * @return the result of cx_strrchr() with the const qualifier removed
 */
cxmutstr cx_strrchr_m(cxmutstr string, int chr) {
    cxstring view = cx_strrchr(cx_strcast(string), chr);
    cxmutstr writable;
    writable.ptr = (char *) view.ptr;
    writable.length = view.length;
    return writable;
}
// Number of prefix table entries that are kept on the stack by cx_strstr().
// Needles with at least this many characters use a heap allocated table.
#ifndef CX_STRSTR_SBO_SIZE
#define CX_STRSTR_SBO_SIZE 512
#endif

/**
 * Finds the first occurrence of needle in haystack.
 *
 * An empty needle matches at the beginning, a needle of length one is
 * delegated to cx_strchr(). Longer needles are searched with a prefix
 * table that lives on the stack for needles shorter than
 * CX_STRSTR_SBO_SIZE and on the heap otherwise. When that heap allocation
 * fails, a table-free scan is used instead so that the result stays correct.
 *
 * @param haystack the string to search in
 * @param needle   the string to search for
 * @return the part of haystack starting at the first match, haystack
 *         itself when needle is empty, or a string with NULL pointer and
 *         zero length when there is no match
 */
cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
) {
    if (needle.length == 0) {
        return haystack;
    }
    if (needle.length == 1) {
        return cx_strchr(haystack, *needle.ptr);
    }

    cxstring result = {NULL, 0};

    // a needle that is longer than the haystack can never match
    if (needle.length > haystack.length) {
        return result;
    }

    // the table needs needle.length + 1 entries
    size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
    size_t *ptable = s_prefix_table;
    if (needle.length >= CX_STRSTR_SBO_SIZE) {
        ptable = calloc(needle.length + 1, sizeof(size_t));
        if (ptable == NULL) {
            // out of memory: fall back to a scan that needs no table
            size_t const last_start = haystack.length - needle.length;
            for (size_t start = 0; start <= last_start; start++) {
                if (memcmp(haystack.ptr + start,
                           needle.ptr, needle.length) == 0) {
                    result.ptr = haystack.ptr + start;
                    result.length = haystack.length - start;
                    break;
                }
            }
            return result;
        }
    }

    // build the prefix table (j is kept one ahead of the compared index)
    size_t i = 0;
    size_t j = 0;
    ptable[i] = j;
    while (i < needle.length) {
        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        ptable[i] = j;
    }

    // scan the haystack; j - 1 is the number of matched needle characters
    i = 0;
    j = 1;
    while (i < haystack.length) {
        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
            j = ptable[j - 1];
        }
        i++;
        j++;
        if (j - 1 == needle.length) {
            size_t start = i - needle.length;
            result.ptr = haystack.ptr + start;
            result.length = haystack.length - start;
            break;
        }
    }

    // only a heap allocated table must be released
    if (ptable != s_prefix_table) {
        free(ptable);
    }

    return result;
}
/**
 * Mutable variant of cx_strstr().
 *
 * @param haystack the string to search in
 * @param needle   the string to search for
 * @return the result of cx_strstr() with the const qualifier removed
 */
cxmutstr cx_strstr_m(cxmutstr haystack, cxstring needle) {
    cxstring view = cx_strstr(cx_strcast(haystack), needle);
    cxmutstr writable;
    writable.ptr = (char *) view.ptr;
    writable.length = view.length;
    return writable;
}
/**
 * Splits a string at each occurrence of a delimiter.
 *
 * The items are views into string; nothing is copied. At most limit items
 * are written to output. When the limit is reached, the last item contains
 * the unsplit remainder of the string.
 *
 * @param string the string to split
 * @param delim  the delimiter; an empty delimiter yields string as the
 *               only item
 * @param limit  the maximum number of items; output must have room for
 *               this many entries
 * @param output the array that receives the items
 * @return the number of items written (zero when limit is zero)
 */
size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
) {
    if (limit == 0) {
        return 0;
    }

    // an empty delimiter never splits anything
    if (delim.length == 0) {
        output[0] = string;
        return 1;
    }

    // special case: the delimiter is at least as long as the string
    if (delim.length >= string.length) {
        // An exact match produces two empty items. That needs room for two
        // entries, so it only applies when the limit allows it.
        if (limit >= 2 && cx_strcmp(string, delim) == 0) {
            output[0] = cx_strn(string.ptr, 0);
            output[1] = cx_strn(string.ptr + string.length, 0);
            return 2;
        }
        output[0] = string;
        return 1;
    }

    size_t n = 0;
    cxstring curpos = string;
    while (1) {
        ++n;
        cxstring match = cx_strstr(curpos, delim);
        if (match.length == 0 || n >= limit) {
            // no further delimiter or limit reached: the rest is the last item
            output[n - 1] = curpos;
            break;
        }
        cxstring item = cx_strn(curpos.ptr, (size_t) (match.ptr - curpos.ptr));
        output[n - 1] = item;
        // continue behind the delimiter
        size_t processed = item.length + delim.length;
        curpos.ptr += processed;
        curpos.length -= processed;
    }
    return n;
}
/**
 * Splits a string at each occurrence of a delimiter and allocates the
 * array for the items.
 *
 * The number of items is counted first; then an array of exactly that size
 * is obtained with cxCalloc() and filled by cx_strsplit().
 *
 * @param allocator the allocator used for the output array
 * @param string    the string to split
 * @param delim     the delimiter; an empty delimiter yields string as the
 *                  only item
 * @param limit     the maximum number of items
 * @param output    receives the pointer to the allocated array; set to
 *                  NULL when limit is zero or the allocation fails
 * @return the number of items, or zero when limit is zero or the
 *         allocation fails
 */
size_t cx_strsplit_a(
        CxAllocator const *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
) {
    // agree with cx_strsplit(): a limit of zero yields no items
    if (limit == 0) {
        *output = NULL;
        return 0;
    }

    size_t n = 0;
    if (delim.length == 0) {
        // An empty delimiter always yields exactly one item. Counting it in
        // the loop below would never advance and spin until the limit.
        n = 1;
    } else {
        cxstring curpos = string;
        while (1) {
            ++n;
            cxstring match = cx_strstr(curpos, delim);
            if (match.length == 0 || n >= limit) {
                break;
            }
            // continue behind the delimiter
            size_t processed =
                    (size_t) (match.ptr - curpos.ptr) + delim.length;
            curpos.ptr += processed;
            curpos.length -= processed;
        }
    }

    *output = cxCalloc(allocator, n, sizeof(cxstring));
    if (*output == NULL) {
        return 0;
    }
    return cx_strsplit(string, delim, n, *output);
}
/**
 * Mutable variant of cx_strsplit().
 *
 * The output array is handed to cx_strsplit() through a pointer cast.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit  the maximum number of items
 * @param output the array that receives the items
 * @return the value returned by cx_strsplit()
 */
size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
) {
    cxstring const source = cx_strcast(string);
    cxstring *const items = (cxstring *) output;
    return cx_strsplit(source, delim, limit, items);
}
/**
 * Mutable variant of cx_strsplit_a().
 *
 * The output pointer is handed to cx_strsplit_a() through a pointer cast.
 *
 * @param allocator the allocator passed on to cx_strsplit_a()
 * @param string    the string to split
 * @param delim     the delimiter
 * @param limit     the maximum number of items
 * @param output    receives the pointer to the allocated array
 * @return the value returned by cx_strsplit_a()
 */
size_t cx_strsplit_ma(
        CxAllocator const *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
) {
    cxstring const source = cx_strcast(string);
    cxstring **const items = (cxstring **) output;
    return cx_strsplit_a(allocator, source, delim, limit, items);
}
/**
 * Compares two strings.
 *
 * Strings of different length are ordered by their length alone; only
 * strings of equal length are compared byte-wise with memcmp().
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return 1 if s1 is longer, -1 if s1 is shorter, otherwise the result of
 *         memcmp()
 */
int cx_strcmp(cxstring s1, cxstring s2) {
    if (s1.length != s2.length) {
        return s1.length > s2.length ? 1 : -1;
    }
    return memcmp(s1.ptr, s2.ptr, s1.length);
}
/**
 * Compares two strings ignoring case.
 *
 * Strings of different length are ordered by their length alone; only
 * strings of equal length are compared with _strnicmp() (Windows) or
 * strncasecmp() (elsewhere).
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return 1 if s1 is longer, -1 if s1 is shorter, otherwise the result of
 *         the platform comparison function
 */
int cx_strcasecmp(cxstring s1, cxstring s2) {
    if (s1.length != s2.length) {
        return s1.length > s2.length ? 1 : -1;
    }
#ifdef _WIN32
    return _strnicmp(s1.ptr, s2.ptr, s1.length);
#else
    return strncasecmp(s1.ptr, s2.ptr, s1.length);
#endif
}
int cx_strcmp_p(
void const *s1,
void const *s2
) {
cxstring
const *left = s1;
cxstring
const *right = s2;
return cx_strcmp(*left, *right);
}
int cx_strcasecmp_p(
void const *s1,
void const *s2
) {
cxstring
const *left = s1;
cxstring
const *right = s2;
return cx_strcasecmp(*left, *right);
}
/**
 * Creates a zero-terminated copy of a string.
 *
 * @param allocator the allocator used for the copy
 * @param string    the string to duplicate
 * @return the copy, or a string with NULL pointer and zero length when the
 *         allocation fails
 */
cxmutstr cx_strdup_a(CxAllocator const *allocator, cxstring string) {
    size_t const len = string.length;

    // one extra byte for the terminator
    char *copy = cxMalloc(allocator, len + 1);
    if (copy == NULL) {
        return (cxmutstr) {NULL, 0};
    }

    memcpy(copy, string.ptr, len);
    copy[len] = '\0';
    return (cxmutstr) {copy, len};
}
/**
 * Removes leading and trailing whitespace.
 *
 * Whitespace is whatever isspace() reports. Characters are converted to
 * unsigned char before they are passed to isspace(), because the behavior
 * of the <ctype.h> functions is undefined for negative values other than
 * EOF (which plain char may hold on platforms where char is signed).
 *
 * @param string the string to trim
 * @return a view into string without surrounding whitespace
 */
cxstring cx_strtrim(cxstring string) {
    cxstring result = string;

    // skip leading whitespace
    while (result.length > 0
           && isspace((unsigned char) *result.ptr)) {
        result.ptr++;
        result.length--;
    }

    // drop trailing whitespace
    while (result.length > 0
           && isspace((unsigned char) result.ptr[result.length - 1])) {
        result.length--;
    }

    return result;
}
/**
 * Mutable variant of cx_strtrim().
 *
 * @param string the string to trim
 * @return the result of cx_strtrim() with the const qualifier removed
 */
cxmutstr cx_strtrim_m(cxmutstr string) {
    cxstring view = cx_strtrim(cx_strcast(string));
    cxmutstr writable;
    writable.ptr = (char *) view.ptr;
    writable.length = view.length;
    return writable;
}
/**
 * Checks whether a string starts with the given prefix.
 *
 * @param string the string to check
 * @param prefix the expected prefix
 * @return true if string is at least as long as prefix and its first
 *         prefix.length bytes are equal to prefix
 */
bool cx_strprefix(cxstring string, cxstring prefix) {
    return string.length >= prefix.length
           && memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
}
/**
 * Checks whether a string ends with the given suffix.
 *
 * @param string the string to check
 * @param suffix the expected suffix
 * @return true if string is at least as long as suffix and its last
 *         suffix.length bytes are equal to suffix
 */
bool cx_strsuffix(cxstring string, cxstring suffix) {
    if (suffix.length > string.length) {
        return false;
    }
    char const *tail = string.ptr + string.length - suffix.length;
    return 0 == memcmp(tail, suffix.ptr, suffix.length);
}
/**
 * Checks whether a string starts with the given prefix, ignoring case.
 *
 * The comparison uses _strnicmp() on Windows and strncasecmp() elsewhere.
 *
 * @param string the string to check
 * @param prefix the expected prefix
 * @return true if string is at least as long as prefix and the platform
 *         comparison of the first prefix.length characters yields zero
 */
bool cx_strcaseprefix(cxstring string, cxstring prefix) {
    if (prefix.length > string.length) {
        return false;
    }
    int diff;
#ifdef _WIN32
    diff = _strnicmp(string.ptr, prefix.ptr, prefix.length);
#else
    diff = strncasecmp(string.ptr, prefix.ptr, prefix.length);
#endif
    return diff == 0;
}
/**
 * Checks whether a string ends with the given suffix, ignoring case.
 *
 * The comparison uses _strnicmp() on Windows and strncasecmp() elsewhere.
 *
 * @param string the string to check
 * @param suffix the expected suffix
 * @return true if string is at least as long as suffix and the platform
 *         comparison of the last suffix.length characters yields zero
 */
bool cx_strcasesuffix(cxstring string, cxstring suffix) {
    if (suffix.length > string.length) {
        return false;
    }
    char const *tail = string.ptr + string.length - suffix.length;
    int diff;
#ifdef _WIN32
    diff = _strnicmp(tail, suffix.ptr, suffix.length);
#else
    diff = strncasecmp(tail, suffix.ptr, suffix.length);
#endif
    return diff == 0;
}
/**
 * Converts a string to lower case in place.
 *
 * Each character is converted to unsigned char before it is passed to
 * tolower(), because the behavior of the <ctype.h> functions is undefined
 * for negative values other than EOF (which plain char may hold on
 * platforms where char is signed).
 *
 * @param string the string to modify
 */
void cx_strlower(cxmutstr string) {
    cx_for_n(i, string.length) {
        string.ptr[i] = (char) tolower((unsigned char) string.ptr[i]);
    }
}
/**
 * Converts a string to upper case in place.
 *
 * Each character is converted to unsigned char before it is passed to
 * toupper(), because the behavior of the <ctype.h> functions is undefined
 * for negative values other than EOF (which plain char may hold on
 * platforms where char is signed).
 *
 * @param string the string to modify
 */
void cx_strupper(cxmutstr string) {
    cx_for_n(i, string.length) {
        string.ptr[i] = (char) toupper((unsigned char) string.ptr[i]);
    }
}
// Upper bound for the number of match offsets stored in a single index
// buffer; cx_strreplacen_a() clamps its per-buffer capacity to this value.
// Can be overridden by defining the macro before this point.
#ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
#endif
// One node in a singly linked chain of index buffers used by
// cx_strreplacen_a() to remember where the pattern was found.
struct cx_strreplace_ibuf {
// array of match offsets (relative to the start of the searched string)
size_t *buf;
// the next buffer in the chain, or NULL for the last one
struct cx_strreplace_ibuf *next;
// number of entries of buf that are in use
unsigned int len;
};
/**
 * Releases a complete chain of index buffers.
 *
 * For every node, the index array and the node itself are passed to free().
 *
 * @param buf the first node of the chain (may be NULL)
 */
static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
    struct cx_strreplace_ibuf *node = buf;
    while (node != NULL) {
        // remember the successor before the node is released
        struct cx_strreplace_ibuf *successor = node->next;
        free(node->buf);
        free(node);
        node = successor;
    }
}
/**
 * Creates a copy of str in which occurrences of pattern are replaced by
 * replacement, performing at most replmax replacements.
 *
 * The function works in two phases: first the offsets of all matches are
 * collected in a chain of index buffers (allocated with calloc()), then
 * the result is assembled in memory obtained from the given allocator.
 *
 * @param allocator   the allocator used for the returned string
 * @param str         the string in which to replace
 * @param pattern     the string to search for
 * @param replacement the string that is inserted for each match
 * @param replmax     the maximum number of replacements
 * @return the zero-terminated result; a plain copy of str (cx_strdup_a())
 *         when pattern is empty, pattern is longer than str, or replmax is
 *         zero; a string with NULL pointer and zero length when any
 *         allocation in this function fails
 */
cxmutstr cx_strreplacen_a(
CxAllocator
const *allocator,
cxstring str,
cxstring pattern,
cxstring replacement,
size_t replmax
) {
// nothing can be replaced in these cases - just duplicate the input
if (pattern.length ==
0 || pattern.length > str.length || replmax ==
0)
return cx_strdup_a(allocator, str);
// capacity of one index buffer: the smaller of replmax and the largest
// possible number of matches, clamped to CX_STRREPLACE_INDEX_BUFFER_SIZE
size_t ibufmax = str.length / pattern.length;
size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
if (ibuflen >
CX_STRREPLACE_INDEX_BUFFER_SIZE) {
ibuflen =
CX_STRREPLACE_INDEX_BUFFER_SIZE;
}
// allocate the first node of the index buffer chain
struct cx_strreplace_ibuf *firstbuf, *curbuf;
firstbuf = curbuf = calloc(
1,
sizeof(
struct cx_strreplace_ibuf));
if (!firstbuf)
return cx_mutstrn(
NULL,
0);
firstbuf->buf = calloc(ibuflen,
sizeof(
size_t));
if (!firstbuf->buf) {
free(firstbuf);
return cx_mutstrn(
NULL,
0);
}
// phase 1: search for the pattern and record the offset of each match
cxstring searchstr = str;
size_t found =
0;
do {
cxstring match = cx_strstr(searchstr, pattern);
if (match.length >
0) {
// the current buffer is full - append a new node to the chain
if (curbuf->len == ibuflen) {
struct cx_strreplace_ibuf *nextbuf =
calloc(
1,
sizeof(
struct cx_strreplace_ibuf));
if (!nextbuf) {
cx_strrepl_free_ibuf(firstbuf);
return cx_mutstrn(
NULL,
0);
}
nextbuf->buf = calloc(ibuflen,
sizeof(
size_t));
if (!nextbuf->buf) {
// nextbuf is not linked into the chain yet, free it separately
free(nextbuf);
cx_strrepl_free_ibuf(firstbuf);
return cx_mutstrn(
NULL,
0);
}
curbuf->next = nextbuf;
curbuf = nextbuf;
}
// store the offset relative to the start of str and continue the
// search directly behind this match
found++;
size_t idx = match.ptr - str.ptr;
curbuf->buf[curbuf->len++] = idx;
searchstr.ptr = match.ptr + pattern.length;
searchstr.length = str.length - idx - pattern.length;
}
else {
break;
}
}
while (searchstr.length >
0 && found < replmax);
// phase 2: compute the length of the result and allocate it
cxmutstr result;
{
// length change per replacement (negative if the replacement is shorter)
ssize_t adjlen = (
ssize_t) replacement.length - (
ssize_t) pattern.length;
// total number of recorded matches over all buffers
size_t rcount =
0;
curbuf = firstbuf;
do {
rcount += curbuf->len;
curbuf = curbuf->next;
}
while (curbuf);
result.length = str.length + rcount * adjlen;
result.ptr = cxMalloc(allocator, result.length +
1);
if (!result.ptr) {
cx_strrepl_free_ibuf(firstbuf);
return cx_mutstrn(
NULL,
0);
}
}
// phase 3: assemble the result from unchanged segments and replacements
curbuf = firstbuf;
// srcidx: position in str up to which the input has been consumed
size_t srcidx =
0;
char *destptr = result.ptr;
do {
for (
size_t i =
0; i < curbuf->len; i++) {
size_t idx = curbuf->buf[i];
// copy the unchanged text between the previous match and this one
size_t srclen = idx - srcidx;
if (srclen >
0) {
memcpy(destptr, str.ptr + srcidx, srclen);
destptr += srclen;
srcidx += srclen;
}
// skip the pattern in the source and emit the replacement instead
srcidx += pattern.length;
memcpy(destptr, replacement.ptr, replacement.length);
destptr += replacement.length;
}
curbuf = curbuf->next;
}
while (curbuf);
// copy whatever follows the last match and terminate the result
memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
result.ptr[result.length] =
'\0';
cx_strrepl_free_ibuf(firstbuf);
return result;
}
/**
 * Creates a tokenization context for a string.
 *
 * All positions and the token counter start at zero and no additional
 * delimiters are configured.
 *
 * @param str   the string to tokenize
 * @param delim the delimiter
 * @param limit the maximum number of tokens
 * @return the initialized context
 */
CxStrtokCtx cx_strtok(cxstring str, cxstring delim, size_t limit) {
    CxStrtokCtx ctx;

    // caller supplied configuration
    ctx.str = str;
    ctx.delim = delim;
    ctx.limit = limit;

    // iteration state
    ctx.pos = 0;
    ctx.next_pos = 0;
    ctx.delim_pos = 0;
    ctx.found = 0;

    // no additional delimiters yet
    ctx.delim_more = NULL;
    ctx.delim_more_count = 0;

    return ctx;
}
/**
 * Creates a tokenization context for a mutable string.
 *
 * @param str   the string to tokenize
 * @param delim the delimiter
 * @param limit the maximum number of tokens
 * @return the context created by cx_strtok() for the same memory
 */
CxStrtokCtx cx_strtok_m(cxmutstr str, cxstring delim, size_t limit) {
    cxstring const source = cx_strcast(str);
    return cx_strtok(source, delim, limit);
}
/**
 * Retrieves the next token from a tokenization context.
 *
 * The nearest occurrence of the primary delimiter or any additional
 * delimiter ends the token. When no delimiter is found, the token spans
 * the rest of the string.
 *
 * @param ctx   the tokenization context
 * @param token receives the token (a view into the tokenized string)
 * @return true if a token was stored; false if the token limit is reached
 *         or the previous token already ended at the end of the string
 */
bool cx_strtok_next(CxStrtokCtx *ctx, cxstring *token) {
    // termination criteria
    if (ctx->found >= ctx->limit) {
        return false;
    }
    if (ctx->delim_pos >= ctx->str.length) {
        return false;
    }

    // everything from the start of the next token is searched
    cxstring rest = cx_strsubs(ctx->str, ctx->next_pos);

    // look for the primary delimiter; on a hit, let it span exactly the
    // delimiter
    cxstring hit = cx_strstr(rest, ctx->delim);
    if (hit.length > 0) {
        hit.length = ctx->delim.length;
    }

    // an additional delimiter wins if it occurs earlier
    cx_for_n(k, ctx->delim_more_count) {
        cxstring alt = cx_strstr(rest, ctx->delim_more[k]);
        if (alt.length == 0) {
            continue;
        }
        if (hit.length == 0 || alt.ptr < hit.ptr) {
            hit.ptr = alt.ptr;
            hit.length = ctx->delim_more[k].length;
        }
    }

    // publish the token and advance the context
    ctx->found++;
    ctx->pos = ctx->next_pos;
    token->ptr = &ctx->str.ptr[ctx->pos];
    if (hit.length == 0) {
        ctx->delim_pos = ctx->str.length;
    } else {
        ctx->delim_pos = (size_t) (hit.ptr - ctx->str.ptr);
    }
    token->length = ctx->delim_pos - ctx->pos;
    ctx->next_pos = ctx->delim_pos + hit.length;

    return true;
}
/**
 * Mutable variant of cx_strtok_next().
 *
 * The token pointer is handed to cx_strtok_next() through a pointer cast.
 *
 * @param ctx   the tokenization context
 * @param token receives the token
 * @return the value returned by cx_strtok_next()
 */
bool cx_strtok_next_m(CxStrtokCtx *ctx, cxmutstr *token) {
    cxstring *const view = (cxstring *) token;
    return cx_strtok_next(ctx, view);
}
/**
 * Configures additional delimiters for a tokenization context.
 *
 * Only the pointer is stored; the array is not copied.
 *
 * @param ctx   the tokenization context
 * @param delim array of additional delimiters
 * @param count the number of elements in delim
 */
void cx_strtok_delim(
        CxStrtokCtx *ctx,
        cxstring const *delim,
        size_t count
) {
    ctx->delim_more_count = count;
    ctx->delim_more = delim;
}