diff -r ae5a98f0545c -r 88625853ae74 ucx/string.h --- a/ucx/string.h Sat Dec 01 20:34:55 2012 +0100 +++ b/ucx/string.h Mon Aug 12 14:40:19 2013 +0200 @@ -1,99 +1,345 @@ /* - * File: sstring.h - * Author: olaf + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2013 Olaf Wintermann. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * Created on 17. Juni 2010, 13:26 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/** + * Bounded string implementation. + * + * The UCX strings (sstr_t) provide an alternative to C strings. + * The main difference to C strings is, that sstr_t does not + * need to be NULL-terminated. Instead the length is stored + * within the structure. 
+ * + * When using sstr_t, developers must be fully aware of what type + * of string (NULL-terminated or not) they are using, when + * accessing the char* ptr directly. + * + * The UCX string module provides some common string functions, known from + * standard libc, working with sstr_t. + * + * @file string.h + * @author Mike Becker + * @author Olaf Wintermann */ -#ifndef _SSTRING_H -#define _SSTRING_H +#ifndef UCX_STRING_H +#define UCX_STRING_H #include "ucx.h" +#include "allocator.h" #include -/* use macros for literals only */ -#define S(s) { (char*)s, sizeof(s)-1 } -#define ST(s) sstrn((char*)s, sizeof(s)-1) +/** Shortcut for a sstr_t struct literal. */ +#define ST(s) { (char*)s, sizeof(s)-1 } +/** Shortcut for the conversion of a C string to a sstr_t. */ +#define S(s) sstrn((char*)s, sizeof(s)-1) #ifdef __cplusplus extern "C" { #endif -typedef struct sstring { +/** + * The UCX string structure. + */ +typedef struct { + /** A reference to the string (not necessarily NULL + * -terminated) */ char *ptr; + /** The length of the string */ size_t length; } sstr_t; -/* - * creates a new sstr_t from a null terminated string +/** + * Creates a new sstr_t based on a C string. + * + * The length is implicitly inferred by using a call to strlen(). * - * s null terminated string + * Note: the sstr_t will hold a reference to the C string. If you + * do want a copy, use sstrdup() on the return value of this function. 
+ * + * @param cstring the C string to wrap + * @param length the length of the string + * @return a new sstr_t containing the C string + * + * @see sstr() + * @see S() */ -sstr_t sstrn(char *s, size_t n); +sstr_t sstrn(char *cstring, size_t length); -/* - * gets the length of n sstr_t strings +/** + * Returns the cumulated length of all specified strings. + * + * At least one string must be specified. + * + * Attention: if the count argument does not match the count of the + * specified strings, the behavior is undefined. * - * n number of strings - * s string - * ... strings + * @param count the total number of specified strings (so at least 1) + * @param string the first string + * @param ... all other strings + * @return the cumulated length of all strings */ -size_t sstrnlen(size_t n, sstr_t s, ...); +size_t sstrnlen(size_t count, sstr_t string, ...); + + +/** + * Concatenates strings. + * + * At least one string must be specified and there must be enough memory + * available referenced by the destination sstr_t.ptr for this function to + * successfully concatenate all specified strings. + * + * The sstr_t.length of the destination string specifies the capacity and + * should match the total memory available referenced by the destination + * sstr_t.ptr. This function never copies data beyond the capacity and + * does not modify any of the source strings. + * + * Attention: + * + * + * @param dest new sstr_t with capacity information and allocated memory + * @param count the total number of strings to concatenate + * @param src the first string + * @param ... all other strings + * @return the argument for dest is returned + */ +sstr_t sstrncat(sstr_t dest, size_t count, sstr_t src, ...); -/* - * concatenates n strings - * - * n number of strings - * s new string with enough memory allocated - * ... strings +/** + * Returns a substring starting at the specified location. 
+ * + * Attention: the new string references the same memory area as the + * input string and will NOT be NULL-terminated. + * Use sstrdup() to get a copy. + * + * @param string input string + * @param start start location of the substring + * @return a substring of string starting at start + * + * @see sstrsubsl() + * @see sstrchr() */ -sstr_t sstrncat(size_t n, sstr_t s, sstr_t c1, ...); +sstr_t sstrsubs(sstr_t string, size_t start); - -/* - * +/** + * Returns a substring with a maximum length starting at the specified location. + * + * Attention: the new string references the same memory area as the + * input string and will NOT be NULL-terminated. + * Use sstrdup() to get a copy. + * + * @param string input string + * @param start start location of the substring + * @param length the maximum length of the substring + * @return a substring of string starting at start + * with a maximum length of length + * + * @see sstrsubs() + * @see sstrchr() */ -sstr_t sstrsubs(sstr_t s, size_t start); +sstr_t sstrsubsl(sstr_t string, size_t start, size_t length); -/* - * +/** + * Returns a substring starting at the location of the first occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. + * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the first location of chr + * + * @see sstrsubs() */ -sstr_t sstrsubsl(sstr_t s, size_t start, size_t length); +sstr_t sstrchr(sstr_t string, int chr); -/* - * splits s into n parts +/** + * Splits a string into parts by using a delimiter string. + * + * This function will return NULL, if one of the following happens: + * + * + * The integer referenced by count is used as input and determines + * the maximum size of the resulting list, i.e. the maximum count of splits to + * perform + 1. 
+ * + * The integer referenced by count is also used as output and is + * set to + * + * + * If the string starts with the delimiter, the first item of the resulting + * list will be an empty string. + * + * If the string ends with the delimiter and the maximum list size is not + * exceeded, the last list item will be an empty string. + * + * Attention: All list items AND all sstr_t.ptr of the list + * items must be manually passed to free(). Use sstrsplit_a() with + * an allocator for managed memory, to avoid this. * - * s the string to split - * d the delimiter string - * n the maximum size of the resulting list - * a size of 0 indicates an unbounded list size - * the actual size of the list will be stored here - * - * Hint: use this value to avoid dynamic reallocation of the result list - * - * Returns a list of the split strings - * NOTE: this list needs to be freed manually after usage - * - * Returns NULL on error + * @param string the string to split + * @param delim the delimiter string + * @param count IN: the maximum size of the resulting list (0 for an + * unbounded list), OUT: the actual size of the list + * @return a list of the split strings as sstr_t array or + * NULL on error + * + * @see sstrsplit_a() */ -sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n); +sstr_t* sstrsplit(sstr_t string, sstr_t delim, size_t *count); +/** + * Performs sstrsplit() using a UcxAllocator. + * + * Read the description of sstrsplit() for details. + * + * The memory for the sstr_t.ptr pointers of the list items and the memory for + * the sstr_t array itself are allocated by using the UcxAllocator.malloc() + * function. + * + * Note: the allocator is not used for memory that is freed within the + * same call of this function (locally scoped variables). 
+ * + * @param allocator the UcxAllocator used for allocating memory + * @param string the string to split + * @param delim the delimiter string + * @param count IN: the maximum size of the resulting list (0 for an + * unbounded list), OUT: the actual size of the list + * @return a list of the split strings as sstr_t array or + * NULL on error + * + * @see sstrsplit() + */ +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim, + size_t *count); + +/** + * Compares two UCX strings with standard memcmp(). + * + * At first it compares the sstr_t.length attribute of the two strings. The + * memcmp() function is called, if and only if the lengths match. + * + * @param s1 the first string + * @param s2 the second string + * @return -1, if the length of s1 is less than the length of s2 or 1, if the + * length of s1 is greater than the length of s2 or the result of + * memcmp() otherwise (i.e. 0 if the strings match) + */ int sstrcmp(sstr_t s1, sstr_t s2); -sstr_t sstrdup(sstr_t s); +/** + * Creates a duplicate of the specified string. + * + * The new sstr_t will contain a copy allocated by standard + * malloc(). So developers MUST pass the sstr_t.ptr to + * free(). + * + * The sstr_t.ptr of the return value will always be NULL- + * terminated. + * + * @param string the string to duplicate + * @return a duplicate of the string + * @see sstrdup_a() + */ +sstr_t sstrdup(sstr_t string); + +/** + * Creates a duplicate of the specified string using a UcxAllocator. + * + * The new sstr_t will contain a copy allocated by the allocator's + * ucx_allocator_malloc function. So it is implementation dependent, whether the + * returned sstr_t.ptr pointer must be passed to the allocator's + * ucx_allocator_free function manually. + * + * The sstr_t.ptr of the return value will always be NULL- + * terminated. 
+ * + * @param allocator a valid instance of an UcxAllocator + * @param string the string to duplicate + * @return a duplicate of the string + * @see sstrdup() + */ +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string); + +/** + * Omits leading and trailing spaces. + * + * This function returns a new sstr_t containing a trimmed version of the + * specified string. + * + * Note: the new sstr_t references the same memory, thus you + * MUST NOT pass the sstr_t.ptr of the return value to + * free(). It is also highly recommended to avoid assignments like + * mystr = sstrtrim(mystr); as you lose the reference to the + * source string. Assignments of this type are only permitted, if the + * sstr_t.ptr of the source string does not need to be freed or if another + * reference to the source string exists. + * + * @param string the string that shall be trimmed + * @return a new sstr_t containing the trimmed string + */ +sstr_t sstrtrim(sstr_t string); #ifdef __cplusplus } #endif -#endif /* _SSTRING_H */ +#endif /* UCX_STRING_H */