ucx/string.h

changeset 0
1f419bd32da1
child 124
80609f9675f1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ucx/string.h	Sat Dec 07 12:14:59 2013 +0100
@@ -0,0 +1,392 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2013 Olaf Wintermann. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Bounded string implementation.
+ * 
+ * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
+ * The main difference from C strings is that <code>sstr_t</code> does <b>not
+ * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
+ * within the structure.
+ * 
+ * When using <code>sstr_t</code>, developers must be fully aware of what type
+ * of string (<code>NULL</code>-terminated or not) they are using when
+ * accessing the <code>char* ptr</code> directly.
+ * 
+ * The UCX string module provides some common string functions, known from
+ * standard libc, working with <code>sstr_t</code>.
+ * 
+ * @file   string.h
+ * @author Mike Becker
+ * @author Olaf Wintermann
+ */
+
+#ifndef UCX_STRING_H
+#define	UCX_STRING_H
+
+#include "ucx.h"
+#include "allocator.h"
+#include <stddef.h>
+
+/** Shortcut for a <code>sstr_t struct</code> literal from a string literal. */
+#define ST(s) { (char*)(s), sizeof(s)-1 }
+
+/** Shortcut for wrapping a string literal (or char array) in a sstr_t. */
+#define S(s) sstrn((char*)(s), sizeof(s)-1)
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/**
+ * The UCX string structure: a character pointer plus an explicit length.
+ */
+typedef struct {
+   /** A reference to the string (<b>not necessarily
+    * <code>NULL</code>-terminated</b>) */
+    char   *ptr;
+    /** The length of the string referenced by ptr */
+    size_t length;
+} sstr_t;
+
+/**
+ * Wraps a <code>NULL</code>-terminated C string in a new sstr_t.
+ *
+ * The length is determined by a call to <code>strlen()</code>.
+ *
+ * <b>Note:</b> the sstr_t only holds a <i>reference</i> to the C string; no
+ * data is copied. If you need a copy, pass the return value to sstrdup().
+ *
+ * @param cstring the <code>NULL</code>-terminated C string to wrap
+ * @return a new sstr_t referencing the C string
+ *
+ * @see sstrn()
+ */
+sstr_t sstr(char *cstring);
+
+/**
+ * Wraps a C string in a new sstr_t using an explicitly specified length.
+ *
+ * <b>Note:</b> the sstr_t only holds a <i>reference</i> to the C string; no
+ * data is copied. If you need a copy, pass the return value to sstrdup().
+ *
+ * @param cstring  the C string to wrap
+ * @param length   the length to store in the sstr_t
+ * @return a new sstr_t referencing the C string
+ *
+ * @see sstr()
+ * @see S()
+ */
+sstr_t sstrn(char *cstring, size_t length);
+
+
+/**
+ * Returns the total length of all specified strings.
+ *
+ * At least one string must be specified.
+ *
+ * <b>Attention:</b> if the count argument does not match the number of
+ * strings actually passed, the behavior is undefined.
+ *
+ * @param count    the total number of specified strings (so at least 1)
+ * @param string   the first string
+ * @param ...      all other strings
+ * @return the sum of the lengths of all specified strings
+ */
+size_t sstrnlen(size_t count, sstr_t string, ...);
+
+
+/**
+ * Concatenates strings.
+ *
+ * At least one string must be specified and there must be enough memory
+ * available referenced by the destination sstr_t.ptr for this function to
+ * successfully concatenate all specified strings.
+ *
+ * The sstr_t.length of the destination string specifies the capacity and
+ * should match the total memory available referenced by the destination
+ * sstr_t.ptr. This function <i>never</i> copies data beyond the capacity and
+ * does not modify any of the source strings.
+ *
+ * <b>Attention:</b>
+ * <ul>
+ *   <li>Any content in the destination string will be overwritten</li>
+ *   <li>The destination sstr_t.ptr is <b>NOT</b>
+ *       <code>NULL</code>-terminated</li>
+ *   <li>The destination sstr_t.length is set to the total length of the
+ *       concatenated strings; dest is passed by value, so use the return
+ *       value to obtain the updated length</li>
+ *   <li><i>Hint:</i> if one spare byte is available, terminate the result
+ *       with <code>result.ptr[result.length]='\0'</code> after the call</li>
+ * </ul>
+ *
+ * @param dest    new sstr_t with capacity information and allocated memory
+ * @param count   the total number of strings to concatenate
+ * @param src     the first string
+ * @param ...     all other strings
+ * @return the argument for <code>dest</code> is returned
+ */
+sstr_t sstrncat(sstr_t dest, size_t count, sstr_t src, ...);
+
+
+/**
+ * Returns the substring that starts at the specified location.
+ *
+ * <b>Attention:</b> the new string references the same memory area as the
+ * input string (no data is copied) and will <b>NOT</b> be
+ * <code>NULL</code>-terminated. Use sstrdup() to get a terminated copy.
+ *
+ * @param string input string
+ * @param start  start location of the substring
+ * @return a substring of <code>string</code> starting at <code>start</code>
+ *
+ * @see sstrsubsl()
+ * @see sstrchr()
+ */
+sstr_t sstrsubs(sstr_t string, size_t start);
+
+/**
+ * Returns a substring with a maximum length starting at the specified location.
+ *
+ * <b>Attention:</b> the new string references the same memory area as the
+ * input string (no data is copied) and will <b>NOT</b> be
+ * <code>NULL</code>-terminated. Use sstrdup() to get a terminated copy.
+ *
+ * @param string input string
+ * @param start  start location of the substring
+ * @param length the maximum length of the substring
+ * @return a substring of <code>string</code> starting at <code>start</code>
+ * with a maximum length of <code>length</code>
+ *
+ * @see sstrsubs()
+ * @see sstrchr()
+ */
+sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
+
+/**
+ * Returns the substring that begins at the first occurrence of the specified
+ * character.
+ *
+ * If the string does not contain the character, an empty string is returned.
+ *
+ * @param string the string in which to locate the character
+ * @param chr    the character to locate
+ * @return       a substring starting at the first location of <code>chr</code>
+ *
+ * @see sstrsubs()
+ */
+sstr_t sstrchr(sstr_t string, int chr);
+
+/**
+ * Returns the substring that begins at the last occurrence of the specified
+ * character.
+ *
+ * If the string does not contain the character, an empty string is returned.
+ *
+ * @param string the string in which to locate the character
+ * @param chr    the character to locate
+ * @return       a substring starting at the last location of <code>chr</code>
+ *
+ * @see sstrsubs()
+ */
+sstr_t sstrrchr(sstr_t string, int chr);
+
+/**
+ * Splits a string into parts by using a delimiter string.
+ *
+ * This function will return <code>NULL</code>, if one of the following happens:
+ * <ul>
+ *   <li>the string length is zero</li>
+ *   <li>the delimiter length is zero</li>
+ *   <li>the string equals the delimiter</li>
+ *   <li>memory allocation fails</li>
+ * </ul>
+ *
+ * The integer referenced by <code>count</code> is used as input and limits
+ * the size of the resulting list, i.e. the maximum count of splits + 1.
+ *
+ * On return, the integer referenced by <code>count</code> is set to
+ * <ul>
+ *   <li>-2, on memory allocation errors</li>
+ *   <li>-1, if either the string or the delimiter is an empty string</li>
+ *   <li>0, if the string equals the delimiter</li>
+ *   <li>1, if the string does not contain the delimiter</li>
+ *   <li>the count of list items, otherwise</li>
+ * </ul>
+ * NOTE(review): <code>count</code> is declared as <code>size_t*</code>, so the
+ * negative codes can only be observed as wrapped unsigned values, e.g.
+ * <code>(size_t)-1</code> - confirm against the implementation.
+ *
+ * If the string starts with the delimiter, the first item of the resulting
+ * list will be an empty string.
+ *
+ * If the string ends with the delimiter and the maximum list size is not
+ * exceeded, the last list item will be an empty string.
+ *
+ * <b>Attention:</b> The returned array <b>AND</b> all sstr_t.ptr of the list
+ * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
+ * an allocator for managed memory to avoid this.
+ *
+ * @param string the string to split
+ * @param delim  the delimiter string
+ * @param count  IN: the maximum size of the resulting list (0 for an
+ *               unbounded list), OUT: the actual size of the list
+ * @return the split strings as sstr_t array or <code>NULL</code> on error
+ *
+ * @see sstrsplit_a()
+ */
+sstr_t* sstrsplit(sstr_t string, sstr_t delim, size_t *count);
+
+/**
+ * Performs sstrsplit() using a UcxAllocator.
+ *
+ * <i>Read the description of sstrsplit() for details.</i>
+ *
+ * The memory for the sstr_t.ptr pointers of the list items and the memory for
+ * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
+ * function.
+ *
+ * <b>Note:</b> the allocator is not used for memory that is freed within the
+ * same call of this function (locally scoped variables).
+ *
+ * @param allocator the UcxAllocator used for allocating memory
+ * @param string the string to split
+ * @param delim  the delimiter string
+ * @param count  IN: the maximum size of the resulting list (0 for an
+ *               unbounded list), OUT: the actual size of the list
+ * @return a list of the split strings as sstr_t array or
+ *         <code>NULL</code> on error
+ *
+ * @see sstrsplit()
+ */
+sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
+        size_t *count);
+
+/**
+ * Compares two UCX strings with standard <code>memcmp()</code>.
+ *
+ * The sstr_t.length attributes of the two strings are compared first. The
+ * <code>memcmp()</code> function is called if and only if the lengths match.
+ *
+ * @param s1 the first string
+ * @param s2 the second string
+ * @return -1 if s1 is shorter than s2, 1 if s1 is longer than s2, or the
+ * result of <code>memcmp()</code> if both have the same length (i.e. 0 if the
+ * strings match)
+ */
+int sstrcmp(sstr_t s1, sstr_t s2);
+
+/**
+ * Compares two UCX strings ignoring the case.
+ *
+ * The sstr_t.length attributes of the two strings are compared first. If and
+ * only if the lengths match, both strings are compared char by char ignoring
+ * the case.
+ *
+ * @param s1 the first string
+ * @param s2 the second string
+ * @return -1 if s1 is shorter than s2, 1 if s1 is longer than s2, or,
+ * if both have the same length, the difference between the first two
+ * differing characters (i.e. 0 if the strings match and no characters
+ * differ)
+ */
+int sstrcasecmp(sstr_t s1, sstr_t s2);
+
+/**
+ * Creates a duplicate of the specified string.
+ *
+ * The new sstr_t will contain a copy allocated by standard
+ * <code>malloc()</code>, so the caller <b>MUST</b> pass the sstr_t.ptr of the
+ * return value to <code>free()</code>.
+ *
+ * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
+ * terminated.
+ *
+ * @param string the string to duplicate
+ * @return a duplicate of the string
+ * @see sstrdup_a()
+ */
+sstr_t sstrdup(sstr_t string);
+
+/**
+ * Creates a duplicate of the specified string using a UcxAllocator.
+ *
+ * The new sstr_t will contain a copy allocated by the allocator's
+ * ucx_allocator_malloc function. So it is implementation dependent whether
+ * the returned sstr_t.ptr pointer must be passed to the allocator's
+ * ucx_allocator_free function manually.
+ *
+ * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
+ * terminated.
+ *
+ * @param allocator a valid instance of a UcxAllocator
+ * @param string the string to duplicate
+ * @return a duplicate of the string
+ * @see sstrdup()
+ */
+sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
+
+/**
+ * Omits leading and trailing spaces.
+ *
+ * This function returns a new sstr_t containing a trimmed version of the
+ * specified string.
+ *
+ * <b>Note:</b> the new sstr_t references the same memory, thus you
+ * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
+ * <code>free()</code>. It is also highly recommended to avoid assignments like
+ * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
+ * source string. Assignments of this type are only permitted if the
+ * sstr_t.ptr of the source string does not need to be freed or if another
+ * reference to the source string exists.
+ *
+ * @param string the string that shall be trimmed
+ * @return a new sstr_t referencing the trimmed part of the string
+ */
+sstr_t sstrtrim(sstr_t string);
+
+/**
+ * Checks whether a string has a specific prefix.
+ * @param string the string to check
+ * @param prefix the prefix the string should have
+ * @return 1, if and only if the string has the specified prefix, 0 otherwise
+ */
+int sstrprefix(sstr_t string, sstr_t prefix);
+
+/**
+ * Checks whether a string has a specific suffix.
+ * @param string the string to check
+ * @param suffix the suffix the string should have
+ * @return 1, if and only if the string has the specified suffix, 0 otherwise
+ */
+int sstrsuffix(sstr_t string, sstr_t suffix);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* UCX_STRING_H */

mercurial