src/ucx/cx/string.h

Sat, 12 Nov 2022 11:52:47 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Sat, 12 Nov 2022 11:52:47 +0100
changeset 424
3df9258cd3cc
parent 415
d938228c382e
child 490
d218607f5a7e
permissions
-rw-r--r--

allow '-' in tokens, add support for negative integers

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * \file string.h
 * \brief Strings that know their length.
 * \author Mike Becker
 * \author Olaf Wintermann
 * \version 3.0
 * \copyright 2-Clause BSD License
 */

#ifndef UCX_STRING_H
#define UCX_STRING_H

#include "common.h"
#include "allocator.h"

/**
 * The UCX string structure for mutable strings.
 *
 * Pairs a character pointer with an explicit length.
 */
struct cx_mutstr_s {
    /**
     * A pointer to the string.
     * \note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char *ptr;
    /** The length of the string (a terminating zero byte is not counted) */
    size_t length;
};

/**
 * A mutable string.
 *
 * @see cxstring
 */
typedef struct cx_mutstr_s cxmutstr;

/**
 * The UCX string structure for immutable (constant) strings.
 *
 * Pairs a pointer to constant characters with an explicit length.
 */
struct cx_string_s {
    /**
     * A pointer to the immutable string.
     * \note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char const *ptr;
    /** The length of the string (a terminating zero byte is not counted) */
    size_t length;
};

/**
 * An immutable string.
 *
 * @see cxmutstr
 */
typedef struct cx_string_s cxstring;

/**
 * A literal initializer for a UCX string structure.
 *
 * The macro expands to a brace-enclosed initializer list (pointer, length),
 * so it can only be used where an initializer is permitted.
 *
 * The argument MUST be a string (const char*) \em literal, because the
 * length is determined at compile time as \c sizeof(literal) minus one
 * (the terminating zero byte is not counted).
 *
 * @param literal the string literal
 */
#define CX_STR(literal) {literal, sizeof(literal) - 1}

#ifdef __cplusplus
extern "C" {
#endif


/**
 * Wraps a mutable string that must be zero-terminated.
 *
 * The length is implicitly inferred by using a call to \c strlen().
 *
 * \note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a constant string, use cx_str().
 *
 * @param cstring the string to wrap, must be zero-terminated and must not
 * be \c NULL
 * @return the wrapped string
 *
 * @see cx_mutstrn()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_mutstr(char *cstring);

/**
 * Wraps a mutable string that does not need to be zero-terminated.
 *
 * The argument may be \c NULL if the length is zero.
 *
 * \note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a constant string, use cx_strn().
 *
 * @param cstring  the string to wrap (or \c NULL, only if the length is zero)
 * @param length   the length of the string
 * @return the wrapped string
 *
 * @see cx_mutstr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
);

/**
 * Wraps a constant string that must be zero-terminated.
 *
 * The length is implicitly inferred by using a call to \c strlen().
 *
 * \note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a non-constant string, use cx_mutstr().
 *
 * @param cstring the string to wrap, must be zero-terminated and must not
 * be \c NULL
 * @return the wrapped string
 *
 * @see cx_strn()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxstring cx_str(char const *cstring);


/**
 * Wraps a constant string that does not need to be zero-terminated.
 *
 * The argument may be \c NULL if the length is zero.
 *
 * \note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a non-constant string, use cx_mutstrn().
 *
 * @param cstring  the string to wrap (or \c NULL, only if the length is zero)
 * @param length   the length of the string
 * @return the wrapped string
 *
 * @see cx_str()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strn(
        char const *cstring,
        size_t length
);

/**
 * Casts a mutable string to an immutable string.
 *
 * \note This is not a cast in the strict sense. Instead, you get a copy
 * of the struct with the desired pointer type. Both structs still
 * point to the same location, though!
 *
 * @param str the mutable string to cast
 * @return an immutable copy of the string pointer
 */
__attribute__((__warn_unused_result__))
cxstring cx_strcast(cxmutstr str);

/**
 * Passes the pointer in this string to \c free().
 *
 * The pointer in the struct is set to \c NULL and the length is set to zero.
 *
 * \note There is no implementation for cxstring, because it is unlikely that
 * you ever have a \c char \c const* you are really supposed to free. If you
 * encounter such a situation, you should double-check your code.
 *
 * @param str the string to free, must not be \c NULL
 *
 * @see cx_strfree_a()
 */
__attribute__((__nonnull__))
void cx_strfree(cxmutstr *str);

/**
 * Passes the pointer in this string to the allocator's free function.
 *
 * The pointer in the struct is set to \c NULL and the length is set to zero.
 *
 * \note There is no implementation for cxstring, because it is unlikely that
 * you ever have a \c char \c const* you are really supposed to free. If you
 * encounter such a situation, you should double-check your code.
 *
 * @param alloc the allocator, must not be \c NULL
 * @param str the string to free, must not be \c NULL
 *
 * @see cx_strfree()
 */
__attribute__((__nonnull__))
void cx_strfree_a(
        CxAllocator *alloc,
        cxmutstr *str
);

/**
 * Returns the accumulated length of all specified strings.
 *
 * \attention If the count argument is larger than the number of
 * specified strings, the behavior is undefined.
 *
 * \note (review) The type expected for the variadic arguments is not visible
 * in this header - presumably cxstring; confirm against the implementation.
 *
 * @param count    the total number of specified strings
 * @param ...      all strings
 * @return the accumulated length of all strings
 */
__attribute__((__warn_unused_result__))
size_t cx_strlen(
        size_t count,
        ...
);

/**
 * Concatenates two or more strings.
 *
 * The resulting string will be allocated by the specified allocator.
 * So developers \em must pass the return value to cx_strfree_a(), together
 * with the same allocator, eventually.
 *
 * \note It is guaranteed that there is only one allocation.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * \note (review) The type expected for the variadic arguments is not visible
 * in this header - presumably cxstring; confirm against the implementation.
 *
 * @param alloc the allocator to use, must not be \c NULL
 * @param count   the total number of strings to concatenate
 * @param ...     all strings
 * @return the concatenated string
 *
 * @see cx_strcat()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_strcat_a(
        CxAllocator *alloc,
        size_t count,
        ...
);

/**
 * Concatenates two or more strings.
 *
 * This macro forwards to cx_strcat_a() using \c cxDefaultAllocator.
 *
 * The resulting string will be allocated by standard \c malloc().
 * So developers \em must pass the return value to cx_strfree() eventually.
 *
 * \note It is guaranteed that there is only one allocation.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param count   the total number of strings to concatenate
 * @param ...     all strings
 * @return the concatenated string
 *
 * @see cx_strcat_a()
 */
#define cx_strcat(count, ...) \
cx_strcat_a(cxDefaultAllocator, count, __VA_ARGS__)

/**
 * Returns a substring of an immutable string, starting at the specified
 * location.
 *
 * \attention The new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubsl()
 * @see cx_strsubs_m()
 * @see cx_strsubsl_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strsubs(
        cxstring string,
        size_t start
);

/**
 * Returns a length-limited substring of an immutable string, starting at the
 * specified location.
 *
 * The returned string will be limited to \p length bytes or the number
 * of bytes available in \p string, whichever is smaller.
 *
 * \attention The new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @param length the maximum length of the returned string
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubs()
 * @see cx_strsubs_m()
 * @see cx_strsubsl_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
);

/**
 * Returns a substring of a mutable string, starting at the specified
 * location.
 *
 * This is the variant of cx_strsubs() for mutable strings.
 *
 * \attention The new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubsl_m()
 * @see cx_strsubs()
 * @see cx_strsubsl()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
);

/**
 * Returns a length-limited substring of a mutable string, starting at the
 * specified location.
 *
 * This is the variant of cx_strsubsl() for mutable strings.
 *
 * The returned string will be limited to \p length bytes or the number
 * of bytes available in \p string, whichever is smaller.
 *
 * \attention The new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @param length the maximum length of the returned string
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubs_m()
 * @see cx_strsubs()
 * @see cx_strsubsl()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified character in an immutable string.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the first location of \p chr,
 *               or an empty string, if \p chr is not contained
 *
 * @see cx_strchr_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strchr(
        cxstring string,
        int chr
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified character in a mutable string.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the first location of \p chr,
 *               or an empty string, if \p chr is not contained
 *
 * @see cx_strchr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
);

/**
 * Returns a substring starting at the location of the last occurrence of the
 * specified character in an immutable string.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the last location of \p chr,
 *               or an empty string, if \p chr is not contained
 *
 * @see cx_strrchr_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strrchr(
        cxstring string,
        int chr
);

/**
 * Returns a substring starting at the location of the last occurrence of the
 * specified character in a mutable string.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the last location of \p chr,
 *               or an empty string, if \p chr is not contained
 *
 * @see cx_strrchr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified string in an immutable string.
 *
 * If \p haystack does not contain \p needle, an empty string is returned.
 *
 * If \p needle is an empty string, the complete \p haystack is
 * returned.
 *
 * @param haystack the string to be scanned
 * @param needle  string containing the sequence of characters to match
 * @return       a substring starting at the first occurrence of
 *               \p needle, or an empty string, if the sequence is not
 *               contained
 * @see cx_strstr_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified string in a mutable string.
 *
 * This is the variant of cx_strstr() for a mutable \p haystack; the
 * \p needle is still passed as an immutable string.
 *
 * If \p haystack does not contain \p needle, an empty string is returned.
 *
 * If \p needle is an empty string, the complete \p haystack is
 * returned.
 *
 * @param haystack the string to be scanned
 * @param needle  string containing the sequence of characters to match
 * @return       a substring starting at the first occurrence of
 *               \p needle, or an empty string, if the sequence is not
 *               contained
 * @see cx_strstr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
);

/**
 * Splits a given immutable string using a delimiter string.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pre-allocated array of at least \p limit length,
 * must not be \c NULL
 * @return the actual number of split items
 *
 * @see cx_strsplit_a()
 * @see cx_strsplit_m()
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
);

/**
 * Splits a given immutable string using a delimiter string.
 *
 * The array pointed to by \p output will be allocated by \p allocator.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * \note (review) Presumably the caller is responsible for releasing the
 * array through the same \p allocator - confirm against the implementation.
 *
 * \attention If allocation fails, the \c NULL pointer will be written to
 * \p output and the number returned will be zero.
 *
 * @param allocator the allocator to use for allocating the resulting array,
 * must not be \c NULL
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pointer where the address of the allocated array shall be
 * written to, must not be \c NULL
 * @return the actual number of split items
 *
 * @see cx_strsplit()
 * @see cx_strsplit_ma()
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit_a(
        CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
);


/**
 * Splits a given mutable string using a delimiter string.
 *
 * This is the variant of cx_strsplit() for mutable strings; the resulting
 * items are mutable strings as well.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pre-allocated array of at least \p limit length,
 * must not be \c NULL
 * @return the actual number of split items
 *
 * @see cx_strsplit()
 * @see cx_strsplit_ma()
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
);

/**
 * Splits a given mutable string using a delimiter string.
 *
 * This is the variant of cx_strsplit_a() for mutable strings; the resulting
 * items are mutable strings as well.
 *
 * The array pointed to by \p output will be allocated by \p allocator.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * \note (review) Presumably the caller is responsible for releasing the
 * array through the same \p allocator - confirm against the implementation.
 *
 * \attention If allocation fails, the \c NULL pointer will be written to
 * \p output and the number returned will be zero.
 *
 * @param allocator the allocator to use for allocating the resulting array,
 * must not be \c NULL
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pointer where the address of the allocated array shall be
 * written to, must not be \c NULL
 * @return the actual number of split items
 *
 * @see cx_strsplit_m()
 * @see cx_strsplit_a()
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit_ma(
        CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
);

/**
 * Compares two strings.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger
 * than \p s2, zero if both strings are equal
 *
 * @see cx_strcasecmp()
 */
__attribute__((__warn_unused_result__))
int cx_strcmp(
        cxstring s1,
        cxstring s2
);

/**
 * Compares two strings ignoring case.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger
 * than \p s2, zero if both strings are equal ignoring case
 *
 * @see cx_strcmp()
 */
__attribute__((__warn_unused_result__))
int cx_strcasecmp(
        cxstring s1,
        cxstring s2
);


/**
 * Creates a duplicate of the specified string.
 *
 * The new string will contain a copy allocated by \p allocator.
 * Use cx_strfree_a() with the same allocator to release it.
 *
 * \note The returned string is guaranteed to be zero-terminated.
 *
 * @param allocator the allocator to use, must not be \c NULL
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see cx_strdup()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_strdup_a(
        CxAllocator *allocator,
        cxstring string
);

/**
 * Creates a duplicate of the specified string.
 *
 * This macro forwards to cx_strdup_a() using \c cxDefaultAllocator.
 *
 * The new string will contain a copy allocated by standard
 * \c malloc(). So developers \em must pass the return value to cx_strfree().
 *
 * \note The returned string is guaranteed to be zero-terminated.
 *
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see cx_strdup_a()
 */
#define cx_strdup(string) cx_strdup_a(cxDefaultAllocator, string)

/**
 * Returns the specified string without leading and trailing spaces.
 *
 * \note The returned string references the same memory, thus you
 * must \em not free the returned memory.
 *
 * @param string the string that shall be trimmed
 * @return the trimmed string
 *
 * @see cx_strtrim_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strtrim(cxstring string);

/**
 * Returns the specified mutable string without leading and trailing spaces.
 *
 * This is the variant of cx_strtrim() for mutable strings.
 *
 * \note The returned string references the same memory, thus you
 * must \em not free the returned memory.
 *
 * @param string the string that shall be trimmed
 * @return the trimmed string
 *
 * @see cx_strtrim()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strtrim_m(cxmutstr string);

/**
 * Checks whether a string has a specific prefix.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return \c true, if and only if the string has the specified prefix,
 * \c false otherwise
 *
 * @see cx_strcaseprefix()
 */
__attribute__((__warn_unused_result__))
bool cx_strprefix(
        cxstring string,
        cxstring prefix
);

/**
 * Checks whether a string has a specific suffix.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return \c true, if and only if the string has the specified suffix,
 * \c false otherwise
 *
 * @see cx_strcasesuffix()
 */
__attribute__((__warn_unused_result__))
bool cx_strsuffix(
        cxstring string,
        cxstring suffix
);

/**
 * Checks whether a string has a specific prefix, ignoring the case.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return \c true, if and only if the string has the specified prefix
 * (ignoring the case), \c false otherwise
 *
 * @see cx_strprefix()
 */
__attribute__((__warn_unused_result__))
bool cx_strcaseprefix(
        cxstring string,
        cxstring prefix
);

/**
 * Checks whether a string has a specific suffix, ignoring the case.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return \c true, if and only if the string has the specified suffix
 * (ignoring the case), \c false otherwise
 *
 * @see cx_strsuffix()
 */
__attribute__((__warn_unused_result__))
bool cx_strcasesuffix(
        cxstring string,
        cxstring suffix
);

/**
 * Converts the string to lower case.
 *
 * The change is made in-place. If you want a copy, use cx_strdup(), first.
 *
 * \note The struct itself is passed by value; it is the characters it
 * points to that get modified.
 *
 * @param string the string to modify
 * @see cx_strupper()
 * @see cx_strdup()
 */
void cx_strlower(cxmutstr string);

/**
 * Converts the string to upper case.
 *
 * The change is made in-place. If you want a copy, use cx_strdup(), first.
 *
 * \note The struct itself is passed by value; it is the characters it
 * points to that get modified.
 *
 * @param string the string to modify
 * @see cx_strlower()
 * @see cx_strdup()
 */
void cx_strupper(cxmutstr string);

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * Replaces at most \p replmax occurrences.
 *
 * The returned string will be allocated by \p allocator and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param allocator the allocator to use, must not be \c NULL
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @param replmax maximum number of replacements
 * @return the resulting string after applying the replacements
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_strreplacen_a(
        CxAllocator *allocator,
        cxstring str,
        cxstring pattern,
        cxstring replacement,
        size_t replmax
);

/**
 * Replaces a pattern in a string with another string.
 *
 * This macro forwards to cx_strreplacen_a() using \c cxDefaultAllocator.
 *
 * The pattern is taken literally and is not a regular expression.
 * Replaces at most \p replmax occurrences.
 *
 * The returned string will be allocated by \c malloc() and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @param replmax maximum number of replacements
 * @return the resulting string after applying the replacements
 */
#define cx_strreplacen(str, pattern, replacement, replmax) \
cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, replmax)

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * This macro forwards to cx_strreplacen_a() with \c SIZE_MAX as the maximum
 * number of replacements.
 *
 * The returned string will be allocated by \p allocator and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param allocator the allocator to use
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @return the resulting string after applying the replacements
 */
#define cx_strreplace_a(allocator, str, pattern, replacement) \
cx_strreplacen_a(allocator, str, pattern, replacement, SIZE_MAX)

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * This macro forwards to cx_strreplacen_a() using \c cxDefaultAllocator and
 * \c SIZE_MAX as the maximum number of replacements.
 *
 * The returned string will be allocated by \c malloc() and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @return the resulting string after applying the replacements
 */
#define cx_strreplace(str, pattern, replacement) \
cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, SIZE_MAX)

#ifdef __cplusplus
} // extern "C"
#endif

#endif //UCX_STRING_H

mercurial