ucx/string.h

changeset 5
88625853ae74
parent 1
1bcaac272cdf
child 17
11dffb40cd91
equal deleted inserted replaced
4:ae5a98f0545c 5:88625853ae74
1 /* 1 /*
2 * File: sstring.h 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * Author: olaf 3 *
4 * 4 * Copyright 2013 Olaf Wintermann. All rights reserved.
5 * Created on 17. Juni 2010, 13:26 5 *
6 */ 6 * Redistribution and use in source and binary forms, with or without
7 7 * modification, are permitted provided that the following conditions are met:
8 #ifndef _SSTRING_H 8 *
9 #define _SSTRING_H 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28 /**
29 * Bounded string implementation.
30 *
31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
32 * The main difference from C strings is that <code>sstr_t</code> does <b>not
33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
34 * within the structure.
35 *
36 * When using <code>sstr_t</code>, developers must be fully aware of what type
37 * of string (<code>NULL</code>-terminated or not) they are using, when
38 * accessing the <code>char* ptr</code> directly.
39 *
40 * The UCX string module provides some common string functions, known from
41 * standard libc, working with <code>sstr_t</code>.
42 *
43 * @file string.h
44 * @author Mike Becker
45 * @author Olaf Wintermann
46 */
47
48 #ifndef UCX_STRING_H
49 #define UCX_STRING_H
10 50
11 #include "ucx.h" 51 #include "ucx.h"
52 #include "allocator.h"
12 #include <stddef.h> 53 #include <stddef.h>
13 54
14 /* use macros for literals only */ 55 /** Shortcut for a <code>sstr_t struct</code> literal. */
15 #define S(s) { (char*)s, sizeof(s)-1 } 56 #define ST(s) { (char*)s, sizeof(s)-1 }
16 #define ST(s) sstrn((char*)s, sizeof(s)-1) 57 /** Shortcut for the conversion of a C string to a <code>sstr_t</code>. */
58 #define S(s) sstrn((char*)s, sizeof(s)-1)
17 59
18 #ifdef __cplusplus 60 #ifdef __cplusplus
19 extern "C" { 61 extern "C" {
20 #endif 62 #endif
21 63
22 typedef struct sstring { 64 /**
65 * The UCX string structure.
66 */
67 typedef struct {
68 /** A reference to the string (<b>not necessarily <code>NULL</code>
69 * -terminated</b>) */
23 char *ptr; 70 char *ptr;
71 /** The length of the string */
24 size_t length; 72 size_t length;
25 } sstr_t; 73 } sstr_t;
26 74
27 /* 75 /**
28 * creates a new sstr_t from a null terminated string 76 * Creates a new sstr_t based on a C string.
29 * 77 *
30 * s null terminated string 78 * The length is implicitly inferred by using a call to <code>strlen()</code>.
31 */ 79 *
32 sstr_t sstr(char *s); 80 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
33 81 * do want a copy, use sstrdup() on the return value of this function.
34 /* 82 *
35 * creates a new sstr_t from a string and length 83 * @param cstring the C string to wrap
36 * 84 * @return a new sstr_t containing the C string
37 * s string 85 *
38 * n length of string 86 * @see sstrn()
39 */ 87 */
40 sstr_t sstrn(char *s, size_t n); 88 sstr_t sstr(char *cstring);
41 89
42 90 /**
43 /* 91 * Creates a new sstr_t of the specified length based on a C string.
44 * gets the length of n sstr_t strings 92 *
45 * 93 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
46 * n number of strings 94 * do want a copy, use sstrdup() on the return value of this function.
47 * s string 95 *
48 * ... strings 96 * @param cstring the C string to wrap
49 */ 97 * @param length the length of the string
50 size_t sstrnlen(size_t n, sstr_t s, ...); 98 * @return a new sstr_t containing the C string
51 99 *
52 100 * @see sstr()
53 /* 101 * @see S()
54 * concatenates n strings 102 */
55 * 103 sstr_t sstrn(char *cstring, size_t length);
56 * n number of strings 104
57 * s new string with enough memory allocated 105
58 * ... strings 106 /**
59 */ 107 * Returns the cumulated length of all specified strings.
60 sstr_t sstrncat(size_t n, sstr_t s, sstr_t c1, ...); 108 *
61 109 * At least one string must be specified.
62 110 *
63 /* 111 * <b>Attention:</b> if the count argument does not match the count of the
64 * 112 * specified strings, the behavior is undefined.
65 */ 113 *
66 sstr_t sstrsubs(sstr_t s, size_t start); 114 * @param count the total number of specified strings (so at least 1)
67 115 * @param string the first string
68 /* 116 * @param ... all other strings
69 * 117 * @return the cumulated length of all strings
70 */ 118 */
71 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length); 119 size_t sstrnlen(size_t count, sstr_t string, ...);
72 120
73 /* 121
74 * splits s into n parts 122 /**
75 * 123 * Concatenates strings.
76 * s the string to split 124 *
77 * d the delimiter string 125 * At least one string must be specified and there must be enough memory
78 * n the maximum size of the resulting list 126 * available referenced by the destination sstr_t.ptr for this function to
79 * a size of 0 indicates an unbounded list size 127 * successfully concatenate all specified strings.
80 * the actual size of the list will be stored here 128 *
81 * 129 * The sstr_t.length of the destination string specifies the capacity and
82 * Hint: use this value to avoid dynamic reallocation of the result list 130 * should match the total memory available referenced by the destination
83 * 131 * sstr_t.ptr. This function <i>never</i> copies data beyond the capacity and
84 * Returns a list of the split strings 132 * does not modify any of the source strings.
85 * NOTE: this list needs to be freed manually after usage 133 *
86 * 134 * <b>Attention:</b>
87 * Returns NULL on error 135 * <ul>
88 */ 136 * <li>Any content in the destination string will be overwritten</li>
89 sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n); 137 * <li>The destination sstr_t.ptr is <b>NOT</b>
90 138 * <code>NULL</code>-terminated</li>
139 * <li>The destination sstr_t.length is set to the total length of the
140 * concatenated strings</li>
141 * <li><i>Hint:</i> get a <code>NULL</code>-terminated string by performing
142 * <code>mystring.ptr[mystring.length]='\0'</code> after calling this
143 * function</li>
144 * </ul>
145 *
146 * @param dest new sstr_t with capacity information and allocated memory
147 * @param count the total number of strings to concatenate
148 * @param src the first string
149 * @param ... all other strings
150 * @return the argument for <code>dest</code> is returned
151 */
152 sstr_t sstrncat(sstr_t dest, size_t count, sstr_t src, ...);
153
154
155 /**
156 * Returns a substring starting at the specified location.
157 *
158 * <b>Attention:</b> the new string references the same memory area as the
159 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
160 * Use sstrdup() to get a copy.
161 *
162 * @param string input string
163 * @param start start location of the substring
164 * @return a substring of <code>string</code> starting at <code>start</code>
165 *
166 * @see sstrsubsl()
167 * @see sstrchr()
168 */
169 sstr_t sstrsubs(sstr_t string, size_t start);
170
171 /**
172 * Returns a substring with a maximum length starting at the specified location.
173 *
174 * <b>Attention:</b> the new string references the same memory area as the
175 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
176 * Use sstrdup() to get a copy.
177 *
178 * @param string input string
179 * @param start start location of the substring
180 * @param length the maximum length of the substring
181 * @return a substring of <code>string</code> starting at <code>start</code>
182 * with a maximum length of <code>length</code>
183 *
184 * @see sstrsubs()
185 * @see sstrchr()
186 */
187 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
188
189 /**
190 * Returns a substring starting at the location of the first occurrence of the
191 * specified character.
192 *
193 * If the string does not contain the character, an empty string is returned.
194 *
195 * @param string the string where to locate the character
196 * @param chr the character to locate
197 * @return a substring starting at the first location of <code>chr</code>
198 *
199 * @see sstrsubs()
200 */
201 sstr_t sstrchr(sstr_t string, int chr);
202
203 /**
204 * Splits a string into parts by using a delimiter string.
205 *
206 * This function will return <code>NULL</code>, if one of the following happens:
207 * <ul>
208 * <li>the string length is zero</li>
209 * <li>the delimiter length is zero</li>
210 * <li>the string equals the delimiter</li>
211 * <li>memory allocation fails</li>
212 * </ul>
213 *
214 * The integer referenced by <code>count</code> is used as input and determines
215 * the maximum size of the resulting list, i.e. the maximum count of splits to
216 * perform + 1.
217 *
218 * The integer referenced by <code>count</code> is also used as output and is
219 * set to
220 * <ul>
221 * <li>-2, on memory allocation errors</li>
222 * <li>-1, if either the string or the delimiter is an empty string</li>
223 * <li>0, if the string equals the delimiter</li>
224 * <li>1, if the string does not contain the delimiter</li>
225 * <li>the count of list items, otherwise</li>
226 * </ul>
227 *
228 * If the string starts with the delimiter, the first item of the resulting
229 * list will be an empty string.
230 *
231 * If the string ends with the delimiter and the maximum list size is not
232 * exceeded, the last list item will be an empty string.
233 *
234 * <b>Attention:</b> All list items <b>AND</b> all sstr_t.ptr of the list
235 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
236 * an allocator that manages the memory to avoid this.
237 *
238 * @param string the string to split
239 * @param delim the delimiter string
240 * @param count IN: the maximum size of the resulting list (0 for an
241 * unbounded list), OUT: the actual size of the list
242 * @return a list of the split strings as sstr_t array or
243 * <code>NULL</code> on error
244 *
245 * @see sstrsplit_a()
246 */
247 sstr_t* sstrsplit(sstr_t string, sstr_t delim, size_t *count);
248
249 /**
250 * Performs sstrsplit() using an UcxAllocator.
251 *
252 * <i>Read the description of sstrsplit() for details.</i>
253 *
254 * The memory for the sstr_t.ptr pointers of the list items and the memory for
255 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
256 * function.
257 *
258 * <b>Note:</b> the allocator is not used for memory that is freed within the
259 * same call of this function (locally scoped variables).
260 *
261 * @param allocator the UcxAllocator used for allocating memory
262 * @param string the string to split
263 * @param delim the delimiter string
264 * @param count IN: the maximum size of the resulting list (0 for an
265 * unbounded list), OUT: the actual size of the list
266 * @return a list of the split strings as sstr_t array or
267 * <code>NULL</code> on error
268 *
269 * @see sstrsplit()
270 */
271 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
272 size_t *count);
273
274 /**
275 * Compares two UCX strings with standard <code>memcmp()</code>.
276 *
277 * At first it compares the sstr_t.length attribute of the two strings. The
278 * <code>memcmp()</code> function is called, if and only if the lengths match.
279 *
280 * @param s1 the first string
281 * @param s2 the second string
282 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
283 * length of s1 is greater than the length of s2 or the result of
284 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
285 */
91 int sstrcmp(sstr_t s1, sstr_t s2); 286 int sstrcmp(sstr_t s1, sstr_t s2);
92 287
93 sstr_t sstrdup(sstr_t s); 288 /**
289 * Creates a duplicate of the specified string.
290 *
291 * The new sstr_t will contain a copy allocated by standard
292 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
293 * <code>free()</code>.
294 *
295 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
296 * terminated.
297 *
298 * @param string the string to duplicate
299 * @return a duplicate of the string
300 * @see sstrdup_a()
301 */
302 sstr_t sstrdup(sstr_t string);
303
304 /**
305 * Creates a duplicate of the specified string using an UcxAllocator.
306 *
307 * The new sstr_t will contain a copy allocated by the allocator's
308 * ucx_allocator_malloc function. So it is implementation dependent, whether the
309 * returned sstr_t.ptr pointer must be passed to the allocator's
310 * ucx_allocator_free function manually.
311 *
312 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
313 * terminated.
314 *
315 * @param allocator a valid instance of an UcxAllocator
316 * @param string the string to duplicate
317 * @return a duplicate of the string
318 * @see sstrdup()
319 */
320 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
321
322 /**
323 * Omits leading and trailing spaces.
324 *
325 * This function returns a new sstr_t containing a trimmed version of the
326 * specified string.
327 *
328 * <b>Note:</b> the new sstr_t references the same memory, thus you
329 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
330 * <code>free()</code>. It is also highly recommended to avoid assignments like
331 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
332 * source string. Assignments of this type are only permitted, if the
333 * sstr_t.ptr of the source string does not need to be freed or if another
334 * reference to the source string exists.
335 *
336 * @param string the string that shall be trimmed
337 * @return a new sstr_t containing the trimmed string
338 */
339 sstr_t sstrtrim(sstr_t string);
94 340
95 #ifdef __cplusplus 341 #ifdef __cplusplus
96 } 342 }
97 #endif 343 #endif
98 344
99 #endif /* _SSTRING_H */ 345 #endif /* UCX_STRING_H */

mercurial