ucx/string.h

changeset 157
0b33b9396851
parent 156
62f1a55535e7
child 158
4bde241c49b1
equal deleted inserted replaced
156:62f1a55535e7 157:0b33b9396851
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2016 Olaf Wintermann. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28 /**
29 * Bounded string implementation.
30 *
31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
32 * The main difference from C strings is that <code>sstr_t</code> does <b>not
33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
34 * within the structure.
35 *
36 * When using <code>sstr_t</code>, developers must be fully aware of what type
37 * of string (<code>NULL</code>-terminated or not) they are using, when
38 * accessing the <code>char* ptr</code> directly.
39 *
40 * The UCX string module provides some common string functions, known from
41 * standard libc, working with <code>sstr_t</code>.
42 *
43 * @file string.h
44 * @author Mike Becker
45 * @author Olaf Wintermann
46 */
47
48 #ifndef UCX_STRING_H
49 #define UCX_STRING_H
50
51 #include "ucx.h"
52 #include "allocator.h"
53 #include <stddef.h>
54
55 /** Shortcut for a <code>sstr_t struct</code> literal from a string literal. */
56 #define ST(s) { (char*)(s), sizeof(s)-1 }
57
58 /** Shortcut for the conversion of a string literal to a <code>sstr_t</code>. */
59 #define S(s) sstrn((char*)(s), sizeof(s)-1)
60
61 #ifdef __cplusplus
62 extern "C" {
63 #endif
64
65 /**
66 * The UCX string structure: a character pointer paired with an explicit length.
67 */
68 typedef struct {
69 /** A reference to the string (<b>not necessarily <code>NULL</code>
70 * -terminated</b>) */
71 char *ptr;
72 /** The length of the string (excluding any terminator) */
73 size_t length;
74 } sstr_t;
75
76 /**
77 * Creates a new sstr_t based on a C string.
78 *
79 * The length is implicitly inferred by using a call to <code>strlen()</code>.
80 *
81 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
82 * do want a copy, use sstrdup() on the return value of this function.
83 *
84 * @param cstring the C string to wrap
85 * @return a new sstr_t containing the C string
86 *
87 * @see sstrn()
88 */
89 sstr_t sstr(char *cstring);
90
91 /**
92 * Creates a new sstr_t of the specified length based on a C string.
93 *
94 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
95 * do want a copy, use sstrdup() on the return value of this function.
96 *
97 * @param cstring the C string to wrap
98 * @param length the length of the string
99 * @return a new sstr_t containing the C string
100 *
101 * @see sstr()
102 * @see S()
103 */
104 sstr_t sstrn(char *cstring, size_t length);
105
106
107 /**
108 * Returns the cumulative length of all specified strings.
109 *
110 * At least one string must be specified.
111 *
112 * <b>Attention:</b> if the count argument does not match the count of the
113 * specified strings, the behavior is undefined.
114 *
115 * @param count the total number of specified strings (so at least 1)
116 * @param string the first string
117 * @param ... all other strings
118 * @return the cumulative length of all strings
119 */
120 size_t sstrnlen(size_t count, sstr_t string, ...);
121
122 /**
123 * Concatenates two or more strings.
124 *
125 * The resulting string will be allocated by standard <code>malloc()</code>.
126 * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
127 *
128 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
129 * terminated.
130 *
131 * @param count the total number of strings to concatenate
132 * @param s1 first string
133 * @param s2 second string
134 * @param ... all remaining strings
135 * @return the concatenated string
136 */
137 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
138
139 /**
140 * Concatenates two or more strings using a UcxAllocator.
141 *
142 * See sstrcat() for details.
143 *
144 * @param a the allocator to use
145 * @param count the total number of strings to concatenate
146 * @param s1 first string
147 * @param s2 second string
148 * @param ... all remaining strings
149 * @return the concatenated string
150 */
151 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
152
153
154 /**
155 * Returns a substring starting at the specified location.
156 *
157 * <b>Attention:</b> the new string references the same memory area as the
158 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
159 * Use sstrdup() to get a copy.
160 *
161 * @param string input string
162 * @param start start location of the substring
163 * @return a substring of <code>string</code> starting at <code>start</code>
164 *
165 * @see sstrsubsl()
166 * @see sstrchr()
167 */
168 sstr_t sstrsubs(sstr_t string, size_t start);
169
170 /**
171 * Returns a substring with a maximum length starting at the specified location.
172 *
173 * <b>Attention:</b> the new string references the same memory area as the
174 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
175 * Use sstrdup() to get a copy.
176 *
177 * @param string input string
178 * @param start start location of the substring
179 * @param length the maximum length of the substring
180 * @return a substring of <code>string</code> starting at <code>start</code>
181 * with a maximum length of <code>length</code>
182 *
183 * @see sstrsubs()
184 * @see sstrchr()
185 */
186 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
187
188 /**
189 * Returns a substring starting at the location of the first occurrence of the
190 * specified character.
191 *
192 * If the string does not contain the character, an empty string is returned.
193 *
194 * @param string the string where to locate the character
195 * @param chr the character to locate
196 * @return a substring starting at the first location of <code>chr</code>
197 *
198 * @see sstrsubs()
199 */
200 sstr_t sstrchr(sstr_t string, int chr);
201
202 /**
203 * Returns a substring starting at the location of the last occurrence of the
204 * specified character.
205 *
206 * If the string does not contain the character, an empty string is returned.
207 *
208 * @param string the string where to locate the character
209 * @param chr the character to locate
210 * @return a substring starting at the last location of <code>chr</code>
211 *
212 * @see sstrsubs()
213 */
214 sstr_t sstrrchr(sstr_t string, int chr);
215
216 /**
217 * Returns a substring starting at the location of the first occurrence of the
218 * specified string.
219 *
220 * If the string does not contain the other string, an empty string is returned.
221 *
222 * If <code>match</code> is an empty string, the complete <code>string</code> is
223 * returned.
224 *
225 * @param string the string to be scanned
226 * @param match string containing the sequence of characters to match
227 * @return a substring starting at the first occurrence of
228 * <code>match</code>, or an empty string, if the sequence is not
229 * present in <code>string</code>
230 */
231 sstr_t sstrstr(sstr_t string, sstr_t match);
232
233 /**
234 * Splits a string into parts by using a delimiter string.
235 *
236 * This function will return <code>NULL</code>, if one of the following happens:
237 * <ul>
238 * <li>the string length is zero</li>
239 * <li>the delimiter length is zero</li>
240 * <li>the string equals the delimiter</li>
241 * <li>memory allocation fails</li>
242 * </ul>
243 *
244 * The integer referenced by <code>count</code> is used as input and determines
245 * the maximum size of the resulting array, i.e. the maximum count of splits to
246 * perform + 1.
247 *
248 * The integer referenced by <code>count</code> is also used as output and is
249 * set to
250 * <ul>
251 * <li>-2, on memory allocation errors</li>
252 * <li>-1, if either the string or the delimiter is an empty string</li>
253 * <li>0, if the string equals the delimiter</li>
254 * <li>1, if the string does not contain the delimiter</li>
255 * <li>the count of array items, otherwise</li>
256 * </ul>
257 *
258 * If the string starts with the delimiter, the first item of the resulting
259 * array will be an empty string.
260 *
261 * If the string ends with the delimiter and the maximum list size is not
262 * exceeded, the last array item will be an empty string.
263 * In case the list size would be exceeded, the last array item will be the
264 * remaining string after the last split, <i>including</i> the terminating
265 * delimiter.
266 *
267 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
268 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
269 * an allocator that manages memory, to avoid this.
270 *
271 * @param string the string to split
272 * @param delim the delimiter string
273 * @param count IN: the maximum size of the resulting array (0 = no limit),
274 * OUT: the actual size of the array
275 * @return a sstr_t array containing the split strings or
276 * <code>NULL</code> on error
277 *
278 * @see sstrsplit_a()
279 */
280 sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
281
282 /**
283 * Performs sstrsplit() using a UcxAllocator.
284 *
285 * <i>Read the description of sstrsplit() for details.</i>
286 *
287 * The memory for the sstr_t.ptr pointers of the array items and the memory for
288 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
289 * function.
290 *
291 * <b>Note:</b> the allocator is not used for memory that is freed within the
292 * same call of this function (locally scoped variables).
293 *
294 * @param allocator the UcxAllocator used for allocating memory
295 * @param string the string to split
296 * @param delim the delimiter string
297 * @param count IN: the maximum size of the resulting array (0 = no limit),
298 * OUT: the actual size of the array
299 * @return a sstr_t array containing the split strings or
300 * <code>NULL</code> on error
301 *
302 * @see sstrsplit()
303 */
304 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
305 ssize_t *count);
306
307 /**
308 * Compares two UCX strings with standard <code>memcmp()</code>.
309 *
310 * At first it compares the sstr_t.length attribute of the two strings. The
311 * <code>memcmp()</code> function is called, if and only if the lengths match.
312 *
313 * @param s1 the first string
314 * @param s2 the second string
315 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
316 * length of s1 is greater than the length of s2 or the result of
317 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
318 */
319 int sstrcmp(sstr_t s1, sstr_t s2);
320
321 /**
322 * Compares two UCX strings ignoring the case.
323 *
324 * At first it compares the sstr_t.length attribute of the two strings. If and
325 * only if the lengths match, both strings are compared char by char ignoring
326 * the case.
327 *
328 * @param s1 the first string
329 * @param s2 the second string
330 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
331 * length of s1 is greater than the length of s2 or the difference between the
332 * first two differing characters otherwise (i.e. 0 if the strings match and
333 * no characters differ)
334 */
335 int sstrcasecmp(sstr_t s1, sstr_t s2);
336
337 /**
338 * Creates a duplicate of the specified string.
339 *
340 * The new sstr_t will contain a copy allocated by standard
341 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
342 * <code>free()</code>.
343 *
344 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
345 * terminated.
346 *
347 * @param string the string to duplicate
348 * @return a duplicate of the string
349 * @see sstrdup_a()
350 */
351 sstr_t sstrdup(sstr_t string);
352
353 /**
354 * Creates a duplicate of the specified string using a UcxAllocator.
355 *
356 * The new sstr_t will contain a copy allocated by the allocator's
357 * ucx_allocator_malloc function. So it is implementation dependent, whether the
358 * returned sstr_t.ptr pointer must be passed to the allocator's
359 * ucx_allocator_free function manually.
360 *
361 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
362 * terminated.
363 *
364 * @param allocator a valid instance of a UcxAllocator
365 * @param string the string to duplicate
366 * @return a duplicate of the string
367 * @see sstrdup()
368 */
369 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
370
371 /**
372 * Omits leading and trailing spaces.
373 *
374 * This function returns a new sstr_t containing a trimmed version of the
375 * specified string.
376 *
377 * <b>Note:</b> the new sstr_t references the same memory, thus you
378 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
379 * <code>free()</code>. It is also highly recommended to avoid assignments like
380 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
381 * source string. Assignments of this type are only permitted, if the
382 * sstr_t.ptr of the source string does not need to be freed or if another
383 * reference to the source string exists.
384 *
385 * @param string the string that shall be trimmed
386 * @return a new sstr_t containing the trimmed string
387 */
388 sstr_t sstrtrim(sstr_t string);
389
390 /**
391 * Checks if a string has a specific prefix.
392 * @param string the string to check
393 * @param prefix the prefix the string should have
394 * @return 1, if and only if the string has the specified prefix, 0 otherwise
395 */
396 int sstrprefix(sstr_t string, sstr_t prefix);
397
398 /**
399 * Checks if a string has a specific suffix.
400 * @param string the string to check
401 * @param suffix the suffix the string should have
402 * @return 1, if and only if the string has the specified suffix, 0 otherwise
403 */
404 int sstrsuffix(sstr_t string, sstr_t suffix);
405
406 /**
407 * Returns a lower case version of a string.
408 *
409 * This function creates a duplicate of the input string, first. See the
410 * documentation of sstrdup() for the implications.
411 *
412 * @param string the input string
413 * @return the resulting lower case string
414 * @see sstrdup()
415 */
416 sstr_t sstrlower(sstr_t string);
417
418 /**
419 * Returns a lower case version of a string using a UcxAllocator.
420 *
421 * This function creates a duplicate of the input string, first. See the
422 * documentation of sstrdup_a() for the implications.
423 *
424 * @param allocator the allocator used for duplicating the string
425 * @param string the input string
426 * @return the resulting lower case string
427 * @see sstrdup_a()
428 */
429 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
430
431 /**
432 * Returns an upper case version of a string.
433 *
434 * This function creates a duplicate of the input string, first. See the
435 * documentation of sstrdup() for the implications.
436 *
437 * @param string the input string
438 * @return the resulting upper case string
439 * @see sstrdup()
440 */
441 sstr_t sstrupper(sstr_t string);
442
443 /**
444 * Returns an upper case version of a string using a UcxAllocator.
445 *
446 * This function creates a duplicate of the input string, first. See the
447 * documentation of sstrdup_a() for the implications.
448 *
449 * @param allocator the allocator used for duplicating the string
450 * @param string the input string
451 * @return the resulting upper case string
452 * @see sstrdup_a()
453 */
454 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
455
456 #ifdef __cplusplus
457 }
458 #endif
459
460 #endif /* UCX_STRING_H */

mercurial