1 /* |
|
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
|
3 * |
|
4 * Copyright 2016 Olaf Wintermann. All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions are met: |
|
8 * |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
|
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
26 * POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 /** |
|
29 * Bounded string implementation. |
|
30 * |
|
31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings. |
|
32 * The main difference to C strings is, that <code>sstr_t</code> does <b>not |
|
33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored |
|
34 * within the structure. |
|
35 * |
|
 * When using <code>sstr_t</code>, developers must be fully aware of what type
 * of string (<code>NULL</code>-terminated or not) they are using, when
 * accessing the <code>char* ptr</code> directly.
|
39 * |
|
40 * The UCX string module provides some common string functions, known from |
|
41 * standard libc, working with <code>sstr_t</code>. |
|
42 * |
|
43 * @file string.h |
|
44 * @author Mike Becker |
|
45 * @author Olaf Wintermann |
|
46 */ |
|
47 |
|
48 #ifndef UCX_STRING_H |
|
49 #define UCX_STRING_H |
|
50 |
|
51 #include "ucx.h" |
|
52 #include "allocator.h" |
|
53 #include <stddef.h> |
|
54 |
|
/**
 * Shortcut for a <code>sstr_t struct</code> literal (brace initializer).
 *
 * The length is computed as <code>sizeof(s)-1</code>. Therefore <code>s</code>
 * must be a string literal (or a char array whose last element is the
 * terminator); for a plain pointer <code>sizeof</code> would yield the size
 * of the pointer and not the length of the string.
 *
 * @param s a string literal
 */
#define ST(s) { (char*)(s), sizeof(s)-1 }
|
57 |
|
/**
 * Shortcut for the conversion of a C string to a <code>sstr_t</code>.
 *
 * Expands to a call of sstrn() with a length of <code>sizeof(s)-1</code>.
 * Therefore <code>s</code> must be a string literal (or a char array whose
 * last element is the terminator); for a plain pointer <code>sizeof</code>
 * would yield the size of the pointer and not the length of the string.
 *
 * @param s a string literal
 */
#define S(s) sstrn((char*)(s), sizeof(s)-1)
|
60 |
|
61 #ifdef __cplusplus |
|
62 extern "C" { |
|
63 #endif |
|
64 |
|
/**
 * The UCX string structure.
 *
 * A string is described by a pointer to its first character together with
 * an explicitly stored length.
 */
typedef struct {
    /** A reference to the string (<b>not necessarily <code>NULL</code>
     * -terminated</b>) */
    char *ptr;
    /** The length of the string */
    size_t length;
} sstr_t;
|
75 |
|
/**
 * Creates a new sstr_t based on a C string.
 *
 * The length is implicitly inferred by using a call to <code>strlen()</code>,
 * so the C string must be properly terminated.
 *
 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
 * want a copy, use sstrdup() on the return value of this function.
 *
 * @param cstring the C string to wrap
 * @return a new sstr_t containing the C string
 *
 * @see sstrn()
 */
sstr_t sstr(char *cstring);
|
90 |
|
/**
 * Creates a new sstr_t of the specified length based on a C string.
 *
 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
 * want a copy, use sstrdup() on the return value of this function.
 *
 * @param cstring the C string to wrap
 * @param length the length of the string
 * @return a new sstr_t containing the C string
 *
 * @see sstr()
 * @see S()
 */
sstr_t sstrn(char *cstring, size_t length);
|
105 |
|
106 |
|
/**
 * Returns the accumulated length of all specified strings.
 *
 * At least one string must be specified.
 *
 * <b>Attention:</b> if the count argument does not match the number of
 * specified strings, the behavior is undefined.
 *
 * @param count the total number of specified strings (so at least 1)
 * @param string the first string
 * @param ... all other strings
 * @return the accumulated length of all strings
 */
size_t sstrnlen(size_t count, sstr_t string, ...);
|
121 |
|
/**
 * Concatenates two or more strings.
 *
 * The resulting string will be allocated by standard <code>malloc()</code>.
 * So developers <b>MUST</b> pass the sstr_t.ptr of the return value to
 * <code>free()</code>.
 *
 * The sstr_t.ptr of the return value will <i>always</i> be
 * <code>NULL</code>-terminated.
 *
 * @param count the total number of strings to concatenate
 * @param s1 first string
 * @param s2 second string
 * @param ... all remaining strings
 * @return the concatenated string
 *
 * @see sstrcat_a()
 */
sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
|
138 |
|
/**
 * Concatenates two or more strings using a UcxAllocator.
 *
 * See sstrcat() for details.
 *
 * @param a the allocator to use
 * @param count the total number of strings to concatenate
 * @param s1 first string
 * @param s2 second string
 * @param ... all remaining strings
 * @return the concatenated string
 *
 * @see sstrcat()
 */
sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
|
152 |
|
153 |
|
/**
 * Returns a substring starting at the specified location.
 *
 * <b>Attention:</b> the new string references the same memory area as the
 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
 * Use sstrdup() on the return value to get an independent copy.
 *
 * @param string input string
 * @param start start location of the substring
 * @return a substring of <code>string</code> starting at <code>start</code>
 *
 * @see sstrsubsl()
 * @see sstrchr()
 */
sstr_t sstrsubs(sstr_t string, size_t start);
|
169 |
|
/**
 * Returns a substring with a maximum length starting at the specified location.
 *
 * <b>Attention:</b> the new string references the same memory area as the
 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
 * Use sstrdup() on the return value to get an independent copy.
 *
 * @param string input string
 * @param start start location of the substring
 * @param length the maximum length of the substring
 * @return a substring of <code>string</code> starting at <code>start</code>
 * with a maximum length of <code>length</code>
 *
 * @see sstrsubs()
 * @see sstrchr()
 */
sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
|
187 |
|
/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string in which to locate the character
 * @param chr the character to locate
 * @return a substring starting at the first location of <code>chr</code>
 *
 * @see sstrsubs()
 * @see sstrrchr()
 */
sstr_t sstrchr(sstr_t string, int chr);
|
201 |
|
/**
 * Returns a substring starting at the location of the last occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string in which to locate the character
 * @param chr the character to locate
 * @return a substring starting at the last location of <code>chr</code>
 *
 * @see sstrsubs()
 * @see sstrchr()
 */
sstr_t sstrrchr(sstr_t string, int chr);
|
215 |
|
/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified string.
 *
 * If the string does not contain the other string, an empty string is returned.
 *
 * If <code>match</code> is an empty string, the complete <code>string</code> is
 * returned.
 *
 * @param string the string to be scanned
 * @param match string containing the sequence of characters to match
 * @return a substring starting at the first occurrence of
 * <code>match</code>, or an empty string, if the sequence is not
 * present in <code>string</code>
 */
sstr_t sstrstr(sstr_t string, sstr_t match);
|
232 |
|
/**
 * Splits a string into parts by using a delimiter string.
 *
 * This function will return <code>NULL</code>, if one of the following happens:
 * <ul>
 * <li>the string length is zero</li>
 * <li>the delimiter length is zero</li>
 * <li>the string equals the delimiter</li>
 * <li>memory allocation fails</li>
 * </ul>
 *
 * The integer referenced by <code>count</code> is used as input and determines
 * the maximum size of the resulting array, i.e. the maximum count of splits to
 * perform + 1.
 *
 * The integer referenced by <code>count</code> is also used as output and is
 * set to
 * <ul>
 * <li>-2, on memory allocation errors</li>
 * <li>-1, if either the string or the delimiter is an empty string</li>
 * <li>0, if the string equals the delimiter</li>
 * <li>1, if the string does not contain the delimiter</li>
 * <li>the count of array items, otherwise</li>
 * </ul>
 *
 * If the string starts with the delimiter, the first item of the resulting
 * array will be an empty string.
 *
 * If the string ends with the delimiter and the maximum list size is not
 * exceeded, the last array item will be an empty string.
 * In case the list size would be exceeded, the last array item will be the
 * remaining string after the last split, <i>including</i> the terminating
 * delimiter.
 *
 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
 * an allocator for managed memory to avoid this.
 *
 * @param string the string to split
 * @param delim the delimiter string
 * @param count IN: the maximum size of the resulting array (0 = no limit),
 * OUT: the actual size of the array, or a non-positive status value as
 * listed above
 * @return a sstr_t array containing the split strings or
 * <code>NULL</code> on error
 *
 * @see sstrsplit_a()
 */
sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
|
281 |
|
/**
 * Performs sstrsplit() using a UcxAllocator.
 *
 * <i>Read the description of sstrsplit() for details.</i>
 *
 * The memory for the sstr_t.ptr pointers of the array items and the memory for
 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
 * function.
 *
 * <b>Note:</b> the allocator is not used for memory that is freed within the
 * same call of this function (locally scoped variables).
 *
 * @param allocator the UcxAllocator used for allocating memory
 * @param string the string to split
 * @param delim the delimiter string
 * @param count IN: the maximum size of the resulting array (0 = no limit),
 * OUT: the actual size of the array
 * @return a sstr_t array containing the split strings or
 * <code>NULL</code> on error
 *
 * @see sstrsplit()
 */
sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
        ssize_t *count);
|
306 |
|
/**
 * Compares two UCX strings with standard <code>memcmp()</code>.
 *
 * At first it compares the sstr_t.length attribute of the two strings. The
 * <code>memcmp()</code> function is called, if and only if the lengths match.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return
 * <ul>
 * <li>-1, if the length of s1 is less than the length of s2</li>
 * <li>1, if the length of s1 is greater than the length of s2</li>
 * <li>the result of <code>memcmp()</code> otherwise (i.e. 0 if the strings
 * match)</li>
 * </ul>
 */
int sstrcmp(sstr_t s1, sstr_t s2);
|
320 |
|
/**
 * Compares two UCX strings ignoring the case.
 *
 * At first it compares the sstr_t.length attribute of the two strings. If and
 * only if the lengths match, both strings are compared char by char ignoring
 * the case.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return
 * <ul>
 * <li>-1, if the length of s1 is less than the length of s2</li>
 * <li>1, if the length of s1 is greater than the length of s2</li>
 * <li>the difference between the first two differing characters otherwise
 * (i.e. 0 if the strings match and no characters differ)</li>
 * </ul>
 */
int sstrcasecmp(sstr_t s1, sstr_t s2);
|
336 |
|
/**
 * Creates a duplicate of the specified string.
 *
 * The new sstr_t will contain a copy allocated by standard
 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr of the
 * return value to <code>free()</code>.
 *
 * The sstr_t.ptr of the return value will <i>always</i> be
 * <code>NULL</code>-terminated.
 *
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see sstrdup_a()
 */
sstr_t sstrdup(sstr_t string);
|
352 |
|
/**
 * Creates a duplicate of the specified string using a UcxAllocator.
 *
 * The new sstr_t will contain a copy allocated by the allocator's
 * ucx_allocator_malloc function. So it is implementation dependent, whether
 * the returned sstr_t.ptr pointer must be passed to the allocator's
 * ucx_allocator_free function manually.
 *
 * The sstr_t.ptr of the return value will <i>always</i> be
 * <code>NULL</code>-terminated.
 *
 * @param allocator a valid instance of a UcxAllocator
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see sstrdup()
 */
sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
|
370 |
|
/**
 * Omits leading and trailing spaces.
 *
 * This function returns a new sstr_t containing a trimmed version of the
 * specified string.
 *
 * <b>Note:</b> the new sstr_t references the same memory, thus you
 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
 * <code>free()</code>. It is also highly recommended to avoid assignments like
 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
 * source string. Assignments of this type are only permitted, if the
 * sstr_t.ptr of the source string does not need to be freed or if another
 * reference to the source string exists.
 *
 * @param string the string that shall be trimmed
 * @return a new sstr_t containing the trimmed string
 */
sstr_t sstrtrim(sstr_t string);
|
389 |
|
/**
 * Checks, if a string has a specific prefix.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return 1, if and only if the string has the specified prefix, 0 otherwise
 *
 * @see sstrsuffix()
 */
int sstrprefix(sstr_t string, sstr_t prefix);
|
397 |
|
/**
 * Checks, if a string has a specific suffix.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return 1, if and only if the string has the specified suffix, 0 otherwise
 *
 * @see sstrprefix()
 */
int sstrsuffix(sstr_t string, sstr_t suffix);
|
405 |
|
/**
 * Returns a lower case version of a string.
 *
 * This function creates a duplicate of the input string, first. See the
 * documentation of sstrdup() for the implications.
 *
 * @param string the input string
 * @return the resulting lower case string
 * @see sstrdup()
 * @see sstrlower_a()
 */
sstr_t sstrlower(sstr_t string);
|
417 |
|
/**
 * Returns a lower case version of a string using a UcxAllocator.
 *
 * This function creates a duplicate of the input string, first. See the
 * documentation of sstrdup_a() for the implications.
 *
 * @param allocator the allocator used for duplicating the string
 * @param string the input string
 * @return the resulting lower case string
 * @see sstrdup_a()
 * @see sstrlower()
 */
sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
|
430 |
|
/**
 * Returns an upper case version of a string.
 *
 * This function creates a duplicate of the input string, first. See the
 * documentation of sstrdup() for the implications.
 *
 * @param string the input string
 * @return the resulting upper case string
 * @see sstrdup()
 * @see sstrupper_a()
 */
sstr_t sstrupper(sstr_t string);
|
442 |
|
/**
 * Returns an upper case version of a string using a UcxAllocator.
 *
 * This function creates a duplicate of the input string, first. See the
 * documentation of sstrdup_a() for the implications.
 *
 * @param allocator the allocator used for duplicating the string
 * @param string the input string
 * @return the resulting upper case string
 * @see sstrdup_a()
 * @see sstrupper()
 */
sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
|
455 |
|
456 #ifdef __cplusplus |
|
457 } |
|
458 #endif |
|
459 |
|
460 #endif /* UCX_STRING_H */ |
|