I'm currently going through nand2tetris, mainly as an excuse to learn C via writing some non-trivial programs. As I've already done it in Python, I've begun to 'port' over some of the Python features to C to make the rest of the course easier (N2T requires you to write an assembler, VM, compiler, etc.). This is the start of an attempt at a string type.
This string lib mostly lifts its (limited*) functionality from Python strings; obviously there is not the same safety (I did consider making the struct String
opaque, but I've read that's generally considered bad practice).
Overall, the design goals are as follows:
- Strings should know their lengths (prevent out of bound errors)
- Strings should be immutable (if you want to add/remove characters, you create a new string)
- API should be clear/obvious to use
- Discourage indexing directly (not quite an iterator but I've gone with java style charAt)
- Stay true to N2T style i.e. reinvent the wheel where possible/reasonable
I'm not going to direct the review too much, please just point out things that are horrible, "smell funny" and/or could generally be improved.
mystring.h
/* String library headers */
#ifndef MYSTRING
#define MYSTRING
#include <stdlib.h>
/*
 * Length-carrying string.
 *
 * newstr and concat fill in both fields: `length` is the number of
 * characters before the terminating '\0', and `s` points to a malloc'd,
 * '\0'-terminated buffer. freestr releases the buffer and sets `s` to NULL.
 *
 * The field order matters: the implementation builds values positionally,
 * e.g. (String){0, NULL}.
 */
typedef struct {
size_t length; /* character count, not counting the terminating '\0' */
char *s; /* heap buffer holding the characters plus a terminating '\0' */
} String;
/*
create a new String holding a heap-allocated copy of the given
'\0'-terminated C string; release it with freestr.
if the allocation fails, an error is logged and the program exits.
*/
String newstr(char *);
/*
concatenate two strings, returning the result as a new String.
NOTE: the implementation calls freestr on BOTH arguments (casting away the
const), so neither argument may be used again after the call.
if the allocation fails, an error is logged and the program exits.
*/
String concat(const String *, const String *);
/*
concatenate a string with a character array (i.e. c string).
implemented via concat, so the String argument is released by the call.
*/
String concatRaw(const String *, char *);
/*
prefix matching - returns 1 if there is a prefix match, else return 0
prefix can be one or more characters i.e. 'hello' is a prefix of 'hello world'.
an empty prefix matches every string.
*/
int startswith(const String *, char *prefix);
/* get char at the given index - if out of bounds, log error and exit */
char charat(const String *, size_t idx);
/*
release the buffer owned by the string and set its pointer to NULL;
calling it again on the same String does nothing.
*/
void freestr(String *);
#endif
mystring.c
/* An attempt at a C string library */
#include <stdio.h>
#include "mystring.h"
#define OOM "-------- OUT OF MEMORY ---------"
/*
 * Report a fatal error and terminate the process.
 *
 * func_name: name of the function reporting the error
 * ptr_name:  name of the variable involved (callers pass "None" when there
 *            is no such variable)
 * message:   human-readable description of the failure
 *
 * The report is written to stderr rather than stdout so that diagnostics are
 * not mixed into redirected program output. This function never returns; the
 * process exits with EXIT_FAILURE.
 */
static void exit_with_message(const char *func_name, const char *ptr_name,
                              const char *message)
{
    fprintf(stderr, "(%s)-(%s): %s\n", func_name, ptr_name, message);
    exit(EXIT_FAILURE);
}
/*
 * Release the buffer owned by a String.
 *
 * w: string to release; a NULL pointer is ignored.
 *
 * Afterwards w->s is NULL and w->length is 0, so calling freestr again on the
 * same String is harmless, and a released string reads as empty instead of
 * keeping a length that no longer matches any buffer.
 */
void freestr(String *w)
{
    if (w == NULL)
        return;

    free(w->s); /* free(NULL) is a no-op, so no guard is needed */
    w->s = NULL;
    w->length = 0;
}
String concat(const String *w1, const String *w2)
{
size_t i, j;
String newString;
char *buff = malloc(sizeof(*buff) * (w1->length + w2->length + 1));
if (!buff)
// 'catch' oom error
goto oom_error;
i = j = 0;
// copy over old strings to the new buffer
while (i < w1->length) {
buff[i] = w1->s[i];
i++;
}
while (j < w2->length) {
buff[i++] = w2->s[j++];
}
// set final character
buff[i] = '\0';
newString.s = buff;
newString.length = w1->length + w2->length;
// free strings in w1, w2
freestr((String *)w1);
freestr((String *)w2);
return newString;
oom_error:
// free strings before exiting
freestr((String *)w1);
freestr((String *)w2);
exit_with_message("concat", "buff", OOM);
// return empty string for the compiler, does not matter as we will exit anyways
return (String){0, NULL};
}
/*
 * Append a plain C string to a String.
 *
 * w2 is first wrapped in a temporary String by newstr, then both are handed
 * to concat, whose result is returned unchanged.
 */
String concatRaw(const String *w1, char w2[])
{
    String suffix = newstr(w2);
    String joined = concat(w1, &suffix);

    return joined;
}
/*
 * Prefix test.
 *
 * haystack: the string to inspect
 * n:        '\0'-terminated candidate prefix
 *
 * Returns 1 when every character of n matches the start of haystack and 0
 * otherwise; an empty n always matches. A prefix longer than the haystack
 * never matches.
 *
 * The comparison walks n directly instead of first copying it into a
 * temporary String, so the check allocates nothing and cannot terminate the
 * program on an out-of-memory condition.
 */
int startswith(const String *haystack, char n[])
{
    for (size_t i = 0; n[i] != '\0'; i++) {
        // ran past the haystack, or found a differing character
        if (i >= haystack->length || n[i] != haystack->s[i])
            return 0;
    }
    // reached the end of the prefix without a mismatch
    return 1;
}
/*
 * Bounds-checked character access.
 *
 * Returns the character of str at position index. Valid positions run from 0
 * to str->length - 1; any other index logs an out-of-bounds error and exits.
 */
char charat(const String *str, size_t index)
{
    if (str->length <= index)
        exit_with_message("charat", "None", "Out of bounds error");

    return str->s[index];
}
String newstr(char word[])
{
String newString;
size_t length = 0;
while (word[length] != '\0')
length++;
// using *buff in the sizeof function 'locks' together the declaration and
// type information i.e. if buff type changes, the malloc does not need to
// change as well. This gives less room for errors.
// more info: https://stackoverflow.com/a/605858
char *buff = malloc(sizeof(*buff) * length + 1);
if (!buff)
exit_with_message("newstr", "buff", OOM);
size_t i = 0;
// copy word to buffer
while ((buff[i] = *word++) != '\0')
i++;
buff[i] = '\0';
newString.s = buff;
newString.length = length;
return newString;
}
test_mystring.c
/* String tests. */
#include <assert.h>
#include <stdio.h>
#include "mystring.h"
/*
 * Test helper: compare two '\0'-terminated character arrays.
 * Returns 1 when both hold exactly the same characters, 0 otherwise.
 */
int issame(const char w1[], const char w2[])
{
    const char *a = w1;
    const char *b = w2;

    for (;; a++, b++) {
        // differing characters, including one side ending early
        if (*a != *b)
            return 0;
        // both sides ended at the same position
        if (*a == '\0')
            return 1;
    }
}
/* Sanity checks for the issame helper used by the other tests. */
void test_issame()
{
    /* identical inputs */
    assert(issame("hello world!!", "hello world!!"));
    assert(issame("hello", "hello"));

    /* one side empty */
    assert(!issame("hello", ""));
    assert(!issame("", "hello"));

    /* one side longer than the other */
    assert(!issame("hell", "hello"));
    assert(!issame("hello", "hell"));
    assert(!issame("hello ", "hello"));

    /* same length, first character differs */
    assert(!issame("aello", "hello"));
    assert(!issame("hello", "bello"));

    /* same length, last character differs */
    assert(!issame("hello", "hella"));
}
void test_newstr_length()
{
assert(newstr("hello").length == 5);
assert(!(newstr("hello").length == 6));
assert(!(newstr("hello").length == 4));
assert(newstr("world").length == 5);
assert(newstr("hello world!!").length == 13);
assert(newstr(" hello world").length == 21);
}
void test_newstr_actual_string()
{
assert(issame(newstr("hello").s, "hello"));
assert(issame(newstr("This is a much longer word").s, "This is a much longer word"));
assert(!issame(newstr("hello ").s, "hello"));
assert(!issame(newstr("hello").s, "bello"));
assert(!issame(newstr("hhello").s, "hello"));
assert(!issame(newstr(" hello").s, "hello"));
// empty string
assert(issame(newstr("").s, ""));
}
/* Run all newstr tests: first the length checks, then the content checks. */
void test_newstr()
{
test_newstr_length();
test_newstr_actual_string();
}
/*
 * Returns 1 when concatenating left and right yields expected, else 0.
 * concat releases both of its inputs, so only the joined result has to be
 * freed here.
 */
static int concat_gives(char *left, char *right, const char *expected)
{
    String w1 = newstr(left);
    String w2 = newstr(right);
    String joined = concat(&w1, &w2);
    int same = issame(joined.s, expected);

    freestr(&joined);
    return same;
}

/* Basic concat behaviour: ordinary join, empty left operand, mismatch. */
void test_concat__basic(void)
{
    assert(concat_gives("this is ", "a concatenated string",
                        "this is a concatenated string"));
    assert(concat_gives("", "a concatenated string", "a concatenated string"));
    assert(!concat_gives("this is ", "a concatenated string",
                         "this is a concatenated"));
}
/* Appending a C string must grow the length and extend the contents. */
void test_concatRaw__basic(void)
{
    String w1 = newstr("hello world");
    size_t len = w1.length;

    // concatRaw releases the old w1 buffer; w1 then owns the joined string
    w1 = concatRaw(&w1, "!!");
    assert(w1.length == (len + 2));
    assert(issame(w1.s, "hello world!!"));

    // release the final string so the test does not leak
    freestr(&w1);
}
/* Repeated appends must accumulate the pieces in order. */
void test_concatRaw(void)
{
    char *t[] = {"a", "b", "c"};
    const size_t count = sizeof(t) / sizeof(t[0]);
    String w1 = newstr("");

    // each call releases the previous w1 buffer and returns the longer string
    for (size_t i = 0; i < count; i++)
        w1 = concatRaw(&w1, t[i]);

    assert(w1.length == count);
    assert(issame(w1.s, "abc"));

    // release the final string so the test does not leak
    freestr(&w1);
}
/* Many consecutive appends must keep the recorded length accurate. */
void test_concatRaw__stress(void)
{
    String w1 = newstr("");

    for (int i = 0; i < 1000; i++)
        w1 = concatRaw(&w1, "a");
    assert(w1.length == 1000);

    // release the final string so the test does not leak
    freestr(&w1);
}
/* Run all concatenation tests (concat and concatRaw). */
void test_concat()
{
test_concat__basic();
test_concatRaw__basic();
test_concatRaw__stress();
test_concatRaw();
}
/* Prefix matching against a comment line, a greeting and the empty string. */
void test_startswith()
{
    /* a line that opens with a comment marker */
    String comment = newstr("// this is a comment");
    assert(startswith(&comment, "//"));
    assert(!startswith(&comment, " /"));
    freestr(&comment);

    /* prefixes of growing length, then near misses */
    String greeting = newstr("hello world!");
    assert(startswith(&greeting, "h"));
    assert(startswith(&greeting, "hell"));
    assert(startswith(&greeting, "hello"));
    assert(!startswith(&greeting, "bello"));
    assert(!startswith(&greeting, "helo"));
    freestr(&greeting);

    /* the empty string starts with the empty prefix and nothing else */
    String empty = newstr("");
    assert(startswith(&empty, ""));
    assert(!startswith(&empty, " "));
    freestr(&empty);
}
/* charat must return the character stored at each valid index. */
void test_charat(void)
{
    String w1 = newstr("// this is a comment");

    assert(charat(&w1, 0) == '/');
    assert(charat(&w1, 9) == 's');
    assert(charat(&w1, 10) == ' ');
    assert(charat(&w1, w1.length - 1) == 't');

    // charat(&w1, w1.length) is out of bounds: it logs an error and exits the
    // process, so it cannot be asserted from inside this test

    // release the string so the test does not leak
    freestr(&w1);
}
/*
 * Run every test group in order. A failing assert aborts the program, so
 * returning from this function means all checks passed.
 */
void tests()
{
test_issame();
test_newstr();
test_concat();
test_startswith();
test_charat();
}
/* Test driver: run the whole suite, then report success on stdout. */
int main()
{
    tests();
    fputs("----- STRING TESTS PASS ------\n", stdout);
    return 0;
}
`strlen` once. Then you can use `strlen` in your code. No need to rewrite `while (word[length] != '\0') length++;` again and again and again forever in every function that needs to know a string's length.