The library (inspired by stb libraries) attempts to provide some commonly used functions (reading a file into memory, determining the size of a file) that are missing from the C standard library portably (for my own use cases).
Code:
#ifndef IO_H
#define IO_H
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
/*
* To use, do this:
* #define IO_IMPLEMENTATION
* before you include this file in *one* C file to create the implementation.
*
* i.e. it should look like:
* #include ...
* #include ...
*
* #define IO_IMPLEMENTATION
* #include "io.h"
* ...
*
* To make all functions have internal linkage, i.e. be private to the source
* file, do this:
* #define `IO_STATIC`
* before including "io.h".
*
* i.e. it should look like:
* #define IO_IMPLEMENTATION
* #define IO_STATIC
* #include "io.h"
* ...
*
* You can #define IO_MALLOC, IO_REALLOC, and IO_FREE to avoid using malloc(),
* realloc(), and free(). Note that all three must be defined at once, or none.
*/
#ifndef IO_DEF
#ifdef IO_STATIC
#define IO_DEF static
#else
#define IO_DEF extern
#endif /* IO_STATIC */
#endif /* IO_DEF */
#if defined(__GNUC__) || defined(__clang__)
#define ATTRIB_NONNULL(...) __attribute__((nonnull (__VA_ARGS__)))
#define ATTRIB_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#else
#define ATTRIB_NONNULL(...) /* If only. */
#define ATTRIB_WARN_UNUSED_RESULT /* If only. */
#endif /* defined(__GNUC__) || define(__clang__) */
/*
* Reads the file pointed to by `stream` to a buffer and returns it.
* The returned buffer is a null-terminated string.
* If `nbytes` is not NULL, it shall hold the size of the file.
*
* Returns NULL on memory allocation failure. The caller is responsible for
* freeing the returned pointer.
*/
IO_DEF char *io_read_file(FILE *stream,
size_t *nbytes) ATTRIB_NONNULL(1) ATTRIB_WARN_UNUSED_RESULT;
/*
* Splits a string into a sequence of tokens. The `delim` argument
* specifies a set of bytes that delimit the tokens in the parsed string.
* If `ntokens` is not NULL, it shall hold the amount of total tokens.
*
* Returns an array of pointers to the tokens, or NULL on memory allocation
* failure. The caller is responsible for freeing the returned pointer.
*/
IO_DEF char **io_split_by_delim(char *s, const char *delim,
size_t *ntokens) ATTRIB_NONNULL(1, 2) ATTRIB_WARN_UNUSED_RESULT;
/*
* Splits a string into lines.
* A wrapper around `io_split_by_delim()`. It calls the function with "\n" as
* the delimiter.
*
* Returns an array of pointers to the tokens, or NULL on memory allocation
* failure. The caller is responsible for freeing the returned pointer.
*/
IO_DEF char **io_split_lines(char *s,
size_t *nlines) ATTRIB_NONNULL(1) ATTRIB_WARN_UNUSED_RESULT;
/*
* Reads the next line from the stream pointed to by `stream`. The returned line
* is null-terminated and does not contain a newline, if one was found.
*
* The memory pointed to by `size` shall contain the length of the
* line (including the terminating null character). Else it shall contain 0.
*
* Upon successful completion a pointer is returned and the size of the line is
* stored in the memory pointed to by `size`, otherwise NULL is returned and
* `size` holds 0.
*
* `fgetline()` does not distinguish between end-of-file and error; the routines
* `feof()` and `ferror()` must be used to determine which occurred. The
* function also returns NULL on a memory-allocation failure.
*
* Although a null character is always supplied after the line, note that
* `strlen(line)` will always be smaller than the value is `size` if the line
* contains embedded null characters.
*/
IO_DEF char *io_read_line(FILE *stream, size_t *size) ATTRIB_NONNULL(1,
2) ATTRIB_WARN_UNUSED_RESULT;
/*
* `size` must be a non-null pointer. On success, the function assigns `size`
* with the number of bytes read and returns true, or returns false elsewise.
*
* Note: The file can grow between io_fsize() and a subsequent read.
*/
IO_DEF bool io_fsize(FILE *stream, size_t *size) ATTRIB_NONNULL(1, 2);
/*
* Writes `lines` to the file pointed to by `stream`.
*
* On success, it returns true, or false elsewise.
*/
IO_DEF bool io_write_lines(FILE *stream, size_t nlines,
char *lines[const static nlines]) ATTRIB_NONNULL(1, 3);
/*
* Writes nbytes from the buffer pointed to by `data` to the file pointed to
* by `stream`.
*
* On success, it returns true, or false elsewise.
*/
IO_DEF bool io_write_file(FILE *stream, size_t nbytes,
const char data[static nbytes]) ATTRIB_NONNULL(1, 3);
#endif /* IO_H */
#ifdef IO_IMPLEMENTATION
#if defined(IO_MALLOC) && defined(IO_REALLOC) && defined(IO_FREE)
// Ok.
#elif !defined(IO_MALLOC) && !defined(IO_REALLOC) && !defined(IO_FREE)
// Ok.
#else
#error "Must define all or none of IO_MALLOC, IO_REALLOC, and IO_FREE."
#endif
#ifndef IO_MALLOC
#define IO_MALLOC(sz) malloc(sz)
#define IO_REALLOC(p, sz) realloc(p, sz)
#define IO_FREE(p) free(p)
#endif
#include <stdlib.h>
#include <string.h>
#include <string.h>
#define CHUNK_SIZE INT64_C(1024 * 8)
#define TOKEN_CHUNK_SIZE INT64_C(1024 * 2)
IO_DEF char *io_read_file(FILE *stream, size_t *nbytes)
{
static const size_t page_size = CHUNK_SIZE;
char *content = NULL;
size_t len = 0;
for (size_t rcount = 1; rcount; len += rcount) {
void *const tmp = IO_REALLOC(content, len + page_size);
if (!tmp) {
IO_FREE(content);
return content = NULL;
}
content = tmp;
rcount = fread(content + len, 1, page_size - 1, stream);
if (ferror(stream)) {
IO_FREE(content);
return content = NULL;
}
}
if (nbytes) {
*nbytes = len;
}
content[len] = '\0';
return content;
}
IO_DEF char **io_split_by_delim(char *s, const char *delim, size_t *ntokens)
{
char **tokens = NULL;
const size_t chunk_size = TOKEN_CHUNK_SIZE;
size_t capacity = 0;
size_t token_count = 0;
while (*s) {
if (token_count >= capacity) {
char **const tmp = IO_REALLOC(tokens,
sizeof *tokens * (capacity += chunk_size));
if (!tmp) {
IO_FREE(tokens);
return NULL;
}
tokens = tmp;
}
tokens[token_count++] = s;
s = strpbrk(s, delim);
if (s) {
*s++ = '\0';
}
}
if (ntokens) {
*ntokens = token_count;
}
return tokens;
}
IO_DEF char **io_split_lines(char *s, size_t *nlines)
{
return io_split_by_delim(s, "\n", nlines);
}
IO_DEF char *io_read_line(FILE *stream, size_t *size)
{
const size_t page_size = BUFSIZ;
size_t count = 0;
size_t capacity = 0;
char *line = NULL;
for (;;) {
if (count >= capacity) {
char *const tmp = realloc(line, capacity += page_size);
if (!tmp) {
free(line);
return NULL;
}
line = tmp;
}
int c = getc(stream);
if (c == EOF || c == '\n') {
if (c == EOF) {
if (feof(stream)) {
if (!count) {
free(line);
return NULL;
}
/* Return what was read. */
break;
}
free(line);
return NULL;
} else {
break;
}
} else {
line[count] = (char) c;
}
++count;
}
/* Shrink line to size if possible. */
void *tmp = realloc(line, count + 1);
if (tmp) {
line = tmp;
}
line[count] = '\0';
*size = ++count;
return line;
}
/*
* Reasons to not use `fseek()` and `ftell()` to compute the size of the file:
*
* Subclause 7.12.9.2 of the C Standard [ISO/IEC 9899:2011] specifies the
* following behavior when opening a binary file in binary mode:
*
* >> A binary stream need not meaningfully support fseek calls with a whence
* >> value of SEEK_END.
*
* In addition, footnote 268 of subclause 7.21.3 says:
*
* >> Setting the file position indicator to end-of-file, as with
* >> fseek(file, 0, SEEK_END) has undefined behavior for a binary stream.
*
* For regular files, the file position indicator returned by ftell() is useful
* only in calls to fseek. As such, the value returned may not be reflect the
* physical byte offset.
*
* Reasons to not use other non-standard functions:
* 1) fseeko()/ftello() - Not specified in the C Standard, and have the same
* problem as the method above.
*
* 2) fstat()/stat() - Not specified in the C standard. POSIX specific.
* 3) _filelength()/_filelengthi64()/GetFileSizeEx() - Not specified in the C
* Standard. Windows
* specific.
* As such, we read the file in chunks, which is the only portable way to
* determine the size of a file.
*
* Yet this is highly inefficient. We could use some #ifdefs and revert to
* platform-specific functions.
*/
IO_DEF bool io_fsize(FILE *stream, size_t *size)
{
/* TODO:
*
* #if defined(_WIN32)
* Windows also supports fileno(), struct stat, and fstat() as _fileno(),
* _fstat(), and struct _stat.
*
* #ifdef _WIN32
* #define fstat _fstat
* #define fileno _fileno
* #define stat _stat
* #endif
*
* But which version to use? See:
* learn.microsoft/en-us/cpp/c-runtime-library/reference/fstat-fstat32-fstat64-fstati64-fstat32i64-fstat64i32?view=msvc-170
* #elif defined(__unix__) || defined(__linux__) et cetera
* struct stat st;
*
* if (fstat(fileno(stream), st) == 0) {
* return st.size;
* }
* return -1?
*
* }
* #else
* Fall back to the default and read it in chunks.
* #endif
*/
size_t rcount = 0;
char chunk[CHUNK_SIZE];
while ((rcount = fread(chunk, 1, CHUNK_SIZE, stream)) > 0) {
*size += rcount;
}
return !ferror(stream);
}
IO_DEF bool io_write_lines(FILE *stream, size_t nlines,
char *lines[const static nlines])
{
for (size_t i = 0; i < nlines; ++i) {
if (fprintf(stream, "%s\n", lines[i]) < 0) {
return false;
}
}
return true;
}
IO_DEF bool io_write_file(FILE *stream, size_t nbytes,
const char data[static nbytes])
{
size_t nwritten = fwrite(data, 1, nbytes, stream);
if (nwritten != nbytes || ferror(stream)) {
return false;
}
return true;
}
#undef CHUNK_SIZE
#undef TOKEN_CHUNK_SIZE
#endif /* IO_IMPLEMENTATION */
And here's how it can be used:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define IO_IMPLEMENTATION
#define IO_STATIC
#include "io.h"
int main(int argc, char **argv)
{
if (argc == 2) {
FILE *fp = fopen(argv[1], "rb");
assert(fp);
size_t nbytes = 0;
char *const fbuf = io_read_file(fp, &nbytes);
assert(fbuf);
assert(io_write_file(stdout, nbytes, fbuf));
rewind(fp);
size_t size = 0;
bool rv = io_fsize(fp, &size);
assert(rv);
printf("Filesize: %zu.\n", size);
rewind(fp);
/* size_t nlines = 0; */
/* char **lines = io_split_lines(fbuf, &nlines); */
/* assert(lines); */
/* assert(io_write_lines(stdout, nlines, lines)); */
size_t ntokens = 0;
char **tokens = io_split_by_delim(fbuf, " \f\n\r\v\t", &ntokens);
assert(tokens);
assert(io_write_lines(stdout, ntokens, tokens));
free(fbuf);
free(tokens);
/* free(lines); */
fclose(fp);
}
return EXIT_SUCCESS;
}
Review Request:
Are there any bugs or undefined/implementation-defined behavior in the code? Are there any edge cases where the functions would leak memory?
Where would restrict
be suitable to use?
General coding comments, style, bad practices, et cetera.
if (nwritten != nbytes || ferror(stream)) {
will be true on a successful write if the error flag was set prior to theio_write_file()
call. Recommendif (nwritten != nbytes)
\$\endgroup\$