exercism

Exercism solutions.
git clone git://code.dwrz.net/exercism
Log | Files | Refs

word_count.c (2170B)


      1 #include <ctype.h>
      2 #include <stdbool.h>
      3 #include <stdio.h>
      4 #include <string.h>
      5 
      6 #include "word_count.h"
      7 
      8 const char *DELIMITERS = " ..,\n:!@#$%^&*()[]{}";
      9 
     10 static void lowercase(char *s) {
     11   for(int i = 0; s[i]; i++){
     12     s[i] = tolower(s[i]);
     13   }
     14 }
     15 
     16 static bool is_quote_char(char c) {
     17   switch (c) {
     18   case '\'':
     19     return true;
     20   case '"':
     21     return true;
     22   }
     23   return false;
     24 }
     25 
     26 static bool is_quoted_word(char *token) {
     27   return (is_quote_char(token[0]) &&
     28           is_quote_char(token[strlen(token)-1]));
     29 }
     30 
     31 static bool increment_seen_words(char *token,
     32                          int unique_words,
     33                          word_count_word_t *words) {
     34   for (int i = 0; i < unique_words; i++) {
     35     if (strcmp(token, words[i].text) == 0) {
     36       words[i].count++;
     37       return true;
     38     }
     39   }
     40   return false;
     41 }
     42 
     43 static void process_token(int *unique_words,
     44                    char *token,
     45                    word_count_word_t *words) {
     46 
     47   while (is_quoted_word(token)) {  // Strip the quotes from the string.
     48     token++;                       // Start at the char after the quote.
     49     token[strlen(token)-1] = '\0'; // Terminate at the last quote.
     50   }
     51 
     52   bool found = increment_seen_words(token, *unique_words, words);
     53 
     54   if (!found) {
     55     words += (*unique_words);
     56     strcpy(words -> text, token);
     57     words -> count = 1;
     58     (*unique_words)++;
     59   }
     60 
     61 }
     62 
     63 int word_count(const char *input_text, word_count_word_t *words) {
     64   int unique_words = 0;
     65 
     66   // Zero out the buffer; otherwise values will persist across invocations.
     67   memset(words, 0, sizeof(word_count_word_t) * MAX_WORDS);
     68 
     69   // strtok modifies strings. We'll use a copy, which we'll lowercase.
     70   char copy_text[strlen(input_text)];
     71   strcpy(copy_text, input_text);
     72   lowercase(copy_text);
     73 
     74   // Main loop -- tokenize and process copy_text.
     75   for (char *token = strtok(copy_text, DELIMITERS);
     76        token != NULL;
     77        token = strtok(NULL, DELIMITERS)) {
     78     if (unique_words >= MAX_WORDS) {
     79       return EXCESSIVE_NUMBER_OF_WORDS;
     80     }
     81     if (strlen(token) > MAX_WORD_LENGTH) {
     82       return EXCESSIVE_LENGTH_WORD;
     83     }
     84     process_token(&unique_words, token, words);
     85   }
     86 
     87   return unique_words;
     88 }