word_count.c (2170B)
1 #include <ctype.h> 2 #include <stdbool.h> 3 #include <stdio.h> 4 #include <string.h> 5 6 #include "word_count.h" 7 8 const char *DELIMITERS = " ..,\n:!@#$%^&*()[]{}"; 9 10 static void lowercase(char *s) { 11 for(int i = 0; s[i]; i++){ 12 s[i] = tolower(s[i]); 13 } 14 } 15 16 static bool is_quote_char(char c) { 17 switch (c) { 18 case '\'': 19 return true; 20 case '"': 21 return true; 22 } 23 return false; 24 } 25 26 static bool is_quoted_word(char *token) { 27 return (is_quote_char(token[0]) && 28 is_quote_char(token[strlen(token)-1])); 29 } 30 31 static bool increment_seen_words(char *token, 32 int unique_words, 33 word_count_word_t *words) { 34 for (int i = 0; i < unique_words; i++) { 35 if (strcmp(token, words[i].text) == 0) { 36 words[i].count++; 37 return true; 38 } 39 } 40 return false; 41 } 42 43 static void process_token(int *unique_words, 44 char *token, 45 word_count_word_t *words) { 46 47 while (is_quoted_word(token)) { // Strip the quotes from the string. 48 token++; // Start at the char after the quote. 49 token[strlen(token)-1] = '\0'; // Terminate at the last quote. 50 } 51 52 bool found = increment_seen_words(token, *unique_words, words); 53 54 if (!found) { 55 words += (*unique_words); 56 strcpy(words -> text, token); 57 words -> count = 1; 58 (*unique_words)++; 59 } 60 61 } 62 63 int word_count(const char *input_text, word_count_word_t *words) { 64 int unique_words = 0; 65 66 // Zero out the buffer; otherwise values will persist across invocations. 67 memset(words, 0, sizeof(word_count_word_t) * MAX_WORDS); 68 69 // strtok modifies strings. We'll use a copy, which we'll lowercase. 70 char copy_text[strlen(input_text)]; 71 strcpy(copy_text, input_text); 72 lowercase(copy_text); 73 74 // Main loop -- tokenize and process copy_text. 75 for (char *token = strtok(copy_text, DELIMITERS); 76 token != NULL; 77 token = strtok(NULL, DELIMITERS)) { 78 if (unique_words >= MAX_WORDS) { 79 return EXCESSIVE_NUMBER_OF_WORDS; 80 } 81 if (strlen(token) > MAX_WORD_LENGTH) { 82 return EXCESSIVE_LENGTH_WORD; 83 } 84 process_token(&unique_words, token, words); 85 } 86 87 return unique_words; 88 }