Added ex47 code.

This commit is contained in:
Zed A. Shaw 2015-06-20 19:51:06 -07:00
parent 0075236cfb
commit 4d53a6c6f4
4 changed files with 189 additions and 3 deletions

View File

@ -11,27 +11,64 @@ Ternary Search Tree
The Plan
====
Learn about my favorite data structure ever:
Ternary Search Tree
The Code
====
Similar to a Binary Search Tree, but it has 3 branches per node based on
the characters in strings.
The Analysis
Advantages
====
* Find any string comparing at most N characters.
* Detect *missing* strings as fast, usually faster.
* Find all strings that start with, or contain, any substring as fast.
* Find all similar known strings quickly.
Breaking It
Disadvantages
====
* Delete is a pain, as in most trees.
* Uses lots of memory to store keys, so bad for sets of large keys.
* Kind of weird for most programmers.
Improving It
===
* You could allow duplicates by using a *DArray* instead of the
*value*.
* As I mentioned earlier, deleting is hard, but you could simulate it by setting
the values to *NULL* so that they are effectively gone.
* There are no ways to collect all of the possible matching values. I'll have
you implement that in an extra credit.
* There are other algorithms that are more complex but have slightly
better properties. Take a look at suffix array, suffix tree, and
radix tree structures.
Extra Credit
====
* Implement a *TSTree_collect* that returns a *DArray* containing
all of the keys that match the given prefix.
* Implement *TSTree_search_suffix* and a *TSTree_insert_suffix*
so you can do suffix searches and inserts.
* Use the debugger to see how this structure is used in memory
compared to the *BSTree* and *Hashmap*.
End Of Lecture 46

5
ex47/ex47_urls.txt Normal file
View File

@ -0,0 +1,5 @@
/test.tst TestHandler
/ IndexHandler
/test/this/out/index.html PageHandler
/index.html PageHandler
/and/then/i/have/things/to/test.html PageHandler

View File

@ -11,27 +11,51 @@ A Fast URL Router
The Plan
====
Use the *TSTree* to do something useful:
Route URLs
The Code
====
A simple command line tool to play with URLs.
You'd find this in many web applications.
The Analysis
====
Watch me play with it and then tell you how it's working.
Breaking It
Improving It
====
* Collect all possible matches then choose the longest as winner.
* Use TSTree to find prefixes, then regex to choose winner.
Extra Credit
====
* Instead of just storing the string for the handler, create an actual engine that uses a
*Handler* struct to store the application. The structure would store the URL to which it's attached, the name, and anything else you'd need to make an actual routing system.
Extra Credit
====
* Instead of mapping URLs to arbitrary names, map them to .so files and use the *dlopen*
system to load handlers on the fly and call the callbacks they contain. Put these callbacks
in your *Handler* struct, and then you have yourself a fully dynamic callback
handler system in C.
End Of Lecture 47

120
ex47/urlor.c Normal file
View File

@ -0,0 +1,120 @@
#include <lcthw/tstree.h>
#include <lcthw/bstrlib.h>
/**
 * Parse one "<url> <handler>" line and insert it into the routing tree.
 *
 * The line is split on single spaces and must yield exactly two columns.
 * The first column is used as the key; a private copy of the second column
 * is stored as the value.
 *
 * @param routes  Tree to insert into (the caller starts with NULL).
 * @param line    Route line; it is neither modified nor freed here.
 * @return The tree returned by TSTree_insert(), or NULL on any failure.
 *         On failure the tree passed in is not freed by this function.
 */
TSTree *add_route_data(TSTree * routes, bstring line)
{
    struct bstrList *data = NULL;
    bstring handler = NULL;
    TSTree *updated = NULL;

    data = bsplit(line, ' ');
    check(data != NULL, "Failed to split line.");
    check(data->qty == 2, "Line '%s' does not have 2 columns",
          bdata(line));

    handler = bstrcpy(data->entry[1]);
    check(handler != NULL, "Failed to copy handler name.");

    updated = TSTree_insert(routes,
                            bdata(data->entry[0]),
                            blength(data->entry[0]),
                            handler);
    /* The copy has been handed to TSTree_insert(). What that call does with
     * the value when it fails is not visible from here, so do not free it
     * again on the error path and risk a double free. */
    handler = NULL;
    check(updated != NULL, "Failed to insert route.");

    bstrListDestroy(data);
    return updated;

error:
    if (handler) bdestroy(handler);
    if (data) bstrListDestroy(data);
    return NULL;
}
/* Defined further down in this file; declared here so that the error path of
 * load_routes() can release a partially built tree. */
void destroy_routes(TSTree * routes);

/**
 * Build a routing tree from a text file with one "<url> <handler>" pair
 * per line.
 *
 * Each line is trimmed of surrounding whitespace before it is parsed.
 * Lines that are empty after trimming are skipped.
 *
 * @param file  Path of the routes file.
 * @return The populated tree, or NULL if the file cannot be opened, a line
 *         is malformed, or the file contains no routes. Nothing is leaked
 *         on failure: the stream, the current line and any routes already
 *         loaded are released.
 */
TSTree *load_routes(const char *file)
{
    TSTree *routes = NULL;
    TSTree *updated = NULL;
    bstring line = NULL;
    FILE *routes_map = NULL;

    routes_map = fopen(file, "r");
    check(routes_map != NULL, "Failed to open routes: %s", file);

    while ((line = bgets((bNgetc) fgetc, routes_map, '\n')) != NULL) {
        check(btrimws(line) == BSTR_OK, "Failed to trim line.");

        if (blength(line) > 0) {
            /* Keep the old root until the insert is known to have worked;
             * otherwise a failure would drop the only pointer to the tree. */
            updated = add_route_data(routes, line);
            check(updated != NULL, "Failed to add route.");
            routes = updated;
        }

        bdestroy(line);
        line = NULL;
    }

    fclose(routes_map);
    return routes;

error:
    if (routes_map) fclose(routes_map);
    if (line) bdestroy(line);
    if (routes) destroy_routes(routes);
    return NULL;
}
/**
 * Look up the handler registered for a URL.
 *
 * An exact lookup with TSTree_search() is tried first. If it yields nothing,
 * a notice is printed to stdout and the result of TSTree_search_prefix() is
 * returned instead.
 *
 * @param routes  Routing tree to search.
 * @param url     URL to look up.
 * @return The value stored in the tree for the match, or NULL if neither
 *         lookup produced one. The returned value is not a copy.
 */
bstring match_url(TSTree * routes, bstring url)
{
    bstring exact = TSTree_search(routes, bdata(url), blength(url));

    if (exact != NULL) {
        return exact;
    }

    printf("No exact match found, trying prefix.\n");
    return TSTree_search_prefix(routes, bdata(url), blength(url));
}
/**
 * Print a prompt and read one whitespace-trimmed line from stdin.
 *
 * @param prompt  Text shown before reading; printed verbatim.
 * @return A newly allocated bstring that the caller must bdestroy(), or NULL
 *         when stdin yields no more input or the line cannot be trimmed.
 */
bstring read_line(const char *prompt)
{
    bstring result = NULL;

    printf("%s", prompt);
    /* The prompt has no trailing newline, so push it out explicitly before
     * blocking on input. */
    fflush(stdout);

    result = bgets((bNgetc) fgetc, stdin, '\n');
    check_debug(result != NULL, "stdin closed.");
    check(btrimws(result) == BSTR_OK, "Failed to trim.");

    return result;

error:
    /* Release the line if it was read but could not be trimmed. */
    if (result) bdestroy(result);
    return NULL;
}
void bdestroy_cb(void *value, void *ignored)
{
(void)ignored;
bdestroy((bstring) value);
}
/**
 * Release a routing tree together with the values stored in it.
 *
 * The values are freed first, by traversing the tree with bdestroy_cb();
 * the tree itself is then handed to TSTree_destroy().
 *
 * @param routes  Tree to release.
 */
void destroy_routes(TSTree * routes)
{
    /* Values first: once the nodes are gone they can no longer be reached. */
    TSTree_traverse(routes, bdestroy_cb, NULL);

    TSTree_destroy(routes);
}
/**
 * Interactive URL router.
 *
 * Loads the routes file named by the single command-line argument, then
 * repeatedly prompts for a URL and prints the handler it maps to until
 * read_line() reports that there is no more input.
 *
 * @return 0 after input ends normally; 1 on a usage error or when the
 *         routes file cannot be loaded.
 */
int main(int argc, char *argv[])
{
    bstring url = NULL;
    bstring route = NULL;
    TSTree *routes = NULL;

    check(argc == 2, "USAGE: urlor <urlfile>");

    routes = load_routes(argv[1]);
    check(routes != NULL, "Your route file has an error.");

    /* End of input is the normal way to leave the loop, so it falls through
     * to the success path below instead of jumping to the error label. */
    while ((url = read_line("URL> ")) != NULL) {
        route = match_url(routes, url);

        if (route) {
            printf("MATCH: %s == %s\n", bdata(url), bdata(route));
        } else {
            printf("FAIL: %s\n", bdata(url));
        }

        bdestroy(url);
    }

    destroy_routes(routes);
    return 0;

error:
    destroy_routes(routes);
    return 1;
}