[Bio] / Billogix / url_data.c Repository:
ViewVC logotype

Annotation of /Billogix/url_data.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (view) (download) (as text)

1 : mkubal 1.1 /* This version uses a shell command and temporary file. */
2 :    
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <gprolog.h>
4 :    
5 :     /*
6 :     In the data fetched from the URL, assume:
7 :     1. Fields are *SEPARATED* by \t.
8 :     2. Records are *TERMINATED* by \n.
9 :    
10 :     We'll build a list (a Relation) of lists (Tuples) of fields as a Prolog term.
11 :    
12 :     Note:
13 :     1. This was originally meant to fetch relations (hence the terminology),
14 :     but it is used for other kinds of data, too.
15 :     2. The Tuples and the Relation get built (and returned) backward.
16 :     3. Everything (including all whitespace except \t and \n) goes into fields.
17 :     4. Extra tabs cause empty fields, and
18 :     extra newlines cause empty tuples.
19 :     5. No checks are made to see if the data makes sense as a
20 :     relation. For example, tuples can be of different lengths,
21 :     and HTML tags are not removed.
22 :     */
23 :    
24 :     #define FMAX 5000 /* maximum number of characters in a field, not counting the terminating null char */
25 :    
26 :     PlTerm read_relation(FILE *fp)
27 :     {
28 :     char field[FMAX+1]; /* allow for null char at end */
29 :     int f; /* current position in field */
30 :     PlTerm tuple; /* tuple (list of fields) being constructed */
31 :     PlTerm relation; /* relation (list of tuples) being constructed */
32 :    
33 :     int c;
34 :    
35 :     /* Initialize tuple and relation to empty lists. */
36 :    
37 :     tuple = Mk_Atom(atom_nil);
38 :     relation = Mk_Atom(atom_nil);
39 :     f = 0;
40 :    
41 :     while ((c = getc(fp)) != EOF) {
42 :    
43 : olson 1.2 if (c != '\t' && c != '\n') {
44 : mkubal 1.1 /* Try to add the character to the field. */
45 :     if (f == FMAX) {
46 :     field[FMAX] = '\0';
47 :     fprintf(stderr, "read_relation, field too big:%s...\n", field);
48 :     return 0; /* a PlTerm is really a long, so I assume this a nonterm */
49 :     }
50 :     else
51 :     field[f++] = c;
52 : olson 1.2 }
53 : mkubal 1.1
54 :     else {
55 :     /* We have a field. Prepend field to tuple. */
56 :     PlTerm args[2]; /* for building binary terms */
57 :     field[f] = '\0';
58 :    
59 :     args[0] = Mk_Atom(Create_Allocate_Atom(field));
60 :     args[1] = tuple;
61 :     tuple = Mk_List(args);
62 :    
63 :     if (c == '\n') {
64 :     /* We have a record. Prepend tuple to relation. */
65 :     args[0] = tuple;
66 :     args[1] = relation;
67 :     relation = Mk_List(args);
68 :    
69 :     /* Initialize tuple for next record. */
70 :     tuple = Mk_Atom(atom_nil);
71 :     }
72 :     f = 0; /* Start collecting next field. */
73 :     }
74 :     }
75 :    
76 :     if (f != 0) { /* field not empty, so last character is neither \t nor \n. */
77 :     fprintf(stderr, "\nread_relation: the data does not end with a newline.\n\n");
78 :     return 0;
79 :     }
80 :     else if (!Blt_Atom(tuple)) { /* tuple not empty, so last character is \t. */
81 :     fprintf(stderr, "\nread_relation: the data ends with a tab.\n\n");
82 :     return 0;
83 :     }
84 :     else { /* All is well. */
85 :     return relation;
86 :     }
87 :     } /* read_relation */
88 :    
89 :     /**********************************************************************
90 :    
91 :     backward_data_from_url()
92 :    
93 :     This is the routine that is called by gprolog. Declare it in prolog as:
94 :    
95 :     :- foreign(backward_data_from_url(+string, -term)).
96 :    
97 :     The returned relation is backward, and each tuple is backward.
98 :    
99 :     Remember to enclose the url in SINGLE quotes, like this:
100 :    
101 :     backward_data_from_url('http://www-unix.mcs.anl.gov/~mccune/misc/test2.html', X).
102 :    
103 :     If the HTML server is not found or times out, the call fails.
104 :    
105 :     If the server responds "(404) File not found" or something similar,
106 :     the call succeeds, returning the HTML of the error message.
107 :    
108 :     ***********************************************************************/
109 :    
110 :     /**********************************************************************
111 :    
112 :     This version uses "system" to run curl or wget as a shell command,
113 :     putting the data in a temporary file, then it reads the file
114 :     and constructs the relation.
115 :    
116 :     Originally, I used libcurl to get the data directly. That worked
117 :     nicely in Linux, but I couldn't get it to work on my Mac (gprolog
118 :     kept crashing).
119 :    
120 :     Note that curl does not do automatic redirects or URL encoding, so
121 :     wget is probably better. However, wget has to be installed on the Mac.
122 :    
123 :     See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
124 :    
125 :     ***********************************************************************/
126 :    
127 :     Bool backward_data_from_url(char *url, PlTerm *relation)
128 :     {
129 :     int rc;
130 :     FILE *fp;
131 :     char tmpfile[50];
132 :     char *command = malloc(strlen(url) + 50);
133 :    
134 :     sprintf(tmpfile, "/tmp/gprolog_bdfu_%d", getpid());
135 :    
136 : olson 1.2 /* #define USE_CURL */
137 :     #ifdef USE_CURL
138 :     sprintf(command, "curl \"%s\" > %s 2> /dev/null", url, tmpfile);
139 : mkubal 1.1 #else
140 :     sprintf(command, "wget -O %s \"%s\" 2> /dev/null", tmpfile, url);
141 :     #endif
142 :    
143 :     rc = system(command);
144 :     *relation = 0;
145 :    
146 :     if (rc != 0) {
147 :     fprintf(stderr, "\ncommand \"%s\" fails with code %d.\n\n", command, rc);
148 :     }
149 :     else {
150 :     fp = fopen(tmpfile, "r");
151 :     if (fp == NULL) {
152 :     fprintf(stderr, "\nerror opening URL data file %s.\n\n", tmpfile);
153 :     }
154 :     else {
155 :     *relation = read_relation(fp);
156 :     fclose(fp);
157 :     }
158 :     }
159 :    
160 :     /* clean up */
161 :    
162 :     sprintf(command, "/bin/rm -f %s", tmpfile);
163 :     rc = system(command);
164 :     free(command);
165 :    
166 :     if (*relation == 0)
167 :     return FALSE;
168 :     else
169 :     return TRUE;
170 :     } /* backward_data_from_url */

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3