[Bio] / Billogix / url_data.c Repository:
ViewVC logotype

Annotation of /Billogix/url_data.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (view) (download) (as text)

1 : mkubal 1.1 /* This version uses a shell command and temporary file. */
2 :    
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <gprolog.h>
4 :    
5 :     /*
6 :     In the data fetched from the URL, assume:
7 :     1. Fields are *SEPARATED* by \t.
8 :     2. Records are *TERMINATED* by \n.
9 :    
10 :     We'll build a list (a Relation) of lists (Tuples) of fields as a Prolog term.
11 :    
12 :     Note:
13 :     1. This was originally meant to fetch relations (hence the terminology),
14 :     but it is used for other kinds of data, too.
15 :     2. The Tuples and the Relation get built (and returned) backward.
16 :     3. Everything (including all whitespace except \t and \n) goes into fields.
17 :     4. Extra tabs cause empty fields, and
18 :     extra newlines cause empty tuples.
19 :     5. No checks are made to see if the data makes sense as a
20 :     relation. For example, tuples can be of different lengths,
21 :     and HTML tags are not removed.
22 :     */
23 :    
24 :     #define FMAX 5000 /* maximum number of characters in a field, not counting the terminating null char */
25 :    
26 :     PlTerm read_relation(FILE *fp)
27 :     {
28 :     char field[FMAX+1]; /* allow for null char at end */
29 :     int f; /* current position in field */
30 :     PlTerm tuple; /* tuple (list of fields) being constructed */
31 :     PlTerm relation; /* relation (list of tuples) being constructed */
32 :    
33 :     int c;
34 :    
35 :     /* Initialize tuple and relation to empty lists. */
36 :    
37 :     tuple = Mk_Atom(atom_nil);
38 :     relation = Mk_Atom(atom_nil);
39 :     f = 0;
40 :    
41 :     while ((c = getc(fp)) != EOF) {
42 :    
43 :     if (c != '\t' && c != '\n')
44 :     /* Try to add the character to the field. */
45 :     if (f == FMAX) {
46 :     field[FMAX] = '\0';
47 :     fprintf(stderr, "read_relation, field too big:%s...\n", field);
48 :     return 0; /* a PlTerm is really a long, so I assume this a nonterm */
49 :     }
50 :     else
51 :     field[f++] = c;
52 :    
53 :     else {
54 :     /* We have a field. Prepend field to tuple. */
55 :     PlTerm args[2]; /* for building binary terms */
56 :     field[f] = '\0';
57 :    
58 :     args[0] = Mk_Atom(Create_Allocate_Atom(field));
59 :     args[1] = tuple;
60 :     tuple = Mk_List(args);
61 :    
62 :     if (c == '\n') {
63 :     /* We have a record. Prepend tuple to relation. */
64 :     args[0] = tuple;
65 :     args[1] = relation;
66 :     relation = Mk_List(args);
67 :    
68 :     /* Initialize tuple for next record. */
69 :     tuple = Mk_Atom(atom_nil);
70 :     }
71 :     f = 0; /* Start collecting next field. */
72 :     }
73 :     }
74 :    
75 :     if (f != 0) { /* field not empty, so last character is neither \t nor \n. */
76 :     fprintf(stderr, "\nread_relation: the data does not end with a newline.\n\n");
77 :     return 0;
78 :     }
79 :     else if (!Blt_Atom(tuple)) { /* tuple not empty, so last character is \t. */
80 :     fprintf(stderr, "\nread_relation: the data ends with a tab.\n\n");
81 :     return 0;
82 :     }
83 :     else { /* All is well. */
84 :     return relation;
85 :     }
86 :     } /* read_relation */
87 :    
88 :     /**********************************************************************
89 :    
90 :     backward_data_from_url()
91 :    
92 :     This is the routine that is called by gprolog. Declare it in prolog as:
93 :    
94 :     :- foreign(backward_data_from_url(+string, -term)).
95 :    
96 :     The returned relation is backward, and each tuple is backward.
97 :    
98 :     Remember to enclose the url in SINGLE quotes, like this:
99 :    
100 :     backward_data_from_url('http://www-unix.mcs.anl.gov/~mccune/misc/test2.html', X).
101 :    
102 :     If the HTML server is not found or times out, the call fails.
103 :    
104 :     If the server responds "(404) File not found" or something similar,
105 :     the call succeeds, returning the HTML of the error message.
106 :    
107 :     ***********************************************************************/
108 :    
109 :     /**********************************************************************
110 :    
111 :     This version uses "system" to run curl or wget as a shell command,
112 :     putting the data in a temporary file, then it reads the file
113 :     and constructs the relation.
114 :    
115 :     Originally, I used libcurl to get the data directly. That worked
116 :     nicely in Linux, but I couldn't get it to work on my Mac (gprolog
117 :     kept crashing).
118 :    
119 :     Note that curl does not do automatic redirects or URL encoding, so
120 :     wget is probably better. However, wget has to be installed on the Mac.
121 :    
122 :     See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
123 :    
124 :     ***********************************************************************/
125 :    
126 :     Bool backward_data_from_url(char *url, PlTerm *relation)
127 :     {
128 :     int rc;
129 :     FILE *fp;
130 :     char tmpfile[50];
131 :     char *command = malloc(strlen(url) + 50);
132 :    
133 :     sprintf(tmpfile, "/tmp/gprolog_bdfu_%d", getpid());
134 :    
135 :     #ifdef USE_CURL
136 :     sprintf(command, "curl '%s' > %s 2> /dev/null", url, tmpfile);
137 :     #else
138 :     sprintf(command, "wget -O %s \"%s\" 2> /dev/null", tmpfile, url);
139 :     #endif
140 :    
141 :     rc = system(command);
142 :     *relation = 0;
143 :    
144 :     if (rc != 0) {
145 :     fprintf(stderr, "\ncommand \"%s\" fails with code %d.\n\n", command, rc);
146 :     }
147 :     else {
148 :     fp = fopen(tmpfile, "r");
149 :     if (fp == NULL) {
150 :     fprintf(stderr, "\nerror opening URL data file %s.\n\n", tmpfile);
151 :     }
152 :     else {
153 :     *relation = read_relation(fp);
154 :     fclose(fp);
155 :     }
156 :     }
157 :    
158 :     /* clean up */
159 :    
160 :     sprintf(command, "/bin/rm -f %s", tmpfile);
161 :     rc = system(command);
162 :     free(command);
163 :    
164 :     if (*relation == 0)
165 :     return FALSE;
166 :     else
167 :     return TRUE;
168 :     } /* backward_data_from_url */

MCS Webmaster
ViewVC Help
Powered by ViewVC 1.0.3