performance: keep track of the ddts older than 4 weeks
[ppastats.git] / src / lp_ws.c
1 /*
2  * Copyright (C) 2011-2014 jeanfi@gmail.com
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301 USA
18  */
19
20 #include <libintl.h>
21 #define _(String) gettext(String)
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/time.h>
27 #include <time.h>
28
29 #include <json.h>
30
31 #include "cache.h"
32 #include "fcache.h"
33 #include "http.h"
34 #include "list.h"
35 #include "log.h"
36 #include "lp_ws.h"
37 #include "lp_json.h"
38 #include "ppastats.h"
39
/** Default ws.size value for the getPublishedBinaries request. */
static const int DEFAULT_WS_SIZE = 150;

/*
 * Web-service operation suffixes appended to a resource URL.
 *
 * Both the characters and the pointers are const, so the queries can
 * neither be modified nor re-targeted by accident.
 */
static const char *const QUERY_GET_DOWNLOAD_COUNT = "?ws.op=getDownloadCount";
static const char *const
QUERY_GET_DAILY_DOWNLOAD_TOTALS = "?ws.op=getDailyDownloadTotals";
46
47 static json_object *get_json_object(const char *url)
48 {
49         json_object *obj = NULL;
50         char *body;
51
52         body = get_url_content(url, 0);
53
54         if (body) {
55                 obj = json_tokener_parse(body);
56
57                 free(body);
58
59                 return obj;
60         }
61
62         return NULL;
63 }
64
/*
 * Builds the file-cache key of the bpph list of an archive: the archive
 * URL without its first 7 characters, followed by "/bpph".
 *
 * NOTE(review): the 7 skipped characters are presumably the start of the
 * URL scheme ("https:/") -- confirm against the callers.
 *
 * Returns a newly allocated string which must be released with free(),
 * or NULL if the allocation fails.
 */
static char *get_bpph_list_cache_key(const char *archive_url)
{
        static const char suffix[] = "/bpph";
        const size_t skip = 7;
        size_t url_len, n;
        char *key;

        url_len = strlen(archive_url);

        /* Never step past the terminator of a URL shorter than 'skip'. */
        archive_url += (url_len > skip) ? skip : url_len;

        /* sizeof(suffix) already accounts for the terminating NUL. */
        n = strlen(archive_url) + sizeof(suffix);

        key = malloc(n);
        if (!key)
                return NULL;

        snprintf(key, n, "%s%s", archive_url, suffix);

        return key;
}
74
/*
 * Builds the file-cache key of the daily download totals of a binary:
 * 'url' without its first 7 characters, followed by "/ddts".
 *
 * NOTE(review): the 7 skipped characters are presumably the start of the
 * URL scheme ("https:/") -- confirm against the callers.
 *
 * Returns a newly allocated string which must be released with free(),
 * or NULL if the allocation fails.
 */
static char *get_ddts_list_cache_key(const char *url)
{
        static const char suffix[] = "/ddts";
        const size_t skip = 7;
        size_t url_len, n;
        char *key;

        url_len = strlen(url);

        /* Never step past the terminator of a URL shorter than 'skip'. */
        url += (url_len > skip) ? skip : url_len;

        /* sizeof(suffix) already accounts for the terminating NUL. */
        n = strlen(url) + sizeof(suffix);

        key = malloc(n);
        if (!key)
                return NULL;

        snprintf(key, n, "%s%s", url, suffix);

        return key;
}
84
85 static struct bpph **get_bpph_list_from_cache(const char *key)
86 {
87         char *content;
88         struct bpph **list;
89         json_object *json;
90
91         content = fcache_get(key);
92         if (!content)
93                 return NULL;
94
95         json = json_tokener_parse(content);
96         if (!json)
97                 return NULL;
98
99         list = json_object_to_bpph_list(json);
100
101         json_object_put(json);
102         free(content);
103
104         return list;
105 }
106
107 static char *get_last_creation_date(struct bpph **list)
108 {
109         time_t last, t;
110         struct bpph **cur;
111
112         last = 0;
113
114         if (list)
115                 for (cur = list; *cur; cur++) {
116                         t = (*cur)->date_created;
117                         if (t > last)
118                                 last = t;
119                 }
120
121         if (last)
122                 return time_to_str(last);
123         else
124                 return NULL;
125 }
126
127 /*
128  * 'archive_url': LP URL of the archive.
129  * 'size': size of the reply array. Between 1-300, else default value is used.
130  */
131 static char *create_query_get_bpph(const char *archive_url,
132                                    const char *status,
133                                    int size)
134 {
135         static const char *default_opt = "?ws.op=getPublishedBinaries&ws.size=";
136         static const char *status_opt = "&status=";
137         char *url;
138         size_t n;
139
140         if (size < 1 || size > 300)
141                 size = DEFAULT_WS_SIZE;
142
143         n = strlen(archive_url) + strlen(default_opt) + 3 + 1;
144
145         if (status)
146                 n += strlen(status_opt) + strlen(status);
147
148         url = malloc(n);
149         sprintf(url, "%s%s%d", archive_url, default_opt, size);
150
151         if (status) {
152                 strcat(url, status_opt);
153                 strcat(url, status);
154         }
155
156         return url;
157 }
158
/*
 * Returns the bpph list of the archive 'archive_url'.
 *
 * The list stored in the file cache is used as a starting point. When it
 * holds a creation date, only entries created since that date are
 * requested. Every page of the reply is appended to the list, following
 * the "next_collection_link" field until it is absent.
 *
 * The file cache is rewritten only when every page was retrieved. The
 * list built so far is returned even when a request failed.
 *
 * 'pkg_status' and 'ws_size' are passed to create_query_get_bpph().
 */
struct bpph **get_bpph_list(const char *archive_url,
                            const char *pkg_status,
                            int ws_size)
{
        static const char *since_opt = "&created_since_date=";
        struct json_object *page, *next_link, *list_json;
        struct bpph **list;
        char *query, *cache_key, *since, *incremental;
        int complete = 1;

        query = create_query_get_bpph(archive_url, pkg_status, ws_size);
        cache_key = get_bpph_list_cache_key(archive_url);
        list = get_bpph_list_from_cache(cache_key);

        since = list ? get_last_creation_date(list) : NULL;

        if (since) {
                /* Restrict the request to what the cache does not hold. */
                incremental = malloc(strlen(query)
                                     + strlen(since_opt)
                                     + strlen(since) + 1);
                sprintf(incremental, "%s%s%s", query, since_opt, since);

                free(query);
                query = incremental;

                free(since);
        }

        while (query) {
                page = get_json_object(query);
                free(query);
                query = NULL;

                if (page) {
                        list = bpph_list_append_list
                                (list, json_object_to_bpph_list(page));

                        next_link = json_object_object_get
                                (page, "next_collection_link");

                        /* The link must be copied before 'page' is released. */
                        if (next_link)
                                query = strdup
                                        (json_object_get_string(next_link));

                        json_object_put(page);
                } else {
                        /* 'query' is NULL here, which ends the loop. */
                        complete = 0;
                }
        }

        if (complete) {
                list_json = bpph_list_to_json(list);
                fcache_put(cache_key, json_object_to_json_string(list_json));
                json_object_put(list_json);
        }

        free(cache_key);

        return list;
}
225
226 int get_download_count(const char *archive_url)
227 {
228         int n = strlen(archive_url) + strlen(QUERY_GET_DOWNLOAD_COUNT) + 1;
229         char *url = malloc(n);
230         int result;
231         json_object *obj;
232
233         strcpy(url, archive_url);
234         strcat(url, QUERY_GET_DOWNLOAD_COUNT);
235
236         obj = get_json_object(url);
237         free(url);
238
239         if (!obj)
240                 return -1;
241
242         result = json_object_get_int(obj);
243
244         json_object_put(obj);
245
246         return result;
247 }
248
249 const struct distro_arch_series *get_distro_arch_series(const char *url)
250 {
251         json_object *obj;
252         const struct distro_arch_series *distro;
253         char *content;
254
255         distro = cache_get(url);
256         if (distro)
257                 return (struct distro_arch_series *)distro;
258
259         content = get_url_content(url, 1);
260
261         if (!content)
262                 return NULL;
263
264         obj = json_tokener_parse(content);
265
266         free(content);
267
268         if (!obj)
269                 return NULL;
270
271         distro = json_object_to_distro_arch_series(obj);
272
273         json_object_put(obj);
274
275         cache_put(url, distro, (void (*)(void *))&distro_arch_series_free);
276
277         return distro;
278 }
279
280 const struct distro_series *get_distro_series(const char *url)
281 {
282         json_object *obj;
283         const struct distro_series *distro;
284         char *content;
285
286         distro = cache_get(url);
287         if (distro)
288                 return (struct distro_series *)distro;
289
290         content = get_url_content(url, 1);
291
292         if (!content)
293                 return NULL;
294
295         obj = json_tokener_parse(content);
296
297         free(content);
298
299         if (!obj)
300                 return NULL;
301
302         distro = json_object_to_distro_series(obj);
303
304         json_object_put(obj);
305
306         cache_put(url, distro, (void (*)(void *))&distro_series_free);
307
308         return distro;
309 }
310
/*
 * Formats 'tm' as "YYYY-MM-DD".
 *
 * Returns a newly allocated string which must be released with free(),
 * or NULL when the allocation fails or when the formatted date does not
 * fit in 10 characters (e.g. a year above 9999).
 */
char *date_to_str(struct tm tm)
{
        /* Room for "YYYY-MM-DD" and its terminating NUL. */
        enum { DATE_STR_SIZE = sizeof("YYYY-MM-DD") };
        char *str;

        str = malloc(DATE_STR_SIZE);
        if (!str)
                return NULL;

        /*
         * strftime() returns 0 when the result does not fit, leaving the
         * buffer content unspecified; never hand that out as a string.
         */
        if (strftime(str, DATE_STR_SIZE, "%Y-%m-%d", &tm) == 0) {
                free(str);
                return NULL;
        }

        return str;
}
321
322 /*
323   Convert ddts older than 4 weeks to the same JSON representation than
324   the LP one.  Newer ddts are not stored in the cache because the data
325   may change during following days. It avoids to miss downloads which
326   are not yet taken in consideration by LP.
327  */
328 static json_object *ddts_to_json_for_cache(struct daily_download_total **ddts)
329 {
330         json_object *j_ddts;
331         struct daily_download_total *ddt;
332         char *date;
333         struct timeval *tv;
334         time_t t;
335         double d;
336
337         j_ddts = json_object_new_object();
338
339         tv = malloc(sizeof(struct timeval));
340         gettimeofday(tv, NULL);
341
342         while (ddts && *ddts) {
343                 ddt = *ddts;
344
345                 t = mktime(&(ddt->date));
346
347                 d = difftime(tv->tv_sec, t);
348
349                 if (d > 4 * 7 * 24 * 60 * 60) { /* older than 4 weeks */
350                         date = date_to_str(ddt->date);
351                         json_object_object_add(j_ddts,
352                                                date,
353                                                json_object_new_int(ddt->count));
354                         free(date);
355                 }
356
357                 ddts++;
358         }
359
360         free(tv);
361
362         return j_ddts;
363 }
364
/*
 * Formats the UTC calendar date of 't' with date_to_str().
 *
 * Returns a newly allocated string which must be released with free(),
 * or NULL when 't' cannot be converted to a calendar date or when
 * date_to_str() returns NULL.
 */
static char *time_t_to_str(time_t t)
{
        const struct tm *utc;

        utc = gmtime(&t);

        /* gmtime() returns NULL when the time cannot be represented. */
        if (!utc)
                return NULL;

        return date_to_str(*utc);
}
376
377 char *create_ddts_query(const char *binary_url, time_t st)
378 {
379         char *q;
380         char *sdate;
381
382         if (st) {
383                 sdate = time_t_to_str(st);
384
385                 q = malloc(strlen(binary_url)
386                            + strlen(QUERY_GET_DAILY_DOWNLOAD_TOTALS)
387                            + strlen("&start_date=YYYY-MM-DD")
388                            + 1);
389                 strcpy(q, binary_url);
390                 strcat(q, QUERY_GET_DAILY_DOWNLOAD_TOTALS);
391                 strcat(q, "&start_date=");
392                 strcat(q, sdate);
393
394                 free(sdate);
395         } else {
396                 q = malloc(strlen(binary_url)
397                            + strlen(QUERY_GET_DAILY_DOWNLOAD_TOTALS)
398                            + 1);
399                 strcpy(q, binary_url);
400                 strcat(q, QUERY_GET_DAILY_DOWNLOAD_TOTALS);
401         }
402
403         return q;
404 }
405
406 struct daily_download_total **get_daily_download_totals(const char *binary_url)
407 {
408         char *url, *key, *content;
409         json_object *j_ddts, *json;
410         struct daily_download_total **retrieved_ddts = NULL;
411         struct daily_download_total **cached_ddts;
412         struct daily_download_total **ddts;
413         time_t last_t;
414
415         key = get_ddts_list_cache_key(binary_url);
416
417         content = fcache_get(key);
418         if (content)
419                 json = json_tokener_parse(content);
420         else
421                 json = NULL;
422
423         if (json) {
424                 cached_ddts = json_object_to_daily_download_totals(json);
425                 last_t = ddts_get_last_date(cached_ddts);
426         } else {
427                 last_t = 0;
428                 cached_ddts = NULL;
429         }
430
431         url = create_ddts_query(binary_url, last_t);
432
433         json = get_json_object(url);
434
435         free(url);
436
437         if (json) {
438                 retrieved_ddts = json_object_to_daily_download_totals(json);
439
440                 ddts = ddts_merge(cached_ddts, retrieved_ddts);
441
442                 json_object_put(json);
443                 j_ddts = ddts_to_json_for_cache(ddts);
444                 fcache_put(key, json_object_get_string(j_ddts));
445                 json_object_put(j_ddts);
446         } else {
447                 ddts = NULL;
448         }
449
450         free(key);
451         free(cached_ddts);
452         free(retrieved_ddts);
453
454         return ddts;
455 }
456