X-Git-Url: https://git.wpitchoune.net/gitweb/?a=blobdiff_plain;f=src%2Flp_ws.c;h=523f185bcd7a4e54b97f5e97a9f30af2b917be6d;hb=474ab6fa492a8b1dde963fdecb1edebc111c5fb2;hp=5eca19ab4c47ef30be8dd6dd4536a64ba2514892;hpb=04e8520815632d17a6705219ad8301c1352058f6;p=ppastats.git

diff --git a/src/lp_ws.c b/src/lp_ws.c
index 5eca19a..523f185 100644
--- a/src/lp_ws.c
+++ b/src/lp_ws.c
@@ -1,192 +1,211 @@
 /*
-  Copyright (C) 2011 jeanfi@gmail.com
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-  02110-1301 USA
-*/
-
+ * Copyright (C) 2011-2015 jeanfi@gmail.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include
+#define _(String) gettext(String)
+
+#include
 #include
 #include
-#include
+#include
+#include
-
-#include
-#include
+#include
-
-#include "cache.h"
-#include "list.h"
-#include "lp_ws.h"
-#include "lp_json.h"
-#include "ppastats.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/** Default ws.size value for the getPublishedBinaries request. */
+static const int DEFAULT_WS_SIZE = 150;
 
-static const char *QUERY_GET_PUBLISHED_BINARIES = "?ws.op=getPublishedBinaries";
 static const char *QUERY_GET_DOWNLOAD_COUNT = "?ws.op=getDownloadCount";
 static const char *
 QUERY_GET_DAILY_DOWNLOAD_TOTALS = "?ws.op=getDailyDownloadTotals";
 
-static const int DEFAULT_FETCH_RETRIES = 3;
-
-static CURL *curl;
+static json_object *get_json_object(const char *url)
+{
+	json_object *obj = NULL;
+	char *body;
 
-struct ucontent {
-	char *data;
-	size_t len;
-};
+	body = get_url_content(url, 0);
 
-static size_t cbk_curl(void *buffer, size_t size, size_t nmemb, void *userp)
-{
-	size_t realsize = size * nmemb;
-	struct ucontent *mem = (struct ucontent *)userp;
+	if (body) {
+		obj = json_tokener_parse(body);
 
-	mem->data = realloc(mem->data, mem->len + realsize + 1);
+		free(body);
 
-	memcpy(&(mem->data[mem->len]), buffer, realsize);
-	mem->len += realsize;
-	mem->data[mem->len] = 0;
+		return obj;
+	}
 
-	return realsize;
+	return NULL;
 }
 
-static char *fetch_url(const char *url)
+static char *get_bpph_list_cache_key(const char *archive_url)
 {
-	struct ucontent *content = malloc(sizeof(struct ucontent));
-	char *result;
-	long code;
-	int retries;
-
-	if (debug)
-		printf("DEBUG: fetch_url %s\n", url);
-
-	if (!curl) {
-		if (debug)
-			printf("DEBUG: initializing CURL\n");
-		curl_global_init(CURL_GLOBAL_ALL);
-		curl = curl_easy_init();
-	}
+	char *key;
 
-	if (!curl)
-		exit(EXIT_FAILURE);
+	/* +7 skips the leading "http://" of the archive URL */
+	key = malloc(strlen(archive_url + 7) + strlen("/bpph") + 1);
+	sprintf(key, "%s/bpph", archive_url + 7);
 
-	result = NULL;
+	return key;
+}
 
-	retries = DEFAULT_FETCH_RETRIES;
+static char *get_ddts_list_cache_key(const char *url)
+{
+	char *key;
 
- retrieve:
-	content->data = malloc(1);
-	content->data[0] = '\0';
-	content->len = 0;
+	/* +7 skips the leading "http://" of the URL */
+	key = malloc(strlen(url + 7) + strlen("/ddts") + 1);
+	sprintf(key, "%s/ddts", url + 7);
 
-	curl_easy_setopt(curl, CURLOPT_URL, url);
-	curl_easy_setopt(curl, CURLOPT_VERBOSE, 0);
-	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, cbk_curl);
-	curl_easy_setopt(curl, CURLOPT_WRITEDATA, content);
-	curl_easy_setopt(curl, CURLOPT_USERAGENT, "ppastats/0.0");
+	return key;
+}
 
-	if (curl_easy_perform(curl) == CURLE_OK) {
-		curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &code);
+static struct bpph **get_bpph_list_from_cache(const char *key)
+{
+	char *content;
+	struct bpph **list;
+	json_object *json;
 
-		switch (code) {
-		case 200:
-			result = content->data;
-			break;
-		case 500:
-		case 502:
-		case 503:
-		case 504:
-			if (retries) {
-				fprintf(stderr,
-					"Fetch failed: with code %ld "
-					"for URL= %s\n",
-					code,
-					url);
-
-				if (debug)
-					printf("Wait 5s before retry.\n");
-				sleep(5);
-
-				free(content->data);
-				retries--;
-				goto retrieve;
-			}
-		default:
-			fprintf(stderr, "Fetch failed: %ld\n", code);
-		}
-	}
+	content = fcache_get(key);
+	if (!content)
+		return NULL;
 
-	if (!result)
-		free(content->data);
+	json = json_tokener_parse(content);
+	if (!json) {
+		free(content);
+		return NULL;
+	}
 
+	list = json_object_to_bpph_list(json);
+
+	json_object_put(json);
 	free(content);
 
-	return result;
+	return list;
 }
 
-static json_object *get_json_object(const char *url)
+static char *get_last_creation_date(struct bpph **list)
 {
-	json_object *obj = NULL;
-	char *body;
+	time_t last, t;
+	struct bpph **cur;
 
-	body = fetch_url(url);
+	last = 0;
 
-	if (body) {
-		obj = json_tokener_parse(body);
+	if (list)
+		for (cur = list; *cur; cur++) {
+			t = (*cur)->date_created;
+			if (t > last)
+				last = t;
+		}
 
-	free(body);
+	if (last)
+		return time_to_ISO8601_time(&last);
+	else
+		return NULL;
+}
 
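/*
 * Illustrative sketch of how the cache helpers above fit together; the
 * archive URL is hypothetical, not taken from the patch.
 * get_bpph_list_cache_key() skips the 7-character "http://" scheme
 * prefix, so the key mirrors the URL path.
 */
static void example_bpph_cache_usage(void)
{
	char *key, *date;
	struct bpph **cached;

	key = get_bpph_list_cache_key("http://api.launchpad.net/1.0/~user/+archive/ppa");
	/* key is now "api.launchpad.net/1.0/~user/+archive/ppa/bpph" */

	cached = get_bpph_list_from_cache(key);

	/* ISO 8601 timestamp of the newest cached record, or NULL */
	date = get_last_creation_date(cached);

	free(date);
	free(key);
}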
-	return obj;
+/*
+ * 'archive_url': LP URL of the archive.
+ * 'size': size of the reply array; must be between 1 and 300, otherwise
+ * the default value is used.
+ */
+static char *create_query_get_bpph(const char *archive_url,
+				   const char *status,
+				   int size)
+{
+	static const char *default_opt = "?ws.op=getPublishedBinaries&ws.size=";
+	static const char *status_opt = "&status=";
+	char *url;
+	size_t n;
+
+	if (size < 1 || size > 300)
+		size = DEFAULT_WS_SIZE;
+
+	n = strlen(archive_url) + strlen(default_opt) + 3 + 1;
+
+	if (status)
+		n += strlen(status_opt) + strlen(status);
+
+	url = malloc(n);
+	sprintf(url, "%s%s%d", archive_url, default_opt, size);
+
+	if (status) {
+		strcat(url, status_opt);
+		strcat(url, status);
 	}
 
-	return NULL;
+	return url;
 }
 
-#define json_object_to_bpph_list \
-json_object_to_binary_package_publishing_history_list
-
-struct binary_package_publishing_history * *
-get_binary_package_publishing_history_list(const char *archive_url,
-					   const char *pkg_status)
+struct bpph **get_bpph_list(const char *archive_url,
+			    const char *pkg_status,
+			    int ws_size)
 {
-	struct json_object *o_next;
-	char *url;
-	json_object *o;
-	void **result = NULL;
+	char *url, *key, *tmp;
+	struct bpph **result;
+	struct json_object *o, *bpph_json, *o_next;
+	char *date;
+	int ok;
 
-	url = malloc(strlen(archive_url)+
-		     strlen(QUERY_GET_PUBLISHED_BINARIES)+
-		     (pkg_status ? strlen("&status=")+strlen(pkg_status) : 0)+
-		     1);
+	url = create_query_get_bpph(archive_url, pkg_status, ws_size);
 
-	strcpy(url, archive_url);
-	strcat(url, QUERY_GET_PUBLISHED_BINARIES);
+	key = get_bpph_list_cache_key(archive_url);
+
+	result = get_bpph_list_from_cache(key);
+
+	if (result) {
+		date = get_last_creation_date(result);
+
+		if (date) {
+			tmp = malloc(strlen(url) +
+				     strlen("&created_since_date=") +
+				     strlen(date) + 1);
+			strcpy(tmp, url);
+			strcat(tmp, "&created_since_date=");
+			strcat(tmp, date);
 
-	if (pkg_status) {
-		strcat(url, "&status=");
-		strcat(url, pkg_status);
+			free(url);
+			url = tmp;
+
+			free(date);
+		}
 	}
 
+	ok = 1;
 	while (url) {
 		o = get_json_object(url);
 		free(url);
 		url = NULL;
 
-		if (!o)
+		if (!o) {
+			ok = 0;
 			break;
+		}
 
-		result = list_append_list(result,
-					  (void **)json_object_to_bpph_list(o));
+		result = bpph_list_append_list(result,
					       json_object_to_bpph_list(o));
 
-		o_next = json_object_object_get(o, "next_collection_link");
+		json_object_object_get_ex(o, "next_collection_link", &o_next);
 
 		if (o_next)
 			url = strdup(json_object_get_string(o_next));
@@ -194,7 +213,15 @@ get_binary_package_publishing_history_list(const char *archive_url,
 		json_object_put(o);
 	}
 
-	return (struct binary_package_publishing_history **)result;
+	if (ok) {
+		bpph_json = bpph_list_to_json(result);
+		fcache_put(key, json_object_to_json_string(bpph_json));
+		json_object_put(bpph_json);
+	}
+
+	free(key);
+
+	return result;
 }
 
 int get_download_count(const char *archive_url)
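/*
 * Illustrative sketch of a get_bpph_list() call; the PPA URL is
 * hypothetical. When the cache already holds records, the query built
 * above becomes, e.g.:
 *
 *   http://api.launchpad.net/1.0/~user/+archive/ppa
 *     ?ws.op=getPublishedBinaries&ws.size=150&status=Published
 *     &created_since_date=2015-01-31T10:27:04
 *
 * and the while loop then follows "next_collection_link" page by page.
 */
static void example_get_bpph_list(void)
{
	struct bpph **list, **cur;

	/* ws_size 0 is out of the 1-300 range, so DEFAULT_WS_SIZE is used */
	list = get_bpph_list("http://api.launchpad.net/1.0/~user/+archive/ppa",
			     "Published",
			     0);

	for (cur = list; cur && *cur; cur++)
		;	/* each entry is one binary package publication */
}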
@@ -224,12 +251,20 @@ const struct distro_arch_series *get_distro_arch_series(const char *url)
 {
 	json_object *obj;
 	const struct distro_arch_series *distro;
+	char *content;
 
 	distro = cache_get(url);
 	if (distro)
 		return (struct distro_arch_series *)distro;
 
-	obj = get_json_object(url);
+	content = get_url_content(url, 1);
+
+	if (!content)
+		return NULL;
+
+	obj = json_tokener_parse(content);
+
+	free(content);
 
 	if (!obj)
 		return NULL;
@@ -247,12 +282,20 @@ const struct distro_series *get_distro_series(const char *url)
 {
 	json_object *obj;
 	const struct distro_series *distro;
+	char *content;
 
 	distro = cache_get(url);
 	if (distro)
 		return (struct distro_series *)distro;
 
-	obj = get_json_object(url);
+	content = get_url_content(url, 1);
+
+	if (!content)
+		return NULL;
+
+	obj = json_tokener_parse(content);
+
+	free(content);
 
 	if (!obj)
 		return NULL;
@@ -266,35 +309,187 @@ const struct distro_series *get_distro_series(const char *url)
 	return distro;
 }
 
-struct daily_download_total **get_daily_download_totals(const char *binary_url)
+/*
+  Convert ddts older than 4 weeks to the same JSON representation as
+  the LP one. Newer ddts are not stored in the cache because their
+  data may still change during the following days; this avoids missing
+  downloads which are not yet taken into account by LP.
+ */
+static json_object *ddts_to_json_for_cache(struct daily_download_total **ddts)
 {
-	char *url;
-	json_object *obj;
-	struct daily_download_total **result = NULL;
+	json_object *j_ddts;
+	struct daily_download_total *ddt;
+	char *date;
+	struct timeval *tv;
+	time_t t;
+	double d;
 
-	url = malloc(strlen(binary_url)+
-		     strlen(QUERY_GET_DAILY_DOWNLOAD_TOTALS)+1);
+	j_ddts = json_object_new_object();
 
-	strcpy(url, binary_url);
-	strcat(url, QUERY_GET_DAILY_DOWNLOAD_TOTALS);
+	tv = malloc(sizeof(struct timeval));
+	gettimeofday(tv, NULL);
 
-	obj = get_json_object(url);
+	while (ddts && *ddts) {
+		ddt = *ddts;
+
+		t = mktime(&(ddt->date));
+
+		d = difftime(tv->tv_sec, t);
+
+		if (d > 4 * 7 * 24 * 60 * 60) { /* older than 4 weeks */
+			date = tm_to_ISO8601_date(&ddt->date);
+			json_object_object_add(j_ddts,
+					       date,
+					       json_object_new_int(ddt->count));
+			free(date);
+		}
 
-	if (obj) {
-		result = json_object_to_daily_download_totals(obj);
-		json_object_put(obj);
+		ddts++;
 	}
 
+	free(tv);
+
+	return j_ddts;
+}
+
+char *create_ddts_query(const char *binary_url, time_t st, time_t et)
+{
+	char *q;
+	char *sdate, *edate;
+
+	if (st) {
+		sdate = time_to_ISO8601_date(&st);
+
+		q = malloc(strlen(binary_url) +
+			   strlen(QUERY_GET_DAILY_DOWNLOAD_TOTALS) +
+			   strlen("&start_date=YYYY-MM-DD") +
+			   strlen("&end_date=YYYY-MM-DD") +
+			   1);
+		strcpy(q, binary_url);
+		strcat(q, QUERY_GET_DAILY_DOWNLOAD_TOTALS);
+		strcat(q, "&start_date=");
+		strcat(q, sdate);
+
+		if (et > 0) {
+			edate = time_to_ISO8601_date(&et);
+			strcat(q, "&end_date=");
+			strcat(q, edate);
+			free(edate);
+		}
+
+		free(sdate);
+	} else {
+		q = malloc(strlen(binary_url) +
+			   strlen(QUERY_GET_DAILY_DOWNLOAD_TOTALS) +
+			   1);
+		strcpy(q, binary_url);
+		strcat(q, QUERY_GET_DAILY_DOWNLOAD_TOTALS);
+	}
+
+	return q;
+}
+
+static struct daily_download_total **retrieve_ddts(const char *binary_url,
+						   time_t date_since)
+{
+	char *url;
+	json_object *json;
+	struct daily_download_total **ddts, **tmp;
+	time_t crt;
+
+	url = create_ddts_query(binary_url, date_since, 0);
+	json = get_json_object(url);
 	free(url);
 
-	return result;
+	if (json) {
+		ddts = json_object_to_daily_download_totals(json);
+		json_object_put(json);
+	} else {
+		crt = time(NULL);
+		ddts = NULL;
+
+		while (date_since < crt) {
+			url = create_ddts_query(binary_url,
+						date_since,
+						date_since);
+			json = get_json_object(url);
+			free(url);
+
+			if (!json)
+				break;
+
+			tmp = json_object_to_daily_download_totals(json);
+			json_object_put(json);
+			ddts = ddts_merge(ddts, tmp);
+			free(tmp);
+
+			date_since = date_since + 24 * 60 * 60; /* +1 day */
+
+			url = create_ddts_query(binary_url, date_since, 0);
+			json = get_json_object(url);
+			free(url);
+
+			if (json) {
+				tmp = json_object_to_daily_download_totals
+					(json);
+				json_object_put(json);
+				ddts = ddts_merge(ddts, tmp);
+				free(tmp);
+				break;
+			}
+		}
+	}
+
+	return ddts;
 }
 
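/*
 * Illustrative sketch (hypothetical binary publication URL and date).
 * create_ddts_query() yields, e.g.:
 *
 *   http://api.launchpad.net/1.0/~user/+archive/ppa/+binarypub/12345
 *     ?ws.op=getDailyDownloadTotals&start_date=2015-01-01&end_date=2015-01-02
 *
 * retrieve_ddts() first tries a single open-ended query and only falls
 * back to the one-day queries when that request fails.
 */
static void example_retrieve_ddts(void)
{
	struct daily_download_total **ddts;
	time_t since = 1420070400;	/* 2015-01-01T00:00:00Z */

	ddts = retrieve_ddts("http://api.launchpad.net/1.0/~user/+archive/ppa/+binarypub/12345",
			     since);

	daily_download_total_list_free(ddts);
}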
-void lp_ws_cleanup()
+struct daily_download_total **get_daily_download_totals(const char *binary_url,
+							 time_t date_created)
 {
-	if (debug)
-		printf("DEBUG: cleanup CURL\n");
+	char *key, *content;
+	json_object *j_ddts, *json;
+	struct daily_download_total **retrieved_ddts = NULL;
+	struct daily_download_total **cached_ddts;
+	struct daily_download_total **ddts;
+	time_t last_t;
+
+	key = get_ddts_list_cache_key(binary_url);
+
+	content = fcache_get(key);
+	if (content) {
+		json = json_tokener_parse(content);
+		free(content);
+	} else {
+		json = NULL;
+	}
+
+	if (json) {
+		cached_ddts = json_object_to_daily_download_totals(json);
+		json_object_put(json);
+		last_t = ddts_get_last_date(cached_ddts);
+	} else {
+		last_t = 0;
+		cached_ddts = NULL;
+	}
 
-	curl_easy_cleanup(curl);
-	curl_global_cleanup();
+	if (last_t > 0)
+		retrieved_ddts = retrieve_ddts(binary_url, last_t);
+	else
+		retrieved_ddts = retrieve_ddts(binary_url, date_created);
+
+	ddts = ddts_merge(cached_ddts, retrieved_ddts);
+
+	if (ddts) {
+		j_ddts = ddts_to_json_for_cache(ddts);
+		fcache_put(key, json_object_get_string(j_ddts));
+		json_object_put(j_ddts);
+	}
+	free(key);
+
+	if (ddts != cached_ddts)
+		daily_download_total_list_free(cached_ddts);
+	daily_download_total_list_free(retrieved_ddts);
+
+	return ddts;
 }
+
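/*
 * End-to-end usage sketch for this module (illustrative; the archive
 * and binary URLs are hypothetical and error handling is omitted):
 * fetch the publishing records of a PPA, then the per-day download
 * totals of one binary, both backed by the file cache filled above.
 */
static void example_lp_ws_usage(void)
{
	struct bpph **bpphs;
	struct daily_download_total **ddts, **cur;
	int total = 0;

	bpphs = get_bpph_list("http://api.launchpad.net/1.0/~user/+archive/ppa",
			      NULL /* any package status */,
			      150);

	ddts = get_daily_download_totals(
		"http://api.launchpad.net/1.0/~user/+archive/ppa/+binarypub/12345",
		1420070400 /* hypothetical date_created of the binary */);

	for (cur = ddts; cur && *cur; cur++)
		total += (*cur)->count;	/* sum the daily downloads */

	daily_download_total_list_free(ddts);
	(void)bpphs;
	(void)total;
}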