This commit adds a new command line option (effective-url=true) which
causes nbdkit-curl-plugin the first time it fetches a URL to update
its internal 'url' variable with the CURLINFO_EFFECTIVE_URL. That
means, the URL after all redirects have been done. Further
connections will be done using this post-redirect URL, ensuring that
those connections are stable.
Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=2013000
---
plugins/curl/nbdkit-curl-plugin.pod | 35 ++++++++++++++++++++++++
plugins/curl/curl.c | 42 ++++++++++++++++++++++++++---
2 files changed, 73 insertions(+), 4 deletions(-)
diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod
index c7acf6225..673f1d327 100644
--- a/plugins/curl/nbdkit-curl-plugin.pod
+++ b/plugins/curl/nbdkit-curl-plugin.pod
@@ -126,6 +126,40 @@ Run C<SCRIPT> (a command or shell script fragment) to generate
the
HTTP/HTTPS cookies. C<cookie-script> cannot be used with C<cookie>.
See L</HEADER AND COOKIE SCRIPTS> below.
+=item B<effective-url=true>
+
+(nbdkit E<ge> 1.30)
+
+Replace the URL supplied on the command line with the effective URL
+(from L<CURLINFO_EFFECTIVE_URL(3)>, the final URL fetched after server
+redirects). This can be used with mirror services that redirect to a
+geographical region — for example file download sites — to ensure the
+URL will always be the same.
+
+Note use of this feature in long-lived nbdkit instances can cause
+subtle problems:
+
+=over 4
+
+=item *
+
+The effective URL persists across connections for the lifetime of the
+nbdkit instance. If nbdkit is used for a long time then it is
+possible for the redirected URL to become stale.
+
+=item *
+
+It will defeat some mirror load-balancing techniques.
+
+=item *
+
+If the mirror service sometimes redirects to a broken URL and it
+happens that the URL you fetch first is broken then nbdkit will no
+longer recover on subsequent connections (instead you will need to
+restart nbdkit).
+
+=back
+
=item B<followlocation=false>
(nbdkit E<ge> 1.26)
@@ -481,6 +515,7 @@ C<nbdkit-curl-plugin> first appeared in nbdkit 1.2.
L<curl(1)>,
L<libcurl(3)>,
+L<CURLINFO_EFFECTIVE_URL(3)>,
L<CURLOPT_CAINFO(3)>,
L<CURLOPT_CAPATH(3)>,
L<CURLOPT_COOKIE(3)>,
diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c
index a1b0afba7..8c016509f 100644
--- a/plugins/curl/curl.c
+++ b/plugins/curl/curl.c
@@ -44,6 +44,8 @@
#include <errno.h>
#include <assert.h>
+#include <pthread.h>
+
#include <curl/curl.h>
#include <nbdkit-plugin.h>
@@ -73,6 +75,7 @@ const char *cookiefile = NULL;
const char *cookiejar = NULL;
const char *cookie_script = NULL;
unsigned cookie_script_renew = 0;
+bool effectiveurl = false;
bool followlocation = true;
struct curl_slist *headers = NULL;
const char *header_script = NULL;
@@ -93,6 +96,12 @@ const char *unix_socket_path = NULL;
const char *user = NULL;
const char *user_agent = NULL;
+/* For handling the effective-url flag, we save the first effective
+ * URL we visit in this variable.
+ */
+char *url_effective = NULL;
+pthread_mutex_t url_effective_lock = PTHREAD_MUTEX_INITIALIZER;
+
/* Use '-D curl.verbose=1' to set. */
NBDKIT_DLL_PUBLIC int curl_debug_verbose = 0;
@@ -118,6 +127,7 @@ curl_unload (void)
free (proxy_password);
scripts_unload ();
curl_global_cleanup ();
+ free (url_effective);
}
/* See <curl/curl.h> */
@@ -248,6 +258,13 @@ curl_config (const char *key, const char *value)
return -1;
}
+ else if (strcmp (key, "effective-url") == 0) {
+ r = nbdkit_parse_bool (value);
+ if (r == -1)
+ return -1;
+ effectiveurl = r;
+ }
+
else if (strcmp (key, "followlocation") == 0) {
r = nbdkit_parse_bool (value);
if (r == -1)
@@ -404,6 +421,7 @@ curl_config_complete (void)
"cookiejar=<FILENAME> Read and write cookies to jar.\n" \
"cookie-script=<SCRIPT> Script to set HTTP/HTTPS cookies.\n" \
"cookie-script-renew=<SECS> Time to renew HTTP/HTTPS cookies.\n" \
+ "effective-url=true Always use redirected URL.\n" \
"followlocation=false Do not follow redirects.\n" \
"header=<HEADER> Set HTTP/HTTPS header.\n" \
"header-script=<SCRIPT> Script to set HTTP/HTTPS headers.\n" \
@@ -486,10 +504,14 @@ curl_open (int readonly)
}
/* Set the URL. */
- r = curl_easy_setopt (h->c, CURLOPT_URL, url);
- if (r != CURLE_OK) {
- display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url);
- goto err;
+ {
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&url_effective_lock);
+
+ r = curl_easy_setopt (h->c, CURLOPT_URL, url_effective ? : url);
+ if (r != CURLE_OK) {
+ display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url);
+ goto err;
+ }
}
/* Various options we always set.
@@ -594,6 +616,18 @@ curl_open (int readonly)
goto err;
}
+ if (effectiveurl) {
+ ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&url_effective_lock);
+
+ if (url_effective == NULL) {
+ r = curl_easy_getinfo (h->c, CURLINFO_EFFECTIVE_URL, &url_effective);
+ if (r != CURLE_OK) {
+ display_curl_error (h, r, "could not get effective URL");
+ goto err;
+ }
+ }
+ }
+
#ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T
r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o);
if (r != CURLE_OK) {
--
2.32.0