From 98dbcfae108e403019b2f60923266bcb8f78235d Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 15 Mar 2019 16:59:33 +0000 Subject: [PATCH] can now supply API_KEY, HTTP_USERNAME and HTTP_PASSWORD --- walk-rpde.sh | 54 +++++++++++++++++++--------------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/walk-rpde.sh b/walk-rpde.sh index 6e42786..c41f4a2 100755 --- a/walk-rpde.sh +++ b/walk-rpde.sh @@ -1,47 +1,33 @@ -#!/usr/bin/env bash - -set -o errexit -set -o nounset -set -o pipefail -# Uncomment the following line in order to debug -# set -o xtrace +#!/usr/bin/env sh # required commands: awk, curl, jq, tee -if [ "$#" -lt "1" -o "$#" -gt "2" ] +if [ "$#" -ne "1" ] then - printf 'Usage: %s [-s]\n' "$0" - printf '\n' - printf ' -s Squashes all pages into a single page, "rpde.json"\n' + printf 'Usage: %s \n' "$0" exit 1 fi -single_file=$2 max=100 page=1 base=$(printf '%s' "$1" | awk -F/ '{print $1 "//" $3}') - -last_next_url= -next_url="$1" -num_items=-1 - -# Stop paging if the "next" url is the same as in the last page. This is the end of the feed as defined by RPDE -while [ "${next_url}" != "${last_next_url}" ] +# $1 = next url +# $2 = num items +set -- $1 -1 +while [ "$2" -ne "0" ] do - page_padded=$(printf '%0*d\n' ${#max} ${page}) - last_next_url="${next_url}" - # Results of $() statement are stored in next_url & num_items - read -r next_url num_items <<< $(curl -L -sS "${next_url}" | jq '.' | tee "rpde-${page_padded}.json" | jq -r '.next, (.items | length)') - printf 'got page: %s, with next url: %s, num items: %s\n' "${page}" "${next_url}" "${num_items}" - # If "next" URL isn't an absolute URL, like /rpde?afterTimestamp=123&afterId=abc, prepend the base URL to it to create an absolute URL - case ${next_url} in /*) - next_url="${base}${next_url}" - esac + page_padded=$(printf '%0*d\n' ${#max} $page) + if [[ $HTTP_USERNAME && $HTTP_PASSWORD ]]; then + set -- $(curl -L -sS -u $HTTP_USERNAME:$HTTP_PASSWORD "$1" | jq '.' | tee rpde-$page_padded.json | jq -r '.next, (.items | length)') + elif [[ -z $API_KEY ]]; then + set -- $(curl -L -sS "$1" | jq '.' | tee rpde-$page_padded.json | jq -r '.next, (.items | length)') + else + set -- $(curl -L -sS -H "X-API-KEY:$API_KEY" "$1" | jq '.' | tee rpde-$page_padded.json | jq -r '.view.next, (."imin:item" | length)') + fi + printf 'got page with next url: %s, num items: %s\n' "$1" $2 + if [ "${1%${1/?}}"x = '/x' ] + then + set -- $base$1 $2 + fi page=$((page=page+1)) done - -if [ "${single_file}" != "" ] -then - jq -s '[.[].items[]]' rpde-*.json > rpde.json - rm -rf rpde-*.json -fi