Download script - RID - Reanalysis Intercomparison Dataset

Updates:

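2025/07/03 The previous script no longer functions correctly with recent wget releases. This updated version reliably checks for newer files on the server before downloading when check_for_updated_files is set to True, keeping your local copies up to date. With the default setting (False), files that already exist locally are skipped.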

The following is a Python script that downloads netCDF files provided by the Reanalysis Intercomparison Dataset (RID). It is left to the user to modify the script to download the desired data files.

Requirements: Python 3, wget

Those who wish to use the OPeNDAP functionalities can modify this script to suit their needs.

Usage: Save the following lines of code into a file, such as RIDscript.py, and then execute it in a terminal with Python using the command “python RIDscript.py”.

# User input===========================================================================
destination = "myRID"  # Destination folder on the user's computer (created if missing)

# Selection of data holdings.
# This is an example, not an exhaustive list. Every combination of the four
# lists below is requested from the server.
components = ["zonal", "single-level"]
temporal_resolutions = ["monthly"]
subcomponents = ["core"]
reanalyses = ["ERA5", "JRA-55"]

check_for_updated_files = False
# If True, files already present locally are handed to wget again, which
# re-downloads them when a newer version is available on the server.
# If False, files already present locally are skipped (much faster).

# End of user input====================================================================

# Importing necessary modules
import os
import subprocess
import time
import urllib.error
import urllib.request
from xml.dom import minidom

# Base URL of the THREDDS catalog from which the file listings are read
source = "https://www.jamstec.go.jp/RID/thredds/catalog/testAll/"

# Make sure the download folder exists before anything is fetched
if not os.path.isdir(destination):
    print("Destination directory does not exist, creating")
    os.makedirs(destination)

# Flag telling whether the script is running on Windows
windows = os.name == "nt"

def _download_file(url, directory, max_attempts=100):
    """Download ``url`` into ``directory`` with wget, retrying on failure.

    wget is started directly (no shell) with ``directory`` as its working
    directory, so folder names containing spaces and URLs containing shell
    metacharacters are passed through safely, and no platform-specific
    command chaining is needed.

    Parameters:
        url: Address of the file to download.
        directory: Existing folder in which wget is run; because of
            ``-x --cut-dirs=4 -nH`` the remote directory tree (minus host
            name and the first four path components) is recreated below it.
        max_attempts: Maximum number of times wget is launched.

    Returns:
        True if wget exited with status 0, False if every attempt failed.

    Raises:
        SystemExit: if wget cannot be started at all (for example because it
            is not installed), since retrying cannot help in that case.
    """
    wget_command = [
        "wget",
        "--tries=20",
        "--no-if-modified-since",
        "--timeout=30",
        "-x",
        "--cut-dirs=4",
        "-nH",
        "-N",  # only download when the remote file is newer than the local one
        url,
    ]
    print("Running in", directory + ":", " ".join(wget_command))

    for attempt in range(1, max_attempts + 1):
        try:
            # The exit status is the only failure signal wget gives; a failed
            # download does not raise an exception on the Python side.
            exit_code = subprocess.call(wget_command, cwd=directory)
        except OSError as err:
            raise SystemExit("Could not start wget (" + str(err) + ")")

        if exit_code == 0:
            return True

        if attempt < max_attempts:
            wait = 10 * attempt  # linear back-off between attempts
            print(
                "Download failed (wget exit code",
                exit_code,
                ") after",
                attempt,
                "tries. Waiting for",
                wait,
                "seconds until next attempt.",
            )
            time.sleep(wait)

    return False


# Walk through every requested combination, read its catalog and download
# each netCDF file listed there.
for component in components:
    for temporal_resolution in temporal_resolutions:
        for subcomponent in subcomponents:
            for reanalysis in reanalyses:

                src = (
                    source
                    + "/".join(
                        [
                            component,
                            "common-grid",
                            temporal_resolution,
                            subcomponent,
                            reanalysis,
                        ]
                    )
                    + "/"
                )
                print(src)

                # urlopen raises for HTTP errors and network problems; skip
                # the combination instead of aborting the whole run.
                try:
                    with urllib.request.urlopen(src) as response:
                        if response.getcode() != 200:
                            print(
                                "Unexpected response from server, skipping:",
                                response.getcode(),
                            )
                            continue
                        print("Received response from server")
                        dom = minidom.parse(response)
                except urllib.error.URLError as err:
                    print("Could not read catalog, skipping:", err)
                    continue

                for dataset in dom.getElementsByTagName("dataset"):
                    filename = dataset.attributes["name"].value

                    # Only entries whose name contains ".nc" are downloaded
                    if ".nc" not in filename:
                        continue

                    urlPath = dataset.attributes["urlPath"].value
                    downloadpath = (
                        "https://www.jamstec.go.jp/RID/thredds/fileServer/" + urlPath
                    )
                    opendappath = (
                        "https://www.jamstec.go.jp/RID/thredds/dodsC/" + urlPath
                    )
                    print("Download URL:", downloadpath)
                    print("OpenDap path", opendappath)

                    # Local path where wget stores the file: everything after
                    # the first seven "/"-separated pieces of the URL, which
                    # matches what "-nH --cut-dirs=4" strips.
                    file_output = (
                        destination + "/" + "/".join(downloadpath.split("/")[7:])
                    )

                    if os.path.isfile(file_output) and not check_for_updated_files:
                        print("File present on computer, skipping")
                        continue

                    # Short pause before each download (presumably to avoid
                    # overloading the server)
                    time.sleep(2)

                    if not _download_file(downloadpath, destination):
                        print("Giving up on", downloadpath)