import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;

import java.io.IOException;

public class HDFSUtilization {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Set fs.defaultFS here if core-site.xml/hdfs-site.xml are not on the classpath;
        // it must point at HDFS for the DistributedFileSystem cast below to succeed.
        // conf.set("fs.defaultFS", "hdfs://namenode:8020");
       
        try (DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(conf)) {
            // Get HDFS utilization statistics
            FsStatus status = fs.getStatus();
            long total = status.getCapacity();
            long used = status.getUsed();
            long remaining = status.getRemaining();
           
            // Guard against a zero-capacity report to avoid division by zero
            double utilizationPercent = total > 0 ? ((double) used / total) * 100 : 0.0;
           
            System.out.println("HDFS Utilization:");
            System.out.println("Total space: " + formatBytes(total));
            System.out.println("Used space: " + formatBytes(used));
            System.out.println("Remaining space: " + formatBytes(remaining));
            System.out.println(String.format("Utilization: %.2f%%", utilizationPercent));
           
            // Block-level counters that are exposed through the client API.
            // The cluster-wide total block count is not available from
            // DistributedFileSystem; the NameNode publishes it as the FSNamesystem
            // "BlocksTotal" JMX metric (see the JMX example after this class).
            System.out.println("Missing blocks: " + fs.getMissingBlocksCount());
            System.out.println("Corrupt blocks: " + fs.getCorruptBlocksCount());
            System.out.println("Under-replicated blocks: " + fs.getUnderReplicatedBlocksCount());

            // Per-datanode usage from a datanode report of the live datanodes
            DatanodeInfo[] datanodes = fs.getDataNodeStats(DatanodeReportType.LIVE);
            for (DatanodeInfo datanode : datanodes) {
                System.out.println(datanode.getHostName()
                        + " - used: " + formatBytes(datanode.getDfsUsed())
                        + ", remaining: " + formatBytes(datanode.getRemaining()));
            }
           
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
   
    private static String formatBytes(long bytes) {
        double tb = bytes / (1024.0 * 1024 * 1024 * 1024);
        double gb = bytes / (1024.0 * 1024 * 1024);
        double mb = bytes / (1024.0 * 1024);
        double kb = bytes / 1024.0;
       
        if (tb >= 1) {
            return String.format("%.2f TB", tb);
        } else if (gb >= 1) {
            return String.format("%.2f GB", gb);
        } else if (mb >= 1) {
            return String.format("%.2f MB", mb);
        } else if (kb >= 1) {
            return String.format("%.2f KB", kb);
        } else {
            return String.format("%d bytes", bytes);
        }
    }
}
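
The client-side FileSystem API used above does not report the cluster-wide block count; the NameNode publishes it as the BlocksTotal attribute of its FSNamesystem JMX bean. Below is a minimal sketch that reads that attribute from the NameNode's JMX HTTP servlet; the host name and port are placeholders for your cluster (9870 is the Hadoop 3.x default HTTP port, 50070 on 2.x), and the string matching is deliberately simple.

import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BlocksTotalFromJmx {
    public static void main(String[] args) throws Exception {
        // NameNode host and HTTP port are placeholders for your cluster.
        URL jmxUrl = new URL(
                "http://namenode-host:9870/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem");
        try (InputStream in = jmxUrl.openStream();
             Scanner scanner = new Scanner(in, StandardCharsets.UTF_8.name()).useDelimiter("\\A")) {
            String json = scanner.hasNext() ? scanner.next() : "";
            // Crude extraction of the BlocksTotal attribute; a JSON parser would be more robust.
            Matcher m = Pattern.compile("\"BlocksTotal\"\\s*:\\s*(\\d+)").matcher(json);
            if (m.find()) {
                System.out.println("Total blocks in HDFS: " + m.group(1));
            } else {
                System.err.println("BlocksTotal not found in JMX output");
            }
        }
    }
}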


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.server.namenode.TransferFsImage;

import java.io.File;
import java.io.IOException;
import java.net.URL;

public class DownloadFsImage {

    public static void main(String[] args) throws IOException {
        // Hadoop configuration
        Configuration conf = new Configuration();

        // Set up HA configuration (normally picked up from hdfs-site.xml on the classpath)
        conf.set("fs.defaultFS", "hdfs://mycluster");
        conf.set("dfs.nameservices", "mycluster");
        conf.set("dfs.ha.namenodes.mycluster", "nn1,nn2");
        conf.set("dfs.namenode.rpc-address.mycluster.nn1", "namenode1-host:8020");
        conf.set("dfs.namenode.rpc-address.mycluster.nn2", "namenode2-host:8020");
        conf.set("dfs.client.failover.proxy.provider.mycluster",
                "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

        // Find the active NameNode
        String activeNamenode = findActiveNameNode(conf);
        if (activeNamenode == null) {
            System.err.println("No active NameNode found!");
            return;
        }

        System.out.println("Active NameNode: " + activeNamenode);

        // Local directory to save the fsimage (TransferFsImage writes to the local
        // filesystem, not to HDFS)
        File outputDir = new File("/tmp/fsimage");
        if (!outputDir.exists() && !outputDir.mkdirs()) {
            throw new IOException("Could not create output directory: " + outputDir);
        }

        // Download the latest fsimage from the active NameNode's HTTP endpoint.
        // 50070 is the Hadoop 2.x default HTTP port; Hadoop 3.x uses 9870.
        // TransferFsImage is an internal NameNode class, so the hadoop-hdfs jar must be
        // on the classpath; on a secured cluster this must run as a privileged user.
        URL namenodeHttpAddress = new URL("http://" + activeNamenode + ":50070");
        TransferFsImage.downloadMostRecentImageToDirectory(namenodeHttpAddress, outputDir);

        System.out.println("FsImage downloaded successfully to: " + outputDir);
    }

    /**
     * Finds the active NameNode in an HDFS HA setup.
     *
     * @param conf Hadoop configuration
     * @return The hostname of the active NameNode, or null if it cannot be determined
     */
    private static String findActiveNameNode(Configuration conf) throws IOException {
        // HAUtil.getAddressOfActive() asks the client-side failover proxy which
        // NameNode behind the HA nameservice is currently active.
        try (FileSystem fs = FileSystem.get(conf)) {
            return HAUtil.getAddressOfActive(fs).getHostName();
        } catch (IllegalArgumentException e) {
            // fs.defaultFS does not point at a DistributedFileSystem
            return null;
        }
    }
}
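
For reference, the same fsimage download is available without any custom code: running "hdfs dfsadmin -fetchImage /tmp/fsimage" downloads the most recent fsimage from the active NameNode into the given local directory, which is essentially what the program above does programmatically through TransferFsImage.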




