Get last access time of hdfs files

If you have a requirement to find the last access time of files in HDFS, there are two ways to get it: from the Ranger audit logs, or with a small Java program. I first tried to get it from the Ranger audit DB, but it contains so many temporary files and directories that the results were noisy and hard to use.

So I would like to go with the Java program. You can use the following code to build your jar and serve your requirement.

package com.saurabh;

import java.io.*;

import java.util.*;

import java.net.*;

import org.apache.hadoop.fs.*;

import org.apache.hadoop.conf.*;

import org.apache.hadoop.io.*;

import org.apache.hadoop.mapred.*;

import org.apache.hadoop.util.*;

// For Date Conversion from long to human readable.

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

public class Accesstime {

public static void main(String[] args) throws Exception {

System.out.println(“usage: hadoop jar accessTime.jar <file-path>”);

if (args.length != 0) {

try {

FileSystem fs = FileSystem.get(new Configuration());

// String hdfsFilePath = “hdfs://HDPINF/wordcount/words.txt”;

String hdfsFilePath = args[0];

// you need to pass in your hdfs path

// FileStatus status = fs.getFileStatus(new Path(hdfsFilePath));

// System.out.println(“Access time of file “+status.getAccessTime());

// long lastAccessTimeLong = status.getAccessTime();

// Date lastAccessTimeDate = new Date(lastAccessTimeLong);

// DateFormat df = new SimpleDateFormat(“EEE, d MMM yyyy HH:mm:ss”);

// System.out.println(“The file/Dir ‘” + hdfsFilePath + “‘ was accessed last at: “

// + df.format(lastAccessTimeDate));

//

// System.out.println(“Access time of file “+status.getAccessTime());

FileStatus[] status = fs.listStatus(new Path(hdfsFilePath));

for (int i = 0; i < status.length; i++) {

long lastAccessTimeLong = status[i].getAccessTime();

Date lastAccessTimeDate = new Date(lastAccessTimeLong);

DateFormat df = new SimpleDateFormat(

“EEE, d MMM yyyy HH:mm:ss”);

System.out.println(“The file/Dir ‘” + hdfsFilePath

+ “‘ was accessed last at: “

+ df.format(lastAccessTimeDate));

}

} catch (Exception e) {

System.out.println(“File not found”);

e.printStackTrace();

}

}else{

System.out.println(“Please provide the absolute file path.”);

}

}

}

 


1 Comment

Trump

May 23, 2020 at 6:04 pm

this is awesome

Leave a Reply