Simple Java API HBase manipulation
One of the ways to access HBase is through the Java API. Here is the code for simple HBase manipulations such as creating and changing a table, and adding, retrieving and deleting data.
Contents
HOW TO
I used CDH 5.8.4 with Kerberos.
If you work with Maven, there are 3 libraries that need to be added as dependencies in your pom.xml:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0-cdh5.8.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.0-cdh5.8.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.2.0-cdh5.8.4</version>
</dependency>
Java Code
I assumed that the program is provided with the name of the table to be created and manipulated, passed as the first command-line argument.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.util.Bytes; import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; public class HBaseManager { Connection connection; Admin admin; final static String DEFAULT_CF = "column_family1"; final static String NEW_CF = "column_family2"; public static void main(String args[]){ if (args.length < 1 || args[0] == null) { throw new IllegalArgumentException("no table name provided"); } String tableName = args[0]; HBaseManager hManager = new HBaseManager(); hManager.connect(); hManager.deleteTable(tableName); hManager.createTable(tableName); hManager.addColumnFamily(NEW_CF,tableName); hManager.addValue("row1",DEFAULT_CF,"column1","value1",tableName); hManager.addValue("row1",NEW_CF,"column2","value2",tableName); System.out.println(hManager.retrieveData("row1",NEW_CF,"column2", tableName)); System.out.println(hManager.retrieveData(tableName,1)); hManager.addValue("row2",DEFAULT_CF,"column1","value3",tableName); System.out.println(hManager.retrieveData(tableName,2)); hManager.closeCon(); } public Connection connect() { //initiating HBase connection Configuration conf = HBaseConfiguration.create(); try { connection = ConnectionFactory.createConnection(conf); admin = connection.getAdmin(); }catch(IOException e){ throw new RuntimeException("unable to connect to HBase ", e); } return connection; } public HTableDescriptor createTable(String tableName) { //HTableDescriptor contains the details about an HBase table such as //the descriptors of all the column families, is the table a catalog //table, hbase:meta, if the table is read only, the maximum size of //the memstore, when the region split should occur etc... HTableDescriptor table = new HTableDescriptor(TableName. 
valueOf(tableName)); table.addFamily(new HColumnDescriptor(DEFAULT_CF). setCompressionType(Compression.Algorithm.NONE)); try{ deleteTable(tableName); admin.createTable(table); }catch(IOException e){ throw new RuntimeException("unable to create HBase table ", e); } return table; } public void addColumnFamily(String columnName, String tableName) { //HColumnDescriptor contains information about a column family such as // the number of versions, compression settings, etc. It is used as an // input when creating a table or adding a column. HColumnDescriptor newColumn = new HColumnDescriptor(columnName); newColumn.setCompressionType(Compression.Algorithm.NONE); try { admin.addColumn(TableName.valueOf(tableName),newColumn); }catch(IOException e){ throw new RuntimeException("unable to add a column to HBase table " + tableName, e); } } public void addValue(String rowKey, String columnFamily, String columnName, String value, String tableName) { //Cell describes the values associated with row, column family and // qualifier. Put object is used for adding new records. Cell cell = CellUtil.createCell(Bytes.toBytes(rowKey), Bytes.toBytes(columnFamily), Bytes.toBytes(columnName), 201707091000L, (byte)4, Bytes.toBytes(value)); try (Table table = connection.getTable(TableName.valueOf(tableName))){ //Table can be used to get, put, delete or scan data from a table. 
Put p = new Put(Bytes.toBytes(rowKey)); p.add(cell); table.put(p); }catch(IOException e){ throw new RuntimeException("unable to add data to HBase table ", e); } } //retrieve the value public String retrieveData(String rowKey, String columnFamily, String columnName, String tableName) { Cell val = null; try (Table table = connection.getTable(TableName.valueOf(tableName))){ Get g = new Get(Bytes.toBytes(rowKey)); Result result = table.get(g); if (result != null) { val= result.getColumnLatestCell(Bytes.toBytes(columnFamily) Bytes.toBytes(columnName)); } }catch(IOException e){ throw new RuntimeException("unable to retrieve the data from HBase table ", e); } return Bytes.toString(CellUtil.cloneValue(val)); } //retrieve the whole data row public List<String> retrieveData(String rowKey, String tableName) { List<String> cellValues; try (Table table = connection.getTable(TableName.valueOf(tableName))){ Get g = new Get(Bytes.toBytes(rowKey)); Result result = table.get(g); cellValues = result.listCells().stream().map(CellUtil::cloneValue). map(Bytes::toString).collect(Collectors.toList()); }catch(IOException e){ throw new RuntimeException("unable to retrieve the data from HBase table ", e); } return cellValues; } //retrieve data from few rows public List<String> retrieveData(String tableName, Integer numRows) { List<String> cellValues; try (Table table = connection.getTable(TableName.valueOf(tableName))){ Scan scanner = new Scan(); scanner.setMaxResultSize(numRows); ResultScanner resultScanner = table.getScanner(scanner); cellValues = Arrays.stream(resultScanner.next(numRows)). map(Result::listCells).flatMap(List::stream). map(CellUtil::cloneValue).map(Bytes::toString). 
collect(Collectors.toList()); }catch(IOException e){ throw new RuntimeException("unable to retrieve the data from HBase table ", e); } return cellValues; } public void deleteTable(String tableName) { try { if (admin.tableExists(TableName.valueOf(tableName))) { //we need to disable the table before deletion admin.disableTable(TableName.valueOf(tableName)); admin.deleteTable(TableName.valueOf(tableName)); } }catch(IOException e){ throw new RuntimeException("unable to delete HBase table ", e); } } public void closeCon() { try { admin.close(); connection.close(); }catch(IOException e){ throw new RuntimeException(e); } } } |