当前位置: 代码迷 >> 综合 >> Hive自定义UDTF解析Json串
  详细解决方案

Hive自定义UDTF解析Json串

热度:53   发布时间:2023-09-14 15:43:54.0

用于解析 et 事件的 JSON 字符串:et 是一个 JSON 数组,数组中的每个元素是一个事件对象(通过 en 字段标识事件名称),UDTF 会把每个事件拆成一行输出。

Maven依赖如下:

<properties>
    <project.build.sourceEncoding>UTF8</project.build.sourceEncoding>
    <hive.version>1.2.1</hive.version>
</properties>

<dependencies>
    <!-- Add the Hive dependency -->
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-exec</artifactId>
        <version>${hive.version}</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <!-- Compile for Java 8 -->
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>2.3.2</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <!-- Build a jar-with-dependencies during the package phase -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

具体业务代码实现:

import java.util.ArrayList;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
public class EventJsonUDTF extends GenericUDTF {
//该方法中,我们将指定输出参数的名称和参数类型:@Overridepublic StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
ArrayList<String> fieldNames = new ArrayList<String>();ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
//fieldNames叫什么名字不重要,重要的是类型fieldNames.add("event_name");fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);fieldNames.add("event_json");fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);}
//输入1条记录输出若干条结果@Overridepublic void process(Object[] objects) throws HiveException {
// 获取传入的etString input = objects[0].toString();
// 如果传进来的数据为空,直接返回过滤掉该数据if (StringUtils.isBlank(input)) {return;} else {try {// 获取一共有几个事件(ad/facoriters)JSONArray ja = new JSONArray(input);if (ja == null)return;// 循环遍历每一个事件for (int i = 0; i < ja.length(); i++) {String[] result = new String[2];try {// 取出每个的事件名称(ad/facoriters)result[0] = ja.getJSONObject(i).getString("en");// 取出每一个事件整体result[1] = ja.getString(i);} catch (JSONException e) {continue;}// 将结果返回forward(result);}} catch (JSONException e) {e.printStackTrace();}}}//当没有记录处理的时候该方法会被调用,用来清理代码或者产生额外的输出@Overridepublic void close() throws HiveException {}
}
  • 打jar包
  • 上传至服务器或者hdfs
  • add jar
  • 创建临时或永久函数(CREATE [TEMPORARY] FUNCTION 函数名 AS 'EventJsonUDTF')