# Unpack the Apache Hive 2.1.1 distribution and rename it to a stable path.
# (The notes originally wrote "apatch"; the real tarball is apache-hive-*.)
tar -zxvf apache-hive-2.1.1-bin.tar.gz
mv apache-hive-2.1.1 hive-2.1.1

# Put Hive on PATH (add to /etc/profile or ~/.bashrc to persist across logins).
export HIVE_HOME=/opt/soft/hive-2.1.1
export PATH=${PATH}:${HIVE_HOME}/bin

# Create editable configs from the shipped templates (run inside $HIVE_HOME/conf).
cp hive-default.xml.template hive-site.xml
cp hive-log4j.properties.template hive-log4j.properties
修改 hive-site.xml
<!-- Local scratch paths; place at the top of hive-site.xml so that the
     ${system:java.io.tmpdir}/${system:user.name} placeholders used by the
     default template resolve correctly. -->
<property>
  <name>system:java.io.tmpdir</name>
  <value>/tmp/hive/java</value>
</property>
<property>
  <name>system:user.name</name>
  <value>${user.name}</value>
</property>
<!-- MySQL-backed metastore connection -->
<property>
  <name>javax.jdo.option.ConnectionURL</name>
  <value>jdbc:mysql://localwork:3306/hive_metadata?createDatabaseIfNotExist=true</value>
  <description>
    JDBC connect string for a JDBC metastore.
    To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
    For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
  </description>
</property>
<property>
  <name>javax.jdo.option.ConnectionDriverName</name>
  <value>com.mysql.jdbc.Driver</value>
  <description>Driver class name for a JDBC metastore</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionUserName</name>
  <value>root</value>
  <description>Username to use against metastore database</description>
</property>
<property>
  <name>javax.jdo.option.ConnectionPassword</name>
  <value>root</value>
  <description>password to use against metastore database</description>
</property>
<!-- HDFS location where managed-table data is stored -->
<property>
  <name>hive.metastore.warehouse.dir</name>
  <value>/hive/warehouse</value>
  <description>location of default database for the warehouse</description>
</property>
修改 hive-log4j.properties
log4j 日志配置可选择性修改
# Create the Hive warehouse and scratch directories on HDFS.
# -p creates missing parents (/hive) and does not fail if the dir already exists.
hadoop fs -mkdir -p /hive/warehouse
hadoop fs -mkdir -p /tmp
# NOTE(review): Hive's scratch-dir check requires /tmp/hive to be writable by
# the executing user; if "should be writable" errors appear, use 777 instead.
hadoop fs -chmod -R 755 /hive/warehouse
hadoop fs -chmod -R 755 /tmp
-- Create the MySQL database that will hold the Hive metastore tables.
-- The /*!40100 ... */ version comment applies the collation on MySQL >= 4.1.
CREATE DATABASE `hive_metadata` /*!40100 COLLATE 'utf8_general_ci' */
上传 MySQL 驱动(mysql-connector-java 的 jar)到 hive 的 lib 目录下
初始化 hive 数据库配置到 MySQL
# Initialize the metastore schema in MySQL (run from $HIVE_HOME).
./bin/schematool -initSchema -dbType mysql
# Start the Hive CLI.
./bin/hive
sqoop中文手册:http://blog.csdn.net/myrainblues/article/details/43673129
hive 新建表
-- Create a Hive-managed table; fields in the backing files are tab-delimited.
create table t_test(id bigint,name string, age int) row format delimited fields terminated by '\t';
导入本地数据
-- Load from the local filesystem; OVERWRITE replaces the table's existing data.
load data local inpath '/opt/hadoop-test/test.txt' overwrite into table t_test;
导入 hdfs 数据
-- Load from HDFS (no LOCAL keyword); the source file is moved into the table directory.
load data inpath '/data/test.txt' overwrite into table t_test;
hive 新建表并导入其他表数据
-- CTAS: create a new table whose schema and data come from the query result.
create table t_test2 as select * from t_test;
hive 已存在表导入其他表数据
-- Replace all rows of an existing table with the query result.
insert overwrite table t_test2 select * from t_test;
仅复制表结构不导数据
-- Copy only the table structure (no data).
create table t_test3 like t_test;
通过 Hive 导出到本地文件系统
-- Export query results into a local directory (directory contents are replaced).
-- Added the missing statement terminator for consistency with the other examples.
insert overwrite local directory '/tmp/t_crm/t_test' select * from t_test2;
分区(分区字段不能存在新建时表结构定义里边)
-- Partitioned table: the partition column (gender) must NOT also appear in the
-- regular column list; it becomes a virtual column sourced from the directory layout.
create table t_user(id bigint,name string, age int) partitioned by (gender int) row format delimited fields terminated by '\t';
查询、聚合查询等类似,可参考 MySQL
(1)导入MySQL查询结果
注意: 查询结果有重复的,(1)先在hive建表,然后导入; (2)或者查询结果指定别名
hive建立外部表
-- External table: Hive only tracks metadata; data lives (and stays) at the given
-- HDFS location, partitioned by logdate, tab-delimited fields.
create external table qf_sale_follow (id string,name string,short_name string,sale_id string,sale_name string,sale_city string,follow_type string,follow_status string,schedule_rate string,abandon_reason string,create_time string,create_user_id string,create_user_name string,detail string) partitioned by (logdate string) row format delimited fields terminated by '\t' location '/qf_dev/qf_sale_follow';
导入hive数据(导入query查询时, 必须携带 where $CONDITIONS, 而且必须指定--target-dir[hdfs 地址] ;如果查询有重复名称,则必须查询结果指定别名,或者先在数据库新建表):
# Sqoop free-form-query import into Hive.
# --query must contain "where $CONDITIONS" (kept literal by the single quotes)
# and an explicit --target-dir used as the HDFS staging path.
sqoop import \
  --connect jdbc:mysql://localwork:3306/qf_dev \
  --username root --password root \
  --query 'SELECT b.id, b.name, b.short_name, b.sale_id, b.sale_name, b.sale_city, a.follow_type, a.follow_status, a.schedule_rate, a.abandon_reason, a.create_time, a.create_user_id, a.create_user_name, a.detail FROM t_customer_follow_record a JOIN t_customer b ON a.customer_id=b.id where $CONDITIONS ORDER BY a.create_time ASC' \
  --hive-import --hive-overwrite --hive-table qf_sale_follow \
  -m 1 \
  --target-dir /qf_dev/qf_sale_follow2/ \
  --fields-terminated-by '\t';
导入hive(指定表):
# Sqoop whole-table import into a named Hive database/table.
sqoop import \
  --connect jdbc:mysql://localwork:3306/qf_dev \
  --username root --password root \
  --table t_workflow_report \
  --hive-import --hive-overwrite \
  --hive-database qf_dev --hive-table t_workflow_report \
  -m 1 \
  --fields-terminated-by '\t';
注意:target-dir 临时目标目录,一定不要和hive 中 location 位置相同,因为target-dir是第一步MySQL数据导入到hdfs中临时存储目录,导入hdfs成功后,会再把 hdfs数据导入到hive,导入成功后,把临时文件删掉;
空值处理:--null-string '\\N' --null-non-string '\\N'
# Sqoop export from the Hive warehouse directory back to MySQL.
# --input-null-string / --input-null-non-string map Hive's \N null
# representation back to SQL NULL when reading the export files.
sqoop export \
  --connect jdbc:mysql://localwork:3306/test \
  --username root --password root \
  --table t_workflow_report \
  --export-dir /hive/warehouse/qf_dev.db/t_workflow_report2 \
  --input-null-string '\\N' --input-null-non-string '\\N' \
  --input-fields-terminated-by '\t';
(1)导入 hive IOException running import job: java.io.IOException: Hive exited with status 1
解决办法:http://blog.csdn.net/wind520/article/details/39128399
(2)找不到数据库表 生成的jar文件
解决方法: 修改hadoop配置文件 mapred-site.xml(mapreduce.framework.name 属于 mapred-site.xml,而非 yarn-site.xml)
<!-- Run MapReduce jobs on YARN (required for Sqoop's generated MR jobs).
     NOTE(review): this property conventionally belongs in mapred-site.xml,
     not yarn-site.xml - confirm which file your Hadoop actually loads it from. -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
(3)org.apache.hadoop.yarn.exceptions.InvalidAuxServiceException: The auxService: mapreduce_shuffle do
解决方法: 修改hadoop配置文件 yarn-site.xml
<!-- Enable the shuffle auxiliary service on NodeManagers so the MapReduce
     shuffle phase works (fixes InvalidAuxServiceException: mapreduce_shuffle). -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
http://blog.csdn.net/baiyangfu_love/article/details/13504849
(4)root scratch dir: /tmp/hive on HDFS should be writable. Current permissions are: rwxr-xr-x
修改hdfs tmp文件权限, hadoop fs -chmod -R 777 /tmp (报错信息显示当前 rwxr-xr-x 即 755 仍不可写,需开放写权限)
(5)hive中导入的数据只有id一行
解决办法: 先在hive中建立表,创建好目录结构后,再执行import导入;
(6)java 连接 hive 报 root is not allowed to impersonate root
修改hadoop的配置文件core-site.xml
<!-- Allow the 'root' user to impersonate any user from any host; fixes
     "root is not allowed to impersonate root" when connecting via HiveServer2.
     (The original descriptions were stale copies of the Hadoop docs' oozie /
     host1+host2 example and did not match this root/* configuration.) -->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
<description>Groups whose members the superuser root may impersonate; * allows all groups</description>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
<description>Hosts from which the superuser root may impersonate users; * allows all hosts</description>
</property>
hiveserver2 : http://blog.csdn.net/gamer_gyt/article/details/52062460