Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.ql.exec.vector.mapjoin;

import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;

/**
 * Static helpers shared by the long-keyed vectorized map-join hash tables
 * (fast and optimized variants) for turning a deserialized key field into
 * the canonical {@code long} used as the hash-table key.
 */
public final class VectorMapJoinLongHashUtil {

  private VectorMapJoinLongHashUtil() {
    // Utility class; never instantiated.
  }

  /**
   * Maps the current key field of the deserialize-read into a {@code long}
   * according to the hash table's key type.
   *
   * <p>The caller must already have positioned
   * {@code keyBinarySortableDeserializeRead} on the key field (i.e.
   * {@code readNextField()} returned true), so the relevant
   * {@code current*} member holds the key value.
   *
   * @param keyBinarySortableDeserializeRead deserializer whose current field
   *     holds the key value
   * @param hashTableKeyType the long-compatible key type
   *     (BOOLEAN/BYTE/SHORT/INT/DATE/LONG); DATE is represented as epoch days
   * @return the key widened to a {@code long}
   * @throws RuntimeException if {@code hashTableKeyType} is not a
   *     long-compatible key type
   */
  public static long deserializeLongKey(
      BinarySortableDeserializeRead keyBinarySortableDeserializeRead,
      HashTableKeyType hashTableKeyType) {
    // throws clause for the unchecked RuntimeException removed: it is
    // redundant in Java and only documented via @throws above.
    return switch (hashTableKeyType) {
      case BOOLEAN -> (keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0);
      case BYTE -> keyBinarySortableDeserializeRead.currentByte;
      case SHORT -> keyBinarySortableDeserializeRead.currentShort;
      case INT -> keyBinarySortableDeserializeRead.currentInt;
      case DATE -> keyBinarySortableDeserializeRead.currentDateWritable.getDays();
      case LONG -> keyBinarySortableDeserializeRead.currentLong;
      default ->
          throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
    };
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -824,6 +824,7 @@ protected void generateFullOuterSmallTableNoMatches(byte smallTablePos,
case SHORT:
case INT:
case LONG:
case DATE:
generateFullOuterLongKeySmallTableNoMatches();
break;
case STRING:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.hadoop.hive.common.MemoryEstimate;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
Expand Down Expand Up @@ -169,7 +170,7 @@ public long getHashCode(BytesWritable currentKey) throws HiveException, IOExcept
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
return HashCodeUtil.calculateLongHashCode(key);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMultiSet;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
Expand Down Expand Up @@ -87,7 +88,7 @@ public long getHashCode(BytesWritable currentKey) throws HiveException, IOExcept
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
return HashCodeUtil.calculateLongHashCode(key);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
Expand Down Expand Up @@ -86,7 +87,7 @@ public long getHashCode(BytesWritable currentKey) throws HiveException, IOExcept
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
return HashCodeUtil.calculateLongHashCode(key);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.slf4j.Logger;
Expand Down Expand Up @@ -77,7 +78,7 @@ public boolean adaptPutRow(long hashCode, BytesWritable currentKey, BytesWritabl
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType);
add(hashCode, key, currentValue);
return true;
}
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@

package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized;

import java.io.IOException;

import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
Expand All @@ -34,6 +32,9 @@
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;

import java.io.IOException;

/*
* A single long value hash map based on the BytesBytesMultiHashMap.
Expand Down Expand Up @@ -89,6 +90,9 @@ public void init() {
case LONG:
integerTypeInfo = TypeInfoFactory.longTypeInfo;
break;
case DATE:
integerTypeInfo = TypeInfoFactory.dateTypeInfo;
break;
default:
throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType);
}
Expand All @@ -107,25 +111,9 @@ private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveExceptio
if (!keyBinarySortableDeserializeRead.readNextField()) {
return false;
}
switch (hashMap.hashTableKeyType) {
case BOOLEAN:
longValue = keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0;
break;
case BYTE:
longValue = keyBinarySortableDeserializeRead.currentByte;
break;
case SHORT:
longValue = keyBinarySortableDeserializeRead.currentShort;
break;
case INT:
longValue = keyBinarySortableDeserializeRead.currentInt;
break;
case LONG:
longValue = keyBinarySortableDeserializeRead.currentLong;
break;
default:
throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType);
}
longValue =
VectorMapJoinLongHashUtil.deserializeLongKey(
keyBinarySortableDeserializeRead, hashMap.hashTableKeyType);
} catch (IOException e) {
throw new HiveException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
case LONG:
hashTableKeyType = HashTableKeyType.LONG;
break;
case DATE:
hashTableKeyType = HashTableKeyType.DATE;
break;
Comment on lines +397 to +399
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have unit tests exploiting this config? Do we need to add something in TestMapJoinOperator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will add a test case, testDate0, in TestMapJoinOperator. testString0 does make use of the DATE type, but only as a value column, not as a join key.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, if it's not a join key then the tests are not directly targeting the fix, so I'm not sure how many are needed. Do we have unit tests for join keys with different types somewhere in the repo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In TestMapJoinOperator.java, the following are the join keys for UT. The bigTableKeyColumnNums = new int[] {0}; determines the join column.

- testLong0: long
- testLong0_NoRegularKeys: long
- testLong1: int
- testLong2: short
- testLong3: int
- testLong3_NoRegularKeys: int
- testLong4: int
- testLong5: long
- testLong6: long
- testDate0: date
- testMultiKey0: short, int
- testMultiKey1: timestamp, short, string
- testMultiKey2: long, short, string
- testMultiKey3: date, byte
- testString0: string
- testString1: binary
- testString2: string

case STRING:
hashTableKeyType = HashTableKeyType.STRING;
break;
Expand Down Expand Up @@ -547,6 +550,7 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator(
case BYTE:
case SHORT:
case INT:
case DATE:
case LONG:
switch (VectorMapJoinVariation) {
case INNER:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,80 @@ public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation,
return false;
}

@Test
public void testDate0() throws Exception {
  long seed = 8322;
  int rowCount = 10;

  // Sweep hiveConf variations until doTestDate0 reports there are no more;
  // for each variation, exercise every map-join variation.
  int hiveConfVariation = 0;
  while (true) {
    boolean variationsExhausted = false;
    for (VectorMapJoinVariation joinVariation : VectorMapJoinVariation.values()) {
      variationsExhausted =
          doTestDate0(
              seed,
              rowCount,
              hiveConfVariation,
              joinVariation,
              MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
    }
    if (variationsExhausted) {
      break;
    }
    // New seed per hiveConf variation so data differs across sweeps.
    seed++;
    hiveConfVariation++;
  }
}

/**
 * Runs one date-key map-join test combination.
 *
 * <p>Big table: single DATE join key; small table: the key is retained and a
 * STRING value column is added.
 *
 * @return true when the given hiveConfVariation index is past the last
 *     supported variation (caller should stop sweeping); false otherwise
 */
public boolean doTestDate0(
    long seed,
    int rowCount,
    int hiveConfVariation,
    VectorMapJoinVariation vectorMapJoinVariation,
    MapJoinPlanVariation mapJoinPlanVariation)
    throws Exception {

  HiveConf hiveConf = getHiveConf();

  // Signal exhaustion of hiveConf variations to the caller.
  if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) {
    return true;
  }

  SmallTableGenerationParameters smallTableGenerationParameters =
      new SmallTableGenerationParameters();

  // Big Table: date key; Small Table: key retained, string value.
  TypeInfo[] bigTableTypeInfos = new TypeInfo[] {TypeInfoFactory.dateTypeInfo};
  int[] bigTableKeyColumnNums = new int[] {0};
  int[] smallTableRetainKeyColumnNums = new int[] {0};
  TypeInfo[] smallTableValueTypeInfos = new TypeInfo[] {TypeInfoFactory.stringTypeInfo};

  MapJoinTestDescription testDesc =
      new MapJoinTestDescription(
          hiveConf,
          vectorMapJoinVariation,
          bigTableTypeInfos,
          bigTableKeyColumnNums,
          smallTableValueTypeInfos,
          smallTableRetainKeyColumnNums,
          smallTableGenerationParameters,
          mapJoinPlanVariation);

  // Skip combinations that are not valid for this description.
  if (!goodTestVariation(testDesc)) {
    return false;
  }

  MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed);
  executeTest(testDesc, testData, "testDate0");

  return false;
}

private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) {

// Set defaults.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.VerifyLazy;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
Expand Down Expand Up @@ -335,6 +336,7 @@ public void verify(VectorMapJoinFastHashTableContainerBase map,
case SHORT:
case INT:
case LONG:
case DATE:
{
Object[] keyRow = element.getKeyRow();
Object keyObject = keyRow[0];
Expand All @@ -357,6 +359,9 @@ public void verify(VectorMapJoinFastHashTableContainerBase map,
case LONG:
longKey = ((LongWritable) keyObject).get();
break;
case DATE:
longKey = ((DateWritableV2) keyObject).getDays();
break;
default:
throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
}
Expand Down
Loading