Skip to content

Commit 03d3e8d

Browse files
dprophet and ebyhr
authored and committed
Avoid ignoring files visible in Hive
In Hive, ignore only those files whose own name, or the name of one of their ancestor directories, begins with a `.` or `_` character.
1 parent f2c1fcc commit 03d3e8d

File tree

3 files changed

+105
-1
lines changed

3 files changed

+105
-1
lines changed

plugin/trino-hive/src/main/java/io/trino/plugin/hive/fs/HiveFileIterator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ static boolean isHiddenOrWithinHiddenParentDirectory(Path path, String prefix)
150150
{
151151
String pathString = path.toUri().getPath();
152152
checkArgument(pathString.startsWith(prefix), "path %s does not start with prefix %s", pathString, prefix);
153-
return containsHiddenPathPartAfterIndex(pathString, prefix.length() + 1);
153+
return containsHiddenPathPartAfterIndex(pathString, prefix.endsWith("/") ? prefix.length() : prefix.length() + 1);
154154
}
155155

156156
@VisibleForTesting

plugin/trino-hive/src/test/java/io/trino/plugin/hive/fs/TestHiveFileIterator.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ public void testRelativeHiddenPathDetection()
3030
String root = new Path("file:///root-path").toUri().getPath();
3131
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, ".hidden/child"), root));
3232
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(root, "_hidden.txt"), root));
33+
String rootWithSlash = new Path("file:///root-path/").toUri().getPath();
34+
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, ".hidden/child"), rootWithSlash));
35+
assertTrue(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithSlash, "_hidden.txt"), rootWithSlash));
3336
String rootWithinHidden = new Path("file:///root/.hidden/listing-root").toUri().getPath();
3437
assertFalse(isHiddenOrWithinHiddenParentDirectory(new Path(rootWithinHidden, "file.txt"), rootWithinHidden));
3538
String rootHiddenEnding = new Path("file:///root/hidden-ending_").toUri().getPath();
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package io.trino.tests.product.hive;
15+
16+
import com.google.inject.Inject;
17+
import io.trino.tempto.ProductTest;
18+
import io.trino.tempto.assertions.QueryAssert;
19+
import io.trino.tempto.hadoop.hdfs.HdfsClient;
20+
import org.assertj.core.api.Assertions;
21+
import org.testng.annotations.Test;
22+
23+
import java.io.ByteArrayInputStream;
24+
import java.io.ByteArrayOutputStream;
25+
import java.io.IOException;
26+
import java.util.List;
27+
28+
import static io.trino.tempto.assertions.QueryAssert.Row.row;
29+
import static io.trino.tempto.assertions.QueryAssert.assertThat;
30+
import static io.trino.testing.TestingNames.randomNameSuffix;
31+
import static io.trino.tests.product.hive.util.TableLocationUtils.getTablePath;
32+
import static io.trino.tests.product.utils.QueryExecutors.onHive;
33+
import static io.trino.tests.product.utils.QueryExecutors.onTrino;
34+
35+
public class TestHiveHiddenFiles
36+
extends ProductTest
37+
{
38+
@Inject
39+
private HdfsClient hdfsClient;
40+
41+
@Test
42+
public void testSelectFromTableContainingHiddenFiles()
43+
throws Exception
44+
{
45+
String tableName = "test_table_hidden_files" + randomNameSuffix();
46+
onTrino().executeQuery("CREATE TABLE " + tableName + " (col integer)");
47+
48+
onTrino().executeQuery("INSERT INTO " + tableName + " VALUES 1");
49+
onTrino().executeQuery("INSERT INTO " + tableName + " VALUES 2");
50+
51+
List<QueryAssert.Row> tableRows = List.of(row(1), row(2));
52+
assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
53+
assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
54+
55+
String tableLocation = getTablePath(tableName);
56+
// Rename the table files to Hive hidden tableFiles (prefixed by `.` or `_` characters)
57+
List<String> tableFiles = hdfsClient.listDirectory(tableLocation);
58+
Assertions.assertThat(tableFiles).hasSize(2);
59+
renameFile(tableLocation, tableFiles.get(0), '.' + tableFiles.get(0));
60+
renameFile(tableLocation, tableFiles.get(1), '_' + tableFiles.get(1));
61+
62+
assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).hasNoRows();
63+
assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).hasNoRows();
64+
65+
onTrino().executeQuery("DROP TABLE IF EXISTS " + tableName);
66+
}
67+
68+
@Test
69+
public void testSelectFromTableContainingFilenamesWithUnderscore()
70+
throws Exception
71+
{
72+
String tableName = "test_table_visible_underscore_files" + randomNameSuffix();
73+
onTrino().executeQuery("CREATE TABLE " + tableName + " AS SELECT 1 AS col");
74+
75+
List<QueryAssert.Row> tableRows = List.of(row(1));
76+
assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
77+
assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
78+
79+
String tableLocation = getTablePath(tableName);
80+
// Prefix the table files with `f_` which should still keep them visible to Hive
81+
for (String filename : hdfsClient.listDirectory(tableLocation)) {
82+
// As long as the file is not hidden (starting with `.` or `_`), it should not be ignored by Hive
83+
renameFile(tableLocation, filename, "f_" + filename);
84+
}
85+
86+
assertThat(onTrino().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
87+
assertThat(onHive().executeQuery("SELECT * FROM " + tableName)).containsOnly(tableRows);
88+
89+
onTrino().executeQuery("DROP TABLE " + tableName);
90+
}
91+
92+
private void renameFile(String directoryLocation, String filename, String newFilename)
93+
throws IOException
94+
{
95+
try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) {
96+
hdfsClient.loadFile(directoryLocation + "/" + filename, bos);
97+
hdfsClient.saveFile(directoryLocation + "/" + newFilename, new ByteArrayInputStream(bos.toByteArray()));
98+
hdfsClient.delete(directoryLocation + "/" + filename);
99+
}
100+
}
101+
}

0 commit comments

Comments
 (0)