From 34dc6c79e2e2f1393b1255d20e443e1c6d46d31c Mon Sep 17 00:00:00 2001
From: kokila-19 <kokilanarayanan95@gmail.com>
Date: Mon, 11 May 2026 11:26:45 +0530
Subject: [PATCH 1/2] HIVE-29650: Upgrade ORC to 2.3.0

---
 hplsql/pom.xml                                |  5 ++
 pom.xml                                       |  2 +-
 ql/pom.xml                                    |  5 ++
 .../hive/ql/io/orc/TestFixAcidKeyIndex.java   |  3 +
 .../hive/ql/io/orc/TestInputOutputFormat.java | 61 +++++++++++++------
 .../TestOrcFileStripeMergeRecordReader.java   | 19 ++++--
 ...plainanalyze_acid_with_direct_insert.q.out | 20 +++---
 .../materialized_view_create_rewrite_10.q.out |  2 +-
 .../clientpositive/llap/orc_merge10.q.out     | 24 ++++----
 .../client/ThriftHiveMetaStoreClient.java     |  3 +-
 standalone-metastore/pom.xml                  |  2 +-
 11 files changed, 95 insertions(+), 51 deletions(-)
diff --git a/hplsql/pom.xml b/hplsql/pom.xml
index 6ab324cf3655..b821967811c7 100644
--- a/hplsql/pom.xml
+++ b/hplsql/pom.xml
@@ -106,6 +106,11 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.jetbrains</groupId>
+      <artifactId>annotations</artifactId>
+      <version>26.0.2</version>
+    </dependency>
     <dependency>
       <groupId>org.junit.jupiter</groupId>
       <artifactId>junit-jupiter-engine</artifactId>
diff --git a/pom.xml b/pom.xml
index 4481607165db..03b2229b3e72 100644
--- a/pom.xml
+++ b/pom.xml
@@ -187,7 +187,7 @@
     <postgres.version>42.7.3</postgres.version>
     <oracle.version>21.3.0.0</oracle.version>
     <opencsv.version>5.9</opencsv.version>
-    <orc.version>2.1.2</orc.version>
+    <orc.version>2.3.0</orc.version>
     <otel.version>1.60.1</otel.version>
     <mockito-core.version>5.17.0</mockito-core.version>
     <mockito-inline.version>5.2.0</mockito-inline.version>
diff --git a/ql/pom.xml b/ql/pom.xml
index 57672a607910..cad1f02b67aa 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -424,6 +424,11 @@
       <groupId>org.codehaus.groovy</groupId>
       <artifactId>groovy-all</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.jetbrains</groupId>
+      <artifactId>annotations</artifactId>
+      <version>26.0.2</version>
+    </dependency>
     <dependency>
       <groupId>org.jodd</groupId>
       <artifactId>jodd-util</artifactId>
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
index 4d45f9e4fbd3..4c0327b1ae7f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestFixAcidKeyIndex.java
@@ -32,6 +32,7 @@
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcConf;
 import org.apache.orc.OrcFile.WriterContext;
 import org.apache.orc.impl.OrcAcidUtils;
 import org.junit.Before;
@@ -86,6 +87,8 @@ void createTestAcidFile(Path path,
     FileSystem fs = path.getFileSystem(conf);
     fs.delete(path, true);
     TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr);
+    OrcConf.STRIPE_SIZE_CHECKRATIO.setDouble(conf, 0);
+    OrcConf.STRIPE_ROW_COUNT.setLong(conf, 5000);
     Writer writer = OrcFile.createWriter(path,
         OrcFile.writerOptions(conf)
             .fileSystem(fs)
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 0584b7418e0a..dd2c828730a5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -104,6 +104,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.shims.CombineHiveKey;
@@ -2624,7 +2625,7 @@ public void testCombinationInputFormatWithAcid() throws Exception {
     assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000",
         split.getPath().toString());
     assertEquals(0, split.getStart());
-    assertEquals(784, split.getLength());
+    assertEquals(791, split.getLength());
     split = (HiveInputFormat.HiveInputSplit) splits[1];
     assertEquals("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
         split.inputFormatClassName());
@@ -4172,21 +4173,19 @@ public void testAcidReadPastLastStripeOffset() throws Exception {
       // d
       ((BytesColumnVector) scv.fields[2]).setVal(r,
           Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
-      indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION,
-          1, (int)(((LongColumnVector) batch.cols[2]).vector[0]), r);
     }
 
-    // Minimum 5000 rows per stripe.
+    // Match OrcRecordUpdater: addKey before rows are written. If addKey runs after addRowBatch,
+    // a mid-batch flush can snapshot the index with the previous statement id while the stripe
+    // already contains rows from the current batch.
     for (int idx = 0; idx < 8; ++idx) {
-      writer.addRowBatch(batch);
-      // bucket
-      batch.cols[2].isRepeating = true;
-      ((LongColumnVector) batch.cols[2]).vector[0] = BucketCodec.V1.encode(new AcidOutputFormat
-          .Options(conf).bucket(0).statementId(idx + 1));
-      for(long row_id : ((LongColumnVector) batch.cols[3]).vector) {
-        indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION,
-            1, (int)(((LongColumnVector) batch.cols[2]).vector[0]), row_id);
+      final int bucketForBatch = (int) ((LongColumnVector) batch.cols[2]).vector[0];
+      for (long row_id : ((LongColumnVector) batch.cols[3]).vector) {
+        indexBuilder.addKey(OrcRecordUpdater.INSERT_OPERATION, 1, bucketForBatch, row_id);
       }
+      writer.addRowBatch(batch);
+      ((LongColumnVector) batch.cols[2]).vector[0] = BucketCodec.V1.encode(
+          new AcidOutputFormat.Options(conf).bucket(0).statementId(idx + 1));
     }
     writer.close();
     long fileLength = fs.getFileStatus(testFilePath).getLen();
@@ -4194,22 +4193,44 @@ public void testAcidReadPastLastStripeOffset() throws Exception {
     // Find the last stripe.
     List<StripeInformation> stripes;
     RecordIdentifier[] keyIndex;
-    try (Reader orcReader = OrcFile.createReader(fs, testFilePath)) {
+    try (Reader orcReader = OrcFile.createReader(fs, testFilePath);
+        RecordReader rr = orcReader.rows()) {
       stripes = orcReader.getStripes();
       keyIndex = OrcRecordUpdater.parseKeyIndex(orcReader);
+
+      StructObjectInspector soi = (StructObjectInspector) orcReader.getObjectInspector();
+      List<? extends StructField> structFields = soi.getAllStructFieldRefs();
+      StructField transactionField = structFields.get(1);
+      LongObjectInspector transactionOI =
+          (LongObjectInspector) transactionField.getFieldObjectInspector();
+      StructField bucketField = structFields.get(2);
+      IntObjectInspector bucketOI =
+          (IntObjectInspector) bucketField.getFieldObjectInspector();
+      StructField rowIdField = structFields.get(3);
+      LongObjectInspector rowIdOI =
+          (LongObjectInspector) rowIdField.getFieldObjectInspector();
+
+      Assert.assertEquals("Index length doesn't match number of stripes",
+          stripes.size(), keyIndex.length);
+      long rowsProcessed = 0;
+      for (int i = 0; i < stripes.size(); i++) {
+        rowsProcessed += stripes.get(i).getNumberOfRows();
+        rr.seekToRow(rowsProcessed - 1);
+        OrcStruct row = (OrcStruct) rr.next(null);
+        long lastTransaction =
+            transactionOI.get(soi.getStructFieldData(row, transactionField));
+        int lastBucket = bucketOI.get(soi.getStructFieldData(row, bucketField));
+        long lastRowId = rowIdOI.get(soi.getStructFieldData(row, rowIdField));
+        RecordIdentifier expected =
+            new RecordIdentifier(lastTransaction, lastBucket, lastRowId);
+        Assert.assertEquals("Index entry mismatch for stripe " + i, expected, keyIndex[i]);
+      }
     }
 
     StripeInformation lastStripe = stripes.get(stripes.size() - 1);
     long lastStripeOffset = lastStripe.getOffset();
     long lastStripeLength = lastStripe.getLength();
 
-    Assert.assertEquals("Index length doesn't match number of stripes",
-        stripes.size(), keyIndex.length);
-    Assert.assertEquals("1st Index entry mismatch",
-        new RecordIdentifier(1, 536870916, 999), keyIndex[0]);
-    Assert.assertEquals("2nd Index entry mismatch",
-        new RecordIdentifier(1, 536870920, 999), keyIndex[1]);
-
     // test with same schema with include
     conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
     conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFileStripeMergeRecordReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFileStripeMergeRecordReader.java
index 3b81f1e68d41..025778abf1b6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFileStripeMergeRecordReader.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcFileStripeMergeRecordReader.java
@@ -28,6 +28,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileSplit;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.OrcConf;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -37,7 +39,9 @@
 
 public class TestOrcFileStripeMergeRecordReader {
 
-  private static final int TEST_STRIPE_SIZE = 5000;
+  private static final int MAX_ROWS_PER_STRIPE = 5000;
+
+  private static final long STRIPE_SIZE_BYTES = 128;
 
   private OrcFileKeyWrapper key;
   private OrcFileValueWrapper value;
@@ -51,11 +55,16 @@ public class TestOrcFileStripeMergeRecordReader {
   @Before
   public void setup() throws IOException {
     conf = new Configuration();
+    // ORC >= 2.x: OrcConf.STRIPE_SIZE_CHECKRATIO flushes a stripe once buffered tree-writer bytes
+    // exceed ratio * orc.stripe.size; setting the ratio to 0 disables that size-based check.
+    OrcConf.STRIPE_SIZE_CHECKRATIO.setDouble(conf, 0);
+    // Cap the number of rows written into a single ORC stripe.
+    OrcConf.STRIPE_ROW_COUNT.setLong(conf, MAX_ROWS_PER_STRIPE);
     fs = FileSystem.getLocal(conf);
     key = new OrcFileKeyWrapper();
     value = new OrcFileValueWrapper();
     tmpPath  = prepareTmpPath();
-    createOrcFile(TEST_STRIPE_SIZE, TEST_STRIPE_SIZE + 1);
+    createOrcFile(MAX_ROWS_PER_STRIPE + 1);
   }
 
   @After
@@ -86,7 +95,7 @@ public void testSplitStartsWithZeroOffset() throws IOException {
     // both stripes will be processed, first stripe has 5000 rows and second stripe has 1 row
     reader.next(key, value);
     Assert.assertEquals("InputPath", tmpPath, key.getInputPath());
-    Assert.assertEquals("NumberOfValues", TEST_STRIPE_SIZE,
+    Assert.assertEquals("NumberOfValues", MAX_ROWS_PER_STRIPE,
         value.getStripeStatistics().getColStats(0).getNumberOfValues());
     reader.next(key, value);
     Assert.assertEquals("InputPath", tmpPath, key.getInputPath());
@@ -96,7 +105,7 @@ public void testSplitStartsWithZeroOffset() throws IOException {
     reader.close();
   }
 
-  private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
+  private void createOrcFile(int numberOfRows) throws IOException {
     ObjectInspector inspector;
     synchronized (TestOrcFileStripeMergeRecordReader.class) {
       inspector = ObjectInspectorFactory.getReflectionObjectInspector
@@ -106,7 +115,7 @@ private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
     Writer writer = OrcFile.createWriter(tmpPath,
         OrcFile.writerOptions(conf)
             .inspector(inspector)
-            .stripeSize(stripSize)
+            .stripeSize(STRIPE_SIZE_BYTES)
             .compress(CompressionKind.ZLIB)
             .bufferSize(5000)
             .rowIndexStride(1000));
diff --git a/ql/src/test/results/clientpositive/llap/explainanalyze_acid_with_direct_insert.q.out b/ql/src/test/results/clientpositive/llap/explainanalyze_acid_with_direct_insert.q.out
index def4bcbfbf8b..aacceb8805c0 100644
--- a/ql/src/test/results/clientpositive/llap/explainanalyze_acid_with_direct_insert.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainanalyze_acid_with_direct_insert.q.out
@@ -825,17 +825,17 @@ STAGE PLANS:
                 TableScan
                   alias: analyze_part_table
                   filterExpr: (b = 1) (type: boolean)
-                  Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                      Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                       value expressions: 1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
@@ -845,10 +845,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1795,17 +1795,17 @@ STAGE PLANS:
                 TableScan
                   alias: analyze_part_table
                   filterExpr: (b = 1) (type: boolean)
-                  Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                       null sort order: z
                       sort order: +
                       Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                      Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                       value expressions: 1 (type: int)
             Execution mode: vectorized, llap
             LLAP IO: may be used (ACID table)
@@ -1815,10 +1815,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 2/2 Data size: 6376 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2/2 Data size: 6386 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
index 2f54d94a654e..62742a9e4636 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_create_rewrite_10.q.out
@@ -79,7 +79,7 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   filterExpr: (b = 1) (type: boolean)
-                  Statistics: Num rows: 69 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 70 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (b = 1) (type: boolean)
                     Statistics: Num rows: 1 Data size: #Masked# Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
index 63c3feb4b2e8..4acb6ff50ebe 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
@@ -761,22 +761,22 @@ Stripe Statistics:
   Stripe 1:
     Column 0: count: 242 hasNull: false
     Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-    Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646
+    Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
 
 File Statistics:
   Column 0: count: 242 hasNull: false
   Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-  Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646
+  Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
 
 Stripes:
-  Stripe: offset: 3 data: 1399 rows: 242 tail: 73 index: 77
+  Stripe: offset: 3 data: 1403 rows: 242 tail: 73 index: 77
     Stream: column 0 section ROW_INDEX start: 3 length 12
     Stream: column 1 section ROW_INDEX start: 15 length 28
     Stream: column 2 section ROW_INDEX start: 43 length 37
     Stream: column 1 section DATA start: 80 length 489
     Stream: column 2 section DATA start: 569 length 247
-    Stream: column 2 section LENGTH start: 816 length 71
-    Stream: column 2 section DICTIONARY_DATA start: 887 length 592
+    Stream: column 2 section LENGTH start: 816 length 72
+    Stream: column 2 section DICTIONARY_DATA start: 888 length 595
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DICTIONARY_V2[153]
@@ -787,7 +787,7 @@ Stripes:
     Row group indices for column 2:
       Entry 0: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 positions: 0,0,0
 
-File length: 1763 bytes
+File length: 1767 bytes
 File raw data size: #Masked#
 Padding length: 0 bytes
 Padding ratio: 0%
@@ -813,22 +813,22 @@ Stripe Statistics:
   Stripe 1:
     Column 0: count: 242 hasNull: false
     Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-    Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646
+    Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
 
 File Statistics:
   Column 0: count: 242 hasNull: false
   Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-  Column 2: count: 242 hasNull: false bytesOnDisk: 910 min: val_0 max: val_97 sum: 1646
+  Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
 
 Stripes:
-  Stripe: offset: 3 data: 1399 rows: 242 tail: 73 index: 77
+  Stripe: offset: 3 data: 1403 rows: 242 tail: 73 index: 77
     Stream: column 0 section ROW_INDEX start: 3 length 12
     Stream: column 1 section ROW_INDEX start: 15 length 28
     Stream: column 2 section ROW_INDEX start: 43 length 37
     Stream: column 1 section DATA start: 80 length 489
     Stream: column 2 section DATA start: 569 length 247
-    Stream: column 2 section LENGTH start: 816 length 71
-    Stream: column 2 section DICTIONARY_DATA start: 887 length 592
+    Stream: column 2 section LENGTH start: 816 length 72
+    Stream: column 2 section DICTIONARY_DATA start: 888 length 595
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DICTIONARY_V2[153]
@@ -839,7 +839,7 @@ Stripes:
     Row group indices for column 2:
       Entry 0: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 positions: 0,0,0
 
-File length: 1763 bytes
+File length: 1767 bytes
 File raw data size: #Masked#
 Padding length: 0 bytes
 Padding ratio: 0%
diff --git a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/ThriftHiveMetaStoreClient.java b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/ThriftHiveMetaStoreClient.java
index 4cce91d84b19..ba6264c37f50 100644
--- a/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/ThriftHiveMetaStoreClient.java
+++ b/standalone-metastore/metastore-client/src/main/java/org/apache/hadoop/hive/metastore/client/ThriftHiveMetaStoreClient.java
@@ -64,10 +64,11 @@
 import org.apache.thrift.transport.TTransport;
 import org.apache.thrift.transport.TTransportException;
 import org.apache.thrift.transport.layered.TFramedTransport;
-import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index 2eb75caf9281..c366d7c429e9 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -98,7 +98,7 @@
     <libthrift.version>0.16.0</libthrift.version>
     <log4j2.version>2.25.3</log4j2.version>
     <mockito-core.version>5.17.0</mockito-core.version>
-    <orc.version>2.1.2</orc.version>
+    <orc.version>2.3.0</orc.version>
     <protobuf.version>3.25.5</protobuf.version>
     <protoc-jar-maven-plugin.version>3.11.4</protoc-jar-maven-plugin.version>
     <protoc.path>${env.PROTOC_PATH}</protoc.path>

From f5538b35e3dd352409389960c8f730a4d2990897 Mon Sep 17 00:00:00 2001
From: kokila-19 <kokilanarayanan95@gmail.com>
Date: Mon, 8 Jun 2026 00:13:28 +0530
Subject: [PATCH 2/2] Add JetBrains annotations to dependencyManagement and
 fix golden file

---
 hplsql/pom.xml                                |  1 -
 pom.xml                                       |  6 +++++
 ql/pom.xml                                    |  1 -
 .../test/queries/clientpositive/orc_merge10.q |  1 +
 .../clientpositive/llap/orc_merge10.q.out     | 24 +++++++++----------
 standalone-metastore/metastore-tools/pom.xml  |  7 ------
 standalone-metastore/pom.xml                  |  6 +++++
 7 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/hplsql/pom.xml b/hplsql/pom.xml
index b821967811c7..853c6605cca2 100644
--- a/hplsql/pom.xml
+++ b/hplsql/pom.xml
@@ -109,7 +109,6 @@
     <dependency>
       <groupId>org.jetbrains</groupId>
       <artifactId>annotations</artifactId>
-      <version>26.0.2</version>
     </dependency>
     <dependency>
       <groupId>org.junit.jupiter</groupId>
diff --git a/pom.xml b/pom.xml
index 03b2229b3e72..4c57f28e6c86 100644
--- a/pom.xml
+++ b/pom.xml
@@ -164,6 +164,7 @@
     <jettison.version>1.5.4</jettison.version>
     <jetty.version>9.4.57.v20241219</jetty.version>
     <jersey.version>1.19.4</jersey.version>
+    <jetbrains-annotations.version>26.0.2</jetbrains-annotations.version>
     <!-- HIVE-28992: only upgrade to newer than 3.25.0 if you tested the prompt -->
     <jline.version>3.25.0</jline.version>
     <jms.version>2.0.2</jms.version>
@@ -473,6 +474,11 @@
         <artifactId>javolution</artifactId>
         <version>${javolution.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.jetbrains</groupId>
+        <artifactId>annotations</artifactId>
+        <version>${jetbrains-annotations.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.jline</groupId>
         <artifactId>jline</artifactId>
diff --git a/ql/pom.xml b/ql/pom.xml
index cad1f02b67aa..af3764440c42 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -427,7 +427,6 @@
     <dependency>
       <groupId>org.jetbrains</groupId>
       <artifactId>annotations</artifactId>
-      <version>26.0.2</version>
     </dependency>
     <dependency>
       <groupId>org.jodd</groupId>
diff --git a/ql/src/test/queries/clientpositive/orc_merge10.q b/ql/src/test/queries/clientpositive/orc_merge10.q
index 961457944b3f..d6793ad27b17 100644
--- a/ql/src/test/queries/clientpositive/orc_merge10.q
+++ b/ql/src/test/queries/clientpositive/orc_merge10.q
@@ -1,6 +1,7 @@
 --! qt:dataset:src
 --! qt:dataset:part
 --! qt:replace:/(File Version:)(.+)/$1#Masked#/
+--! qt:replace:/(File length:\s+)\S+(\s+bytes)/$1#Masked#$2/
 set hive.vectorized.execution.enabled=false;
 set hive.compute.query.using.stats=false;
 set hive.mapred.mode=nonstrict;
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
index 4acb6ff50ebe..8adceeaf8b45 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
@@ -761,22 +761,22 @@ Stripe Statistics:
   Stripe 1:
     Column 0: count: 242 hasNull: false
     Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-    Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
+    Column 2: count: 242 hasNull: false bytesOnDisk: 913 min: val_0 max: val_97 sum: 1646
 
 File Statistics:
   Column 0: count: 242 hasNull: false
   Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-  Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
+  Column 2: count: 242 hasNull: false bytesOnDisk: 913 min: val_0 max: val_97 sum: 1646
 
 Stripes:
-  Stripe: offset: 3 data: 1403 rows: 242 tail: 73 index: 77
+  Stripe: offset: 3 data: 1402 rows: 242 tail: 73 index: 77
     Stream: column 0 section ROW_INDEX start: 3 length 12
     Stream: column 1 section ROW_INDEX start: 15 length 28
     Stream: column 2 section ROW_INDEX start: 43 length 37
     Stream: column 1 section DATA start: 80 length 489
     Stream: column 2 section DATA start: 569 length 247
-    Stream: column 2 section LENGTH start: 816 length 72
-    Stream: column 2 section DICTIONARY_DATA start: 888 length 595
+    Stream: column 2 section LENGTH start: 816 length 71
+    Stream: column 2 section DICTIONARY_DATA start: 887 length 595
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DICTIONARY_V2[153]
@@ -787,7 +787,7 @@ Stripes:
     Row group indices for column 2:
       Entry 0: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 positions: 0,0,0
 
-File length: 1767 bytes
+File length: #Masked# bytes
 File raw data size: #Masked#
 Padding length: 0 bytes
 Padding ratio: 0%
@@ -813,22 +813,22 @@ Stripe Statistics:
   Stripe 1:
     Column 0: count: 242 hasNull: false
     Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-    Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
+    Column 2: count: 242 hasNull: false bytesOnDisk: 913 min: val_0 max: val_97 sum: 1646
 
 File Statistics:
   Column 0: count: 242 hasNull: false
   Column 1: count: 242 hasNull: false bytesOnDisk: 489 min: 0 max: 497 sum: 60770
-  Column 2: count: 242 hasNull: false bytesOnDisk: 914 min: val_0 max: val_97 sum: 1646
+  Column 2: count: 242 hasNull: false bytesOnDisk: 913 min: val_0 max: val_97 sum: 1646
 
 Stripes:
-  Stripe: offset: 3 data: 1403 rows: 242 tail: 73 index: 77
+  Stripe: offset: 3 data: 1402 rows: 242 tail: 73 index: 77
     Stream: column 0 section ROW_INDEX start: 3 length 12
     Stream: column 1 section ROW_INDEX start: 15 length 28
     Stream: column 2 section ROW_INDEX start: 43 length 37
     Stream: column 1 section DATA start: 80 length 489
     Stream: column 2 section DATA start: 569 length 247
-    Stream: column 2 section LENGTH start: 816 length 72
-    Stream: column 2 section DICTIONARY_DATA start: 888 length 595
+    Stream: column 2 section LENGTH start: 816 length 71
+    Stream: column 2 section DICTIONARY_DATA start: 887 length 595
     Encoding column 0: DIRECT
     Encoding column 1: DIRECT_V2
     Encoding column 2: DICTIONARY_V2[153]
@@ -839,7 +839,7 @@ Stripes:
     Row group indices for column 2:
       Entry 0: count: 242 hasNull: false min: val_0 max: val_97 sum: 1646 positions: 0,0,0
 
-File length: 1767 bytes
+File length: #Masked# bytes
 File raw data size: #Masked#
 Padding length: 0 bytes
 Padding ratio: 0%
diff --git a/standalone-metastore/metastore-tools/pom.xml b/standalone-metastore/metastore-tools/pom.xml
index 89c0d8153e32..614144a272c5 100644
--- a/standalone-metastore/metastore-tools/pom.xml
+++ b/standalone-metastore/metastore-tools/pom.xml
@@ -30,7 +30,6 @@
     <errorprone.core.version>2.29.2</errorprone.core.version>
     <picocli.version>3.1.0</picocli.version>
     <commons-math3.version>3.6.1</commons-math3.version>
-    <jetbrain-annotation.version>16.0.2</jetbrain-annotation.version>
     <standalone.metastore.path.to.root>..</standalone.metastore.path.to.root>
   </properties>
   <!-- why the f*ck this pom.xml has a dependencyManagement block? -->
@@ -80,12 +79,6 @@
         <artifactId>slf4j-log4j12</artifactId>
         <version>1.7.30</version>
       </dependency>
-      <!-- https://mvnrepository.com/artifact/org.jetbrains/annotations -->
-      <dependency>
-        <groupId>org.jetbrains</groupId>
-        <artifactId>annotations</artifactId>
-        <version>${jetbrain-annotation.version}</version>
-      </dependency>
       <!-- https://mvnrepository.com/artifact/org.apache.maven.plugins/maven-jxr-plugin -->
       <dependency>
         <groupId>org.apache.maven.plugins</groupId>
diff --git a/standalone-metastore/pom.xml b/standalone-metastore/pom.xml
index c366d7c429e9..a3f2ec8b06e9 100644
--- a/standalone-metastore/pom.xml
+++ b/standalone-metastore/pom.xml
@@ -106,6 +106,7 @@
     <io.grpc.version>1.72.0</io.grpc.version>
     <sqlline.version>1.9.0</sqlline.version>
     <netty.version>4.1.127.Final</netty.version>
+    <jetbrains-annotations.version>26.0.2</jetbrains-annotations.version>
     <!-- HIVE-28992: only upgrade to newer than 3.25.0 if you tested the prompt -->
     <jline.version>3.25.0</jline.version>
     <ST4.version>4.0.4</ST4.version>
@@ -374,6 +375,11 @@
           </exclusion>
         </exclusions>
       </dependency>
+      <dependency>
+        <groupId>org.jetbrains</groupId>
+        <artifactId>annotations</artifactId>
+        <version>${jetbrains-annotations.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.jline</groupId>
         <artifactId>jline</artifactId>