chore(deps): upgrade common dependencies (#197)
refactor: remove deprecated apis

build(deps): bump com.google.apis:google-api-services-calendar from v3-rev20241101-2.0.0 to v3-rev20250115-2.0.0 #189

build(deps): bump com.google.apis:google-api-services-drive from v3-rev20241206-2.0.0 to v3-rev20250122-2.0.0 #188

build(deps): bump com.google.auth:google-auth-library-oauth2-http from 1.30.1 to 1.31.0 #187

build(deps): bump com.google.api-client:google-api-client from 2.7.1 to 2.7.2 #186

build(deps): bump com.google.apis:google-api-services-sheets from v4-rev20241203-2.0.0 to v4-rev20250106-2.0.0 #185

close #131 
build(deps): bump org.apache.orc:orc-mapreduce from 1.8.7 to 2.1.0 #184

build(deps): bump org.apache.orc:orc-core from 1.8.7 to 2.1.0 #183

build(deps): bump org.apache.commons:commons-csv from 1.12.0 to 1.13.0 #182
mgabelle authored Feb 5, 2025
1 parent d367f79 commit 0fcc986
Showing 2 changed files with 39 additions and 38 deletions.
build.gradle (17 changes: 9 additions & 8 deletions)

@@ -49,11 +49,11 @@ dependencies {
 
     // Google libraries are managed by the Kestra Platform so they are aligned on all plugins
     api platform('com.google.cloud:libraries-bom')
-    api 'com.google.api-client:google-api-client:2.7.1'
-    api 'com.google.auth:google-auth-library-oauth2-http:1.30.1'
-    api 'com.google.apis:google-api-services-drive:v3-rev20241206-2.0.0'
-    api 'com.google.apis:google-api-services-sheets:v4-rev20241203-2.0.0'
-    api 'com.google.apis:google-api-services-calendar:v3-rev20241101-2.0.0'
+    api 'com.google.api-client:google-api-client:2.7.2'
+    api 'com.google.auth:google-auth-library-oauth2-http:1.31.0'
+    api 'com.google.apis:google-api-services-drive:v3-rev20250122-2.0.0'
+    api 'com.google.apis:google-api-services-sheets:v4-rev20250106-2.0.0'
+    api 'com.google.apis:google-api-services-calendar:v3-rev20250115-2.0.0'
 
     // Logs
     compileOnly 'org.slf4j:slf4j-api'
@@ -63,8 +63,9 @@ dependencies {
     api('org.apache.parquet:parquet-hadoop:1.15.0')
 
     // For ORC parsing
-    api('org.apache.orc:orc-core:1.8.7')
-    api('org.apache.orc:orc-mapreduce:1.8.7')
+    api('org.apache.orc:orc-core:2.1.0')
+    api('org.apache.orc:orc-mapreduce:2.1.0')
+    api('org.apache.hive:hive-storage-api:4.0.1')
 
     // Additional libraries for Parquet, Avro, ORC parsers
     api('org.apache.hadoop:hadoop-hdfs-client:3.4.1') {
@@ -81,7 +82,7 @@ dependencies {
     compileOnly 'com.fasterxml.jackson.core:jackson-databind'
 
     // For CSV parsing
-    api('org.apache.commons:commons-csv:1.12.0')
+    api('org.apache.commons:commons-csv:1.13.0')
 }
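
Note on the ORC bump: 1.8.7 to 2.1.0 crosses a major version, and the build now declares org.apache.hive:hive-storage-api explicitly. That library supplies the VectorizedRowBatch and ColumnVector types the parser iterates over, so pinning it (here to 4.0.1) presumably keeps it aligned with what orc-core 2.1.0 compiles against. Below is a minimal sketch of reading an ORC file against this dependency set; the class name and file path are illustrative, not part of the commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;

public class OrcReadSketch {
    public static void main(String[] args) throws Exception {
        // VectorizedRowBatch comes from hive-storage-api, now an explicit dependency.
        Configuration conf = new Configuration();
        try (Reader reader = OrcFile.createReader(new Path("/tmp/sample.orc"), OrcFile.readerOptions(conf))) {
            TypeDescription schema = reader.getSchema();
            System.out.println("columns: " + schema.getFieldNames());

            VectorizedRowBatch rowBatch = schema.createRowBatch();
            long rowCount = 0;
            try (RecordReader rows = reader.rows()) {
                // nextBatch() refills the batch and returns false once the file is
                // exhausted, the same loop shape parseORC uses below.
                while (rows.nextBatch(rowBatch)) {
                    rowCount += rowBatch.size;
                }
            }
            System.out.println("rows: " + rowCount);
        }
    }
}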


Second changed file (Java parser class; 30 additions & 30 deletions)

@@ -11,6 +11,7 @@
 import org.apache.commons.csv.CSVFormat;
 import org.apache.commons.csv.CSVParser;
 import org.apache.commons.csv.CSVRecord;
+import org.apache.commons.csv.DuplicateHeaderMode;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -44,11 +45,11 @@ public List<List<Object>> parseCsv(InputStream inputStream, AbstractLoad.CsvOptions
 
         InputStreamReader reader = new InputStreamReader(inputStream, charset);
         CSVFormat format = getCsvFormat(csvOptions);
-        try (CSVParser parser = new CSVParser(reader, format)) {
-            for (CSVRecord record : parser) {
+        try (CSVParser parser = CSVParser.builder().setFormat(format).setReader(reader).get()) {
+            for (CSVRecord csvRecord : parser) {
                 List<Object> row = new ArrayList<>();
 
-                record.forEach(row::add);
+                csvRecord.forEach(row::add);
                 result.add(row);
             }
             return result;
@@ -93,7 +94,7 @@ public List<List<Object>> parseAvro(InputStream inputStream, boolean includeHeaders
             }
 
             while (reader.hasNext()) {
-                GenericRecord record = reader.next();
+                GenericRecord genericRecord = reader.next();
                 List<Object> row = new ArrayList<>();
 
                 if (includeHeaders && !isHeaderIncluded) {
@@ -109,7 +110,7 @@
                 }
 
                 schema.getFields()
-                    .forEach(field -> row.add(record.get(field.name()).toString()));
+                    .forEach(field -> row.add(genericRecord.get(field.name()).toString()));
 
                 result.add(row);
             }
@@ -134,13 +135,13 @@ public List<List<Object>> parseParquet(InputStream inputStream, boolean includeHeaders
         try (ParquetReader<GenericRecord> reader = AvroParquetReader
             .<GenericRecord>builder(inputFile).withConf(configuration).build()
         ) {
-            GenericRecord record;
-            while ((record = reader.read()) != null) {
+            GenericRecord genericRecord;
+            while ((genericRecord = reader.read()) != null) {
                 List<Object> row = new ArrayList<>();
 
                 if (includeHeaders && !isHeaderIncluded) {
-                    List<Object> headers = new ArrayList<Object>(
-                        record.getSchema()
+                    List<Object> headers = new ArrayList<>(
+                        genericRecord.getSchema()
                             .getFields()
                             .stream()
                             .map(Schema.Field::name)
@@ -151,8 +152,8 @@ public List<List<Object>> parseParquet(InputStream inputStream, boolean includeHeaders
                     result.add(headers);
                 }
 
-                GenericRecord finalRecord = record;
-                record.getSchema()
+                GenericRecord finalRecord = genericRecord;
+                genericRecord.getSchema()
                     .getFields()
                     .forEach(field -> row.add(
                         finalRecord.get(field.name()).toString())
@@ -185,23 +186,23 @@ public List<List<Object>> parseORC(InputStream inputStream, boolean includeHeaders
         try (RecordReader rows = reader.rows()) {
             if (includeHeaders) {
                 result.add(
-                    new ArrayList<Object>(
+                    new ArrayList<>(
                         schema.getFieldNames()
                     )
                 );
             }
 
             while (rows.nextBatch(rowBatch)) {
                 for (int row = 0; row < rowBatch.size; row++) {
-                    List<Object> record = new ArrayList<>();
+                    List<Object> records = new ArrayList<>();
 
                     for (ColumnVector vector : rowBatch.cols) {
-                        record.add(
+                        records.add(
                             getValue(vector, row)
                         );
                     }
 
-                    result.add(record);
+                    result.add(records);
                 }
             }
         }
@@ -237,21 +238,20 @@ private Object getValue(ColumnVector vector, int row) {
 
     private CSVFormat getCsvFormat(AbstractLoad.CsvOptions csvOptions) throws IllegalVariableEvaluationException {
         return CSVFormat.Builder.create()
-            .setDelimiter(
-                csvOptions.getFieldDelimiter() != null ?
-                    this.runContext.render(csvOptions.getFieldDelimiter()).as(String.class).orElseThrow() :
-                    CSVFormat.DEFAULT.getDelimiterString()
-            )
-            .setQuote(
-                csvOptions.getQuote() != null ?
-                    this.runContext.render(csvOptions.getQuote()).as(String.class).orElseThrow().charAt(0) :
-                    CSVFormat.DEFAULT.getQuoteCharacter()
-            )
-            .setRecordSeparator(CSVFormat.DEFAULT.getRecordSeparator())
-            .setIgnoreEmptyLines(true)
-            .setAllowDuplicateHeaderNames(false)
-            .setSkipHeaderRecord(csvOptions.getSkipLeadingRows() != null && runContext.render(csvOptions.getSkipLeadingRows()).as(Long.class).orElseThrow() > 0)
-            .build();
+            .setDelimiter(
+                csvOptions.getFieldDelimiter() != null ?
+                    this.runContext.render(csvOptions.getFieldDelimiter()).as(String.class).orElseThrow() :
+                    CSVFormat.DEFAULT.getDelimiterString()
+            )
+            .setQuote(
+                csvOptions.getQuote() != null ?
+                    this.runContext.render(csvOptions.getQuote()).as(String.class).orElseThrow().charAt(0) :
+                    CSVFormat.DEFAULT.getQuoteCharacter()
+            )
+            .setRecordSeparator(CSVFormat.DEFAULT.getRecordSeparator())
+            .setIgnoreEmptyLines(true)
+            .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
+            .setSkipHeaderRecord(csvOptions.getSkipLeadingRows() != null && runContext.render(csvOptions.getSkipLeadingRows()).as(Long.class).orElseThrow() > 0).get();
     }
 
 }
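
Note on the commons-csv changes: they track deprecations in recent releases. The CSVParser(Reader, CSVFormat) constructor gives way to CSVParser.builder(), CSVFormat.Builder.build() to get() (the builder now acts as a Supplier), and setAllowDuplicateHeaderNames(false) to setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY), the documented equivalent of the old false: duplicate non-empty header names are rejected while duplicate empty ones are tolerated. A minimal, self-contained sketch of the new style against commons-csv 1.13.0; the sample data and class name are illustrative only:

import java.io.Reader;
import java.io.StringReader;
import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.csv.DuplicateHeaderMode;

public class CsvBuilderSketch {
    public static void main(String[] args) throws Exception {
        // get() supersedes the deprecated build(); ALLOW_EMPTY mirrors the removed
        // setAllowDuplicateHeaderNames(false).
        CSVFormat format = CSVFormat.Builder.create()
            .setIgnoreEmptyLines(true)
            .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
            .get();

        Reader reader = new StringReader("a,b\n1,2\n3,4\n");
        // Builder-based construction replaces the deprecated new CSVParser(reader, format).
        try (CSVParser parser = CSVParser.builder().setFormat(format).setReader(reader).get()) {
            for (CSVRecord csvRecord : parser) {
                List<String> row = csvRecord.toList();
                System.out.println(row);
            }
        }
    }
}

Separately, the record-to-csvRecord/genericRecord renames are not forced by the upgrade: record remains a legal local-variable name in modern Java (it is restricted only in type declarations), so they read as a readability and lint cleanup done alongside the deprecation work.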
