-
Notifications
You must be signed in to change notification settings - Fork 214
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add aggregate metrics for ddb source export and stream #3724
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
package org.opensearch.dataprepper.plugins.source.dynamodb.leader; | ||
|
||
import org.opensearch.dataprepper.plugins.source.dynamodb.utils.DynamoDBSourceAggregateMetrics; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import software.amazon.awssdk.services.dynamodb.model.DescribeStreamRequest; | ||
import software.amazon.awssdk.services.dynamodb.model.DescribeStreamResponse; | ||
import software.amazon.awssdk.services.dynamodb.model.InternalServerErrorException; | ||
import software.amazon.awssdk.services.dynamodb.model.Shard; | ||
import software.amazon.awssdk.services.dynamodb.streams.DynamoDbStreamsClient; | ||
|
||
|
@@ -43,10 +45,13 @@ public class ShardManager { | |
|
||
|
||
private final DynamoDbStreamsClient streamsClient; | ||
private final DynamoDBSourceAggregateMetrics dynamoDBSourceAggregateMetrics; | ||
|
||
|
||
public ShardManager(final DynamoDbStreamsClient streamsClient) { | ||
public ShardManager(final DynamoDbStreamsClient streamsClient, | ||
final DynamoDBSourceAggregateMetrics dynamoDBSourceAggregateMetrics) { | ||
this.streamsClient = streamsClient; | ||
this.dynamoDBSourceAggregateMetrics = dynamoDBSourceAggregateMetrics; | ||
streamMap = new HashMap<>(); | ||
endingSequenceNumberMap = new HashMap<>(); | ||
} | ||
|
@@ -148,22 +153,30 @@ private List<Shard> listShards(String streamArn, String lastEvaluatedShardId) { | |
long startTime = System.currentTimeMillis(); | ||
// Get all the shard IDs from the stream. | ||
List<Shard> shards = new ArrayList<>(); | ||
do { | ||
DescribeStreamRequest req = DescribeStreamRequest.builder() | ||
.streamArn(streamArn) | ||
.limit(MAX_SHARD_COUNT) | ||
.exclusiveStartShardId(lastEvaluatedShardId) | ||
.build(); | ||
|
||
DescribeStreamResponse describeStreamResult = streamsClient.describeStream(req); | ||
shards.addAll(describeStreamResult.streamDescription().shards()); | ||
try { | ||
do { | ||
DescribeStreamRequest req = DescribeStreamRequest.builder() | ||
.streamArn(streamArn) | ||
.limit(MAX_SHARD_COUNT) | ||
.exclusiveStartShardId(lastEvaluatedShardId) | ||
.build(); | ||
|
||
// If LastEvaluatedShardId is set, | ||
// at least one more page of shard IDs to retrieve | ||
lastEvaluatedShardId = describeStreamResult.streamDescription().lastEvaluatedShardId(); | ||
dynamoDBSourceAggregateMetrics.getStreamApiInvocations().increment(); | ||
DescribeStreamResponse describeStreamResult = streamsClient.describeStream(req); | ||
shards.addAll(describeStreamResult.streamDescription().shards()); | ||
|
||
// If LastEvaluatedShardId is set, | ||
// at least one more page of shard IDs to retrieve | ||
lastEvaluatedShardId = describeStreamResult.streamDescription().lastEvaluatedShardId(); | ||
|
||
} while (lastEvaluatedShardId != null); | ||
|
||
} while (lastEvaluatedShardId != null); | ||
} catch(final InternalServerErrorException e) { | ||
LOG.error("Received an internal server exception from DynamoDB while listing shards: {}", e.getMessage()); | ||
dynamoDBSourceAggregateMetrics.getStream5xxErrors().increment(); | ||
return shards; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we rethrow the error here instead of potentially returning partial shards? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think there's anything wrong with returning partial shards, it just means we will only check those shards for child shards. The next time we run |
||
} | ||
|
||
long endTime = System.currentTimeMillis(); | ||
LOG.info("Listing shards (DescribeStream call) took {} milliseconds with {} shards found", endTime - startTime, shards.size()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we also consider adding pipeline specific metric ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think there's any value in that for these metrics at least. There may be other pipeline specific metrics we can add, but that can be a future PR