-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Description
I am trying to ingest data into a Druid datasource using index_parallel, but the ingestion repeatedly fails in the merge phase.
Here is the task spec:
{
"type": "index_parallel",
"spec": {
"dataSchema": {
"dataSource": "X",
"timestampSpec": {
"column": "timestamp",
"format": "auto"
},
"dimensionsSpec": {
"dimensions": [
"segment",
"region",
"deviceIdType",
"scope"
]
},
"metricsSpec": [
{
"fieldName": "X",
"type": "thetaSketch",
"name": "X",
"isInputThetaSketch": true,
"size": 65536
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "day",
"queryGranularity": "day",
"intervals": ["PLACEHOLDER"],
"rollup": false
}
},
"ioConfig": {
"type": "index_parallel",
"inputSource": {
"type": "combining",
"delegates": [
{
"type": "druid",
"dataSource": "X",
"interval": "PLACEHOLDER",
"filter": {
"type": "not",
"field": {
"type": "in",
"dimension": "X",
"values": ["X"]
}
}
},
}
}
]
},
"inputFormat": {
"type": "parquet",
"binaryAsString": false
},
"appendToExisting": false
},
"tuningConfig": {
"type": "index_parallel",
"forceGuaranteedRollup": true,
"maxRowsInMemory": 25000,
"maxBytesInMemory": 500000000,
"partitionsSpec": {
"type": "hashed",
"targetRowsPerSegment": 6000
},
"maxNumConcurrentSubTasks": 10,
"maxRetry": 3,
"taskStatusCheckPeriodMs": 1000,
"chatHandlerTimeout": "PT10S",
"chatHandlerNumRetries": 5,
"pushTimeout": 0,
"ignoreInvalidRows": true,
"buildV9Directly": true
}
}
}
I am using a single MiddleManager on an r6gd.8xlarge instance with 12 worker slots, a 4GB heap for the MiddleManager, and 20GB (16GB heap + 4GB direct memory) for the peon task.