5050 * @param <V> - Value passed to this class to be written. The value is ignored.
5151 */
5252public class ETLDBOutputFormat <K extends DBWritable , V > extends DBOutputFormat <K , V > {
53+ // Number of records to accumulate before submitting a batch to the SQL engine. If set to 0 or a negative value, no batches are submitted until commit.
54+ public static final String COMMIT_BATCH_SIZE = "io.cdap.plugin.db.output.commit.batch.size" ;
55+ public static final int DEFAULT_COMMIT_BATCH_SIZE = 1000 ;
5356
5457 private static final Logger LOG = LoggerFactory .getLogger (ETLDBOutputFormat .class );
5558
@@ -63,6 +66,7 @@ public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOE
6366 DBConfiguration dbConf = new DBConfiguration (conf );
6467 String tableName = dbConf .getOutputTableName ();
6568 String [] fieldNames = dbConf .getOutputFieldNames ();
69+ final int batchSize = conf .getInt (COMMIT_BATCH_SIZE , DEFAULT_COMMIT_BATCH_SIZE );
6670
6771 if (fieldNames == null ) {
6872 fieldNames = new String [dbConf .getOutputFieldCount ()];
@@ -74,6 +78,7 @@ public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOE
7478 return new DBRecordWriter (connection , statement ) {
7579
7680 private boolean emptyData = true ;
81+ private long numWrittenRecords = 0 ;
7782
7883 //Implementation of the close method below is the exact implementation in DBOutputFormat except that
7984 //we check if there is any data to be written and if not, we skip executeBatch call.
@@ -116,6 +121,13 @@ public void write(K key, V value) {
116121 try {
117122 key .write (getStatement ());
118123 getStatement ().addBatch ();
124+ numWrittenRecords ++;
125+
126+ // Submit the accumulated batch to the SQL engine every batchSize records (configured via COMMIT_BATCH_SIZE; skipped when batchSize <= 0).
127+ // This is done to reduce memory usage in the worker, as processed records can now be GC'd.
128+ if (batchSize > 0 && numWrittenRecords % batchSize == 0 ) {
129+ getStatement ().executeBatch ();
130+ }
119131 } catch (SQLException e ) {
120132 LOG .warn ("Failed to write value to database" , e );
121133 }
0 commit comments