digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
1 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 36 ms";
3 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 0 ms<br>peak memory: 16.3 MiB<br>number of output rows: 8<br>number of sort fallback tasks: 0<br>avg hash probes per key: 1"];
}
4 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 482.0 B<br>number of coalesced partitions: 1"];
5 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 8<br>local merged chunks fetched: 0<br>shuffle write time: 1 ms<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 8<br>local bytes read: 462.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size: 256.0 B<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written: 462.0 B"];
subgraph cluster6 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 162 ms";
7 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 111 ms<br>peak memory: 256.0 KiB<br>number of output rows: 8<br>number of sort fallback tasks: 0<br>avg hash probes per key: 0"];
8 [labelType="html" label="<br><b>Project</b><br><br>"];
9 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 19,130"];
10 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 40,165<br>number of input batches: 10"];
}
11 [labelType="html" label="<b>Scan parquet </b><br><br>number of files read: 1<br>scan time: 44 ms<br>metadata time: 0 ms<br>size of files read: 1219.7 KiB<br>number of output rows: 40,165"];
1->0;
3->1;
4->3;
5->4;
7->5;
8->7;
9->8;
10->9;
11->10;
}
12
AdaptiveSparkPlan isFinalPlan=true
TakeOrderedAndProject(limit=500000, orderBy=[COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L DESC NULLS LAST], output=[COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225,COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L])
HashAggregate(keys=[COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147], functions=[count(1)])
WholeStageCodegen (2)
AQEShuffleRead coalesced
Exchange hashpartitioning(COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, 10), ENSURE_REQUIREMENTS, [plan_id=15660]
HashAggregate(keys=[COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147], functions=[partial_count(1)])
Project [str_extract_by_str(COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946, -, LEFT) AS COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Filter NOT CASE WHEN isnull(Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition)) THEN false ELSE Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition) END
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945,COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946] Batched: true, DataFilters: [NOT CASE WHEN isnull(Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition)) THE..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/data/output/cache/parquet/datablock/10_CONSOLIDATIO_343570], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<COL_F8F4501B_9215_45B8_8482_A6D00C7E1219:string,COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8:s...
== Physical Plan ==
AdaptiveSparkPlan (17)
+- == Final Plan ==
TakeOrderedAndProject (10)
+- * HashAggregate (9)
+- AQEShuffleRead (8)
+- ShuffleQueryStage (7), Statistics(sizeInBytes=256.0 B, rowCount=8)
+- Exchange (6)
+- * HashAggregate (5)
+- * Project (4)
+- * Filter (3)
+- * ColumnarToRow (2)
+- Scan parquet (1)
+- == Initial Plan ==
TakeOrderedAndProject (16)
+- HashAggregate (15)
+- Exchange (14)
+- HashAggregate (13)
+- Project (12)
+- Filter (11)
+- Scan parquet (1)
(1) Scan parquet
Output [2]: [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946]
Batched: true
Location: InMemoryFileIndex [file:/data/output/cache/parquet/datablock/10_CONSOLIDATIO_343570]
ReadSchema: struct<COL_F8F4501B_9215_45B8_8482_A6D00C7E1219:string,COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8:string>
(2) ColumnarToRow [codegen id : 1]
Input [2]: [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946]
(3) Filter [codegen id : 1]
Input [2]: [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946]
Condition : NOT CASE WHEN isnull(Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition)) THEN false ELSE Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition) END
(4) Project [codegen id : 1]
Output [1]: [str_extract_by_str(COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946, -, LEFT) AS COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Input [2]: [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946]
(5) HashAggregate [codegen id : 1]
Input [1]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Keys [1]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Functions [1]: [partial_count(1)]
Aggregate Attributes [1]: [count#143531L]
Results [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
(6) Exchange
Input [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
Arguments: hashpartitioning(COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, 10), ENSURE_REQUIREMENTS, [plan_id=15660]
(7) ShuffleQueryStage
Output [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
Arguments: 0
(8) AQEShuffleRead
Input [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
Arguments: coalesced
(9) HashAggregate [codegen id : 2]
Input [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
Keys [1]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Functions [1]: [count(1)]
Aggregate Attributes [1]: [count(1)#143176L]
Results [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147 AS COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, count(1)#143176L AS COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
(10) TakeOrderedAndProject
Input [2]: [COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
Arguments: 500000, [COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L DESC NULLS LAST], [COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
(11) Filter
Input [2]: [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946]
Condition : NOT CASE WHEN isnull(Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition)) THEN false ELSE Contains(COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, Proposition) END
(12) Project
Output [1]: [str_extract_by_str(COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946, -, LEFT) AS COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Input [2]: [COL_F8F4501B_9215_45B8_8482_A6D00C7E1219#142945, COL_CE32BA7E_7F3E_45CB_9B4F_02D94DF188B8#142946]
(13) HashAggregate
Input [1]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Keys [1]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Functions [1]: [partial_count(1)]
Aggregate Attributes [1]: [count#143531L]
Results [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
(14) Exchange
Input [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
Arguments: hashpartitioning(COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, 10), ENSURE_REQUIREMENTS, [plan_id=15637]
(15) HashAggregate
Input [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147, count#143532L]
Keys [1]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147]
Functions [1]: [count(1)]
Aggregate Attributes [1]: [count(1)#143176L]
Results [2]: [COL_E8C58C8A_101A_4AC4_B888_FE8878C536D3#143147 AS COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, count(1)#143176L AS COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
(16) TakeOrderedAndProject
Input [2]: [COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
Arguments: 500000, [COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L DESC NULLS LAST], [COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
(17) AdaptiveSparkPlan
Output [2]: [COL_ABA5D8BE_2786_4E55_AF06_24037DBC6FF6#143225, COL_31624CCE_34AF_41D2_89A5_9358A2EF48D9#143226L]
Arguments: isFinalPlan=true