digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: 0 ms";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 0 ms<br>number of output rows: 1"];
3 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 0 ms<br>number of output rows: 1"];
}
4 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 200<br>local merged chunks fetched: 0<br>shuffle write time: 0 ms<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 200<br>local bytes read: 56.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size: 0.0 B<br>local merged bytes read: 0.0 B<br>number of partitions: 1<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written: 56.0 B"];
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 27 ms";
6 [labelType="html" label="<br><b>LocalLimit</b><br><br>"];
7 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 21 ms<br>peak memory: 16.3 MiB<br>number of output rows: 200<br>number of sort fallback tasks: 0<br>avg hash probes per key: 1"];
}
8 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 448.2 KiB<br>number of coalesced partitions: 1"];
9 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 31,919<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>10 ms (1 ms, 1 ms, 2 ms (stage 908.0: task 1243))<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 31,919<br>local bytes read: 429.2 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 5<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>853.3 KiB (10.9 KiB, 190.3 KiB, 281.2 KiB (stage 908.0: task 1242))<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>429.2 KiB (6.6 KiB, 91.8 KiB, 145.2 KiB (stage 908.0: task 1242))"];
subgraph cluster10 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n2.1 m (876 ms, 20.8 s, 51.7 s (stage 908.0: task 1244))";
11 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build total (min, med, max (stageId: taskId))<br>2.1 m (864 ms, 20.8 s, 51.7 s (stage 908.0: task 1244))<br>peak memory total (min, med, max (stageId: taskId))<br>65.3 MiB (256.0 KiB, 16.3 MiB, 16.3 MiB (stage 908.0: task 1245))<br>number of output rows: 31,919<br>number of sort fallback tasks: 0<br>avg hash probes per key (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 908.0: task 1245))"];
12 [labelType="html" label="<br><b>Project</b><br><br>"];
}
13 [labelType="html" label="<br><b>Project</b><br><br>"];
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n2.1 m (930 ms, 20.9 s, 51.7 s (stage 908.0: task 1244))";
15 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 316,675"];
}
16 [labelType="html" label="<br><b>Project</b><br><br>"];
17 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 5"];
18 [labelType="html" label="<b>Scan binaryFile </b><br><br>number of output rows: 5<br>number of files read: 5<br>metadata time: 0 ms<br>size of files read: 34.6 MiB"];
2->0;
3->2;
4->3;
6->4;
7->6;
8->7;
9->8;
11->9;
12->11;
13->12;
15->13;
16->15;
17->16;
18->17;
}
19
AdaptiveSparkPlan isFinalPlan=true
HashAggregate(keys=[], functions=[count(1)])
HashAggregate(keys=[], functions=[partial_count(1)])
WholeStageCodegen (4)
Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15161]
LocalLimit 200
HashAggregate(keys=[COL_BAC7322B_FA9B_430E_B73B_810A78B5874D#147568], functions=[])
WholeStageCodegen (3)
AQEShuffleRead coalesced
Exchange hashpartitioning(COL_BAC7322B_FA9B_430E_B73B_810A78B5874D#147568, 10), ENSURE_REQUIREMENTS, [plan_id=15028]
HashAggregate(keys=[COL_BAC7322B_FA9B_430E_B73B_810A78B5874D#147568], functions=[])
Project [str_if_with_rule((((CASE WHEN isnull(EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, ,)) THEN false ELSE EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, ,) END OR CASE WHEN isnull(EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, ;)) THEN false ELSE EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, ;) END) OR CASE WHEN isnull(EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, :)) THEN false ELSE EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, :) END) OR CASE WHEN isnull(EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, .)) THEN false ELSE EndsWith(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, .) END), str_extract(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467, LEFT, dec_to_int(dec_operator(DIFFERENCE, array(cast(str_size(COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467) as double), 1.0)))), COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467) AS COL_BAC7322B_FA9B_430E_B73B_810A78B5874D#147568]
WholeStageCodegen (2)
Project [str_transform_case(str_remove_accent(get_json_object(COL_21E39881_03B7_4478_981D_330734A120C0#147453, $.term)), UPPER) AS COL_8024C6FB_6C34_46E0_91CB_695910BF0C1B#147467]
Generate explode(COL_C3F4AA07_85F2_497E_9135_2A1292466B75#147449), false, [COL_21E39881_03B7_4478_981D_330734A120C0#147453]
WholeStageCodegen (1)
Project [from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, content#147382), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_C3F4AA07_85F2_497E_9135_2A1292466B75#147449]
Filter ((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, content#147382), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, content#147382), Some(Etc/UTC)), Some(Etc/UTC))))
FileScan binaryFile [content#147382] Batched: false, DataFilters: [(size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, content#147382), Som..., Format: org.apache.spark.sql.execution.datasources.binaryfile.BinaryFileFormat@7e993be, Location: InMemoryFileIndex(5 paths)[file:/data/input/depot/binary/execution/454FFE78_6FB4_4FFA_AB5C_590D50..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<content:binary>