digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 0 ms";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 0 ms<br>number of output rows: 1"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 7<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>2 ms (0 ms, 0 ms, 0 ms (stage 2642.0: task 2876))<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 7<br>local bytes read: 413.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 7<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>112.0 B (16.0 B, 16.0 B, 16.0 B (stage 2642.0: task 2876))<br>local merged bytes read: 0.0 B<br>number of partitions: 1<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>413.0 B (59.0 B, 59.0 B, 59.0 B (stage 2642.0: task 2876))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n2.4 s (282 ms, 292 ms, 477 ms (stage 2642.0: task 2877))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>2.4 s (282 ms, 292 ms, 477 ms (stage 2642.0: task 2877))<br>number of output rows: 7"];
6 [labelType="html" label="<br><b>Project</b><br><br>"];
7 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 1,393"];
8 [labelType="html" label="<br><b>Project</b><br><br>"];
9 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 7"];
}
10 [labelType="html" label="<b>Scan binaryFile </b><br><br>number of output rows: 7<br>number of files read: 7<br>metadata time: 0 ms<br>size of files read: 992.7 KiB"];
2->0;
3->2;
5->3;
6->5;
7->6;
8->7;
9->8;
10->9;
}
11
AdaptiveSparkPlan isFinalPlan=true
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (2)
Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=51947]
HashAggregate(keys=[], functions=[partial_count(1)])
Project
Generate explode(COL_469EFA20_191C_4DC7_9CD8_0A53C7DD5971#365509), false, [COL_4594B752_72F8_457F_B37D_B0AB041DF9AC#365541]
Project [str_split_from_regex(bin_content_str(pdf, content#364727), \r?\n) AS COL_469EFA20_191C_4DC7_9CD8_0A53C7DD5971#365509]
Filter ((size(str_split_from_regex(bin_content_str(pdf, content#364727), \r?\n), true) > 0) AND isnotnull(str_split_from_regex(bin_content_str(pdf, content#364727), \r?\n)))
WholeStageCodegen (1)
FileScan binaryFile [content#364727] Batched: false, DataFilters: [(size(str_split_from_regex(bin_content_str(pdf, content#364727), \r?\n), true) > 0), isnotnull(s..., Format: org.apache.spark.sql.execution.datasources.binaryfile.BinaryFileFormat@51fd5c1e, Location: InMemoryFileIndex(7 paths)[file:/data/input/depot/binary/execution/A225B276_202D_4198_B6C6_5BF504..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<content:binary>