digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
1 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (8)\n \nduration: 4 ms";
3 [labelType="html" label="<br><b>Project</b><br><br>"];
4 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 76<br>spill size: 0.0 B"];
}
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: 5 ms";
6 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 16.1 MiB<br>spill size: 0.0 B"];
}
7 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 28.6 KiB<br>number of coalesced partitions: 1"];
8 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 412<br>local merged chunks fetched: 0<br>shuffle write time: 1 ms<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 412<br>local bytes read: 26.9 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size: 57.8 KiB<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written: 26.9 KiB"];
subgraph cluster9 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: 35 ms";
10 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 30 ms<br>peak memory: 256.0 KiB<br>number of output rows: 412<br>number of sort fallback tasks: 0<br>avg hash probes per key: 0"];
11 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 28 ms<br>peak memory: 256.0 KiB<br>number of output rows: 412<br>number of sort fallback tasks: 0<br>avg hash probes per key: 0"];
12 [labelType="html" label="<br><b>Project</b><br><br>"];
}
13 [labelType="html" label="<b>Window</b><br><br>spill size: 0.0 B"];
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: 173 ms";
15 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 16.1 MiB<br>spill size: 0.0 B"];
}
16 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 74.8 KiB<br>number of coalesced partitions: 1"];
17 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3,992<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>14 ms (1 ms, 1 ms, 2 ms (stage 640.0: task 949))<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 3,992<br>local bytes read: 71.7 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 8<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>287.2 KiB (17.7 KiB, 32.6 KiB, 64.3 KiB (stage 640.0: task 947))<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>71.7 KiB (4.4 KiB, 8.0 KiB, 17.6 KiB (stage 640.0: task 947))"];
18 [labelType="html" label="<br><b>Project</b><br><br>"];
subgraph cluster19 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n2.5 s (245 ms, 329 ms, 375 ms (stage 640.0: task 947))";
20 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 3,992"];
}
21 [labelType="html" label="<br><b>Project</b><br><br>"];
22 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 8"];
subgraph cluster23 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n2.5 s (251 ms, 337 ms, 378 ms (stage 640.0: task 947))";
24 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 8<br>number of input batches: 8"];
}
25 [labelType="html" label="<b>Scan parquet </b><br><br>number of files read: 8<br>scan time total (min, med, max (stageId: taskId))<br>44 ms (2 ms, 5 ms, 12 ms (stage 640.0: task 945))<br>metadata time: 0 ms<br>size of files read: 837.5 KiB<br>number of output rows: 8"];
subgraph cluster26 {
isCluster="true";
label="WholeStageCodegen (7)\n \nduration: 5 ms";
27 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 16.1 MiB<br>spill size: 0.0 B"];
}
28 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 129.0 B<br>number of coalesced partitions: 1"];
29 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>local merged chunks fetched: 0<br>shuffle write time: 0 ms<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 1<br>local bytes read: 128.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size: 88.0 B<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written: 128.0 B"];
subgraph cluster30 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 6 ms";
31 [labelType="html" label="<br><b>Project</b><br><br>"];
32 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1"];
}
33 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 1<br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 369.0 B"];
1->0;
3->1;
4->3;
6->4;
7->6;
8->7;
10->8;
11->10;
12->11;
13->12;
15->13;
16->15;
17->16;
18->17;
20->18;
21->20;
22->21;
24->22;
25->24;
27->4;
28->27;
29->28;
31->29;
32->31;
33->32;
}
34
AdaptiveSparkPlan isFinalPlan=true
TakeOrderedAndProject(limit=500000, orderBy=[COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191 ASC NULLS FIRST,COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192 ASC NULLS FIRST], output=[COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191,COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192,COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193,COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194,COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195])
Project [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127 AS COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128 AS COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129 AS COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136 AS COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137 AS COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
SortMergeJoin [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127], [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138], Inner
WholeStageCodegen (8)
Sort [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127 ASC NULLS FIRST], false, 0
WholeStageCodegen (6)
AQEShuffleRead coalesced
Exchange hashpartitioning(COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, 10), ENSURE_REQUIREMENTS, [plan_id=10215]
HashAggregate(keys=[COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093], functions=[])
HashAggregate(keys=[COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093], functions=[])
Project [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, str_replace(COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033, |, ) AS COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
WholeStageCodegen (5)
Window [concatwithseparatorudaf(_w0#122034, com.adb.hdh.spark.job.scala.utils.ConcatWithSeparatorUDAF@7e021d09, class[value[0]: string], class[value[0]: string], true, true, 0, 0, None) windowspecdefinition(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, 1 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033], [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013], [1 DESC NULLS LAST]
Sort [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949 ASC NULLS FIRST, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013 ASC NULLS FIRST, 1 DESC NULLS LAST], false, 0
WholeStageCodegen (4)
AQEShuffleRead coalesced
Exchange hashpartitioning(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, 10), ENSURE_REQUIREMENTS, [plan_id=10065]
Project [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, str_to_integer(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958, $.sentence_number_in_text)) AS COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, ARRAY_TO_STR_BEFORE_CONCAT(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958, $.term)) AS _w0#122034]
Generate explode(COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953), [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949], false, [COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958]
WholeStageCodegen (2)
Project [_NOM_DE_FICHIER#121763 AS COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953]
Filter (((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)))) AND isnotnull(_NOM_DE_FICHIER#121763))
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet [_FICHIER#121762,_NOM_DE_FICHIER#121763] Batched: true, DataFilters: [(size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), So..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/data/output/cache/parquet/uet/DOCUMENT_331395], PartitionFilters: [], PushedFilters: [IsNotNull(_NOM_DE_FICHIER)], ReadSchema: struct<_FICHIER:binary,_NOM_DE_FICHIER:string>
Sort [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138 ASC NULLS FIRST], false, 0
WholeStageCodegen (7)
AQEShuffleRead coalesced
Exchange hashpartitioning(DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138, 10), ENSURE_REQUIREMENTS, [plan_id=10100]
Project [IPP#121885 AS DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, Séjour#121886 AS DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, Document#121887 AS DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Filter ((Séjour#121886 <=> S00002) AND isnotnull(Document#121887))
WholeStageCodegen (3)
FileScan csv [IPP#121885,Séjour#121886,Document#121887] Batched: false, DataFilters: [(Séjour#121886 <=> S00002), isnotnull(Document#121887)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/data/input/depot/csv/execution/LISTE_DOCUMENT__331639/current/Li..., PartitionFilters: [], PushedFilters: [EqualNullSafe(Séjour,S00002), IsNotNull(Document)], ReadSchema: struct<IPP:string,Séjour:string,Document:string>
== Physical Plan ==
AdaptiveSparkPlan (48)
+- == Final Plan ==
TakeOrderedAndProject (28)
+- * Project (27)
+- * SortMergeJoin Inner (26)
:- * Sort (18)
: +- AQEShuffleRead (17)
: +- ShuffleQueryStage (16), Statistics(sizeInBytes=57.8 KiB, rowCount=412)
: +- Exchange (15)
: +- * HashAggregate (14)
: +- * HashAggregate (13)
: +- * Project (12)
: +- Window (11)
: +- * Sort (10)
: +- AQEShuffleRead (9)
: +- ShuffleQueryStage (8), Statistics(sizeInBytes=287.2 KiB, rowCount=3.99E+3)
: +- Exchange (7)
: +- Project (6)
: +- * Generate (5)
: +- Project (4)
: +- Filter (3)
: +- * ColumnarToRow (2)
: +- Scan parquet (1)
+- * Sort (25)
+- AQEShuffleRead (24)
+- ShuffleQueryStage (23), Statistics(sizeInBytes=88.0 B, rowCount=1)
+- Exchange (22)
+- * Project (21)
+- * Filter (20)
+- Scan csv (19)
+- == Initial Plan ==
TakeOrderedAndProject (47)
+- Project (46)
+- SortMergeJoin Inner (45)
:- Sort (40)
: +- Exchange (39)
: +- HashAggregate (38)
: +- HashAggregate (37)
: +- Project (36)
: +- Window (35)
: +- Sort (34)
: +- Exchange (33)
: +- Project (32)
: +- Generate (31)
: +- Project (30)
: +- Filter (29)
: +- Scan parquet (1)
+- Sort (44)
+- Exchange (43)
+- Project (42)
+- Filter (41)
+- Scan csv (19)
(1) Scan parquet
Output [2]: [_FICHIER#121762, _NOM_DE_FICHIER#121763]
Batched: true
Location: InMemoryFileIndex [file:/data/output/cache/parquet/uet/DOCUMENT_331395]
PushedFilters: [IsNotNull(_NOM_DE_FICHIER)]
ReadSchema: struct<_FICHIER:binary,_NOM_DE_FICHIER:string>
(2) ColumnarToRow [codegen id : 1]
Input [2]: [_FICHIER#121762, _NOM_DE_FICHIER#121763]
(3) Filter
Input [2]: [_FICHIER#121762, _NOM_DE_FICHIER#121763]
Condition : (((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)))) AND isnotnull(_NOM_DE_FICHIER#121763))
(4) Project
Output [2]: [_NOM_DE_FICHIER#121763 AS COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953]
Input [2]: [_FICHIER#121762, _NOM_DE_FICHIER#121763]
(5) Generate [codegen id : 2]
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953]
Arguments: explode(COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953), [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949], false, [COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958]
(6) Project
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, str_to_integer(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958, $.sentence_number_in_text)) AS COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, ARRAY_TO_STR_BEFORE_CONCAT(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958, $.term)) AS _w0#122034]
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958]
(7) Exchange
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: hashpartitioning(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, 10), ENSURE_REQUIREMENTS, [plan_id=10065]
(8) ShuffleQueryStage
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: 0
(9) AQEShuffleRead
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: coalesced
(10) Sort [codegen id : 4]
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949 ASC NULLS FIRST, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013 ASC NULLS FIRST, 1 DESC NULLS LAST], false, 0
(11) Window
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: [concatwithseparatorudaf(_w0#122034, com.adb.hdh.spark.job.scala.utils.ConcatWithSeparatorUDAF@7e021d09, class[value[0]: string], class[value[0]: string], true, true, 0, 0, None) windowspecdefinition(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, 1 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033], [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013], [1 DESC NULLS LAST]
(12) Project [codegen id : 5]
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, str_replace(COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033, |, ) AS COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Input [4]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034, COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033]
(13) HashAggregate [codegen id : 5]
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
(14) HashAggregate [codegen id : 5]
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949 AS COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013 AS COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093 AS COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
(15) Exchange
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
Arguments: hashpartitioning(COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, 10), ENSURE_REQUIREMENTS, [plan_id=10215]
(16) ShuffleQueryStage
Output [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
Arguments: 2
(17) AQEShuffleRead
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
Arguments: coalesced
(18) Sort [codegen id : 6]
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
Arguments: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127 ASC NULLS FIRST], false, 0
(19) Scan csv
Output [3]: [IPP#121885, Séjour#121886, Document#121887]
Batched: false
Location: InMemoryFileIndex [file:/data/input/depot/csv/execution/LISTE_DOCUMENT__331639/current/ListePatDoc.csv]
PushedFilters: [EqualNullSafe(Séjour,S00002), IsNotNull(Document)]
ReadSchema: struct<IPP:string,Séjour:string,Document:string>
(20) Filter [codegen id : 3]
Input [3]: [IPP#121885, Séjour#121886, Document#121887]
Condition : ((Séjour#121886 <=> S00002) AND isnotnull(Document#121887))
(21) Project [codegen id : 3]
Output [3]: [IPP#121885 AS DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, Séjour#121886 AS DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, Document#121887 AS DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Input [3]: [IPP#121885, Séjour#121886, Document#121887]
(22) Exchange
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Arguments: hashpartitioning(DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138, 10), ENSURE_REQUIREMENTS, [plan_id=10100]
(23) ShuffleQueryStage
Output [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Arguments: 1
(24) AQEShuffleRead
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Arguments: coalesced
(25) Sort [codegen id : 7]
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Arguments: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138 ASC NULLS FIRST], false, 0
(26) SortMergeJoin [codegen id : 8]
Left keys [1]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127]
Right keys [1]: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Join type: Inner
Join condition: None
(27) Project [codegen id : 8]
Output [5]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127 AS COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128 AS COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129 AS COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136 AS COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137 AS COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
Input [6]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
(28) TakeOrderedAndProject
Input [5]: [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
Arguments: 500000, [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191 ASC NULLS FIRST, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192 ASC NULLS FIRST], [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
(29) Filter
Input [2]: [_FICHIER#121762, _NOM_DE_FICHIER#121763]
Condition : (((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)))) AND isnotnull(_NOM_DE_FICHIER#121763))
(30) Project
Output [2]: [_NOM_DE_FICHIER#121763 AS COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121762), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953]
Input [2]: [_FICHIER#121762, _NOM_DE_FICHIER#121763]
(31) Generate
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953]
Arguments: explode(COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121953), [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949], false, [COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958]
(32) Project
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, str_to_integer(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958, $.sentence_number_in_text)) AS COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, ARRAY_TO_STR_BEFORE_CONCAT(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958, $.term)) AS _w0#122034]
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121958]
(33) Exchange
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: hashpartitioning(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, 10), ENSURE_REQUIREMENTS, [plan_id=10010]
(34) Sort
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949 ASC NULLS FIRST, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013 ASC NULLS FIRST, 1 DESC NULLS LAST], false, 0
(35) Window
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034]
Arguments: [concatwithseparatorudaf(_w0#122034, com.adb.hdh.spark.job.scala.utils.ConcatWithSeparatorUDAF@7e021d09, class[value[0]: string], class[value[0]: string], true, true, 0, 0, None) windowspecdefinition(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, 1 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033], [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013], [1 DESC NULLS LAST]
(36) Project
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, str_replace(COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033, |, ) AS COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Input [4]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, _w0#122034, COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#122033]
(37) HashAggregate
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
(38) HashAggregate
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121949 AS COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#122013 AS COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#122093 AS COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
(39) Exchange
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
Arguments: hashpartitioning(COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, 10), ENSURE_REQUIREMENTS, [plan_id=10020]
(40) Sort
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129]
Arguments: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127 ASC NULLS FIRST], false, 0
(41) Filter
Input [3]: [IPP#121885, Séjour#121886, Document#121887]
Condition : ((Séjour#121886 <=> S00002) AND isnotnull(Document#121887))
(42) Project
Output [3]: [IPP#121885 AS DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, Séjour#121886 AS DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, Document#121887 AS DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Input [3]: [IPP#121885, Séjour#121886, Document#121887]
(43) Exchange
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Arguments: hashpartitioning(DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138, 10), ENSURE_REQUIREMENTS, [plan_id=10021]
(44) Sort
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Arguments: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138 ASC NULLS FIRST], false, 0
(45) SortMergeJoin
Left keys [1]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127]
Right keys [1]: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
Join type: Inner
Join condition: None
(46) Project
Output [5]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127 AS COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128 AS COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129 AS COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136 AS COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137 AS COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
Input [6]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#122127, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#122128, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#122129, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#122136, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#122137, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#122138]
(47) TakeOrderedAndProject
Input [5]: [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
Arguments: 500000, [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191 ASC NULLS FIRST, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192 ASC NULLS FIRST], [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
(48) AdaptiveSparkPlan
Output [5]: [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#122191, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#122192, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#122193, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#122194, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#122195]
Arguments: isFinalPlan=true