digraph G {
0 [labelType="html" label="<br><b>AdaptiveSparkPlan</b><br><br>"];
1 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (8)\n \nduration: 8 ms";
3 [labelType="html" label="<br><b>Project</b><br><br>"];
4 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 64<br>spill size: 0.0 B"];
}
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: 68 ms";
6 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 16.1 MiB<br>spill size: 0.0 B"];
}
7 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 28.6 KiB<br>number of coalesced partitions: 1"];
8 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 412<br>local merged chunks fetched: 0<br>shuffle write time: 1 ms<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 412<br>local bytes read: 26.9 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size: 57.8 KiB<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written: 26.9 KiB"];
subgraph cluster9 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: 85 ms";
10 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 54 ms<br>peak memory: 256.0 KiB<br>number of output rows: 412<br>number of sort fallback tasks: 0<br>avg hash probes per key: 0"];
11 [labelType="html" label="<b>HashAggregate</b><br><br>spill size: 0.0 B<br>time in aggregation build: 47 ms<br>peak memory: 256.0 KiB<br>number of output rows: 412<br>number of sort fallback tasks: 0<br>avg hash probes per key: 0"];
12 [labelType="html" label="<br><b>Project</b><br><br>"];
}
13 [labelType="html" label="<b>Window</b><br><br>spill size: 0.0 B"];
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: 288 ms";
15 [labelType="html" label="<b>Sort</b><br><br>sort time: 2 ms<br>peak memory: 16.1 MiB<br>spill size: 0.0 B"];
}
16 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 74.8 KiB<br>number of coalesced partitions: 1"];
17 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3,992<br>local merged chunks fetched: 0<br>shuffle write time total (min, med, max (stageId: taskId))<br>16 ms (1 ms, 1 ms, 3 ms (stage 626.0: task 931))<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 3,992<br>local bytes read: 71.7 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 8<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size total (min, med, max (stageId: taskId))<br>287.2 KiB (17.7 KiB, 32.6 KiB, 64.3 KiB (stage 626.0: task 930))<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>71.7 KiB (4.4 KiB, 8.0 KiB, 17.6 KiB (stage 626.0: task 930))"];
18 [labelType="html" label="<br><b>Project</b><br><br>"];
subgraph cluster19 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n20.3 s (2.5 s, 2.5 s, 2.7 s (stage 626.0: task 929))";
20 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 3,992"];
}
21 [labelType="html" label="<br><b>Project</b><br><br>"];
22 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 8"];
subgraph cluster23 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n20.6 s (2.5 s, 2.5 s, 2.7 s (stage 626.0: task 929))";
24 [labelType="html" label="<b>ColumnarToRow</b><br><br>number of output rows: 8<br>number of input batches: 8"];
}
25 [labelType="html" label="<b>Scan parquet </b><br><br>number of files read: 8<br>scan time total (min, med, max (stageId: taskId))<br>722 ms (68 ms, 85 ms, 138 ms (stage 626.0: task 932))<br>metadata time: 0 ms<br>size of files read: 837.5 KiB<br>number of output rows: 8"];
subgraph cluster26 {
isCluster="true";
label="WholeStageCodegen (7)\n \nduration: 82 ms";
27 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 16.1 MiB<br>spill size: 0.0 B"];
}
28 [labelType="html" label="<b>AQEShuffleRead</b><br><br>number of partitions: 1<br>partition data size: 129.0 B<br>number of coalesced partitions: 1"];
29 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>local merged chunks fetched: 0<br>shuffle write time: 0 ms<br>remote merged bytes read: 0.0 B<br>local merged blocks fetched: 0<br>corrupt merged block chunks: 0<br>remote merged reqs duration: 0 ms<br>remote merged blocks fetched: 0<br>records read: 1<br>local bytes read: 128.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 0.0 B<br>merged fetch fallback count: 0<br>local blocks read: 1<br>remote merged chunks fetched: 0<br>remote blocks read: 0<br>data size: 88.0 B<br>local merged bytes read: 0.0 B<br>number of partitions: 10<br>remote reqs duration: 0 ms<br>remote bytes read to disk: 0.0 B<br>shuffle bytes written: 128.0 B"];
subgraph cluster30 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 46 ms";
31 [labelType="html" label="<br><b>Project</b><br><br>"];
32 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 1"];
}
33 [labelType="html" label="<b>Scan csv </b><br><br>number of output rows: 1<br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 369.0 B"];
1->0;
3->1;
4->3;
6->4;
7->6;
8->7;
10->8;
11->10;
12->11;
13->12;
15->13;
16->15;
17->16;
18->17;
20->18;
21->20;
22->21;
24->22;
25->24;
27->4;
28->27;
29->28;
31->29;
32->31;
33->32;
}
34
AdaptiveSparkPlan isFinalPlan=true
TakeOrderedAndProject(limit=500000, orderBy=[COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498 ASC NULLS FIRST,COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499 ASC NULLS FIRST], output=[COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498,COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499,COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500,COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501,COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502])
Project [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434 AS COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435 AS COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436 AS COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443 AS COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444 AS COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
SortMergeJoin [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434], [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445], Inner
WholeStageCodegen (8)
Sort [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434 ASC NULLS FIRST], false, 0
WholeStageCodegen (6)
AQEShuffleRead coalesced
Exchange hashpartitioning(COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, 10), ENSURE_REQUIREMENTS, [plan_id=9804]
HashAggregate(keys=[COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400], functions=[])
HashAggregate(keys=[COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400], functions=[])
Project [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, str_replace(COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340, |, ) AS COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
WholeStageCodegen (5)
Window [concatwithseparatorudaf(_w0#121341, com.adb.hdh.spark.job.scala.utils.ConcatWithSeparatorUDAF@1cc8a327, class[value[0]: string], class[value[0]: string], true, true, 0, 0, None) windowspecdefinition(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, 1 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340], [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320], [1 DESC NULLS LAST]
Sort [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256 ASC NULLS FIRST, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320 ASC NULLS FIRST, 1 DESC NULLS LAST], false, 0
WholeStageCodegen (4)
AQEShuffleRead coalesced
Exchange hashpartitioning(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, 10), ENSURE_REQUIREMENTS, [plan_id=9654]
Project [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, str_to_integer(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265, $.sentence_number_in_text)) AS COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, ARRAY_TO_STR_BEFORE_CONCAT(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265, $.term)) AS _w0#121341]
Generate explode(COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260), [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256], false, [COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265]
WholeStageCodegen (2)
Project [_NOM_DE_FICHIER#121178 AS COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260]
Filter (((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)))) AND isnotnull(_NOM_DE_FICHIER#121178))
ColumnarToRow
WholeStageCodegen (1)
FileScan parquet [_FICHIER#121177,_NOM_DE_FICHIER#121178] Batched: true, DataFilters: [(size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), So..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/data/output/cache/parquet/uet/DOCUMENT_331395], PartitionFilters: [], PushedFilters: [IsNotNull(_NOM_DE_FICHIER)], ReadSchema: struct<_FICHIER:binary,_NOM_DE_FICHIER:string>
Sort [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445 ASC NULLS FIRST], false, 0
WholeStageCodegen (7)
AQEShuffleRead coalesced
Exchange hashpartitioning(DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445, 10), ENSURE_REQUIREMENTS, [plan_id=9689]
Project [IPP#121198 AS DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, Séjour#121199 AS DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, Document#121200 AS DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Filter ((Séjour#121199 <=> S00001) AND isnotnull(Document#121200))
WholeStageCodegen (3)
FileScan csv [IPP#121198,Séjour#121199,Document#121200] Batched: false, DataFilters: [(Séjour#121199 <=> S00001), isnotnull(Document#121200)], Format: CSV, Location: InMemoryFileIndex(1 paths)[file:/data/input/depot/csv/execution/LISTE_DOCUMENT__331639/current/Li..., PartitionFilters: [], PushedFilters: [EqualNullSafe(Séjour,S00001), IsNotNull(Document)], ReadSchema: struct<IPP:string,Séjour:string,Document:string>
== Physical Plan ==
AdaptiveSparkPlan (48)
+- == Final Plan ==
TakeOrderedAndProject (28)
+- * Project (27)
+- * SortMergeJoin Inner (26)
:- * Sort (18)
: +- AQEShuffleRead (17)
: +- ShuffleQueryStage (16), Statistics(sizeInBytes=57.8 KiB, rowCount=412)
: +- Exchange (15)
: +- * HashAggregate (14)
: +- * HashAggregate (13)
: +- * Project (12)
: +- Window (11)
: +- * Sort (10)
: +- AQEShuffleRead (9)
: +- ShuffleQueryStage (8), Statistics(sizeInBytes=287.2 KiB, rowCount=3.99E+3)
: +- Exchange (7)
: +- Project (6)
: +- * Generate (5)
: +- Project (4)
: +- Filter (3)
: +- * ColumnarToRow (2)
: +- Scan parquet (1)
+- * Sort (25)
+- AQEShuffleRead (24)
+- ShuffleQueryStage (23), Statistics(sizeInBytes=88.0 B, rowCount=1)
+- Exchange (22)
+- * Project (21)
+- * Filter (20)
+- Scan csv (19)
+- == Initial Plan ==
TakeOrderedAndProject (47)
+- Project (46)
+- SortMergeJoin Inner (45)
:- Sort (40)
: +- Exchange (39)
: +- HashAggregate (38)
: +- HashAggregate (37)
: +- Project (36)
: +- Window (35)
: +- Sort (34)
: +- Exchange (33)
: +- Project (32)
: +- Generate (31)
: +- Project (30)
: +- Filter (29)
: +- Scan parquet (1)
+- Sort (44)
+- Exchange (43)
+- Project (42)
+- Filter (41)
+- Scan csv (19)
(1) Scan parquet
Output [2]: [_FICHIER#121177, _NOM_DE_FICHIER#121178]
Batched: true
Location: InMemoryFileIndex [file:/data/output/cache/parquet/uet/DOCUMENT_331395]
PushedFilters: [IsNotNull(_NOM_DE_FICHIER)]
ReadSchema: struct<_FICHIER:binary,_NOM_DE_FICHIER:string>
(2) ColumnarToRow [codegen id : 1]
Input [2]: [_FICHIER#121177, _NOM_DE_FICHIER#121178]
(3) Filter
Input [2]: [_FICHIER#121177, _NOM_DE_FICHIER#121178]
Condition : (((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)))) AND isnotnull(_NOM_DE_FICHIER#121178))
(4) Project
Output [2]: [_NOM_DE_FICHIER#121178 AS COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260]
Input [2]: [_FICHIER#121177, _NOM_DE_FICHIER#121178]
(5) Generate [codegen id : 2]
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260]
Arguments: explode(COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260), [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256], false, [COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265]
(6) Project
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, str_to_integer(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265, $.sentence_number_in_text)) AS COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, ARRAY_TO_STR_BEFORE_CONCAT(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265, $.term)) AS _w0#121341]
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265]
(7) Exchange
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: hashpartitioning(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, 10), ENSURE_REQUIREMENTS, [plan_id=9654]
(8) ShuffleQueryStage
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: 0
(9) AQEShuffleRead
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: coalesced
(10) Sort [codegen id : 4]
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256 ASC NULLS FIRST, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320 ASC NULLS FIRST, 1 DESC NULLS LAST], false, 0
(11) Window
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: [concatwithseparatorudaf(_w0#121341, com.adb.hdh.spark.job.scala.utils.ConcatWithSeparatorUDAF@1cc8a327, class[value[0]: string], class[value[0]: string], true, true, 0, 0, None) windowspecdefinition(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, 1 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340], [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320], [1 DESC NULLS LAST]
(12) Project [codegen id : 5]
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, str_replace(COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340, |, ) AS COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Input [4]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341, COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340]
(13) HashAggregate [codegen id : 5]
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
(14) HashAggregate [codegen id : 5]
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256 AS COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320 AS COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400 AS COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
(15) Exchange
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
Arguments: hashpartitioning(COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, 10), ENSURE_REQUIREMENTS, [plan_id=9804]
(16) ShuffleQueryStage
Output [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
Arguments: 2
(17) AQEShuffleRead
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
Arguments: coalesced
(18) Sort [codegen id : 6]
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
Arguments: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434 ASC NULLS FIRST], false, 0
(19) Scan csv
Output [3]: [IPP#121198, Séjour#121199, Document#121200]
Batched: false
Location: InMemoryFileIndex [file:/data/input/depot/csv/execution/LISTE_DOCUMENT__331639/current/ListePatDoc.csv]
PushedFilters: [EqualNullSafe(Séjour,S00001), IsNotNull(Document)]
ReadSchema: struct<IPP:string,Séjour:string,Document:string>
(20) Filter [codegen id : 3]
Input [3]: [IPP#121198, Séjour#121199, Document#121200]
Condition : ((Séjour#121199 <=> S00001) AND isnotnull(Document#121200))
(21) Project [codegen id : 3]
Output [3]: [IPP#121198 AS DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, Séjour#121199 AS DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, Document#121200 AS DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Input [3]: [IPP#121198, Séjour#121199, Document#121200]
(22) Exchange
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Arguments: hashpartitioning(DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445, 10), ENSURE_REQUIREMENTS, [plan_id=9689]
(23) ShuffleQueryStage
Output [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Arguments: 1
(24) AQEShuffleRead
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Arguments: coalesced
(25) Sort [codegen id : 7]
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Arguments: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445 ASC NULLS FIRST], false, 0
(26) SortMergeJoin [codegen id : 8]
Left keys [1]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434]
Right keys [1]: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Join type: Inner
Join condition: None
(27) Project [codegen id : 8]
Output [5]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434 AS COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435 AS COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436 AS COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443 AS COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444 AS COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
Input [6]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
(28) TakeOrderedAndProject
Input [5]: [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
Arguments: 500000, [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498 ASC NULLS FIRST, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499 ASC NULLS FIRST], [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
(29) Filter
Input [2]: [_FICHIER#121177, _NOM_DE_FICHIER#121178]
Condition : (((size(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)), true) > 0) AND isnotnull(from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)))) AND isnotnull(_NOM_DE_FICHIER#121178))
(30) Project
Output [2]: [_NOM_DE_FICHIER#121178 AS COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, from_json(ArrayType(StringType,false), to_json(bin_content_words(pdf, _FICHIER#121177), Some(Etc/UTC)), Some(Etc/UTC)) AS COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260]
Input [2]: [_FICHIER#121177, _NOM_DE_FICHIER#121178]
(31) Generate
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260]
Arguments: explode(COL_8D79A787_F26C_4B94_A6EC_8189C5CE45A2#121260), [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256], false, [COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265]
(32) Project
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, str_to_integer(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265, $.sentence_number_in_text)) AS COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, ARRAY_TO_STR_BEFORE_CONCAT(get_json_object(COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265, $.term)) AS _w0#121341]
Input [2]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_4180689C_D343_4FC4_A328_E4E828A0E6CD#121265]
(33) Exchange
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: hashpartitioning(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, 10), ENSURE_REQUIREMENTS, [plan_id=9599]
(34) Sort
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256 ASC NULLS FIRST, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320 ASC NULLS FIRST, 1 DESC NULLS LAST], false, 0
(35) Window
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341]
Arguments: [concatwithseparatorudaf(_w0#121341, com.adb.hdh.spark.job.scala.utils.ConcatWithSeparatorUDAF@1cc8a327, class[value[0]: string], class[value[0]: string], true, true, 0, 0, None) windowspecdefinition(COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, 1 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340], [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320], [1 DESC NULLS LAST]
(36) Project
Output [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, str_replace(COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340, |, ) AS COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Input [4]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, _w0#121341, COL_8CD8BD74_2A92_496E_A91B_AEC72D10ED2B#121340]
(37) HashAggregate
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
(38) HashAggregate
Input [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Keys [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400]
Functions: []
Aggregate Attributes: []
Results [3]: [COL_C5AAA034_D543_482D_838A_9EB70F8DF2A5#121256 AS COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_00E55315_0253_4512_A9A3_5A2E4A2B77AF#121320 AS COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_5DA9DDDB_6754_457A_8DB5_E5924C9A8467#121400 AS COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
(39) Exchange
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
Arguments: hashpartitioning(COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, 10), ENSURE_REQUIREMENTS, [plan_id=9609]
(40) Sort
Input [3]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436]
Arguments: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434 ASC NULLS FIRST], false, 0
(41) Filter
Input [3]: [IPP#121198, Séjour#121199, Document#121200]
Condition : ((Séjour#121199 <=> S00001) AND isnotnull(Document#121200))
(42) Project
Output [3]: [IPP#121198 AS DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, Séjour#121199 AS DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, Document#121200 AS DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Input [3]: [IPP#121198, Séjour#121199, Document#121200]
(43) Exchange
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Arguments: hashpartitioning(DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445, 10), ENSURE_REQUIREMENTS, [plan_id=9610]
(44) Sort
Input [3]: [DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Arguments: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445 ASC NULLS FIRST], false, 0
(45) SortMergeJoin
Left keys [1]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434]
Right keys [1]: [DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
Join type: Inner
Join condition: None
(46) Project
Output [5]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434 AS COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435 AS COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436 AS COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443 AS COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444 AS COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
Input [6]: [COL_5CE86268_8E19_4A53_9935_985331F31B08#121434, COL_060C5BD6_3C1B_4348_8453_E2A220EA4E4D#121435, COL_8F3355B5_46DB_4ABF_BF70_D5B295BC12EC#121436, DB_LISTE_DOCUME_331644_COL_99D1E2E6_1B06_4D50_BFA1_BF45EFDDD5C0#121443, DB_LISTE_DOCUME_331644_COL_7386F5B7_2029_4CD3_B486_A07376A49163#121444, DB_LISTE_DOCUME_331644_COL_3DE778B7_19E2_4C70_90F1_5EEA17EB280E#121445]
(47) TakeOrderedAndProject
Input [5]: [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
Arguments: 500000, [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498 ASC NULLS FIRST, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499 ASC NULLS FIRST], [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
(48) AdaptiveSparkPlan
Output [5]: [COL_196BA79D_6EF1_4A52_9B82_F4E6D7E48FBF#121498, COL_689822FA_4A8B_4F90_A6C6_CB49CDAF72BB#121499, COL_417F4ADD_681C_4929_B8DE_FAF8D3D05A2F#121500, COL_C74E7066_6339_4860_9AAD_EF599FD9B20A#121501, COL_2D4E6ABF_0599_4A68_A774_621656300A0B#121502]
Arguments: isFinalPlan=true