@@ -2,56 +2,105 @@ package actions
22
33import (
44 "context"
5- "errors"
65 "fmt"
6+ "os"
77 "sync"
88
99 "github.com/google/uuid"
1010 "github.com/trickest/trickest-cli/pkg/filesystem"
1111 "github.com/trickest/trickest-cli/pkg/trickest"
1212)
1313
14- func DownloadRunOutput (client * trickest.Client , run * trickest.Run , nodes []string , files []string , destinationPath string ) error {
14+ // DownloadResult represents the result of downloading outputs for a single subjob
15+ type DownloadResult struct {
16+ SubJobName string
17+ FileName string
18+ Success bool
19+ Error error
20+ }
21+
22+ func PrintDownloadResults (results []DownloadResult ) {
23+ successCount := 0
24+ failureCount := 0
25+ for _ , result := range results {
26+ if result .Success {
27+ successCount ++
28+ } else {
29+ failureCount ++
30+ if result .FileName != "" {
31+ fmt .Fprintf (os .Stderr , "Warning: Failed to download file %q for node %q: %v\n " , result .FileName , result .SubJobName , result .Error )
32+ } else {
33+ fmt .Fprintf (os .Stderr , "Warning: Failed to download output for node %q: %v\n " , result .SubJobName , result .Error )
34+ }
35+ }
36+ }
37+
38+ if failureCount > 0 {
39+ fmt .Fprintf (os .Stderr , "Download completed with %d successful and %d failed downloads\n " , successCount , failureCount )
40+ } else if successCount > 0 {
41+ fmt .Printf ("Successfully downloaded outputs for %d sub-jobs\n " , successCount )
42+ }
43+ }
44+
45+ func DownloadRunOutput (client * trickest.Client , run * trickest.Run , nodes []string , files []string , destinationPath string ) ([]DownloadResult , error ) {
1546 if run .Status == "PENDING" || run .Status == "SUBMITTED" {
16- return fmt .Errorf ("run %s has not started yet (status: %s)" , run .ID .String (), run .Status )
47+ return nil , fmt .Errorf ("run %s has not started yet (status: %s)" , run .ID .String (), run .Status )
1748 }
1849
1950 ctx := context .Background ()
2051
2152 subJobs , err := client .GetSubJobs (ctx , * run .ID )
2253 if err != nil {
23- return fmt .Errorf ("failed to get subjobs for run %s: %w" , run .ID .String (), err )
54+ return nil , fmt .Errorf ("failed to get subjobs for run %s: %w" , run .ID .String (), err )
2455 }
2556
2657 version , err := client .GetWorkflowVersion (ctx , * run .WorkflowVersionInfo )
2758 if err != nil {
28- return fmt .Errorf ("could not get workflow version for run %s: %w" , run .ID .String (), err )
59+ return nil , fmt .Errorf ("could not get workflow version for run %s: %w" , run .ID .String (), err )
2960 }
3061 subJobs = trickest .LabelSubJobs (subJobs , * version )
3162
3263 matchingSubJobs , err := trickest .FilterSubJobs (subJobs , nodes )
3364 if err != nil {
34- return fmt .Errorf ("no completed node outputs matching your query were found in the run %s: %w" , run .ID .String (), err )
65+ return nil , fmt .Errorf ("no completed node outputs matching your query were found in the run %s: %w" , run .ID .String (), err )
3566 }
3667
3768 runDir , err := filesystem .CreateRunDir (destinationPath , * run )
3869 if err != nil {
39- return fmt .Errorf ("failed to create directory for run %s: %w" , run .ID .String (), err )
70+ return nil , fmt .Errorf ("failed to create directory for run %s: %w" , run .ID .String (), err )
4071 }
4172
73+ var allResults []DownloadResult
74+ var errCount int
4275 for _ , subJob := range matchingSubJobs {
4376 isModule := version .Data .Nodes [subJob .Name ].Type == "WORKFLOW"
44- if err := downloadSubJobOutput (client , runDir , & subJob , files , run .ID , isModule ); err != nil {
45- return fmt .Errorf ("failed to download output for node %s: %w" , subJob .Label , err )
77+ results := downloadSubJobOutput (client , runDir , & subJob , files , run .ID , isModule )
78+
79+ for _ , result := range results {
80+ if ! result .Success {
81+ errCount ++
82+ }
4683 }
84+ allResults = append (allResults , results ... )
4785 }
4886
49- return nil
87+ // If all subjobs failed, return an error
88+ if errCount == len (allResults ) && len (allResults ) > 0 {
89+ return allResults , fmt .Errorf ("failed to download outputs for all nodes" )
90+ }
91+
92+ // If only some failed, return results but no error
93+ return allResults , nil
5094}
5195
52- func downloadSubJobOutput (client * trickest.Client , savePath string , subJob * trickest.SubJob , files []string , runID * uuid.UUID , isModule bool ) error {
96+ func downloadSubJobOutput (client * trickest.Client , savePath string , subJob * trickest.SubJob , files []string , runID * uuid.UUID , isModule bool ) [] DownloadResult {
5397 if ! subJob .TaskGroup && subJob .Status != "SUCCEEDED" {
54- return fmt .Errorf ("subjob %s (ID: %s) is not completed (status: %s)" , subJob .Label , subJob .ID , subJob .Status )
98+ return []DownloadResult {{
99+ SubJobName : subJob .Label ,
100+ FileName : "" ,
101+ Success : false ,
102+ Error : fmt .Errorf ("subjob %s (ID: %s) is not completed (status: %s)" , subJob .Label , subJob .ID , subJob .Status ),
103+ }}
55104 }
56105
57106 if subJob .TaskGroup {
@@ -61,18 +110,28 @@ func downloadSubJobOutput(client *trickest.Client, savePath string, subJob *tric
61110 return downloadSingleSubJobOutput (client , savePath , subJob , files , runID , isModule )
62111}
63112
64- func downloadTaskGroupOutput (client * trickest.Client , savePath string , subJob * trickest.SubJob , files []string , runID * uuid.UUID ) error {
113+ func downloadTaskGroupOutput (client * trickest.Client , savePath string , subJob * trickest.SubJob , files []string , runID * uuid.UUID ) [] DownloadResult {
65114 ctx := context .Background ()
66115 children , err := client .GetChildSubJobs (ctx , subJob .ID )
67116 if err != nil {
68- return fmt .Errorf ("could not get child subjobs for subjob %s (ID: %s): %w" , subJob .Label , subJob .ID , err )
117+ return []DownloadResult {{
118+ SubJobName : subJob .Label ,
119+ FileName : "" ,
120+ Success : false ,
121+ Error : fmt .Errorf ("could not get child subjobs for subjob %s (ID: %s): %w" , subJob .Label , subJob .ID , err ),
122+ }}
69123 }
70124 if len (children ) == 0 {
71- return fmt .Errorf ("no child subjobs found for subjob %s (ID: %s)" , subJob .Label , subJob .ID )
125+ return []DownloadResult {{
126+ SubJobName : subJob .Label ,
127+ FileName : "" ,
128+ Success : false ,
129+ Error : fmt .Errorf ("no child subjobs found for subjob %s (ID: %s)" , subJob .Label , subJob .ID ),
130+ }}
72131 }
73132
74133 var mu sync.Mutex
75- var errs []error
134+ var results []DownloadResult
76135 var wg sync.WaitGroup
77136 sem := make (chan struct {}, 5 )
78137
@@ -86,51 +145,73 @@ func downloadTaskGroupOutput(client *trickest.Client, savePath string, subJob *t
86145 child , err := client .GetChildSubJob (ctx , subJob .ID , i )
87146 if err != nil {
88147 mu .Lock ()
89- errs = append (errs , fmt .Errorf ("could not get child %d subjobs for subjob %s (ID: %s): %w" , i , subJob .Label , subJob .ID , err ))
148+ results = append (results , DownloadResult {
149+ SubJobName : fmt .Sprintf ("%d-%s" , i , subJob .Label ),
150+ FileName : "" ,
151+ Success : false ,
152+ Error : fmt .Errorf ("could not get child %d subjobs for subjob %s (ID: %s): %w" , i , subJob .Label , subJob .ID , err ),
153+ })
90154 mu .Unlock ()
91155 return
92156 }
93157
94158 child .Label = fmt .Sprintf ("%d-%s" , i , subJob .Label )
95- if err := downloadSubJobOutput (client , savePath , & child , files , runID , false ); err != nil {
96- mu . Lock ()
97- errs = append ( errs , err )
98- mu . Unlock ( )
99- }
159+ childResults := downloadSubJobOutput (client , savePath , & child , files , runID , false )
160+
161+ mu . Lock ( )
162+ results = append ( results , childResults ... )
163+ mu . Unlock ()
100164 }(i )
101165 }
102166 wg .Wait ()
103167
104- if len (errs ) > 0 {
105- return fmt .Errorf ("errors occurred while downloading subjob children outputs:\n %s" , errors .Join (errs ... ))
106- }
107- return nil
168+ return results
108169}
109170
110- func downloadSingleSubJobOutput (client * trickest.Client , savePath string , subJob * trickest.SubJob , files []string , runID * uuid.UUID , isModule bool ) error {
171+ func downloadSingleSubJobOutput (client * trickest.Client , savePath string , subJob * trickest.SubJob , files []string , runID * uuid.UUID , isModule bool ) [] DownloadResult {
111172 ctx := context .Background ()
112- var errs []error
113173
114174 subJobOutputs , err := getSubJobOutputs (client , ctx , subJob , runID , isModule )
115175 if err != nil {
116- return err
176+ return []DownloadResult {{
177+ SubJobName : subJob .Label ,
178+ FileName : "" ,
179+ Success : false ,
180+ Error : err ,
181+ }}
117182 }
118183
119184 subJobOutputs = filterSubJobOutputsByFileNames (subJobOutputs , files )
120185 if len (subJobOutputs ) == 0 {
121- return fmt .Errorf ("no matching output files found for subjob %s (ID: %s)" , subJob .Label , subJob .ID )
186+ return []DownloadResult {{
187+ SubJobName : subJob .Label ,
188+ FileName : "" ,
189+ Success : false ,
190+ Error : fmt .Errorf ("no matching output files found for subjob %s (ID: %s)" , subJob .Label , subJob .ID ),
191+ }}
122192 }
123193
194+ var results []DownloadResult
195+
124196 for _ , output := range subJobOutputs {
125197 if err := downloadOutput (client , savePath , subJob , output ); err != nil {
126- errs = append (errs , err )
198+ results = append (results , DownloadResult {
199+ SubJobName : subJob .Label ,
200+ FileName : output .Name ,
201+ Success : false ,
202+ Error : err ,
203+ })
204+ } else {
205+ results = append (results , DownloadResult {
206+ SubJobName : subJob .Label ,
207+ FileName : output .Name ,
208+ Success : true ,
209+ Error : nil ,
210+ })
127211 }
128212 }
129213
130- if len (errs ) > 0 {
131- return fmt .Errorf ("errors occurred while downloading subjob outputs:\n %s" , errors .Join (errs ... ))
132- }
133- return nil
214+ return results
134215}
135216
136217func getSubJobOutputs (client * trickest.Client , ctx context.Context , subJob * trickest.SubJob , runID * uuid.UUID , isModule bool ) ([]trickest.SubJobOutput , error ) {
0 commit comments