From c0a488f9347ff3dc5fdea96fb9bbeb8bdbedad9b Mon Sep 17 00:00:00 2001 From: deurishhe Date: Thu, 30 Apr 2026 06:48:51 +0300 Subject: [PATCH] POS-84: Fixed downloader v2 --- .../parser/rosstat/rosstat/config/config.go | 6 +- .../rosstat/rosstat/downloader/downloader.go | 125 +++++++++--------- .../rosstat/rosstat/extractor/population.go | 4 +- 3 files changed, 71 insertions(+), 64 deletions(-) diff --git a/src/pkg/parser/rosstat/rosstat/config/config.go b/src/pkg/parser/rosstat/rosstat/config/config.go index abfe88a..439cb01 100644 --- a/src/pkg/parser/rosstat/rosstat/config/config.go +++ b/src/pkg/parser/rosstat/rosstat/config/config.go @@ -34,12 +34,12 @@ func NewConfig() *Config { }, DownloadHTMLMaxAttempts: 6, DownloadHTMLTimeoutSeconds: 5, - DownloadHTMLBatchSize: 5, + DownloadHTMLBatchSize: 10, DownloadHTMLTimeSleepSeconds: 2, DownloadCSVMaxAttempts: 8, DownloadCSVTimeoutSeconds: 5, - DownloadCSVBatchSize: 5, - DownloadCSVTimeSleepSeconds: 3, + DownloadCSVBatchSize: 10, + DownloadCSVTimeSleepSeconds: 2, } } diff --git a/src/pkg/parser/rosstat/rosstat/downloader/downloader.go b/src/pkg/parser/rosstat/rosstat/downloader/downloader.go index 743242d..599f19f 100644 --- a/src/pkg/parser/rosstat/rosstat/downloader/downloader.go +++ b/src/pkg/parser/rosstat/rosstat/downloader/downloader.go @@ -34,8 +34,6 @@ func DownloadCSV(ctx context.Context, subjectCode int, indicator int, codes []in bodyStr := params.buildRequestBodyStr() attempts := 0 - var resp *http.Response - var err error for attempts < config.DownloadCSVMaxAttempts { req, err := http.NewRequestWithContext(ctx, "POST", url, strings.NewReader(bodyStr)) @@ -57,44 +55,48 @@ func DownloadCSV(ctx context.Context, subjectCode int, indicator int, codes []in }, } - resp, err = client.Do(req) - if err == nil { - break + resp, err := client.Do(req) + if err != nil { + if resp != nil { + resp.Body.Close() + } + attempts++ + time.Sleep(time.Duration(config.DownloadHTMLTimeSleepSeconds) * time.Second) + continue } - attempts++ - time.Sleep(time.Duration(config.DownloadCSVTimeSleepSeconds) * time.Second) - } - if err != nil { - return "", fmt.Errorf("error executing request: %w", err) - } + if err != nil { + return "", fmt.Errorf("error executing request: %w", err) + } - defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + log.Printf("ERROR: unexpected status %d: code: %d\n", resp.StatusCode, subjectCode) + continue + } - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("unexpected status %d: code: %d, body: %s", resp.StatusCode, subjectCode, string(body)) - } + rawBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("reading body: %w", err) + } - rawBody, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("reading body: %w", err) - } + decoder := charmap.Windows1251.NewDecoder() + utf8Body, err := decoder.Bytes(rawBody) + if err != nil { + return "", fmt.Errorf("decoding Windows-1251: %w", err) + } - decoder := charmap.Windows1251.NewDecoder() - utf8Body, err := decoder.Bytes(rawBody) - if err != nil { - return "", fmt.Errorf("decoding Windows-1251: %w", err) - } + fileName := getCSVFileName(subjectCode, indicator) - fileName := getCSVFileName(subjectCode, indicator) + if err := os.WriteFile(fileName, utf8Body, 0644); err != nil { + return "", fmt.Errorf("error writing file %s: %w", fileName, err) + } - if err := os.WriteFile(fileName, utf8Body, 0644); err != nil { - return "", fmt.Errorf("error writing file %s: %w", fileName, err) + resp.Body.Close() + log.Printf("Saved CSV to %s\n", fileName) + return fileName, nil } - log.Printf("Saved CSV to %s\n", fileName) - return fileName, nil + return "", fmt.Errorf("Failed CSV request by code %d\n", subjectCode) } func DownloadHTML(ctx context.Context, subjectCode int) (string, error) { @@ -107,8 +109,6 @@ func DownloadHTML(ctx context.Context, subjectCode int) (string, error) { body := url.Values{} body.Set("pl", strconv.Itoa(config.GetPopulationIndicator(subjectCode))) attempts := 0 - var resp *http.Response - var err error for attempts < config.DownloadHTMLMaxAttempts { req, err := http.NewRequestWithContext(ctx, "POST", urlStr, strings.NewReader(body.Encode())) @@ -126,44 +126,49 @@ func DownloadHTML(ctx context.Context, subjectCode int) (string, error) { }, } - resp, err = client.Do(req) - if err == nil { - break + resp, err := client.Do(req) + + if err != nil { + if resp != nil { + resp.Body.Close() + } + attempts++ + time.Sleep(time.Duration(config.DownloadHTMLTimeSleepSeconds) * time.Second) + continue } - attempts++ - time.Sleep(time.Duration(config.DownloadHTMLTimeSleepSeconds) * time.Second) - } - if err != nil { - return "", fmt.Errorf("error executing request: %w", err) - } + if err != nil { + return "", fmt.Errorf("error executing request: %w", err) + } - defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + log.Printf("ERROR: unexpected status %d: code: %d\n", resp.StatusCode, subjectCode) + continue + } - if resp.StatusCode != http.StatusOK { - body, _ := io.ReadAll(resp.Body) - return "", fmt.Errorf("unexpected status %d: code: %d, body: %s", resp.StatusCode, subjectCode, string(body)) - } + rawBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("reading body: %w", err) + } - rawBody, err := io.ReadAll(resp.Body) - if err != nil { - return "", fmt.Errorf("reading body: %w", err) - } + decoder := charmap.Windows1251.NewDecoder() + utf8Body, err := decoder.Bytes(rawBody) + if err != nil { + return "", fmt.Errorf("decoding Windows-1251: %w", err) + } - decoder := charmap.Windows1251.NewDecoder() - utf8Body, err := decoder.Bytes(rawBody) - if err != nil { - return "", fmt.Errorf("decoding Windows-1251: %w", err) - } + fileName := getHTMLFileName(subjectCode) - fileName := getHTMLFileName(subjectCode) + if err := os.WriteFile(fileName, utf8Body, 0644); err != nil { + return "", fmt.Errorf("error writing file %s: %w", fileName, err) + } - if err := os.WriteFile(fileName, utf8Body, 0644); err != nil { - return "", fmt.Errorf("error writing file %s: %w", fileName, err) + resp.Body.Close() + log.Printf("Saved HTML to %s\n", fileName) + return fileName, nil } - log.Printf("Saved HTML to %s\n", fileName) - return fileName, nil + return "", fmt.Errorf("Failed HTML request by code %d\n", subjectCode) } func CleanupFile(path string) { diff --git a/src/pkg/parser/rosstat/rosstat/extractor/population.go b/src/pkg/parser/rosstat/rosstat/extractor/population.go index 3982738..ee23ac1 100644 --- a/src/pkg/parser/rosstat/rosstat/extractor/population.go +++ b/src/pkg/parser/rosstat/rosstat/extractor/population.go @@ -69,7 +69,9 @@ func (ext *PopulationExtractor) getNameOrPopulation(row []string, manager *state func (ext *PopulationExtractor) getPopulation(row []string, manager *state_manager.StateManager[dao.PopulationExtracted]) { populationDAOs := make([]*dao.PopulationExtracted, 0, len(ext.years)-1) - for i := 1; i < len(ext.years); i++ { + maxIndex := min(len(ext.years), len(row)) + + for i := 1; i < maxIndex; i++ { population, err := strconv.Atoi(row[i]) if err == nil {