1+ /* Copyright 2025 The xLLM Authors. All Rights Reserved.
2+
3+ Licensed under the Apache License, Version 2.0 (the "License");
4+ you may not use this file except in compliance with the License.
5+ You may obtain a copy of the License at
6+
7+ https://github.com/jd-opensource/xllm-service/blob/main/LICENSE
8+
9+ Unless required by applicable law or agreed to in writing, software
10+ distributed under the License is distributed on an "AS IS" BASIS,
11+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+ See the License for the specific language governing permissions and
13+ limitations under the License.
14+ ==============================================================================*/
15+
16+ #include " time_predictor.h"
17+
18+ static constexpr int32_t kDegree = 2 ;
19+
20+ namespace xllm_service {
21+
22+ TimePredictor::TimePredictor (
23+ const std::vector<std::pair<int32_t , double >>& ttft_profiling_data,
24+ const std::vector<std::tuple<int32_t , int32_t , double >>&
25+ tpot_profiling_data) {
26+ if (!ttft_profiling_data.empty ()) {
27+ // construct Vandermonde matrix
28+ int32_t m = ttft_profiling_data.size ();
29+ int32_t n = kDegree + 1 ;
30+ Eigen::MatrixXd matrix (m, n);
31+ for (int32_t i = 0 ; i < m; ++i) {
32+ for (int32_t j = 0 ; j < n; ++j) {
33+ matrix (i, j) = std::pow (ttft_profiling_data[i].first , j);
34+ }
35+ }
36+
37+ // construct target vector
38+ Eigen::VectorXd target (m);
39+ for (int32_t i = 0 ; i < m; ++i) {
40+ target (i) = ttft_profiling_data[i].second ;
41+ }
42+
43+ // get coefficients
44+ ttft_coefficients_ = matrix.colPivHouseholderQr ().solve (target);
45+ } else {
46+ ttft_coefficients_ = Eigen::VectorXd::Zero (1 );
47+ }
48+
49+ if (!tpot_profiling_data.empty ()) {
50+ int32_t m = tpot_profiling_data.size ();
51+ int32_t n = kDegree + 1 ;
52+ Eigen::MatrixXd matrix (m, n);
53+ for (int32_t i = 0 ; i < m; ++i) {
54+ int32_t avg_length = std::get<0 >(tpot_profiling_data[i]);
55+ int32_t batch_size = std::get<1 >(tpot_profiling_data[i]);
56+
57+ matrix (i, 0 ) = 1.0 ; // the index 0 is always for constant
58+ matrix (i, 1 ) = batch_size;
59+ matrix (i, 2 ) = batch_size * (avg_length - 1 );
60+ }
61+
62+ // construct target vector
63+ Eigen::VectorXd target (m);
64+ for (int32_t i = 0 ; i < m; ++i) {
65+ target (i) = std::get<2 >(tpot_profiling_data[i]);
66+ }
67+
68+ // get coefficients
69+ tpot_coefficients_ = matrix.colPivHouseholderQr ().solve (target);
70+ } else {
71+ ttft_coefficients_ = Eigen::VectorXd::Zero (3 );
72+ }
73+ }
74+
75+ double TimePredictor::predict_ttft (int32_t length) {
76+ double result = 0.0 ;
77+ double power = 1.0 ;
78+ for (int32_t i = 0 ; i < ttft_coefficients_.size (); ++i) {
79+ result += ttft_coefficients_ (i) * power;
80+ power *= length;
81+ }
82+
83+ return result;
84+ }
85+
86+ double TimePredictor::predict_tpot (int32_t total_length, int32_t batch_size) {
87+ double result = 0.0 ;
88+ result = tpot_coefficients_ (0 ) + tpot_coefficients_ (1 ) * batch_size +
89+ tpot_coefficients_ (2 ) * total_length;
90+ return result;
91+ }
92+
93+ } // namespace xllm_service
0 commit comments