8282from scipy .optimize import minimize
8383
8484
85- def get_model_name (model ):
86- if model == 'mem_model1' :
87- return "k * log(x) + x * a + b"
88- elif model == 'mem_model2' :
89- return "k * log(x) + b * log(x)^2 + a"
90- elif model == 'mem_model3' :
91- return "k * log(x) + b * log(x)^2 + a * log(x)^3"
92- elif model == 'mem_model4' :
93- return "k * log(x) + b * log(x)^2 + a * log(x)^2.5"
94- elif model == 'time_model1' :
95- return "a + b + log(x) * k"
96- elif model == 'time_model2' :
97- return "a + b * x + log(x) * k"
98- elif model == 'time_model3' :
99- return "a + b * log(x)^2 + log(x) * k"
100- elif model == 'time_model4' :
101- return "a * log(x)^3 + b * log(x)^2 + log(x) * k"
102- else :
103- return "Unknown model"
104-
105-
10685def scrub_data (s ):
10786 r"""Scrubs data fields of characters not allowed by PostgreSQL
10887
@@ -2369,7 +2348,7 @@ def resource_allocation_plot(df, col_name):
23692348 fig , axs = plt .subplots (ncols = 2 , figsize = (10 , 4 ), sharey = False )
23702349
23712350 ax = axs [0 ]
2372- mem_models , time_models = _retrieve_equations ()
2351+ mem_models , time_models = retrieve_equations ()
23732352
23742353 # models for memory
23752354 _resource_allocation_plot_helper (
@@ -2382,9 +2361,9 @@ def resource_allocation_plot(df, col_name):
23822361 return fig , axs
23832362
23842363
2385- def _retrieve_equations ():
2364+ def retrieve_equations ():
23862365 '''
2387- Helepr function for resource_allocation_plot.
2366+ Helper function for resource_allocation_plot.
23882367 Retrieves equations from db. Creates dictionary for memory and time models.
23892368
23902369 Returns
@@ -2397,16 +2376,40 @@ def _retrieve_equations():
23972376 '''
23982377 memory_models = {}
23992378 time_models = {}
2379+ res = []
24002380 with qdb .sql_connection .TRN :
24012381 sql = ''' SELECT * FROM qiita.allocation_equations; '''
24022382 qdb .sql_connection .TRN .add (sql )
24032383 res = qdb .sql_connection .TRN .execute_fetchindex ()
2404- for models in res :
2405- if 'mem' in models [1 ]:
2406- memory_models [models [1 ]] = lambda x , k , a , b : eval (models [2 ])
2407- else :
2408- time_models [models [1 ]] = lambda x , k , a , b : eval (models [2 ])
2409- return (memory_models , time_models )
2384+ for models in res :
2385+ model_name = "Unknown model"
2386+ if models [1 ] == 'mem_model1' :
2387+ model_name = "k * log(x) + x * a + b"
2388+ elif models [1 ] == 'mem_model2' :
2389+ model_name = "k * log(x) + b * log(x)^2 + a"
2390+ elif models [1 ] == 'mem_model3' :
2391+ model_name = "k * log(x) + b * log(x)^2 + a * log(x)^3"
2392+ elif models [1 ] == 'mem_model4' :
2393+ model_name = "k * log(x) + b * log(x)^2 + a * log(x)^2.5"
2394+ elif models [1 ] == 'time_model1' :
2395+ model_name = "a + b + log(x) * k"
2396+ elif models [1 ] == 'time_model2' :
2397+ model_name = "a + b * x + log(x) * k"
2398+ elif models [1 ] == 'time_model3' :
2399+ model_name = "a + b * log(x)^2 + log(x) * k"
2400+ elif models [1 ] == 'time_model4' :
2401+ model_name = "a * log(x)^3 + b * log(x)^2 + log(x) * k"
2402+ if 'mem' in models [1 ]:
2403+ memory_models [models [1 ]] = {
2404+ "equation_name" : model_name ,
2405+ "equation" : lambda x , k , a , b : eval (models [2 ])
2406+ }
2407+ else :
2408+ time_models [models [1 ]] = {
2409+ "equation_name" : model_name ,
2410+ "equation" : lambda x , k , a , b : eval (models [2 ])
2411+ }
2412+ return (memory_models , time_models )
24102413
24112414
24122415def retrieve_resource_data (cname , sname , version , columns ):
@@ -2483,9 +2486,20 @@ def _resource_allocation_plot_helper(
24832486 Specifies x axis for the graph
24842487 curr: str, required
24852488 Either MaxRSSRaw or ElapsedRaw (y axis)
2486- models: dictionary, required
2487- Dictionary of functions that will be used for visualization
2489+ models: dictionary, required. Follows this structure
2490+ equation_name: string
2491+ Human readable representation of the equation
2492+ equation: Python lambda function
2493+ Lambda function representing equation to optimize
24882494
2495+ Returns
2496+ -------
2497+ best_model_name: string
2498+ the name of the best model from the table
2499+ best_model: function
2500+ best fitting function for the current dictionary models
2501+ options: object
2502+ object containing constants for the best model (e.g. k, a, b in kx+b*a)
24892503 """
24902504
24912505 x_data , y_data = df [col_name ], df [curr ]
@@ -2560,7 +2574,7 @@ def _resource_allocation_plot_helper(
25602574 label = host )
25612575 ax .set_title (
25622576 f'k||a||b: { k } ||{ a } ||{ b } \n '
2563- f'model: { get_model_name ( best_model_name ) } \n '
2577+ f'model: { models [ best_model_name ][ "equation_name" ] } \n '
25642578 f'real: { mini } || { maxi } \n '
25652579 f'calculated: { cmin } || { cmax } \n '
25662580 f'failures: { failures } ' )
@@ -2583,8 +2597,11 @@ def _resource_allocation_calculate(
25832597 current type (e.g. MaxRSSRaw)
25842598 col_name: str, required
25852599 Specifies x axis for the graph
2586- models: dictionary, required
2587- Dictionary of functions that will be used for visualization
2600+ models: dictionary, required. Follows this structure
2601+ equation_name: string
2602+ Human readable representation of the equation
2603+ equation: Python lambda function
2604+ Lambda function representing equation to optimize
25882605 depth: int, required
25892606 Maximum number of iterations in binary search
25902607 tolerance: int, required,
@@ -2607,6 +2624,7 @@ def _resource_allocation_calculate(
26072624 best_failures = np .inf
26082625 best_max = np .inf
26092626 for model_name , model in models .items ():
2627+ model_equation = model ['equation' ]
26102628 # start values for binary search, where sl is left, sr is right
26112629 # penalty weight must be positive & non-zero, hence, sl >= 1.
26122630 # the upper bound for error can be an arbitrary large number
@@ -2624,13 +2642,13 @@ def _resource_allocation_calculate(
26242642 while left < right and cnt < depth :
26252643 middle = (left + right ) // 2
26262644 options = minimize (_resource_allocation_custom_loss , init ,
2627- args = (x , y , model , middle ))
2645+ args = (x , y , model_equation , middle ))
26282646 k , a , b = options .x
26292647 # important: here we take the 2nd (last) value of tuple since
26302648 # the helper function returns success, then failures.
26312649 failures_df = _resource_allocation_success_failures (
2632- df , k , a , b , model , col_name , type_ )[- 1 ]
2633- y_plot = model (x , k , a , b )
2650+ df , k , a , b , model_equation , col_name , type_ )[- 1 ]
2651+ y_plot = model_equation (x , k , a , b )
26342652 if not any (y_plot ):
26352653 continue
26362654 cmax = max (y_plot )
@@ -2678,7 +2696,7 @@ def _resource_allocation_calculate(
26782696 best_failures = prev_failures
26792697 best_max = min_max
26802698 best_model_name = model_name
2681- best_model = model
2699+ best_model = model_equation
26822700 best_result = res
26832701 return best_model_name , best_model , best_result
26842702
@@ -2695,8 +2713,8 @@ def _resource_allocation_custom_loss(params, x, y, model, p):
26952713 Represents x data for the function calculation
26962714 y: pandas.Series (pandas column), required
26972715 Represents y data for the function calculation
2698- models: list, required
2699- List of functions that will be used for visualization
2716+ model: Python function
2717+ Lambda function representing current equation
27002718 p: int, required
27012719 Penalty weight for custom loss function
27022720
0 commit comments