%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1996-2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
-module(disksup_SUITE).
-include_lib("common_test/include/ct.hrl").

%% Test server specific exports
-export([all/0, suite/0]).
-export([init_per_suite/1, end_per_suite/1]).
-export([init_per_testcase/2, end_per_testcase/2]).

%% Test cases
-export([api/1, config/1, alarm/1]).
-export([port/1]).
-export([terminate/1, unavailable/1, restart/1]).
-export([otp_5910/1]).
-export([posix_only/1, parse_df_output_posix/1, parse_df_output_susv3/1]).

%% The os_mon application is started once for the whole suite and the
%% configuration is passed on untouched.
init_per_suite(Config) when is_list(Config) ->
    ok = application:start(os_mon),
    Config.

%% Stops the os_mon application again once the suite is done.
end_per_suite(Config) when is_list(Config) ->
    ok = application:stop(os_mon),
    Config.

%% The `unavailable' test case needs disksup to be shut down first;
%% every other test case runs without extra preparation.
init_per_testcase(unavailable, Config) ->
    terminate(Config),
    Config;
init_per_testcase(_Case, Config) ->
    Config.

%% `unavailable' and `posix_only' leave disksup stopped or reconfigured,
%% so restart/1 is called after them. Nothing to clean up otherwise.
end_per_testcase(unavailable, Config) ->
    restart(Config),
    ok;
end_per_testcase(posix_only, Config) ->
    restart(Config),
    ok;
end_per_testcase(_Case, _Config) ->
    ok.

%% Suite defaults: the ts_install_cth hook and a one minute timetrap.
suite() ->
    Hooks = {ct_hooks, [ts_install_cth]},
    Timetrap = {timetrap, {minutes, 1}},
    [Hooks, Timetrap].
%% Test cases to run: the complete list on unix and win32, and only
%% `unavailable' for any other OS type.
all() ->
    Regular = [api, config, alarm, port, posix_only, unavailable,
               parse_df_output_posix, parse_df_output_susv3],
    Tickets = [otp_5910],
    case test_server:os_type() of
        {Family, _OSname} when Family =:= unix; Family =:= win32 ->
            Regular ++ Tickets;
        _OS ->
            [unavailable]
    end.

%% Test of API functions
api(Config) when is_list(Config) ->

    %% get_disk_data()
    ok = check_get_disk_data(),

    %% get_check_interval(): the interval is set in minutes but the
    %% value expected back here is the same time in milliseconds
    ThirtyMinutes = 30 * 60 * 1000,
    TwentyMinutes = 20 * 60 * 1000,
    ThirtyMinutes = disksup:get_check_interval(),

    %% set_check_interval(Minutes): a rejected value must leave the
    %% previously set interval in place
    ok = disksup:set_check_interval(20),
    TwentyMinutes = disksup:get_check_interval(),
    {'EXIT', {badarg, _}} = (catch disksup:set_check_interval(0.5)),
    TwentyMinutes = disksup:get_check_interval(),
    ok = disksup:set_check_interval(30),

    %% get_almost_full_threshold(): set as a float fraction, expected
    %% back as an integer percentage
    DefaultPercent = 80,
    RaisedPercent = 90,
    DefaultPercent = disksup:get_almost_full_threshold(),

    %% set_almost_full_threshold(Float): a rejected value must leave the
    %% previously set threshold in place
    ok = disksup:set_almost_full_threshold(0.90),
    RaisedPercent = disksup:get_almost_full_threshold(),
    {'EXIT', {badarg, _}} =
        (catch disksup:set_almost_full_threshold(-0.5)),
    RaisedPercent = disksup:get_almost_full_threshold(),
    ok = disksup:set_almost_full_threshold(0.80),

    ok.
%% Test configuration
config(Config) when is_list(Config) ->

    %% Valid parameter values must be picked up when disksup is restarted
    ok = set_disk_params(29, 0.81),
    ok = restart_disksup_child(),

    1740000 = disksup:get_check_interval(),
    81 = disksup:get_almost_full_threshold(),

    %% Bad parameter values should be ignored: after a restart the test
    %% expects the values 1800000 and 80 again
    ok = set_disk_params(0.5, -0.81),
    ok = restart_disksup_child(),

    1800000 = disksup:get_check_interval(),
    80 = disksup:get_almost_full_threshold(),

    %% Reset configuration parameters
    ok = set_disk_params(30, 0.80),
    ok.

%% Sets the two disksup related os_mon application parameters,
%% check interval first and almost-full threshold second.
set_disk_params(Interval, Threshold) ->
    ok = application:set_env(os_mon, disk_space_check_interval, Interval),
    ok = application:set_env(os_mon, disk_almost_full_threshold, Threshold),
    ok.

%% Terminates disksup under os_mon_sup and has the supervisor start it again.
restart_disksup_child() ->
    ok = supervisor:terminate_child(os_mon_sup, disksup),
    {ok, _Child} = supervisor:restart_child(os_mon_sup, disksup),
    ok.
%%----------------------------------------------------------------------
%% NOTE: This test case is somewhat fragile. It fails if the disk usage
%% changes too much while it is running, or if the alarm_handler
%% receives the alarms too late.
%%----------------------------------------------------------------------

%% Test that alarms are set and cleared
alarm(Config) when is_list(Config) ->

    %% Step 1: the number of disks at or above the current threshold
    %% must equal the number of disk alarms currently set
    BaseThreshold = disksup:get_almost_full_threshold(), % 80
    BaseData = disksup:get_disk_data(),
    BaseOver = over_threshold(BaseData, BaseThreshold),
    BaseAlarms = get_alarms(),
    case BaseOver == length(BaseAlarms) of
        true ->
            true;
        false ->
            dump_info(),
            ct:fail({bad_alarms, BaseThreshold, BaseData, BaseAlarms})
    end,

    %% Step 2: look for a disk whose usage is non-zero but below the
    %% threshold, lower the threshold to just under that usage and make
    %% sure more alarms are set than before
    IsBelow = fun({_Id, _Kbyte, Cap}) when Cap > 0, Cap < BaseThreshold ->
                      true;
                 ({_Id, _Kbyte, _Cap}) ->
                      false
              end,
    case until(IsBelow, BaseData) of
        false ->
            ignore;
        {_, _, LowCap} ->
            LowThreshold = LowCap - 1,
            ok = disksup:set_almost_full_threshold(LowThreshold / 100),
            disksup ! timeout, % force a disk check
            LowData = disksup:get_disk_data(),
            LowOver = over_threshold(LowData, LowThreshold),
            LowAlarms = get_alarms(),
            case (LowOver == length(LowAlarms)) andalso (LowOver > BaseOver) of
                true ->
                    true;
                false ->
                    dump_info(),
                    ct:fail({bad_alarms, LowThreshold, LowData, LowAlarms})
            end
    end,

    %% Step 3: take the highest usage among all disks, raise the
    %% threshold above it and make sure every alarm is cleared.
    %% max(Acc, Cap) keeps the accumulator on ties.
    Highest = lists:foldl(fun({_Id, _Kbyte, Cap}, Acc) -> max(Acc, Cap) end,
                          0, BaseData),
    case Highest of
        100 ->
            ignore;
        _ when Highest < 100 ->
            HighThreshold = Highest + 1,
            ok = disksup:set_almost_full_threshold(HighThreshold / 100),
            disksup ! timeout, % force a disk check
            HighData = disksup:get_disk_data(),
            HighOver = over_threshold(HighData, HighThreshold),
            HighAlarms = get_alarms(),
            case {HighOver, HighAlarms} of
                {0, []} ->
                    ok;
                _ ->
                    dump_info(),
                    ct:fail({bad_alarms, HighThreshold, HighData, HighAlarms})
            end
    end,

    %% Reset threshold
    ok = disksup:set_almost_full_threshold(BaseThreshold / 100),
    ok.

%% Counts the disks whose usage is at or above Threshold, after sorting
%% the data on disk id and dropping duplicated disk entries.
over_threshold(Data, Threshold) ->
    Unique = remove_duplicated_disks(lists:keysort(1, Data)),
    length([Cap || {_Id, _Kbyte, Cap} <- Unique, Cap >= Threshold]).
%% On some platforms (for example MontaVista) the data for one disk can
%% show up more than once:
%%
%%   Linux ppb 2.4.20_mvl31-pcore680 #1 Sun Feb 1 23:12:56 PST 2004 ppc unknown
%%
%%   MontaVista(R) Linux(R) Professional Edition 3.1
%%
%%   [ppb:~]> /bin/df -lk
%%   Filesystem   1k-blocks     Used Available Use% Mounted on
%%   rootfs         8066141  3023763   4961717  38% /
%%   /dev/root      8066141  3023763   4961717  38% /
%%   tmpfs           192892        0    192892   0% /dev/shm
%%
%% which disksup reports as:
%%   [{"/",8066141,38}, {"/",8066141,38}, {"/dev/shm",192892,0}]
%%
%% disksup will only set ONE alarm for "/".
%% The disk data must therefore be sorted and stripped of duplicated
%% disk tuples before the expected number of alarms is calculated,
%% otherwise the test case fails erroneously.
%%
%% Expects a list where entries with the same id are adjacent; of each
%% run of equal ids only the last entry is kept.
remove_duplicated_disks([{Id, _, _} | [{Id, _, _} | _] = Rest]) ->
    remove_duplicated_disks(Rest);
remove_duplicated_disks([Disk | Rest]) ->
    [Disk | remove_duplicated_disks(Rest)];
remove_duplicated_disks([]) ->
    [].

%% Returns the alarms known to alarm_handler whose alarm id has the
%% form {disk_almost_full, Disk}.
get_alarms() ->
    [Alarm || {{disk_almost_full, _Disk}, _} = Alarm
                  <- alarm_handler:get_alarms()].

%% Returns the first element for which Pred returns true,
%% or false if there is no such element.
until(_Pred, []) ->
    false;
until(Pred, [Candidate | Rest]) ->
    case Pred(Candidate) of
        false -> until(Pred, Rest);
        true -> Candidate
    end.
%% Test that disksup handles a terminating port program
port(Config) when is_list(Config) ->
    PsOutput = os:cmd("ps -ef | grep '[d]isksup'"),
    %% The second whitespace separated field of the ps output is taken
    %% as the OS pid of the port program
    case io_lib:fread("~s ~s", PsOutput) of
        {ok, [_Uid, OsPid], _Rest} ->
            kill_port_program(OsPid, PsOutput);
        _ ->
            {skip, {os_pid_not_found, PsOutput}}
    end.

%% Kills the port program with the given OS pid and checks that disksup
%% terminates and delivers disk data again afterwards. The test case is
%% skipped if the kill command prints anything.
kill_port_program(OsPid, PsOutput) ->
    %% Monitor disksup
    MonRef = erlang:monitor(process, disksup),
    [{_Disk1, Kbyte1, _Cap1} | _] = disksup:get_disk_data(),
    true = Kbyte1 > 0,

    %% Kill the port program
    case os:cmd("kill -9 " ++ OsPid) of
        [] ->
            %% disksup should now terminate
            ok = await_port_died(MonRef, PsOutput),

            %% Give os_mon_sup time to restart disksup
            ct:sleep({seconds, 3}),
            [{_Disk2, Kbyte2, _Cap2} | _] = disksup:get_disk_data(),
            true = Kbyte2 > 0,

            ok;
        Line ->
            erlang:demonitor(MonRef),
            {skip, {not_killed, Line}}
    end.

%% Waits up to three seconds for the monitored disksup process to go
%% down with reason {port_died, _}; anything else fails the test case.
await_port_died(MonRef, PsOutput) ->
    receive
        {'DOWN', MonRef, _, _, {port_died, _Reason}} ->
            ok;
        {'DOWN', MonRef, _, _, Reason} ->
            ct:fail({unexpected_exit_reason, Reason})
    after 3000 ->
            ct:fail({still_alive, PsOutput})
    end.

%% Marks disksup as not to be started in the os_mon configuration and
%% terminates the running disksup child.
terminate(Config) when is_list(Config) ->
    ok = application:set_env(os_mon, start_disksup, false),
    ok = supervisor:terminate_child(os_mon_sup, disksup),
    ok.

%% Test correct behaviour when service is unavailable
unavailable(Config) when is_list(Config) ->

    %% Every API function is expected to answer with its dummy value
    [{"none", 0, 0}] = disksup:get_disk_data(),
    1800000 = disksup:get_check_interval(),
    ok = disksup:set_check_interval(5),
    80 = disksup:get_almost_full_threshold(),
    ok = disksup:set_almost_full_threshold(0.9),
    ok.

%% Re-enables disksup in the os_mon configuration, switches the
%% posix-only option off and asks os_mon_sup to restart the child.
%% A child that is already running is accepted as well.
restart(Config) when is_list(Config) ->
    ok = application:set_env(os_mon, start_disksup, true),
    ok = application:set_env(os_mon, disksup_posix_only, false),
    Result = supervisor:restart_child(os_mon_sup, disksup),
    case Result of
        {error, running} -> ok;
        {ok, _Pid} -> ok
    end.
%% Test that alarms are cleared if disksup crashes or
%% if OS_Mon is stopped
otp_5910(Config) when is_list(Config) ->

    %% Make sure disksup sets at least one alarm. If no disk is at or
    %% above the current threshold, the threshold is lowered to just
    %% below the usage of the first disk in the sorted data.
    Data = lists:sort(disksup:get_disk_data()),
    Threshold0 = disksup:get_almost_full_threshold(),
    Threshold = case over_threshold(Data, Threshold0) of
                    0 ->
                        [{_Id,_Kbyte,Cap}|_] = Data,
                        io:format("Data ~p Threshold ~p ~n",[Data, Cap-1]),
                        ok = disksup:set_almost_full_threshold((Cap-1)/100),
                        Cap-1;
                    _N -> Threshold0
                end,
    %% Store the threshold in the application environment as well, so
    %% that it is still configured when disksup is restarted below
    ok = application:set_env(os_mon, disk_almost_full_threshold, Threshold/100),
    disksup ! timeout, % force a disk check
    Data2 = disksup:get_disk_data(),
    Over = over_threshold(Data2, Threshold),
    Alarms = get_alarms(),
    if
        Over==0 ->
            ct:fail({threshold_too_low, Data2, Threshold});
        Over==length(Alarms) ->
            ok;
        true ->
            dump_info(),
            ct:fail({bad_alarms, Threshold, Data2, Alarms})
    end,

    %% Kill disksup
    exit(whereis(disksup), faked_disksup_crash),

    %% Wait a little to make sure disksup has been restarted,
    %% then make sure the alarms are set once, but not twice
    ct:sleep({seconds,1}),
    Data3 = disksup:get_disk_data(),
    Alarms2 = get_alarms(),
    if
        length(Alarms2)==length(Alarms) -> ok;
        true ->
            dump_info(),
            ct:fail({bad_alarms,Threshold,Data3,Alarms,Alarms2})
    end,

    %% Stop OS_Mon and make sure all disksup alarms are cleared.
    %% The alarm list is fetched once only, so that the list reported on
    %% failure is exactly the list that was checked.
    ok = application:stop(os_mon),
    ct:sleep({seconds,1}),
    Alarms3 = get_alarms(),
    case Alarms3 of
        [] -> ok;
        _ -> ct:fail({alarms_not_cleared, Alarms3})
    end,

    %% Reset threshold and restart OS_Mon. The parameter reset here must
    %% be the same one that was set above (disk_almost_full_threshold),
    %% otherwise the modified threshold stays in the application
    %% environment when OS_Mon is started again.
    ok = application:set_env(os_mon, disk_almost_full_threshold, 0.8),
    ok = disksup:set_almost_full_threshold(0.8),
    ok = application:start(os_mon),
    ok.
%% Test disksup_posix_only option
posix_only(Config) when is_list(Config) ->
    %% Switch the option on, then restart disksup
    ok = application:set_env(os_mon, disksup_posix_only, true),
    ok = supervisor:terminate_child(os_mon_sup, disksup),
    {ok, _Child1} = supervisor:restart_child(os_mon_sup, disksup),

    %% Disk data must still look sane
    check_get_disk_data().

%% Prints the status of the disksup process as reported by sys:get_status/1.
dump_info() ->
    Status = sys:get_status(disksup),
    io:format("Status: ~p~n", [Status]).

%% Checks that the first disk entry with non-zero capacity has a printable
%% id and positive integer size and capacity values.
check_get_disk_data() ->
    [{DiskId, SizeKb, UsedPercent} | _] = get_disk_data(),
    true = io_lib:printable_list(DiskId),
    true = is_integer(SizeKb),
    true = is_integer(UsedPercent),
    true = UsedPercent > 0,
    true = SizeKb > 0,
    ok.

%% Filter the result of disksup:get_disk_data() and remove entries with
%% zero capacity.
%% "Non-normal" filesystems report zero capacity.
%%  - Perhaps an erroneous 'df -k -l'?
%%  - Always list filesystems by type '-t ufs,zfs,..' instead?
%% It is unclear what the intention was from the beginning.
get_disk_data() ->
    get_disk_data(disksup:get_disk_data()).

%% A remaining list consisting only of the {"none",0,0} entry is kept as
%% it is; all other entries with zero capacity are dropped.
get_disk_data([]) ->
    [];
get_disk_data([{"none", 0, 0} = Dummy]) ->
    [Dummy];
get_disk_data([{_, _, 0} | Rest]) ->
    get_disk_data(Rest);
get_disk_data([Disk | Rest]) ->
    [Disk | get_disk_data(Rest)].

%% @doc Test various expected inputs to 'df' command output (Linux/POSIX)
parse_df_output_posix(Config) when is_list(Config) ->
    %% A header line, empty input and blank lines must all be rejected
    PosixHdr = "Filesystem 1K-blocks Used Available Use% Mounted on\n",
    lists:foreach(fun(Input) ->
                          {error, _} = disksup:parse_df(Input, posix)
                  end,
                  [PosixHdr, "", "\n\n"]),

    %% A simple line without any spaces in the mount path
    PlainLine = "tmpfs 498048 7288 490760 2% /run\n",
    {ok, {498048, 2, "/run"}, ""} = disksup:parse_df(PlainLine, posix),

    %% A mount path containing spaces
    SpacedLine = "tmpfs 498048 7288 490760 2% /spaces 1 2\n",
    {ok, {498048, 2, "/spaces 1 2"}, ""} = disksup:parse_df(SpacedLine, posix).
%% @doc Test various expected inputs to 'df' command output (Darwin/SUSv3)
parse_df_output_susv3(Config) when is_list(Config) ->
    %% A header line, empty input and blank lines must all be rejected
    DarwinHdr = "Filesystem 1024-blocks Used Available Capacity "
                "iused ifree %iused Mounted on",
    lists:foreach(fun(Input) ->
                          {error, _} = disksup:parse_df(Input, susv3)
                  end,
                  [DarwinHdr, "", "\n\n"]),

    %% A simple line without any spaces in the mount path
    PlainLine = "/dev/disk1 243949060 157002380 86690680 65% 2029724 "
                "4292937555 0% /\n",
    {ok, {243949060, 65, "/"}, ""} = disksup:parse_df(PlainLine, susv3),

    %% A mount path containing spaces
    SpacedLine = "/dev/disk1 243949060 157002380 86690680 65% 2029724 "
                 "4292937555 0% /spaces 1 2\n",
    {ok, {243949060, 65, "/spaces 1 2"}, ""} = disksup:parse_df(SpacedLine, susv3).