Estimates how large the generated test data file will be
87
-
max_string = float('-inf')
88
-
min_string = float('inf')
90
-
record_size_unit = "bytes"
87
+
total_name_bytes = sum(len(s.encode("utf-8")) for s in weather_station_names)
88
+
avg_name_bytes = total_name_bytes / float(len(weather_station_names))
92
-
for station in weather_station_names:
93
-
if len(station) > max_string:
94
-
max_string = len(station)
95
-
if len(station) < min_string:
96
-
min_string = len(station)
97
-
per_record_size = ((max_string + min_string * 2) + len(",-123.4")) / 2
90
+
# avg_temp_bytes = sum(len(str(n / 10)) for n in range(-999, 1000)) / 1999
91
+
avg_temp_bytes = 4.400200100050025
93
+
# add 2 for separator and newline
94
+
avg_line_length = avg_name_bytes + avg_temp_bytes + 2
99
-
total_file_size = num_rows_to_create * per_record_size
100
-
human_file_size = convert_bytes(total_file_size)
96
+
human_file_size = convert_bytes(num_rows_to_create * avg_line_length)
102
-
return f"Estimated max file size is: {human_file_size}.\nTrue size is probably much smaller (around half)."
98
+
return f"Estimated max file size is: {human_file_size}."
def build_test_data(weather_station_names, num_rows_to_create):