From 9f98ea7e22d0ca84af36888e86e62de492427b1f Mon Sep 17 00:00:00 2001 From: "richgel99@gmail.com" Date: Tue, 27 Dec 2011 21:18:07 +0000 Subject: [PATCH] --- cpy.bat | 9 + crn.2008.sln | 55 + crn.workspace | 9 + crn_examples.2008.sln | 74 + crnlib/crn_arealist.cpp | 699 ++++ crnlib/crn_arealist.h | 74 + crnlib/crn_assert.cpp | 77 + crnlib/crn_assert.h | 61 + crnlib/crn_buffer_stream.h | 196 ++ crnlib/crn_cfile_stream.h | 246 ++ crnlib/crn_checksum.cpp | 63 + crnlib/crn_checksum.h | 13 + crnlib/crn_clusterizer.h | 764 +++++ crnlib/crn_color.h | 694 ++++ crnlib/crn_command_line_params.cpp | 439 +++ crnlib/crn_command_line_params.h | 82 + crnlib/crn_comp.cpp | 2173 ++++++++++++ crnlib/crn_comp.h | 181 + crnlib/crn_condition_var.cpp | 431 +++ crnlib/crn_condition_var.h | 91 + crnlib/crn_console.cpp | 231 ++ crnlib/crn_console.h | 94 + crnlib/crn_core.cpp | 7 + crnlib/crn_core.h | 103 + crnlib/crn_data_stream.cpp | 154 + crnlib/crn_data_stream.h | 91 + crnlib/crn_data_stream_serializer.h | 432 +++ crnlib/crn_dds_comp.cpp | 259 ++ crnlib/crn_dds_comp.h | 48 + crnlib/crn_dds_texture.cpp | 2514 ++++++++++++++ crnlib/crn_dds_texture.h | 292 ++ crnlib/crn_decomp.cpp | 6 + crnlib/crn_dxt.cpp | 381 +++ crnlib/crn_dxt.h | 281 ++ crnlib/crn_dxt1.cpp | 2138 ++++++++++++ crnlib/crn_dxt1.h | 352 ++ crnlib/crn_dxt5a.cpp | 209 ++ crnlib/crn_dxt5a.h | 66 + crnlib/crn_dxt_endpoint_refiner.cpp | 362 ++ crnlib/crn_dxt_endpoint_refiner.h | 62 + crnlib/crn_dxt_fast.cpp | 916 +++++ crnlib/crn_dxt_fast.h | 21 + crnlib/crn_dxt_hc.cpp | 2544 ++++++++++++++ crnlib/crn_dxt_hc.h | 440 +++ crnlib/crn_dxt_hc_common.cpp | 47 + crnlib/crn_dxt_hc_common.h | 43 + crnlib/crn_dxt_image.cpp | 1264 +++++++ crnlib/crn_dxt_image.h | 218 ++ crnlib/crn_dynamic_stream.h | 206 ++ crnlib/crn_dynamic_string.cpp | 695 ++++ crnlib/crn_dynamic_string.h | 163 + crnlib/crn_dynamic_wstring.cpp | 715 ++++ crnlib/crn_dynamic_wstring.h | 159 + crnlib/crn_event.h | 27 + crnlib/crn_hash.cpp | 68 + crnlib/crn_hash.h | 34 + 
crnlib/crn_hash_map.cpp | 155 + crnlib/crn_hash_map.h | 871 +++++ crnlib/crn_helpers.h | 64 + crnlib/crn_huffman_codes.cpp | 387 +++ crnlib/crn_huffman_codes.h | 14 + crnlib/crn_image.h | 612 ++++ crnlib/crn_image_utils.cpp | 1169 +++++++ crnlib/crn_image_utils.h | 141 + crnlib/crn_intersect.h | 123 + crnlib/crn_lzma_codec.cpp | 137 + crnlib/crn_lzma_codec.h | 60 + crnlib/crn_math.cpp | 76 + crnlib/crn_math.h | 222 ++ crnlib/crn_matrix.h | 565 ++++ crnlib/crn_mem.cpp | 289 ++ crnlib/crn_mem.h | 185 + crnlib/crn_mutex.h | 40 + crnlib/crn_packed_uint.h | 91 + crnlib/crn_pixel_format.cpp | 317 ++ crnlib/crn_pixel_format.h | 278 ++ crnlib/crn_platform.cpp | 19 + crnlib/crn_platform.h | 49 + crnlib/crn_prefix_coding.cpp | 356 ++ crnlib/crn_prefix_coding.h | 116 + crnlib/crn_qdxt1.cpp | 909 +++++ crnlib/crn_qdxt1.h | 187 ++ crnlib/crn_qdxt5.cpp | 826 +++++ crnlib/crn_qdxt5.h | 196 ++ crnlib/crn_rand.cpp | 365 ++ crnlib/crn_rand.h | 114 + crnlib/crn_ray.h | 52 + crnlib/crn_rect.h | 81 + crnlib/crn_resample_filters.cpp | 337 ++ crnlib/crn_resample_filters.h | 21 + crnlib/crn_resampler.cpp | 884 +++++ crnlib/crn_resampler.h | 173 + crnlib/crn_ryg_dxt.cpp | 609 ++++ crnlib/crn_semaphore.h | 25 + crnlib/crn_sparse_array.h | 399 +++ crnlib/crn_sparse_bit_array.cpp | 538 +++ crnlib/crn_sparse_bit_array.h | 176 + crnlib/crn_spinlock.h | 38 + crnlib/crn_stb_image.cpp | 3952 ++++++++++++++++++++++ crnlib/crn_strutils.cpp | 1260 +++++++ crnlib/crn_strutils.h | 59 + crnlib/crn_symbol_codec.cpp | 1901 +++++++++++ crnlib/crn_symbol_codec.h | 519 +++ crnlib/crn_task_pool.cpp | 243 ++ crnlib/crn_task_pool.h | 140 + crnlib/crn_texture_comp.cpp | 509 +++ crnlib/crn_texture_comp.h | 33 + crnlib/crn_texture_conversion.cpp | 705 ++++ crnlib/crn_texture_conversion.h | 109 + crnlib/crn_texture_file_types.cpp | 101 + crnlib/crn_texture_file_types.h | 62 + crnlib/crn_threaded_clusterizer.h | 361 ++ crnlib/crn_threaded_resampler.cpp | 321 ++ crnlib/crn_threaded_resampler.h | 87 + 
crnlib/crn_traits.h | 106 + crnlib/crn_tree_clusterizer.h | 457 +++ crnlib/crn_types.h | 57 + crnlib/crn_utils.cpp | 60 + crnlib/crn_utils.h | 234 ++ crnlib/crn_value.cpp | 22 + crnlib/crn_value.h | 1025 ++++++ crnlib/crn_vec.h | 838 +++++ crnlib/crn_vec_interval.h | 35 + crnlib/crn_vector.cpp | 84 + crnlib/crn_vector.h | 636 ++++ crnlib/crn_win32_console.cpp | 116 + crnlib/crn_win32_console.h | 21 + crnlib/crn_win32_file_utils.cpp | 69 + crnlib/crn_win32_file_utils.h | 15 + crnlib/crn_win32_find_files.cpp | 176 + crnlib/crn_win32_find_files.h | 55 + crnlib/crn_win32_threading.cpp | 36 + crnlib/crn_win32_threading.h | 18 + crnlib/crn_win32_timer.cpp | 119 + crnlib/crn_win32_timer.h | 40 + crnlib/crn_winhdr.h | 15 + crnlib/crn_zeng.cpp | 289 ++ crnlib/crn_zeng.h | 10 + crnlib/crnlib.2008.vcproj | 1482 ++++++++ crnlib/crnlib.cbp | 218 ++ crnlib/crnlib.cpp | 370 ++ crnlib/lzma_7zBuf.cpp | 41 + crnlib/lzma_7zBuf.h | 35 + crnlib/lzma_7zBuf2.cpp | 50 + crnlib/lzma_7zCrc.cpp | 40 + crnlib/lzma_7zCrc.h | 28 + crnlib/lzma_7zFile.cpp | 267 ++ crnlib/lzma_7zFile.h | 78 + crnlib/lzma_7zStream.cpp | 173 + crnlib/lzma_7zVersion.h | 7 + crnlib/lzma_Alloc.cpp | 131 + crnlib/lzma_Alloc.h | 36 + crnlib/lzma_Bcj2.cpp | 136 + crnlib/lzma_Bcj2.h | 34 + crnlib/lzma_Bra.cpp | 137 + crnlib/lzma_Bra.h | 64 + crnlib/lzma_Bra86.cpp | 89 + crnlib/lzma_BraIA64.cpp | 71 + crnlib/lzma_CpuArch.h | 69 + crnlib/lzma_LzFind.cpp | 755 +++++ crnlib/lzma_LzFind.h | 111 + crnlib/lzma_LzFindMt.cpp | 797 +++++ crnlib/lzma_LzFindMt.h | 101 + crnlib/lzma_LzHash.h | 54 + crnlib/lzma_LzmaDec.cpp | 1011 ++++++ crnlib/lzma_LzmaDec.h | 227 ++ crnlib/lzma_LzmaEnc.cpp | 2279 +++++++++++++ crnlib/lzma_LzmaEnc.h | 76 + crnlib/lzma_LzmaLib.cpp | 50 + crnlib/lzma_LzmaLib.h | 146 + crnlib/lzma_MyVersion.h | 8 + crnlib/lzma_Threads.cpp | 116 + crnlib/lzma_Threads.h | 72 + crnlib/lzma_Types.h | 219 ++ crunch/crunch.2008.vcproj | 373 +++ crunch/crunch.cbp | 49 + crunch/crunch.cpp | 1304 +++++++ 
example1/example1.2008.vcproj | 716 ++++ example1/example1.cpp | 579 ++++ example1/stb_image.h | 3942 ++++++++++++++++++++++ example2/example2.2008.vcproj | 720 ++++ example2/example2.cpp | 277 ++ example2/timer.cpp | 153 + example2/timer.h | 41 + example3/example3.2008.vcproj | 716 ++++ example3/example3.cpp | 278 ++ example3/stb_image.h | 3942 ++++++++++++++++++++++ inc/crn_decomp.h | 4849 +++++++++++++++++++++++++++ inc/crnlib.h | 552 +++ inc/dds_defs.h | 151 + license.txt | 22 + readme.txt | 271 ++ 192 files changed, 77747 insertions(+) create mode 100644 cpy.bat create mode 100644 crn.2008.sln create mode 100644 crn.workspace create mode 100644 crn_examples.2008.sln create mode 100644 crnlib/crn_arealist.cpp create mode 100644 crnlib/crn_arealist.h create mode 100644 crnlib/crn_assert.cpp create mode 100644 crnlib/crn_assert.h create mode 100644 crnlib/crn_buffer_stream.h create mode 100644 crnlib/crn_cfile_stream.h create mode 100644 crnlib/crn_checksum.cpp create mode 100644 crnlib/crn_checksum.h create mode 100644 crnlib/crn_clusterizer.h create mode 100644 crnlib/crn_color.h create mode 100644 crnlib/crn_command_line_params.cpp create mode 100644 crnlib/crn_command_line_params.h create mode 100644 crnlib/crn_comp.cpp create mode 100644 crnlib/crn_comp.h create mode 100644 crnlib/crn_condition_var.cpp create mode 100644 crnlib/crn_condition_var.h create mode 100644 crnlib/crn_console.cpp create mode 100644 crnlib/crn_console.h create mode 100644 crnlib/crn_core.cpp create mode 100644 crnlib/crn_core.h create mode 100644 crnlib/crn_data_stream.cpp create mode 100644 crnlib/crn_data_stream.h create mode 100644 crnlib/crn_data_stream_serializer.h create mode 100644 crnlib/crn_dds_comp.cpp create mode 100644 crnlib/crn_dds_comp.h create mode 100644 crnlib/crn_dds_texture.cpp create mode 100644 crnlib/crn_dds_texture.h create mode 100644 crnlib/crn_decomp.cpp create mode 100644 crnlib/crn_dxt.cpp create mode 100644 crnlib/crn_dxt.h create mode 100644 
crnlib/crn_dxt1.cpp create mode 100644 crnlib/crn_dxt1.h create mode 100644 crnlib/crn_dxt5a.cpp create mode 100644 crnlib/crn_dxt5a.h create mode 100644 crnlib/crn_dxt_endpoint_refiner.cpp create mode 100644 crnlib/crn_dxt_endpoint_refiner.h create mode 100644 crnlib/crn_dxt_fast.cpp create mode 100644 crnlib/crn_dxt_fast.h create mode 100644 crnlib/crn_dxt_hc.cpp create mode 100644 crnlib/crn_dxt_hc.h create mode 100644 crnlib/crn_dxt_hc_common.cpp create mode 100644 crnlib/crn_dxt_hc_common.h create mode 100644 crnlib/crn_dxt_image.cpp create mode 100644 crnlib/crn_dxt_image.h create mode 100644 crnlib/crn_dynamic_stream.h create mode 100644 crnlib/crn_dynamic_string.cpp create mode 100644 crnlib/crn_dynamic_string.h create mode 100644 crnlib/crn_dynamic_wstring.cpp create mode 100644 crnlib/crn_dynamic_wstring.h create mode 100644 crnlib/crn_event.h create mode 100644 crnlib/crn_hash.cpp create mode 100644 crnlib/crn_hash.h create mode 100644 crnlib/crn_hash_map.cpp create mode 100644 crnlib/crn_hash_map.h create mode 100644 crnlib/crn_helpers.h create mode 100644 crnlib/crn_huffman_codes.cpp create mode 100644 crnlib/crn_huffman_codes.h create mode 100644 crnlib/crn_image.h create mode 100644 crnlib/crn_image_utils.cpp create mode 100644 crnlib/crn_image_utils.h create mode 100644 crnlib/crn_intersect.h create mode 100644 crnlib/crn_lzma_codec.cpp create mode 100644 crnlib/crn_lzma_codec.h create mode 100644 crnlib/crn_math.cpp create mode 100644 crnlib/crn_math.h create mode 100644 crnlib/crn_matrix.h create mode 100644 crnlib/crn_mem.cpp create mode 100644 crnlib/crn_mem.h create mode 100644 crnlib/crn_mutex.h create mode 100644 crnlib/crn_packed_uint.h create mode 100644 crnlib/crn_pixel_format.cpp create mode 100644 crnlib/crn_pixel_format.h create mode 100644 crnlib/crn_platform.cpp create mode 100644 crnlib/crn_platform.h create mode 100644 crnlib/crn_prefix_coding.cpp create mode 100644 crnlib/crn_prefix_coding.h create mode 100644 crnlib/crn_qdxt1.cpp 
create mode 100644 crnlib/crn_qdxt1.h create mode 100644 crnlib/crn_qdxt5.cpp create mode 100644 crnlib/crn_qdxt5.h create mode 100644 crnlib/crn_rand.cpp create mode 100644 crnlib/crn_rand.h create mode 100644 crnlib/crn_ray.h create mode 100644 crnlib/crn_rect.h create mode 100644 crnlib/crn_resample_filters.cpp create mode 100644 crnlib/crn_resample_filters.h create mode 100644 crnlib/crn_resampler.cpp create mode 100644 crnlib/crn_resampler.h create mode 100644 crnlib/crn_ryg_dxt.cpp create mode 100644 crnlib/crn_semaphore.h create mode 100644 crnlib/crn_sparse_array.h create mode 100644 crnlib/crn_sparse_bit_array.cpp create mode 100644 crnlib/crn_sparse_bit_array.h create mode 100644 crnlib/crn_spinlock.h create mode 100644 crnlib/crn_stb_image.cpp create mode 100644 crnlib/crn_strutils.cpp create mode 100644 crnlib/crn_strutils.h create mode 100644 crnlib/crn_symbol_codec.cpp create mode 100644 crnlib/crn_symbol_codec.h create mode 100644 crnlib/crn_task_pool.cpp create mode 100644 crnlib/crn_task_pool.h create mode 100644 crnlib/crn_texture_comp.cpp create mode 100644 crnlib/crn_texture_comp.h create mode 100644 crnlib/crn_texture_conversion.cpp create mode 100644 crnlib/crn_texture_conversion.h create mode 100644 crnlib/crn_texture_file_types.cpp create mode 100644 crnlib/crn_texture_file_types.h create mode 100644 crnlib/crn_threaded_clusterizer.h create mode 100644 crnlib/crn_threaded_resampler.cpp create mode 100644 crnlib/crn_threaded_resampler.h create mode 100644 crnlib/crn_traits.h create mode 100644 crnlib/crn_tree_clusterizer.h create mode 100644 crnlib/crn_types.h create mode 100644 crnlib/crn_utils.cpp create mode 100644 crnlib/crn_utils.h create mode 100644 crnlib/crn_value.cpp create mode 100644 crnlib/crn_value.h create mode 100644 crnlib/crn_vec.h create mode 100644 crnlib/crn_vec_interval.h create mode 100644 crnlib/crn_vector.cpp create mode 100644 crnlib/crn_vector.h create mode 100644 crnlib/crn_win32_console.cpp create mode 100644 
crnlib/crn_win32_console.h create mode 100644 crnlib/crn_win32_file_utils.cpp create mode 100644 crnlib/crn_win32_file_utils.h create mode 100644 crnlib/crn_win32_find_files.cpp create mode 100644 crnlib/crn_win32_find_files.h create mode 100644 crnlib/crn_win32_threading.cpp create mode 100644 crnlib/crn_win32_threading.h create mode 100644 crnlib/crn_win32_timer.cpp create mode 100644 crnlib/crn_win32_timer.h create mode 100644 crnlib/crn_winhdr.h create mode 100644 crnlib/crn_zeng.cpp create mode 100644 crnlib/crn_zeng.h create mode 100644 crnlib/crnlib.2008.vcproj create mode 100644 crnlib/crnlib.cbp create mode 100644 crnlib/crnlib.cpp create mode 100644 crnlib/lzma_7zBuf.cpp create mode 100644 crnlib/lzma_7zBuf.h create mode 100644 crnlib/lzma_7zBuf2.cpp create mode 100644 crnlib/lzma_7zCrc.cpp create mode 100644 crnlib/lzma_7zCrc.h create mode 100644 crnlib/lzma_7zFile.cpp create mode 100644 crnlib/lzma_7zFile.h create mode 100644 crnlib/lzma_7zStream.cpp create mode 100644 crnlib/lzma_7zVersion.h create mode 100644 crnlib/lzma_Alloc.cpp create mode 100644 crnlib/lzma_Alloc.h create mode 100644 crnlib/lzma_Bcj2.cpp create mode 100644 crnlib/lzma_Bcj2.h create mode 100644 crnlib/lzma_Bra.cpp create mode 100644 crnlib/lzma_Bra.h create mode 100644 crnlib/lzma_Bra86.cpp create mode 100644 crnlib/lzma_BraIA64.cpp create mode 100644 crnlib/lzma_CpuArch.h create mode 100644 crnlib/lzma_LzFind.cpp create mode 100644 crnlib/lzma_LzFind.h create mode 100644 crnlib/lzma_LzFindMt.cpp create mode 100644 crnlib/lzma_LzFindMt.h create mode 100644 crnlib/lzma_LzHash.h create mode 100644 crnlib/lzma_LzmaDec.cpp create mode 100644 crnlib/lzma_LzmaDec.h create mode 100644 crnlib/lzma_LzmaEnc.cpp create mode 100644 crnlib/lzma_LzmaEnc.h create mode 100644 crnlib/lzma_LzmaLib.cpp create mode 100644 crnlib/lzma_LzmaLib.h create mode 100644 crnlib/lzma_MyVersion.h create mode 100644 crnlib/lzma_Threads.cpp create mode 100644 crnlib/lzma_Threads.h create mode 100644 
crnlib/lzma_Types.h create mode 100644 crunch/crunch.2008.vcproj create mode 100644 crunch/crunch.cbp create mode 100644 crunch/crunch.cpp create mode 100644 example1/example1.2008.vcproj create mode 100644 example1/example1.cpp create mode 100644 example1/stb_image.h create mode 100644 example2/example2.2008.vcproj create mode 100644 example2/example2.cpp create mode 100644 example2/timer.cpp create mode 100644 example2/timer.h create mode 100644 example3/example3.2008.vcproj create mode 100644 example3/example3.cpp create mode 100644 example3/stb_image.h create mode 100644 inc/crn_decomp.h create mode 100644 inc/crnlib.h create mode 100644 inc/dds_defs.h create mode 100644 license.txt create mode 100644 readme.txt diff --git a/cpy.bat b/cpy.bat new file mode 100644 index 00000000..eae7d88d --- /dev/null +++ b/cpy.bat @@ -0,0 +1,9 @@ +xcopy /y /s E:\crunch17\*.vcproj . +xcopy /y /s E:\crunch17\*.sln . +xcopy /y /s E:\crunch17\*.workspace . +xcopy /y /s E:\crunch17\*.cbp . +xcopy /y /s E:\crunch17\*.cpp . +xcopy /y /s E:\crunch17\*.c . +xcopy /y /s E:\crunch17\*.inc . +xcopy /y /s E:\crunch17\*.h . +xcopy /y /s E:\crunch17\*.txt . 
diff --git a/crn.2008.sln b/crn.2008.sln new file mode 100644 index 00000000..64576f82 --- /dev/null +++ b/crn.2008.sln @@ -0,0 +1,55 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "crunch", "crunch\crunch.2008.vcproj", "{8F645BA1-B996-49EB-859B-970A671DE05D}" + ProjectSection(ProjectDependencies) = postProject + {CF2E70E8-7133-4D96-92C7-68BB406C0664} = {CF2E70E8-7133-4D96-92C7-68BB406C0664} + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "crnlib", "crnlib\crnlib.2008.vcproj", "{CF2E70E8-7133-4D96-92C7-68BB406C0664}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug_DLL|Win32 = Debug_DLL|Win32 + Debug_DLL|x64 = Debug_DLL|x64 + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release_DLL|Win32 = Release_DLL|Win32 + Release_DLL|x64 = Release_DLL|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8F645BA1-B996-49EB-859B-970A671DE05D}.Debug_DLL|Win32.ActiveCfg = Debug|Win32 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Debug_DLL|x64.ActiveCfg = Debug|x64 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Debug|Win32.ActiveCfg = Debug|Win32 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Debug|Win32.Build.0 = Debug|Win32 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Debug|x64.ActiveCfg = Debug|x64 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Debug|x64.Build.0 = Debug|x64 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Release_DLL|Win32.ActiveCfg = Release|Win32 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Release_DLL|x64.ActiveCfg = Release|x64 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Release|Win32.ActiveCfg = Release|Win32 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Release|Win32.Build.0 = Release|Win32 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Release|x64.ActiveCfg = Release|x64 + {8F645BA1-B996-49EB-859B-970A671DE05D}.Release|x64.Build.0 = 
Release|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug_DLL|Win32.ActiveCfg = Debug_DLL|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug_DLL|Win32.Build.0 = Debug_DLL|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug_DLL|x64.ActiveCfg = Debug_DLL|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug_DLL|x64.Build.0 = Debug_DLL|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug|Win32.ActiveCfg = Debug|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug|Win32.Build.0 = Debug|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug|x64.ActiveCfg = Debug|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Debug|x64.Build.0 = Debug|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release_DLL|Win32.ActiveCfg = Release_DLL|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release_DLL|Win32.Build.0 = Release_DLL|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release_DLL|x64.ActiveCfg = Release_DLL|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release_DLL|x64.Build.0 = Release_DLL|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release|Win32.ActiveCfg = Release|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release|Win32.Build.0 = Release|Win32 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release|x64.ActiveCfg = Release|x64 + {CF2E70E8-7133-4D96-92C7-68BB406C0664}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/crn.workspace b/crn.workspace new file mode 100644 index 00000000..dcc08dfa --- /dev/null +++ b/crn.workspace @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/crn_examples.2008.sln b/crn_examples.2008.sln new file mode 100644 index 00000000..9e68ef0d --- /dev/null +++ b/crn_examples.2008.sln @@ -0,0 +1,74 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example1", "example1\example1.2008.vcproj", "{8F745B42-F996-49EB-859B-970A671DE05D}" +EndProject 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example2", "example2\example2.2008.vcproj", "{AF745B42-F996-49EB-859B-970A671DEF5E}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example3", "example3\example3.2008.vcproj", "{AF745B42-E296-46EB-859B-970A671DEF5E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug_DLL|Win32 = Debug_DLL|Win32 + Debug_DLL|x64 = Debug_DLL|x64 + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release_DLL|Win32 = Release_DLL|Win32 + Release_DLL|x64 = Release_DLL|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug_DLL|Win32.ActiveCfg = Debug_DLL|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug_DLL|Win32.Build.0 = Debug_DLL|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug_DLL|x64.ActiveCfg = Debug_DLL|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug_DLL|x64.Build.0 = Debug_DLL|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug|Win32.ActiveCfg = Debug|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug|Win32.Build.0 = Debug|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug|x64.ActiveCfg = Debug|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Debug|x64.Build.0 = Debug|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release_DLL|Win32.ActiveCfg = Release_DLL|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release_DLL|Win32.Build.0 = Release_DLL|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release_DLL|x64.ActiveCfg = Release_DLL|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release_DLL|x64.Build.0 = Release_DLL|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release|Win32.ActiveCfg = Release|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release|Win32.Build.0 = Release|Win32 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release|x64.ActiveCfg = Release|x64 + {8F745B42-F996-49EB-859B-970A671DE05D}.Release|x64.Build.0 = Release|x64 + 
{AF745B42-F996-49EB-859B-970A671DEF5E}.Debug_DLL|Win32.ActiveCfg = Debug_DLL|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug_DLL|Win32.Build.0 = Debug_DLL|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug_DLL|x64.ActiveCfg = Debug_DLL|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug_DLL|x64.Build.0 = Debug_DLL|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug|Win32.ActiveCfg = Debug|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug|Win32.Build.0 = Debug|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug|x64.ActiveCfg = Debug|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Debug|x64.Build.0 = Debug|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release_DLL|Win32.ActiveCfg = Release_DLL|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release_DLL|Win32.Build.0 = Release_DLL|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release_DLL|x64.ActiveCfg = Release_DLL|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release_DLL|x64.Build.0 = Release_DLL|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release|Win32.ActiveCfg = Release|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release|Win32.Build.0 = Release|Win32 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release|x64.ActiveCfg = Release|x64 + {AF745B42-F996-49EB-859B-970A671DEF5E}.Release|x64.Build.0 = Release|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug_DLL|Win32.ActiveCfg = Debug_DLL|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug_DLL|Win32.Build.0 = Debug_DLL|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug_DLL|x64.ActiveCfg = Debug_DLL|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug_DLL|x64.Build.0 = Debug_DLL|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug|Win32.ActiveCfg = Debug|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug|Win32.Build.0 = Debug|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug|x64.ActiveCfg = Debug|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Debug|x64.Build.0 = Debug|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release_DLL|Win32.ActiveCfg = Release_DLL|Win32 + 
{AF745B42-E296-46EB-859B-970A671DEF5E}.Release_DLL|Win32.Build.0 = Release_DLL|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release_DLL|x64.ActiveCfg = Release_DLL|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release_DLL|x64.Build.0 = Release_DLL|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release|Win32.ActiveCfg = Release|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release|Win32.Build.0 = Release|Win32 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release|x64.ActiveCfg = Release|x64 + {AF745B42-E296-46EB-859B-970A671DEF5E}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/crnlib/crn_arealist.cpp b/crnlib/crn_arealist.cpp new file mode 100644 index 00000000..9d566f67 --- /dev/null +++ b/crnlib/crn_arealist.cpp @@ -0,0 +1,699 @@ +// File: crn_arealist.cpp - 2D shape algebra (currently unused) +// See Copyright Notice and license at the end of inc/crnlib.h +// Ported from the PowerView DOS image viewer, a product I wrote back in 1993. Not currently used in the open source release of crnlib. +#include "crn_core.h" +#include "crn_arealist.h" +#include + +#define RECT_DEBUG + +namespace crnlib +{ + + static void area_fatal_error(const char* pFunc, const char* pMsg, ...) 
+ { + pFunc; + va_list args; + va_start(args, pMsg); + + char buf[512]; +#ifdef _MSC_VER + _vsnprintf_s(buf, sizeof(buf), pMsg, args); +#else + _vsnprintf(buf, sizeof(buf), pMsg, args); +#endif + + va_end(args); + + CRNLIB_FAIL(buf); + } + + static Area * delete_area(Area_List *Plist, Area *Parea) + { + Area *p, *q; + + #ifdef RECT_DEBUG + if ((Parea == Plist->Phead) || (Parea == Plist->Ptail)) + area_fatal_error("delete_area", "tried to remove head or tail"); + #endif + + p = Parea->Pprev; + q = Parea->Pnext; + p->Pnext = q; + q->Pprev = p; + + Parea->Pnext = Plist->Pfree; + Parea->Pprev = NULL; + Plist->Pfree = Parea; + + return (q); + } + + static Area * alloc_area(Area_List *Plist) + { + Area *p = Plist->Pfree; + + if (p == NULL) + { + if (Plist->next_free == Plist->total_areas) + area_fatal_error("alloc_area", "Out of areas!"); + + p = Plist->Phead + Plist->next_free; + Plist->next_free++; + } + else + Plist->Pfree = p->Pnext; + + return (p); + } + + static Area * insert_area_before(Area_List *Plist, Area *Parea, + int x1, int y1, int x2, int y2) + { + Area *p, *Pnew_area = alloc_area(Plist); + + p = Parea->Pprev; + + p->Pnext = Pnew_area; + + Pnew_area->Pprev = p; + Pnew_area->Pnext = Parea; + + Parea->Pprev = Pnew_area; + + Pnew_area->x1 = x1; + Pnew_area->y1 = y1; + Pnew_area->x2 = x2; + Pnew_area->y2 = y2; + + return (Pnew_area); + } + + static Area * insert_area_after(Area_List *Plist, Area *Parea, + int x1, int y1, int x2, int y2) + { + Area *p, *Pnew_area = alloc_area(Plist); + + p = Parea->Pnext; + + p->Pprev = Pnew_area; + + Pnew_area->Pnext = p; + Pnew_area->Pprev = Parea; + + Parea->Pnext = Pnew_area; + + Pnew_area->x1 = x1; + Pnew_area->y1 = y1; + Pnew_area->x2 = x2; + Pnew_area->y2 = y2; + + return (Pnew_area); + } + + void Area_List_deinit(Area_List* Pobj_base) + { + Area_List *Plist = (Area_List *)Pobj_base; + + if (!Plist) + return; + + if (Plist->Phead) + { + crnlib_free(Plist->Phead); + Plist->Phead = NULL; + } + + crnlib_free(Plist); + } + + 
Area_List * Area_List_init(int max_areas) + { + Area_List *Plist = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); + + Plist->total_areas = max_areas + 2; + + Plist->Phead = (Area *)crnlib_calloc(max_areas + 2, sizeof(Area)); + Plist->Ptail = Plist->Phead + 1; + + Plist->Phead->Pprev = NULL; + Plist->Phead->Pnext = Plist->Ptail; + + Plist->Ptail->Pprev = Plist->Phead; + Plist->Ptail->Pnext = NULL; + + Plist->Pfree = NULL; + Plist->next_free = 2; + + return (Plist); + } + + void Area_List_print(Area_List *Plist) + { + Area *Parea = Plist->Phead->Pnext; + + while (Parea != Plist->Ptail) + { + printf("%04i %04i : %04i %04i\n", Parea->x1, Parea->y1, Parea->x2, Parea->y2); + + Parea = Parea->Pnext; + } + } + + Area_List * Area_List_dup_new(Area_List *Plist, + int x_ofs, int y_ofs) + { + int i; + Area_List *Pnew_list = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); + + Pnew_list->total_areas = Plist->total_areas; + + Pnew_list->Phead = (Area *)crnlib_malloc(sizeof(Area) * Plist->total_areas); + Pnew_list->Ptail = Pnew_list->Phead + 1; + + Pnew_list->Pfree = (Plist->Pfree) ? ((Plist->Pfree - Plist->Phead) + Pnew_list->Phead) : NULL; + + Pnew_list->next_free = Plist->next_free; + + memcpy(Pnew_list->Phead, Plist->Phead, sizeof(Area) * Plist->total_areas); + + for (i = 0; i < Plist->total_areas; i++) + { + Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == NULL) ? NULL : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; + Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == NULL) ? 
NULL : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; + + Pnew_list->Phead[i].x1 += x_ofs; + Pnew_list->Phead[i].y1 += y_ofs; + Pnew_list->Phead[i].x2 += x_ofs; + Pnew_list->Phead[i].y2 += y_ofs; + } + + return (Pnew_list); + } + + uint Area_List_get_num(Area_List* Plist) + { + uint num = 0; + + Area *Parea = Plist->Phead->Pnext; + + while (Parea != Plist->Ptail) + { + num++; + + Parea = Parea->Pnext; + } + + return num; + } + + void Area_List_dup(Area_List *Psrc_list, Area_List *Pdst_list, + int x_ofs, int y_ofs) + { + int i; + + if (Psrc_list->total_areas != Pdst_list->total_areas) + area_fatal_error("Area_List_dup", "Src and Dst total_areas must be equal!"); + + Pdst_list->Pfree = (Psrc_list->Pfree) ? ((Psrc_list->Pfree - Psrc_list->Phead) + Pdst_list->Phead) : NULL; + + Pdst_list->next_free = Psrc_list->next_free; + + memcpy(Pdst_list->Phead, Psrc_list->Phead, sizeof(Area) * Psrc_list->total_areas); + + if ((x_ofs) || (y_ofs)) + { + for (i = 0; i < Psrc_list->total_areas; i++) + { + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + + Pdst_list->Phead[i].x1 += x_ofs; + Pdst_list->Phead[i].y1 += y_ofs; + Pdst_list->Phead[i].x2 += x_ofs; + Pdst_list->Phead[i].y2 += y_ofs; + } + } + else + { + for (i = 0; i < Psrc_list->total_areas; i++) + { + Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; + Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? 
NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; + } + } + } + + void Area_List_copy( + Area_List *Psrc_list, Area_List *Pdst_list, + int x_ofs, int y_ofs) + { + Area *Parea = Psrc_list->Phead->Pnext; + + Area_List_clear(Pdst_list); + + if ((x_ofs) || (y_ofs)) + { + Area *Pprev_area = Pdst_list->Phead; + + while (Parea != Psrc_list->Ptail) + { + // Area *p, *Pnew_area; + Area *Pnew_area; + + if (Pdst_list->next_free == Pdst_list->total_areas) + area_fatal_error("Area_List_copy", "Out of areas!"); + + Pnew_area = Pdst_list->Phead + Pdst_list->next_free; + Pdst_list->next_free++; + + Pnew_area->Pprev = Pprev_area; + Pprev_area->Pnext = Pnew_area; + + Pnew_area->x1 = Parea->x1 + x_ofs; + Pnew_area->y1 = Parea->y1 + y_ofs; + Pnew_area->x2 = Parea->x2 + x_ofs; + Pnew_area->y2 = Parea->y2 + y_ofs; + + Pprev_area = Pnew_area; + + Parea = Parea->Pnext; + } + + Pprev_area->Pnext = Pdst_list->Ptail; + } + else + { + #if 0 + while (Parea != Psrc_list->Ptail) + { + insert_area_after(Pdst_list, Pdst_list->Phead, + Parea->x1, + Parea->y1, + Parea->x2, + Parea->y2); + + Parea = Parea->Pnext; + } + #endif + + Area *Pprev_area = Pdst_list->Phead; + + while (Parea != Psrc_list->Ptail) + { + // Area *p, *Pnew_area; + Area *Pnew_area; + + if (Pdst_list->next_free == Pdst_list->total_areas) + area_fatal_error("Area_List_copy", "Out of areas!"); + + Pnew_area = Pdst_list->Phead + Pdst_list->next_free; + Pdst_list->next_free++; + + Pnew_area->Pprev = Pprev_area; + Pprev_area->Pnext = Pnew_area; + + Pnew_area->x1 = Parea->x1; + Pnew_area->y1 = Parea->y1; + Pnew_area->x2 = Parea->x2; + Pnew_area->y2 = Parea->y2; + + Pprev_area = Pnew_area; + + Parea = Parea->Pnext; + } + + Pprev_area->Pnext = Pdst_list->Ptail; + } + } + + void Area_List_clear(Area_List *Plist) + { + Plist->Phead->Pnext = Plist->Ptail; + Plist->Ptail->Pprev = Plist->Phead; + Plist->Pfree = NULL; + Plist->next_free = 2; + } + + void Area_List_set(Area_List *Plist, int x1, int y1, int x2, int y2) + { 
+ Plist->Pfree = NULL; + + Plist->Phead[2].x1 = x1; + Plist->Phead[2].y1 = y1; + Plist->Phead[2].x2 = x2; + Plist->Phead[2].y2 = y2; + + Plist->Phead[2].Pprev = Plist->Phead; + Plist->Phead->Pnext = Plist->Phead + 2; + + Plist->Phead[2].Pnext = Plist->Ptail; + Plist->Ptail->Pprev = Plist->Phead + 2; + + Plist->next_free = 3; + } + + // Removes the rectangle (x1,y1)-(x2,y2), bounds inclusive, from the list. + // Areas it does not touch are skipped, areas it covers are deleted, and a partly + // covered area is replaced by the pieces lying outside the rectangle. + void Area_List_remove(Area_List *Plist, + int x1, int y1, int x2, int y2) + { + int l, h; + Area *Parea = Plist->Phead->Pnext; + + #ifdef RECT_DEBUG + if ((x1 > x2) || (y1 > y2)) + area_fatal_error("area_list_remove", "invalid coords: %i %i %i %i", x1, y1, x2, y2); + #endif + + while (Parea != Plist->Ptail) + { + // Not touching + if ((x2 < Parea->x1) || (x1 > Parea->x2) || + (y2 < Parea->y1) || (y1 > Parea->y2)) + { + Parea = Parea->Pnext; + continue; + } + + // Completely covers + if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && + (y1 <= Parea->y1) && (y2 >= Parea->y2)) + { + if ((x1 == Parea->x1) && (x2 == Parea->x2) && + (y1 == Parea->y1) && (y2 == Parea->y2)) + { + delete_area(Plist, Parea); + return; + } + + Parea = delete_area(Plist, Parea); + + continue; + } + + // top + if (y1 > Parea->y1) + { + insert_area_before(Plist, Parea, + Parea->x1, Parea->y1, + Parea->x2, y1 - 1); + } + + // bottom + if (y2 < Parea->y2) + { + insert_area_before(Plist, Parea, + Parea->x1, y2 + 1, + Parea->x2, Parea->y2); + } + + // l..h is the vertical overlap; the left/right pieces below are limited to it + l = math::maximum(y1, Parea->y1); + h = math::minimum(y2, Parea->y2); + + // left middle + if (x1 > Parea->x1) + { + insert_area_before(Plist, Parea, + Parea->x1, l, + x1 - 1, h); + } + + // right middle + if (x2 < Parea->x2) + { + insert_area_before(Plist, Parea, + x2 + 1, l, + Parea->x2, h); + } + + // early out - we know there's nothing else to remove, as areas can + // never overlap + if ((x1 >= Parea->x1) && (x2 <= Parea->x2) && + (y1 >= Parea->y1) && (y2 <= Parea->y2)) + { + delete_area(Plist, Parea); + return; + } + + Parea = delete_area(Plist, Parea); + } + } + + // Adds the rectangle (x1,y1)-(x2,y2), bounds inclusive, to the list. + // Areas it covers are deleted. If it intersects an area, only the parts lying outside + // that area are inserted (recursively) and the function returns. With combine set, an + // adjacent area sharing the same x-span or y-span is deleted and the merged rectangle re-inserted. + void Area_List_insert(Area_List *Plist, + int x1, int y1, int 
x2, int y2, + bool combine) + { + Area *Parea = Plist->Phead->Pnext; + + #ifdef RECT_DEBUG + if ((x1 > x2) || (y1 > y2)) + area_fatal_error("Area_List_insert", "invalid coords: %i %i %i %i", x1, y1, x2, y2); + #endif + + while (Parea != Plist->Ptail) + { + // totally covers + if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && + (y1 <= Parea->y1) && (y2 >= Parea->y2)) + { + Parea = delete_area(Plist, Parea); + continue; + } + + // intersects + if ((x2 >= Parea->x1) && (x1 <= Parea->x2) && + (y2 >= Parea->y1) && (y1 <= Parea->y2)) + { + int ax1, ay1, ax2, ay2; + + ax1 = Parea->x1; + ay1 = Parea->y1; + ax2 = Parea->x2; + ay2 = Parea->y2; + + if (x1 < ax1) + Area_List_insert(Plist, x1, math::maximum(y1, ay1), ax1 - 1, math::minimum(y2, ay2), combine); + + if (x2 > ax2) + Area_List_insert(Plist, ax2 + 1, math::maximum(y1, ay1), x2, math::minimum(y2, ay2), combine); + + if (y1 < ay1) + Area_List_insert(Plist, x1, y1, x2, ay1 - 1, combine); + + if (y2 > ay2) + Area_List_insert(Plist, x1, ay2 + 1, x2, y2, combine); + + return; + } + + if (combine) + { + // NOTE(review): both merge branches read Parea's fields after delete_area(); delete_area is not visible here - confirm the node stays readable. + if ((x1 == Parea->x1) && (x2 == Parea->x2)) + { + if ((y2 == Parea->y1 - 1) || (y1 == Parea->y2 + 1)) + { + delete_area(Plist, Parea); + Area_List_insert(Plist, x1, math::minimum(y1, Parea->y1), x2, math::maximum(y2, Parea->y2), CRNLIB_TRUE); + return; + } + } + else if ((y1 == Parea->y1) && (y2 == Parea->y2)) + { + if ((x2 == Parea->x1 - 1) || (x1 == Parea->x2 + 1)) + { + delete_area(Plist, Parea); + Area_List_insert(Plist, math::minimum(x1, Parea->x1), y1, math::maximum(x2, Parea->x2), y2, CRNLIB_TRUE); + return; + } + } + } + + Parea = Parea->Pnext; + } + + insert_area_before(Plist, Parea, x1, y1, x2, y2); + } + + // Clips every area in the list to the rectangle (x1,y1)-(x2,y2): areas outside it are + // deleted, areas inside it are kept, and the rest are replaced by their intersection with it. + void Area_List_intersect_area(Area_List *Plist, + int x1, int y1, int x2, int y2) + { + Area *Parea = Plist->Phead->Pnext; + + while (Parea != Plist->Ptail) + { + // doesn't cover + if ((x2 < Parea->x1) || (x1 > Parea->x2) || + (y2 < Parea->y1) || (y1 > Parea->y2)) + { + Parea = delete_area(Plist, Parea); + 
continue; + } + + // totally covers + if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && + (y1 <= Parea->y1) && (y2 >= Parea->y2)) + { + Parea = Parea->Pnext; + continue; + } + + // Oct 21- should insert after, because deleted area will access the NEXT area! + // insert_area_after(Plist, Parea, + // math::maximum(x1, Parea->x1), + // math::maximum(y1, Parea->y1), + // math::minimum(x2, Parea->x2), + // math::minimum(y2, Parea->y2)); + + insert_area_before(Plist, Parea, + math::maximum(x1, Parea->x1), + math::maximum(y1, Parea->y1), + math::minimum(x2, Parea->x2), + math::minimum(y2, Parea->y2)); + + Parea = delete_area(Plist, Parea); + } + } + + // Disabled variant: inserts every pairwise intersection into Pdst_list without merging. + #if 0 + void Area_List_intersect_Area_List( + Area_List *Pouter_list, + Area_List *Pinner_list, + Area_List *Pdst_list) + { + Area *Parea1 = Pouter_list->Phead->Pnext; + + while (Parea1 != Pouter_list->Ptail) + { + Area *Parea2 = Pinner_list->Phead->Pnext; + int x1, y1, x2, y2; + + x1 = Parea1->x1; x2 = Parea1->x2; + y1 = Parea1->y1; y2 = Parea1->y2; + + while (Parea2 != Pinner_list->Ptail) + { + if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && + (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) + { + insert_area_after(Pdst_list, Pdst_list->Phead, + math::maximum(x1, Parea2->x1), + math::maximum(y1, Parea2->y1), + math::minimum(x2, Parea2->x2), + math::minimum(y2, Parea2->y2)); + } + + Parea2 = Parea2->Pnext; + } + + Parea1 = Parea1->Pnext; + } + } + #endif + + #if 1 + // For every overlapping pair (one area from each input list) adds the intersection to + // Pdst_list. Before inserting, the area at the front of Pdst_list is extended instead when + // the new rectangle has the same x-span (or y-span) and directly abuts it. + void Area_List_intersect_Area_List(Area_List *Pouter_list, + Area_List *Pinner_list, + Area_List *Pdst_list) + { + Area *Parea1 = Pouter_list->Phead->Pnext; + + while (Parea1 != Pouter_list->Ptail) + { + Area *Parea2 = Pinner_list->Phead->Pnext; + int x1, y1, x2, y2; + + x1 = Parea1->x1; x2 = Parea1->x2; + y1 = Parea1->y1; y2 = Parea1->y2; + + while (Parea2 != Pinner_list->Ptail) + { + if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && + (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) + { + int nx1, ny1, nx2, ny2; + + nx1 = math::maximum(x1, Parea2->x1); + ny1 = math::maximum(y1, 
Parea2->y1); + nx2 = math::minimum(x2, Parea2->x2); + ny2 = math::minimum(y2, Parea2->y2); + + if (Pdst_list->Phead->Pnext == Pdst_list->Ptail) + { + insert_area_after(Pdst_list, Pdst_list->Phead, + nx1, ny1, nx2, ny2); + } + else + { + Area_Ptr Ptemp = Pdst_list->Phead->Pnext; + if ((Ptemp->x1 == nx1) && (Ptemp->x2 == nx2)) + { + if (Ptemp->y1 == (ny2+1)) + { + Ptemp->y1 = ny1; + goto next; + } + else if (Ptemp->y2 == (ny1-1)) + { + Ptemp->y2 = ny2; + goto next; + } + } + else if ((Ptemp->y1 == ny1) && (Ptemp->y2 == ny2)) + { + if (Ptemp->x1 == (nx2+1)) + { + Ptemp->x1 = nx1; + goto next; + } + else if (Ptemp->x2 == (nx1-1)) + { + Ptemp->x2 = nx2; + goto next; + } + } + + insert_area_after(Pdst_list, Pdst_list->Phead, + nx1, ny1, nx2, ny2); + } + } + + next: + + Parea2 = Parea2->Pnext; + } + + Parea1 = Parea1->Pnext; + } + } + #endif + + // Returns a new list created with Area_List_init(count + 2) that holds a copy of every + // area in Plist, in the same order. (The extra 2 is presumably for the head/tail nodes - confirm.) + Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist) + { + Area_Ptr Parea = Plist->Phead->Pnext, Parea_after; + int num = 2; + Area_List_Ptr Pnew_list; + + while (Parea != Plist->Ptail) + { + num++; + Parea = Parea->Pnext; + } + + Pnew_list = Area_List_init(num); + + Parea = Plist->Phead->Pnext; + + Parea_after = Pnew_list->Phead; + + while (Parea != Plist->Ptail) + { + Parea_after = insert_area_after(Pnew_list, Parea_after, + Parea->x1, Parea->y1, + Parea->x2, Parea->y2); + + Parea = Parea->Pnext; + } + + return (Pnew_list); + } + +} // namespace crnlib diff --git a/crnlib/crn_arealist.h b/crnlib/crn_arealist.h new file mode 100644 index 00000000..3b9aea92 --- /dev/null +++ b/crnlib/crn_arealist.h @@ -0,0 +1,74 @@ +// File: crn_arealist.h - 2D shape algebra +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + // Doubly linked rectangle node; x2/y2 are inclusive (width is x2 - x1 + 1). + struct Area + { + struct Area *Pprev, *Pnext; + + int x1, y1, x2, y2; + + uint get_width() const { return x2 - x1 + 1; } + uint get_height() const { return y2 - y1 + 1; } + uint get_area() const { return get_width() * get_height(); } + }; + + typedef Area * Area_Ptr; + 
+ // List header: Phead and Ptail bracket the linked areas, Pfree and next_free track unused nodes. + struct Area_List + { + int total_areas; + int next_free; + + Area *Phead, *Ptail, *Pfree; + }; + + typedef Area_List * Area_List_Ptr; + + Area_List * Area_List_init(int max_areas); + void Area_List_deinit(Area_List* Pobj_base); + + void Area_List_print(Area_List *Plist); + + Area_List * Area_List_dup_new(Area_List *Plist, + int x_ofs, int y_ofs); + + uint Area_List_get_num(Area_List* Plist); + + // src and dst area lists must have the same number of total areas. + void Area_List_dup(Area_List *Psrc_list, + Area_List *Pdst_list, + int x_ofs, int y_ofs); + + void Area_List_copy(Area_List *Psrc_list, + Area_List *Pdst_list, + int x_ofs, int y_ofs); + + void Area_List_clear(Area_List *Plist); + + void Area_List_set(Area_List *Plist, + int x1, int y1, int x2, int y2); + + // logical: x and (not y) + void Area_List_remove(Area_List *Plist, + int x1, int y1, int x2, int y2); + + // logical: x or y + void Area_List_insert(Area_List *Plist, + int x1, int y1, int x2, int y2, + bool combine); + + // logical: x and y + void Area_List_intersect_area(Area_List *Plist, + int x1, int y1, int x2, int y2); + + // logical: x and y + void Area_List_intersect_Area_List(Area_List *Pouter_list, + Area_List *Pinner_list, + Area_List *Pdst_list); + + Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist); + +} // namespace crnlib diff --git a/crnlib/crn_assert.cpp b/crnlib/crn_assert.cpp new file mode 100644 index 00000000..9af01ede --- /dev/null +++ b/crnlib/crn_assert.cpp @@ -0,0 +1,77 @@ +// File: crn_assert.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_winhdr.h" +// NOTE(review): the header name was lost in extraction; <stdio.h> is what the printf/snprintf calls below need. +#include <stdio.h> + +static bool g_fail_exceptions; +static bool g_exit_on_failure = true; + +void crnlib_enable_fail_exceptions(bool enabled) +{ + g_fail_exceptions = enabled; +} + +// Reports a failed assertion: formats "<file>(<line>): Assertion failed: "<exp>"", sends it to +// the debug output and stdout, then breaks into the debugger if one is attached. +void crnlib_assert(const char* pExp, const char* pFile, unsigned line) +{ + char buf[512]; + +#if defined(WIN32) && defined(_MSC_VER) + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion 
failed: \"%s\"\n", pFile, line, pExp); +#else + // Bounded write: pFile/pExp are arbitrary length and buf is only 512 bytes. + snprintf(buf, sizeof(buf), "%s(%u): Assertion failed: \"%s\"\n", pFile, line, pExp); +#endif + + crnlib_output_debug_string(buf); + + // buf contains caller text (pExp may hold '%'), so it must never be the format string. + printf("%s", buf); + + if (crnlib_is_debugger_present()) + crnlib_debug_break(); +} + +// Reports an unconditional failure the same way as crnlib_assert, then either raises +// CRNLIB_FAIL_EXCEPTION_CODE (when fail exceptions are enabled) or exits with EXIT_FAILURE. +void crnlib_fail(const char* pExp, const char* pFile, unsigned line) +{ + char buf[512]; + +#if defined(WIN32) && defined(_MSC_VER) + sprintf_s(buf, sizeof(buf), "%s(%u): Failure: \"%s\"\n", pFile, line, pExp); +#else + // Bounded write: pFile/pExp are arbitrary length and buf is only 512 bytes. + snprintf(buf, sizeof(buf), "%s(%u): Failure: \"%s\"\n", pFile, line, pExp); +#endif + + crnlib_output_debug_string(buf); + + // buf contains caller text (pExp may hold '%'), so it must never be the format string. + printf("%s", buf); + + if (crnlib_is_debugger_present()) + crnlib_debug_break(); + + if (g_fail_exceptions) + RaiseException(CRNLIB_FAIL_EXCEPTION_CODE, 0, 0, NULL); + else if (g_exit_on_failure) + exit(EXIT_FAILURE); +} + +// Formats pFmt/args into a 512-byte buffer and sends it to the debug output; does nothing +// unless a debugger is attached. +void trace(const char* pFmt, va_list args) +{ + if (crnlib_is_debugger_present()) + { + char buf[512]; +#if defined(WIN32) && defined(_MSC_VER) + vsprintf_s(buf, sizeof(buf), pFmt, args); +#else + // Bounded write: the formatted text may exceed the buffer. + vsnprintf(buf, sizeof(buf), pFmt, args); +#endif + + crnlib_output_debug_string(buf); + } +} + +// Variadic front end for trace(const char*, va_list). +void trace(const char* pFmt, ...) 
+{ + va_list args; + va_start(args, pFmt); + trace(pFmt, args); + va_end(args); +} diff --git a/crnlib/crn_assert.h b/crnlib/crn_assert.h new file mode 100644 index 00000000..44513695 --- /dev/null +++ b/crnlib/crn_assert.h @@ -0,0 +1,61 @@ +// File: crn_assert.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +const unsigned int CRNLIB_FAIL_EXCEPTION_CODE = 256U; +void crnlib_enable_fail_exceptions(bool enabled); + +void crnlib_assert(const char* pExp, const char* pFile, unsigned line); +void crnlib_fail(const char* pExp, const char* pFile, unsigned line); + +// CRNLIB_ASSERT compiles to nothing under NDEBUG; CRNLIB_VERIFY below is always evaluated. +#ifdef NDEBUG + #define CRNLIB_ASSERT(x) ((void)0) + #undef CRNLIB_ASSERTS_ENABLED +#else + #define CRNLIB_ASSERT(_exp) (void)( (!!(_exp)) || (crnlib_assert(#_exp, __FILE__, __LINE__), 0) ) + #define CRNLIB_ASSERTS_ENABLED +#endif + +#define CRNLIB_VERIFY(_exp) (void)( (!!(_exp)) || (crnlib_assert(#_exp, __FILE__, __LINE__), 0) ) + +#define CRNLIB_FAIL(msg) do { crnlib_fail(#msg, __FILE__, __LINE__); } while(0) + +// Arguments are parenthesized so expressions such as "a ? b : c" or "x | y" expand correctly. +#define CRNLIB_ASSERT_OPEN_RANGE(x, l, h) CRNLIB_ASSERT(((x) >= (l)) && ((x) < (h))) +#define CRNLIB_ASSERT_CLOSED_RANGE(x, l, h) CRNLIB_ASSERT(((x) >= (l)) && ((x) <= (h))) + +void trace(const char* pFmt, va_list args); +void trace(const char* pFmt, ...); + +// Borrowed from boost libraries. 
+// Compile-time assertion helper: crnlib_assume_failure is only defined for true, so +// CRNLIB_ASSUME(p) fails to compile (sizeof of an incomplete type) when p is false. +template <bool x> struct crnlib_assume_failure; +template <> struct crnlib_assume_failure<true> { enum { blah = 1 }; }; +template <int x> struct crnlib_assume_try { }; + +#define CRNLIB_JOINER_FINAL(a, b) a##b +#define CRNLIB_JOINER(a, b) CRNLIB_JOINER_FINAL(a, b) +#define CRNLIB_JOIN(a, b) CRNLIB_JOINER(a, b) +#define CRNLIB_ASSUME(p) typedef crnlib_assume_try < sizeof(crnlib_assume_failure< (bool)(p) > ) > CRNLIB_JOIN(crnlib_assume_typedef, __COUNTER__) + +// crnlib_assert_range / crnlib_assert_range_incl return i unchanged; non-NDEBUG builds also +// assert 0 <= i < m (or 0 <= i <= m for the _incl variant). +#ifdef NDEBUG +template <typename T> inline T crnlib_assert_range(T i, T m) +{ + (void)m; + return i; +} +template <typename T> inline T crnlib_assert_range_incl(T i, T m) +{ + (void)m; + return i; +} +#else +template <typename T> inline T crnlib_assert_range(T i, T m) +{ + CRNLIB_ASSERT((i >= 0) && (i < m)); + return i; +} +template <typename T> inline T crnlib_assert_range_incl(T i, T m) +{ + CRNLIB_ASSERT((i >= 0) && (i <= m)); + return i; +} +#endif diff --git a/crnlib/crn_buffer_stream.h b/crnlib/crn_buffer_stream.h new file mode 100644 index 00000000..26e2532a --- /dev/null +++ b/crnlib/crn_buffer_stream.h @@ -0,0 +1,196 @@ +// File: crn_buffer_stream.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_data_stream.h" + +namespace crnlib +{ + // data_stream over a caller-supplied, fixed-size memory buffer. The buffer is neither copied + // nor freed here. Opening through the const void* overload makes the stream read-only. + class buffer_stream : public data_stream + { + public: + buffer_stream() : + data_stream(), + m_pBuf(NULL), + m_size(0), + m_ofs(0) + { + } + + buffer_stream(void* p, uint size) : + data_stream(), + m_pBuf(NULL), + m_size(0), + m_ofs(0) + { + open(p, size); + } + + buffer_stream(const void* p, uint size) : + data_stream(), + m_pBuf(NULL), + m_size(0), + m_ofs(0) + { + open(p, size); + } + + virtual ~buffer_stream() + { + } + + // Opens a readable, seekable (not writable) stream over p. Fails on NULL p or zero size. + bool open(const void* p, uint size) + { + CRNLIB_ASSERT(p); + + close(); + + if ((!p) || (!size)) + return false; + + m_opened = true; + // Constness is dropped only for storage; write() is refused because cDataStreamWritable is not set. + m_pBuf = const_cast<uint8*>(static_cast<const uint8*>(p)); + m_size = size; + m_ofs = 0; + m_attribs = cDataStreamSeekable | cDataStreamReadable; + return true; + } + + // Opens a readable, writable, seekable stream over p. Fails on NULL p or zero size. + bool open(void* p, uint size) + { + CRNLIB_ASSERT(p); + + close(); + + if ((!p) || (!size)) + return false; + + m_opened 
= true; + m_pBuf = static_cast<uint8*>(p); + m_size = size; + m_ofs = 0; + m_attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable; + return true; + } + + // Detaches from the buffer. Returns false if the stream was not open. + virtual bool close() + { + if (m_opened) + { + m_opened = false; + m_pBuf = NULL; + m_size = 0; + m_ofs = 0; + return true; + } + + return false; + } + + const void* get_buf() const { return m_pBuf; } + void* get_buf() { return m_pBuf; } + + virtual const void* get_ptr() const { return m_pBuf; } + + // Copies up to len bytes from the current offset into pBuf; returns the count actually copied. + virtual uint read(void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_readable()) || (!len)) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_size); + + uint bytes_left = m_size - m_ofs; + + len = math::minimum(len, bytes_left); + + if (len) + memcpy(pBuf, &m_pBuf[m_ofs], len); + + m_ofs += len; + + return len; + } + + // Copies up to len bytes from pBuf to the current offset; the buffer never grows, so the + // count is clamped to the space left. Returns the count actually copied. + virtual uint write(const void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_writable()) || (!len)) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_size); + + uint bytes_left = m_size - m_ofs; + + len = math::minimum(len, bytes_left); + + if (len) + memcpy(&m_pBuf[m_ofs], pBuf, len); + + m_ofs += len; + + return len; + } + + virtual bool flush() + { + if (!m_opened) + return false; + + return true; + } + + virtual uint64 get_size() + { + if (!m_opened) + return 0; + + return m_size; + } + + virtual uint64 get_remaining() + { + if (!m_opened) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_size); + + return m_size - m_ofs; + } + + virtual uint64 get_ofs() + { + if (!m_opened) + return 0; + + return m_ofs; + } + + // Moves the offset to ofs (absolute) or m_ofs + ofs (relative); targets outside [0, m_size] are rejected. + virtual bool seek(int64 ofs, bool relative) + { + if ((!m_opened) || (!is_seekable())) + return false; + + int64 new_ofs = relative ? 
(m_ofs + ofs) : ofs; + + if (new_ofs < 0) + return false; + else if (new_ofs > m_size) + return false; + + m_ofs = static_cast<uint>(new_ofs); + + post_seek(); + + return true; + } + + private: + uint8* m_pBuf; + uint m_size; + uint m_ofs; + }; + +} // namespace crnlib + diff --git a/crnlib/crn_cfile_stream.h b/crnlib/crn_cfile_stream.h new file mode 100644 index 00000000..2bd23a88 --- /dev/null +++ b/crnlib/crn_cfile_stream.h @@ -0,0 +1,246 @@ +// File: crn_cfile_stream.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_data_stream.h" +// NOTE(review): the header name was lost in extraction; <stdio.h> is what FILE/fread/fwrite below need. +#include <stdio.h> + +namespace crnlib +{ + // data_stream over a C FILE*. The handle is fclose()'d by close() only when has_ownership is set. + // m_size and m_ofs cache the file length and position so get_size()/get_ofs() need no I/O. + class cfile_stream : public data_stream + { + public: + cfile_stream() : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) + { + } + + cfile_stream(FILE* pFile, const wchar_t* pFilename, uint attribs, bool has_ownership) : + data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) + { + open(pFile, pFilename, attribs, has_ownership); + } + + cfile_stream(const wchar_t* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) : + data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) + { + open(pFilename, attribs, open_existing); + } + + virtual ~cfile_stream() + { + close(); + } + + // Clears the error flag and, if open, releases the handle (closing it when owned). + // Returns false if the stream was not open or fclose() failed. + virtual bool close() + { + clear_error(); + + if (m_opened) + { + bool status = true; + if (m_has_ownership) + { + if (EOF == fclose(m_pFile)) + status = false; + } + + m_pFile = NULL; + m_opened = false; + m_size = 0; + m_ofs = 0; + m_has_ownership = false; + + return status; + } + + return false; + } + + // Attaches to an already-open FILE*. The current position is kept; the size is measured by + // seeking to the end and back. + bool open(FILE* pFile, const wchar_t* pFilename, uint attribs, bool has_ownership) + { + CRNLIB_ASSERT(pFile); + CRNLIB_ASSERT(pFilename); + + close(); + + set_name(pFilename); + m_pFile = pFile; + m_has_ownership = has_ownership; + // NOTE(review): the cast target was lost in extraction; attribs_t (from crn_data_stream.h) is assumed - confirm. + m_attribs = static_cast<attribs_t>(attribs); + + m_ofs = _ftelli64(m_pFile); + _fseeki64(m_pFile, 0, SEEK_END); + m_size = _ftelli64(m_pFile); + _fseeki64(m_pFile, 
m_ofs, SEEK_SET); + + m_opened = true; + + return true; + } + + // Opens pFilename with an fopen mode derived from attribs and open_existing: + // read+write -> "r+b"/"w+b", write only -> "ab"/"wb", read only -> "rb". + bool open(const wchar_t* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) + { + CRNLIB_ASSERT(pFilename); + + close(); + + // NOTE(review): the cast target was lost in extraction; attribs_t (from crn_data_stream.h) is assumed - confirm. + m_attribs = static_cast<attribs_t>(attribs); + + const wchar_t* pMode; + if ((is_readable()) && (is_writable())) + pMode = open_existing ? L"r+b" : L"w+b"; + else if (is_writable()) + pMode = open_existing ? L"ab" : L"wb"; + else if (is_readable()) + pMode = L"rb"; + else + { + set_error(); + return false; + } + + FILE* pFile = NULL; +#ifdef _MSC_VER + _wfopen_s(&pFile, pFilename, pMode); +#else + pFile = _wfopen(pFilename, pMode); +#endif + m_has_ownership = true; + + if (!pFile) + { + set_error(); + return false; + } + + // TODO: Change stream class to support UCS2 filenames. + + return open(pFile, pFilename, attribs, true); + } + + FILE* get_file() const { return m_pFile; } + + // Reads up to len bytes (clamped to the bytes remaining). A short read sets the error flag + // and returns 0. + virtual uint read(void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if (!m_opened || (!is_readable()) || (!len)) + return 0; + + // NOTE(review): template/cast arguments were lost in extraction; <uint64>/<uint> follow from the operand and result types. + len = static_cast<uint>(math::minimum<uint64>(len, get_remaining())); + + // Track what fread() actually consumed, even on a short read, so a later seek() compares + // against the real file position instead of skipping the _fseeki64() call. + const size_t bytes_read = fread(pBuf, 1, len, m_pFile); + m_ofs += bytes_read; + + if (bytes_read != len) + { + set_error(); + return 0; + } + + return len; + } + + // Writes len bytes at the current position and grows the cached size if needed. A short + // write sets the error flag and returns 0. + virtual uint write(const void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if (!m_opened || (!is_writable()) || (!len)) + return 0; + + // Track what fwrite() actually stored, even on a short write, so the cached offset and + // size keep matching the file. + const size_t bytes_written = fwrite(pBuf, 1, len, m_pFile); + m_ofs += bytes_written; + m_size = math::maximum(m_size, m_ofs); + + if (bytes_written != len) + { + set_error(); + return 0; + } + + return len; + } + + virtual bool flush() + { + if ((!m_opened) || (!is_writable())) + return false; + + if (EOF == fflush(m_pFile)) + { + set_error(); + return false; + } + + return true; + } + + virtual uint64 get_size() + { + if (!m_opened) + return 0; + + return m_size; + } + + virtual uint64 get_remaining() + { + if (!m_opened) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_size); + return m_size - m_ofs; + } + + virtual uint64 get_ofs() + { + if 
(!m_opened) + return 0; + + return m_ofs; + } + + // Moves to ofs (absolute) or m_ofs + ofs (relative); targets outside [0, m_size] are rejected. + // NOTE(review): buffer_stream::seek() calls post_seek() on success and this does not - confirm whether that is intended. + virtual bool seek(int64 ofs, bool relative) + { + if ((!m_opened) || (!is_seekable())) + return false; + + int64 new_ofs = relative ? (m_ofs + ofs) : ofs; + if (new_ofs < 0) + return false; + else if (static_cast<uint64>(new_ofs) > m_size) + return false; + + // The file is only repositioned when the target differs from the cached offset. + if (static_cast<uint64>(new_ofs) != m_ofs) + { + if (_fseeki64(m_pFile, new_ofs, SEEK_SET) != 0) + { + set_error(); + return false; + } + + m_ofs = new_ofs; + } + + return true; + } + + // NOTE(review): the vector element type was lost in extraction; uint8 is assumed - confirm against read_array()/write_array(). + static bool read_file_into_array(const wchar_t* pFilename, vector<uint8>& buf) + { + cfile_stream in_stream(pFilename); + if (!in_stream.is_opened()) + return false; + return in_stream.read_array(buf); + } + + static bool write_array_to_file(const wchar_t* pFilename, const vector<uint8>& buf) + { + cfile_stream out_stream(pFilename, cDataStreamWritable|cDataStreamSeekable); + if (!out_stream.is_opened()) + return false; + return out_stream.write_array(buf); + } + + private: + FILE* m_pFile; + uint64 m_size, m_ofs; + bool m_has_ownership; + }; + +} // namespace crnlib diff --git a/crnlib/crn_checksum.cpp b/crnlib/crn_checksum.cpp new file mode 100644 index 00000000..5dcb6f41 --- /dev/null +++ b/crnlib/crn_checksum.cpp @@ -0,0 +1,63 @@ +// File: crn_checksum.cpp +#include "crn_core.h" + +namespace crnlib +{ + // From the public domain stb.h header. 
+ // Adler-32 of the buflen bytes at pBuf, continuing from the running value adler32 + // (low 16 bits seed s1, high 16 bits seed s2). The sums are reduced modulo 65521 + // after each block of at most 5552 bytes; the first block is the buflen % 5552 remainder. + uint adler32(const void* pBuf, size_t buflen, uint adler32) + { + const uint8* buffer = static_cast<const uint8*>(pBuf); + + const unsigned long ADLER_MOD = 65521; + unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; + size_t blocklen; + unsigned long i; + + blocklen = buflen % 5552; + while (buflen) { + // eight bytes per iteration, then the leftover bytes of the block one at a time + for (i=0; i + 7 < blocklen; i += 8) { + s1 += buffer[0], s2 += s1; + s1 += buffer[1], s2 += s1; + s1 += buffer[2], s2 += s1; + s1 += buffer[3], s2 += s1; + s1 += buffer[4], s2 += s1; + s1 += buffer[5], s2 += s1; + s1 += buffer[6], s2 += s1; + s1 += buffer[7], s2 += s1; + + buffer += 8; + } + + for (; i < blocklen; ++i) + s1 += *buffer++, s2 += s1; + + s1 %= ADLER_MOD, s2 %= ADLER_MOD; + buflen -= blocklen; + blocklen = 5552; + } + return (s2 << 16) + s1; + } + + // Table-free 16-bit CRC over the len bytes at pBuf, processed one byte per iteration. + // crc is complemented on entry and the result is complemented again before returning. + uint16 crc16(const void* pBuf, size_t len, uint16 crc) + { + crc = ~crc; + + const uint8* p = reinterpret_cast<const uint8*>(pBuf); + while (len) + { + const uint16 q = *p++ ^ (crc >> 8); + crc <<= 8U; + uint16 r = (q >> 4) ^ q; + crc ^= r; + r <<= 5U; + crc ^= r; + r <<= 7U; + crc ^= r; + len--; + } + + return static_cast<uint16>(~crc); + } + +} // namespace crnlib + diff --git a/crnlib/crn_checksum.h b/crnlib/crn_checksum.h new file mode 100644 index 00000000..12817a81 --- /dev/null +++ b/crnlib/crn_checksum.h @@ -0,0 +1,13 @@ +// File: crn_checksum.h +#pragma once + +namespace crnlib +{ + const uint cInitAdler32 = 1U; + uint adler32(const void* pBuf, size_t buflen, uint adler32 = cInitAdler32); + + // crc16() intended for small buffers - doesn't use an acceleration table. 
+ const uint cInitCRC16 = 0; + uint16 crc16(const void* pBuf, size_t len, uint16 crc = cInitCRC16); + +} // namespace crnlib diff --git a/crnlib/crn_clusterizer.h b/crnlib/crn_clusterizer.h new file mode 100644 index 00000000..f6471160 --- /dev/null +++ b/crnlib/crn_clusterizer.h @@ -0,0 +1,764 @@ +// File: crn_clusterizer.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_matrix.h" + +namespace crnlib +{ + template + class clusterizer + { + public: + clusterizer() : + m_overall_variance(0.0f), + m_split_index(0), + m_heap_size(0), + m_quick(false) + { + } + + void clear() + { + m_training_vecs.clear(); + m_codebook.clear(); + m_nodes.clear(); + m_overall_variance = 0.0f; + m_split_index = 0; + m_heap_size = 0; + m_quick = false; + } + + void reserve_training_vecs(uint num_expected) + { + m_training_vecs.reserve(num_expected); + } + + void add_training_vec(const VectorType& v, uint weight) + { + m_training_vecs.push_back( std::make_pair(v, weight) ); + } + + typedef bool (*progress_callback_func_ptr)(uint percentage_completed, void* pData); + + bool generate_codebook(uint max_size, progress_callback_func_ptr pProgress_callback = NULL, void* pProgress_data = NULL, bool quick = false) + { + if (m_training_vecs.empty()) + return false; + + m_quick = quick; + + double ttsum = 0.0f; + + vq_node root; + root.m_vectors.reserve(m_training_vecs.size()); + + for (uint i = 0; i < m_training_vecs.size(); i++) + { + const VectorType& v = m_training_vecs[i].first; + const uint weight = m_training_vecs[i].second; + + root.m_centroid += (v * (float)weight); + root.m_total_weight += weight; + root.m_vectors.push_back(i); + + ttsum += v.dot(v) * weight; + } + + root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); + + root.m_centroid *= (1.0f / root.m_total_weight); + + m_nodes.clear(); + m_nodes.reserve(max_size * 2 + 1); + + m_nodes.push_back(root); + + m_heap.resize(max_size + 1); + m_heap[1] = 
0; + m_heap_size = 1; + + m_split_index = 0; + + uint total_leaves = 1; + + m_left_children.reserve(m_training_vecs.size() + 1); + m_right_children.reserve(m_training_vecs.size() + 1); + + int prev_percentage = -1; + while ((total_leaves < max_size) && (m_heap_size)) + { + int worst_node_index = m_heap[1]; + + m_heap[1] = m_heap[m_heap_size]; + m_heap_size--; + if (m_heap_size) + down_heap(1); + + split_node(worst_node_index); + total_leaves++; + + if ((pProgress_callback) && ((total_leaves & 63) == 0) && (max_size)) + { + int cur_percentage = (total_leaves * 100U + (max_size / 2U)) / max_size; + if (cur_percentage != prev_percentage) + { + if (!(*pProgress_callback)(cur_percentage, pProgress_data)) + return false; + + prev_percentage = cur_percentage; + } + } + } + + m_codebook.clear(); + + m_overall_variance = 0.0f; + + for (uint i = 0; i < m_nodes.size(); i++) + { + vq_node& node = m_nodes[i]; + if (node.m_left != -1) + { + CRNLIB_ASSERT(node.m_right != -1); + continue; + } + + CRNLIB_ASSERT((node.m_left == -1) && (node.m_right == -1)); + + node.m_codebook_index = m_codebook.size(); + m_codebook.push_back(node.m_centroid); + + m_overall_variance += node.m_variance; + } + + m_heap.clear(); + m_left_children.clear(); + m_right_children.clear(); + + return true; + } + + inline uint get_num_training_vecs() const { return m_training_vecs.size(); } + const VectorType& get_training_vec(uint index) const { return m_training_vecs[index].first; } + const uint get_training_vec_weight(uint index) const { return m_training_vecs[index].second; } + + typedef crnlib::vector< std::pair > training_vec_array; + + const training_vec_array& get_training_vecs() const { return m_training_vecs; } + training_vec_array& get_training_vecs() { return m_training_vecs; } + + inline float get_overall_variance() const { return m_overall_variance; } + + inline uint get_codebook_size() const + { + return m_codebook.size(); + } + + inline const VectorType& get_codebook_entry(uint index) const + { 
+ return m_codebook[index]; + } + + VectorType& get_codebook_entry(uint index) + { + return m_codebook[index]; + } + + typedef crnlib::vector vector_vec_type; + inline const vector_vec_type& get_codebook() const + { + return m_codebook; + } + + const uint find_best_codebook_entry(const VectorType& v) const + { + uint cur_node_index = 0; + + for ( ; ; ) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if (cur_node.m_left == -1) + return cur_node.m_codebook_index; + + const vq_node& left_node = m_nodes[cur_node.m_left]; + const vq_node& right_node = m_nodes[cur_node.m_right]; + + float left_dist = left_node.m_centroid.squared_distance(v); + float right_dist = right_node.m_centroid.squared_distance(v); + + if (left_dist < right_dist) + cur_node_index = cur_node.m_left; + else + cur_node_index = cur_node.m_right; + } + } + + const VectorType& find_best_codebook_entry(const VectorType& v, uint max_codebook_size) const + { + uint cur_node_index = 0; + + for ( ; ; ) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if ((cur_node.m_left == -1) || ((cur_node.m_codebook_index + 1) >= (int)max_codebook_size)) + return cur_node.m_centroid; + + const vq_node& left_node = m_nodes[cur_node.m_left]; + const vq_node& right_node = m_nodes[cur_node.m_right]; + + float left_dist = left_node.m_centroid.squared_distance(v); + float right_dist = right_node.m_centroid.squared_distance(v); + + if (left_dist < right_dist) + cur_node_index = cur_node.m_left; + else + cur_node_index = cur_node.m_right; + } + } + + const uint find_best_codebook_entry_fs(const VectorType& v) const + { + float best_dist = math::cNearlyInfinite; + uint best_index = 0; + + for (uint i = 0; i < m_codebook.size(); i++) + { + float dist = m_codebook[i].squared_distance(v); + if (dist < best_dist) + { + best_dist = dist; + best_index = i; + if (best_dist == 0.0f) + break; + } + } + + return best_index; + } + + void retrieve_clusters(uint max_clusters, crnlib::vector< crnlib::vector >& clusters) 
const + { + clusters.resize(0); + clusters.reserve(max_clusters); + + crnlib::vector stack; + stack.reserve(512); + + uint cur_node_index = 0; + + for ( ; ; ) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if ( (cur_node.is_leaf()) || ((cur_node.m_codebook_index + 2) > (int)max_clusters) ) + { + clusters.resize(clusters.size() + 1); + clusters.back() = cur_node.m_vectors; + + if (stack.empty()) + break; + cur_node_index = stack.back(); + stack.pop_back(); + continue; + } + + cur_node_index = cur_node.m_left; + stack.push_back(cur_node.m_right); + } + } + + private: + training_vec_array m_training_vecs; + + struct vq_node + { + vq_node() : m_centroid(cClear), m_total_weight(0), m_left(-1), m_right(-1), m_codebook_index(-1), m_unsplittable(false) { } + + VectorType m_centroid; + uint64 m_total_weight; + + float m_variance; + + crnlib::vector m_vectors; + + int m_left; + int m_right; + + int m_codebook_index; + + bool m_unsplittable; + + bool is_leaf() const { return m_left < 0; } + }; + + typedef crnlib::vector node_vec_type; + + node_vec_type m_nodes; + + vector_vec_type m_codebook; + + float m_overall_variance; + + uint m_split_index; + + crnlib::vector m_heap; + uint m_heap_size; + + bool m_quick; + + void insert_heap(uint node_index) + { + const float variance = m_nodes[node_index].m_variance; + uint pos = ++m_heap_size; + + if (m_heap_size >= m_heap.size()) + m_heap.resize(m_heap_size + 1); + + for ( ; ; ) + { + uint parent = pos >> 1; + if (!parent) + break; + + float parent_variance = m_nodes[m_heap[parent]].m_variance; + if (parent_variance > variance) + break; + + m_heap[pos] = m_heap[parent]; + + pos = parent; + } + + m_heap[pos] = node_index; + } + + void down_heap(uint pos) + { + uint child; + uint orig = m_heap[pos]; + + const float orig_variance = m_nodes[orig].m_variance; + + while ((child = (pos << 1)) <= m_heap_size) + { + if (child < m_heap_size) + { + if (m_nodes[m_heap[child]].m_variance < m_nodes[m_heap[child + 1]].m_variance) + 
child++; + } + + if (orig_variance > m_nodes[m_heap[child]].m_variance) + break; + + m_heap[pos] = m_heap[child]; + + pos = child; + } + + m_heap[pos] = orig; + } + + void compute_split_estimate(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) + { + VectorType furthest; + double furthest_dist = -1.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(parent_node.m_centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } + } + + VectorType opposite; + double opposite_dist = -1.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } + } + + left_child_res = (furthest + parent_node.m_centroid) * .5f; + right_child_res = (opposite + parent_node.m_centroid) * .5f; + } + + void compute_split_pca(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) + { + if (parent_node.m_vectors.size() == 2) + { + left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; + right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; + return; + } + + const uint N = VectorType::num_elements; + + matrix covar; + covar.clear(); + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - parent_node.m_centroid); + const VectorType w(v * (float)m_training_vecs[parent_node.m_vectors[i]].second); + + for (uint x = 0; x < N; x++) + for (uint y = x; y < N; y++) + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + + float one_over_total_weight = 1.0f / parent_node.m_total_weight; + + for (uint x = 0; x < N; x++) + for (uint y = x; y < N; y++) + covar[x][y] *= one_over_total_weight; + + for 
(uint x = 0; x < (N - 1); x++) + for (uint y = x + 1; y < N; y++) + covar[y][x] = covar[x][y]; + + VectorType axis;//(1.0f); + if (N == 1) + axis.set(1.0f); + else + { + for (uint i = 0; i < N; i++) + axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / math::maximum(N - 1, 1))); + } + + VectorType prev_axis(axis); + + for (uint iter = 0; iter < 10; iter++) + { + VectorType x; + + double max_sum = 0; + + for (uint i = 0; i < N; i++) + { + double sum = 0; + + for (uint j = 0; j < N; j++) + sum += axis[j] * covar[i][j]; + + x[i] = static_cast(sum); + + max_sum = math::maximum(max_sum, fabs(sum)); + } + + if (max_sum != 0.0f) + x *= static_cast(1.0f / max_sum); + + VectorType delta_axis(prev_axis - x); + + prev_axis = axis; + axis = x; + + if (delta_axis.norm() < .0025f) + break; + } + + axis.normalize(); + + VectorType left_child(0.0f); + VectorType right_child(0.0f); + + double left_weight = 0.0f; + double right_weight = 0.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const float weight = (float)m_training_vecs[parent_node.m_vectors[i]].second; + + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double t = (v - parent_node.m_centroid) * axis; + if (t < 0.0f) + { + left_child += v * weight; + left_weight += weight; + } + else + { + right_child += v * weight; + right_weight += weight; + } + } + + if ((left_weight > 0.0f) && (right_weight > 0.0f)) + { + left_child_res = left_child * (float)(1.0f / left_weight); + right_child_res = right_child * (float)(1.0f / right_weight); + } + else + { + compute_split_estimate(left_child_res, right_child_res, parent_node); + } + } + +#if 0 + void compute_split_pca2(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) + { + if (parent_node.m_vectors.size() == 2) + { + left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; + right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; + return; + } + + const uint N = VectorType::num_elements; 
+ + VectorType furthest; + double furthest_dist = -1.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(parent_node.m_centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } + } + + VectorType opposite; + double opposite_dist = -1.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } + } + + VectorType axis(opposite - furthest); + if (axis.normalize() < .000125f) + { + left_child_res = (furthest + parent_node.m_centroid) * .5f; + right_child_res = (opposite + parent_node.m_centroid) * .5f; + return; + } + + for (uint iter = 0; iter < 2; iter++) + { + double next_axis[N]; + utils::zero_object(next_axis); + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const double weight = m_training_vecs[parent_node.m_vectors[i]].second; + + VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - parent_node.m_centroid); + + double dot = (v * axis) * weight; + + for (uint j = 0; j < N; j++) + next_axis[j] += dot * v[j]; + } + + double w = 0.0f; + for (uint j = 0; j < N; j++) + w += next_axis[j] * next_axis[j]; + + if (w > 0.0f) + { + w = 1.0f / sqrt(w); + for (uint j = 0; j < N; j++) + axis[j] = static_cast(next_axis[j] * w); + } + else + break; + } + + VectorType left_child(0.0f); + VectorType right_child(0.0f); + + double left_weight = 0.0f; + double right_weight = 0.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const float weight = (float)m_training_vecs[parent_node.m_vectors[i]].second; + + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + + double t = (v - parent_node.m_centroid) * axis; + if (t < 0.0f) + { + left_child += v * weight; + 
left_weight += weight; + } + else + { + right_child += v * weight; + right_weight += weight; + } + } + + if ((left_weight > 0.0f) && (right_weight > 0.0f)) + { + left_child_res = left_child * (float)(1.0f / left_weight); + right_child_res = right_child * (float)(1.0f / right_weight); + } + else + { + left_child_res = (furthest + parent_node.m_centroid) * .5f; + right_child_res = (opposite + parent_node.m_centroid) * .5f; + } + } +#endif + + // thread safety warning: shared state! + crnlib::vector m_left_children; + crnlib::vector m_right_children; + + void split_node(uint index) + { + vq_node& parent_node = m_nodes[index]; + + if (parent_node.m_vectors.size() == 1) + return; + + VectorType left_child, right_child; + if (m_quick) + compute_split_estimate(left_child, right_child, parent_node); + else + compute_split_pca(left_child, right_child, parent_node); + + uint64 left_weight = 0; + uint64 right_weight = 0; + + float prev_total_variance = 1e+10f; + + float left_variance = 0.0f; + float right_variance = 0.0f; + + const uint cMaxLoops = m_quick ? 
2 : 8; + for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) + { + m_left_children.resize(0); + m_right_children.resize(0); + + VectorType new_left_child(cClear); + VectorType new_right_child(cClear); + + double left_ttsum = 0.0f; + double right_ttsum = 0.0f; + + left_weight = 0; + right_weight = 0; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; + const uint weight = m_training_vecs[parent_node.m_vectors[i]].second; + + double left_dist2 = left_child.squared_distance(v); + double right_dist2 = right_child.squared_distance(v); + + if (left_dist2 < right_dist2) + { + m_left_children.push_back(parent_node.m_vectors[i]); + + new_left_child += (v * (float)weight); + left_weight += weight; + + left_ttsum += v.dot(v) * weight; + } + else + { + m_right_children.push_back(parent_node.m_vectors[i]); + + new_right_child += (v * (float)weight); + right_weight += weight; + + right_ttsum += v.dot(v) * weight; + } + } + + if ((!left_weight) || (!right_weight)) + { + parent_node.m_unsplittable = true; + return; + } + + left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); + right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); + + new_left_child *= (1.0f / left_weight); + new_right_child *= (1.0f / right_weight); + + left_child = new_left_child; + left_weight = left_weight; + + right_child = new_right_child; + right_weight = right_weight; + + float total_variance = left_variance + right_variance; + if (total_variance < .00001f) + break; + + //const float variance_delta_thresh = .00001f; + const float variance_delta_thresh = .00125f; + if (((prev_total_variance - total_variance) / total_variance) < variance_delta_thresh) + break; + + prev_total_variance = total_variance; + } + + const uint left_child_index = m_nodes.size(); + const uint right_child_index = m_nodes.size() + 1; + + parent_node.m_left = 
m_nodes.size(); + parent_node.m_right = m_nodes.size() + 1; + parent_node.m_codebook_index = m_split_index; + m_split_index++; + + m_nodes.resize(m_nodes.size() + 2); + + // parent_node is invalid now, because m_nodes has been changed + + vq_node& left_child_node = m_nodes[left_child_index]; + vq_node& right_child_node = m_nodes[right_child_index]; + + left_child_node.m_centroid = left_child; + left_child_node.m_total_weight = left_weight; + left_child_node.m_vectors.swap(m_left_children); + left_child_node.m_variance = left_variance; + if ((left_child_node.m_vectors.size() > 1) && (left_child_node.m_variance > 0.0f)) + insert_heap(left_child_index); + + right_child_node.m_centroid = right_child; + right_child_node.m_total_weight = right_weight; + right_child_node.m_vectors.swap(m_right_children); + right_child_node.m_variance = right_variance; + if ((right_child_node.m_vectors.size() > 1) && (right_child_node.m_variance > 0.0f)) + insert_heap(right_child_index); + } + + }; + +} // namespace crnlib + + + diff --git a/crnlib/crn_color.h b/crnlib/crn_color.h new file mode 100644 index 00000000..448236ef --- /dev/null +++ b/crnlib/crn_color.h @@ -0,0 +1,694 @@ +// File: crn_color.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_core.h" + +namespace crnlib +{ + template struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = UINT8_MIN, + cMax = UINT8_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = INT16_MIN, + cMax = INT16_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = UINT16_MIN, + cMax = UINT16_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = INT32_MIN, + cMax = INT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = 
false, + cFloat = false, + cMin = UINT32_MIN, + cMax = UINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = INT32_MIN, + cMax = INT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = INT32_MIN, + cMax = INT32_MAX + }; + }; + + template + class color_quad : public helpers::rel_ops > + { + template + static inline T clamp(T v) + { + if (!component_traits::cFloat) + { + if (v < component_traits::cMin) + v = component_traits::cMin; + else if (v > component_traits::cMax) + v = component_traits::cMax; + } + return v; + } + +#ifdef _MSC_VER + template<> + static inline int clamp(int v) + { + if (!component_traits::cFloat) + { + if ((!component_traits::cSigned) && (component_traits::cMin == 0) && (component_traits::cMax == 0xFF)) + { + if (v & 0xFFFFFF00U) + v = (~(static_cast(v) >> 31)) & 0xFF; + } + else + { + if (v < component_traits::cMin) + v = component_traits::cMin; + else if (v > component_traits::cMax) + v = component_traits::cMax; + } + } + return v; + } +#endif + + public: + typedef component_type component_t; + typedef parameter_type parameter_t; + typedef color_quad_component_traits component_traits; + + enum { cNumComps = 4 }; + + union + { + struct + { + component_type r; + component_type g; + component_type b; + component_type a; + }; + + component_type c[cNumComps]; + + uint32 m_u32; + }; + + inline color_quad() + { + } + + inline color_quad(eClear) : + r(0), g(0), b(0), a(0) + { + } + + inline color_quad(const color_quad& other) : + r(other.r), g(other.g), b(other.b), a(other.a) + { + } + + explicit inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) + { + set(y, alpha); + } + + inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } + + explicit inline 
color_quad(eNoClamp, parameter_type y, parameter_type alpha = component_traits::cMax) + { + set_noclamp_y_alpha(y, alpha); + } + + inline color_quad(eNoClamp, parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set_noclamp_rgba(red, green, blue, alpha); + } + + template + inline color_quad(const color_quad& other) : + r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) + { + } + + inline void clear() + { + r = 0; + g = 0; + b = 0; + a = 0; + } + + inline color_quad& operator= (const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + a = other.a; + return *this; + } + + template + inline color_quad& operator=(const color_quad& other) + { + r = clamp(other.r); + g = clamp(other.g); + b = clamp(other.b); + a = clamp(other.a); + return *this; + } + + inline color_quad& operator= (parameter_type y) + { + set(y, component_traits::cMax); + return *this; + } + + inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) + { + y = clamp(y); + alpha = clamp(alpha); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set_noclamp_y_alpha(parameter_type y, parameter_type alpha = component_traits::cMax) + { + CRNLIB_ASSERT( (y >= component_traits::cMin) && (y <= component_traits::cMax) ); + CRNLIB_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); + + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + r = static_cast(clamp(red)); + g = static_cast(clamp(green)); + b = static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } + + inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, 
parameter_type blue, parameter_type alpha) + { + CRNLIB_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) ); + CRNLIB_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); + CRNLIB_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); + CRNLIB_ASSERT( (alpha >= component_traits::cMin) && (alpha <= component_traits::cMax) ); + + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } + + inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) + { + CRNLIB_ASSERT( (red >= component_traits::cMin) && (red <= component_traits::cMax) ); + CRNLIB_ASSERT( (green >= component_traits::cMin) && (green <= component_traits::cMax) ); + CRNLIB_ASSERT( (blue >= component_traits::cMin) && (blue <= component_traits::cMax) ); + + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } + + static inline parameter_type get_min_comp() { return component_traits::cMin; } + static inline parameter_type get_max_comp() { return component_traits::cMax; } + static inline bool get_comps_are_signed() { return component_traits::cSigned; } + + inline component_type operator[] (uint i) const { CRNLIB_ASSERT(i < cNumComps); return c[i]; } + inline component_type& operator[] (uint i) { CRNLIB_ASSERT(i < cNumComps); return c[i]; } + + inline color_quad& set_component(uint i, parameter_type f) + { + CRNLIB_ASSERT(i < cNumComps); + + c[i] = static_cast(clamp(f)); + + return *this; + } + + inline color_quad& set_grayscale(parameter_t l) + { + component_t x = static_cast(clamp(l)); + c[0] = x; + c[1] = x; + c[2] = x; + return *this; + } + + inline color_quad& clamp(const color_quad& l, const color_quad& h) + { + for (uint i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l[i], h[i])); + return *this; + } + + inline color_quad& clamp(parameter_type l, parameter_type h) + { 
+ for (uint i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l, h)); + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_type get_luma() const + { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); + } + + // Returns REC 709 luma. + inline parameter_type get_luma_rec709() const + { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + // Beware of endianness! + inline uint32 get_uint32() const + { + CRNLIB_ASSERT(sizeof(*this) == sizeof(uint32)); + return *reinterpret_cast(this); + } + + // Beware of endianness! + inline uint64 get_uint64() const + { + CRNLIB_ASSERT(sizeof(*this) == sizeof(uint64)); + return *reinterpret_cast(this); + } + + inline uint squared_distance(const color_quad& c, bool alpha = true) const + { + return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? math::square(a - c.a) : 0); + } + + inline bool rgb_equals(const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + inline bool operator== (const color_quad& rhs) const + { + if (sizeof(color_quad) == sizeof(uint32)) + return m_u32 == rhs.m_u32; + else + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); + } + + inline bool operator< (const color_quad& rhs) const + { + for (uint i = 0; i < cNumComps; i++) + { + if (c[i] < rhs.c[i]) + return true; + else if (!(c[i] == rhs.c[i])) + return false; + } + return false; + } + + color_quad& operator+= (const color_quad& other) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] + other.c[i])); + return *this; + } + + color_quad& operator-= (const color_quad& other) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] - other.c[i])); + return *this; + } + + color_quad& operator*= (parameter_type v) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] * v)); + return *this; + } + + color_quad& 
operator/= (parameter_type v) + { + for (uint i = 0; i < 4; i++) + c[i] = static_cast(c[i] / v); + return *this; + } + + color_quad get_swizzled(uint x, uint y, uint z, uint w) const + { + CRNLIB_ASSERT((x | y | z | w) < 4); + return color_quad(c[x], c[y], c[z], c[w]); + } + + friend color_quad operator+ (const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result += rhs; + return result; + } + + friend color_quad operator- (const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result -= rhs; + return result; + } + + friend color_quad operator* (const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result *= v; + return result; + } + + friend color_quad operator/ (const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result /= v; + return result; + } + + friend color_quad operator* (parameter_type v, const color_quad& rhs) + { + color_quad result(rhs); + result *= v; + return result; + } + + inline bool is_grayscale() const + { + return (c[0] == c[1]) && (c[1] == c[2]); + } + + uint get_min_component_index(bool alpha = true) const + { + uint index = 0; + uint limit = alpha ? cNumComps : (cNumComps - 1); + for (uint i = 1; i < limit; i++) + if (c[i] < c[index]) + index = i; + return index; + } + + uint get_max_component_index(bool alpha = true) const + { + uint index = 0; + uint limit = alpha ? 
cNumComps : (cNumComps - 1); + for (uint i = 1; i < limit; i++) + if (c[i] > c[index]) + index = i; + return index; + } + + operator size_t() const + { + return (size_t)fast_hash(this, sizeof(*this)); + } + + void get_float4(float* pDst) + { + for (uint i = 0; i < 4; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + void get_float3(float* pDst) + { + for (uint i = 0; i < 3; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + static color_quad component_min(const color_quad& a, const color_quad& b) + { + color_quad result; + for (uint i = 0; i < 4; i++) + result[i] = static_cast(math::minimum(a[i], b[i])); + return result; + } + + static color_quad component_max(const color_quad& a, const color_quad& b) + { + color_quad result; + for (uint i = 0; i < 4; i++) + result[i] = static_cast(math::maximum(a[i], b[i])); + return result; + } + + static color_quad make_black() + { + return color_quad(0, 0, 0, component_traits::cMax); + } + + static color_quad make_white() + { + return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); + } + }; // class color_quad + + template + struct scalar_type< color_quad > + { + enum { cFlag = true }; + static inline void construct(color_quad* p) { } + static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } + static inline void construct_array(color_quad* p, uint n) { p, n; } + static inline void destruct(color_quad* p) { p; } + static inline void destruct_array(color_quad* p, uint n) { p, n; } + }; + + typedef color_quad color_quad_u8; + typedef color_quad color_quad_i16; + typedef color_quad color_quad_u16; + typedef color_quad color_quad_i32; + typedef color_quad color_quad_u32; + typedef color_quad color_quad_f; + typedef color_quad color_quad_d; + + namespace color + { + inline uint 
elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + + return static_cast(dr * dr + dg * dg + db * db); + } + + inline uint elucidian_distance(uint r0, uint g0, uint b0, uint a0, uint r1, uint g1, uint b1, uint a1) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + int da = (int)a0 - (int)a1; + + return static_cast(dr * dr + dg * dg + db * db + da * da); + } + + inline uint elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, bool alpha) + { + if (alpha) + return elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); + else + return elucidian_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); + } + + inline uint weighted_elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1, uint wr, uint wg, uint wb) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + + return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db)); + } + + inline uint weighted_elucidian_distance( + uint r0, uint g0, uint b0, uint a0, + uint r1, uint g1, uint b1, uint a1, + uint wr, uint wg, uint wb, uint wa) + { + int dr = (int)r0 - (int)r1; + int dg = (int)g0 - (int)g1; + int db = (int)b0 - (int)b1; + int da = (int)a0 - (int)a1; + + return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db) + (wa * da * da)); + } + + inline uint weighted_elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, uint wr, uint wg, uint wb, uint wa) + { + return weighted_elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a, wr, wg, wb, wa); + } + + //const uint cRWeight = 8;//24; + //const uint cGWeight = 24;//73; + //const uint cBWeight = 1;//3; + + const uint cRWeight = 8;//24; + const uint cGWeight = 25;//73; + const uint cBWeight = 1;//3; + + inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& 
e2, bool alpha) + { + if (perceptual) + { + if (alpha) + return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, cRWeight+cGWeight+cBWeight); + else + return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, 0); + } + else + return elucidian_distance(e1, e2, alpha); + } + + inline uint peak_color_error(const color_quad_u8& e1, const color_quad_u8& e2) + { + return math::maximum(labs(e1[0] - e2[0]), labs(e1[1] - e2[1]), labs(e1[2] - e2[2])); + //return math::square(e1[0] - e2[0]) + math::square(e1[1] - e2[1]) + math::square(e1[2] - e2[2]); + } + + // y - [0,255] + // co - [-127,127] + // cg - [-126,127] + inline void RGB_to_YCoCg(int r, int g, int b, int& y, int& co, int& cg) + { + y = (r >> 2) + (g >> 1) + (b >> 2); + co = (r >> 1) - (b >> 1); + cg = -(r >> 2) + (g >> 1) - (b >> 2); + } + + inline void YCoCg_to_RGB(int y, int co, int cg, int& r, int& g, int& b) + { + int tmp = y - cg; + g = y + cg; + r = tmp + co; + b = tmp - co; + } + + static inline uint8 clamp_component(int i) { if (static_cast(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast(i); } + + // RGB->YCbCr constants, scaled by 2^16 + const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; + // YCbCr->RGB constants, scaled by 2^16 + const int R_CR = 91881, B_CB = 116130, G_CR = -46802, G_CB = -22554; + + inline int RGB_to_Y(const color_quad_u8& rgb) + { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + return (r * YR + g * YG + b * YB + 32768) >> 16; + } + + // RGB to YCbCr (same as JFIF JPEG). + // Odd default biases account for 565 endpoint packing. 
+ inline void RGB_to_YCC(color_quad_u8& ycc, const color_quad_u8& rgb, int cb_bias = 123, int cr_bias = 125) + { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + ycc.a = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + ycc.r = clamp_component(cb_bias + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); + ycc.g = clamp_component(cr_bias + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); + ycc.b = 0; + } + + // YCbCr to RGB. + // Odd biases account for 565 endpoint packing. + inline void YCC_to_RGB(color_quad_u8& rgb, const color_quad_u8& ycc, int cb_bias = 123, int cr_bias = 125) + { + const int y = ycc.a; + const int cb = ycc.r - cb_bias; + const int cr = ycc.g - cr_bias; + rgb.r = clamp_component(y + ((R_CR * cr + 32768) >> 16)); + rgb.g = clamp_component(y + ((G_CR * cr + G_CB * cb + 32768) >> 16)); + rgb.b = clamp_component(y + ((B_CB * cb + 32768) >> 16)); + rgb.a = 255; + } + + // Float RGB->YCbCr constants + const float S = 1.0f/65536.0f; + const float F_YR = S*YR, F_YG = S*YG, F_YB = S*YB, F_CB_R = S*CB_R, F_CB_G = S*CB_G, F_CB_B = S*CB_B, F_CR_R = S*CR_R, F_CR_G = S*CR_G, F_CR_B = S*CR_B; + // Float YCbCr->RGB constants + const float F_R_CR = S*R_CR, F_B_CB = S*B_CB, F_G_CR = S*G_CR, F_G_CB = S*G_CB; + + inline void RGB_to_YCC_float(color_quad_f& ycc, const color_quad_u8& rgb) + { + const int r = rgb[0], g = rgb[1], b = rgb[2]; + ycc.a = r * F_YR + g * F_YG + b * F_YB; + ycc.r = r * F_CB_R + g * F_CB_G + b * F_CB_B; + ycc.g = r * F_CR_R + g * F_CR_G + b * F_CR_B; + ycc.b = 0; + } + + inline void YCC_float_to_RGB(color_quad_u8& rgb, const color_quad_f& ycc) + { + float y = ycc.a, cb = ycc.r, cr = ycc.g; + rgb.r = color::clamp_component(static_cast(.5f + y + F_R_CR * cr)); + rgb.g = color::clamp_component(static_cast(.5f + y + F_G_CR * cr + F_G_CB * cb)); + rgb.b = color::clamp_component(static_cast(.5f + y + F_B_CB * cb)); + rgb.a = 255; + } + + } // namespace color + +} // namespace crnlib + diff --git a/crnlib/crn_command_line_params.cpp 
b/crnlib/crn_command_line_params.cpp new file mode 100644 index 00000000..06df53b9 --- /dev/null +++ b/crnlib/crn_command_line_params.cpp @@ -0,0 +1,439 @@ +// File: crn_command_line_params.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_command_line_params.h" +#include "crn_console.h" +#include "crn_cfile_stream.h" + +namespace crnlib +{ + command_line_params::command_line_params() + { + } + + void command_line_params::clear() + { + m_params.clear(); + + m_param_map.clear(); + } + + bool command_line_params::split_params(const wchar_t* p, dynamic_wstring_array& params) + { + bool within_param = false; + bool within_quote = false; + + uint ofs = 0; + dynamic_wstring str; + + while (p[ofs]) + { + const wchar_t c = p[ofs]; + + if (within_param) + { + if (within_quote) + { + if (c == L'"') + within_quote = false; + + str.append_char(c); + } + else if ((c == L' ') || (c == L'\t')) + { + if (!str.is_empty()) + { + params.push_back(str); + str.clear(); + } + within_param = false; + } + else + { + if (c == L'"') + within_quote = true; + + str.append_char(c); + } + } + else if ((c != L' ') && (c != L'\t')) + { + within_param = true; + + if (c == L'"') + within_quote = true; + + str.append_char(c); + } + + ofs++; + } + + if (within_quote) + { + console::error(L"Unmatched quote in command line \"%s\"", p); + return false; + } + + if (!str.is_empty()) + params.push_back(str); + + return true; + } + + bool command_line_params::load_string_file(const wchar_t* pFilename, dynamic_wstring_array& strings) + { + cfile_stream in_stream; + if (!in_stream.open(pFilename, cDataStreamReadable | cDataStreamSeekable)) + { + console::error(L"Unable to open file \"%s\" for reading!", pFilename); + return false; + } + + dynamic_string ansi_str; + + for ( ; ; ) + { + if (!in_stream.read_line(ansi_str)) + break; + + ansi_str.trim(); + if (ansi_str.is_empty()) + continue; + + strings.push_back(dynamic_wstring(ansi_str.get_ptr())); + } + + 
return true; + } + + bool command_line_params::parse(const dynamic_wstring_array& params, uint n, const param_desc* pParam_desc) + { + CRNLIB_ASSERT(n && pParam_desc); + + m_params = params; + + uint arg_index = 0; + while (arg_index < params.size()) + { + const uint cur_arg_index = arg_index; + const dynamic_wstring& src_param = params[arg_index++]; + + if (src_param.is_empty()) + continue; + + if ((src_param[0] == L'/') || (src_param[0] == L'-')) + { + if (src_param.get_len() < 2) + { + console::error(L"Invalid command line parameter: \"%s\"", src_param.get_ptr()); + return false; + } + + dynamic_wstring key_str(src_param); + + key_str.right(1); + + int modifier = 0; + wchar_t c = key_str[key_str.get_len() - 1]; + if (c == L'+') + modifier = 1; + else if (c == L'-') + modifier = -1; + + if (modifier) + key_str.left(key_str.get_len() - 1); + + uint param_index; + for (param_index = 0; param_index < n; param_index++) + if (key_str == pParam_desc[param_index].m_pName) + break; + + if (param_index == n) + { + console::error(L"Unrecognized command line parameter: \"%s\"", src_param.get_ptr()); + return false; + } + + const param_desc& desc = pParam_desc[param_index]; + + const uint cMaxValues = 16; + dynamic_wstring val_str[cMaxValues]; + uint num_val_strs = 0; + if (desc.m_num_values) + { + CRNLIB_ASSERT(desc.m_num_values <= cMaxValues); + + if ((arg_index + desc.m_num_values) > params.size()) + { + console::error(L"Expected %u value(s) after command line parameter: \"%s\"", desc.m_num_values, src_param.get_ptr()); + return false; + } + + for (uint v = 0; v < desc.m_num_values; v++) + val_str[num_val_strs++] = params[arg_index++]; + } + + dynamic_wstring_array strings; + + if ((desc.m_support_listing_file) && (val_str[0].get_len() >= 2) && (val_str[0][0] == L'@')) + { + dynamic_wstring filename(val_str[0]); + filename.right(1); + filename.unquote(); + + if (!load_string_file(filename.get_ptr(), strings)) + { + console::error(L"Failed loading listing file \"%s\"!", 
filename.get_ptr()); + return false; + } + } + else + { + for (uint v = 0; v < num_val_strs; v++) + { + val_str[v].unquote(); + strings.push_back(val_str[v]); + } + } + + param_value pv; + pv.m_values.swap(strings); + pv.m_index = cur_arg_index; + pv.m_modifier = (int8)modifier; + m_param_map.insert(std::make_pair(key_str, pv)); + } + else + { + param_value pv; + pv.m_values.push_back(src_param); + pv.m_values.back().unquote(); + pv.m_index = cur_arg_index; + m_param_map.insert(std::make_pair(g_empty_dynamic_wstring, pv)); + } + } + + return true; + } + + bool command_line_params::parse(const wchar_t* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param) + { + CRNLIB_ASSERT(n && pParam_desc); + + dynamic_wstring_array p; + if (!split_params(pCmd_line, p)) + return 0; + + if (p.empty()) + return 0; + + if (skip_first_param) + p.erase(0U); + + return parse(p, n, pParam_desc); + } + + bool command_line_params::is_param(uint index) const + { + CRNLIB_ASSERT(index < m_params.size()); + if (index >= m_params.size()) + return false; + + const dynamic_wstring& w = m_params[index]; + if (w.is_empty()) + return false; + + return (w.get_len() >= 2) && ((w[0] == L'-') || (w[0] == L'/')); + } + + uint command_line_params::find(uint num_keys, const wchar_t** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const + { + CRNLIB_ASSERT(ppKeys); + + if (pUnmatched_indices) + { + pUnmatched_indices->resize(m_params.size()); + for (uint i = 0; i < m_params.size(); i++) + (*pUnmatched_indices)[i] = i; + } + + uint n = 0; + for (uint i = 0; i < num_keys; i++) + { + const wchar_t* pKey = ppKeys[i]; + + param_map_const_iterator begin, end; + find(pKey, begin, end); + + while (begin != end) + { + if (pIterators) + pIterators->push_back(begin); + + if (pUnmatched_indices) + { + int k = pUnmatched_indices->find(begin->second.m_index); + if (k >= 0) + pUnmatched_indices->erase_unordered(k); + } + + n++; + begin++; + } + } + + return n; + } + + void 
command_line_params::find(const wchar_t* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const + { + dynamic_wstring key(pKey); + begin = m_param_map.lower_bound(key); + end = m_param_map.upper_bound(key); + } + + uint command_line_params::get_count(const wchar_t* pKey) const + { + param_map_const_iterator begin, end; + find(pKey, begin, end); + + uint n = 0; + + while (begin != end) + { + n++; + begin++; + } + + return n; + } + + command_line_params::param_map_const_iterator command_line_params::get_param(const wchar_t* pKey, uint index) const + { + param_map_const_iterator begin, end; + find(pKey, begin, end); + + if (begin == end) + return m_param_map.end(); + + uint n = 0; + + while ((begin != end) && (n != index)) + { + n++; + begin++; + } + + if (begin == end) + return m_param_map.end(); + + return begin; + } + + bool command_line_params::has_value(const wchar_t* pKey, uint index) const + { + return get_num_values(pKey, index) != 0; + } + + uint command_line_params::get_num_values(const wchar_t* pKey, uint index) const + { + param_map_const_iterator it = get_param(pKey, index); + + if (it == end()) + return 0; + + return it->second.m_values.size(); + } + + bool command_line_params::get_value_as_bool(const wchar_t* pKey, uint index, bool def) const + { + param_map_const_iterator it = get_param(pKey, index); + if (it == end()) + return def; + + if (it->second.m_modifier) + return it->second.m_modifier > 0; + else + return true; + } + + int command_line_params::get_value_as_int(const wchar_t* pKey, uint index, int def, int l, int h, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + return def; + + int val; + const wchar_t* p = it->second.m_values[value_index].get_ptr(); + if (!string_to_int(p, val)) + { + crnlib::console::warning(L"Invalid value specified for parameter \"%s\", using default value of %i", pKey, def); + return def; + } + + 
if (val < l) + { + crnlib::console::warning(L"Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, l); + val = l; + } + else if (val > h) + { + crnlib::console::warning(L"Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, h); + val = h; + } + + return val; + } + + float command_line_params::get_value_as_float(const wchar_t* pKey, uint index, float def, float l, float h, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + return def; + + float val; + const wchar_t* p = it->second.m_values[value_index].get_ptr(); + if (!string_to_float(p, val)) + { + crnlib::console::warning(L"Invalid value specified for float parameter \"%s\", using default value of %f", pKey, def); + return def; + } + + if (val < l) + { + crnlib::console::warning(L"Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, l); + val = l; + } + else if (val > h) + { + crnlib::console::warning(L"Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, h); + val = h; + } + + return val; + } + + bool command_line_params::get_value_as_string(const wchar_t* pKey, uint index, dynamic_wstring& value, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + { + value.empty(); + return false; + } + + value = it->second.m_values[value_index]; + return true; + } + + const dynamic_wstring& command_line_params::get_value_as_string_or_empty(const wchar_t* pKey, uint index, uint value_index) const + { + param_map_const_iterator it = get_param(pKey, index); + if ((it == end()) || (value_index >= it->second.m_values.size())) + return g_empty_dynamic_wstring; + + return it->second.m_values[value_index]; + } + +} // namespace crnlib + diff --git a/crnlib/crn_command_line_params.h b/crnlib/crn_command_line_params.h new file mode 100644 index 
00000000..32f1592e --- /dev/null +++ b/crnlib/crn_command_line_params.h @@ -0,0 +1,82 @@ +// File: crn_command_line_params.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_value.h" +#include + +namespace crnlib +{ + class command_line_params + { + public: + struct param_value + { + param_value() : m_index(0), m_modifier(0) { } + + dynamic_wstring_array m_values; + uint m_index; + int8 m_modifier; + }; + + typedef std::multimap param_map; + typedef param_map::const_iterator param_map_const_iterator; + typedef param_map::iterator param_map_iterator; + + command_line_params(); + + void clear(); + + static bool split_params(const wchar_t* p, dynamic_wstring_array& params); + + struct param_desc + { + const wchar_t* m_pName; + uint m_num_values; + bool m_support_listing_file; + }; + + bool parse(const dynamic_wstring_array& params, uint n, const param_desc* pParam_desc); + bool parse(const wchar_t* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param = true); + + const dynamic_wstring_array& get_array() const { return m_params; } + + bool is_param(uint index) const; + + const param_map& get_map() const { return m_param_map; } + + uint get_num_params() const { return static_cast(m_param_map.size()); } + + param_map_const_iterator begin() const { return m_param_map.begin(); } + param_map_const_iterator end() const { return m_param_map.end(); } + + uint find(uint num_keys, const wchar_t** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const; + + void find(const wchar_t* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const; + + uint get_count(const wchar_t* pKey) const; + + // Returns end() if param cannot be found, or index is out of range. 
+ param_map_const_iterator get_param(const wchar_t* pKey, uint index) const; + + bool has_key(const wchar_t* pKey) const { return get_param(pKey, 0) != end(); } + + bool has_value(const wchar_t* pKey, uint index) const; + uint get_num_values(const wchar_t* pKey, uint index) const; + + bool get_value_as_bool(const wchar_t* pKey, uint index = 0, bool def = false) const; + + int get_value_as_int(const wchar_t* pKey, uint index, int def, int l = INT_MIN, int h = INT_MAX, uint value_index = 0) const; + float get_value_as_float(const wchar_t* pKey, uint index, float def = 0.0f, float l = -math::cNearlyInfinite, float h = math::cNearlyInfinite, uint value_index = 0) const; + + bool get_value_as_string(const wchar_t* pKey, uint index, dynamic_wstring& value, uint value_index = 0) const; + const dynamic_wstring& get_value_as_string_or_empty(const wchar_t* pKey, uint index = 0, uint value_index = 0) const; + + private: + dynamic_wstring_array m_params; + + param_map m_param_map; + + static bool load_string_file(const wchar_t* pFilename, dynamic_wstring_array& strings); + }; + +} // namespace crnlib diff --git a/crnlib/crn_comp.cpp b/crnlib/crn_comp.cpp new file mode 100644 index 00000000..0d78ef22 --- /dev/null +++ b/crnlib/crn_comp.cpp @@ -0,0 +1,2173 @@ +// File: crn_comp.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_console.h" +#include "crn_comp.h" +#include "crn_zeng.h" +#include "crn_checksum.h" + +#define CRNLIB_CREATE_DEBUG_IMAGES 0 +#define CRNLIB_ENABLE_DEBUG_MESSAGES 0 + +namespace crnlib +{ + static const uint cEncodingMapNumChunksPerCode = 3; + + crn_comp::crn_comp() : + m_pParams(NULL) + { + } + + crn_comp::~crn_comp() + { + } + + float crn_comp::color_endpoint_similarity_func(uint index_a, uint index_b, void* pContext) + { + dxt_hc& hvq = *static_cast(pContext); + + uint endpoint_a = hvq.get_color_endpoint(index_a); + uint endpoint_b = hvq.get_color_endpoint(index_b); + + color_quad_u8 a[2]; + a[0] = 
dxt1_block::unpack_color((uint16)(endpoint_a & 0xFFFF), true); + a[1] = dxt1_block::unpack_color((uint16)((endpoint_a >> 16) & 0xFFFF), true); + + color_quad_u8 b[2]; + b[0] = dxt1_block::unpack_color((uint16)(endpoint_b & 0xFFFF), true); + b[1] = dxt1_block::unpack_color((uint16)((endpoint_b >> 16) & 0xFFFF), true); + + uint total_error = color::elucidian_distance(a[0], b[0], false) + color::elucidian_distance(a[1], b[1], false); + + float weight = 1.0f - math::clamp(total_error * 1.0f/8000.0f, 0.0f, 1.0f); + return weight; + } + + float crn_comp::alpha_endpoint_similarity_func(uint index_a, uint index_b, void* pContext) + { + dxt_hc& hvq = *static_cast(pContext); + + uint endpoint_a = hvq.get_alpha_endpoint(index_a); + int endpoint_a_lo = dxt5_block::unpack_endpoint(endpoint_a, 0); + int endpoint_a_hi = dxt5_block::unpack_endpoint(endpoint_a, 1); + + uint endpoint_b = hvq.get_alpha_endpoint(index_b); + int endpoint_b_lo = dxt5_block::unpack_endpoint(endpoint_b, 0); + int endpoint_b_hi = dxt5_block::unpack_endpoint(endpoint_b, 1); + + int total_error = math::square(endpoint_a_lo - endpoint_b_lo) + math::square(endpoint_a_hi - endpoint_b_hi); + + float weight = 1.0f - math::clamp(total_error * 1.0f/256.0f, 0.0f, 1.0f); + return weight; + } + + void crn_comp::sort_color_endpoint_codebook(crnlib::vector& remapping, const crnlib::vector& endpoints) + { + remapping.resize(endpoints.size()); + + uint lowest_energy = UINT_MAX; + uint lowest_energy_index = 0; + + for (uint i = 0; i < endpoints.size(); i++) + { + color_quad_u8 a(dxt1_block::unpack_color(static_cast(endpoints[i] & 0xFFFF), true)); + color_quad_u8 b(dxt1_block::unpack_color(static_cast((endpoints[i] >> 16) & 0xFFFF), true)); + + uint total = a.r + a.g + a.b + b.r + b.g + b.b; + + if (total < lowest_energy) + { + lowest_energy = total; + lowest_energy_index = i; + } + } + + uint cur_index = lowest_energy_index; + + crnlib::vector chosen_flags(endpoints.size()); + + uint n = 0; + while (n < endpoints.size()) // Greedy nearest-neighbour walk; never entered for an empty codebook, which would otherwise index past the end and loop forever. + { +
chosen_flags[cur_index] = true; + + remapping[cur_index] = n; + n++; + if (n == endpoints.size()) + break; + + uint lowest_error = UINT_MAX; + uint lowest_error_index = 0; + + color_quad_u8 a(dxt1_block::unpack_endpoint(endpoints[cur_index], 0, true)); + color_quad_u8 b(dxt1_block::unpack_endpoint(endpoints[cur_index], 1, true)); + + for (uint i = 0; i < endpoints.size(); i++) + { + if (chosen_flags[i]) + continue; + + color_quad_u8 c(dxt1_block::unpack_endpoint(endpoints[i], 0, true)); + color_quad_u8 d(dxt1_block::unpack_endpoint(endpoints[i], 1, true)); + + uint total = color::elucidian_distance(a, c, false) + color::elucidian_distance(b, d, false); + + if (total < lowest_error) + { + lowest_error = total; + lowest_error_index = i; + } + } + + cur_index = lowest_error_index; + } + } + + void crn_comp::sort_alpha_endpoint_codebook(crnlib::vector& remapping, const crnlib::vector& endpoints) + { + remapping.resize(endpoints.size()); + + uint lowest_energy = UINT_MAX; + uint lowest_energy_index = 0; + + for (uint i = 0; i < endpoints.size(); i++) + { + uint a = dxt5_block::unpack_endpoint(endpoints[i], 0); + uint b = dxt5_block::unpack_endpoint(endpoints[i], 1); + + uint total = a + b; + + if (total < lowest_energy) + { + lowest_energy = total; + lowest_energy_index = i; + } + } + + uint cur_index = lowest_energy_index; + + crnlib::vector chosen_flags(endpoints.size()); + + uint n = 0; + while (n < endpoints.size()) // Greedy nearest-neighbour walk; never entered for an empty codebook, which would otherwise index past the end and loop forever. + { + chosen_flags[cur_index] = true; + + remapping[cur_index] = n; + n++; + if (n == endpoints.size()) + break; + + uint lowest_error = UINT_MAX; + uint lowest_error_index = 0; + + const int a = dxt5_block::unpack_endpoint(endpoints[cur_index], 0); + const int b = dxt5_block::unpack_endpoint(endpoints[cur_index], 1); + + for (uint i = 0; i < endpoints.size(); i++) + { + if (chosen_flags[i]) + continue; + + const int c = dxt5_block::unpack_endpoint(endpoints[i], 0); + const int d = dxt5_block::unpack_endpoint(endpoints[i], 1); + + uint total = math::square(a - c) +
math::square(b - d); + + if (total < lowest_error) + { + lowest_error = total; + lowest_error_index = i; + } + } + + cur_index = lowest_error_index; + } + } + + // The indices are only used for statistical purposes. + bool crn_comp::pack_color_endpoints( + crnlib::vector& data, + const crnlib::vector& remapping, + const crnlib::vector& endpoint_indices, + uint trial_index) + { + trial_index; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"pack_color_endpoints: %u", trial_index); +#endif + + crnlib::vector remapped_endpoints(m_hvq.get_color_endpoint_codebook_size()); + + for (uint i = 0; i < m_hvq.get_color_endpoint_codebook_size(); i++) + remapped_endpoints[remapping[i]] = m_hvq.get_color_endpoint(i); + + const uint component_limits[6] = { 31, 63, 31, 31, 63, 31 }; + + symbol_histogram hist[2]; + hist[0].resize(32); + hist[1].resize(64); + +#if CRNLIB_CREATE_DEBUG_IMAGES + image_u8 endpoint_image(2, m_hvq.get_color_endpoint_codebook_size()); + image_u8 endpoint_residual_image(2, m_hvq.get_color_endpoint_codebook_size()); +#endif + + crnlib::vector residual_syms; + residual_syms.reserve(m_hvq.get_color_endpoint_codebook_size()*2*3); + + color_quad_u8 prev[2]; + prev[0].clear(); + prev[1].clear(); + + int total_residuals = 0; + + for (uint endpoint_index = 0; endpoint_index < m_hvq.get_color_endpoint_codebook_size(); endpoint_index++) + { + const uint endpoint = remapped_endpoints[endpoint_index]; + + color_quad_u8 cur[2]; + cur[0] = dxt1_block::unpack_color((uint16)(endpoint & 0xFFFF), false); + cur[1] = dxt1_block::unpack_color((uint16)((endpoint >> 16) & 0xFFFF), false); + +#if CRNLIB_CREATE_DEBUG_IMAGES + endpoint_image(0, endpoint_index) = dxt1_block::unpack_color((uint16)(endpoint & 0xFFFF), true); + endpoint_image(1, endpoint_index) = dxt1_block::unpack_color((uint16)((endpoint >> 16) & 0xFFFF), true); +#endif + + for (uint j = 0; j < 2; j++) + { + for (uint k = 0; k < 3; k++) + { + int delta = cur[j][k] 
- prev[j][k]; + total_residuals += delta*delta; + + int sym = delta & component_limits[j*3+k]; + int table = (k == 1) ? 1 : 0; + + hist[table].inc_freq(sym); + + residual_syms.push_back(sym); + +#if CRNLIB_CREATE_DEBUG_IMAGES + endpoint_residual_image(j, endpoint_index)[k] = static_cast(sym); +#endif + } + } + + prev[0] = cur[0]; + prev[1] = cur[1]; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total endpoint residuals: %i", total_residuals); +#endif + + if (endpoint_indices.size() > 1) + { + uint prev_index = remapping[endpoint_indices[0]]; + int64 total_delta = 0; + for (uint i = 1; i < endpoint_indices.size(); i++) + { + uint cur_index = remapping[endpoint_indices[i]]; + int delta = cur_index - prev_index; + prev_index = cur_index; + total_delta += delta * delta; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total endpoint index delta: %I64i", total_delta); +#endif + } + +#if CRNLIB_CREATE_DEBUG_IMAGES + image_utils::save_to_file(dynamic_wstring(cVarArg, L"color_endpoint_residuals_%u.tga", trial_index).get_ptr(), endpoint_residual_image); + image_utils::save_to_file(dynamic_wstring(cVarArg, L"color_endpoints_%u.tga", trial_index).get_ptr(), endpoint_image); +#endif + + static_huffman_data_model residual_dm[2]; + + symbol_codec codec; + codec.start_encoding(1024*1024); + + // Transmit residuals + for (uint i = 0; i < 2; i++) + { + if (!residual_dm[i].init(true, hist[i], 15)) + return false; + + if (!codec.encode_transmit_static_huffman_data_model(residual_dm[i], false)) + return false; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Wrote %u bits for color endpoint residual Huffman tables", codec.encode_get_total_bits_written()); +#endif + + uint start_bits = codec.encode_get_total_bits_written(); + start_bits; + + for (uint i = 0; i < residual_syms.size(); i++) + { + const uint 
sym = residual_syms[i]; + const uint table = ((i % 3) == 1) ? 1 : 0; + codec.encode(sym, residual_dm[table]); + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Wrote %u bits for color endpoint residuals", codec.encode_get_total_bits_written() - start_bits); +#endif + + codec.stop_encoding(false); + + data.swap(codec.get_encoding_buf()); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + console::debug(L"Wrote a total of %u bits for color endpoint codebook", codec.encode_get_total_bits_written()); + + console::debug(L"Wrote %f bits per each color endpoint", data.size() * 8.0f / m_hvq.get_color_endpoint_codebook_size()); + } +#endif + + return true; + } + + // The indices are only used for statistical purposes. + bool crn_comp::pack_alpha_endpoints( + crnlib::vector& data, + const crnlib::vector& remapping, + const crnlib::vector& endpoint_indices, + uint trial_index) + { + trial_index; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"pack_alpha_endpoints: %u", trial_index); +#endif + + crnlib::vector remapped_endpoints(m_hvq.get_alpha_endpoint_codebook_size()); + + for (uint i = 0; i < m_hvq.get_alpha_endpoint_codebook_size(); i++) + remapped_endpoints[remapping[i]] = m_hvq.get_alpha_endpoint(i); + + symbol_histogram hist; + hist.resize(256); + +#if CRNLIB_CREATE_DEBUG_IMAGES + image_u8 endpoint_image(2, m_hvq.get_alpha_endpoint_codebook_size()); + image_u8 endpoint_residual_image(2, m_hvq.get_alpha_endpoint_codebook_size()); +#endif + + crnlib::vector residual_syms; + residual_syms.reserve(m_hvq.get_alpha_endpoint_codebook_size()*2*3); + + uint prev[2]; + utils::zero_object(prev); + + int total_residuals = 0; + + for (uint endpoint_index = 0; endpoint_index < m_hvq.get_alpha_endpoint_codebook_size(); endpoint_index++) + { + const uint endpoint = remapped_endpoints[endpoint_index]; + + uint cur[2]; + cur[0] = 
dxt5_block::unpack_endpoint(endpoint, 0); + cur[1] = dxt5_block::unpack_endpoint(endpoint, 1); + +#if CRNLIB_CREATE_DEBUG_IMAGES + endpoint_image(0, endpoint_index) = cur[0]; + endpoint_image(1, endpoint_index) = cur[1]; +#endif + + for (uint j = 0; j < 2; j++) + { + int delta = cur[j] - prev[j]; + total_residuals += delta*delta; + + int sym = delta & 255; + + hist.inc_freq(sym); + + residual_syms.push_back(sym); + +#if CRNLIB_CREATE_DEBUG_IMAGES + endpoint_residual_image(j, endpoint_index) = static_cast(sym); +#endif + } + + prev[0] = cur[0]; + prev[1] = cur[1]; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total endpoint residuals: %i", total_residuals); +#endif + + if (endpoint_indices.size() > 1) + { + uint prev_index = remapping[endpoint_indices[0]]; + int64 total_delta = 0; + for (uint i = 1; i < endpoint_indices.size(); i++) + { + uint cur_index = remapping[endpoint_indices[i]]; + int delta = cur_index - prev_index; + prev_index = cur_index; + total_delta += delta * delta; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total endpoint index delta: %I64i", total_delta); +#endif + } + +#if CRNLIB_CREATE_DEBUG_IMAGES + image_utils::save_to_file(dynamic_wstring(cVarArg, L"alpha_endpoint_residuals_%u.tga", trial_index).get_ptr(), endpoint_residual_image); + image_utils::save_to_file(dynamic_wstring(cVarArg, L"alpha_endpoints_%u.tga", trial_index).get_ptr(), endpoint_image); +#endif + + static_huffman_data_model residual_dm; + + symbol_codec codec; + codec.start_encoding(1024*1024); + + // Transmit residuals + if (!residual_dm.init(true, hist, 15)) + return false; + + if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Wrote %u bits for alpha endpoint residual Huffman tables", 
codec.encode_get_total_bits_written()); +#endif + + uint start_bits = codec.encode_get_total_bits_written(); + start_bits; + + for (uint i = 0; i < residual_syms.size(); i++) + { + const uint sym = residual_syms[i]; + codec.encode(sym, residual_dm); + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Wrote %u bits for alpha endpoint residuals", codec.encode_get_total_bits_written() - start_bits); +#endif + + codec.stop_encoding(false); + + data.swap(codec.get_encoding_buf()); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + console::debug(L"Wrote a total of %u bits for alpha endpoint codebook", codec.encode_get_total_bits_written()); + + console::debug(L"Wrote %f bits per each alpha endpoint", data.size() * 8.0f / m_hvq.get_alpha_endpoint_codebook_size()); + } +#endif + + return true; + } + + float crn_comp::color_selector_similarity_func(uint index_a, uint index_b, void* pContext) + { + const crnlib::vector& selectors = *static_cast< const crnlib::vector* >(pContext); + + const dxt_hc::selectors& selectors_a = selectors[index_a]; + const dxt_hc::selectors& selectors_b = selectors[index_b]; + + int total = 0; + for (uint i = 0; i < 16; i++) + { + int a = g_dxt1_to_linear[selectors_a.get_by_index(i)]; + int b = g_dxt1_to_linear[selectors_b.get_by_index(i)]; + + int delta = a - b; + total += delta*delta; + } + + float weight = 1.0f - math::clamp(total * 1.0f/20.0f, 0.0f, 1.0f); + return weight; + } + + float crn_comp::alpha_selector_similarity_func(uint index_a, uint index_b, void* pContext) + { + const crnlib::vector& selectors = *static_cast< const crnlib::vector* >(pContext); + + const dxt_hc::selectors& selectors_a = selectors[index_a]; + const dxt_hc::selectors& selectors_b = selectors[index_b]; + + int total = 0; + for (uint i = 0; i < 16; i++) + { + int a = g_dxt5_to_linear[selectors_a.get_by_index(i)]; + int b = g_dxt5_to_linear[selectors_b.get_by_index(i)]; + 
+ int delta = a - b; + total += delta*delta; + } + + float weight = 1.0f - math::clamp(total * 1.0f/100.0f, 0.0f, 1.0f); + return weight; + } + + void crn_comp::sort_selector_codebook(crnlib::vector& remapping, const crnlib::vector& selectors, const uint8* pTo_linear) + { + remapping.resize(selectors.size()); + + uint lowest_energy = UINT_MAX; + uint lowest_energy_index = 0; + + for (uint i = 0; i < selectors.size(); i++) + { + uint total = 0; + for (uint j = 0; j < 16; j++) + { + int a = pTo_linear[selectors[i].get_by_index(j)]; + + total += a*a; + } + + if (total < lowest_energy) + { + lowest_energy = total; + lowest_energy_index = i; + } + } + + uint cur_index = lowest_energy_index; + + crnlib::vector chosen_flags(selectors.size()); + + uint n = 0; + while (n < selectors.size()) // Greedy nearest-neighbour walk; never entered for an empty codebook, which would otherwise index past the end and loop forever. + { + chosen_flags[cur_index] = true; + + remapping[cur_index] = n; + n++; + if (n == selectors.size()) + break; + + uint lowest_error = UINT_MAX; + uint lowest_error_index = 0; + + for (uint i = 0; i < selectors.size(); i++) + { + if (chosen_flags[i]) + continue; + + uint total = 0; + for (uint j = 0; j < 16; j++) + { + int a = pTo_linear[selectors[cur_index].get_by_index(j)]; + int b = pTo_linear[selectors[i].get_by_index(j)]; + + int delta = a - b; + total += delta*delta; + } + + if (total < lowest_error) + { + lowest_error = total; + lowest_error_index = i; + } + } + + cur_index = lowest_error_index; + } + } + + // The indices are only used for statistical purposes.
+ bool crn_comp::pack_selectors( + crnlib::vector& packed_data, + const crnlib::vector& selector_indices, + const crnlib::vector& selectors, + const crnlib::vector& remapping, + uint max_selector_value, + const uint8* pTo_linear, + uint trial_index) + { + trial_index; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"pack_selectors: %u", trial_index); +#endif + + crnlib::vector remapped_selectors(selectors.size()); + + for (uint i = 0; i < selectors.size(); i++) + remapped_selectors[remapping[i]] = selectors[i]; + +#if CRNLIB_CREATE_DEBUG_IMAGES + image_u8 residual_image(16, selectors.size());; + image_u8 selector_image(16, selectors.size());; +#endif + + crnlib::vector residual_syms; + residual_syms.reserve(selectors.size() * 8); + + const uint num_baised_selector_values = (max_selector_value * 2 + 1); + symbol_histogram hist(num_baised_selector_values * num_baised_selector_values); + + dxt_hc::selectors prev_selectors; + utils::zero_object(prev_selectors); + int total_residuals = 0; + for (uint selector_index = 0; selector_index < selectors.size(); selector_index++) + { + const dxt_hc::selectors& s = remapped_selectors[selector_index]; + + uint prev_sym = 0; + for (uint i = 0; i < 16; i++) + { + int p = pTo_linear[crnlib_assert_range_incl(prev_selectors.get_by_index(i), max_selector_value)]; + + int r = pTo_linear[crnlib_assert_range_incl(s.get_by_index(i), max_selector_value)] - p; + + total_residuals += r*r; + + uint sym = r + max_selector_value; + + CRNLIB_ASSERT(sym < num_baised_selector_values); + if (i & 1) + { + uint paired_sym = (sym * num_baised_selector_values) + prev_sym; + residual_syms.push_back(paired_sym); + hist.inc_freq(paired_sym); + } + else + prev_sym = sym; + +#if CRNLIB_CREATE_DEBUG_IMAGES + selector_image(i, selector_index) = (pTo_linear[crnlib_assert_range_incl(s.get_by_index(i), max_selector_value)] * 255) / max_selector_value; + residual_image(i, selector_index) = sym; +#endif + } + 
+ prev_selectors = s; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total selector endpoint residuals: %u", total_residuals); +#endif + + if (selector_indices.size() > 1) + { + uint prev_index = remapping[selector_indices[1]]; + int64 total_delta = 0; + for (uint i = 1; i < selector_indices.size(); i++) + { + uint cur_index = remapping[selector_indices[i]]; + int delta = cur_index - prev_index; + prev_index = cur_index; + total_delta += delta * delta; + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total selector index delta: %I64i", total_delta); +#endif + } + +#if CRNLIB_CREATE_DEBUG_IMAGES + image_utils::save_to_file(dynamic_wstring(cVarArg, L"selectors_%u_%u.tga", trial_index, max_selector_value).get_ptr(), selector_image); + image_utils::save_to_file(dynamic_wstring(cVarArg, L"selector_residuals_%u_%u.tga", trial_index, max_selector_value).get_ptr(), residual_image); +#endif + + static_huffman_data_model residual_dm; + + symbol_codec codec; + codec.start_encoding(1024*1024); + + // Transmit residuals + if (!residual_dm.init(true, hist, 15)) + return false; + + if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Wrote %u bits for selector residual Huffman tables", codec.encode_get_total_bits_written()); +#endif + + uint start_bits = codec.encode_get_total_bits_written(); + start_bits; + + for (uint i = 0; i < residual_syms.size(); i++) + { + const uint sym = residual_syms[i]; + codec.encode(sym, residual_dm); + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Wrote %u bits for selector residuals", codec.encode_get_total_bits_written() - start_bits); +#endif + + codec.stop_encoding(false); + + packed_data.swap(codec.get_encoding_buf()); + +#if 
CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + console::debug(L"Wrote a total of %u bits for selector codebook", codec.encode_get_total_bits_written()); + + console::debug(L"Wrote %f bits per each selector codebook entry", packed_data.size() * 8.0f / selectors.size()); + } +#endif + + return true; + } + + bool crn_comp::pack_chunks( + uint first_chunk, uint num_chunks, + bool clear_histograms, + symbol_codec* pCodec, + const crnlib::vector* pColor_endpoint_remap, + const crnlib::vector* pColor_selector_remap, + const crnlib::vector* pAlpha_endpoint_remap, + const crnlib::vector* pAlpha_selector_remap) + { + if (!pCodec) + { + m_chunk_encoding_hist.resize(1 << (3 * cEncodingMapNumChunksPerCode)); + if (clear_histograms) + m_chunk_encoding_hist.set_all(0); + + if (pColor_endpoint_remap) + { + CRNLIB_ASSERT(pColor_endpoint_remap->size() == m_hvq.get_color_endpoint_codebook_size()); + m_endpoint_index_hist[0].resize(pColor_endpoint_remap->size()); + if (clear_histograms) + m_endpoint_index_hist[0].set_all(0); + } + + if (pColor_selector_remap) + { + CRNLIB_ASSERT(pColor_selector_remap->size() == m_hvq.get_color_selector_codebook_size()); + m_selector_index_hist[0].resize(pColor_selector_remap->size()); + if (clear_histograms) + m_selector_index_hist[0].set_all(0); + } + + if (pAlpha_endpoint_remap) + { + CRNLIB_ASSERT(pAlpha_endpoint_remap->size() == m_hvq.get_alpha_endpoint_codebook_size()); + m_endpoint_index_hist[1].resize(pAlpha_endpoint_remap->size()); + if (clear_histograms) + m_endpoint_index_hist[1].set_all(0); + } + + if (pAlpha_selector_remap) + { + CRNLIB_ASSERT(pAlpha_selector_remap->size() == m_hvq.get_alpha_selector_codebook_size()); + m_selector_index_hist[1].resize(pAlpha_selector_remap->size()); + if (clear_histograms) + m_selector_index_hist[1].set_all(0); + } + } + + uint prev_endpoint_index[cNumComps]; + utils::zero_object(prev_endpoint_index); + + uint prev_selector_index[cNumComps]; + 
utils::zero_object(prev_selector_index); + + uint num_encodings_left = 0; + + for (uint chunk_index = first_chunk; chunk_index < (first_chunk + num_chunks); chunk_index++) + { + if (!num_encodings_left) + { + uint index = 0; + for (uint i = 0; i < cEncodingMapNumChunksPerCode; i++) + if ((chunk_index + i) < (first_chunk + num_chunks)) + index |= (m_hvq.get_chunk_encoding(chunk_index + i).m_encoding_index << (i * 3)); + + if (pCodec) + pCodec->encode(index, m_chunk_encoding_dm); + else + m_chunk_encoding_hist.inc_freq(index); + + num_encodings_left = cEncodingMapNumChunksPerCode; + } + num_encodings_left--; + + const dxt_hc::chunk_encoding& encoding = m_hvq.get_chunk_encoding(chunk_index); + const chunk_detail& details = m_chunk_details[chunk_index]; + + const uint comp_order[3] = { cAlpha0, cAlpha1, cColor }; + for (uint c = 0; c < 3; c++) + { + const uint comp_index = comp_order[c]; + if (!m_has_comp[comp_index]) + continue; + + // endpoints + if (comp_index == cColor) + { + if (pColor_endpoint_remap) + { + for (uint i = 0; i < encoding.m_num_tiles; i++) + { + uint cur_endpoint_index = (*pColor_endpoint_remap)[ m_endpoint_indices[cColor][details.m_first_endpoint_index + i] ]; + int endpoint_delta = cur_endpoint_index - prev_endpoint_index[cColor]; + + int sym = endpoint_delta; + if (sym < 0) + sym += pColor_endpoint_remap->size(); + + CRNLIB_ASSERT(sym >= 0 && sym < (int)pColor_endpoint_remap->size()); + + if (!pCodec) + m_endpoint_index_hist[cColor].inc_freq(sym); + else + pCodec->encode(sym, m_endpoint_index_dm[0]); + + prev_endpoint_index[cColor] = cur_endpoint_index; + } + } + } + else + { + if (pAlpha_endpoint_remap) + { + for (uint i = 0; i < encoding.m_num_tiles; i++) + { + uint cur_endpoint_index = (*pAlpha_endpoint_remap)[m_endpoint_indices[comp_index][details.m_first_endpoint_index + i]]; + int endpoint_delta = cur_endpoint_index - prev_endpoint_index[comp_index]; + + int sym = endpoint_delta; + if (sym < 0) + sym += pAlpha_endpoint_remap->size(); + + 
CRNLIB_ASSERT(sym >= 0 && sym < (int)pAlpha_endpoint_remap->size()); + + if (!pCodec) + m_endpoint_index_hist[1].inc_freq(sym); + else + pCodec->encode(sym, m_endpoint_index_dm[1]); + + prev_endpoint_index[comp_index] = cur_endpoint_index; + } + } + } + } // c + + // selectors + for (uint y = 0; y < 2; y++) + { + for (uint x = 0; x < 2; x++) + { + for (uint c = 0; c < 3; c++) + { + const uint comp_index = comp_order[c]; + if (!m_has_comp[comp_index]) + continue; + + if (comp_index == cColor) + { + if (pColor_selector_remap) + { + uint cur_selector_index = (*pColor_selector_remap)[ m_selector_indices[cColor][details.m_first_selector_index + x + y * 2] ]; + int selector_delta = cur_selector_index - prev_selector_index[cColor]; + + int sym = selector_delta; + if (sym < 0) + sym += pColor_selector_remap->size(); + + CRNLIB_ASSERT(sym >= 0 && sym < (int)pColor_selector_remap->size()); + + if (!pCodec) + m_selector_index_hist[cColor].inc_freq(sym); + else + pCodec->encode(sym, m_selector_index_dm[cColor]); + + prev_selector_index[cColor] = cur_selector_index; + } + } + else if (pAlpha_selector_remap) + { + uint cur_selector_index = (*pAlpha_selector_remap)[ m_selector_indices[comp_index][details.m_first_selector_index + x + y * 2] ]; + int selector_delta = cur_selector_index - prev_selector_index[comp_index]; + + int sym = selector_delta; + if (sym < 0) + sym += pAlpha_selector_remap->size(); + + CRNLIB_ASSERT(sym >= 0 && sym < (int)pAlpha_selector_remap->size()); + + if (!pCodec) + m_selector_index_hist[1].inc_freq(sym); + else + pCodec->encode(sym, m_selector_index_dm[1]); + + prev_selector_index[comp_index] = cur_selector_index; + } + + } // c + + } // x + } // y + + } // chunk_index + + return true; + } + + bool crn_comp::pack_chunks_simulation( + uint first_chunk, uint num_chunks, + uint& total_bits, + const crnlib::vector* pColor_endpoint_remap, + const crnlib::vector* pColor_selector_remap, + const crnlib::vector* pAlpha_endpoint_remap, + const crnlib::vector* 
pAlpha_selector_remap) + { + if (!pack_chunks(first_chunk, num_chunks, true, NULL, pColor_endpoint_remap, pColor_selector_remap, pAlpha_endpoint_remap, pAlpha_selector_remap)) + return false; + + symbol_codec codec; + codec.start_encoding(2*1024*1024); + codec.encode_enable_simulation(true); + + m_chunk_encoding_dm.init(true, m_chunk_encoding_hist, 16); + + for (uint i = 0; i < 2; i++) + { + if (m_endpoint_index_hist[i].size()) + { + m_endpoint_index_dm[i].init(true, m_endpoint_index_hist[i], 16); + + codec.encode_transmit_static_huffman_data_model(m_endpoint_index_dm[i], false); + } + + if (m_selector_index_hist[i].size()) + { + m_selector_index_dm[i].init(true, m_selector_index_hist[i], 16); + + codec.encode_transmit_static_huffman_data_model(m_selector_index_dm[i], false); + } + } + + if (!pack_chunks(first_chunk, num_chunks, false, &codec, pColor_endpoint_remap, pColor_selector_remap, pAlpha_endpoint_remap, pAlpha_selector_remap)) + return false; + + codec.stop_encoding(false); + + total_bits = codec.encode_get_total_bits_written(); + + return true; + } + + void crn_comp::append_vec(crnlib::vector& a, const void* p, uint size) + { + if (size) + { + uint ofs = a.size(); + a.resize(ofs + size); + + memcpy(&a[ofs], p, size); + } + } + + void crn_comp::append_vec(crnlib::vector& a, const crnlib::vector& b) + { + if (!b.empty()) + { + uint ofs = a.size(); + a.resize(ofs + b.size()); + + memcpy(&a[ofs], &b[0], b.size()); + } + } + +#if 0 + bool crn_comp::init_chunk_encoding_dm() + { + symbol_histogram hist(1 << (3 * cEncodingMapNumChunksPerCode)); + + for (uint chunk_index = 0; chunk_index < m_hvq.get_num_chunks(); chunk_index += cEncodingMapNumChunksPerCode) + { + uint index = 0; + for (uint i = 0; i < cEncodingMapNumChunksPerCode; i++) + { + if ((chunk_index + i) >= m_hvq.get_num_chunks()) + break; + const dxt_hc::chunk_encoding& encoding = m_hvq.get_chunk_encoding(chunk_index + i); + + index |= (encoding.m_encoding_index << (i * 3)); + } + + hist.inc_freq(index); 
+ } + + if (!m_chunk_encoding_dm.init(true, hist, 16)) + return false; + + return true; + } +#endif + + bool crn_comp::alias_images() + { + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + const uint width = math::maximum(1U, m_pParams->m_width >> level_index); + const uint height = math::maximum(1U, m_pParams->m_height >> level_index); + + if (!m_pParams->m_pImages[face_index][level_index]) + return false; + + m_images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); + } + } + + image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); + if (conv_type != image_utils::cConversion_Invalid) + { + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + image_u8 cooked_image(m_images[face_index][level_index]); + + image_utils::convert_image(cooked_image, conv_type); + + m_images[face_index][level_index].swap(cooked_image); + } + } + } + + m_mip_groups.clear(); + m_mip_groups.resize(m_pParams->m_levels); + + utils::zero_object(m_levels); + + uint mip_group = 0; + uint chunk_index = 0; + uint mip_group_chunk_index = 0; + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + const uint width = math::maximum(1U, m_pParams->m_width >> level_index); + const uint height = math::maximum(1U, m_pParams->m_height >> level_index); + const uint chunk_width = math::align_up_value(width, cChunkPixelWidth) / cChunkPixelWidth; + const uint chunk_height = math::align_up_value(height, cChunkPixelHeight) / cChunkPixelHeight; + const uint num_chunks = m_pParams->m_faces * chunk_width * chunk_height; + + m_mip_groups[mip_group].m_first_chunk = chunk_index; + mip_group_chunk_index = 0; + + 
m_mip_groups[mip_group].m_num_chunks += num_chunks; + + m_levels[level_index].m_width = width; + m_levels[level_index].m_height = height; + m_levels[level_index].m_chunk_width = chunk_width; + m_levels[level_index].m_chunk_height = chunk_height; + m_levels[level_index].m_first_chunk = chunk_index; + m_levels[level_index].m_num_chunks = num_chunks; + m_levels[level_index].m_group_index = mip_group; + m_levels[level_index].m_group_first_chunk = 0; + + chunk_index += num_chunks; + + mip_group++; + } + + m_total_chunks = chunk_index; + + return true; + } + + void crn_comp::append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight) + { + for (uint y = 0; y < num_chunks_y; y++) + { + int x_start = 0; + int x_end = num_chunks_x; + int x_dir = 1; + if (y & 1) + { + x_start = num_chunks_x - 1; + x_end = -1; + x_dir = -1; + } + + for (int x = x_start; x != x_end; x += x_dir) + { + chunks.resize(chunks.size() + 1); + + dxt_hc::pixel_chunk& chunk = chunks.back(); + chunk.m_weight = weight; + + for (uint cy = 0; cy < cChunkPixelHeight; cy++) + { + uint py = y * cChunkPixelHeight + cy; + py = math::minimum(py, img.get_height() - 1); + + for (uint cx = 0; cx < cChunkPixelWidth; cx++) + { + uint px = x * cChunkPixelWidth + cx; + px = math::minimum(px, img.get_width() - 1); + + chunk(cx, cy) = img(px, py); + } + } + } + } + } + + void crn_comp::create_chunks() + { + m_chunks.reserve(m_total_chunks); + m_chunks.resize(0); + + for (uint level = 0; level < m_pParams->m_levels; level++) + { + for (uint face = 0; face < m_pParams->m_faces; face++) + { + if (!face) + { + CRNLIB_ASSERT(m_levels[level].m_first_chunk == m_chunks.size()); + } + + float mip_weight = math::minimum(12.0f, powf( 1.3f, static_cast(level) ) ); + //float mip_weight = 1.0f; + + append_chunks(m_images[face][level], m_levels[level].m_chunk_width, m_levels[level].m_chunk_height, m_chunks, mip_weight); + } + } + + CRNLIB_ASSERT(m_chunks.size() == 
m_total_chunks); + } + + void crn_comp::clear() + { + m_pParams = NULL; + + for (uint f = 0; f < cCRNMaxFaces; f++) + for (uint l = 0; l < cCRNMaxLevels; l++) + m_images[f][l].clear(); + + utils::zero_object(m_levels); + + m_mip_groups.clear(); + + utils::zero_object(m_has_comp); + + m_chunk_details.clear(); + + for (uint i = 0; i < cNumComps; i++) + { + m_endpoint_indices[i].clear(); + m_selector_indices[i].clear(); + } + + m_total_chunks = 0; + + m_chunks.clear(); + + utils::zero_object(m_crn_header); + + m_comp_data.clear(); + + m_hvq.clear(); + + m_chunk_encoding_hist.clear(); + m_chunk_encoding_dm.clear(); + for (uint i = 0; i < 2; i++) + { + m_endpoint_index_hist[i].clear(); + m_endpoint_index_dm[i].clear(); + m_selector_index_hist[i].clear(); + m_selector_index_dm[i].clear(); + } + + for (uint i = 0; i < cCRNMaxLevels; i++) + m_packed_chunks[i].clear(); + + m_packed_data_models.clear(); + + m_packed_color_endpoints.clear(); + m_packed_color_selectors.clear(); + m_packed_alpha_endpoints.clear(); + m_packed_alpha_selectors.clear(); + } + + bool crn_comp::quantize_chunks() + { + dxt_hc::params params; + + params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating; + params.m_adaptive_tile_color_psnr_derating = m_pParams->m_crn_adaptive_tile_color_psnr_derating; + + if (m_pParams->m_flags & cCRNCompFlagManualPaletteSizes) + { + params.m_color_endpoint_codebook_size = math::clamp(m_pParams->m_crn_color_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_color_selector_codebook_size = math::clamp(m_pParams->m_crn_color_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_alpha_endpoint_codebook_size = math::clamp(m_pParams->m_crn_alpha_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_alpha_selector_codebook_size = math::clamp(m_pParams->m_crn_alpha_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); + } + else + { + uint max_codebook_entries = 
((m_pParams->m_width + 3) / 4) * ((m_pParams->m_height + 3) / 4); + + max_codebook_entries = math::clamp(max_codebook_entries, cCRNMinPaletteSize, cCRNMaxPaletteSize); + + float quality = math::clamp((float)m_pParams->m_quality_level / cCRNMaxQualityLevel, 0.0f, 1.0f); + float color_quality_power_mul = 1.0f; + float alpha_quality_power_mul = 1.0f; + if (m_pParams->m_format == cCRNFmtDXT5_CCxY) + { + color_quality_power_mul = 3.5f; + alpha_quality_power_mul = .35f; + params.m_adaptive_tile_color_psnr_derating = 5.0f; + } + else if (m_pParams->m_format == cCRNFmtDXT5) + color_quality_power_mul = .75f; + + float color_endpoint_quality = powf(quality, 1.8f * color_quality_power_mul); + float color_selector_quality = powf(quality, 1.65f * color_quality_power_mul); + params.m_color_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(64, cCRNMinPaletteSize), (float)max_codebook_entries, color_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); + params.m_color_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(96, cCRNMinPaletteSize), (float)max_codebook_entries, color_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); + + float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul); + float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul); + params.m_alpha_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);; + params.m_alpha_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize);; + } + + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + console::debug(L"Color endpoints: %u", params.m_color_endpoint_codebook_size); + 
console::debug(L"Color selectors: %u", params.m_color_selector_codebook_size); + console::debug(L"Alpha endpoints: %u", params.m_alpha_endpoint_codebook_size); + console::debug(L"Alpha selectors: %u", params.m_alpha_selector_codebook_size); + } + + params.m_hierarchical = (m_pParams->m_flags & cCRNCompFlagHierarchical) != 0; + params.m_perceptual = (m_pParams->m_flags & cCRNCompFlagPerceptual) != 0; + + params.m_pProgress_func = m_pParams->m_pProgress_func; + params.m_pProgress_func_data = m_pParams->m_pProgress_func_data; + + switch (m_pParams->m_format) + { + case cCRNFmtDXT1: + { + params.m_format = cDXT1; + m_has_comp[cColor] = true; + break; + } + case cCRNFmtDXT3: + { + m_has_comp[cAlpha0] = true; + return false; + } + case cCRNFmtDXT5: + { + params.m_format = cDXT5; + params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + break; + } + case cCRNFmtDXT5_CCxY: + { + params.m_format = cDXT5; + params.m_alpha_component_indices[0] = 3; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + params.m_perceptual = false; + + //params.m_adaptive_tile_color_alpha_weighting_ratio = 1.0f; + params.m_adaptive_tile_color_alpha_weighting_ratio = 1.5f; + break; + } + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtDXT5_xGxR: + { + params.m_format = cDXT5; + params.m_alpha_component_indices[0] = 3; + m_has_comp[cColor] = true; + m_has_comp[cAlpha0] = true; + params.m_perceptual = false; + break; + } + case cCRNFmtDXN_XY: + { + params.m_format = cDXN_XY; + params.m_alpha_component_indices[0] = 0; + params.m_alpha_component_indices[1] = 1; + m_has_comp[cAlpha0] = true; + m_has_comp[cAlpha1] = true; + params.m_perceptual = false; + break; + } + case cCRNFmtDXN_YX: + { + params.m_format = cDXN_YX; + params.m_alpha_component_indices[0] = 1; + params.m_alpha_component_indices[1] = 0; + m_has_comp[cAlpha0] = true; + m_has_comp[cAlpha1] = true; + params.m_perceptual = false; + break; + } + 
case cCRNFmtDXT5A: + { + params.m_format = cDXT5A; + params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; + m_has_comp[cAlpha0] = true; + params.m_perceptual = false; + break; + } + default: + { + return false; + } + } + params.m_debugging = (m_pParams->m_flags & cCRNCompFlagDebugging) != 0; + + params.m_num_levels = m_pParams->m_levels; + for (uint i = 0; i < m_pParams->m_levels; i++) + { + params.m_levels[i].m_first_chunk = m_levels[i].m_first_chunk; + params.m_levels[i].m_num_chunks = m_levels[i].m_num_chunks; + } + + if (!m_hvq.compress(params, m_total_chunks, &m_chunks[0], m_task_pool)) + return false; + +#if CRNLIB_CREATE_DEBUG_IMAGES + if (params.m_debugging) + { + const dxt_hc::pixel_chunk_vec& pixel_chunks = m_hvq.get_compressed_chunk_pixels_final(); + + image_u8 img; + dxt_hc::create_debug_image_from_chunks((m_pParams->m_width+7)>>3, (m_pParams->m_height+7)>>3, pixel_chunks, &m_hvq.get_chunk_encoding_vec(), img, true, -1); + image_utils::save_to_file(L"quantized_chunks.tga", img); + } +#endif + + return true; + } + + void crn_comp::create_chunk_indices() + { + m_chunk_details.resize(m_total_chunks); + + for (uint i = 0; i < cNumComps; i++) + { + m_endpoint_indices[i].clear(); + m_selector_indices[i].clear(); + } + + for (uint chunk_index = 0; chunk_index < m_total_chunks; chunk_index++) + { + const dxt_hc::chunk_encoding& chunk_encoding = m_hvq.get_chunk_encoding(chunk_index); + + for (uint i = 0; i < cNumComps; i++) + { + if (m_has_comp[i]) + { + m_chunk_details[chunk_index].m_first_endpoint_index = m_endpoint_indices[i].size(); + m_chunk_details[chunk_index].m_first_selector_index = m_selector_indices[i].size(); + break; + } + } + + for (uint i = 0; i < cNumComps; i++) + { + if (!m_has_comp[i]) + continue; + + for (uint tile_index = 0; tile_index < chunk_encoding.m_num_tiles; tile_index++) + m_endpoint_indices[i].push_back(chunk_encoding.m_endpoint_indices[i][tile_index]); + + for (uint y = 0; y < cChunkBlockHeight; y++) + for (uint x = 
0; x < cChunkBlockWidth; x++) + m_selector_indices[i].push_back(chunk_encoding.m_selector_indices[i][y][x]); + } + } + } + + struct optimize_color_endpoint_codebook_params + { + crnlib::vector* m_pTrial_color_endpoint_remap; + uint m_iter_index; + uint m_max_iter_index; + }; + + void crn_comp::optimize_color_endpoint_codebook_task(uint64 data, void* pData_ptr) + { + data; + optimize_color_endpoint_codebook_params* pParams = reinterpret_cast(pData_ptr); + + if (pParams->m_iter_index == pParams->m_max_iter_index) + { + sort_color_endpoint_codebook(*pParams->m_pTrial_color_endpoint_remap, m_hvq.get_color_endpoint_vec()); + } + else + { + float f = pParams->m_iter_index / static_cast(pParams->m_max_iter_index - 1); + + create_zeng_reorder_table( + m_hvq.get_color_endpoint_codebook_size(), + m_endpoint_indices[cColor].size(), + &m_endpoint_indices[cColor][0], + *pParams->m_pTrial_color_endpoint_remap, + pParams->m_iter_index ? color_endpoint_similarity_func : NULL, + &m_hvq, + f); + } + + crnlib_delete(pParams); + } + + bool crn_comp::optimize_color_endpoint_codebook(crnlib::vector& remapping) + { + if (m_pParams->m_flags & cCRNCompFlagQuick) + { + remapping.resize(m_hvq.get_color_endpoint_vec().size()); + for (uint i = 0; i < m_hvq.get_color_endpoint_vec().size(); i++) + remapping[i] = i; + + if (!pack_color_endpoints(m_packed_color_endpoints, remapping, m_endpoint_indices[cColor], 0)) + return false; + + return true; + } + + const uint cMaxEndpointRemapIters = 3; + + uint best_bits = UINT_MAX; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"----- Begin optimization of color endpoint codebook"); +#endif + + crnlib::vector trial_color_endpoint_remaps[cMaxEndpointRemapIters + 1]; + + for (uint i = 0; i <= cMaxEndpointRemapIters; i++) + { + optimize_color_endpoint_codebook_params* pParams = crnlib_new(); + pParams->m_iter_index = i; + pParams->m_max_iter_index = cMaxEndpointRemapIters; + 
pParams->m_pTrial_color_endpoint_remap = &trial_color_endpoint_remaps[i]; + + m_task_pool.queue_object_task(this, &crn_comp::optimize_color_endpoint_codebook_task, 0, pParams); + } + + m_task_pool.join(); + + for (uint i = 0; i <= cMaxEndpointRemapIters; i++) + { + if (!update_progress(20, i, cMaxEndpointRemapIters+1)) + return false; + + crnlib::vector& trial_color_endpoint_remap = trial_color_endpoint_remaps[i]; + + crnlib::vector packed_data; + if (!pack_color_endpoints(packed_data, trial_color_endpoint_remap, m_endpoint_indices[cColor], i)) + return false; + + uint total_packed_chunk_bits; + if (!pack_chunks_simulation(0, m_total_chunks, total_packed_chunk_bits, &trial_color_endpoint_remap, NULL, NULL, NULL)) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Pack chunks simulation: %u bits", total_packed_chunk_bits); +#endif + + uint total_bits = packed_data.size() * 8 + total_packed_chunk_bits; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total bits: %u", total_bits); +#endif + + if (total_bits < best_bits) + { + m_packed_color_endpoints.swap(packed_data); + remapping.swap(trial_color_endpoint_remap); + best_bits = total_bits; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"End optimization of color endpoint codebook"); +#endif + + return true; + } + + struct optimize_color_selector_codebook_params + { + crnlib::vector* m_pTrial_color_selector_remap; + uint m_iter_index; + uint m_max_iter_index; + }; + + void crn_comp::optimize_color_selector_codebook_task(uint64 data, void* pData_ptr) + { + data; + optimize_color_selector_codebook_params* pParams = reinterpret_cast(pData_ptr); + + if (pParams->m_iter_index == pParams->m_max_iter_index) + { + sort_selector_codebook(*pParams->m_pTrial_color_selector_remap, m_hvq.get_color_selectors_vec(), g_dxt1_to_linear); + } + else 
+ { + float f = pParams->m_iter_index / static_cast(pParams->m_max_iter_index - 1); + create_zeng_reorder_table( + m_hvq.get_color_selector_codebook_size(), + m_selector_indices[cColor].size(), + &m_selector_indices[cColor][0], + *pParams->m_pTrial_color_selector_remap, + pParams->m_iter_index ? color_selector_similarity_func : NULL, + (void*)&m_hvq.get_color_selectors_vec(), + f); + } + + crnlib_delete(pParams); + } + + bool crn_comp::optimize_color_selector_codebook(crnlib::vector& remapping) + { + if (m_pParams->m_flags & cCRNCompFlagQuick) + { + remapping.resize(m_hvq.get_color_selectors_vec().size()); + for (uint i = 0; i < m_hvq.get_color_selectors_vec().size(); i++) + remapping[i] = i; + + if (!pack_selectors( + m_packed_color_selectors, + m_selector_indices[cColor], + m_hvq.get_color_selectors_vec(), + remapping, + 3, + g_dxt1_to_linear, 0)) + { + return false; + } + + return true; + } + + const uint cMaxSelectorRemapIters = 3; + + uint best_bits = UINT_MAX; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"----- Begin optimization of color selector codebook"); +#endif + + crnlib::vector trial_color_selector_remaps[cMaxSelectorRemapIters + 1]; + + for (uint i = 0; i <= cMaxSelectorRemapIters; i++) + { + optimize_color_selector_codebook_params* pParams = crnlib_new(); + pParams->m_iter_index = i; + pParams->m_max_iter_index = cMaxSelectorRemapIters; + pParams->m_pTrial_color_selector_remap = &trial_color_selector_remaps[i]; + + m_task_pool.queue_object_task(this, &crn_comp::optimize_color_selector_codebook_task, 0, pParams); + } + + m_task_pool.join(); + + for (uint i = 0; i <= cMaxSelectorRemapIters; i++) + { + if (!update_progress(21, i, cMaxSelectorRemapIters+1)) + return false; + + crnlib::vector& trial_color_selector_remap = trial_color_selector_remaps[i]; + + crnlib::vector packed_data; + if (!pack_selectors( + packed_data, + m_selector_indices[cColor], + m_hvq.get_color_selectors_vec(), + 
trial_color_selector_remap, + 3, + g_dxt1_to_linear, i)) + { + return false; + } + + uint total_packed_chunk_bits; + if (!pack_chunks_simulation(0, m_total_chunks, total_packed_chunk_bits, NULL, &trial_color_selector_remap, NULL, NULL)) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Pack chunks simulation: %u bits", total_packed_chunk_bits); +#endif + + uint total_bits = packed_data.size() * 8 + total_packed_chunk_bits; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total bits: %u", total_bits); +#endif + if (total_bits < best_bits) + { + m_packed_color_selectors.swap(packed_data); + remapping.swap(trial_color_selector_remap); + best_bits = total_bits; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"End optimization of color selector codebook"); +#endif + + return true; + } + + struct optimize_alpha_endpoint_codebook_params + { + crnlib::vector* m_pAlpha_indices; + crnlib::vector* m_pTrial_alpha_endpoint_remap; + uint m_iter_index; + uint m_max_iter_index; + }; + + void crn_comp::optimize_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) + { + data; + optimize_alpha_endpoint_codebook_params* pParams = reinterpret_cast(pData_ptr); + + if (pParams->m_iter_index == pParams->m_max_iter_index) + { + sort_alpha_endpoint_codebook(*pParams->m_pTrial_alpha_endpoint_remap, m_hvq.get_alpha_endpoint_vec()); + } + else + { + float f = pParams->m_iter_index / static_cast(pParams->m_max_iter_index - 1); + + create_zeng_reorder_table( + m_hvq.get_alpha_endpoint_codebook_size(), + pParams->m_pAlpha_indices->size(), + &(*pParams->m_pAlpha_indices)[0], + *pParams->m_pTrial_alpha_endpoint_remap, + pParams->m_iter_index ? 
alpha_endpoint_similarity_func : NULL, + &m_hvq, + f); + } + + crnlib_delete(pParams); + } + + bool crn_comp::optimize_alpha_endpoint_codebook(crnlib::vector& remapping) + { + crnlib::vector alpha_indices; + alpha_indices.reserve(m_endpoint_indices[cAlpha0].size() + m_endpoint_indices[cAlpha1].size()); + for (uint i = 0; i < m_endpoint_indices[cAlpha0].size(); i++) + alpha_indices.push_back(m_endpoint_indices[cAlpha0][i]); + for (uint i = 0; i < m_endpoint_indices[cAlpha1].size(); i++) + alpha_indices.push_back(m_endpoint_indices[cAlpha1][i]); + + if (m_pParams->m_flags & cCRNCompFlagQuick) + { + remapping.resize(m_hvq.get_alpha_endpoint_vec().size()); + for (uint i = 0; i < m_hvq.get_alpha_endpoint_vec().size(); i++) + remapping[i] = i; + + if (!pack_alpha_endpoints(m_packed_alpha_endpoints, remapping, alpha_indices, 0)) + return false; + + return true; + } + + const uint cMaxEndpointRemapIters = 3; + uint best_bits = UINT_MAX; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"----- Begin optimization of alpha endpoint codebook"); +#endif + + crnlib::vector trial_alpha_endpoint_remaps[cMaxEndpointRemapIters + 1]; + + for (uint i = 0; i <= cMaxEndpointRemapIters; i++) + { + optimize_alpha_endpoint_codebook_params* pParams = crnlib_new(); + pParams->m_pAlpha_indices = &alpha_indices; + pParams->m_iter_index = i; + pParams->m_max_iter_index = cMaxEndpointRemapIters; + pParams->m_pTrial_alpha_endpoint_remap = &trial_alpha_endpoint_remaps[i]; + + m_task_pool.queue_object_task(this, &crn_comp::optimize_alpha_endpoint_codebook_task, 0, pParams); + } + + m_task_pool.join(); + + for (uint i = 0; i <= cMaxEndpointRemapIters; i++) + { + if (!update_progress(22, i, cMaxEndpointRemapIters+1)) + return false; + + crnlib::vector& trial_alpha_endpoint_remap = trial_alpha_endpoint_remaps[i]; + + crnlib::vector packed_data; + if (!pack_alpha_endpoints(packed_data, trial_alpha_endpoint_remap, alpha_indices, i)) + return false; + 
+ uint total_packed_chunk_bits; + if (!pack_chunks_simulation(0, m_total_chunks, total_packed_chunk_bits, NULL, NULL, &trial_alpha_endpoint_remap, NULL)) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Pack chunks simulation: %u bits", total_packed_chunk_bits); +#endif + + uint total_bits = packed_data.size() * 8 + total_packed_chunk_bits; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total bits: %u", total_bits); +#endif + + if (total_bits < best_bits) + { + m_packed_alpha_endpoints.swap(packed_data); + remapping.swap(trial_alpha_endpoint_remap); + best_bits = total_bits; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"End optimization of alpha endpoint codebook"); +#endif + + return true; + } + + /* Arguments for one optimize_alpha_selector_codebook_task invocation: a pointer to the caller's combined alpha selector index list, the remap table this trial fills in, and the trial's index together with the highest trial index. */ + struct optimize_alpha_selector_codebook_params + { + crnlib::vector* m_pAlpha_indices; + crnlib::vector* m_pTrial_alpha_selector_remap; + uint m_iter_index; + uint m_max_iter_index; + }; + + /* Task-pool worker that builds one trial remapping of the alpha selector codebook. The final trial (m_iter_index == m_max_iter_index) orders the codebook with sort_selector_codebook; every earlier trial calls create_zeng_reorder_table, supplying alpha_selector_similarity_func only when m_iter_index is non-zero. pData_ptr is the optimize_alpha_selector_codebook_params block allocated with crnlib_new by optimize_alpha_selector_codebook; this worker frees it before returning. The data argument is not used. */ + void crn_comp::optimize_alpha_selector_codebook_task(uint64 data, void* pData_ptr) + { + data; + optimize_alpha_selector_codebook_params* pParams = reinterpret_cast(pData_ptr); + + if (pParams->m_iter_index == pParams->m_max_iter_index) + { + sort_selector_codebook(*pParams->m_pTrial_alpha_selector_remap, m_hvq.get_alpha_selectors_vec(), g_dxt5_to_linear); + } + else + { + /* Blend factor for this trial: m_iter_index scaled by (m_max_iter_index - 1). */ + float f = pParams->m_iter_index / static_cast(pParams->m_max_iter_index - 1); + create_zeng_reorder_table( + m_hvq.get_alpha_selector_codebook_size(), + pParams->m_pAlpha_indices->size(), + &(*pParams->m_pAlpha_indices)[0], + *pParams->m_pTrial_alpha_selector_remap, + pParams->m_iter_index ?
alpha_selector_similarity_func : NULL, + (void*)&m_hvq.get_alpha_selectors_vec(), + f); + } + + /* Release the params block, matching the other optimize_*_codebook_task workers; nothing else frees it, so omitting this leaked one block per queued trial. */ + crnlib_delete(pParams); + } + + bool crn_comp::optimize_alpha_selector_codebook(crnlib::vector& remapping) + { + crnlib::vector alpha_indices; + alpha_indices.reserve(m_selector_indices[cAlpha0].size() + m_selector_indices[cAlpha1].size()); + for (uint i = 0; i < m_selector_indices[cAlpha0].size(); i++) + alpha_indices.push_back(m_selector_indices[cAlpha0][i]); + for (uint i = 0; i < m_selector_indices[cAlpha1].size(); i++) + alpha_indices.push_back(m_selector_indices[cAlpha1][i]); + + if (m_pParams->m_flags & cCRNCompFlagQuick) + { + remapping.resize(m_hvq.get_alpha_selectors_vec().size()); + for (uint i = 0; i < m_hvq.get_alpha_selectors_vec().size(); i++) + remapping[i] = i; + + if (!pack_selectors( + m_packed_alpha_selectors, + alpha_indices, + m_hvq.get_alpha_selectors_vec(), + remapping, + 7, + g_dxt5_to_linear, 0)) + { + return false; + } + + return true; + } + + const uint cMaxSelectorRemapIters = 3; + + uint best_bits = UINT_MAX; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"----- Begin optimization of alpha selector codebook"); +#endif + + crnlib::vector trial_alpha_selector_remaps[cMaxSelectorRemapIters + 1]; + + for (uint i = 0; i <= cMaxSelectorRemapIters; i++) + { + optimize_alpha_selector_codebook_params* pParams = crnlib_new(); + pParams->m_pAlpha_indices = &alpha_indices; + pParams->m_iter_index = i; + pParams->m_max_iter_index = cMaxSelectorRemapIters; + pParams->m_pTrial_alpha_selector_remap = &trial_alpha_selector_remaps[i]; + + m_task_pool.queue_object_task(this, &crn_comp::optimize_alpha_selector_codebook_task, 0, pParams); + } + + m_task_pool.join(); + + for (uint i = 0; i <= cMaxSelectorRemapIters; i++) + { + if (!update_progress(23, i, cMaxSelectorRemapIters+1)) + return false; + + crnlib::vector& trial_alpha_selector_remap = trial_alpha_selector_remaps[i]; + + crnlib::vector packed_data; + if (!pack_selectors( +
packed_data, + alpha_indices, + m_hvq.get_alpha_selectors_vec(), + trial_alpha_selector_remap, + 7, + g_dxt5_to_linear, i)) + { + return false; + } + + uint total_packed_chunk_bits; + if (!pack_chunks_simulation(0, m_total_chunks, total_packed_chunk_bits, NULL, NULL, NULL, &trial_alpha_selector_remap)) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Pack chunks simulation: %u bits", total_packed_chunk_bits); +#endif + + uint total_bits = packed_data.size() * 8 + total_packed_chunk_bits; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"Total bits: %u", total_bits); +#endif + if (total_bits < best_bits) + { + m_packed_alpha_selectors.swap(packed_data); + + remapping.swap(trial_alpha_selector_remap); + best_bits = total_bits; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + console::debug(L"End optimization of alpha selector codebook"); +#endif + + return true; + } + + bool crn_comp::pack_data_models() + { + symbol_codec codec; + codec.start_encoding(1024*1024); + + if (!codec.encode_transmit_static_huffman_data_model(m_chunk_encoding_dm, false)) + return false; + + for (uint i = 0; i < 2; i++) + { + if (m_endpoint_index_dm[i].get_total_syms()) + { + if (!codec.encode_transmit_static_huffman_data_model(m_endpoint_index_dm[i], false)) + return false; + } + + if (m_selector_index_dm[i].get_total_syms()) + { + if (!codec.encode_transmit_static_huffman_data_model(m_selector_index_dm[i], false)) + return false; + } + } + + codec.stop_encoding(false); + + m_packed_data_models.swap(codec.get_encoding_buf()); + + return true; + } + + bool crn_comp::create_comp_data() + { + utils::zero_object(m_crn_header); + + m_crn_header.m_width = static_cast(m_pParams->m_width); + m_crn_header.m_height = static_cast(m_pParams->m_height); + m_crn_header.m_levels = static_cast(m_pParams->m_levels); + 
m_crn_header.m_faces = static_cast(m_pParams->m_faces); + m_crn_header.m_format = static_cast(m_pParams->m_format); + m_crn_header.m_userdata0 = m_pParams->m_userdata0; + m_crn_header.m_userdata1 = m_pParams->m_userdata1; + + m_comp_data.clear(); + m_comp_data.reserve(2*1024*1024); + append_vec(m_comp_data, &m_crn_header, sizeof(m_crn_header)); + // tack on the rest of the variable size m_level_ofs array + m_comp_data.resize( m_comp_data.size() + sizeof(m_crn_header.m_level_ofs[0]) * (m_pParams->m_levels - 1) ); + + if (m_packed_color_endpoints.size()) + { + m_crn_header.m_color_endpoints.m_num = static_cast(m_hvq.get_color_endpoint_codebook_size()); + m_crn_header.m_color_endpoints.m_size = m_packed_color_endpoints.size(); + m_crn_header.m_color_endpoints.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_color_endpoints); + } + + if (m_packed_color_selectors.size()) + { + m_crn_header.m_color_selectors.m_num = static_cast(m_hvq.get_color_selector_codebook_size()); + m_crn_header.m_color_selectors.m_size = m_packed_color_selectors.size(); + m_crn_header.m_color_selectors.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_color_selectors); + } + + if (m_packed_alpha_endpoints.size()) + { + m_crn_header.m_alpha_endpoints.m_num = static_cast(m_hvq.get_alpha_endpoint_codebook_size()); + m_crn_header.m_alpha_endpoints.m_size = m_packed_alpha_endpoints.size(); + m_crn_header.m_alpha_endpoints.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_alpha_endpoints); + } + + if (m_packed_alpha_selectors.size()) + { + m_crn_header.m_alpha_selectors.m_num = static_cast(m_hvq.get_alpha_selector_codebook_size()); + m_crn_header.m_alpha_selectors.m_size = m_packed_alpha_selectors.size(); + m_crn_header.m_alpha_selectors.m_ofs = m_comp_data.size(); + append_vec(m_comp_data, m_packed_alpha_selectors); + } + + m_crn_header.m_tables_ofs = m_comp_data.size(); + m_crn_header.m_tables_size = m_packed_data_models.size(); + append_vec(m_comp_data, 
m_packed_data_models); + + uint level_ofs[cCRNMaxLevels]; + for (uint i = 0; i < m_mip_groups.size(); i++) + { + level_ofs[i] = m_comp_data.size(); + append_vec(m_comp_data, m_packed_chunks[i]); + } + + crnd::crn_header& dst_header = *(crnd::crn_header*)&m_comp_data[0]; + // don't change the m_comp_data vector - or dst_header will be invalidated! + + memcpy(&dst_header, &m_crn_header, sizeof(dst_header)); + + for (uint i = 0; i < m_mip_groups.size(); i++) + dst_header.m_level_ofs[i] = level_ofs[i]; + + const uint actual_header_size = sizeof(crnd::crn_header) + sizeof(dst_header.m_level_ofs[0]) * (m_mip_groups.size() - 1); + + dst_header.m_sig = crnd::crn_header::cCRNSigValue; + + dst_header.m_data_size = m_comp_data.size(); + dst_header.m_data_crc16 = crc16(&m_comp_data[actual_header_size], m_comp_data.size() - actual_header_size); + + dst_header.m_header_size = actual_header_size; + dst_header.m_header_crc16 = crc16(&dst_header.m_data_size, actual_header_size - (uint)((uint8*)&dst_header.m_data_size - (uint8*)&dst_header)); + + return true; + } + + bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subphase_total) + { + if (!m_pParams->m_pProgress_func) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_pParams->m_flags & cCRNCompFlagDebugging) + return true; +#endif + + return (*m_pParams->m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_pParams->m_pProgress_func_data) != 0; + } + + bool crn_comp::compress_internal() + { + if (!alias_images()) + return false; + + create_chunks(); + + if (!quantize_chunks()) + return false; + + create_chunk_indices(); + + crnlib::vector endpoint_remap[2]; + crnlib::vector selector_remap[2]; + + if (m_has_comp[cColor]) + { + if (!optimize_color_endpoint_codebook(endpoint_remap[0])) + return false; + if (!optimize_color_selector_codebook(selector_remap[0])) + return false; + } + + if (m_has_comp[cAlpha0]) + { + if 
(!optimize_alpha_endpoint_codebook(endpoint_remap[1])) + return false; + if (!optimize_alpha_selector_codebook(selector_remap[1])) + return false; + } + + m_chunk_encoding_hist.clear(); + for (uint i = 0; i < 2; i++) + { + m_endpoint_index_hist[i].clear(); + m_endpoint_index_dm[i].clear(); + m_selector_index_hist[i].clear(); + m_selector_index_dm[i].clear(); + } + + for (uint pass = 0; pass < 2; pass++) + { + for (uint mip_group = 0; mip_group < m_mip_groups.size(); mip_group++) + { + symbol_codec codec; + codec.start_encoding(2*1024*1024); + + if (!pack_chunks( + m_mip_groups[mip_group].m_first_chunk, m_mip_groups[mip_group].m_num_chunks, + !pass && !mip_group, pass ? &codec : NULL, + m_has_comp[cColor] ? &endpoint_remap[0] : NULL, m_has_comp[cColor] ? &selector_remap[0] : NULL, + m_has_comp[cAlpha0] ? &endpoint_remap[1] : NULL, m_has_comp[cAlpha0] ? &selector_remap[1] : NULL)) + { + return false; + } + + codec.stop_encoding(false); + + if (pass) + m_packed_chunks[mip_group].swap(codec.get_encoding_buf()); + } + + if (!pass) + { + m_chunk_encoding_dm.init(true, m_chunk_encoding_hist, 16); + + for (uint i = 0; i < 2; i++) + { + if (m_endpoint_index_hist[i].size()) + m_endpoint_index_dm[i].init(true, m_endpoint_index_hist[i], 16); + + if (m_selector_index_hist[i].size()) + m_selector_index_dm[i].init(true, m_selector_index_hist[i], 16); + } + } + } + + if (!pack_data_models()) + return false; + + if (!create_comp_data()) + return false; + + if (!update_progress(24, 1, 1)) + return false; + + if (m_pParams->m_flags & cCRNCompFlagDebugging) + { + crnlib_print_mem_stats(); + } + + return true; + } + + bool crn_comp::compress_init(const crn_comp_params& params) + { + params; + return true; + } + + bool crn_comp::compress_pass(const crn_comp_params& params, float *pEffective_bitrate) + { + clear(); + + if (pEffective_bitrate) *pEffective_bitrate = 0.0f; + + m_pParams = ¶ms; + + if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || 
(math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) + return false; + + if (!m_task_pool.init(params.m_num_helper_threads)) + return false; + + bool status = compress_internal(); + + m_task_pool.deinit(); + + if ((status) && (pEffective_bitrate)) + { + uint total_pixels = 0; + + for (uint f = 0; f < m_pParams->m_faces; f++) + for (uint l = 0; l < m_pParams->m_levels; l++) + total_pixels += m_images[f][l].get_total_pixels(); + + *pEffective_bitrate = (m_comp_data.size() * 8.0f) / total_pixels; + } + + return status; + } + + void crn_comp::compress_deinit() + { + } + +} // namespace crnlib + diff --git a/crnlib/crn_comp.h b/crnlib/crn_comp.h new file mode 100644 index 00000000..4f80e2e7 --- /dev/null +++ b/crnlib/crn_comp.h @@ -0,0 +1,181 @@ +// File: crn_comp.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#define CRND_HEADER_FILE_ONLY +#include "../inc/crn_decomp.h" +#undef CRND_HEADER_FILE_ONLY + +#include "../inc/crnlib.h" +#include "crn_symbol_codec.h" +#include "crn_dxt_hc.h" +#include "crn_image.h" +#include "crn_image_utils.h" +#include "crn_texture_comp.h" + +namespace crnlib +{ + class crn_comp : public itexture_comp + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_comp); + + public: + crn_comp(); + virtual ~crn_comp(); + + virtual const wchar_t *get_ext() const { return L"CRN"; } + + virtual bool compress_init(const crn_comp_params& params); + virtual bool compress_pass(const crn_comp_params& params, float *pEffective_bitrate); + virtual void compress_deinit(); + + virtual const crnlib::vector& get_comp_data() const { return m_comp_data; } + virtual crnlib::vector& get_comp_data() { return m_comp_data; } + + uint get_comp_data_size() const { return m_comp_data.size(); } + const uint8* get_comp_data_ptr() const { return m_comp_data.size() ? 
&m_comp_data[0] : NULL; } + + private: + task_pool m_task_pool; + const crn_comp_params* m_pParams; + + image_u8 m_images[cCRNMaxFaces][cCRNMaxLevels]; + + struct + { + uint m_width, m_height; + uint m_chunk_width, m_chunk_height; + uint m_group_index; + uint m_num_chunks; + uint m_first_chunk; + uint m_group_first_chunk; + } m_levels[cCRNMaxLevels]; + + struct mip_group + { + mip_group() : m_first_chunk(0), m_num_chunks(0) { } + + uint m_first_chunk; + uint m_num_chunks; + }; + crnlib::vector m_mip_groups; + + enum comp + { + cColor, + cAlpha0, + cAlpha1, + cNumComps + }; + + bool m_has_comp[cNumComps]; + + struct chunk_detail + { + chunk_detail() { utils::zero_object(*this); } + + uint m_first_endpoint_index; + uint m_first_selector_index; + }; + typedef crnlib::vector chunk_detail_vec; + chunk_detail_vec m_chunk_details; + + crnlib::vector m_endpoint_indices[cNumComps]; + crnlib::vector m_selector_indices[cNumComps]; + + uint m_total_chunks; + dxt_hc::pixel_chunk_vec m_chunks; + + crnd::crn_header m_crn_header; + crnlib::vector m_comp_data; + + dxt_hc m_hvq; + + symbol_histogram m_chunk_encoding_hist; + static_huffman_data_model m_chunk_encoding_dm; + + symbol_histogram m_endpoint_index_hist[2]; + static_huffman_data_model m_endpoint_index_dm[2]; // color, alpha + + symbol_histogram m_selector_index_hist[2]; + static_huffman_data_model m_selector_index_dm[2]; // color, alpha + + crnlib::vector m_packed_chunks[cCRNMaxLevels]; + crnlib::vector m_packed_data_models; + crnlib::vector m_packed_color_endpoints; + crnlib::vector m_packed_color_selectors; + crnlib::vector m_packed_alpha_endpoints; + crnlib::vector m_packed_alpha_selectors; + + void clear(); + + void append_chunks(const image_u8& img, uint num_chunks_x, uint num_chunks_y, dxt_hc::pixel_chunk_vec& chunks, float weight); + + static float color_endpoint_similarity_func(uint index_a, uint index_b, void* pContext); + static float alpha_endpoint_similarity_func(uint index_a, uint index_b, void* pContext); + 
void sort_color_endpoint_codebook(crnlib::vector& remapping, const crnlib::vector& endpoints); // NOTE(review): every `crnlib::vector` here lacks its <element type>; it looks stripped from this copy of the patch -- restore from upstream
+      void sort_alpha_endpoint_codebook(crnlib::vector& remapping, const crnlib::vector& endpoints); // sort_* definitions are outside this chunk; presumably they fill `remapping` with a new codebook order -- TODO confirm
+      // Endpoint codebook packers (definitions outside this chunk); by signature they write into `data` and report success -- TODO confirm.
+      bool pack_color_endpoints(crnlib::vector& data, const crnlib::vector& remapping, const crnlib::vector& endpoint_indices, uint trial_index);
+      bool pack_alpha_endpoints(crnlib::vector& data, const crnlib::vector& remapping, const crnlib::vector& endpoint_indices, uint trial_index);
+      // Static callbacks taking two indices plus an opaque context; the alpha selector one is handed a pointer to m_hvq.get_alpha_selectors_vec() as context.
+      static float color_selector_similarity_func(uint index_a, uint index_b, void* pContext);
+      static float alpha_selector_similarity_func(uint index_a, uint index_b, void* pContext);
+      void sort_selector_codebook(crnlib::vector& remapping, const crnlib::vector& selectors, const uint8* pTo_linear);
+      // Packs a selector codebook under `remapping` into packed_data; false means failure. The alpha path calls it with max_selector_value 7 and g_dxt5_to_linear.
+      bool pack_selectors(
+         crnlib::vector& packed_data,
+         const crnlib::vector& selector_indices,
+         const crnlib::vector& selectors,
+         const crnlib::vector& remapping,
+         uint max_selector_value,
+         const uint8* pTo_linear,
+         uint trial_index);
+      // Front-end stages, called in this order at the start of compress_internal(); a false return from alias_images() or quantize_chunks() aborts compression.
+      bool alias_images();
+      void create_chunks();
+      bool quantize_chunks();
+      void create_chunk_indices();
+      // Called twice per mip group by compress_internal(): pass 0 with pCodec == NULL (clear_histograms only for the first group), pass 1 with a live codec. A remap pointer is NULL when that component is absent.
+      bool pack_chunks(
+         uint first_chunk, uint num_chunks,
+         bool clear_histograms,
+         symbol_codec* pCodec,
+         const crnlib::vector* pColor_endpoint_remap,
+         const crnlib::vector* pColor_selector_remap,
+         const crnlib::vector* pAlpha_endpoint_remap,
+         const crnlib::vector* pAlpha_selector_remap);
+      // Size estimate only: the bit count comes back through total_bits. Used to score each trial alpha selector remap (the other remap pointers are NULL in that call).
+      bool pack_chunks_simulation(
+         uint first_chunk, uint num_chunks,
+         uint& total_bits,
+         const crnlib::vector* pColor_endpoint_remap,
+         const crnlib::vector* pColor_selector_remap,
+         const crnlib::vector* pAlpha_endpoint_remap,
+         const crnlib::vector* pAlpha_selector_remap);
+      // Color codebook optimizers: compress_internal() calls the non-task forms when m_has_comp[cColor] and feeds the resulting remaps to pack_chunks().
+      void optimize_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
+      bool optimize_color_endpoint_codebook(crnlib::vector& remapping);
+      // The *_task forms are presumably task-pool workers, as the alpha selector variant is -- TODO confirm against crn_comp.cpp.
+      void optimize_color_selector_codebook_task(uint64 data, void* pData_ptr);
+      bool optimize_color_selector_codebook(crnlib::vector& remapping);
+
+
void optimize_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr); // definition outside this chunk
+      bool optimize_alpha_endpoint_codebook(crnlib::vector& remapping); // called by compress_internal() when m_has_comp[cAlpha0]; the remap is later passed to pack_chunks()
+      // Alpha selector optimizer: in quick mode packs once with an identity remap; otherwise queues cMaxSelectorRemapIters + 1 trial tasks, joins, and keeps the trial with the fewest total bits.
+      void optimize_alpha_selector_codebook_task(uint64 data, void* pData_ptr); // queued via m_task_pool.queue_object_task() with an optimize_alpha_selector_codebook_params* as pData_ptr
+      bool optimize_alpha_selector_codebook(crnlib::vector& remapping); // false if packing/simulation fails or update_progress() returns false
+      // Builds m_comp_data: header + level offsets, packed codebooks, data models, per-mip-group chunk streams; then patches signature, sizes and CRC16s into the header copy.
+      bool create_comp_data();
+      // Encodes the chunk-encoding model and every non-empty endpoint/selector index model into m_packed_data_models; false if an encode call fails.
+      bool pack_data_models();
+      // Returns true when no progress callback is set (or, in debug-message builds, when the debugging flag is on); otherwise returns whether the callback returned non-zero.
+      bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
+      // Whole pipeline: alias/chunk/quantize/index, codebook optimization, two pack_chunks() passes, pack_data_models(), create_comp_data().
+      bool compress_internal();
+      // Append helpers used by create_comp_data() to grow m_comp_data from a raw byte range or from another vector (definitions outside this chunk).
+      static void append_vec(crnlib::vector& a, const void* p, uint size);
+      static void append_vec(crnlib::vector& a, const crnlib::vector& b);
+   };
+
+} // namespace crnlib
diff --git a/crnlib/crn_condition_var.cpp b/crnlib/crn_condition_var.cpp
new file mode 100644
index 00000000..ac8e1354
--- /dev/null
+++ b/crnlib/crn_condition_var.cpp
@@ -0,0 +1,431 @@
+// File: crn_condition_var.cpp
+// See Copyright Notice and license at the end of inc/crnlib.h
+#include "crn_core.h"
+#include "crn_condition_var.h"
+#include "crn_spinlock.h"
+#include "crn_winhdr.h"
+
+namespace crnlib
+{
+   void spinlock::lock(uint32 max_spins, bool yielding, bool memoryBarrier) // Spins until m_flag is acquired; with `yielding` it backs off through Sleep() once max_spins failed attempts accumulate.
+   {
+      if (g_number_of_processors <= 1)
+         max_spins = 1; // single processor: take the yield path after the first failed attempt
+
+      uint32 spinCount = 0;
+      uint32 yieldCount = 0; // back-off stage: 0 -> Sleep(0), 1 -> Sleep(1), 2 -> Sleep(2) from then on
+
+      for ( ; ; )
+      {
+         CRNLIB_ASSUME(sizeof(long) == sizeof(int32)); // presumably a compile-time check backing the cast of &m_flag below
+         if (!InterlockedExchange((volatile long*)&m_flag, TRUE)) // previous value 0 means the flag was free and is now ours
+            break;
+         // Lock is held elsewhere: issue a few processor pause hints before retrying.
+         YieldProcessor();
+         YieldProcessor();
+         YieldProcessor();
+         YieldProcessor();
+         YieldProcessor();
+         YieldProcessor();
+         YieldProcessor();
+         YieldProcessor();
+
+         spinCount++;
+         if ((yielding) && (spinCount >= max_spins))
+         {
+            switch (yieldCount)
+            {
+               case 0:
+               {
+                  spinCount = 0; // full spin budget again before the next stage
+
+                  Sleep(0);
+
+                  yieldCount++;
+                  break;
+               }
+               case 1:
+               {
+                  if (g_number_of_processors <= 1)
+                     spinCount = 0;
+                  else
+                     spinCount = max_spins / 2; // only half the spin budget before the next sleep
+
+                  Sleep(1);
+
+                  yieldCount++;
+                  break;
+               }
+               case 2:
+               {
+                  if (g_number_of_processors <= 1)
+                     spinCount = 0;
+                  else
+                     spinCount = max_spins; // threshold already met: every further failed attempt sleeps again
+
+                  Sleep(2);
+                  break; // yieldCount is not advanced, so this stage repeats until the lock is taken
+               }
+            }
+         }
+      }
+
+      if (memoryBarrier)
+      {
+#ifdef _MSC_VER
+         MemoryBarrier();
+#elif defined(__MINGW32__) && defined(__MINGW64__)
+         __sync_synchronize(); // NOTE(review): the && guard only matches 64-bit MinGW-w64 targets; 32-bit MinGW gets no explicit barrier -- confirm intent
+#endif
+      }
+   }
+   // Releases the spinlock: the barrier (where compiled in) is issued first, then m_flag is cleared with a plain store.
+   void spinlock::unlock()
+   {
+#ifdef _MSC_VER
+      MemoryBarrier();
+#elif defined(__MINGW32__) && defined(__MINGW64__)
+      __sync_synchronize(); // NOTE(review): same && guard as in lock() -- confirm intent
+#endif
+
+      m_flag = FALSE;
+   }
+   // Initializes the CRITICAL_SECTION stored in m_buf with the given spin count; calls crnlib_fail() if initialization reports failure.
+   mutex::mutex(unsigned int spin_count)
+   {
+      CRNLIB_ASSUME(sizeof(mutex) >= sizeof(CRITICAL_SECTION)); // the CRITICAL_SECTION is overlaid on m_buf, so it has to fit
+
+      void *p = m_buf;
+      CRITICAL_SECTION &m_cs = *static_cast(p); // NOTE(review): the static_cast target type is missing in this copy of the patch -- restore from upstream
+
+      BOOL status = true;
+#ifdef _XBOX
+      InitializeCriticalSectionAndSpinCount(&m_cs, spin_count); // result not captured on _XBOX, so status stays true
+#else
+      status = InitializeCriticalSectionAndSpinCount(&m_cs, spin_count);
+#endif
+      if (!status)
+         crnlib_fail("mutex::mutex: InitializeCriticalSectionAndSpinCount failed", __FILE__, __LINE__);
+
+#ifdef CRNLIB_BUILD_DEBUG
+      m_lock_count = 0; // debug-only lock depth, checked in unlock() and the destructor
+#endif
+   }
+   // Deletes the critical section; debug builds first report a mutex that is still locked.
+   mutex::~mutex()
+   {
+      void *p = m_buf;
+      CRITICAL_SECTION &m_cs = *static_cast(p);
+
+#ifdef CRNLIB_BUILD_DEBUG
+      if (m_lock_count)
+         crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__);
+#endif
+      DeleteCriticalSection(&m_cs);
+   }
+   // Enters the critical section; debug builds count the lock depth after entering.
+   void mutex::lock()
+   {
+      void *p = m_buf;
+      CRITICAL_SECTION &m_cs = *static_cast(p);
+
+      EnterCriticalSection(&m_cs);
+#ifdef CRNLIB_BUILD_DEBUG
+      m_lock_count++;
+#endif
+   }
+   // Leaves the critical section; debug builds report an unlock without a matching lock and decrement the depth before leaving.
+   void mutex::unlock()
+   {
+      void *p = m_buf;
+      CRITICAL_SECTION &m_cs = *static_cast(p);
+
+#ifdef CRNLIB_BUILD_DEBUG
+      if (!m_lock_count)
+         crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__);
+      m_lock_count--;
+#endif
+      LeaveCriticalSection(&m_cs);
+   }
+   // Forwards `count` to SetCriticalSectionSpinCount() for this mutex's critical section.
+   void mutex::set_spin_count(unsigned int count)
+   {
+      void *p = m_buf;
+      CRITICAL_SECTION &m_cs = *static_cast(p);
+
+      SetCriticalSectionSpinCount(&m_cs, count);
+   }
+   // Creates the Win32 semaphore (ANSI name, may be NULL per the header default usage -- TODO confirm); raises CRNLIB_FAIL if no handle is returned.
+   semaphore::semaphore(int32 initialCount, int32 maximumCount, const char* pName)
+   {
+      m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName);
+      if (NULL == m_handle)
+      {
+         CRNLIB_FAIL("semaphore: CreateSemaphore() failed");
+      }
+   }
+
semaphore::~semaphore() + { + if (m_handle) + { + CloseHandle(m_handle); + m_handle = NULL; + } + } + + void semaphore::release(int32 releaseCount, int32 *pPreviousCount) + { + CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); + if (0 == ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount)) + { + CRNLIB_FAIL("semaphore: ReleaseSemaphore() failed"); + } + } + + bool semaphore::wait(uint32 milliseconds) + { + uint32 result = WaitForSingleObject(m_handle, milliseconds); + + if (WAIT_FAILED == result) + { + CRNLIB_FAIL("semaphore: WaitForSingleObject() failed"); + } + + return WAIT_OBJECT_0 == result; + } + + event::event(bool manual_reset, bool initial_state, const char* pName) + { + m_handle = CreateEventA(NULL, manual_reset, initial_state, pName); + + if (NULL == m_handle) + CRNLIB_FAIL("event: CreateEvent() failed"); + } + + event::~event() + { + if (m_handle) + { + CloseHandle(m_handle); + m_handle = NULL; + } + } + + void event::set(void) + { + SetEvent(m_handle); + } + + void event::reset(void) + { + ResetEvent(m_handle); + } + + void event::pulse(void) + { + PulseEvent(m_handle); + } + + bool event::wait(uint32 milliseconds) + { + uint32 result = WaitForSingleObject(m_handle, milliseconds); + + if (result == WAIT_FAILED) + { + CRNLIB_FAIL("event: WaitForSingleObject() failed"); + } + + return (result == WAIT_OBJECT_0); + } + + condition_var::condition_var(uint spin_count) : + m_condition_var_lock(1, 1), + m_tls(TlsAlloc()), + m_cur_age(0), + m_max_waiter_array_index(-1) + { + CRNLIB_ASSERT(TLS_OUT_OF_INDEXES != m_tls); + + m_waiters_array_lock.set_spin_count(spin_count); + + m_waiters_array_lock.lock(); + + for (uint i = 0; i < cMaxWaitingThreads; i++) + m_waiters[i].clear(); + + m_waiters_array_lock.unlock(); + } + + condition_var::~condition_var() + { + TlsFree(m_tls); + } + + void condition_var::lock() + { + uint32 cur_count = get_cur_lock_count(); + CRNLIB_ASSERT(cur_count != 0xFFFFFFFF); + cur_count++; + set_cur_lock_count(cur_count); + + if (1 == 
cur_count) + m_condition_var_lock.wait(); + } + + void condition_var::unlock() + { + uint32 cur_count = get_cur_lock_count(); + CRNLIB_ASSERT(cur_count); + cur_count--; + set_cur_lock_count(cur_count); + + if (!cur_count) + leave_and_scan(); + } + + void condition_var::leave_and_scan(int index_to_ignore) + { + m_waiters_array_lock.lock(); + + uint best_age = 0; + int best_index = -1; + for (int i = 0; i <= m_max_waiter_array_index; i++) + { + waiting_thread& waiter = m_waiters[i]; + + if ((i != index_to_ignore) && (waiter.m_occupied) && (!waiter.m_satisfied)) + { + uint age = m_cur_age - waiter.m_age; + + if ((age > best_age) || (best_index < 0)) + { + if ((!waiter.m_callback_func) || (waiter.m_callback_func(waiter.m_pCallback_ptr, waiter.m_callback_data))) + { + best_age = age; + best_index = i; + } + } + } + } + + if (best_index >= 0) + { + waiting_thread& waiter = m_waiters[best_index]; + waiter.m_satisfied = true; + waiter.m_event.set(); + m_waiters_array_lock.unlock(); + } + else + { + m_waiters_array_lock.unlock(); + m_condition_var_lock.release(); + } + } + + uint32 condition_var::get_cur_lock_count() const + { + return (uint32)((intptr_t)TlsGetValue(m_tls)); + } + + int condition_var::wait( + pCondition_func pCallback, void* pCallback_data_ptr, uint64 callback_data, + uint num_wait_handles, const void** pWait_handles, uint32 max_time_to_wait) + { + CRNLIB_ASSERT(get_cur_lock_count()); + + // First, see if the calling thread's condition function is satisfied. If so, there's no need to wait. + if ((pCallback) && (pCallback(pCallback_data_ptr, callback_data))) + return 0; + + // Add this thread to the list of waiters. 
+ m_waiters_array_lock.lock(); + + uint i; + for (i = 0; i < cMaxWaitingThreads; i++) + if (!m_waiters[i].m_occupied) + break; + + CRNLIB_VERIFY(i != cMaxWaitingThreads); + + m_max_waiter_array_index = math::maximum(m_max_waiter_array_index, i); + + waiting_thread& waiter = m_waiters[i]; + + waiter.m_callback_func = pCallback; + waiter.m_pCallback_ptr = pCallback_data_ptr; + waiter.m_callback_data = callback_data; + waiter.m_satisfied = false; + waiter.m_occupied = true; + waiter.m_age = m_cur_age++; + waiter.m_event.reset(); + + m_waiters_array_lock.unlock(); + + // Now leave the condition_var and scan to see if there are any satisfied waiters. + leave_and_scan(i); + + // Let's wait for this thread's condition to be satisfied, or until timeout, or until one of the user supplied handles is signaled. + int return_index = 0; + + const uint cMaxWaitHandles = 64; + CRNLIB_ASSERT(num_wait_handles < cMaxWaitHandles); + + HANDLE handles[cMaxWaitHandles]; + + handles[0] = waiter.m_event.get_handle(); + uint total_handles = 1; + + if (num_wait_handles) + { + CRNLIB_ASSERT(pWait_handles); + memcpy(handles + total_handles, pWait_handles, sizeof(HANDLE) * num_wait_handles); + total_handles += num_wait_handles; + } + + uint32 result; + if (max_time_to_wait == UINT32_MAX) + { + do + { + result = WaitForMultipleObjects(total_handles, handles, FALSE, 2000); + } while (result == WAIT_TIMEOUT); + } + else + result = WaitForMultipleObjects(total_handles, handles, FALSE, max_time_to_wait); + + if ((result == WAIT_ABANDONED) || (result == WAIT_TIMEOUT)) + return_index = -1; + else + return_index = result - WAIT_OBJECT_0; + + // See if our condition was satisfied, and remove this thread from the waiter list. 
+ m_waiters_array_lock.lock(); + + const bool was_satisfied = waiter.m_satisfied; + + waiter.m_occupied = false; + + m_waiters_array_lock.unlock(); + + if (0 == return_index) + { + CRNLIB_ASSERT(was_satisfied); + } + else + { + // Enter the condition_var if a user supplied handle was signaled. This guarantees that on exit of this function we're still inside the condition_var, no matter + // what happened during the WaitForMultipleObjects() call. + if (!was_satisfied) + m_condition_var_lock.wait(); + } + + return return_index; + } + + void condition_var::set_cur_lock_count(uint32 newCount) + { + TlsSetValue(m_tls, (void*)newCount); + } + +} // namespace crnlib diff --git a/crnlib/crn_condition_var.h b/crnlib/crn_condition_var.h new file mode 100644 index 00000000..1c76b954 --- /dev/null +++ b/crnlib/crn_condition_var.h @@ -0,0 +1,91 @@ +// File: crn_condition_var.h +// See Copyright Notice and license at the end of inc/crnlib.h +// Inspired by the "monitor" class in "Win32 Multithreaded Programming" by Cohen and Woodring. +// Also see http://en.wikipedia.org/wiki/Monitor_(synchronization) +#pragma once + +#include "crn_mutex.h" +#include "crn_event.h" +#include "crn_semaphore.h" + +namespace crnlib +{ + class condition_var + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(condition_var); + + public: + condition_var(uint spin_count = 4096U); + ~condition_var(); + + // Locks the condition_var. + // Recursive locks are supported. + void lock(); + + // Returns TRUE if the thread owning this condition function should stop waiting. + // This function will always be called from within the condition_var, but it may be called from several different threads! + typedef bool (*pCondition_func)(void* pCallback_data_ptr, uint64 callback_data); + + // Temporarily leaves the lock and waits for a condition to be satisfied. + // If pCallback is NULL, this method will return after another thread enters and exits the lock (like a Vista-style condition variable). 
+ // Otherwise, this method will only return when the specified condition function returns TRUE when another thread exits the lock. + // When this method returns, the calling thread will be inside the lock. + // Returns -1 on timeout or error, 0 if the wait was satisfied, or 1 or higher if one of the extra wait handles became signaled. + // It is highly recommended you use a non-null condition callback. If you don't be sure to check for race conditions! + int wait(pCondition_func pCallback = NULL, void* pCallback_data_ptr = NULL, uint64 callback_data = 0, + uint num_wait_handles = 0, const void** pWait_handles = NULL, uint32 max_time_to_wait = UINT32_MAX); + + // Unlocks the condition_var. Another thread may be woken up if its condition function has become satisfied. + void unlock(); + + uint32 get_cur_lock_count() const; + + private: + enum { cMaxWaitingThreads = 16, cMaxWaitingThreadsMask = cMaxWaitingThreads - 1 }; + + semaphore m_condition_var_lock; + mutex m_waiters_array_lock; + uint32 m_tls; + uint m_cur_age; + + struct waiting_thread + { + uint64 m_callback_data; + void* m_pCallback_ptr; + pCondition_func m_callback_func; + uint m_age; + bool m_satisfied; + bool m_occupied; + + event m_event; + + void clear() + { + m_callback_data = 0; + m_pCallback_ptr = NULL; + m_callback_func = NULL; + m_age = 0; + m_satisfied = false; + m_occupied = false; + } + }; + waiting_thread m_waiters[cMaxWaitingThreads]; + + int m_max_waiter_array_index; + + void set_cur_lock_count(uint32 newCount); + + void leave_and_scan(int index_to_ignore = -1); + }; + + class scoped_condition_var + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(scoped_condition_var); + public: + inline scoped_condition_var(condition_var& m) : m_condition_var(m) { m_condition_var.lock(); } + inline ~scoped_condition_var() { m_condition_var.unlock(); } + private: + condition_var& m_condition_var; + }; + +} // namespace crnlib diff --git a/crnlib/crn_console.cpp b/crnlib/crn_console.cpp new file mode 100644 index 
00000000..08eee841 --- /dev/null +++ b/crnlib/crn_console.cpp @@ -0,0 +1,231 @@ +// File: crn_console.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_console.h" +#include "crn_data_stream.h" + +namespace crnlib +{ + eConsoleMessageType console::m_default_category = cInfoConsoleMessage; + crnlib::vector console::m_output_funcs; + bool console::m_crlf = true; + bool console::m_prefixes = true; + bool console::m_output_disabled; + data_stream* console::m_pLog_stream; + mutex* console::m_pMutex; + uint console::m_num_messages[cCMTTotal]; + + const uint cConsoleBufSize = 4096; + + void console::init() + { + if (!m_pMutex) + { + m_pMutex = crnlib_new(); + } + } + + void console::deinit() + { + if (m_pMutex) + { + crnlib_delete(m_pMutex); + m_pMutex = NULL; + } + } + + void console::disable_crlf() + { + init(); + + m_crlf = false; + } + + void console::enable_crlf() + { + init(); + + m_crlf = true; + } + + void console::vprintf(eConsoleMessageType type, const wchar_t* p, va_list args) + { + init(); + + scoped_mutex lock(*m_pMutex); + + m_num_messages[type]++; + + wchar_t buf[cConsoleBufSize]; +#ifdef _MSC_VER + vswprintf_s(buf, cConsoleBufSize, p, args); +#else + vswprintf(buf, p, args); +#endif + + bool handled = false; + + if (m_output_funcs.size()) + { + for (uint i = 0; i < m_output_funcs.size(); i++) + if (m_output_funcs[i].m_func(type, buf, m_output_funcs[i].m_pData)) + handled = true; + } + + const wchar_t* pPrefix = NULL; + if (m_prefixes) + { + switch (type) + { + case cDebugConsoleMessage: pPrefix = L"Debug: "; break; + case cWarningConsoleMessage: pPrefix = L"Warning: "; break; + case cErrorConsoleMessage: pPrefix = L"Error: "; break; + default: break; + } + } + + if ((!m_output_disabled) && (!handled)) + { +#ifdef _XBOX + if (pPrefix) + OutputDebugStringW(pPrefix); + OutputDebugStringW(buf); + if (m_crlf) + OutputDebugStringW(L"\n"); +#else + if (pPrefix) + ::wprintf(pPrefix); + ::wprintf(m_crlf ? 
L"%s\n" : L"%s", buf); +#endif + } + + if ((type != cProgressConsoleMessage) && (m_pLog_stream)) + { + // Yes this is bad. + dynamic_wstring utf16_buf(buf); + + dynamic_string ansi_buf; + utf16_buf.as_ansi(ansi_buf); + ansi_buf.translate_lf_to_crlf(); + + m_pLog_stream->printf(m_crlf ? "%s\r\n" : "%s", ansi_buf.get_ptr()); + m_pLog_stream->flush(); + } + } + + void console::printf(eConsoleMessageType type, const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(type, p, args); + va_end(args); + } + + void console::printf(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(m_default_category, p, args); + va_end(args); + } + + void console::set_default_category(eConsoleMessageType category) + { + init(); + + m_default_category = category; + } + + eConsoleMessageType console::get_default_category() + { + init(); + + return m_default_category; + } + + void console::add_console_output_func(console_output_func pFunc, void* pData) + { + init(); + + scoped_mutex lock(*m_pMutex); + + m_output_funcs.push_back(console_func(pFunc, pData)); + } + + void console::remove_console_output_func(console_output_func pFunc) + { + init(); + + scoped_mutex lock(*m_pMutex); + + for (int i = m_output_funcs.size() - 1; i >= 0; i--) + { + if (m_output_funcs[i].m_func == pFunc) + { + m_output_funcs.erase(m_output_funcs.begin() + i); + } + } + + if (!m_output_funcs.size()) + { + m_output_funcs.clear(); + } + } + + void console::progress(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cProgressConsoleMessage, p, args); + va_end(args); + } + + void console::info(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cInfoConsoleMessage, p, args); + va_end(args); + } + + void console::message(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cMessageConsoleMessage, p, args); + va_end(args); + } + + void console::cons(const wchar_t* p, ...) 
+ { + va_list args; + va_start(args, p); + vprintf(cConsoleConsoleMessage, p, args); + va_end(args); + } + + void console::debug(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cDebugConsoleMessage, p, args); + va_end(args); + } + + void console::warning(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cWarningConsoleMessage, p, args); + va_end(args); + } + + void console::error(const wchar_t* p, ...) + { + va_list args; + va_start(args, p); + vprintf(cErrorConsoleMessage, p, args); + va_end(args); + } + +} // namespace crnlib diff --git a/crnlib/crn_console.h b/crnlib/crn_console.h new file mode 100644 index 00000000..3a44bd3f --- /dev/null +++ b/crnlib/crn_console.h @@ -0,0 +1,94 @@ +// File: crn_console.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dynamic_string.h" + +namespace crnlib +{ + class dynamic_string; + class data_stream; + class mutex; + + enum eConsoleMessageType + { + cDebugConsoleMessage, // debugging messages + cProgressConsoleMessage, // progress messages + cInfoConsoleMessage, // ordinary messages + cConsoleConsoleMessage, // user console output + cMessageConsoleMessage, // high importance messages + cWarningConsoleMessage, // warnings + cErrorConsoleMessage, // errors + + cCMTTotal + }; + + typedef bool (*console_output_func)(eConsoleMessageType type, const wchar_t* pMsg, void* pData); + + class console + { + public: + static void init(); + static void deinit(); + + static bool is_initialized() { return m_pMutex != NULL; } + + static void set_default_category(eConsoleMessageType category); + static eConsoleMessageType get_default_category(); + + static void add_console_output_func(console_output_func pFunc, void* pData); + static void remove_console_output_func(console_output_func pFunc); + + static void printf(const wchar_t* p, ...); + + static void vprintf(eConsoleMessageType type, const wchar_t* p, va_list args); + static void 
printf(eConsoleMessageType type, const wchar_t* p, ...); + + static void cons(const wchar_t* p, ...); + static void debug(const wchar_t* p, ...); + static void progress(const wchar_t* p, ...); + static void info(const wchar_t* p, ...); + static void message(const wchar_t* p, ...); + static void warning(const wchar_t* p, ...); + static void error(const wchar_t* p, ...); + + // FIXME: All console state is currently global! + static void disable_prefixes(); + static void enable_prefixes(); + static bool get_prefixes() { return m_prefixes; } + + static void disable_crlf(); + static void enable_crlf(); + static bool get_crlf() { return m_crlf; } + + static void disable_output() { m_output_disabled = true; } + static void enable_output() { m_output_disabled = false; } + static bool get_output_disabled() { return m_output_disabled; } + + static void set_log_stream(data_stream* pStream) { m_pLog_stream = pStream; } + static data_stream* get_log_stream() { return m_pLog_stream; } + + static uint get_num_messages(eConsoleMessageType type) { return m_num_messages[type]; } + + private: + static eConsoleMessageType m_default_category; + + struct console_func + { + console_func(console_output_func func = NULL, void* pData = NULL) : m_func(func), m_pData(pData) { } + + console_output_func m_func; + void* m_pData; + }; + static crnlib::vector m_output_funcs; + + static bool m_crlf, m_prefixes, m_output_disabled; + + static data_stream* m_pLog_stream; + + static mutex* m_pMutex; + + static uint m_num_messages[cCMTTotal]; + }; + +} // namespace crnlib + diff --git a/crnlib/crn_core.cpp b/crnlib/crn_core.cpp new file mode 100644 index 00000000..665d3bbf --- /dev/null +++ b/crnlib/crn_core.cpp @@ -0,0 +1,7 @@ +// File: crn_core.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_winhdr.h" + +char *g_copyright_str = "Copyright (c) 2010-2011 Tenacious Software LLC"; +char *g_sig_str = "C8cfRlaorj0wLtnMSxrBJxTC85rho2L9hUZKHcBL"; diff 
--git a/crnlib/crn_core.h b/crnlib/crn_core.h new file mode 100644 index 00000000..f92c9191 --- /dev/null +++ b/crnlib/crn_core.h @@ -0,0 +1,103 @@ +// File: crn_core.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#if defined(WIN32) && defined(_MSC_VER) + #pragma warning (disable: 4201) // nonstandard extension used : nameless struct/union + #pragma warning (disable: 4127) // conditional expression is constant + #pragma warning (disable: 4793) // function compiled as native +#endif + +#if defined(WIN32) + +#if 0 + #ifdef NDEBUG + // Ensure checked iterators are disabled. + #define _SECURE_SCL 0 + #define _HAS_ITERATOR_DEBUGGING 0 + #endif + + #ifndef _DLL + // If we're using the DLL form of the run-time libs, we're also going to be enabling exceptions because we'll be building CLR apps. + // Otherwise, we disable exceptions for a small (up to 5%) speed boost. + #define _HAS_EXCEPTIONS 0 + #endif +#endif + + //#define _CRT_SECURE_NO_WARNINGS + #define NOMINMAX + + #define CRNLIB_PLATFORM_PC 1 + + #ifdef _WIN64 + #define CRNLIB_PLATFORM_PC_X64 1 + #else + #define CRNLIB_PLATFORM_PC_X86 1 + #endif + + #define CRNLIB_USE_WIN32_API 1 + + #ifdef _WIN64 + #define CRNLIB_PLATFORM_PC_X64 1 + #define CRNLIB_64BIT_POINTERS 1 + #define CRNLIB_CPU_HAS_64BIT_REGISTERS 1 + #define CRNLIB_LITTLE_ENDIAN_CPU 1 + #else + #define CRNLIB_PLATFORM_PC_X86 1 + #define CRNLIB_64BIT_POINTERS 0 + #define CRNLIB_CPU_HAS_64BIT_REGISTERS 0 + #define CRNLIB_LITTLE_ENDIAN_CPU 1 + #endif +#endif + +#include +#include +#include +#include +#include +#include +#include + +#ifdef min + #undef min +#endif + +#ifdef max + #undef max +#endif + +#define CRNLIB_FALSE (0) +#define CRNLIB_TRUE (1) +#define CRNLIB_MAX_PATH (260) + +#ifdef _DEBUG + #define CRNLIB_BUILD_DEBUG +#else + #define CRNLIB_BUILD_RELEASE + + #ifndef NDEBUG + #define NDEBUG + #endif +#endif + +#include "crn_platform.h" + +#if defined(WIN32) + #include "crn_mutex.h" +#endif + +#include "crn_assert.h" 
+#include "crn_types.h" +#include "crn_helpers.h" +#include "crn_traits.h" +#include "crn_mem.h" +#include "crn_math.h" +#include "crn_utils.h" +#include "crn_hash.h" +#include "crn_vector.h" +#include "crn_win32_timer.h" +#include "crn_win32_threading.h" +#include "crn_dynamic_string.h" +#include "crn_dynamic_wstring.h" + + diff --git a/crnlib/crn_data_stream.cpp b/crnlib/crn_data_stream.cpp new file mode 100644 index 00000000..036cb264 --- /dev/null +++ b/crnlib/crn_data_stream.cpp @@ -0,0 +1,154 @@ +// File: crn_data_stream.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_data_stream.h" +#include + +namespace crnlib +{ + data_stream::data_stream() : + m_attribs(0), + m_opened(false), m_error(false), m_got_cr(false) + { + } + + data_stream::data_stream(const wchar_t* pName, uint attribs) : + m_name(pName), + m_attribs(static_cast(attribs)), + m_opened(false), m_error(false), m_got_cr(false) + { + } + + uint64 data_stream::skip(uint64 len) + { + uint64 total_bytes_read = 0; + + const uint cBufSize = 1024; + uint8 buf[cBufSize]; + + while (len) + { + const uint64 bytes_to_read = math::minimum(sizeof(buf), len); + const uint64 bytes_read = read(buf, static_cast(bytes_to_read)); + total_bytes_read += bytes_read; + + if (bytes_read != bytes_to_read) + break; + + len -= bytes_read; + } + + return total_bytes_read; + } + + bool data_stream::read_line(dynamic_string& str) + { + str.empty(); + + for ( ; ; ) + { + const int c = read_byte(); + + const bool prev_got_cr = m_got_cr; + m_got_cr = false; + + if (c < 0) + { + if (!str.is_empty()) + break; + + return false; + } + else if ((26 == c) || (!c)) + continue; + else if (13 == c) + { + m_got_cr = true; + break; + } + else if (10 == c) + { + if (prev_got_cr) + continue; + + break; + } + + str.append_char(static_cast(c)); + } + + return true; + } + + bool data_stream::printf(const char* p, ...) 
+ { + va_list args; + + va_start(args, p); + char buf[4096]; +#ifdef _MSC_VER + int l = vsprintf_s(buf, sizeof(buf), p, args); +#else + int l = vsprintf(buf, p, args); +#endif + va_end(args); + if (l < 0) + return false; + return write(buf, l) == static_cast(l); + } + + bool data_stream::printf(const wchar_t* p, ...) + { + va_list args; + + va_start(args, p); + dynamic_wstring buf; + buf.format_args(p, args); + va_end(args); + + return write(buf.get_ptr(), buf.get_len() * sizeof(wchar_t)) == buf.get_len() * sizeof(wchar_t); + } + + bool data_stream::write_line(const dynamic_string& str) + { + if (!str.is_empty()) + return write(str.get_ptr(), str.get_len()) == str.get_len(); + + return true; + } + + bool data_stream::write_line(const dynamic_wstring& str) + { + if (!str.is_empty()) + return write(str.get_ptr(), str.get_len() * sizeof(wchar_t)) == str.get_len() * sizeof(wchar_t); + + return true; + } + + bool data_stream::read_array(vector& buf) + { + if (buf.size() < get_remaining()) + { + if (get_remaining() > 1024U*1024U*1024U) + return false; + + buf.resize((uint)get_remaining()); + } + + if (!get_remaining()) + { + buf.resize(0); + return true; + } + + return read(&buf[0], buf.size()) == buf.size(); + } + + bool data_stream::write_array(const vector& buf) + { + if (!buf.empty()) + return write(&buf[0], buf.size()) == buf.size(); + return true; + } + +} // namespace crnlib diff --git a/crnlib/crn_data_stream.h b/crnlib/crn_data_stream.h new file mode 100644 index 00000000..aa8c96de --- /dev/null +++ b/crnlib/crn_data_stream.h @@ -0,0 +1,91 @@ +// File: crn_data_stream.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + enum data_stream_attribs + { + cDataStreamReadable = 1, + cDataStreamWritable = 2, + cDataStreamSeekable = 4 + }; + + const int64 DATA_STREAM_SIZE_UNKNOWN = INT64_MAX; + const int64 DATA_STREAM_SIZE_INFINITE = UINT64_MAX; + + class data_stream + { + data_stream(const data_stream&); + 
data_stream& operator= (const data_stream&); + + public: + data_stream(); + data_stream(const wchar_t* pName, uint attribs); + + virtual ~data_stream() { } + + virtual data_stream *get_parent() { return NULL; } + + virtual bool close() { m_opened = false; m_error = false; m_got_cr = false; return true; } + + typedef uint16 attribs_t; + inline attribs_t get_attribs() const { return m_attribs; } + + inline bool is_opened() const { return m_opened; } + + inline bool is_readable() const { return utils::is_bit_set(m_attribs, cDataStreamReadable); } + inline bool is_writable() const { return utils::is_bit_set(m_attribs, cDataStreamWritable); } + inline bool is_seekable() const { return utils::is_bit_set(m_attribs, cDataStreamSeekable); } + + inline bool get_error() const { return m_error; } + + inline const dynamic_wstring& get_name() const { return m_name; } + inline void set_name(const wchar_t* pName) { m_name.set(pName); } + + virtual uint read(void* pBuf, uint len) = 0; + virtual uint64 skip(uint64 len); + + virtual uint write(const void* pBuf, uint len) = 0; + virtual bool flush() = 0; + + virtual bool is_size_known() const { return true; } + + // Returns DATA_STREAM_SIZE_UNKNOWN if size hasn't been determined yet, or DATA_STREAM_SIZE_INFINITE for infinite streams. 
+ virtual uint64 get_size() = 0; + virtual uint64 get_remaining() = 0; + + virtual uint64 get_ofs() = 0; + virtual bool seek(int64 ofs, bool relative) = 0; + + virtual const void* get_ptr() const { return NULL; } + + inline int read_byte() { uint8 c; if (read(&c, 1) != 1) return -1; return c; } + inline bool write_byte(uint8 c) { return write(&c, 1) == 1; } + + bool read_line(dynamic_string& str); + bool printf(const char* p, ...); + bool printf(const wchar_t* p, ...); + bool write_line(const dynamic_string& str); + bool write_line(const dynamic_wstring& str); + bool write_bom() { uint16 bom = 0xFEFF; return write(&bom, sizeof(bom)) == sizeof(bom); } + + bool read_array(vector& buf); + bool write_array(const vector& buf); + + protected: + dynamic_wstring m_name; + + attribs_t m_attribs; + bool m_opened : 1; + bool m_error : 1; + bool m_got_cr : 1; + + inline void set_error() { m_error = true; } + inline void clear_error() { m_error = false; } + + inline void post_seek() { m_got_cr = false; } + }; + +} // namespace crnlib + diff --git a/crnlib/crn_data_stream_serializer.h b/crnlib/crn_data_stream_serializer.h new file mode 100644 index 00000000..4bb17e2a --- /dev/null +++ b/crnlib/crn_data_stream_serializer.h @@ -0,0 +1,432 @@ +// File: data_stream_serializer.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_data_stream.h" + +namespace crnlib +{ + // Defaults to little endian mode. 
+ class data_stream_serializer + { + public: + data_stream_serializer() : m_pStream(NULL), m_little_endian(true) { } + data_stream_serializer(data_stream* pStream) : m_pStream(pStream), m_little_endian(true) { } + data_stream_serializer(data_stream& stream) : m_pStream(&stream), m_little_endian(true) { } + data_stream_serializer(const data_stream_serializer& other) : m_pStream(other.m_pStream), m_little_endian(other.m_little_endian) { } + + data_stream_serializer& operator= (const data_stream_serializer& rhs) { m_pStream = rhs.m_pStream; m_little_endian = rhs.m_little_endian; return *this; } + + data_stream* get_stream() const { return m_pStream; } + void set_stream(data_stream* pStream) { m_pStream = pStream; } + + bool get_error() { return m_pStream ? m_pStream->get_error() : false; } + + bool get_little_endian() const { return m_little_endian; } + void set_little_endian(bool little_endian) { m_little_endian = little_endian; } + + bool write(const void* pBuf, uint len) + { + return m_pStream->write(pBuf, len) == len; + } + + bool read(void* pBuf, uint len) + { + return m_pStream->read(pBuf, len) == len; + } + + bool write_chars(const char* pBuf, uint len) + { + return write(pBuf, len); + } + + bool read_chars(char* pBuf, uint len) + { + return read(pBuf, len); + } + + bool skip(uint len) + { + return m_pStream->skip(len) == len; + } + + template + bool write_object(const T& obj) + { + if (m_little_endian == c_crnlib_little_endian_platform) + return write(&obj, sizeof(obj)); + else + { + uint8 buf[sizeof(T)]; + uint buf_size = sizeof(T); + void* pBuf = buf; + utils::write_obj(obj, pBuf, buf_size, m_little_endian); + + return write(buf, sizeof(T)); + } + } + + template + bool read_object(T& obj) + { + if (m_little_endian == c_crnlib_little_endian_platform) + return read(&obj, sizeof(obj)); + else + { + uint8 buf[sizeof(T)]; + if (!read(buf, sizeof(T))) + return false; + + uint buf_size = sizeof(T); + const void* pBuf = buf; + utils::read_obj(obj, pBuf, buf_size, 
m_little_endian); + + return true; + } + } + + template + bool write_value(T value) + { + return write_object(value); + } + + template + T read_value(const T& on_error_value = T()) + { + T result; + if (!read_object(result)) + result = on_error_value; + return result; + } + + template + bool write_enum(T e) + { + int val = static_cast(e); + return write_object(val); + } + + template + T read_enum() + { + return static_cast(read_value()); + } + + // Writes uint using a simple variable length code (VLC). + bool write_uint_vlc(uint val) + { + do + { + uint8 c = static_cast(val) & 0x7F; + if (val <= 0x7F) + c |= 0x80; + + if (!write_value(c)) + return false; + + val >>= 7; + } while (val); + + return true; + } + + // Reads uint using a simple variable length code (VLC). + // Each byte carries 7 payload bits, least significant group first; the byte with bit 0x80 set terminates the value. + // Returns false on a read failure or if no terminator byte is seen within 32 bits of payload. + bool read_uint_vlc(uint& val) + { + val = 0; + uint shift = 0; + + for ( ; ; ) + { + if (shift >= 32) + return false; + + uint8 c; + if (!read_object(c)) + return false; + + // Shift as unsigned: (c & 0x7F) promotes to int, and shifting it left by 28 overflows a signed int. + val |= (static_cast<uint>(c & 0x7F) << shift); + shift += 7; + + if (c & 0x80) + break; + } + + return true; + } + + bool write_c_str(const char* p) + { + uint len = static_cast(strlen(p)); + if (!write_uint_vlc(len)) + return false; + + return write_chars(p, len); + } + + // Reads a VLC length followed by that many chars into pBuf and null terminates it. + // Returns false if the length cannot be read, if the string plus terminator does not fit in buf_size bytes, or if the chars cannot be read. + bool read_c_str(char* pBuf, uint buf_size) + { + uint len; + if (!read_uint_vlc(len)) + return false; + // Compare without adding 1: (len + 1) wraps to 0 when len == UINT_MAX, which let the check pass and pBuf[len] write out of bounds. + if (len >= buf_size) + return false; + + pBuf[len] = '\0'; + + return read_chars(pBuf, len); + } + + bool write_string(const dynamic_string& str) + { + if (!write_uint_vlc(str.get_len())) + return false; + + return write_chars(str.get_ptr(), str.get_len()); + } + + bool read_string(dynamic_string& str) + { + uint len; + if (!read_uint_vlc(len)) + return false; + + if (!str.set_len(len)) + return false; + + if (len) + { + if (!read_chars(str.get_ptr_raw(), len)) + return false; + + if (memchr(str.get_ptr(), 0, len) != NULL) + { + str.truncate(0); + return false; + } + } + + return true; + } + + template + bool write_vector(const T& vec) + { + if 
(!write_uint_vlc(vec.size())) + return false; + + for (uint i = 0; i < vec.size(); i++) + { + *this << vec[i]; + if (get_error()) + return false; + } + + return true; + }; + + template + bool read_vector(T& vec, uint num_expected = UINT_MAX) + { + uint size; + if (!read_uint_vlc(size)) + return false; + + if ((size * sizeof(T::value_type)) >= 2U*1024U*1024U*1024U) + return false; + + if ((num_expected != UINT_MAX) && (size != num_expected)) + return false; + + vec.resize(size); + for (uint i = 0; i < vec.size(); i++) + { + *this >> vec[i]; + + if (get_error()) + return false; + } + + return true; + } + + // Got this idea from the Molly Rocket forums. + // fmt may contain the characters "1", "2", or "4": each one writes a 1, 2 or 4 byte value taken from the next variadic uint argument. + // ' ' and ',' are ignored separators. Returns false on the first failed write or on any other format character. + bool writef(char *fmt, ...) + { + va_list v; + va_start(v, fmt); + + bool success = true; + + while (success && *fmt) + { + switch (*fmt++) + { + // Every case ends in break: without it '1' fell through into '2' and '4', consuming extra arguments and writing extra fields. + case '1': + { + const uint8 x = static_cast<uint8>(va_arg(v, uint)); + success = write_value(x); + break; + } + case '2': + { + const uint16 x = static_cast<uint16>(va_arg(v, uint)); + success = write_value(x); + break; + } + case '4': + { + const uint32 x = static_cast<uint32>(va_arg(v, uint)); + success = write_value(x); + break; + } + case ' ': + case ',': + { + break; + } + default: + { + CRNLIB_ASSERT(0); + success = false; + break; + } + } + } + + // Single exit so va_end always pairs with va_start, including on failure. + va_end(v); + return success; + } + + // Got this idea from the Molly Rocket forums. + // fmt may contain the characters "1", "2", or "4". + bool readf(char *fmt, ...) 
+ { + // For each '1', '2' or '4' in fmt, reads a 1, 2 or 4 byte value into the pointer taken from the next variadic argument. + // ' ' and ',' are ignored separators. Returns false on the first failed read or on any other format character. + va_list v; + va_start(v, fmt); + + bool success = true; + + while (success && *fmt) + { + switch (*fmt++) + { + // Every case ends in break: without it '1' fell through into '2' and '4', consuming extra pointer arguments and reading extra fields. + case '1': + { + uint8* x = va_arg(v, uint8*); + CRNLIB_ASSERT(x); + success = read_object(*x); + break; + } + case '2': + { + uint16* x = va_arg(v, uint16*); + CRNLIB_ASSERT(x); + success = read_object(*x); + break; + } + case '4': + { + uint32* x = va_arg(v, uint32*); + CRNLIB_ASSERT(x); + success = read_object(*x); + break; + } + case ' ': + case ',': + { + break; + } + default: + { + CRNLIB_ASSERT(0); + success = false; + break; + } + } + } + + // Single exit so va_end always pairs with va_start, including on failure. + va_end(v); + return success; + } + + private: + data_stream* m_pStream; + + bool m_little_endian; + }; + + // Write operators + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, bool val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, int8 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, uint8 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, int16 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, uint16 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, int32 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, uint32 val) { serializer.write_uint_vlc(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, int64 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, uint64 val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< 
(data_stream_serializer& serializer, long val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, unsigned long val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, float val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, double val) { serializer.write_value(val); return serializer; } + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, const char* p) { serializer.write_c_str(p); return serializer; } + + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, const dynamic_string& str) + { + serializer.write_string(str); + return serializer; + } + + template + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, const crnlib::vector& vec) + { + serializer.write_vector(vec); + return serializer; + } + + template + inline data_stream_serializer& operator<< (data_stream_serializer& serializer, const T* p) + { + serializer.write_object(*p); + return serializer; + } + + // Read operators + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, bool& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, int8& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, uint8& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, int16& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, uint16& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> 
(data_stream_serializer& serializer, int32& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, uint32& val) { serializer.read_uint_vlc(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, int64& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, uint64& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, long& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, unsigned long& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, float& val) { serializer.read_object(val); return serializer; } + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, double& val) { serializer.read_object(val); return serializer; } + + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, dynamic_string& str) + { + serializer.read_string(str); + return serializer; + } + + template + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, crnlib::vector& vec) + { + serializer.read_vector(vec); + return serializer; + } + + template + inline data_stream_serializer& operator>> (data_stream_serializer& serializer, T* p) + { + serializer.read_object(*p); + return serializer; + } + +} // namespace crnlib + + + + + + + diff --git a/crnlib/crn_dds_comp.cpp b/crnlib/crn_dds_comp.cpp new file mode 100644 index 00000000..f382ceef --- /dev/null +++ b/crnlib/crn_dds_comp.cpp @@ -0,0 +1,259 @@ +// File: crn_dds_comp.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dds_comp.h" +#include 
"crn_dynamic_stream.h" +#include "crn_lzma_codec.h" + +namespace crnlib +{ + dds_comp::dds_comp() : + m_pParams(NULL), + m_pixel_fmt(PIXEL_FMT_INVALID), + m_pQDXT_state(NULL) + { + } + + dds_comp::~dds_comp() + { + crnlib_delete(m_pQDXT_state); + } + + void dds_comp::clear() + { + m_src_tex.clear(); + m_packed_tex.clear(); + m_comp_data.clear(); + m_pParams = NULL; + m_pixel_fmt = PIXEL_FMT_INVALID; + m_task_pool.deinit(); + if (m_pQDXT_state) + { + crnlib_delete(m_pQDXT_state); + m_pQDXT_state = NULL; + } + } + + bool dds_comp::create_dds_tex(dds_texture &dds_tex) + { + image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; + + bool has_alpha = false; + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + const uint width = math::maximum(1U, m_pParams->m_width >> level_index); + const uint height = math::maximum(1U, m_pParams->m_height >> level_index); + + if (!m_pParams->m_pImages[face_index][level_index]) + return false; + + images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); + if (!has_alpha) + has_alpha = image_utils::has_alpha(images[face_index][level_index]); + } + } + + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + images[face_index][level_index].set_component_valid(3, has_alpha); + + image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); + if (conv_type != image_utils::cConversion_Invalid) + { + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + image_u8 cooked_image(images[face_index][level_index]); + + image_utils::convert_image(cooked_image, conv_type); + + 
images[face_index][level_index].swap(cooked_image); + } + } + } + + face_vec faces(m_pParams->m_faces); + + for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) + { + for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) + { + mip_level *pMip = crnlib_new(); + + image_u8 *pImage = crnlib_new(); + pImage->swap(images[face_index][level_index]); + pMip->assign(pImage); + + faces[face_index].push_back(pMip); + } + } + + dds_tex.assign(faces); +#ifdef CRNLIB_BUILD_DEBUG + CRNLIB_ASSERT(dds_tex.check()); +#endif + + return true; + } + + static bool progress_callback_func(uint percentage_complete, void* pUser_data_ptr) + { + const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; + return params.m_pProgress_func(0, 1, percentage_complete, 100, params.m_pProgress_func_data) != 0; + } + + static bool progress_callback_func_phase_0(uint percentage_complete, void* pUser_data_ptr) + { + const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; + return params.m_pProgress_func(0, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; + } + + static bool progress_callback_func_phase_1(uint percentage_complete, void* pUser_data_ptr) + { + const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; + return params.m_pProgress_func(1, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; + } + + bool dds_comp::convert_to_dxt(const crn_comp_params& params) + { + if ((params.m_quality_level == cCRNMaxQualityLevel) || (params.m_format == cCRNFmtDXT3)) + { + m_packed_tex = m_src_tex; + if (!m_packed_tex.convert(m_pixel_fmt, false, m_pack_params)) + return false; + } + else + { + const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; + + m_q1_params.m_quality_level = params.m_quality_level; + m_q1_params.m_hierarchical = hierarchical; + + m_q5_params.m_quality_level = params.m_quality_level; + m_q5_params.m_hierarchical = hierarchical; + + if (!m_pQDXT_state) 
+ { + m_pQDXT_state = crnlib_new(m_task_pool); + + if (params.m_pProgress_func) + { + m_q1_params.m_pProgress_func = progress_callback_func_phase_0; + m_q1_params.m_pProgress_data = (void*)¶ms; + m_q5_params.m_pProgress_func = progress_callback_func_phase_0; + m_q5_params.m_pProgress_data = (void*)¶ms; + } + + if (!m_src_tex.qdxt_pack_init(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params, m_pixel_fmt, false)) + return false; + + if (params.m_pProgress_func) + { + m_q1_params.m_pProgress_func = progress_callback_func_phase_1; + m_q5_params.m_pProgress_func = progress_callback_func_phase_1; + } + } + else + { + if (params.m_pProgress_func) + { + m_q1_params.m_pProgress_func = progress_callback_func; + m_q1_params.m_pProgress_data = (void*)¶ms; + m_q5_params.m_pProgress_func = progress_callback_func; + m_q5_params.m_pProgress_data = (void*)¶ms; + } + } + + if (!m_src_tex.qdxt_pack(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params)) + return false; + } + + return true; + } + + bool dds_comp::compress_init(const crn_comp_params& params) + { + clear(); + + m_pParams = ¶ms; + + if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) + return false; + + if (math::minimum(m_pParams->m_faces, m_pParams->m_levels) < 1) + return false; + + if (!create_dds_tex(m_src_tex)) + return false; + + m_pack_params.init(*m_pParams); + if (params.m_pProgress_func) + { + m_pack_params.m_pProgress_callback = progress_callback_func; + m_pack_params.m_pProgress_callback_user_data_ptr = (void*)¶ms; + } + + m_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(static_cast(m_pParams->m_format)); + if (m_pixel_fmt == PIXEL_FMT_INVALID) + return false; + if ((m_pixel_fmt == PIXEL_FMT_DXT1) && (m_src_tex.has_alpha()) && (m_pack_params.m_use_both_block_types) && (m_pParams->m_flags & cCRNCompFlagDXT1AForTransparency)) + m_pixel_fmt = PIXEL_FMT_DXT1A; + + if 
(!m_task_pool.init(m_pParams->m_num_helper_threads)) + return false; + m_pack_params.m_pTask_pool = &m_task_pool; + + const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; + m_q1_params.init(m_pack_params, params.m_quality_level, hierarchical); + m_q5_params.init(m_pack_params, params.m_quality_level, hierarchical); + + return true; + } + + bool dds_comp::compress_pass(const crn_comp_params& params, float *pEffective_bitrate) + { + if (pEffective_bitrate) *pEffective_bitrate = 0.0f; + + if (!m_pParams) + return false; + + if (!convert_to_dxt(params)) + return false; + + dynamic_stream out_stream; + out_stream.reserve(512*1024); + data_stream_serializer serializer(out_stream); + + if (!m_packed_tex.write_dds(serializer)) + return false; + out_stream.reserve(0); + + m_comp_data.swap(out_stream.get_buf()); + + if (pEffective_bitrate) + { + lzma_codec lossless_codec; + + crnlib::vector cmp_tex_bytes; + if (lossless_codec.pack(m_comp_data.get_ptr(), m_comp_data.size(), cmp_tex_bytes)) + { + uint comp_size = cmp_tex_bytes.size(); + if (comp_size) + { + *pEffective_bitrate = (comp_size * 8.0f) / m_src_tex.get_total_pixels_in_all_faces_and_mips(); + } + } + } + + return true; + } + + void dds_comp::compress_deinit() + { + clear(); + } + +} // namespace crnlib diff --git a/crnlib/crn_dds_comp.h b/crnlib/crn_dds_comp.h new file mode 100644 index 00000000..dc671452 --- /dev/null +++ b/crnlib/crn_dds_comp.h @@ -0,0 +1,48 @@ +// File: crn_comp.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_comp.h" +#include "crn_dds_texture.h" +#include "crn_texture_comp.h" + +namespace crnlib +{ + class dds_comp : public itexture_comp + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(dds_comp); + + public: + dds_comp(); + virtual ~dds_comp(); + + virtual const wchar_t *get_ext() const { return L"DDS"; } + + virtual bool compress_init(const crn_comp_params& params); + virtual bool compress_pass(const crn_comp_params& params, float 
*pEffective_bitrate); + virtual void compress_deinit(); + + virtual const crnlib::vector& get_comp_data() const { return m_comp_data; } + virtual crnlib::vector& get_comp_data() { return m_comp_data; } + + private: + dds_texture m_src_tex; + dds_texture m_packed_tex; + + crnlib::vector m_comp_data; + + const crn_comp_params* m_pParams; + + pixel_format m_pixel_fmt; + dxt_image::pack_params m_pack_params; + + task_pool m_task_pool; + qdxt1_params m_q1_params; + qdxt5_params m_q5_params; + dds_texture::qdxt_state *m_pQDXT_state; + + void clear(); + bool create_dds_tex(dds_texture &dds_tex); + bool convert_to_dxt(const crn_comp_params& params); + }; + +} // namespace crnlib diff --git a/crnlib/crn_dds_texture.cpp b/crnlib/crn_dds_texture.cpp new file mode 100644 index 00000000..2b92bf1c --- /dev/null +++ b/crnlib/crn_dds_texture.cpp @@ -0,0 +1,2514 @@ +// File: crn_dds_texture.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dds_texture.h" +#include "crn_cfile_stream.h" +#include "crn_image_utils.h" +#include "crn_console.h" +#include "crn_texture_comp.h" + +#define CRND_HEADER_FILE_ONLY +#include "../inc/crn_decomp.h" + +namespace crnlib +{ + const vec2I g_vertical_cross_image_offsets[6] = { vec2I(2, 1), vec2I(0, 1), vec2I(1, 0), vec2I(1, 2), vec2I(1, 1), vec2I(1, 3) }; + + mip_level::mip_level() : + m_width(0), + m_height(0), + m_comp_flags(pixel_format_helpers::cDefaultCompFlags), + m_format(PIXEL_FMT_INVALID), + m_pImage(NULL), + m_pDXTImage(NULL) + { + } + + mip_level::mip_level(const mip_level& other) : + m_width(0), + m_height(0), + m_comp_flags(pixel_format_helpers::cDefaultCompFlags), + m_format(PIXEL_FMT_INVALID), + m_pImage(NULL), + m_pDXTImage(NULL) + { + *this = other; + } + + // Deep-copies rhs: the scalar fields are copied and new copies of its image objects are allocated. + mip_level& mip_level::operator= (const mip_level& rhs) + { + // Self-assignment must be a no-op: clear() below would otherwise delete the very images we are about to copy from. + if (this == &rhs) + return *this; + + clear(); + + m_width = rhs.m_width; + m_height = rhs.m_height; + m_comp_flags = rhs.m_comp_flags; + m_format = rhs.m_format; + + if (rhs.m_pImage) + m_pImage = 
crnlib_new(*rhs.m_pImage); + + if (rhs.m_pDXTImage) + m_pDXTImage = crnlib_new(*rhs.m_pDXTImage); + + return *this; + } + + mip_level::~mip_level() + { + crnlib_delete(m_pImage); + crnlib_delete(m_pDXTImage); + } + + void mip_level::clear() + { + m_width = 0; + m_height = 0; + m_comp_flags = pixel_format_helpers::cDefaultCompFlags; + m_format = PIXEL_FMT_INVALID; + + if (m_pImage) + { + crnlib_delete(m_pImage); + m_pImage = NULL; + } + + if (m_pDXTImage) + { + crnlib_delete(m_pDXTImage); + m_pDXTImage = NULL; + } + } + + void mip_level::assign(image_u8* p, pixel_format fmt) + { + CRNLIB_ASSERT(p); + + clear(); + + m_pImage = p; + + m_width = p->get_width(); + m_height = p->get_height(); + + if (fmt != PIXEL_FMT_INVALID) + m_format = fmt; + else + { + if (p->is_grayscale()) + m_format = p->is_component_valid(3) ? PIXEL_FMT_A8L8 : PIXEL_FMT_L8; + else + m_format = p->is_component_valid(3) ? PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; + } + + m_comp_flags = p->get_comp_flags(); //pixel_format_helpers::get_component_flags(m_format); + } + + void mip_level::assign(dxt_image* p, pixel_format fmt) + { + CRNLIB_ASSERT(p); + + clear(); + + m_pDXTImage = p; + + m_width = p->get_width(); + m_height = p->get_height(); + + if (fmt != PIXEL_FMT_INVALID) + m_format = fmt; + else + m_format = pixel_format_helpers::from_dxt_format(p->get_format()); + + m_comp_flags = pixel_format_helpers::get_component_flags(m_format); + } + + bool mip_level::pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& orig_params) + { + CRNLIB_ASSERT(pixel_format_helpers::is_dxt(fmt)); + if (!pixel_format_helpers::is_dxt(fmt)) + return false; + + dxt_image::pack_params p(orig_params); + if (pixel_format_helpers::is_pixel_format_non_srgb(fmt) || (img.get_comp_flags() & pixel_format_helpers::cCompFlagNormalMap) || (img.get_comp_flags() & pixel_format_helpers::cCompFlagLumaChroma)) + { + // Disable perceptual colorspace metrics when packing to swizzled or non-RGB pixel 
formats. + p.m_perceptual = false; + } + + image_u8 tmp_img(img); + + clear(); + + m_format = fmt; + + if (cook) + cook_image(tmp_img); + + if ((pixel_format_helpers::is_alpha_only(fmt)) && (!tmp_img.has_alpha())) + tmp_img.set_alpha_to_luma(); + + dxt_format dxt_fmt = pixel_format_helpers::get_dxt_format(fmt); + + dxt_image* pDXT_image = crnlib_new<dxt_image>(); + if (!pDXT_image->init(dxt_fmt, tmp_img, p)) + { + // Ownership is only handed over by assign() below, so the image must be freed here or it leaks. + crnlib_delete(pDXT_image); + clear(); + return false; + } + + assign(pDXT_image, fmt); + + return true; + } + + // Packs this level's current contents to the DXT format fmt. Returns false if fmt is not a DXT format, if the level holds no image, or if packing fails. + bool mip_level::pack_to_dxt(pixel_format fmt, bool cook, const dxt_image::pack_params& p) + { + CRNLIB_ASSERT(pixel_format_helpers::is_dxt(fmt)); + if (!pixel_format_helpers::is_dxt(fmt)) + return false; + + image_u8 tmp_img; + image_u8* pImage = get_unpacked_image(tmp_img, true); + // get_unpacked_image() returns NULL when the level holds neither an unpacked nor a DXT image. + if (!pImage) + return false; + + return pack_to_dxt(*pImage, fmt, cook, p); + } + + bool mip_level::unpack_from_dxt(bool uncook) + { + if (!m_pDXTImage) + return false; + + image_u8* pNew_img = crnlib_new(); + image_u8* pImg = get_unpacked_image(*pNew_img, uncook); + pImg; + + CRNLIB_ASSERT(pImg == pNew_img); + + assign(pNew_img); + return true; + } + + // Sets the image's alpha channel from its luma, unpacking from DXT first if needed. Returns false if the level holds no image. + bool mip_level::set_alpha_to_luma() + { + if (m_pDXTImage) + unpack_from_dxt(true); + + // An empty level has no image to modify; fail instead of dereferencing NULL. + if (!m_pImage) + return false; + + m_pImage->set_alpha_to_luma(); + + m_comp_flags = m_pImage->get_comp_flags(); + + if (m_pImage->is_grayscale()) + m_format = PIXEL_FMT_A8L8; + else + m_format = PIXEL_FMT_A8R8G8B8; + + return true; + } + + // Applies conv_type to the image, unpacking from DXT first if needed, then refreshes the component flags and format. Returns false if the level holds no image. + bool mip_level::convert(image_utils::conversion_type conv_type) + { + if (m_pDXTImage) + unpack_from_dxt(true); + + // An empty level has no image to convert; fail instead of dereferencing NULL. + if (!m_pImage) + return false; + + image_utils::convert_image(*m_pImage, conv_type); + + m_comp_flags = m_pImage->get_comp_flags(); + + if (m_pImage->is_grayscale()) + m_format = m_pImage->has_alpha() ? PIXEL_FMT_A8L8 : PIXEL_FMT_L8; + else + m_format = m_pImage->has_alpha() ? 
PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; + + return true; + } + + bool mip_level::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p) + { + if (pixel_format_helpers::is_dxt(fmt)) + return pack_to_dxt(fmt, cook, p); + + image_u8 tmp_img; + image_u8* pImg = get_unpacked_image(tmp_img, true); + + image_u8* pImage = crnlib_new(); + pImage->set_comp_flags(pixel_format_helpers::get_component_flags(fmt)); + + if (!pImage->resize(pImg->get_width(), pImg->get_height())) + return false; + + for (uint y = 0; y < pImg->get_height(); y++) + { + for (uint x = 0; x < pImg->get_width(); x++) + { + color_quad_u8 c((*pImg)(x, y)); + + if ((pixel_format_helpers::is_alpha_only(fmt)) && (!pImg->has_alpha())) + { + c.a = static_cast(c.get_luma()); + } + else + { + if (pImage->is_grayscale()) + { + uint8 g = static_cast(c.get_luma()); + c.r = g; + c.g = g; + c.b = g; + } + + if (!pImage->is_component_valid(3)) + c.a = 255; + } + + (*pImage)(x, y) = c; + } + } + + assign(pImage, fmt); + + return true; + } + + void mip_level::cook_image(image_u8& img) const + { + image_utils::conversion_type conv_type = image_utils::get_conversion_type(true, m_format); + + if (conv_type != image_utils::cConversion_Invalid) + image_utils::convert_image(img, conv_type); + } + + void mip_level::uncook_image(image_u8& img) const + { + image_utils::conversion_type conv_type = image_utils::get_conversion_type(false, m_format); + + if (conv_type != image_utils::cConversion_Invalid) + image_utils::convert_image(img, conv_type); + } + + image_u8* mip_level::get_unpacked_image(image_u8& tmp, bool uncook) const + { + if (m_pImage) + return m_pImage; + + if (m_pDXTImage) + { + m_pDXTImage->unpack(tmp); + + tmp.set_comp_flags(m_comp_flags); + + if (uncook) + uncook_image(tmp); + + return &tmp; + } + + return NULL; + } + + // ------------------------------------------------------------------------- + + dds_texture::dds_texture() : + m_width(0), + m_height(0), + 
m_comp_flags(pixel_format_helpers::cDefaultCompFlags), + m_format(PIXEL_FMT_INVALID), + m_source_file_type(texture_file_types::cFormatInvalid) + { + } + + dds_texture::~dds_texture() + { + free_all_mips(); + } + + void dds_texture::clear() + { + free_all_mips(); + + m_name.clear(); + m_width = 0; + m_height = 0; + m_comp_flags = pixel_format_helpers::cDefaultCompFlags; + m_format = PIXEL_FMT_INVALID; + m_source_file_type = texture_file_types::cFormatInvalid; + m_last_error.clear(); + } + + void dds_texture::free_all_mips() + { + for (uint i = 0; i < m_faces.size(); i++) + for (uint j = 0; j < m_faces[i].size(); j++) + crnlib_delete(m_faces[i][j]); + + m_faces.clear(); + } + + dds_texture::dds_texture(const dds_texture& other) : + m_width(0), + m_height(0), + m_comp_flags(pixel_format_helpers::cDefaultCompFlags), + m_format(PIXEL_FMT_INVALID) + { + *this = other; + } + + dds_texture& dds_texture::operator= (const dds_texture& rhs) + { + if (this == &rhs) + return *this; + + clear(); + + m_name = rhs.m_name; + m_width = rhs.m_width; + m_height = rhs.m_height; + + m_comp_flags = rhs.m_comp_flags; + m_format = rhs.m_format; + + m_faces.resize(rhs.m_faces.size()); + for (uint i = 0; i < m_faces.size(); i++) + { + m_faces[i].resize(rhs.m_faces[i].size()); + + for (uint j = 0; j < rhs.m_faces[i].size(); j++) + m_faces[i][j] = crnlib_new(*rhs.m_faces[i][j]); + } + + CRNLIB_ASSERT((!is_valid()) || check()); + + return *this; + } + + bool dds_texture::write_dds(const wchar_t* pFilename) const + { + cfile_stream out_stream(pFilename, cDataStreamWritable | cDataStreamSeekable); + if (!out_stream.is_opened()) + return false; + + data_stream_serializer out_serializer(out_stream); + return write_dds(out_serializer); + } + + bool dds_texture::read_dds(const wchar_t* pFilename) + { + cfile_stream in_stream(pFilename); + if (!in_stream.is_opened()) + return false; + + data_stream_serializer in_serializer(in_stream); + return read_dds(in_serializer); + } + + bool 
dds_texture::read_dds(data_stream_serializer& serializer) + { + if (!read_dds_internal(serializer)) + { + clear(); + return false; + } + + return true; + } + + bool dds_texture::read_dds_internal(data_stream_serializer& serializer) + { + CRNLIB_ASSERT(serializer.get_little_endian()); + + clear(); + + set_last_error(L"Not a DDS file"); + + uint8 hdr[4]; + if (!serializer.read(hdr, sizeof(hdr))) + return false; + + if (memcmp(hdr, "DDS ", 4) != 0) + return false; + + DDSURFACEDESC2 desc; + if (!serializer.read(&desc, sizeof(desc))) + return false; + + if (!c_crnlib_little_endian_platform) + utils::endian_switch_dwords(reinterpret_cast(&desc), sizeof(desc) / sizeof(uint32)); + + if (desc.dwSize != sizeof(desc)) + return false; + + if ((!desc.dwHeight) || (!desc.dwWidth) || (desc.dwHeight > cDDSMaxImageDimensions) || (desc.dwWidth > cDDSMaxImageDimensions)) + return false; + + m_width = desc.dwWidth; + m_height = desc.dwHeight; + + uint num_mip_levels = 1; + + if ((desc.dwFlags & DDSD_MIPMAPCOUNT) && (desc.ddsCaps.dwCaps & DDSCAPS_MIPMAP) && (desc.dwMipMapCount)) + { + num_mip_levels = desc.dwMipMapCount; + if (num_mip_levels > utils::compute_max_mips(desc.dwWidth, desc.dwHeight)) + return false; + } + + uint num_faces = 1; + + if (desc.ddsCaps.dwCaps & DDSCAPS_COMPLEX) + { + if (desc.ddsCaps.dwCaps2 & DDSCAPS2_CUBEMAP) + { + const uint all_faces_mask = DDSCAPS2_CUBEMAP_POSITIVEX|DDSCAPS2_CUBEMAP_NEGATIVEX|DDSCAPS2_CUBEMAP_POSITIVEY|DDSCAPS2_CUBEMAP_NEGATIVEY|DDSCAPS2_CUBEMAP_POSITIVEZ|DDSCAPS2_CUBEMAP_NEGATIVEZ; + if ((desc.ddsCaps.dwCaps2 & all_faces_mask) != all_faces_mask) + { + set_last_error(L"Incomplete cubemaps unsupported"); + return false; + } + + num_faces = 6; + } + else if (desc.ddsCaps.dwCaps2 & DDSCAPS2_VOLUME) + { + set_last_error(L"Volume textures unsupported"); + return false; + } + } + + if (desc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) + { + // It's difficult to even make P8 textures with existing tools: + // nvdxt just hangs + // dxtex.exe 
just makes all-white textures + // So screw it. + set_last_error(L"Palettized textures unsupported"); + return false; + } + + dxt_format dxt_fmt = cDXTInvalid; + + if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) + { + // http://code.google.com/p/nvidia-texture-tools/issues/detail?id=41 + // ATI2 YX: 0 (0x00000000) + // ATI2 XY: 1498952257 (0x59583241) (BC5) + // ATI Compressonator obeys this stuff, nvidia's tools (like readdxt) don't - oh great + + switch (desc.ddpfPixelFormat.dwFourCC) + { + case PIXEL_FMT_DXT1: + { + m_format = PIXEL_FMT_DXT1; + dxt_fmt = cDXT1; + break; + } + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + { + m_format = PIXEL_FMT_DXT3; + dxt_fmt = cDXT3; + break; + } + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + { + switch (desc.ddpfPixelFormat.dwRGBBitCount) + { + case PIXEL_FMT_DXT5_CCxY: + m_format = PIXEL_FMT_DXT5_CCxY; + break; + case PIXEL_FMT_DXT5_xGxR: + m_format = PIXEL_FMT_DXT5_xGxR; + break; + case PIXEL_FMT_DXT5_xGBR: + m_format = PIXEL_FMT_DXT5_xGBR; + break; + case PIXEL_FMT_DXT5_AGBR: + m_format = PIXEL_FMT_DXT5_AGBR; + break; + default: + m_format = PIXEL_FMT_DXT5; + break; + } + + dxt_fmt = cDXT5; + break; + } + case PIXEL_FMT_3DC: + { + if (desc.ddpfPixelFormat.dwRGBBitCount == CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y')) + { + dxt_fmt = cDXN_XY; + m_format = PIXEL_FMT_DXN; + } + else + { + dxt_fmt = cDXN_YX; // aka ATI2 + m_format = PIXEL_FMT_3DC; + } + + break; + } + case PIXEL_FMT_DXT5A: + { + m_format = PIXEL_FMT_DXT5A; + dxt_fmt = cDXT5A; + break; + } + default: + { + dynamic_wstring err_msg(cVarArg, L"Unsupported DDS FOURCC format: 0x%08X", desc.ddpfPixelFormat.dwFourCC); + set_last_error(err_msg.get_ptr()); + return false; + } + } + } + else if ((desc.ddpfPixelFormat.dwRGBBitCount < 8) || (desc.ddpfPixelFormat.dwRGBBitCount > 32) || (desc.ddpfPixelFormat.dwRGBBitCount & 7)) + { + set_last_error(L"Unsupported bit count"); + return false; + } + else if (desc.ddpfPixelFormat.dwFlags & DDPF_RGB) + { + if 
(desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) + { + if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) + m_format = PIXEL_FMT_A8L8; + else + m_format = PIXEL_FMT_L8; + } + else if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) + m_format = PIXEL_FMT_A8R8G8B8; + else + m_format = PIXEL_FMT_R8G8B8; + } + else if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) + { + if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) + m_format = PIXEL_FMT_A8L8; + else + m_format = PIXEL_FMT_A8; + } + else if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) + { + m_format = PIXEL_FMT_L8; + } + else if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHA) + { + m_format = PIXEL_FMT_A8; + } + else + { + set_last_error(L"Unsupported format"); + return false; + } + + m_comp_flags = pixel_format_helpers::get_component_flags(m_format); + + uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; + + if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) + //bits_per_pixel = ((m_format == PIXEL_FMT_DXT1) || (m_format == PIXEL_FMT_DXT5A)) ? 
4 : 8; + bits_per_pixel = pixel_format_helpers::get_bpp(m_format); + + set_last_error(L"Load failed"); + + uint default_pitch; + if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) + default_pitch = (((desc.dwWidth + 3) & ~3) * ((desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; + else + default_pitch = (desc.dwWidth * bits_per_pixel) >> 3; + + uint pitch = desc.lPitch; + if (!pitch) + pitch = default_pitch; + else if ((pitch > default_pitch * 8) || (pitch & 3)) + { + set_last_error(L"Invalid pitch"); + return false; + } + + crnlib::vector load_buf; + + uint mask_size[4]; + mask_size[0] = math::bitmask_size(desc.ddpfPixelFormat.dwRBitMask); + mask_size[1] = math::bitmask_size(desc.ddpfPixelFormat.dwGBitMask); + mask_size[2] = math::bitmask_size(desc.ddpfPixelFormat.dwBBitMask); + mask_size[3] = math::bitmask_size(desc.ddpfPixelFormat.dwRGBAlphaBitMask); + + uint mask_ofs[4]; + mask_ofs[0] = math::bitmask_ofs(desc.ddpfPixelFormat.dwRBitMask); + mask_ofs[1] = math::bitmask_ofs(desc.ddpfPixelFormat.dwGBitMask); + mask_ofs[2] = math::bitmask_ofs(desc.ddpfPixelFormat.dwBBitMask); + mask_ofs[3] = math::bitmask_ofs(desc.ddpfPixelFormat.dwRGBAlphaBitMask); + + if ((desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) && (!mask_size[0])) + { + mask_size[0] = desc.ddpfPixelFormat.dwRGBBitCount >> 3; + if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) + mask_size[0] /= 2; + } + + m_faces.resize(num_faces); + + bool dxt1_alpha = false; + + for (uint face_index = 0; face_index < num_faces; face_index++) + { + m_faces[face_index].resize(num_mip_levels); + + for (uint level_index = 0; level_index < num_mip_levels; level_index++) + { + const uint width = math::maximum(desc.dwWidth >> level_index, 1U); + const uint height = math::maximum(desc.dwHeight >> level_index, 1U); + + mip_level* pMip = crnlib_new(); + m_faces[face_index][level_index] = pMip; + + if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) + { + const uint bytes_per_block = ((m_format == PIXEL_FMT_DXT1) || (m_format == 
PIXEL_FMT_DXT1A) || (m_format == PIXEL_FMT_DXT5A)) ? 8 : 16; + + const uint num_blocks_x = (width + 3) >> 2; + const uint num_blocks_y = (height + 3) >> 2; + + const uint actual_level_pitch = num_blocks_x * num_blocks_y * bytes_per_block; + const uint level_pitch = level_index ? actual_level_pitch : pitch; + + dxt_image* pDXTImage = crnlib_new(); + if (!pDXTImage->init(dxt_fmt, width, height, false)) + { + crnlib_delete(pDXTImage); + + CRNLIB_ASSERT(0); + return false; + } + + CRNLIB_ASSERT(pDXTImage->get_element_vec().size() * sizeof(dxt_image::element) == actual_level_pitch); + + if (!serializer.read(&pDXTImage->get_element_vec()[0], actual_level_pitch)) + { + crnlib_delete(pDXTImage); + + return false; + } + + // DDS image in memory are always assumed to be little endian - the same as DDS itself. + //if (c_crnlib_big_endian_platform) + // utils::endian_switch_words(reinterpret_cast(&pDXTImage->get_element_vec()[0]), actual_level_pitch / sizeof(uint16)); + + if (level_pitch > actual_level_pitch) + { + if (!serializer.skip(level_pitch - actual_level_pitch)) + { + crnlib_delete(pDXTImage); + + return false; + } + } + + if ((m_format == PIXEL_FMT_DXT1) && (!dxt1_alpha)) + dxt1_alpha = pDXTImage->has_alpha(); + + pMip->assign(pDXTImage, m_format); + } + else + { + image_u8* pImage = crnlib_new(width, height); + + pImage->set_comp_flags(m_comp_flags); + + const uint bytes_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount >> 3; + const uint actual_line_pitch = width * bytes_per_pixel; + const uint line_pitch = level_index ? 
actual_line_pitch : pitch; + + if (load_buf.size() < line_pitch) + load_buf.resize(line_pitch); + + color_quad_u8 q(0, 0, 0, 255); + + for (uint y = 0; y < height; y++) + { + if (!serializer.read(&load_buf[0], line_pitch)) + { + crnlib_delete(pImage); + return false; + } + + color_quad_u8* pDst = pImage->get_scanline(y); + + for (uint x = 0; x < width; x++) + { + const uint8* pPixel = &load_buf[x * bytes_per_pixel]; + + uint c = 0; + // Assumes DDS is always little endian. + for (uint l = 0; l < bytes_per_pixel; l++) + c |= (pPixel[l] << (l * 8U)); + + for (uint i = 0; i < 4; i++) + { + if (!mask_size[i]) + continue; + + uint mask = (1U << mask_size[i]) - 1U; + uint bits = (c >> mask_ofs[i]) & mask; + + uint v = (bits * 255 + (mask >> 1)) / mask; + + q.set_component(i, v); + } + + if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) + { + q.g = q.r; + q.b = q.r; + } + + *pDst++ = q; + } + } + + pMip->assign(pImage, m_format); + + CRNLIB_ASSERT(pMip->get_comp_flags() == m_comp_flags); + } + } + } + + clear_last_error(); + + if (dxt1_alpha) + { + m_format = PIXEL_FMT_DXT1A; + + m_comp_flags = pixel_format_helpers::get_component_flags(m_format); + + for (uint f = 0; f < m_faces.size(); f++) + { + for (uint l = 0; l < m_faces[f].size(); l++) + { + if (m_faces[f][l]->get_dxt_image()) + { + m_faces[f][l]->set_format(m_format); + m_faces[f][l]->set_comp_flags(m_comp_flags); + + m_faces[f][l]->get_dxt_image()->change_dxt1_to_dxt1a(); + } + } + } + } + + CRNLIB_ASSERT(check()); + + return true; + } + + bool dds_texture::check() const + { + uint levels = 0; + for (uint f = 0; f < m_faces.size(); f++) + { + if (!f) + levels = m_faces[f].size(); + else if (m_faces[f].size() != levels) + return false; + + for (uint l = 0; l < m_faces[f].size(); l++) + { + mip_level* p = m_faces[f][l]; + if (!p) + return false; + + if (!p->is_valid()) + return false; + + if (!l) + { + if (m_width != p->get_width()) + return false; + if (m_height != p->get_height()) + return false; + } + + if 
(p->get_comp_flags() != m_comp_flags) + return false; + + if (p->get_format() != m_format) + return false; + + if (p->get_image()) + { + if (pixel_format_helpers::is_dxt(p->get_format())) + return false; + + if (p->get_image()->get_width() != p->get_width()) + return false; + if (p->get_image()->get_height() != p->get_height()) + return false; + if (p->get_image()->get_comp_flags() != m_comp_flags) + return false; + } + else if (!pixel_format_helpers::is_dxt(p->get_format())) + return false; + } + } + + return true; + } + + bool dds_texture::write_dds(data_stream_serializer& serializer) const + { + if (!m_width) + { + set_last_error(L"Nothing to write"); + return false; + } + + set_last_error(L"Write_dds() failed"); + + if (!serializer.write("DDS ", sizeof(uint32))) + return false; + + DDSURFACEDESC2 desc; + utils::zero_object(desc); + + desc.dwSize = sizeof(desc); + desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS; + + desc.dwWidth = m_width; + desc.dwHeight = m_height; + + desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + desc.ddpfPixelFormat.dwSize = sizeof(desc.ddpfPixelFormat); + + if (get_num_levels() > 1) + { + desc.dwMipMapCount = get_num_levels(); + desc.dwFlags |= DDSD_MIPMAPCOUNT; + desc.ddsCaps.dwCaps |= (DDSCAPS_MIPMAP | DDSCAPS_COMPLEX); + } + + if (get_num_faces() > 1) + { + desc.ddsCaps.dwCaps |= DDSCAPS_COMPLEX; + desc.ddsCaps.dwCaps2 |= DDSCAPS2_CUBEMAP; + desc.ddsCaps.dwCaps2 |= DDSCAPS2_CUBEMAP_POSITIVEX|DDSCAPS2_CUBEMAP_NEGATIVEX|DDSCAPS2_CUBEMAP_POSITIVEY|DDSCAPS2_CUBEMAP_NEGATIVEY|DDSCAPS2_CUBEMAP_POSITIVEZ|DDSCAPS2_CUBEMAP_NEGATIVEZ; + } + + bool dxt_format = false; + if (pixel_format_helpers::is_dxt(m_format)) + { + dxt_format = true; + + desc.ddpfPixelFormat.dwFlags |= DDPF_FOURCC; + + switch (m_format) + { + case PIXEL_FMT_DXN: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_3DC; + desc.ddpfPixelFormat.dwRGBBitCount = PIXEL_FMT_DXN; + break; + } + case PIXEL_FMT_DXT1A: + { + desc.ddpfPixelFormat.dwFourCC = 
(uint32)PIXEL_FMT_DXT1; + desc.ddpfPixelFormat.dwRGBBitCount = 0; + break; + } + case PIXEL_FMT_DXT5_CCxY: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; + desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_CCxY; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; + desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_xGxR; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; + desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_xGBR; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; + desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_AGBR; + break; + } + default: + { + desc.ddpfPixelFormat.dwFourCC = (uint32)m_format; + desc.ddpfPixelFormat.dwRGBBitCount = 0; + break; + } + } + } + else + { + switch (m_format) + { + case PIXEL_FMT_A8R8G8B8: + { + desc.ddpfPixelFormat.dwFlags |= (DDPF_RGB | DDPF_ALPHAPIXELS); + desc.ddpfPixelFormat.dwRGBBitCount = 32; + desc.ddpfPixelFormat.dwRBitMask = 0xFF0000; + desc.ddpfPixelFormat.dwGBitMask = 0x00FF00; + desc.ddpfPixelFormat.dwBBitMask = 0x0000FF; + desc.ddpfPixelFormat.dwRGBAlphaBitMask = 0xFF000000; + break; + } + case PIXEL_FMT_R8G8B8: + { + desc.ddpfPixelFormat.dwFlags |= DDPF_RGB; + desc.ddpfPixelFormat.dwRGBBitCount = 24; + desc.ddpfPixelFormat.dwRBitMask = 0xFF0000; + desc.ddpfPixelFormat.dwGBitMask = 0x00FF00; + desc.ddpfPixelFormat.dwBBitMask = 0x0000FF; + break; + } + case PIXEL_FMT_A8: + { + desc.ddpfPixelFormat.dwFlags |= DDPF_ALPHA; + desc.ddpfPixelFormat.dwRGBBitCount = 8; + desc.ddpfPixelFormat.dwRGBAlphaBitMask = 0xFF; + break; + } + case PIXEL_FMT_L8: + { + desc.ddpfPixelFormat.dwFlags |= DDPF_LUMINANCE; + desc.ddpfPixelFormat.dwRGBBitCount = 8; + desc.ddpfPixelFormat.dwRBitMask = 0xFF; + break; + } + case PIXEL_FMT_A8L8: + { + desc.ddpfPixelFormat.dwFlags |= DDPF_ALPHAPIXELS | DDPF_LUMINANCE; + 
desc.ddpfPixelFormat.dwRGBBitCount = 16; + desc.ddpfPixelFormat.dwRBitMask = 0xFF; + desc.ddpfPixelFormat.dwRGBAlphaBitMask = 0xFF00; + break; + } + default: + { + CRNLIB_ASSERT(false); + return false; + } + } + } + + if (!c_crnlib_little_endian_platform) + utils::endian_switch_dwords(reinterpret_cast(&desc), sizeof(desc) / sizeof(uint32)); + + if (!serializer.write(&desc, sizeof(desc))) + return false; + + if (!c_crnlib_little_endian_platform) + utils::endian_switch_dwords(reinterpret_cast(&desc), sizeof(desc) / sizeof(uint32)); + + crnlib::vector write_buf; + + for (uint face = 0; face < get_num_faces(); face++) + { + for (uint level = 0; level < get_num_levels(); level++) + { + const mip_level* pLevel = get_level(face, level); + + if (dxt_format) + { + const uint width = pLevel->get_width(); + const uint height = pLevel->get_height(); + + CRNLIB_ASSERT(width == math::maximum(1, m_width >> level)); + CRNLIB_ASSERT(height == math::maximum(1, m_height >> level)); + + const dxt_image* p = pLevel->get_dxt_image(); + + const uint num_blocks_x = (width + 3) >> 2; + const uint num_blocks_y = (height + 3) >> 2; + + CRNLIB_ASSERT(num_blocks_x * num_blocks_y * p->get_elements_per_block() == p->get_num_elements()); + width, height, num_blocks_x, num_blocks_y; + + const uint size_in_bytes = p->get_num_elements() * sizeof(dxt_image::element); + if (size_in_bytes > write_buf.size()) + write_buf.resize(size_in_bytes); + + memcpy(&write_buf[0], p->get_element_ptr(), size_in_bytes); + + // DXT data is always little endian in memory, just like the DDS format. 
+ //if (!c_crnlib_little_endian_platform) + // utils::endian_switch_words(reinterpret_cast(&write_buf[0]), size_in_bytes / sizeof(WORD)); + + if (!serializer.write(&write_buf[0], size_in_bytes)) + return false; + } + else + { + const uint width = pLevel->get_width(); + const uint height = pLevel->get_height(); + + const image_u8* p = pLevel->get_image(); + + const uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; + const uint bytes_per_pixel = bits_per_pixel >> 3; + + const uint pitch = width * bytes_per_pixel; + if (pitch > write_buf.size()) + write_buf.resize(pitch); + + for (uint y = 0; y < height; y++) + { + const color_quad_u8* pSrc = p->get_scanline(y); + const color_quad_u8* pEnd = pSrc + width; + + uint8* pDst = &write_buf[0]; + + do + { + const color_quad_u8& c = *pSrc; + + uint x = 0; + switch (m_format) + { + case PIXEL_FMT_A8R8G8B8: + { + x = (c.a << 24) | (c.r << 16) | (c.g << 8) | c.b; + break; + } + case PIXEL_FMT_R8G8B8: + { + x = (c.r << 16) | (c.g << 8) | c.b; + break; + } + case PIXEL_FMT_A8: + { + x = c.a; + break; + } + case PIXEL_FMT_A8L8: + { + x = (c.a << 8) | c.get_luma(); + break; + } + case PIXEL_FMT_L8: + { + x = c.get_luma(); + break; + } + default: break; + } + + pDst[0] = static_cast(x); + if (bytes_per_pixel > 1) + { + pDst[1] = static_cast(x >> 8); + + if (bytes_per_pixel > 2) + { + pDst[2] = static_cast(x >> 16); + + if (bytes_per_pixel > 3) + pDst[3] = static_cast(x >> 24); + } + } + + pSrc++; + pDst += bytes_per_pixel; + + } while (pSrc != pEnd); + + if (!serializer.write(&write_buf[0], pitch)) + return false; + } + } + } + } + + clear_last_error(); + + return true; + } + + void dds_texture::assign(face_vec& faces) + { + CRNLIB_ASSERT(!faces.empty()); + if (faces.empty()) + return; + + free_all_mips(); + +#ifdef CRNLIB_BUILD_DEBUG + for (uint i = 1; i < faces.size(); i++) + CRNLIB_ASSERT(faces[i].size() == faces[0].size()); +#endif + + mip_level* p = faces[0][0]; + m_width = p->get_width(); + m_height = p->get_height(); + 
m_comp_flags = p->get_comp_flags(); + m_format = p->get_format(); + + m_faces.swap(faces); + + CRNLIB_ASSERT(check()); + } + + void dds_texture::assign(mip_level* pLevel) + { + face_vec faces(1, mip_ptr_vec(1, pLevel)); + assign(faces); + } + + void dds_texture::assign(image_u8* p, pixel_format fmt) + { + mip_level* pLevel = crnlib_new(); + pLevel->assign(p, fmt); + assign(pLevel); + } + + void dds_texture::assign(dxt_image* p, pixel_format fmt) + { + mip_level* pLevel = crnlib_new(); + pLevel->assign(p, fmt); + assign(pLevel); + } + + void dds_texture::set(texture_file_types::format source_file_type, const dds_texture& dds_texture) + { + clear(); + + *this = dds_texture; + m_source_file_type = source_file_type; + } + + image_u8* dds_texture::get_level_image(uint face, uint level, image_u8& img, bool uncook) const + { + if (!is_valid()) + return NULL; + + const mip_level* pLevel = get_level(face, level); + + return pLevel->get_unpacked_image(img, uncook); + } + + void dds_texture::swap(dds_texture& img) + { + utils::swap(m_width, img.m_width); + utils::swap(m_height, img.m_height); + utils::swap(m_comp_flags, img.m_comp_flags); + utils::swap(m_format, img.m_format); + m_faces.swap(img.m_faces); + m_last_error.swap(img.m_last_error); + utils::swap(m_source_file_type, img.m_source_file_type); + + CRNLIB_ASSERT(check()); + } + + texture_type dds_texture::determine_texture_type() const + { + if (!is_valid()) + return cTextureTypeUnknown; + + if (get_num_faces() == 6) + return cTextureTypeCubemap; + else if (is_vertical_cross()) + return cTextureTypeVerticalCrossCubemap; + else if (is_normal_map()) + return cTextureTypeNormalMap; + + return cTextureTypeRegularMap; + } + + void dds_texture::discard_mips() + { + for (uint f = 0; f < m_faces.size(); f++) + { + if (m_faces[f].size() > 1) + { + for (uint l = 1; l < m_faces[f].size(); l++) + crnlib_delete(m_faces[f][l]); + + m_faces[f].resize(1); + } + } + + CRNLIB_ASSERT(check()); + } + + void dds_texture::init(uint width, 
uint height, uint levels, uint faces, pixel_format fmt, const wchar_t* pName) + { + clear(); + + CRNLIB_ASSERT((width > 0) && (height > 0) && (levels > 0)); + CRNLIB_ASSERT((faces == 1) || (faces == 6)); + + m_width = width; + m_height = height; + m_comp_flags = pixel_format_helpers::get_component_flags(fmt); + m_format = fmt; + if (pName) + m_name.set(pName); + + m_faces.resize(faces); + for (uint f = 0; f < faces; f++) + { + m_faces[f].resize(levels); + for (uint l = 0; l < levels; l++) + { + m_faces[f][l] = crnlib_new(); + + const uint mip_width = math::maximum(1U, width >> l); + const uint mip_height = math::maximum(1U, height >> l); + if (pixel_format_helpers::is_dxt(fmt)) + { + dxt_image* p = crnlib_new(); + p->init(pixel_format_helpers::get_dxt_format(fmt), mip_width, mip_height, true); + m_faces[f][l]->assign(p, m_format); + } + else + { + image_u8* p = crnlib_new(mip_width, mip_height); + m_faces[f][l]->assign(p, m_format); + } + } + } + + CRNLIB_ASSERT(check()); + } + + void dds_texture::discard_mipmaps() + { + if (!is_valid()) + return; + + discard_mips(); + } + + bool dds_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p) + { + if (!is_valid()) + return false; + + if (fmt == get_format()) + return true; + + uint total_pixels = 0; + for (uint f = 0; f < m_faces.size(); f++) + for (uint l = 0; l < m_faces[f].size(); l++) + total_pixels += m_faces[f][l]->get_total_pixels(); + + uint num_pixels_processed = 0; + + uint progress_start = p.m_progress_start; + + for (uint f = 0; f < m_faces.size(); f++) + { + for (uint l = 0; l < m_faces[f].size(); l++) + { + const uint num_pixels = m_faces[f][l]->get_total_pixels(); + + uint progress_range = (num_pixels * p.m_progress_range) / total_pixels; + + dxt_image::pack_params tmp_params(p); + tmp_params.m_progress_start = math::clamp(progress_start, 0, p.m_progress_range); + tmp_params.m_progress_range = math::clamp(progress_range, 0, p.m_progress_range - tmp_params.m_progress_start); + + 
progress_start += tmp_params.m_progress_range; + + if (!m_faces[f][l]->convert(fmt, cook, tmp_params)) + { + clear(); + return false; + } + + num_pixels_processed += num_pixels; + } + } + + m_format = get_level(0, 0)->get_format(); + m_comp_flags = get_level(0, 0)->get_comp_flags(); + + CRNLIB_ASSERT(check()); + + if (p.m_pProgress_callback) + { + if (!p.m_pProgress_callback(p.m_progress_start + p.m_progress_range, p.m_pProgress_callback_user_data_ptr)) + return false; + } + + return true; + } + + bool dds_texture::convert(pixel_format fmt, const dxt_image::pack_params& p) + { + return convert(fmt, true, p); + } + + bool dds_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p, int qdxt_quality, bool hierarchical) + { + if ((!pixel_format_helpers::is_dxt(fmt)) || (fmt == PIXEL_FMT_DXT3)) + { + // QDXT doesn't support DXT3. + return convert(fmt, cook, p); + } + + dds_texture src_tex(*this); + + if (src_tex.is_packed()) + src_tex.unpack_from_dxt(true); + + if (cook) + { + dds_texture cooked_tex(src_tex); + + for (uint f = 0; f < m_faces.size(); f++) + for (uint l = 0; l < m_faces[f].size(); l++) + src_tex.m_faces[f][l]->cook_image(*cooked_tex.m_faces[f][l]->get_image()); + + src_tex.swap(cooked_tex); + } + + qdxt1_params q1_params; + q1_params.init(p, qdxt_quality, hierarchical); + + qdxt5_params q5_params; + q5_params.init(p, qdxt_quality, hierarchical); + + if (pixel_format_helpers::is_pixel_format_non_srgb(fmt) || (m_comp_flags & pixel_format_helpers::cCompFlagNormalMap) || (m_comp_flags & pixel_format_helpers::cCompFlagLumaChroma)) + { + // Disable perceptual colorspace metrics when packing to swizzled or non-RGB pixel formats. 
+ q1_params.m_perceptual = false; + } + + task_pool tp; + if (!tp.init(p.m_num_helper_threads)) + return false; + + dds_texture packed_tex; + + qdxt_state state(tp); + if (!src_tex.qdxt_pack_init(state, packed_tex, q1_params, q5_params, fmt, false)) + return false; + + if (!src_tex.qdxt_pack(state, packed_tex, q1_params, q5_params)) + return false; + + swap(packed_tex); + + return true; + } + + bool dds_texture::is_packed() const + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + return get_level(0, 0)->is_packed(); + } + + bool dds_texture::set_alpha_to_luma() + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + if (is_packed()) + unpack_from_dxt(true); + + for (uint f = 0; f < m_faces.size(); f++) + for (uint l = 0; l < get_num_levels(); l++) + get_level(f, l)->set_alpha_to_luma(); + + m_format = get_level(0, 0)->get_format(); + m_comp_flags = get_level(0, 0)->get_comp_flags(); + + CRNLIB_ASSERT(check()); + + return true; + } + + bool dds_texture::convert(image_utils::conversion_type conv_type) + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + if (is_packed()) + unpack_from_dxt(true); + + for (uint f = 0; f < m_faces.size(); f++) + for (uint l = 0; l < get_num_levels(); l++) + get_level(f, l)->convert(conv_type); + + m_format = get_level(0, 0)->get_format(); + m_comp_flags = get_level(0, 0)->get_comp_flags(); + + CRNLIB_ASSERT(check()); + + return true; + } + + bool dds_texture::unpack_from_dxt(bool uncook) + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + CRNLIB_ASSERT(pixel_format_helpers::is_dxt(m_format)); + if (!pixel_format_helpers::is_dxt(m_format)) + return false; + + for (uint f = 0; f < m_faces.size(); f++) + for (uint l = 0; l < get_num_levels(); l++) + if (!get_level(f, l)->unpack_from_dxt(uncook)) + return false; + + m_format = get_level(0, 0)->get_format(); + m_comp_flags = get_level(0, 0)->get_comp_flags(); + + CRNLIB_ASSERT(check()); + + return true; + } + + bool 
dds_texture::has_alpha() const + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + if (pixel_format_helpers::has_alpha(m_format)) + return true; + + if ((m_format == PIXEL_FMT_DXT1) && (get_level(0, 0)->get_dxt_image())) + { + // Try scanning DXT1 mip levels to find blocks with transparent pixels. + for (uint f = 0; f < get_num_faces(); f++) + if (get_level(f, 0)->get_dxt_image()->has_alpha()) + return true; + } + + return false; + } + + bool dds_texture::is_normal_map() const + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + if (pixel_format_helpers::is_normal_map(get_format())) + return true; + + const mip_level* pLevel = get_level(0, 0); + + if (pLevel->get_image()) + return image_utils::is_normal_map(*pLevel->get_image(), m_name.get_ptr()); + + image_u8 tmp; + pLevel->get_dxt_image()->unpack(tmp); + return image_utils::is_normal_map(tmp, m_name.get_ptr()); + } + + bool dds_texture::is_vertical_cross() const + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + if (get_num_faces() > 1) + return false; + + if (!((math::is_power_of_2(m_height)) && (!math::is_power_of_2(m_width)) && (m_height / 4U == m_width / 3U))) + return false; + + return true; + } + + bool dds_texture::resize(uint new_width, uint new_height, const resample_params& params) + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + CRNLIB_ASSERT((new_width >= 1) && (new_height >= 1)); + + face_vec faces(get_num_faces()); + for (uint f = 0; f < faces.size(); f++) + { + faces[f].resize(1); + faces[f][0] = crnlib_new(); + } + + for (uint f = 0; f < faces.size(); f++) + { + image_u8 tmp; + image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, true); + + image_u8* pMip = crnlib_new(); + + image_utils::resample_params rparams; + rparams.m_dst_width = new_width; + rparams.m_dst_height = new_height; + rparams.m_filter_scale = params.m_filter_scale; + rparams.m_first_comp = 0; + rparams.m_num_comps = pImg->is_component_valid(3) 
? 4 : 3; + rparams.m_srgb = params.m_srgb; + rparams.m_wrapping = params.m_wrapping; + rparams.m_pFilter = params.m_pFilter; + rparams.m_multithreaded = params.m_multithreaded; + + if (!image_utils::resample(*pImg, *pMip, rparams)) + { + crnlib_delete(pMip); + + for (uint f = 0; f < faces.size(); f++) + for (uint l = 0; l < faces[f].size(); l++) + crnlib_delete(faces[f][l]); + + return false; + } + + if (params.m_renormalize) + image_utils::renorm_normal_map(*pMip); + + pMip->set_comp_flags(pImg->get_comp_flags()); + + faces[f][0]->assign(pMip); + } + + assign(faces); + + CRNLIB_ASSERT(check()); + + return true; + } + + bool dds_texture::generate_mipmaps(const generate_mipmap_params& params, bool force) + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + + uint num_levels = 1; + { + uint width = get_width(); + uint height = get_height(); + while ((width > params.m_min_mip_size) || (height > params.m_min_mip_size)) + { + width >>= 1U; + height >>= 1U; + num_levels++; + } + } + + if ((params.m_max_mips > 0) && (num_levels > params.m_max_mips)) + num_levels = params.m_max_mips; + + if ((force) && (get_num_levels() > 1)) + discard_mipmaps(); + + if (num_levels == get_num_levels()) + return true; + + face_vec faces(get_num_faces()); + for (uint f = 0; f < faces.size(); f++) + { + faces[f].resize(num_levels); + for (uint l = 0; l < num_levels; l++) + faces[f][l] = crnlib_new(); + } + + for (uint f = 0; f < faces.size(); f++) + { + image_u8 tmp; + image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, true); + + for (uint l = 0; l < num_levels; l++) + { + const uint mip_width = math::maximum(1U, get_width() >> l); + const uint mip_height = math::maximum(1U, get_height() >> l); + + image_u8* pMip = crnlib_new(); + + if (!l) + *pMip = *pImg; + else + { + image_utils::resample_params rparams; + rparams.m_dst_width = mip_width; + rparams.m_dst_height = mip_height; + rparams.m_filter_scale = params.m_filter_scale; + rparams.m_first_comp = 0; + 
rparams.m_num_comps = pImg->is_component_valid(3) ? 4 : 3; + rparams.m_srgb = params.m_srgb; + rparams.m_wrapping = params.m_wrapping; + rparams.m_pFilter = params.m_pFilter; + rparams.m_multithreaded = params.m_multithreaded; + + if (!image_utils::resample(*pImg, *pMip, rparams)) + { + crnlib_delete(pMip); + + for (uint f = 0; f < faces.size(); f++) + for (uint l = 0; l < faces[f].size(); l++) + crnlib_delete(faces[f][l]); + + return false; + } + + if (params.m_renormalize) + image_utils::renorm_normal_map(*pMip); + + pMip->set_comp_flags(pImg->get_comp_flags()); + } + + faces[f][l]->assign(pMip); + } + } + + assign(faces); + + CRNLIB_ASSERT(check()); + + return true; + } + + bool dds_texture::crop(uint x, uint y, uint width, uint height) + { + CRNLIB_ASSERT(is_valid()); + if (!is_valid()) + return false; + if (get_num_faces() > 1) + return false; + + if ((width < 1) || (height < 1)) + return false; + + image_u8 tmp; + image_u8* pImg = get_level(0, 0)->get_unpacked_image(tmp, true); + + image_u8* pMip = crnlib_new(width, height); + + if (!pImg->extract_block(pMip->get_ptr(), x, y, width, height)) + return false; + + face_vec faces(1); + faces[0].resize(1); + faces[0][0] = crnlib_new(); + + pMip->set_comp_flags(pImg->get_comp_flags()); + + faces[0][0]->assign(pMip); + + assign(faces); + + CRNLIB_ASSERT(check()); + + return true; + } + + bool dds_texture::vertical_cross_to_cubemap() + { + if (!is_vertical_cross()) + return false; + + const uint face_width = get_height() / 4; + + bool alpha_is_valid = has_alpha(); + + dds_texture cubemap; + + pixel_format fmt = alpha_is_valid ? 
PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; + + cubemap.init(face_width, face_width, 1, 6, fmt, m_name.get_ptr()); + + // +x -x +y -y +z -z + // 0 1 2 + // 0 +y + // 1 -x +z +x + // 2 -y + // 3 -z + + for (uint face_index = 0; face_index < 6; face_index++) + { + const mip_level* pSrc = get_level(0, 0); + + image_u8 tmp_img; + image_u8* pSrc_image = pSrc->get_unpacked_image(tmp_img, true); + + const mip_level* pDst = get_level(face_index, 0); + image_u8* pDst_image = pDst->get_image(); + CRNLIB_ASSERT(pDst_image); + + const bool flipped = (face_index == 5); + const uint x_ofs = g_vertical_cross_image_offsets[face_index][0] * face_width; + const uint y_ofs = g_vertical_cross_image_offsets[face_index][1] * face_width; + + for (uint y = 0; y < face_width; y++) + { + for (uint x = 0; x < face_width; x++) + { + const color_quad_u8& c = (*pSrc_image)(x_ofs + x, y_ofs + y); + + if (!flipped) + (*pDst_image)(x, y) = c; + else + (*pDst_image)(face_width - 1 - x, face_width - 1 - y) = c; + } + } + } + + swap(cubemap); + + return true; + } + + bool dds_texture::qdxt_pack_init(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook) + { + if (!is_valid()) + return false; + + state.m_qdxt1_params = dxt1_params; + state.m_qdxt5_params[0] = dxt5_params; + state.m_qdxt5_params[1] = dxt5_params; + utils::zero_object(state.m_has_blocks); + + switch (fmt) + { + case PIXEL_FMT_DXT1: + { + state.m_has_blocks[0] = true; + break; + } + case PIXEL_FMT_DXT1A: + { + state.m_has_blocks[0] = true; + state.m_qdxt1_params.m_use_alpha_blocks = true; + break; + } + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + { + state.m_has_blocks[0] = true; + state.m_has_blocks[1] = true; + state.m_qdxt1_params.m_use_alpha_blocks = false; + state.m_qdxt5_params[0].m_comp_index = 3; + break; + } + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + { + state.m_has_blocks[0] = 
true; + state.m_has_blocks[1] = true; + state.m_qdxt1_params.m_use_alpha_blocks = false; + state.m_qdxt1_params.m_perceptual = false; + state.m_qdxt5_params[0].m_comp_index = 3; + break; + } + case PIXEL_FMT_3DC: + { + state.m_has_blocks[1] = true; + state.m_has_blocks[2] = true; + state.m_qdxt5_params[0].m_comp_index = 1; + state.m_qdxt5_params[1].m_comp_index = 0; + break; + } + case PIXEL_FMT_DXN: + { + state.m_has_blocks[1] = true; + state.m_has_blocks[2] = true; + state.m_qdxt5_params[0].m_comp_index = 0; + state.m_qdxt5_params[1].m_comp_index = 1; + break; + } + case PIXEL_FMT_DXT5A: + { + state.m_has_blocks[1] = true; + state.m_qdxt5_params[0].m_comp_index = 3; + break; + } + default: + { + return false; + } + } + + const uint num_elements = state.m_has_blocks[0] + state.m_has_blocks[1] + state.m_has_blocks[2]; + + uint cur_progress_start = dxt1_params.m_progress_start; + if (state.m_has_blocks[0]) + { + state.m_qdxt1_params.m_progress_start = cur_progress_start; + state.m_qdxt1_params.m_progress_range = dxt1_params.m_progress_range / num_elements; + cur_progress_start += state.m_qdxt1_params.m_progress_range; + } + + if (state.m_has_blocks[1]) + { + state.m_qdxt5_params[0].m_progress_start = cur_progress_start; + state.m_qdxt5_params[0].m_progress_range = dxt1_params.m_progress_range / num_elements; + cur_progress_start += state.m_qdxt5_params[0].m_progress_range; + } + + if (state.m_has_blocks[2]) + { + state.m_qdxt5_params[1].m_progress_start = cur_progress_start; + state.m_qdxt5_params[1].m_progress_range = dxt1_params.m_progress_range - cur_progress_start; + } + + state.m_fmt = fmt; + + dst_tex.init(get_width(), get_height(), get_num_levels(), get_num_faces(), fmt, get_name().get_ptr()); + + state.m_pixel_blocks.resize(0); + + image_utils::conversion_type cook_conv_type = image_utils::cConversion_Invalid; + if (cook) + { + cook_conv_type = image_utils::get_conversion_type(true, fmt); + if (pixel_format_helpers::is_alpha_only(fmt) && 
!pixel_format_helpers::has_alpha(m_format)) + cook_conv_type = image_utils::cConversion_Y_To_A; + } + + state.m_qdxt1_params.m_num_mips = 0; + state.m_qdxt5_params[0].m_num_mips = 0; + state.m_qdxt5_params[1].m_num_mips = 0; + + for (uint f = 0; f < get_num_faces(); f++) + { + for (uint l = 0; l < get_num_levels(); l++) + { + mip_level* pLevel = get_level(f, l); + + image_u8 tmp_img; + image_u8 img(*pLevel->get_unpacked_image(tmp_img, true)); + + if (cook_conv_type != image_utils::cConversion_Invalid) + image_utils::convert_image(img, cook_conv_type); + + const uint num_blocks_x = (img.get_width() + 3) / 4; + const uint num_blocks_y = (img.get_height() + 3) / 4; + const uint total_blocks = num_blocks_x * num_blocks_y; + + const uint cur_size = state.m_pixel_blocks.size(); + state.m_pixel_blocks.resize(cur_size + total_blocks); + dxt_pixel_block* pDst_blocks = &state.m_pixel_blocks[cur_size]; + + { + CRNLIB_ASSERT(state.m_qdxt1_params.m_num_mips < qdxt1_params::cMaxMips); + qdxt1_params::mip_desc& mip_desc = state.m_qdxt1_params.m_mip_desc[state.m_qdxt1_params.m_num_mips]; + mip_desc.m_first_block = cur_size; + mip_desc.m_block_width = num_blocks_x; + mip_desc.m_block_height = num_blocks_y; + state.m_qdxt1_params.m_num_mips++; + } + + for (uint i = 0; i < 2; i++) + { + CRNLIB_ASSERT(state.m_qdxt5_params[i].m_num_mips < qdxt5_params::cMaxMips); + qdxt5_params::mip_desc& mip_desc = state.m_qdxt5_params[i].m_mip_desc[state.m_qdxt5_params[i].m_num_mips]; + mip_desc.m_first_block = cur_size; + mip_desc.m_block_width = num_blocks_x; + mip_desc.m_block_height = num_blocks_y; + state.m_qdxt5_params[i].m_num_mips++; + } + + for (uint block_y = 0; block_y < num_blocks_y; block_y++) + { + const uint img_y = block_y << 2; + + for (uint block_x = 0; block_x < num_blocks_x; block_x++) + { + const uint img_x = block_x << 2; + + color_quad_u8* pDst_pixel = &pDst_blocks->m_pixels[0][0]; + + pDst_blocks++; + + for (uint by = 0; by < 4; by++) + for (uint bx = 0; bx < 4; bx++) + 
*pDst_pixel++ = img.get_clamped(img_x + bx, img_y + by); + } // block_x + } // block_y + } // l + } // f + + if (state.m_has_blocks[0]) + { + if (!state.m_qdxt1.init(state.m_pixel_blocks.size(), &state.m_pixel_blocks[0], state.m_qdxt1_params)) + return false; + } + + if (state.m_has_blocks[1]) + { + if (!state.m_qdxt5a.init(state.m_pixel_blocks.size(), &state.m_pixel_blocks[0], state.m_qdxt5_params[0])) + return false; + } + + if (state.m_has_blocks[2]) + { + if (!state.m_qdxt5b.init(state.m_pixel_blocks.size(), &state.m_pixel_blocks[0], state.m_qdxt5_params[1])) + return false; + } + + return true; + } + + bool dds_texture::qdxt_pack(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params) + { + if (!is_valid()) + return false; + + CRNLIB_ASSERT(dxt1_params.m_quality_level <= qdxt1_params::cMaxQuality); + CRNLIB_ASSERT(dxt5_params.m_quality_level <= qdxt5_params::cMaxQuality); + + state.m_qdxt1_params.m_quality_level = dxt1_params.m_quality_level; + state.m_qdxt1_params.m_pProgress_func = dxt1_params.m_pProgress_func; + state.m_qdxt1_params.m_pProgress_data = dxt1_params.m_pProgress_data; + + state.m_qdxt5_params[0].m_quality_level = dxt5_params.m_quality_level; + state.m_qdxt5_params[0].m_pProgress_func = dxt5_params.m_pProgress_func; + state.m_qdxt5_params[0].m_pProgress_data = dxt5_params.m_pProgress_data; + + state.m_qdxt5_params[1].m_quality_level = dxt5_params.m_quality_level; + state.m_qdxt5_params[1].m_pProgress_func = dxt5_params.m_pProgress_func; + state.m_qdxt5_params[1].m_pProgress_data = dxt5_params.m_pProgress_data; + + const uint num_elements = state.m_has_blocks[0] + state.m_has_blocks[1] + state.m_has_blocks[2]; + + uint cur_progress_start = dxt1_params.m_progress_start; + if (state.m_has_blocks[0]) + { + state.m_qdxt1_params.m_progress_start = cur_progress_start; + state.m_qdxt1_params.m_progress_range = dxt1_params.m_progress_range / num_elements; + cur_progress_start += 
state.m_qdxt1_params.m_progress_range; + } + + if (state.m_has_blocks[1]) + { + state.m_qdxt5_params[0].m_progress_start = cur_progress_start; + state.m_qdxt5_params[0].m_progress_range = dxt1_params.m_progress_range / num_elements; + cur_progress_start += state.m_qdxt5_params[0].m_progress_range; + } + + if (state.m_has_blocks[2]) + { + state.m_qdxt5_params[1].m_progress_start = cur_progress_start; + state.m_qdxt5_params[1].m_progress_range = dxt1_params.m_progress_range - cur_progress_start; + } + + crnlib::vector dxt1_blocks; + if (state.m_has_blocks[0]) + { + dxt1_blocks.resize(state.m_pixel_blocks.size()); + float pow_mul = 1.0f; + + if (state.m_fmt == PIXEL_FMT_DXT5_CCxY) + { + // use a "deeper" codebook size curves when compressing chroma into DXT1, because it's not as important + pow_mul = 1.5f; + } + else if (state.m_fmt == PIXEL_FMT_DXT5) + { + // favor color more than alpha + pow_mul = .75f; + } + + if (!state.m_qdxt1.pack(&dxt1_blocks[0], 1, state.m_qdxt1_params, pow_mul)) + return false; + } + + crnlib::vector dxt5_blocks[2]; + for (uint i = 0; i < 2; i++) + { + if (state.m_has_blocks[i + 1]) + { + dxt5_blocks[i].resize(state.m_pixel_blocks.size()); + + if (!(i ? 
state.m_qdxt5b : state.m_qdxt5a).pack(&dxt5_blocks[i][0], 1, state.m_qdxt5_params[i])) + return false; + } + } + + uint cur_block_ofs = 0; + + for (uint f = 0; f < dst_tex.get_num_faces(); f++) + { + for (uint l = 0; l < dst_tex.get_num_levels(); l++) + { + mip_level* pDst_level = dst_tex.get_level(f, l); + + const uint num_blocks_x = (pDst_level->get_width() + 3) / 4; + const uint num_blocks_y = (pDst_level->get_height() + 3) / 4; + const uint total_blocks = num_blocks_x * num_blocks_y; + + dxt_image* pDst_dxt_image = pDst_level->get_dxt_image(); + + dxt_image::element* pDst = pDst_dxt_image->get_element_ptr(); + for (uint block_index = 0; block_index < total_blocks; block_index++) + { + if (state.m_has_blocks[1]) + memcpy(pDst, &dxt5_blocks[0][cur_block_ofs + block_index], 8); + + if (state.m_has_blocks[2]) + memcpy(pDst + 1, &dxt5_blocks[1][cur_block_ofs + block_index], 8); + + if (state.m_has_blocks[0]) + memcpy(pDst + state.m_has_blocks[1], &dxt1_blocks[cur_block_ofs + block_index], 8); + + pDst += pDst_dxt_image->get_elements_per_block(); + } + + cur_block_ofs += total_blocks; + } + } + + if (dxt1_params.m_pProgress_func) + { + if (!dxt1_params.m_pProgress_func(dxt1_params.m_progress_start + dxt1_params.m_progress_range, dxt1_params.m_pProgress_data)) + return false; + } + + return true; + } + + bool dds_texture::load_from_file(const wchar_t* pFilename, texture_file_types::format file_format) + { + clear(); + + if (file_format == texture_file_types::cFormatInvalid) + file_format = texture_file_types::determine_file_format(pFilename); + + if (file_format == texture_file_types::cFormatInvalid) + { + set_last_error(L"Unrecognized image format extension"); + return false; + } + + set_last_error(L"Image file load failed"); + + bool success = false; + switch (file_format) + { + case texture_file_types::cFormatDDS: + { + success = load_dds(pFilename); + break; + } + case texture_file_types::cFormatCRN: + { + success = load_crn(pFilename); + break; + } + default: + { 
+ success = load_regular(pFilename, file_format); + break; + } + } + + if (success) + { + m_source_file_type = file_format; + clear_last_error(); + } + + return success; + } + + bool dds_texture::load_regular(const wchar_t* pFilename, texture_file_types::format file_format) + { + file_format; + + image_u8* pImg = crnlib_new(); + bool status = image_utils::load_from_file(*pImg, pFilename, 0); + if (!status) + { + crnlib_delete(pImg); + + set_last_error(L"Failed loading image file"); + return false; + } + + mip_level* pLevel = crnlib_new(); + pLevel->assign(pImg); + + assign(pLevel); + set_name(pFilename); + + return true; + } + + bool dds_texture::load_dds(const wchar_t* pFilename) + { + cfile_stream in_stream; + if (!in_stream.open(pFilename)) + { + set_last_error(L"Failed opening file"); + return false; + } + + data_stream_serializer serializer(in_stream); + + if (!read_dds(serializer)) + { + set_last_error(get_last_error().get_ptr()); + return false; + } + + set_name(pFilename); + + return true; + } + + bool dds_texture::load_crn_from_memory(const wchar_t* pFilename, const void *pData, uint data_size) + { + clear(); + + set_last_error(L"Image file load failed"); + + if ((!pData) || (data_size < 1)) return false; + + crnd::crn_texture_info tex_info; + tex_info.m_struct_size = sizeof(crnd::crn_texture_info); + if (!crnd_get_texture_info(pData, data_size, &tex_info)) + { + set_last_error(L"crnd_get_texture_info() failed"); + return false; + } + + const pixel_format dds_fmt = (pixel_format)crnd::crnd_crn_format_to_fourcc(tex_info.m_format); + if (dds_fmt == PIXEL_FMT_INVALID) + { + set_last_error(L"Unsupported DXT format"); + return false; + } + + const dxt_format dxt_fmt = pixel_format_helpers::get_dxt_format(dds_fmt); + + face_vec faces(tex_info.m_faces); + for (uint f = 0; f < tex_info.m_faces; f++) + { + faces[f].resize(tex_info.m_levels); + + for (uint l = 0; l < tex_info.m_levels; l++) + faces[f][l] = crnlib_new(); + } + + const uint tex_num_blocks_x = 
(tex_info.m_width + 3) >> 2; + const uint tex_num_blocks_y = (tex_info.m_height + 3) >> 2; + + vector dxt_data; + // Create temp buffer big enough to hold the largest mip level, and all faces if it's a cubemap. + dxt_data.resize(tex_info.m_bytes_per_block * tex_num_blocks_x * tex_num_blocks_y * tex_info.m_faces); + + set_last_error(L"CRN unpack failed"); + + timer t; + double total_time = 0.0f; + t.start(); + crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pData, data_size); + total_time += t.get_elapsed_secs(); + + if (!pContext) + { + for (uint f = 0; f < faces.size(); f++) + for (uint l = 0; l < faces[f].size(); l++) + crnlib_delete(faces[f][l]); + return false; + } + + uint total_pixels = 0; + + void* pFaces[cCRNMaxFaces]; + for (uint f = tex_info.m_faces; f < cCRNMaxFaces; f++) + pFaces[f] = NULL; + + for (uint l = 0; l < tex_info.m_levels; l++) + { + const uint level_width = math::maximum(1U, tex_info.m_width >> l); + const uint level_height = math::maximum(1U, tex_info.m_height >> l); + const uint num_blocks_x = (level_width + 3U) >> 2U; + const uint num_blocks_y = (level_height + 3U) >> 2U; + + const uint row_pitch = num_blocks_x * tex_info.m_bytes_per_block; + const uint size_of_face = num_blocks_y * row_pitch; + + total_pixels += num_blocks_x * num_blocks_y * 4 * 4 * tex_info.m_faces; + + t.start(); + + for (uint f = 0; f < tex_info.m_faces; f++) + pFaces[f] = &dxt_data[f * size_of_face]; + + if (!crnd::crnd_unpack_level(pContext, pFaces, dxt_data.size(), row_pitch, l)) + { + crnd::crnd_unpack_end(pContext); + for (uint f = 0; f < faces.size(); f++) + for (uint l = 0; l < faces[f].size(); l++) + crnlib_delete(faces[f][l]); + return false; + } + + total_time += t.get_elapsed_secs(); + + for (uint f = 0; f < tex_info.m_faces; f++) + { + dxt_image* pDXT_image = crnlib_new(); + + if (!pDXT_image->init( + dxt_fmt, level_width, level_height, + num_blocks_x * num_blocks_y * (tex_info.m_bytes_per_block / sizeof(dxt_image::element)), + 
reinterpret_cast(pFaces[f]), true)) + { + crnlib_delete(pDXT_image); + + crnd::crnd_unpack_end(pContext); + for (uint f = 0; f < faces.size(); f++) + for (uint l = 0; l < faces[f].size(); l++) + crnlib_delete(faces[f][l]); + + return false; + } + + faces[f][l]->assign(pDXT_image, dds_fmt); + } + } + +#if 0 + if (total_pixels) + { + console::info(L"load_crn: Total pixels: %u, ms: %3.3fms, megapixels/sec: %3.3f", + total_pixels, total_time * 1000.0f, total_pixels / total_time); + } +#endif + + crnd::crnd_unpack_end(pContext); + + assign(faces); + set_name(pFilename); + + m_source_file_type = texture_file_types::cFormatCRN; + clear_last_error(); + + return true; + } + + bool dds_texture::load_crn(const wchar_t* pFilename) + { + cfile_stream in_stream; + if (!in_stream.open(pFilename)) + { + set_last_error(L"Failed opening CRN file"); + return false; + } + + crnlib::vector crn_data; + if (!in_stream.read_array(crn_data)) + { + set_last_error(L"Failed reading CRN file"); + return false; + } + + in_stream.close(); + + return load_crn_from_memory(pFilename, crn_data.get_ptr(), crn_data.size()); + } + + bool dds_texture::write_to_file( + const wchar_t* pFilename, + texture_file_types::format file_format, + crn_comp_params* pCRN_comp_params, + uint32 *pActual_quality_level, float *pActual_bitrate) + { + if (pActual_quality_level) *pActual_quality_level = 0; + if (pActual_bitrate) *pActual_bitrate = 0.0f; + + if (!is_valid()) + { + set_last_error(L"Unable to save empty texture"); + return false; + } + + bool success = false; + + switch (file_format) + { + case texture_file_types::cFormatDDS: + { + if (!pCRN_comp_params) + success = save_dds(pFilename); + else + success = save_comp_texture(pFilename, *pCRN_comp_params, pActual_quality_level, pActual_bitrate); + break; + } + case texture_file_types::cFormatCRN: + { + if (!pCRN_comp_params) + return false; + success = save_comp_texture(pFilename, *pCRN_comp_params, pActual_quality_level, pActual_bitrate); + break; + } + 
default: + { + success = save_regular(pFilename); + break; + } + } + + return success; + } + + bool dds_texture::save_regular(const wchar_t* pFilename) + { + image_u8 tmp; + image_u8* pLevel_image = get_level_image(0, 0, tmp); + + if (!image_utils::save_to_file(pFilename, *pLevel_image, 0)) + { + set_last_error(L"File write failed"); + return false; + } + + return true; + } + + bool dds_texture::save_dds(const wchar_t* pFilename) + { + cfile_stream out_stream; + if (!out_stream.open(pFilename, cDataStreamWritable | cDataStreamSeekable)) + { + set_last_error(L"Unable to open file"); + return false; + } + + data_stream_serializer serializer(out_stream); + + if (!write_dds(serializer)) + { + set_last_error(L"File write failed"); + return false; + } + + return true; + } + + void dds_texture::print_crn_comp_params(const crn_comp_params& p) + { + console::debug(L"CRN compression params:"); + console::debug(L" File Type: %s", crn_get_file_type_ext(p.m_file_type)); + console::debug(L" Quality level: %u", p.m_quality_level); + console::debug(L" Target Bitrate: %f", p.m_target_bitrate); + console::debug(L" Faces: %u", p.m_faces); + console::debug(L" Width: %u", p.m_width); + console::debug(L" Height: %u", p.m_height); + console::debug(L" Levels: %u", p.m_levels); + console::debug(L" Pixel Format: %s", crn_get_format_string(p.m_format)); + console::debug(L"Use manual CRN palette sizes: %u", p.get_flag(cCRNCompFlagManualPaletteSizes)); + console::debug(L"Color endpoints: %u", p.m_crn_color_endpoint_palette_size); + console::debug(L"Color selectors: %u", p.m_crn_color_selector_palette_size); + console::debug(L"Alpha endpoints: %u", p.m_crn_alpha_endpoint_palette_size); + console::debug(L"Alpha selectors: %u", p.m_crn_alpha_selector_palette_size); + console::debug(L"Flags:"); + console::debug(L" Perceptual: %u", p.get_flag(cCRNCompFlagPerceptual)); + console::debug(L" Hierarchical: %u", p.get_flag(cCRNCompFlagHierarchical)); + console::debug(L" UseBothBlockTypes: %u", 
p.get_flag(cCRNCompFlagUseBothBlockTypes)); + console::debug(L" UseTransparentIndicesForBlack: %u", p.get_flag(cCRNCompFlagUseTransparentIndicesForBlack)); + console::debug(L" DisableEndpointCaching: %u", p.get_flag(cCRNCompFlagDisableEndpointCaching)); + console::debug(L"GrayscaleSampling: %u", p.get_flag(cCRNCompFlagGrayscaleSampling)); + console::debug(L" UseDXT1ATransparency: %u", p.get_flag(cCRNCompFlagDXT1AForTransparency)); + console::debug(L"AdaptiveTileColorPSNRDerating: %2.2fdB", p.m_crn_adaptive_tile_color_psnr_derating); + console::debug(L"AdaptiveTileAlphaPSNRDerating: %2.2fdB", p.m_crn_adaptive_tile_alpha_psnr_derating); + console::debug(L"NumHelperThreads: %u", p.m_num_helper_threads); + } + + bool dds_texture::save_comp_texture(const wchar_t* pFilename, const crn_comp_params &orig_comp_params, uint32 *pActual_quality_level, float *pActual_bitrate) + { + crn_comp_params comp_params(orig_comp_params); + + if (pActual_quality_level) *pActual_quality_level = 0; + if (pActual_bitrate) *pActual_bitrate = 0.0f; + + if (math::maximum(get_height(), get_width()) > cCRNMaxLevelResolution) + { + set_last_error(L"Texture resolution is too big!"); + return false; + } + + comp_params.m_faces = get_num_faces(); + comp_params.m_levels = get_num_levels(); + comp_params.m_width = get_width(); + comp_params.m_height = get_height(); + + image_u8 temp_images[cCRNMaxFaces][cCRNMaxLevels]; + for (uint f = 0; f < get_num_faces(); f++) + { + for (uint l = 0; l < get_num_levels(); l++) + { + image_u8* p = get_level_image(f, l, temp_images[f][l]); + + comp_params.m_pImages[f][l] = (crn_uint32*)p->get_ptr(); + } + } + + if (comp_params.get_flag(cCRNCompFlagDebugging)) + print_crn_comp_params(comp_params); + + timer t; + t.start(); + + crnlib::vector comp_data; + if (!create_compressed_texture(comp_params, comp_data, pActual_quality_level, pActual_bitrate)) + { + set_last_error(L"CRN compression failed"); + return false; + } + + double total_time = t.get_elapsed_secs(); + if 
(comp_params.get_flag(cCRNCompFlagDebugging)) + { + console::debug(L"\nTotal compression time: %3.3fs", total_time); + } + + cfile_stream out_stream; + if (!out_stream.open(pFilename, cDataStreamWritable | cDataStreamSeekable)) + { + set_last_error(L"Failed opening file"); + return false; + } + + if (out_stream.write(comp_data.get_ptr(), comp_data.size()) != comp_data.size()) + { + set_last_error(L"Failed writing to file"); + return false; + } + + if (!out_stream.close()) + { + set_last_error(L"Failed writing to file"); + return false; + } + + return true; + } + + uint dds_texture::get_total_pixels_in_all_faces_and_mips() const + { + uint total_pixels = 0; + for (uint l = 0; l < m_faces.size(); l++) + for (uint m = 0; m < m_faces[l].size(); m++) + total_pixels += m_faces[l][m]->get_total_pixels(); + + return total_pixels; + } + +} // namespace crnlib + diff --git a/crnlib/crn_dds_texture.h b/crnlib/crn_dds_texture.h new file mode 100644 index 00000000..ebe01873 --- /dev/null +++ b/crnlib/crn_dds_texture.h @@ -0,0 +1,292 @@ +// File: crn_dds_texture.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt_image.h" +#include "../inc/dds_defs.h" +#include "crn_pixel_format.h" +#include "crn_image.h" +#include "crn_resampler.h" +#include "crn_data_stream_serializer.h" +#include "crn_qdxt1.h" +#include "crn_qdxt5.h" +#include "crn_texture_file_types.h" +#include "crn_image_utils.h" + +namespace crnlib +{ + extern const vec2I g_vertical_cross_image_offsets[6]; + + class mip_level + { + friend class dds_texture; + + public: + mip_level(); + ~mip_level(); + + mip_level(const mip_level& other); + mip_level& operator= (const mip_level& rhs); + + // Assumes ownership. 
+ void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID); + void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID); + + void clear(); + + inline uint get_width() const { return m_width; } + inline uint get_height() const { return m_height; } + inline uint get_total_pixels() const { return m_width * m_height; } + + inline image_u8* get_image() const { return m_pImage; } + inline dxt_image* get_dxt_image() const { return m_pDXTImage; } + + image_u8* get_unpacked_image(image_u8& tmp, bool uncook) const; + + inline bool is_packed() const { return m_pDXTImage != NULL; } + + inline bool is_valid() const { return (m_pImage != NULL) || (m_pDXTImage != NULL); } + + inline pixel_format_helpers::component_flags get_comp_flags() const { return m_comp_flags; } + inline void set_comp_flags(pixel_format_helpers::component_flags comp_flags) { m_comp_flags = comp_flags; } + + inline pixel_format get_format() const { return m_format; } + inline void set_format(pixel_format fmt) { m_format = fmt; } + + bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p); + + bool pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& p); + bool pack_to_dxt(pixel_format fmt, bool cook, const dxt_image::pack_params& p); + + bool unpack_from_dxt(bool uncook = true); + + bool set_alpha_to_luma(); + bool convert(image_utils::conversion_type conv_type); + + private: + uint m_width; + uint m_height; + + pixel_format_helpers::component_flags m_comp_flags; + pixel_format m_format; + + image_u8* m_pImage; + dxt_image* m_pDXTImage; + + void cook_image(image_u8& img) const; + void uncook_image(image_u8& img) const; + }; + + // A face is an array of mip_level ptr's. + typedef crnlib::vector mip_ptr_vec; + + // And an array of one, six, or N faces make up a texture. 
+ typedef crnlib::vector face_vec; + + class dds_texture + { + public: + // Construction/destruction + dds_texture(); + ~dds_texture(); + + dds_texture(const dds_texture& other); + dds_texture& operator= (const dds_texture& rhs); + + void clear(); + + void init(uint width, uint height, uint levels, uint faces, pixel_format fmt, const wchar_t* pName); + + // Assumes ownership. + void assign(face_vec& faces); + void assign(mip_level* pLevel); + void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID); + void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID); + + void set(texture_file_types::format source_file_type, const dds_texture& dds_texture); + + // Accessors + image_u8* get_level_image(uint face, uint level, image_u8& img, bool uncook = true) const; + + inline bool is_valid() const { return m_faces.size() > 0; } + + const dynamic_wstring& get_name() const { return m_name; } + void set_name(const dynamic_wstring& name) { m_name = name; } + + const dynamic_wstring& get_source_filename() const { return get_name(); } + texture_file_types::format get_source_file_type() const { return m_source_file_type; } + + inline uint get_width() const { return m_width; } + inline uint get_height() const { return m_height; } + inline uint get_total_pixels() const { return m_width * m_height; } + uint get_total_pixels_in_all_faces_and_mips() const; + + inline uint get_num_faces() const { return m_faces.size(); } + inline uint get_num_levels() const { if (m_faces.empty()) return 0; else return m_faces[0].size(); } + + inline pixel_format_helpers::component_flags get_comp_flags() const { return m_comp_flags; } + inline pixel_format get_format() const { return m_format; } + + inline bool is_unpacked() const { if (get_num_faces()) { return get_level(0, 0)->get_image() != NULL; } return false; } + + inline const mip_ptr_vec& get_face(uint face) const { return m_faces[face]; } + inline mip_ptr_vec& get_face(uint face) { return m_faces[face]; } + + inline const mip_level* 
get_level(uint face, uint mip) const { return m_faces[face][mip]; } + inline mip_level* get_level(uint face, uint mip) { return m_faces[face][mip]; } + + bool has_alpha() const; + bool is_normal_map() const; + bool is_vertical_cross() const; + bool is_packed() const; + texture_type determine_texture_type() const; + + const dynamic_wstring& get_last_error() const { return m_last_error; } + void clear_last_error() { m_last_error.clear(); } + + // Loading/saving + bool read_dds(const wchar_t* pFilename); + bool read_dds(data_stream_serializer& serializer); + + bool write_dds(const wchar_t* pFilename) const; + bool write_dds(data_stream_serializer& serializer) const; + + bool load_crn_from_memory(const wchar_t* pFilename, const void *pData, uint data_size); + + // If file_format is texture_file_types::cFormatInvalid, the format will be determined from the filename's extension. + bool load_from_file(const wchar_t* pFilename, texture_file_types::format file_format); + + bool write_to_file( + const wchar_t* pFilename, + texture_file_types::format file_format, + crn_comp_params* pCRN_comp_params, + uint32 *pActual_quality_level, float *pActual_bitrate); + + // Conversion + bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p); + bool convert(pixel_format fmt, const dxt_image::pack_params& p); + bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p, int qdxt_quality, bool hierarchical = true); + bool convert(image_utils::conversion_type conv_type); + + bool unpack_from_dxt(bool uncook = true); + + bool set_alpha_to_luma(); + + void discard_mipmaps(); + + void discard_mips(); + + struct resample_params + { + resample_params() : + m_pFilter("kaiser"), + m_wrapping(false), + m_srgb(false), + m_renormalize(false), + m_filter_scale(.9f), + m_gamma(1.75f), // or 2.2f + m_multithreaded(true) + { + } + + const char* m_pFilter; + bool m_wrapping; + bool m_srgb; + bool m_renormalize; + float m_filter_scale; + float m_gamma; + bool 
m_multithreaded; + }; + + bool resize(uint new_width, uint new_height, const resample_params& params); + + struct generate_mipmap_params : public resample_params + { + generate_mipmap_params() : + resample_params(), + m_min_mip_size(1), + m_max_mips(0) + { + } + + uint m_min_mip_size; + uint m_max_mips; // actually the max # of total levels + }; + + bool generate_mipmaps(const generate_mipmap_params& params, bool force); + + bool crop(uint x, uint y, uint width, uint height); + + bool vertical_cross_to_cubemap(); + + // Low-level clustered DXT (QDXT) compression + struct qdxt_state + { + qdxt_state(task_pool& tp) : m_fmt(PIXEL_FMT_INVALID), m_qdxt1(tp), m_qdxt5a(tp), m_qdxt5b(tp) + { + } + + pixel_format m_fmt; + qdxt1 m_qdxt1; + qdxt5 m_qdxt5a; + qdxt5 m_qdxt5b; + crnlib::vector m_pixel_blocks; + + qdxt1_params m_qdxt1_params; + qdxt5_params m_qdxt5_params[2]; + bool m_has_blocks[3]; + + void clear() + { + m_fmt = PIXEL_FMT_INVALID; + m_qdxt1.clear(); + m_qdxt5a.clear(); + m_qdxt5b.clear(); + m_pixel_blocks.clear(); + m_qdxt1_params.clear(); + m_qdxt5_params[0].clear(); + m_qdxt5_params[1].clear(); + utils::zero_object(m_has_blocks); + } + }; + bool qdxt_pack_init(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook); + bool qdxt_pack(qdxt_state& state, dds_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params); + + void swap(dds_texture& img); + + bool check() const; + + private: + dynamic_wstring m_name; + + uint m_width; + uint m_height; + + pixel_format_helpers::component_flags m_comp_flags; + pixel_format m_format; + + face_vec m_faces; + + texture_file_types::format m_source_file_type; + + mutable dynamic_wstring m_last_error; + + inline void clear_last_error() const { m_last_error.clear(); } + inline void set_last_error(const wchar_t* p) const { m_last_error = p; } + + void free_all_mips(); + bool read_dds_internal(data_stream_serializer& 
serializer); + bool load_regular(const wchar_t* pFilename, texture_file_types::format file_format); + bool load_dds(const wchar_t* pFilename); + bool load_crn(const wchar_t* pFilename); + void print_crn_comp_params(const crn_comp_params& p); + bool save_regular(const wchar_t* pFilename); + bool save_dds(const wchar_t* pFilename); + bool save_comp_texture(const wchar_t* pFilename, const crn_comp_params &comp_params, uint32 *pActual_quality_level, float *pActual_bitrate); + }; + + inline void swap(dds_texture& a, dds_texture& b) + { + a.swap(b); + } + +} // namespace crnlib diff --git a/crnlib/crn_decomp.cpp b/crnlib/crn_decomp.cpp new file mode 100644 index 00000000..a8562255 --- /dev/null +++ b/crnlib/crn_decomp.cpp @@ -0,0 +1,6 @@ +// File: crn_decomp.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" + +// Include the single-file header library with no defines, which brings in the full CRN decompressor. +#include "../inc/crn_decomp.h" diff --git a/crnlib/crn_dxt.cpp b/crnlib/crn_dxt.cpp new file mode 100644 index 00000000..5a4e3a5d --- /dev/null +++ b/crnlib/crn_dxt.cpp @@ -0,0 +1,381 @@ +// File: crn_dxt.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dxt.h" +#include "crn_dxt1.h" +#include "crn_ryg_dxt.hpp" +#include "crn_dxt_fast.h" +#include "crn_intersect.h" + +namespace crnlib +{ + const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = { 0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U }; + const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = { 0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U }; + + const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues] = { 0U, 5U, 1U, 2U, 3U, 4U, 0U, 0U }; + + const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = { 0U, 2U, 3U, 1U }; + const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = { 0U, 3U, 1U, 2U }; + + const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 5, 4, 3, 2, 6, 7 }; + const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = { 
1, 0, 7, 6, 5, 4, 3, 2 }; + + const wchar_t* get_dxt_format_string(dxt_format fmt) + { + switch (fmt) + { + case cDXT1: return L"DXT1"; + case cDXT1A: return L"DXT1A"; + case cDXT3: return L"DXT3"; + case cDXT5: return L"DXT5"; + case cDXT5A: return L"DXT5A"; + case cDXN_XY: return L"DXN_XY"; + case cDXN_YX: return L"DXN_YX"; + default: break; + } + CRNLIB_ASSERT(false); + return L"?"; + } + + const wchar_t* get_dxt_compressor_name(crn_dxt_compressor_type c) + { + switch (c) + { + case cCRNDXTCompressorCRN: return L"CRN"; + case cCRNDXTCompressorCRNF: return L"CRNF"; + case cCRNDXTCompressorRYG: return L"RYG"; + default: break; + } + CRNLIB_ASSERT(false); + return L"?"; + } + + uint get_dxt_format_bits_per_pixel(dxt_format fmt) + { + switch (fmt) + { + case cDXT1: + case cDXT1A: + case cDXT5A: + return 4; + case cDXT3: + case cDXT5: + case cDXN_XY: + case cDXN_YX: + return 8; + default: break; + } + CRNLIB_ASSERT(false); + return 0; + } + + bool get_dxt_format_has_alpha(dxt_format fmt) + { + switch (fmt) + { + case cDXT1A: + case cDXT3: + case cDXT5: + case cDXT5A: + return true; + default: break; + } + return false; + } + + uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint bias) + { + uint r = color.r; + uint g = color.g; + uint b = color.b; + + if (scaled) + { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = math::minimum(r, 31U); + g = math::minimum(g, 63U); + b = math::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 11U)); + } + + uint16 dxt1_block::pack_color(uint r, uint g, uint b, bool scaled, uint bias) + { + return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); + } + + color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint alpha) + { + uint b = packed_color & 31U; + uint g = (packed_color >> 5U) & 63U; + uint r = (packed_color >> 11U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 2U) | (g >> 4U); + r = (r << 3U) 
| (r >> 2U); + } + + return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); + } + + void dxt1_block::unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled) + { + color_quad_u8 c(unpack_color(packed_color, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + void dxt1_block::get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4) + { + color_quad_u8 col0(unpack_color(packed_col0, false)); + color_quad_u8 col1(unpack_color(packed_col1, false)); + + pDst[0].r = (3 * col0.r * 22) / 8; + pDst[0].b = (3 * col0.b * 22) / 8; + pDst[0].g = (col0.g << 2) | (col0.g >> 4); + pDst[0].a = 0xFF; + + pDst[1].r = (3 * col1.r * 22) / 8; + pDst[1].g = (col1.g << 2) | (col1.g >> 4); + pDst[1].b = (3 * col1.b * 22) / 8; + pDst[1].a = 0xFF; + + int gdiff = pDst[1].g - pDst[0].g; + + if (color4) //(packed_col0 > packed_col1) + { + pDst[2].r = static_cast(((2 * col0.r + col1.r) * 22) / 8); + pDst[2].g = static_cast((256 * pDst[0].g + gdiff/4 + 128 + gdiff * 80) / 256); + pDst[2].b = static_cast(((2 * col0.b + col1.b) * 22) / 8); + pDst[2].a = 0xFF; + + pDst[3].r = static_cast(((2 * col1.r + col0.r) * 22) / 8); + pDst[3].g = static_cast((256 * pDst[1].g - gdiff/4 + 128 - gdiff * 80) / 256); + pDst[3].b = static_cast(((2 * col1.b + col0.b) * 22) / 8); + pDst[3].a = 0xFF; + } + else { + pDst[2].r = static_cast(((col0.r + col1.r) * 33) / 8); + pDst[2].g = static_cast((256 * pDst[0].g + gdiff/4 + 128 + gdiff * 128) / 256); + pDst[2].b = static_cast(((col0.b + col1.b) * 33) / 8); + pDst[2].a = 0xFF; + + pDst[3].r = 0x00; + pDst[3].g = 0x00; + pDst[3].b = 0x00; + pDst[3].a = 0x00; + } + } + + uint dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set_noclamp_rgba( (c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); + 
pDst[3].set_noclamp_rgba(0, 0, 0, 0); + + return 3; + } + + uint dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // The compiler changes the div3 into a mul by recip+shift. + pDst[2].set_noclamp_rgba( (c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); + pDst[3].set_noclamp_rgba( (c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); + + return 4; + } + + uint dxt1_block::get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set_noclamp_rgba( (c0.r + c1.r + 1) >> 1U, (c0.g + c1.g + 1) >> 1U, (c0.b + c1.b + 1) >> 1U, 255U); + pDst[3].set_noclamp_rgba(0, 0, 0, 0); + + return 3; + } + + uint dxt1_block::get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // 12/14/08 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? + // The compiler changes the div3 into a mul by recip+shift. 
+ pDst[2].set_noclamp_rgba( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); + pDst[3].set_noclamp_rgba( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); + + return 4; + } + + uint dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + if (color0 > color1) + return get_block_colors4(pDst, color0, color1); + else + return get_block_colors3(pDst, color0, color1); + } + + uint dxt1_block::get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + if (color0 > color1) + return get_block_colors4_round(pDst, color0, color1); + else + return get_block_colors3_round(pDst, color0, color1); + } + + color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha) + { + CRNLIB_ASSERT(index < 2); + return unpack_color( static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha ); + } + + uint dxt1_block::pack_endpoints(uint lo, uint hi) + { + CRNLIB_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); + return lo | (hi << 16U); + } + + void dxt3_block::set_alpha(uint x, uint y, uint value, bool scaled) + { + CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + if (scaled) + { + CRNLIB_ASSERT(value <= 0xFF); + value = (value * 15U + 128U) / 255U; + } + else + { + CRNLIB_ASSERT(value <= 0xF); + } + + uint ofs = (y << 1U) + (x >> 1U); + uint c = m_alpha[ofs]; + + c &= ~(0xF << ((x & 1U) << 2U)); + c |= (value << ((x & 1U) << 2U)); + + m_alpha[ofs] = static_cast(c); + } + + uint dxt3_block::get_alpha(uint x, uint y, bool scaled) const + { + CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + uint value = m_alpha[(y << 1U) + (x >> 1U)]; + if (x & 1) + value >>= 4; + value &= 0xF; + + if (scaled) + value = (value << 4U) | value; + + return value; + } + + uint dxt5_block::get_block_values6(color_quad_u8* pDst, uint l, uint h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = 
static_cast((l * 4 + h ) / 5); + pDst[3].a = static_cast((l * 3 + h * 2) / 5); + pDst[4].a = static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; + } + + uint dxt5_block::get_block_values8(color_quad_u8* pDst, uint l, uint h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h ) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; + } + + uint dxt5_block::get_block_values(color_quad_u8* pDst, uint l, uint h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + + uint dxt5_block::get_block_values6(uint* pDst, uint l, uint h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 4 + h ) / 5; + pDst[3] = (l * 3 + h * 2) / 5; + pDst[4] = (l * 2 + h * 3) / 5; + pDst[5] = (l + h * 4) / 5; + pDst[6] = 0; + pDst[7] = 255; + return 6; + } + + uint dxt5_block::get_block_values8(uint* pDst, uint l, uint h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 6 + h ) / 7; + pDst[3] = (l * 5 + h * 2) / 7; + pDst[4] = (l * 4 + h * 3) / 7; + pDst[5] = (l * 3 + h * 4) / 7; + pDst[6] = (l * 2 + h * 5) / 7; + pDst[7] = (l + h * 6) / 7; + return 8; + } + + uint dxt5_block::unpack_endpoint(uint packed, uint index) + { + CRNLIB_ASSERT(index < 2); + return (packed >> (8 * index)) & 0xFF; + } + + uint dxt5_block::pack_endpoints(uint lo, uint hi) + { + CRNLIB_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); + return lo | (hi << 8U); + } + + uint dxt5_block::get_block_values(uint* pDst, uint l, uint h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + +} // namespace crnlib + diff --git a/crnlib/crn_dxt.h b/crnlib/crn_dxt.h new file mode 100644 index 00000000..5a1244cd --- 
// Sizes and bit-field constants shared by the DXT block structures below.
enum dxt_constants
{
   // Encoded block sizes in bytes.
   cDXT1BytesPerBlock = 8U,
   cDXT5NBytesPerBlock = 16U,

   // DXT5 alpha selectors are 3 bits wide, giving 8 possible values.
   cDXT5SelectorBits = 3U,
   cDXT5SelectorValues = 1U << cDXT5SelectorBits,
   cDXT5SelectorMask = cDXT5SelectorValues - 1U,

   // DXT1 color selectors are 2 bits wide, giving 4 possible values.
   cDXT1SelectorBits = 2U,
   cDXT1SelectorValues = 1U << cDXT1SelectorBits,
   cDXT1SelectorMask = cDXT1SelectorValues - 1U,

   // Block edge length in texels, and its log2.
   cDXTBlockShift = 2U,
   cDXTBlockSize = 1U << cDXTBlockShift
};

// Block formats handled by the DXT code. cDXTInvalid marks "no format".
enum dxt_format
{
   cDXTInvalid = -1,

   // cDXT1/1A must appear first!
   cDXT1,
   cDXT1A,

   cDXT3,
   cDXT5,
   cDXT5A,

   cDXN_XY,    // inverted relative to standard ATI2, 360's DXN
   cDXN_YX     // standard ATI2
};

// Largest linear selector value for DXT1 (4 selector values => 0..3) and its reciprocal.
const float cDXT1MaxLinearValue = 3.0f;
const float cDXT1InvMaxLinearValue = 1.0f/3.0f;

// Largest linear selector value for DXT5 alpha (8 selector values => 0..7) and its reciprocal.
const float cDXT5MaxLinearValue = 7.0f;
const float cDXT5InvMaxLinearValue = 1.0f/7.0f;
+ extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; + + extern const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues]; + + extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; + extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; + + const wchar_t* get_dxt_format_string(dxt_format fmt); + uint get_dxt_format_bits_per_pixel(dxt_format fmt); + bool get_dxt_format_has_alpha(dxt_format fmt); + + const wchar_t* get_dxt_quality_string(crn_dxt_quality q); + + const wchar_t* get_dxt_compressor_name(crn_dxt_compressor_type c); + + struct dxt1_block + { + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + // These methods assume the in-memory rep is in LE byte order. + inline uint get_low_color() const + { + return m_low_color[0] | (m_low_color[1] << 8U); + } + + inline uint get_high_color() const + { + return m_high_color[0] | (m_high_color[1] << 8U); + } + + inline void set_low_color(uint16 c) + { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline void set_high_color(uint16 c) + { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline bool is_constant_color_block() const { return get_low_color() == get_high_color(); } + inline bool is_alpha_block() const { return get_low_color() <= get_high_color(); } + inline bool is_non_alpha_block() const { return !is_alpha_block(); } + + inline uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; + } + + inline void set_selector(uint x, uint y, uint val) + { + CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); + + m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); + m_selectors[y] |= (val << (x * cDXT1SelectorBits)); + } + + static uint16 
pack_color(const color_quad_u8& color, bool scaled, uint bias = 127U); + static uint16 pack_color(uint r, uint g, uint b, bool scaled, uint bias = 127U); + + static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint alpha = 255U); + static void unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); + + static uint get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static uint get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1); + + // pDst must point to an array at least cDXT1SelectorValues long. + static uint get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static uint get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static color_quad_u8 unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha = 255U); + static uint pack_endpoints(uint lo, uint hi); + + static void get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4); + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_block); + + struct dxt3_block + { + enum { cNumAlphaBytes = 8 }; + uint8 m_alpha[cNumAlphaBytes]; + + void set_alpha(uint x, uint y, uint value, bool scaled); + uint get_alpha(uint x, uint y, bool scaled) const; + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt3_block); + + struct dxt5_block + { + uint8 m_endpoints[2]; + + enum { cNumSelectorBytes = 6 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + inline uint get_low_alpha() const + { + return m_endpoints[0]; + } + + inline uint get_high_alpha() const + { + return m_endpoints[1]; + } + + inline void set_low_alpha(uint i) + { + CRNLIB_ASSERT(i <= UINT8_MAX); + m_endpoints[0] = static_cast(i); + } + + inline void set_high_alpha(uint i) + { + 
CRNLIB_ASSERT(i <= UINT8_MAX); + m_endpoints[1] = static_cast(i); + } + + inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } + + uint get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } + uint get_selectors_as_word(uint index) { CRNLIB_ASSERT(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); } + + inline uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x < 4U) && (y < 4U)); + + uint selector_index = (y * 4) + x; + uint bit_index = selector_index * cDXT5SelectorBits; + + uint byte_index = bit_index >> 3; + uint bit_ofs = bit_index & 7; + + uint v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + return (v >> bit_ofs) & 7; + } + + inline void set_selector(uint x, uint y, uint val) + { + CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); + + uint selector_index = (y * 4) + x; + uint bit_index = selector_index * cDXT5SelectorBits; + + uint byte_index = bit_index >> 3; + uint bit_ofs = bit_index & 7; + + uint v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); + + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cNumSelectorBytes - 1)) + m_selectors[byte_index + 1] = static_cast(v >> 8); + } + + enum { cMaxSelectorValues = 8 }; + + // Results written to alpha channel. + static uint get_block_values6(color_quad_u8* pDst, uint l, uint h); + static uint get_block_values8(color_quad_u8* pDst, uint l, uint h); + static uint get_block_values(color_quad_u8* pDst, uint l, uint h); + + static uint get_block_values6(uint* pDst, uint l, uint h); + static uint get_block_values8(uint* pDst, uint l, uint h); + // pDst must point to an array at least cDXT5SelectorValues long. 
+ static uint get_block_values(uint* pDst, uint l, uint h); + + static uint unpack_endpoint(uint packed, uint index); + static uint pack_endpoints(uint lo, uint hi); + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt5_block); + + struct dxt_pixel_block + { + color_quad_u8 m_pixels[cDXTBlockSize][cDXTBlockSize]; // [y][x] + + inline void clear() + { + utils::zero_object(*this); + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_pixel_block); + +} // namespace crnlib + + + diff --git a/crnlib/crn_dxt1.cpp b/crnlib/crn_dxt1.cpp new file mode 100644 index 00000000..0a0453e7 --- /dev/null +++ b/crnlib/crn_dxt1.cpp @@ -0,0 +1,2138 @@ +// File: crn_dxt1.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dxt1.h" +#include "crn_ryg_dxt.hpp" +#include "crn_dxt_fast.h" +#include "crn_intersect.h" +#include "crn_vec_interval.h" + +namespace crnlib +{ + dxt1_endpoint_optimizer::dxt1_endpoint_optimizer() : + m_pParams(NULL), + m_pResults(NULL), + m_pSolutions(NULL), + m_has_color_weighting(false), + m_perceptual(false), + m_all_pixels_grayscale(false) + { + m_low_coords.reserve(512); + m_high_coords.reserve(512); + + m_unique_colors.reserve(512); + m_temp_unique_colors.reserve(512); + m_unique_packed_colors.reserve(512); + + m_norm_unique_colors.reserve(512); + m_norm_unique_colors_weighted.reserve(512); + + m_lo_cells.reserve(128); + m_hi_cells.reserve(128); + } + + void dxt1_endpoint_optimizer::clear() + { + m_pParams = NULL; + m_pResults = NULL; + m_pSolutions = NULL; + + if (m_unique_color_hash_map.get_table_size() > 8192) + m_unique_color_hash_map.clear(); + else + m_unique_color_hash_map.reset(); + + if (m_solutions_tried.get_table_size() > 8192) + m_solutions_tried.clear(); + + m_unique_colors.resize(0); + + m_has_transparent_pixels = false; + m_total_unique_color_weight = 0; + + m_norm_unique_colors.resize(0); + m_mean_norm_color.clear(); + + m_norm_unique_colors_weighted.resize(0); + m_mean_norm_color_weighted.clear(); + + 
m_principle_axis.clear(); + + m_total_evals = 0; + m_all_pixels_grayscale = false; + m_has_color_weighting = false; + m_perceptual = false; + } + + bool dxt1_endpoint_optimizer::handle_all_transparent_block() + { + m_pResults->m_low_color = 0; + m_pResults->m_high_color = 0; + m_pResults->m_alpha_block = true; + + memset(m_pResults->m_pSelectors, 3, m_pParams->m_num_pixels); + + return true; + } + + bool dxt1_endpoint_optimizer::try_average_block_as_solid() + { + uint64 tot_r = 0; + uint64 tot_g = 0; + uint64 tot_b = 0; + + uint total_weight = 0; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + uint weight = m_unique_colors[i].m_weight; + total_weight += weight; + + tot_r += m_unique_colors[i].m_color.r * weight; + tot_g += m_unique_colors[i].m_color.g * weight; + tot_b += m_unique_colors[i].m_color.b * weight; + } + + const uint half_total_weight = total_weight >> 1; + uint ave_r = static_cast((tot_r + half_total_weight) / total_weight); + uint ave_g = static_cast((tot_g + half_total_weight) / total_weight); + uint ave_b = static_cast((tot_b + half_total_weight) / total_weight); + + uint low_color = (ryg_dxt::OMatch5[ave_r][0]<<11) | (ryg_dxt::OMatch6[ave_g][0]<<5) | ryg_dxt::OMatch5[ave_b][0]; + uint high_color = (ryg_dxt::OMatch5[ave_r][1]<<11) | (ryg_dxt::OMatch6[ave_g][1]<<5) | ryg_dxt::OMatch5[ave_b][1]; + bool improved = evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution); + + if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) + { + low_color = (ryg_dxt::OMatch5_3[ave_r][0]<<11) | (ryg_dxt::OMatch6_3[ave_g][0]<<5) | ryg_dxt::OMatch5_3[ave_b][0]; + high_color = (ryg_dxt::OMatch5_3[ave_r][1]<<11) | (ryg_dxt::OMatch6_3[ave_g][1]<<5) | ryg_dxt::OMatch5_3[ave_b][1]; + improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution); + } + + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + for (uint i = 0; i < 
m_unique_colors.size(); i++) + { + uint r = m_unique_colors[i].m_color[0]; + uint g = m_unique_colors[i].m_color[1]; + uint b = m_unique_colors[i].m_color[2]; + if ((r == ave_r) && (g == ave_g) && (b == ave_b)) + continue; + + uint low_color = (ryg_dxt::OMatch5[r][0]<<11) | (ryg_dxt::OMatch6[g][0]<<5) | ryg_dxt::OMatch5[b][0]; + uint high_color = (ryg_dxt::OMatch5[r][1]<<11) | (ryg_dxt::OMatch6[g][1]<<5) | ryg_dxt::OMatch5[b][1]; + improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution); + + if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) + { + low_color = (ryg_dxt::OMatch5_3[r][0]<<11) | (ryg_dxt::OMatch6_3[g][0]<<5) | ryg_dxt::OMatch5_3[b][0]; + high_color = (ryg_dxt::OMatch5_3[r][1]<<11) | (ryg_dxt::OMatch6_3[g][1]<<5) | ryg_dxt::OMatch5_3[b][1]; + improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution); + } + } + } + + return improved; + } + + bool dxt1_endpoint_optimizer::handle_solid_block() + { + int r = m_unique_colors[0].m_color.r; + int g = m_unique_colors[0].m_color.g; + int b = m_unique_colors[0].m_color.b; + + //uint packed_color = dxt1_block::pack_color(r, g, b, true); + //evaluate_solution(dxt1_solution_coordinates((uint16)packed_color, (uint16)packed_color), false, &m_best_solution); + + uint low_color = (ryg_dxt::OMatch5[r][0]<<11) | (ryg_dxt::OMatch6[g][0]<<5) | ryg_dxt::OMatch5[b][0]; + uint high_color = (ryg_dxt::OMatch5[r][1]<<11) | (ryg_dxt::OMatch6[g][1]<<5) | ryg_dxt::OMatch5[b][1]; + evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), false, &m_best_solution); + + if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) + { + low_color = (ryg_dxt::OMatch5_3[r][0]<<11) | (ryg_dxt::OMatch6_3[g][0]<<5) | ryg_dxt::OMatch5_3[b][0]; + high_color = (ryg_dxt::OMatch5_3[r][1]<<11) | (ryg_dxt::OMatch6_3[g][1]<<5) | ryg_dxt::OMatch5_3[b][1]; + 
evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color), true, &m_best_solution); + } + + return_solution(*m_pResults, m_best_solution); + + return true; + } + + void dxt1_endpoint_optimizer::compute_vectors(const vec3F& perceptual_weights) + { + m_norm_unique_colors.resize(0); + m_norm_unique_colors_weighted.resize(0); + + m_mean_norm_color.clear(); + m_mean_norm_color_weighted.clear(); + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const color_quad_u8& color = m_unique_colors[i].m_color; + const uint weight = m_unique_colors[i].m_weight; + + vec3F norm_color(color.r * 1.0f/255.0f, color.g * 1.0f/255.0f, color.b * 1.0f/255.0f); + vec3F norm_color_weighted(vec3F::mul_components(perceptual_weights, norm_color)); + + m_norm_unique_colors.push_back(norm_color); + m_norm_unique_colors_weighted.push_back(norm_color_weighted); + + m_mean_norm_color += norm_color * (float)weight; + m_mean_norm_color_weighted += norm_color_weighted * (float)weight; + } + + if (m_total_unique_color_weight) + { + m_mean_norm_color *= (1.0f / m_total_unique_color_weight); + m_mean_norm_color_weighted *= (1.0f / m_total_unique_color_weight); + } + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + m_norm_unique_colors[i] -= m_mean_norm_color; + m_norm_unique_colors_weighted[i] -= m_mean_norm_color_weighted; + } + } + + void dxt1_endpoint_optimizer::compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def) + { +#if 0 + axis.clear(); + + CRNLIB_ASSERT(m_unique_colors.size() == norm_colors.size()); + + bool first = true; + for (uint i = 0; i < norm_colors.size(); i++) + { + const uint weight = m_unique_colors[i].m_weight; + + for (uint j = 0; j < weight; j++) + { + vec3F x(norm_colors[i] * norm_colors[i][0]); + vec3F y(norm_colors[i] * norm_colors[i][1]); + vec3F z(norm_colors[i] * norm_colors[i][2]); + + vec3F v(first ? 
norm_colors[0] : axis); + first = false; + + v.normalize(&def); + + axis[0] += (x * v); + axis[1] += (y * v); + axis[2] += (z * v); + } + } + + axis.normalize(&def); +#else + double cov[6] = { 0, 0, 0, 0, 0, 0 }; + + //vec3F lo(math::cNearlyInfinite); + //vec3F hi(-math::cNearlyInfinite); + + for(uint i = 0; i < norm_colors.size(); i++) + { + const vec3F& v = norm_colors[i]; + + //if (v[0] < lo[0]) lo[0] = v[0]; + //if (v[1] < lo[1]) lo[1] = v[1]; + //if (v[2] < lo[2]) lo[2] = v[2]; + //if (v[0] > hi[0]) hi[0] = v[0]; + //if (v[1] > hi[1]) hi[1] = v[1]; + //if (v[2] > hi[2]) hi[2] = v[2]; + + float r = v[0]; + float g = v[1]; + float b = v[2]; + + if (m_unique_colors[i].m_weight > 1) + { + const double weight = m_unique_colors[i].m_weight; + + cov[0] += r*r*weight; + cov[1] += r*g*weight; + cov[2] += r*b*weight; + cov[3] += g*g*weight; + cov[4] += g*b*weight; + cov[5] += b*b*weight; + } + else + { + cov[0] += r*r; + cov[1] += r*g; + cov[2] += r*b; + cov[3] += g*g; + cov[4] += g*b; + cov[5] += b*b; + } + } + + double vfr, vfg, vfb; + //vfr = hi[0] - lo[0]; + //vfg = hi[1] - lo[1]; + //vfb = hi[2] - lo[2]; + vfr = .9f; + vfg = 1.0f; + vfb = .7f; + + const uint cNumIters = 8; + + for (uint iter = 0; iter < cNumIters; iter++) + { + double r = vfr*cov[0] + vfg*cov[1] + vfb*cov[2]; + double g = vfr*cov[1] + vfg*cov[3] + vfb*cov[4]; + double b = vfr*cov[2] + vfg*cov[4] + vfb*cov[5]; + + double m = math::maximum(fabs(r), fabs(g), fabs(b)); + if (m > 1e-10) + { + m = 1.0f / m; + r *= m; + g *= m; + b *= m; + } + + double delta = math::square(vfr-r) + math::square(vfg-g) + math::square(vfb-b); + + vfr = r; + vfg = g; + vfb = b; + + if ((iter > 2) && (delta < 1e-8)) + break; + } + + double len = vfr*vfr + vfg*vfg + vfb*vfb; + + if (len < 1e-10) + { + axis = def; + } + else + { + len = 1.0f / sqrt(len); + vfr *= len; + vfg *= len; + vfb *= len; + + axis.set(static_cast(vfr), static_cast(vfg), static_cast(vfb)); + } +#endif + } + + static const uint8 g_invTableNull[4] = { 0, 1, 
2, 3 }; + static const uint8 g_invTableAlpha[4] = { 1, 0, 2, 3 }; + static const uint8 g_invTableColor[4] = { 1, 0, 3, 2 }; + + void dxt1_endpoint_optimizer::return_solution(results& res, const potential_solution& solution) + { + bool invert_selectors; + + if (solution.m_alpha_block) + invert_selectors = (solution.m_coords.m_low_color > solution.m_coords.m_high_color); + else + { + CRNLIB_ASSERT(solution.m_coords.m_low_color != solution.m_coords.m_high_color); + + invert_selectors = (solution.m_coords.m_low_color < solution.m_coords.m_high_color); + } + + if (invert_selectors) + { + res.m_low_color = solution.m_coords.m_high_color; + res.m_high_color = solution.m_coords.m_low_color; + } + else + { + res.m_low_color = solution.m_coords.m_low_color; + res.m_high_color = solution.m_coords.m_high_color; + } + + const uint8* pInvert_table = g_invTableNull; + if (invert_selectors) + pInvert_table = solution.m_alpha_block ? g_invTableAlpha : g_invTableColor; + + const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? 
(m_pParams->m_dxt1a_alpha_threshold << 24U) : 0; + + const uint32* pSrc_pixels = reinterpret_cast(m_pParams->m_pPixels); + uint8* pDst_selectors = res.m_pSelectors; + + if ((m_unique_colors.size() == 1) && (!m_pParams->m_pixels_have_alpha)) + { + uint32 c = utils::read_le32(pSrc_pixels); + + CRNLIB_ASSERT(c >= alpha_thresh); + + c |= 0xFF000000U; + + unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); + CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); + + uint unique_color_index = it->second; + + uint selector = pInvert_table[solution.m_selectors[unique_color_index]]; + + memset(pDst_selectors, selector, m_pParams->m_num_pixels); + } + else + { + uint8* pDst_selectors_end = pDst_selectors + m_pParams->m_num_pixels; + + uint8 prev_selector = 0; + uint32 prev_color = 0; + + do + { + uint32 c = utils::read_le32(pSrc_pixels); + pSrc_pixels++; + + uint8 selector = 3; + + if (c >= alpha_thresh) + { + c |= 0xFF000000U; + + if (c == prev_color) + selector = prev_selector; + else + { + unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); + + CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); + + uint unique_color_index = it->second; + + selector = pInvert_table[solution.m_selectors[unique_color_index]]; + + prev_color = c; + prev_selector = selector; + } + } + + *pDst_selectors++ = selector; + + } while (pDst_selectors != pDst_selectors_end); + } + + res.m_alpha_block = solution.m_alpha_block; + res.m_error = solution.m_error; + } + + inline vec3F dxt1_endpoint_optimizer::unpack_to_vec3F(uint16 packed_color) + { + color_quad_u8 c(dxt1_block::unpack_color(packed_color, false)); + + return vec3F(c.r * 1.0f/31.0f, c.g * 1.0f/63.0f, c.b * 1.0f/31.0f); + } + + inline vec3F dxt1_endpoint_optimizer::unpack_to_vec3F_raw(uint16 packed_color) + { + color_quad_u8 c(dxt1_block::unpack_color(packed_color, false)); + + return vec3F(c.r, c.g, c.b); + } + + void dxt1_endpoint_optimizer::optimize_endpoint_comps() + { + if 
((m_best_solution.m_alpha_block) || (!m_best_solution.m_error)) + return; + + //color_quad_u8 orig_l(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + //color_quad_u8 orig_h(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + //uint orig_error = m_best_solution.m_error; + + color_quad_u8 orig_l_scaled(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true)); + color_quad_u8 orig_h_scaled(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true)); + + color_quad_u8 min_color(0xFF, 0xFF, 0xFF, 0xFF); + color_quad_u8 max_color(0, 0, 0, 0); + for (uint i = 0; i < m_unique_colors.size(); i++) + { + min_color = color_quad_u8::component_min(min_color, m_unique_colors[i].m_color); + max_color = color_quad_u8::component_max(max_color, m_unique_colors[i].m_color); + } + + // Try to separately optimize each component. This is a 1D problem so it's easy to compute accurate per-component error bounds. + for (uint comp_index = 0; comp_index < 3; comp_index++) + { + uint ll[4]; + ll[0] = orig_l_scaled[comp_index]; + ll[1] = orig_h_scaled[comp_index]; + ll[2] = (ll[0]*2+ll[1])/3; + ll[3] = (ll[0]+ll[1]*2)/3; + + uint error_to_beat = 0; + uint min_color_weight = 0; + uint max_color_weight = 0; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + uint c = m_unique_colors[i].m_color[comp_index]; + uint w = m_unique_colors[i].m_weight; + + int delta = ll[m_best_solution.m_selectors[i]] - c; + error_to_beat += (int)w * (delta * delta); + + if (c == min_color[comp_index]) + min_color_weight += w; + if (c == max_color[comp_index]) + max_color_weight += w; + } + + if (!error_to_beat) + continue; + + CRNLIB_ASSERT((min_color_weight > 0) && (max_color_weight > 0)); + const uint error_to_beat_div_min_color_weight = min_color_weight ? ((error_to_beat + min_color_weight - 1) / min_color_weight) : 0; + const uint error_to_beat_div_max_color_weight = max_color_weight ? 
((error_to_beat + max_color_weight - 1) / max_color_weight) : 0; + + const uint m = (comp_index == 1) ? 63 : 31; + const uint m_shift = (comp_index == 1) ? 3 : 2; + + for (uint o = 0; o <= m; o++) + { + uint tl[4]; + + tl[0] = (comp_index == 1) ? ((o << 2) | (o >> 4)) : ((o << 3) | (o >> 2)); + + for (uint h = 0; h < 8; h++) + { + const uint pl = h << m_shift; + const uint ph = ((h + 1) << m_shift) - 1; + + uint tl_l = (comp_index == 1) ? ((pl << 2) | (pl >> 4)) : ((pl << 3) | (pl >> 2)); + uint tl_h = (comp_index == 1) ? ((ph << 2) | (ph >> 4)) : ((ph << 3) | (ph >> 2)); + + tl_l = math::minimum(tl_l, tl[0]); + tl_h = math::maximum(tl_h, tl[0]); + + uint c_l = min_color[comp_index]; + uint c_h = max_color[comp_index]; + + if (c_h < tl_l) + { + uint min_possible_error = math::square(tl_l - c_l); + if (min_possible_error > error_to_beat_div_min_color_weight) + continue; + } + else if (c_l > tl_h) + { + uint min_possible_error = math::square(c_h - tl_h); + if (min_possible_error > error_to_beat_div_max_color_weight) + continue; + } + + for (uint p = pl; p <= ph; p++) + { + tl[1] = (comp_index == 1) ? 
((p << 2) | (p >> 4)) : ((p << 3) | (p >> 2)); + + tl[2] = (tl[0]*2+tl[1])/3; + tl[3] = (tl[0]+tl[1]*2)/3; + + uint trial_error = 0; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + int delta = tl[m_best_solution.m_selectors[i]] - m_unique_colors[i].m_color[comp_index]; + trial_error += m_unique_colors[i].m_weight * (delta * delta); + if (trial_error >= error_to_beat) + break; + } + + //CRNLIB_ASSERT(trial_error >= min_possible_error); + + if (trial_error < error_to_beat) + { + color_quad_u8 l(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + color_quad_u8 h(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + l[comp_index] = static_cast(o); + h[comp_index] = static_cast(p); + + bool better = evaluate_solution( + dxt1_solution_coordinates(dxt1_block::pack_color(l, false), dxt1_block::pack_color(h, false)), + true, &m_best_solution); + better; + + if (better) + { +#if 0 + printf("comp: %u, orig: %u %u, new: %u %u, orig_error: %u, new_error: %u\n", comp_index, + orig_l[comp_index], orig_h[comp_index], + l[comp_index], h[comp_index], + orig_error, m_best_solution.m_error); +#endif + if (!m_best_solution.m_error) + return; + + error_to_beat = 0; + for (uint i = 0; i < m_unique_colors.size(); i++) + { + int delta = tl[m_best_solution.m_selectors[i]] - m_unique_colors[i].m_color[comp_index]; + error_to_beat += m_unique_colors[i].m_weight * (delta * delta); + } + + } // better + + //goto early_out; + } // if (trial_error < error_to_beat) + + } // for (uint p = 0; p <= m; p++) + } + + } // for (uint o = 0; o <= m; o++) + + } // comp_index + } + + static const struct adjacent_coords + { + int8 x, y, z; + } g_adjacency[26] = { + {-1, -1, -1}, + {0, -1, -1}, + {1, -1, -1}, + {-1, 0, -1}, + {0, 0, -1}, + {1, 0, -1}, + {-1, 1, -1}, + {0, 1, -1}, + + {1, 1, -1}, + {-1, -1, 0}, + {0, -1, 0}, + {1, -1, 0}, + {-1, 0, 0}, + {1, 0, 0}, + {-1, 1, 0}, + {0, 1, 0}, + + {1, 1, 0}, + {-1, -1, 1}, + {0, -1, 1}, + {1, -1, 1}, + {-1, 0, 
1}, + {0, 0, 1}, + {1, 0, 1}, + {-1, 1, 1}, + + {0, 1, 1}, + {1, 1, 1} + }; + + bool dxt1_endpoint_optimizer::refine_solution(int refinement_level) + { + CRNLIB_ASSERT(m_best_solution.m_valid); + + static const int w1Tab[4] = { 3,0,2,1 }; + + static const int prods_0[4] = { 0x00,0x00,0x02,0x02 }; + static const int prods_1[4] = { 0x00,0x09,0x01,0x04 }; + static const int prods_2[4] = { 0x09,0x00,0x04,0x01 }; + + double akku_0 = 0; + double akku_1 = 0; + double akku_2 = 0; + double At1_r, At1_g, At1_b; + double At2_r, At2_g, At2_b; + + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for(uint i = 0; i < m_unique_colors.size(); i++) + { + const color_quad_u8& c = m_unique_colors[i].m_color; + const double weight = m_unique_colors[i].m_weight; + + double r = c.r*weight; + double g = c.g*weight; + double b = c.b*weight; + int step = m_best_solution.m_selectors[i]^1; + + int w1 = w1Tab[step]; + + akku_0 += prods_0[step]*weight; + akku_1 += prods_1[step]*weight; + akku_2 += prods_2[step]*weight; + At1_r += w1*r; + At1_g += w1*g; + At1_b += w1*b; + At2_r += r; + At2_g += g; + At2_b += b; + } + + At2_r = 3*At2_r - At1_r; + At2_g = 3*At2_g - At1_g; + At2_b = 3*At2_b - At1_b; + + double xx = akku_2; + double yy = akku_1; + double xy = akku_0; + + double t = xx * yy - xy * xy; + if (!yy || !xx || (fabs(t) < .0000125f)) + return false; + + double frb = (3.0f * 31.0f / 255.0f) / t; + double fg = frb * (63.0f / 31.0f); + + bool improved = false; + + if (refinement_level == 0) + { + uint max16; + max16 = math::clamp(static_cast((At1_r*yy - At2_r*xy)*frb+0.5f),0,31) << 11; + max16 |= math::clamp(static_cast((At1_g*yy - At2_g*xy)*fg +0.5f),0,63) << 5; + max16 |= math::clamp(static_cast((At1_b*yy - At2_b*xy)*frb+0.5f),0,31) << 0; + + uint min16; + min16 = math::clamp(static_cast((At2_r*xx - At1_r*xy)*frb+0.5f),0,31) << 11; + min16 |= math::clamp(static_cast((At2_g*xx - At1_g*xy)*fg +0.5f),0,63) << 5; + min16 |= math::clamp(static_cast((At2_b*xx - At1_b*xy)*frb+0.5f),0,31) 
<< 0; + + dxt1_solution_coordinates nc((uint16)min16, (uint16)max16); + nc.canonicalize(); + improved |= evaluate_solution(nc, true, &m_best_solution, false); + } + else if (refinement_level == 1) + { + color_quad_u8 e[2]; + e[0][0] = (uint8)math::clamp(static_cast((At1_r*yy - At2_r*xy)*frb+0.5f),0,31); + e[0][1] = (uint8)math::clamp(static_cast((At1_g*yy - At2_g*xy)*fg +0.5f),0,63); + e[0][2] = (uint8)math::clamp(static_cast((At1_b*yy - At2_b*xy)*frb+0.5f),0,31); + + e[1][0] = (uint8)math::clamp(static_cast((At2_r*xx - At1_r*xy)*frb+0.5f),0,31); + e[1][1] = (uint8)math::clamp(static_cast((At2_g*xx - At1_g*xy)*fg +0.5f),0,63); + e[1][2] = (uint8)math::clamp(static_cast((At2_b*xx - At1_b*xy)*frb+0.5f),0,31); + + for (uint i = 0; i < 2; i++) + { + for (int rr = -1; rr <= 1; rr++) + { + for (int gr = -1; gr <= 1; gr++) + { + for (int br = -1; br <= 1; br++) + { + dxt1_solution_coordinates nc; + + color_quad_u8 c[2]; + c[0] = e[0]; + c[1] = e[1]; + + c[i][0] = (uint8)math::clamp(c[i][0] + rr, 0, 31); + c[i][1] = (uint8)math::clamp(c[i][1] + gr, 0, 63); + c[i][2] = (uint8)math::clamp(c[i][2] + br, 0, 31); + + nc.m_low_color = dxt1_block::pack_color(c[0], false); + nc.m_high_color = dxt1_block::pack_color(c[1], false); + + nc.canonicalize(); + + if ((nc.m_low_color != m_best_solution.m_coords.m_low_color) || (nc.m_high_color != m_best_solution.m_coords.m_high_color)) + { + improved |= evaluate_solution(nc, true, &m_best_solution, false); + } + } + } + } + } + } + else + { + color_quad_u8 e[2]; + e[0][0] = (uint8)math::clamp(static_cast((At1_r*yy - At2_r*xy)*frb+0.5f),0,31); + e[0][1] = (uint8)math::clamp(static_cast((At1_g*yy - At2_g*xy)*fg +0.5f),0,63); + e[0][2] = (uint8)math::clamp(static_cast((At1_b*yy - At2_b*xy)*frb+0.5f),0,31); + + e[1][0] = (uint8)math::clamp(static_cast((At2_r*xx - At1_r*xy)*frb+0.5f),0,31); + e[1][1] = (uint8)math::clamp(static_cast((At2_g*xx - At1_g*xy)*fg +0.5f),0,63); + e[1][2] = (uint8)math::clamp(static_cast((At2_b*xx - 
At1_b*xy)*frb+0.5f),0,31); + + for (int orr = -1; orr <= 1; orr++) + { + for (int ogr = -1; ogr <= 1; ogr++) + { + for (int obr = -1; obr <= 1; obr++) + { + dxt1_solution_coordinates nc; + + color_quad_u8 c[2]; + c[0] = e[0]; + c[1] = e[1]; + + c[0][0] = (uint8)math::clamp(c[0][0] + orr, 0, 31); + c[0][1] = (uint8)math::clamp(c[0][1] + ogr, 0, 63); + c[0][2] = (uint8)math::clamp(c[0][2] + obr, 0, 31); + + for (int rr = -1; rr <= 1; rr++) + { + for (int gr = -1; gr <= 1; gr++) + { + for (int br = -1; br <= 1; br++) + { + c[1][0] = (uint8)math::clamp(c[1][0] + rr, 0, 31); + c[1][1] = (uint8)math::clamp(c[1][1] + gr, 0, 63); + c[1][2] = (uint8)math::clamp(c[1][2] + br, 0, 31); + + nc.m_low_color = dxt1_block::pack_color(c[0], false); + nc.m_high_color = dxt1_block::pack_color(c[1], false); + nc.canonicalize(); + + improved |= evaluate_solution(nc, true, &m_best_solution, false); + } + } + } + } + } + } + } + + return improved; + } + + //----------------------------------------------------------------------------------------------------------------------------------------- + + static int16 g_fast_probe_table[] = + { + 0, + 1, + 2, + 3 + }; + const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(g_fast_probe_table[0]); + + static int16 g_normal_probe_table[] = + { + 0, + 1, + 3, + 5, + 7 + }; + const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(g_normal_probe_table[0]); + + static int16 g_better_probe_table[] = + { + 0, + 1, + 2, + 3, + + 5, + 9, + 15, + 19, + + 27, + 43 + }; + const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(g_better_probe_table[0]); + + static int16 g_uber_probe_table[] = + { + 0, + 1, + 2, + 3, + 5, + 7, + 9, + 10, + 13, + 15, + 19, + 27, + 43, + 59, + 91 + }; + + const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(g_uber_probe_table[0]); + + bool dxt1_endpoint_optimizer::optimize_endpoints(vec3F& low_color, vec3F& high_color) + { + vec3F orig_low_color(low_color); + vec3F 
orig_high_color(high_color); + + m_trial_solution.clear(); + + uint num_passes; + int16* pProbe_table = g_uber_probe_table; + uint probe_range; + float dist_per_trial = .015625f; + + switch (m_pParams->m_quality) + { + case cCRNDXTQualitySuperFast: + pProbe_table = g_fast_probe_table; + probe_range = cFastProbeTableSize; + dist_per_trial = .027063293f; + num_passes = 1; + break; + case cCRNDXTQualityFast: + pProbe_table = g_fast_probe_table; + probe_range = cFastProbeTableSize; + dist_per_trial = .027063293f; + num_passes = 2; + break; + case cCRNDXTQualityNormal: + pProbe_table = g_normal_probe_table; + probe_range = cNormalProbeTableSize; + dist_per_trial = .027063293f; + num_passes = 2; + break; + case cCRNDXTQualityBetter: + pProbe_table = g_better_probe_table; + probe_range = cBetterProbeTableSize; + num_passes = 2; + break; + default: + pProbe_table = g_uber_probe_table; + probe_range = cUberProbeTableSize; + num_passes = 4; + break; + } + + m_solutions_tried.reset(); + + if (m_pParams->m_endpoint_caching) + { + const uint num_prev_results = math::minimum(cMaxPrevResults, m_num_prev_results); + for (uint i = 0; i < num_prev_results; i++) + { + const dxt1_solution_coordinates& coords = m_prev_results[i]; + + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U))); + if (!solution_res.second) + continue; + + evaluate_solution(coords, true, &m_best_solution); + } + + if (!m_best_solution.m_error) + { + return_solution(*m_pResults, m_best_solution); + return true; + } + } + + if (m_pParams->m_quality >= cCRNDXTQualityBetter) + { + //evaluate_solution(dxt1_solution_coordinates(low_color, high_color), true, &m_best_solution); + //refine_solution(); + + try_median4(orig_low_color, orig_high_color); + } + + uint probe_low[cUberProbeTableSize * 2 + 1]; + uint probe_high[cUberProbeTableSize * 2 + 1]; + + vec3F scaled_principle_axis[2]; + + scaled_principle_axis[1] = m_principle_axis * dist_per_trial; + 
scaled_principle_axis[1][0] *= 31.0f; + scaled_principle_axis[1][1] *= 63.0f; + scaled_principle_axis[1][2] *= 31.0f; + + scaled_principle_axis[0] = -scaled_principle_axis[1]; + + //vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); + //initial_ofs[0] += .5f; + //initial_ofs[1] += .5f; + //initial_ofs[2] += .5f; + + low_color[0] = math::clamp(low_color[0] * 31.0f, 0.0f, 31.0f); + low_color[1] = math::clamp(low_color[1] * 63.0f, 0.0f, 63.0f); + low_color[2] = math::clamp(low_color[2] * 31.0f, 0.0f, 31.0f); + + high_color[0] = math::clamp(high_color[0] * 31.0f, 0.0f, 31.0f); + high_color[1] = math::clamp(high_color[1] * 63.0f, 0.0f, 63.0f); + high_color[2] = math::clamp(high_color[2] * 31.0f, 0.0f, 31.0f); + + for (uint pass = 0; pass < num_passes; pass++) + { + if (pass) + { + low_color = unpack_to_vec3F_raw(m_best_solution.m_coords.m_low_color); + high_color = unpack_to_vec3F_raw(m_best_solution.m_coords.m_high_color); + } + + const uint64 prev_best_error = m_best_solution.m_error; + if (!prev_best_error) + break; + + int prev_packed_color[2] = { -1, -1 }; + uint num_low_trials = 0; + vec3F initial_probe_low_color(low_color + vec3F(.5f)); + for (uint i = 0; i < probe_range; i++) + { + const int ls = i ? 0 : 1; + int x = pProbe_table[i]; + + for (int s = ls; s < 2; s++) + { + vec3F probe_low_color(initial_probe_low_color + scaled_principle_axis[s] * (float)x); + + int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); + + int packed_color = b | (g << 5U) | (r << 11U); + if (packed_color != prev_packed_color[s]) + { + probe_low[num_low_trials++] = packed_color; + prev_packed_color[s] = packed_color; + } + } + } + + prev_packed_color[0] = -1; + prev_packed_color[1] = -1; + + uint num_high_trials = 0; + vec3F initial_probe_high_color(high_color + vec3F(.5f)); + for (uint i = 0; i < probe_range; i++) + { + const int ls = i ? 
0 : 1; + int x = pProbe_table[i]; + + for (int s = ls; s < 2; s++) + { + vec3F probe_high_color(initial_probe_high_color + scaled_principle_axis[s] * (float)x); + + int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); + + int packed_color = b | (g << 5U) | (r << 11U); + if (packed_color != prev_packed_color[s]) + { + probe_high[num_high_trials++] = packed_color; + prev_packed_color[s] = packed_color; + } + } + } + + for (uint i = 0; i < num_low_trials; i++) + { + for (uint j = 0; j < num_high_trials; j++) + { + dxt1_solution_coordinates coords((uint16)probe_low[i], (uint16)probe_high[j]); + coords.canonicalize(); + + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U))); + if (!solution_res.second) + continue; + + evaluate_solution(coords, true, &m_best_solution); + } + } + + if (m_pParams->m_quality >= cCRNDXTQualityNormal) + { + color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); + + for (int i = 0; i < 26; i++) + { + int r = lc.r + g_adjacency[i].x; + if ((r < 0) || (r > 31)) continue; + + int g = lc.g + g_adjacency[i].y; + if ((g < 0) || (g > 63)) continue; + + int b = lc.b + g_adjacency[i].z; + if ((b < 0) || (b > 31)) continue; + + dxt1_solution_coordinates coords(dxt1_block::pack_color(r, g, b, false), m_best_solution.m_coords.m_high_color); + coords.canonicalize(); + + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U))); + if (solution_res.second) + evaluate_solution(coords, true, &m_best_solution); + } + + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false); + + for (int a = 0; a < 3; a++) + { + int limit = (a == 1) ? 
63 : 31; + + for (int s = -2; s <= 2; s += 4) + { + color_quad_u8 c(lc); + int q = c[a] + s; + if ((q < 0) || (q > limit)) continue; + + c[a] = (uint8)q; + + dxt1_solution_coordinates coords(dxt1_block::pack_color(c, false), m_best_solution.m_coords.m_high_color); + coords.canonicalize(); + + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U))); + if (solution_res.second) + evaluate_solution(coords, true, &m_best_solution); + } + } + } + + color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); + + for (int i = 0; i < 26; i++) + { + int r = hc.r + g_adjacency[i].x; + if ((r < 0) || (r > 31)) continue; + + int g = hc.g + g_adjacency[i].y; + if ((g < 0) || (g > 63)) continue; + + int b = hc.b + g_adjacency[i].z; + if ((b < 0) || (b > 31)) continue; + + dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(r, g, b, false)); + coords.canonicalize(); + + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U))); + if (solution_res.second) + evaluate_solution(coords, true, &m_best_solution); + } + + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false); + + for (int a = 0; a < 3; a++) + { + int limit = (a == 1) ? 
63 : 31; + + for (int s = -2; s <= 2; s += 4) + { + color_quad_u8 c(hc); + int q = c[a] + s; + if ((q < 0) || (q > limit)) continue; + + c[a] = (uint8)q; + + dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(c, false)); + coords.canonicalize(); + + solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | (coords.m_high_color << 16U))); + if (solution_res.second) + evaluate_solution(coords, true, &m_best_solution); + } + } + } + } + + if ((!m_best_solution.m_error) || ((pass) && (m_best_solution.m_error == prev_best_error))) + break; + + if (m_pParams->m_quality >= cCRNDXTQualityUber) + refine_solution(1); + } + + if (m_pParams->m_quality >= cCRNDXTQualityNormal) + { + if ((m_best_solution.m_error) && (!m_pParams->m_pixels_have_alpha)) + { + bool choose_solid_block = false; + if (m_best_solution.are_selectors_all_equal()) + choose_solid_block = try_average_block_as_solid(); + + if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber)) + optimize_endpoint_comps(); + } + + if (m_pParams->m_quality == cCRNDXTQualityUber) + { + if (m_best_solution.m_error) + try_combinatorial_encoding(); + } + } + + return_solution(*m_pResults, m_best_solution); + + if (m_pParams->m_endpoint_caching) + { + m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords; + m_num_prev_results++; + } + + return true; + } + + static inline int mul_8bit(int a, int b) + { + int t = a * b + 128; + return (t + (t >> 8)) >> 8; + } + + bool dxt1_endpoint_optimizer::handle_multicolor_block() + { + uint num_passes = 1; + vec3F perceptual_weights(1.0f); + + if (m_perceptual) + { + float ave_redness = 0; + float ave_blueness = 0; + float ave_l = 0; + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const color_quad_u8& c = m_unique_colors[i].m_color; + const float weight = (float)m_unique_colors[i].m_weight; + + int l = mul_8bit(c.r + c.g + c.b, 0x55); // /3 + ave_l += l; + l 
= math::maximum(1, l); + + float scale = weight / static_cast(l); + + ave_redness += scale * c.r; + ave_blueness += scale * c.b; + } + + ave_redness /= m_total_unique_color_weight; + ave_blueness /= m_total_unique_color_weight; + ave_l /= m_total_unique_color_weight; + + ave_l = math::minimum(1.0f, ave_l * 16.0f / 255.0f); + + //float r = ave_l * powf(math::saturate(ave_redness / 3.0f), 5.0f); + //float b = ave_l * powf(math::saturate(ave_blueness / 3.0f), 5.0f); + + float p = ave_l * powf(math::saturate(math::maximum(ave_redness, ave_blueness) * 1.0f/3.0f), 2.75f); + + if (p >= 1.0f) + num_passes = 1; + else + { + num_passes = 2; + perceptual_weights = vec3F::lerp(vec3F(.212f, .72f, .072f), perceptual_weights, p); + } + } + + for (uint pass_index = 0; pass_index < num_passes; pass_index++) + { + compute_vectors(perceptual_weights); + + compute_pca(m_principle_axis, m_norm_unique_colors_weighted, vec3F(.2837149f, 0.9540631f, 0.096277453f)); + +#if 0 + matrix44F m(matrix44F::make_scale_matrix(perceptual_weights[0], perceptual_weights[1], perceptual_weights[2])); + matrix44F im(m.get_inverse()); + im.transpose_in_place(); + m_principle_axis = m_principle_axis * im; +#else + m_principle_axis[0] /= perceptual_weights[0]; + m_principle_axis[1] /= perceptual_weights[1]; + m_principle_axis[2] /= perceptual_weights[2]; +#endif + m_principle_axis.normalize_in_place(); + + if (num_passes > 1) + { + if (fabs(m_principle_axis[0]) >= .795f) + perceptual_weights.set(.424f, .6f, .072f); + else if (fabs(m_principle_axis[2]) >= .795f) + perceptual_weights.set(.212f, .6f, .212f); + else + break; + } + } + + float l = 1e+9; + float h = -1e+9; + + for (uint i = 0; i < m_norm_unique_colors.size(); i++) + { + float d = m_norm_unique_colors[i] * m_principle_axis; + l = math::minimum(l, d); + h = math::maximum(h, d); + } + + vec3F low_color(m_mean_norm_color + l * m_principle_axis); + vec3F high_color(m_mean_norm_color + h * m_principle_axis); + + if (!low_color.is_within_bounds(0.0f, 
1.0f)) + { + vec3F coord; + float t; + aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); + intersection::result res = intersection::ray_aabb(coord, t, ray3F(low_color, m_principle_axis), bounds); + if (res == intersection::cSuccess) + low_color = coord; + } + + if (!high_color.is_within_bounds(0.0f, 1.0f)) + { + vec3F coord; + float t; + aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); + intersection::result res = intersection::ray_aabb(coord, t, ray3F(high_color, -m_principle_axis), bounds); + if (res == intersection::cSuccess) + high_color = coord; + } + + if (!optimize_endpoints(low_color, high_color)) + return false; + + return true; + } + + bool dxt1_endpoint_optimizer::handle_grayscale_block() + { + // TODO + return true; + } + + bool dxt1_endpoint_optimizer::try_median4(const vec3F& low_color, const vec3F& high_color) + { + vec3F means[4]; + + if (m_unique_colors.size() <= 4) + { + for (uint i = 0; i < 4; i++) + means[i] = m_norm_unique_colors[math::minimum(m_norm_unique_colors.size() - 1, i)]; + } + else + { + means[0] = low_color - m_mean_norm_color; + means[3] = high_color - m_mean_norm_color; + means[1] = vec3F::lerp(means[0], means[3], 1.0f/3.0f); + means[2] = vec3F::lerp(means[0], means[3], 2.0f/3.0f); + + fast_random rm; + + const uint cMaxIters = 8; + uint reassign_rover = 0; + float prev_total_dist = math::cNearlyInfinite; + for (uint iter = 0; iter < cMaxIters; iter++) + { + vec3F new_means[4]; + float new_weights[4]; + utils::zero_object(new_means); + utils::zero_object(new_weights); + + float total_dist = 0; + + for (uint i = 0; i < m_unique_colors.size(); i++) + { + const vec3F& v = m_norm_unique_colors[i]; + + float best_dist = means[0].squared_distance(v); + int best_index = 0; + + for (uint j = 1; j < 4; j++) + { + float dist = means[j].squared_distance(v); + if (dist < best_dist) + { + best_dist = dist; + best_index = j; + } + } + + total_dist += best_dist; + + new_means[best_index] += v * (float)m_unique_colors[i].m_weight; + new_weights[best_index] += 
(float)m_unique_colors[i].m_weight; + } + + uint highest_index = 0; + float highest_weight = 0; + bool empty_cell = false; + for (uint j = 0; j < 4; j++) + { + if (new_weights[j] > 0.0f) + { + means[j] = new_means[j] / new_weights[j]; + if (new_weights[j] > highest_weight) + { + highest_weight = new_weights[j]; + highest_index = j; + } + } + else + empty_cell = true; + } + + if (!empty_cell) + { + if (fabs(total_dist - prev_total_dist) < .00001f) + break; + + prev_total_dist = total_dist; + } + else + prev_total_dist = math::cNearlyInfinite; + + if ((empty_cell) && (iter != (cMaxIters - 1))) + { + const uint ri = (highest_index + reassign_rover) & 3; + reassign_rover++; + + for (uint j = 0; j < 4; j++) + { + if (new_weights[j] == 0.0f) + { + means[j] = means[ri]; + means[j] += vec3F::make_random(rm, -.00196f, .00196f); + } + } + } + } + } + + bool improved = false; + + for (uint i = 0; i < 3; i++) + { + for (uint j = i + 1; j < 4; j++) + { + const vec3F v0(means[i] + m_mean_norm_color); + const vec3F v1(means[j] + m_mean_norm_color); + + dxt1_solution_coordinates sc( + color_quad_u8((int)floor(.5f + v0[0] * 31.0f), (int)floor(.5f + v0[1] * 63.0f), (int)floor(.5f + v0[2] * 31.0f), 255), + color_quad_u8((int)floor(.5f + v1[0] * 31.0f), (int)floor(.5f + v1[1] * 63.0f), (int)floor(.5f + v1[2] * 31.0f), 255), false ); + + sc.canonicalize(); + + improved |= evaluate_solution(sc, true, &m_best_solution, false); + } + } + + improved |= refine_solution((m_pParams->m_quality == cCRNDXTQualityUber) ? 
1 : 0); + + return improved; + } + + bool dxt1_endpoint_optimizer::evaluate_solution( + const dxt1_solution_coordinates& coords, + bool early_out, + potential_solution* pBest_solution, + bool alternate_rounding) + { + m_total_evals++; + + if ((!m_pSolutions) || (alternate_rounding)) + { + if (m_pParams->m_quality >= cCRNDXTQualityBetter) + return evaluate_solution_uber(m_trial_solution, coords, early_out, pBest_solution, alternate_rounding); + else + return evaluate_solution_fast(m_trial_solution, coords, early_out, pBest_solution, alternate_rounding); + } + + evaluate_solution_uber(m_trial_solution, coords, false, NULL, alternate_rounding); + + CRNLIB_ASSERT(m_trial_solution.m_valid); + + m_pSolutions->resize(m_pSolutions->size() + 1); + solution& new_solution = m_pSolutions->back(); + new_solution.m_selectors.resize(m_pParams->m_num_pixels); + new_solution.m_results.m_pSelectors = &new_solution.m_selectors[0]; + + return_solution(new_solution.m_results, m_trial_solution); + + if ((pBest_solution) && (m_trial_solution.m_error < m_best_solution.m_error)) + { + *pBest_solution = m_trial_solution; + return true; + } + + return false; + } + + inline uint dxt1_endpoint_optimizer::color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) + { + if (perceptual) + { + return color::color_distance(true, e1, e2, alpha); + } + else if (m_pParams->m_grayscale_sampling) + { + // Computes error assuming shader will be converting the result to grayscale. + int y0 = color::RGB_to_Y(e1); + int y1 = color::RGB_to_Y(e2); + int yd = y0 - y1; + if (alpha) + { + int da = (int)e1[3] - (int)e2[3]; + return yd * yd + da * da; + } + else + { + return yd * yd; + } + } + else if (m_has_color_weighting) + { + // Compute error using user provided color component weights. 
+ int dr = (int)e1[0] - (int)e2[0]; + int dg = (int)e1[1] - (int)e2[1]; + int db = (int)e1[2] - (int)e2[2]; + + dr = (dr * dr) * m_pParams->m_color_weights[0]; + dg = (dg * dg) * m_pParams->m_color_weights[1]; + db = (db * db) * m_pParams->m_color_weights[2]; + + if (alpha) + { + int da = (int)e1[3] - (int)e2[3]; + da = (da * da) * (m_pParams->m_color_weights[0] + m_pParams->m_color_weights[1] + m_pParams->m_color_weights[2]); + return dr + dg + db + da; + } + else + { + return dr + dg + db; + } + } + else + { + return color::color_distance(false, e1, e2, alpha); + } + } + + bool dxt1_endpoint_optimizer::evaluate_solution_uber( + potential_solution& solution, + const dxt1_solution_coordinates& coords, + bool early_out, + potential_solution* pBest_solution, + bool alternate_rounding) + { + solution.m_coords = coords; + solution.m_selectors.resize(m_unique_colors.size()); + + if ((pBest_solution) && (early_out)) + solution.m_error = pBest_solution->m_error; + else + solution.m_error = UINT64_MAX; + + solution.m_alpha_block = false; + solution.m_valid = false; + + uint first_block_type = 0; + uint last_block_type = 1; + + if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) + first_block_type = 1; + else if (!m_pParams->m_use_alpha_blocks) + last_block_type = 0; + + m_trial_selectors.resize(m_unique_colors.size()); + + color_quad_u8 colors[cDXT1SelectorValues]; + + colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); + colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); + + for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) + { + uint64 trial_error = 0; + + if (!block_type) + { + colors[2].set_noclamp_rgba( (colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 0); + colors[3].set_noclamp_rgba( (colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + 
colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 0); + + if (m_perceptual) + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(true, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(true, c, colors[1], false); + if (err < best_error) { best_error = err; best_color_index = 1; } + + err = color_distance(true, c, colors[2], false); + if (err < best_error) { best_error = err; best_color_index = 2; } + + err = color_distance(true, c, colors[3], false); + if (err < best_error) { best_error = err; best_color_index = 3; } + + trial_error += best_error * m_unique_colors[unique_color_index].m_weight; + if (trial_error >= solution.m_error) + break; + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + else + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(false, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(false, c, colors[1], false); + if (err < best_error) { best_error = err; best_color_index = 1; } + + err = color_distance(false, c, colors[2], false); + if (err < best_error) { best_error = err; best_color_index = 2; } + + err = color_distance(false, c, colors[3], false); + if (err < best_error) { best_error = err; best_color_index = 3; } + + trial_error += best_error * m_unique_colors[unique_color_index].m_weight; + if (trial_error >= solution.m_error) + break; + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + } + else + { + colors[2].set_noclamp_rgba( (colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + 
alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); + + if (m_perceptual) + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(true, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(true, c, colors[1], false); + if (err < best_error) { best_error = err; best_color_index = 1; } + + err = color_distance(true, c, colors[2], false); + if (err < best_error) { best_error = err; best_color_index = 2; } + + trial_error += best_error * m_unique_colors[unique_color_index].m_weight; + if (trial_error >= solution.m_error) + break; + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + else + { + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + uint best_error = color_distance(false, c, colors[0], false); + uint best_color_index = 0; + + uint err = color_distance(false, c, colors[1], false); + if (err < best_error) { best_error = err; best_color_index = 1; } + + err = color_distance(false, c, colors[2], false); + if (err < best_error) { best_error = err; best_color_index = 2; } + + trial_error += best_error * m_unique_colors[unique_color_index].m_weight; + if (trial_error >= solution.m_error) + break; + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + } + + if (trial_error < solution.m_error) + { + solution.m_error = trial_error; + solution.m_alpha_block = (block_type != 0); + solution.m_selectors = m_trial_selectors; + solution.m_valid = true; + } + } + + if ((!solution.m_alpha_block) && (solution.m_coords.m_low_color == solution.m_coords.m_high_color)) + { + uint s; + if ((solution.m_coords.m_low_color & 31) != 31) + { + 
solution.m_coords.m_low_color++; + s = 1; + } + else + { + solution.m_coords.m_high_color--; + s = 0; + } + + for (uint i = 0; i < m_unique_colors.size(); i++) + solution.m_selectors[i] = static_cast(s); + } + + if ((pBest_solution) && (solution.m_error < pBest_solution->m_error)) + { + *pBest_solution = solution; + return true; + } + + return false; + } + + bool dxt1_endpoint_optimizer::evaluate_solution_fast( + potential_solution& solution, + const dxt1_solution_coordinates& coords, + bool early_out, + potential_solution* pBest_solution, + bool alternate_rounding) + { + solution.m_coords = coords; + solution.m_selectors.resize(m_unique_colors.size()); + + if ((pBest_solution) && (early_out)) + solution.m_error = pBest_solution->m_error; + else + solution.m_error = UINT64_MAX; + + solution.m_alpha_block = false; + solution.m_valid = false; + + uint first_block_type = 0; + uint last_block_type = 1; + + if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) + first_block_type = 1; + else if (!m_pParams->m_use_alpha_blocks) + last_block_type = 0; + + m_trial_selectors.resize(m_unique_colors.size()); + + color_quad_u8 colors[cDXT1SelectorValues]; + colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); + colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); + + int vr = colors[1].r - colors[0].r; + int vg = colors[1].g - colors[0].g; + int vb = colors[1].b - colors[0].b; + if (m_perceptual) + { + vr *= 8; + vg *= 24; + } + + int stops[4]; + stops[0] = colors[0].r*vr + colors[0].g*vg + colors[0].b*vb; + stops[1] = colors[1].r*vr + colors[1].g*vg + colors[1].b*vb; + + int dirr = vr * 2; + int dirg = vg * 2; + int dirb = vb * 2; + + for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) + { + uint64 trial_error = 0; + + if (!block_type) + { + colors[2].set_noclamp_rgba( (colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 
+ colors[1].b + alternate_rounding) / 3, 255U); + colors[3].set_noclamp_rgba( (colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 255U); + + stops[2] = colors[2].r*vr + colors[2].g*vg + colors[2].b*vb; + stops[3] = colors[3].r*vr + colors[3].g*vg + colors[3].b*vb; + + // 0 2 3 1 + int c0Point = stops[1] + stops[3]; + int halfPoint = stops[3] + stops[2]; + int c3Point = stops[2] + stops[0]; + + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + int dot = c.r*dirr + c.g*dirg + c.b*dirb; + + uint8 best_color_index; + if (dot < halfPoint) + best_color_index = (dot < c3Point) ? 0 : 2; + else + best_color_index = (dot < c0Point) ? 3 : 1; + + uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); + + trial_error += best_error * m_unique_colors[unique_color_index].m_weight; + if (trial_error >= solution.m_error) + break; + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + else + { + colors[2].set_noclamp_rgba( (colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); + + stops[2] = colors[2].r*vr + colors[2].g*vg + colors[2].b*vb; + + // 0 2 1 + int c02Point = stops[0] + stops[2]; + int c21Point = stops[2] + stops[1]; + + for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) + { + const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; + + int dot = c.r*dirr + c.g*dirg + c.b*dirb; + + uint8 best_color_index; + if (dot < c02Point) + best_color_index = 0; + else if (dot < c21Point) + best_color_index = 2; + else + best_color_index = 1; + + uint best_error = 
color_distance(m_perceptual, c, colors[best_color_index], false); + + trial_error += best_error * m_unique_colors[unique_color_index].m_weight; + if (trial_error >= solution.m_error) + break; + + m_trial_selectors[unique_color_index] = static_cast(best_color_index); + } + } + + if (trial_error < solution.m_error) + { + solution.m_error = trial_error; + solution.m_alpha_block = (block_type != 0); + solution.m_selectors = m_trial_selectors; + solution.m_valid = true; + } + } + + if ((!solution.m_alpha_block) && (solution.m_coords.m_low_color == solution.m_coords.m_high_color)) + { + uint s; + if ((solution.m_coords.m_low_color & 31) != 31) + { + solution.m_coords.m_low_color++; + s = 1; + } + else + { + solution.m_coords.m_high_color--; + s = 0; + } + + for (uint i = 0; i < m_unique_colors.size(); i++) + solution.m_selectors[i] = static_cast(s); + } + + if ((pBest_solution) && (solution.m_error < pBest_solution->m_error)) + { + *pBest_solution = solution; + return true; + } + + return false; + } + + unique_color dxt1_endpoint_optimizer::lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding) + { + color_quad_u8 res; + + float r = rounding ? 
1.0f : 0.0f; + res[0] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[0], b[0], f)), 0, 255)); + res[1] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[1], b[1], f)), 0, 255)); + res[2] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[2], b[2], f)), 0, 255)); + res[3] = 255; + + return unique_color(res, 1); + } + + void dxt1_endpoint_optimizer::try_combinatorial_encoding() + { + if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4)) + return; + + m_temp_unique_colors = m_unique_colors; + + if (m_temp_unique_colors.size() == 2) + { + // a b c d + // 0.0 1/3 2/3 1.0 + + for (uint k = 0; k < 2; k++) + { + for (uint q = 0; q < 2; q++) + { + const uint r = q ^ 1; + + // a b + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 3.0f, k)); + + // a c + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 1.5f, k)); + + // a d + + // b c + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); + + // b d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -.5f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); + + // c d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -2.0f, k)); + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); + } 
+ } + } + else if (m_temp_unique_colors.size() == 3) + { + // a b c d + // 0.0 1/3 2/3 1.0 + + for (uint i = 0; i <= 2; i++) + { + for (uint j = 0; j <= 2; j++) + { + if (i == j) + continue; + + // a b c + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.5f)); + + // a b d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 2.0f/3.0f)); + + // a c d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.0f/3.0f)); + + // b c d + m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, -.5f)); + } + } + } + + m_unique_packed_colors.resize(0); + + for (uint i = 0; i < m_temp_unique_colors.size(); i++) + { + const color_quad_u8& unique_color = m_temp_unique_colors[i].m_color; + const uint16 packed_color = dxt1_block::pack_color(unique_color, true); + + if (std::find(m_unique_packed_colors.begin(), m_unique_packed_colors.end(), packed_color) != m_unique_packed_colors.end()) + continue; + + m_unique_packed_colors.push_back(packed_color); + } + + if (m_unique_packed_colors.size() < 2) + return; + + for (uint alt_rounding = 0; alt_rounding < 2; alt_rounding++) + { + for (uint i = 0; i < m_unique_packed_colors.size() - 1; i++) + { + for (uint j = i + 1; j < m_unique_packed_colors.size(); j++) + { + evaluate_solution( + dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j]), + true, + (alt_rounding == 0) ? 
&m_best_solution : NULL, + (alt_rounding != 0)); + + if (m_trial_solution.m_error == 0) + { + if (alt_rounding) + m_best_solution = m_trial_solution; + + return; + } + } + } + } + + return; + } + + bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization() + { + const params* pOrig_params = m_pParams; + pOrig_params; + results* pOrig_results = m_pResults; + + uint num_dark_colors = 0; + + for (uint i = 0; i < m_unique_colors.size(); i++) + if ( (m_unique_colors[i].m_color[0] <= 4) && (m_unique_colors[i].m_color[1] <= 4) && (m_unique_colors[i].m_color[2] <= 4) ) + num_dark_colors++; + + if ( (!num_dark_colors) || (num_dark_colors == m_unique_colors.size()) ) + return true; + + params trial_params(*m_pParams); + crnlib::vector trial_colors; + trial_colors.insert(0, m_pParams->m_pPixels, m_pParams->m_num_pixels); + + trial_params.m_pPixels = trial_colors.get_ptr(); + trial_params.m_pixels_have_alpha = true; + + for (uint i = 0; i < trial_colors.size(); i++) + if ( (trial_colors[i][0] <= 4) && (trial_colors[i][1] <= 4) && (trial_colors[i][2] <= 4) ) + trial_colors[i][3] = 0; + + results trial_results; + + crnlib::vector trial_selectors(m_pParams->m_num_pixels); + trial_results.m_pSelectors = trial_selectors.get_ptr(); + + if (!compute_internal(trial_params, trial_results, NULL)) + return false; + + CRNLIB_ASSERT(trial_results.m_alpha_block); + + color_quad_u8 c[4]; + dxt1_block::get_block_colors3(c, trial_results.m_low_color, trial_results.m_high_color); + + uint64 trial_error = 0; + + for (uint i = 0; i < trial_colors.size(); i++) + { + if (trial_colors[i][3] == 0) + { + CRNLIB_ASSERT(trial_selectors[i] == 3); + } + else + { + CRNLIB_ASSERT(trial_selectors[i] != 3); + } + + trial_error += color_distance(m_perceptual, trial_colors[i], c[trial_selectors[i]], false); + } + + if (trial_error < pOrig_results->m_error) + { + pOrig_results->m_error = trial_error; + + pOrig_results->m_low_color = trial_results.m_low_color; + pOrig_results->m_high_color = 
trial_results.m_high_color; + + if (pOrig_results->m_pSelectors) + memcpy(pOrig_results->m_pSelectors, trial_results.m_pSelectors, m_pParams->m_num_pixels); + + pOrig_results->m_alpha_block = true; + } + + return true; + } + + bool dxt1_endpoint_optimizer::compute_internal(const params& p, results& r, solution_vec* pSolutions) + { + clear(); + + m_pParams = &p; + m_pResults = &r; + m_pSolutions = pSolutions; + + m_has_color_weighting = (m_pParams->m_color_weights[0] != 1) || (m_pParams->m_color_weights[1] != 1) || (m_pParams->m_color_weights[2] != 1); + m_perceptual = m_pParams->m_perceptual && !m_has_color_weighting && !m_pParams->m_grayscale_sampling; + + find_unique_colors(); + + m_best_solution.clear(); + + if (m_unique_colors.empty()) + return handle_all_transparent_block(); + else if ((m_unique_colors.size() == 1) && (!m_has_transparent_pixels)) + return handle_solid_block(); + else + { + if (!handle_multicolor_block()) + return false; + + if ((m_all_pixels_grayscale) && (m_best_solution.m_error)) + { + if (!handle_grayscale_block()) + return false; + } + } + + return true; + } + + bool dxt1_endpoint_optimizer::compute(const params& p, results& r, solution_vec* pSolutions) + { + if (!p.m_pPixels) + return false; + + bool status = compute_internal(p, r, pSolutions); + if (!status) + return false; + + if ( (m_pParams->m_use_alpha_blocks) && (m_pParams->m_use_transparent_indices_for_black) && (!m_pParams->m_pixels_have_alpha) && (!pSolutions) ) + { + if (!try_alpha_as_black_optimization()) + return false; + } + + return true; + } + + void dxt1_endpoint_optimizer::find_unique_colors() + { + m_has_transparent_pixels = false; + + uint num_opaque_pixels = 0; + + const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? 
(m_pParams->m_dxt1a_alpha_threshold << 24U) : 0; + + const uint32* pSrc_pixels = reinterpret_cast(m_pParams->m_pPixels); + const uint32* pSrc_pixels_end = pSrc_pixels + m_pParams->m_num_pixels; + + m_unique_colors.resize(m_pParams->m_num_pixels); + uint num_unique_colors = 0; + + m_all_pixels_grayscale = true; + + do + { + uint32 c = utils::read_le32(pSrc_pixels); + pSrc_pixels++; + + if (c < alpha_thresh) + { + m_has_transparent_pixels = true; + continue; + } + + if (m_all_pixels_grayscale) + { + uint r = c & 0xFF; + uint g = (c >> 8) & 0xFF; + uint b = (c >> 16) & 0xFF; + if ((r != g) || (r != b)) + m_all_pixels_grayscale = false; + } + + c |= 0xFF000000U; + + unique_color_hash_map::insert_result ins_result(m_unique_color_hash_map.insert(c, num_unique_colors)); + + if (ins_result.second) + { + utils::write_le32(&m_unique_colors[num_unique_colors].m_color.m_u32, c); + m_unique_colors[num_unique_colors].m_weight = 1; + num_unique_colors++; + } + else + m_unique_colors[ins_result.first->second].m_weight++; + + num_opaque_pixels++; + + } while (pSrc_pixels != pSrc_pixels_end); + + m_unique_colors.resize(num_unique_colors); + + m_total_unique_color_weight = num_opaque_pixels; + } + +} // namespace crnlib diff --git a/crnlib/crn_dxt1.h b/crnlib/crn_dxt1.h new file mode 100644 index 00000000..7ff7388e --- /dev/null +++ b/crnlib/crn_dxt1.h @@ -0,0 +1,352 @@ +// File: crn_dxt1.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt.h" + +namespace crnlib +{ + struct dxt1_solution_coordinates + { + inline dxt1_solution_coordinates() : m_low_color(0), m_high_color(0){ } + + inline dxt1_solution_coordinates(uint16 l, uint16 h) : m_low_color(l), m_high_color(h) { } + + inline dxt1_solution_coordinates(const color_quad_u8& l, const color_quad_u8& h, bool scaled = true) : + m_low_color(dxt1_block::pack_color(l, scaled)), + m_high_color(dxt1_block::pack_color(h, scaled)) + { + } + + inline dxt1_solution_coordinates(vec3F nl, vec3F nh) + 
{ +#if CRNLIB_DXT_ALT_ROUNDING + // Umm, wtf? + nl.clamp(0.0f, .999f); + nh.clamp(0.0f, .999f); + color_quad_u8 l( (int)floor(nl[0] * 32.0f), (int)floor(nl[1] * 64.0f), (int)floor(nl[2] * 32.0f), 255); + color_quad_u8 h( (int)floor(nh[0] * 32.0f), (int)floor(nh[1] * 64.0f), (int)floor(nh[2] * 32.0f), 255); +#else + // Fixes the bins + color_quad_u8 l( (int)floor(.5f + nl[0] * 31.0f), (int)floor(.5f + nl[1] * 63.0f), (int)floor(.5f + nl[2] * 31.0f), 255); + color_quad_u8 h( (int)floor(.5f + nh[0] * 31.0f), (int)floor(.5f + nh[1] * 63.0f), (int)floor(.5f + nh[2] * 31.0f), 255); +#endif + + m_low_color = dxt1_block::pack_color(l, false); + m_high_color = dxt1_block::pack_color(h, false); + } + + uint16 m_low_color; + uint16 m_high_color; + + inline void clear() + { + m_low_color = 0; + m_high_color = 0; + } + + inline dxt1_solution_coordinates& canonicalize() + { + if (m_low_color < m_high_color) + utils::swap(m_low_color, m_high_color); + return *this; + } + + inline operator size_t() const { return fast_hash(this, sizeof(*this)); } + + inline bool operator== (const dxt1_solution_coordinates& other) const + { + uint16 l0 = math::minimum(m_low_color, m_high_color); + uint16 h0 = math::maximum(m_low_color, m_high_color); + + uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); + uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); + + return (l0 == l1) && (h0 == h1); + } + + inline bool operator!= (const dxt1_solution_coordinates& other) const + { + return !(*this == other); + } + + inline bool operator< (const dxt1_solution_coordinates& other) const + { + uint16 l0 = math::minimum(m_low_color, m_high_color); + uint16 h0 = math::maximum(m_low_color, m_high_color); + + uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); + uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); + + if (l0 < l1) + return true; + else if (l0 == l1) + { + if (h0 < h1) + return true; + } + + return false; + } + }; + + typedef crnlib::vector 
dxt1_solution_coordinates_vec; + + CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_solution_coordinates); + + struct unique_color + { + inline unique_color() { } + inline unique_color(const color_quad_u8& color, uint weight) : m_color(color), m_weight(weight) { } + + color_quad_u8 m_color; + uint m_weight; + + inline bool operator< (const unique_color& c) const + { + return *reinterpret_cast(&m_color) < *reinterpret_cast(&c.m_color); + } + + inline bool operator== (const unique_color& c) const + { + return *reinterpret_cast(&m_color) == *reinterpret_cast(&c.m_color); + } + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(unique_color); + + class dxt1_endpoint_optimizer + { + public: + dxt1_endpoint_optimizer(); + + struct params + { + params() : + m_block_index(0), + m_pPixels(NULL), + m_num_pixels(0), + m_dxt1a_alpha_threshold(128U), + m_quality(cCRNDXTQualityUber), + m_pixels_have_alpha(false), + m_use_alpha_blocks(true), + m_perceptual(true), + m_grayscale_sampling(false), + m_endpoint_caching(true), + m_use_transparent_indices_for_black(false), + m_force_alpha_blocks(false) + { + m_color_weights[0] = 1; + m_color_weights[1] = 1; + m_color_weights[2] = 1; + } + + uint m_block_index; + + const color_quad_u8* m_pPixels; + uint m_num_pixels; + uint m_dxt1a_alpha_threshold; + + crn_dxt_quality m_quality; + + bool m_pixels_have_alpha; + bool m_use_alpha_blocks; + bool m_perceptual; + bool m_grayscale_sampling; + bool m_endpoint_caching; + bool m_use_transparent_indices_for_black; + bool m_force_alpha_blocks; + int m_color_weights[3]; + }; + + struct results + { + inline results() : m_pSelectors(NULL) { } + + uint64 m_error; + + uint16 m_low_color; + uint16 m_high_color; + + uint8* m_pSelectors; + bool m_alpha_block; + }; + + struct solution + { + solution() { } + + solution(const solution& other) + { + m_results = other.m_results; + m_selectors = other.m_selectors; + m_results.m_pSelectors = m_selectors.begin(); + } + + solution& operator= (const solution& rhs) + { + if (this == &rhs) + 
return *this; + + m_results = rhs.m_results; + m_selectors = rhs.m_selectors; + m_results.m_pSelectors = m_selectors.begin(); + + return *this; + } + + results m_results; + crnlib::vector m_selectors; + + inline bool operator< (const solution& other) const + { + return m_results.m_error < other.m_results.m_error; + } + static inline bool coords_equal(const solution& lhs, const solution& rhs) + { + return (lhs.m_results.m_low_color == rhs.m_results.m_low_color) && (lhs.m_results.m_high_color == rhs.m_results.m_high_color); + } + }; + typedef crnlib::vector solution_vec; + + bool compute(const params& p, results& r, solution_vec* pSolutions = NULL); + + private: + const params* m_pParams; + results* m_pResults; + solution_vec* m_pSolutions; + + bool m_perceptual; + bool m_has_color_weighting; + + typedef crnlib::vector unique_color_vec; + + //typedef crnlib::hash_map > unique_color_hash_map; + typedef crnlib::hash_map unique_color_hash_map; + unique_color_hash_map m_unique_color_hash_map; + + unique_color_vec m_unique_colors; // excludes transparent colors! 
+ unique_color_vec m_temp_unique_colors; + + uint m_total_unique_color_weight; + + bool m_has_transparent_pixels; + + vec3F_array m_norm_unique_colors; + vec3F m_mean_norm_color; + + vec3F_array m_norm_unique_colors_weighted; + vec3F m_mean_norm_color_weighted; + + vec3F m_principle_axis; + + bool m_all_pixels_grayscale; + + crnlib::vector m_unique_packed_colors; + crnlib::vector m_trial_selectors; + + crnlib::vector m_low_coords; + crnlib::vector m_high_coords; + + enum { cMaxPrevResults = 4 }; + dxt1_solution_coordinates m_prev_results[cMaxPrevResults]; + uint m_num_prev_results; + + crnlib::vector m_lo_cells; + crnlib::vector m_hi_cells; + + uint m_total_evals; + + struct potential_solution + { + potential_solution() : m_coords(), m_error(UINT64_MAX), m_alpha_block(false), m_valid(false) + { + } + + dxt1_solution_coordinates m_coords; + crnlib::vector m_selectors; + uint64 m_error; + bool m_alpha_block; + bool m_valid; + + void clear() + { + m_coords.clear(); + m_selectors.resize(0); + m_error = UINT64_MAX; + m_alpha_block = false; + m_valid = false; + } + + bool are_selectors_all_equal() const + { + if (m_selectors.empty()) + return false; + const uint s = m_selectors[0]; + for (uint i = 1; i < m_selectors.size(); i++) + if (m_selectors[i] != s) + return false; + return true; + } + }; + + potential_solution m_trial_solution; + potential_solution m_best_solution; + + typedef crnlib::hash_map solution_hash_map; + solution_hash_map m_solutions_tried; + + bool refine_solution(int refinement_level = 0); + + bool evaluate_solution( + const dxt1_solution_coordinates& coords, + bool early_out, + potential_solution* pBest_solution, + bool alternate_rounding = false); + + bool evaluate_solution_uber( + potential_solution& solution, + const dxt1_solution_coordinates& coords, + bool early_out, + potential_solution* pBest_solution, + bool alternate_rounding = false); + + bool evaluate_solution_fast( + potential_solution& solution, + const dxt1_solution_coordinates& coords, 
+ bool early_out, + potential_solution* pBest_solution, + bool alternate_rounding = false); + + void clear(); + void find_unique_colors(); + bool handle_all_transparent_block(); + bool handle_solid_block(); + bool handle_multicolor_block(); + bool handle_grayscale_block(); + void compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def); + void compute_vectors(const vec3F& perceptual_weights); + void return_solution(results& results, const potential_solution& solution); + void try_combinatorial_encoding(); + void optimize_endpoint_comps(); + bool optimize_endpoints(vec3F& low_color, vec3F& high_color); + bool try_alpha_as_black_optimization(); + bool try_average_block_as_solid(); + bool try_median4(const vec3F& low_color, const vec3F& high_color); + + bool compute_internal(const params& p, results& r, solution_vec* pSolutions); + + unique_color lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding = 1); + + inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha); + + static inline vec3F unpack_to_vec3F_raw(uint16 packed_color); + static inline vec3F unpack_to_vec3F(uint16 packed_color); + }; + + inline void swap(dxt1_endpoint_optimizer::solution& a, dxt1_endpoint_optimizer::solution& b) + { + std::swap(a.m_results, b.m_results); + a.m_selectors.swap(b.m_selectors); + } + +} // namespace crnlib diff --git a/crnlib/crn_dxt5a.cpp b/crnlib/crn_dxt5a.cpp new file mode 100644 index 00000000..f2fe2ee0 --- /dev/null +++ b/crnlib/crn_dxt5a.cpp @@ -0,0 +1,209 @@ +// File: crn_dxt5a.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dxt5a.h" +#include "crn_ryg_dxt.hpp" +#include "crn_dxt_fast.h" +#include "crn_intersect.h" + +namespace crnlib +{ + dxt5_endpoint_optimizer::dxt5_endpoint_optimizer() : + m_pParams(NULL), + m_pResults(NULL) + { + m_unique_values.reserve(16); + m_unique_value_weights.reserve(16); + } + + bool 
dxt5_endpoint_optimizer::compute(const params& p, results& r) + { + m_pParams = &p; + m_pResults = &r; + + if ((!p.m_num_pixels) || (!p.m_pPixels)) + return false; + + m_unique_values.resize(0); + m_unique_value_weights.resize(0); + + for (uint i = 0; i < 256; i++) + m_unique_value_map[i] = -1; + + for (uint i = 0; i < p.m_num_pixels; i++) + { + uint alpha = p.m_pPixels[i][p.m_comp_index]; + + int index = m_unique_value_map[alpha]; + + if (index == -1) + { + index = m_unique_values.size(); + + m_unique_value_map[alpha] = index; + + m_unique_values.push_back(static_cast(alpha)); + m_unique_value_weights.push_back(0); + } + + m_unique_value_weights[index]++; + } + + if (m_unique_values.size() == 1) + { + r.m_block_type = 0; + r.m_error = 0; + r.m_first_endpoint = m_unique_values[0]; + r.m_second_endpoint = m_unique_values[0]; + memset(r.m_pSelectors, 0, p.m_num_pixels); + return true; + } + + m_trial_selectors.resize(m_unique_values.size()); + m_best_selectors.resize(m_unique_values.size()); + + r.m_error = UINT64_MAX; + + for (uint i = 0; i < m_unique_values.size() - 1; i++) + { + const uint low_endpoint = m_unique_values[i]; + + for (uint j = i + 1; j < m_unique_values.size(); j++) + { + const uint high_endpoint = m_unique_values[j]; + + evaluate_solution(low_endpoint, high_endpoint); + } + } + + if ((m_pParams->m_quality >= cCRNDXTQualityBetter) && (m_pResults->m_error)) + { + m_flags.resize(256 * 256); + m_flags.clear_all_bits(); + + const int cProbeAmount = (m_pParams->m_quality == cCRNDXTQualityUber) ? 
16 : 8; + + for (int l_delta = -cProbeAmount; l_delta <= cProbeAmount; l_delta++) + { + const int l = m_pResults->m_first_endpoint + l_delta; + if (l < 0) + continue; + else if (l > 255) + break; + + const uint bit_index = l * 256; + + for (int h_delta = -cProbeAmount; h_delta <= cProbeAmount; h_delta++) + { + const int h = m_pResults->m_second_endpoint + h_delta; + if (h < 0) + continue; + else if (h > 255) + break; + + //if (m_flags.get_bit(bit_index + h)) + // continue; + if ((m_flags.get_bit(bit_index + h)) || (m_flags.get_bit(h * 256 + l))) + continue; + m_flags.set_bit(bit_index + h); + + evaluate_solution(static_cast(l), static_cast(h)); + } + } + } + + if (m_pResults->m_first_endpoint == m_pResults->m_second_endpoint) + { + for (uint i = 0; i < m_best_selectors.size(); i++) + m_best_selectors[i] = 0; + } + else if (m_pResults->m_block_type) + { + //if (l > h) + // eight alpha + // else + // six alpha + + if (m_pResults->m_first_endpoint > m_pResults->m_second_endpoint) + { + utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); + for (uint i = 0; i < m_best_selectors.size(); i++) + m_best_selectors[i] = g_six_alpha_invert_table[m_best_selectors[i]]; + } + } + else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) + { + utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); + for (uint i = 0; i < m_best_selectors.size(); i++) + m_best_selectors[i] = g_eight_alpha_invert_table[m_best_selectors[i]]; + } + + for (uint i = 0; i < m_pParams->m_num_pixels; i++) + { + uint alpha = m_pParams->m_pPixels[i][m_pParams->m_comp_index]; + + int index = m_unique_value_map[alpha]; + + m_pResults->m_pSelectors[i] = m_best_selectors[index]; + } + + return true; + } + + void dxt5_endpoint_optimizer::evaluate_solution(uint low_endpoint, uint high_endpoint) + { + for (uint block_type = 0; block_type < (m_pParams->m_use_both_block_types ? 
2U : 1U); block_type++) + { + uint selector_values[8]; + + if (!block_type) + dxt5_block::get_block_values8(selector_values, low_endpoint, high_endpoint); + else + dxt5_block::get_block_values6(selector_values, low_endpoint, high_endpoint); + + uint64 trial_error = 0; + + for (uint i = 0; i < m_unique_values.size(); i++) + { + const uint val = m_unique_values[i]; + const uint weight = m_unique_value_weights[i]; + + uint best_selector_error = UINT_MAX; + uint best_selector = 0; + + for (uint j = 0; j < 8; j++) + { + int selector_error = val - selector_values[j]; + selector_error = selector_error * selector_error * (int)weight; + + if (static_cast(selector_error) < best_selector_error) + { + best_selector_error = selector_error; + best_selector = j; + if (!best_selector_error) + break; + } + } + + m_trial_selectors[i] = static_cast(best_selector); + trial_error += best_selector_error; + + if (trial_error > m_pResults->m_error) + break; + } + + if (trial_error < m_pResults->m_error) + { + m_pResults->m_error = trial_error; + m_pResults->m_first_endpoint = static_cast(low_endpoint); + m_pResults->m_second_endpoint = static_cast(high_endpoint); + m_pResults->m_block_type = static_cast(block_type); + m_best_selectors.swap(m_trial_selectors); + + if (!trial_error) + break; + } + } + } + +} // namespace crnlib diff --git a/crnlib/crn_dxt5a.h b/crnlib/crn_dxt5a.h new file mode 100644 index 00000000..a10f7724 --- /dev/null +++ b/crnlib/crn_dxt5a.h @@ -0,0 +1,66 @@ +// File: crn_dxt5a.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt.h" + +namespace crnlib +{ + class dxt5_endpoint_optimizer + { + public: + dxt5_endpoint_optimizer(); + + struct params + { + params() : + m_block_index(0), + m_pPixels(NULL), + m_num_pixels(0), + m_comp_index(3), + m_quality(cCRNDXTQualityUber), + m_use_both_block_types(true) + { + } + + uint m_block_index; + + const color_quad_u8* m_pPixels; + uint m_num_pixels; + uint m_comp_index; + + 
crn_dxt_quality m_quality; + + bool m_use_both_block_types; + }; + + struct results + { + uint8* m_pSelectors; + + uint64 m_error; + + uint8 m_first_endpoint; + uint8 m_second_endpoint; + + uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha + }; + + bool compute(const params& p, results& r); + + private: + const params* m_pParams; + results* m_pResults; + + crnlib::vector m_unique_values; + crnlib::vector m_unique_value_weights; + + crnlib::vector m_trial_selectors; + crnlib::vector m_best_selectors; + int m_unique_value_map[256]; + + sparse_bit_array m_flags; + + void evaluate_solution(uint low_endpoint, uint high_endpoint); + }; + +} // namespace crnlib diff --git a/crnlib/crn_dxt_endpoint_refiner.cpp b/crnlib/crn_dxt_endpoint_refiner.cpp new file mode 100644 index 00000000..a7a312e5 --- /dev/null +++ b/crnlib/crn_dxt_endpoint_refiner.cpp @@ -0,0 +1,362 @@ +// File: crn_dxt_endpoint_refiner.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dxt_endpoint_refiner.h" +#include "crn_dxt1.h" + +namespace crnlib +{ + dxt_endpoint_refiner::dxt_endpoint_refiner() : + m_pParams(NULL), + m_pResults(NULL) + { + } + + bool dxt_endpoint_refiner::refine(const params& p, results& r) + { + if (!p.m_num_pixels) + return false; + + m_pParams = &p; + m_pResults = &r; + + r.m_error = UINT64_MAX; + r.m_low_color = 0; + r.m_high_color = 0; + + double alpha2_sum = 0.0f; + double beta2_sum = 0.0f; + double alphabeta_sum = 0.0f; + + vec<3, double> alphax_sum( 0.0f ); + vec<3, double> betax_sum( 0.0f ); + + vec<3, double> first_color( 0.0f ); + + // This linear solver is from Squish. 
+ for( uint i = 0; i < p.m_num_pixels; ++i ) + { + uint8 c = p.m_pSelectors[i]; + + double k; + if (p.m_dxt1_selectors) + k = g_dxt1_to_linear[c] * 1.0f/3.0f; + else + k = g_dxt5_to_linear[c] * 1.0f/7.0f; + + double alpha = 1.0f - k; + double beta = k; + + vec<3, double> x; + + if (p.m_dxt1_selectors) + x.set( p.m_pPixels[i][0] * 1.0f/255.0f, p.m_pPixels[i][1] * 1.0f/255.0f, p.m_pPixels[i][2] * 1.0f/255.0f ); + else + x.set( p.m_pPixels[i][p.m_alpha_comp_index]/255.0f ); + + if (!i) + first_color = x; + + alpha2_sum += alpha*alpha; + beta2_sum += beta*beta; + alphabeta_sum += alpha*beta; + alphax_sum += alpha*x; + betax_sum += beta*x; + } + + // zero where non-determinate + vec<3, double> a, b; + if( beta2_sum == 0.0f ) + { + a = alphax_sum / alpha2_sum; + b.clear(); + } + else if( alpha2_sum == 0.0f ) + { + a.clear(); + b = betax_sum / beta2_sum; + } + else + { + double factor = alpha2_sum*beta2_sum - alphabeta_sum*alphabeta_sum; + if (factor != 0.0f) + { + a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum ) / factor; + b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum ) / factor; + } + else + { + a = first_color; + b = first_color; + } + } + + vec3F l(0.0f), h(0.0f); + l = a; + h = b; + + l.clamp(0.0f, 1.0f); + h.clamp(0.0f, 1.0f); + + if (p.m_dxt1_selectors) + optimize_dxt1(l, h); + else + optimize_dxt5(l, h); + + //if (r.m_low_color < r.m_high_color) + // utils::swap(r.m_low_color, r.m_high_color); + + return r.m_error < p.m_error_to_beat; + } + + void dxt_endpoint_refiner::optimize_dxt5(vec3F low_color, vec3F high_color) + { + float nl = low_color[0]; + float nh = high_color[0]; + +#if CRNLIB_DXT_ALT_ROUNDING + nl = math::clamp(nl, 0.0f, .999f); + nh = math::clamp(nh, 0.0f, .999f); + uint il = (int)floor(nl * 256.0f); + uint ih = (int)floor(nh * 256.0f); +#else + uint il = (int)floor(.5f + math::clamp(nl, 0.0f, 1.0f) * 255.0f); + uint ih = (int)floor(.5f + math::clamp(nh, 0.0f, 1.0f) * 255.0f); +#endif + + crnlib::vector trial_solutions; + 
trial_solutions.reserve(256); + trial_solutions.push_back(il | (ih << 8)); + + sparse_bit_array flags; + flags.resize(256 * 256); + + flags.set_bit((il * 256) + ih); + + const int cProbeAmount = 11; + + for (int l_delta = -cProbeAmount; l_delta <= cProbeAmount; l_delta++) + { + const int l = il + l_delta; + if (l < 0) + continue; + else if (l > 255) + break; + + const uint bit_index = l * 256; + + for (int h_delta = -cProbeAmount; h_delta <= cProbeAmount; h_delta++) + { + const int h = ih + h_delta; + if (h < 0) + continue; + else if (h > 255) + break; + + if ((flags.get_bit(bit_index + h)) || (flags.get_bit(h * 256 + l))) + continue; + + flags.set_bit(bit_index + h); + + trial_solutions.push_back(l | (h << 8)); + } + } + + for (uint trial = 0; trial < trial_solutions.size(); trial++) + { + uint l = trial_solutions[trial] & 0xFF; + uint h = trial_solutions[trial] >> 8; + + if (l == h) + { + if (h) + h--; + else + l++; + } + else if (l < h) + { + utils::swap(l, h); + } + + CRNLIB_ASSERT(l > h); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values8(values, l, h); + + uint total_error = 0; + + for (uint j = 0; j < m_pParams->m_num_pixels; j++) + { + int p = m_pParams->m_pPixels[j][m_pParams->m_alpha_comp_index]; + int c = values[m_pParams->m_pSelectors[j]]; + + int error = p - c; + error *= error; + + total_error += error; + + if (total_error > m_pResults->m_error) + break; + } + + if (total_error < m_pResults->m_error) + { + m_pResults->m_error = total_error; + m_pResults->m_low_color = static_cast(l); + m_pResults->m_high_color = static_cast(h); + + if (m_pResults->m_error == 0) + return; + } + } + } + + void dxt_endpoint_refiner::optimize_dxt1(vec3F low_color, vec3F high_color) + { + uint selector_hist[4]; + utils::zero_object(selector_hist); + for (uint i = 0; i < m_pParams->m_num_pixels; i++) + selector_hist[m_pParams->m_pSelectors[i]]++; + + dxt1_solution_coordinates c(low_color, high_color); + + for (uint pass = 0; pass < 8; pass++) + { + const 
uint64 initial_error = m_pResults->m_error; + + dxt1_solution_coordinates_vec coords_to_try; + + coords_to_try.resize(0); + + color_quad_u8 lc(dxt1_block::unpack_color(c.m_low_color, false)); + color_quad_u8 hc(dxt1_block::unpack_color(c.m_high_color, false)); + + for (int i = 0; i < 27; i++) + { + if (13 == i) continue; + + const int ir = (i % 3) - 1; + const int ig = ((i / 3) % 3) - 1; + const int ib = ((i / 9) % 3) - 1; + + int r = lc.r + ir; + int g = lc.g + ig; + int b = lc.b + ib; + if ((r < 0) || (r > 31)|| (g < 0) || (g > 63) || (b < 0) || (b > 31)) continue; + + coords_to_try.push_back( + dxt1_solution_coordinates(dxt1_block::pack_color(r, g, b, false), c.m_high_color) + ); + } + + for (int i = 0; i < 27; i++) + { + if (13 == i) continue; + + const int ir = (i % 3) - 1; + const int ig = ((i / 3) % 3) - 1; + const int ib = ((i / 9) % 3) - 1; + + int r = hc.r + ir; + int g = hc.g + ig; + int b = hc.b + ib; + if ((r < 0) || (r > 31)|| (g < 0) || (g > 63) || (b < 0) || (b > 31)) continue; + + coords_to_try.push_back(dxt1_solution_coordinates(c.m_low_color, dxt1_block::pack_color(r, g, b, false))); + } + + std::sort(coords_to_try.begin(), coords_to_try.end()); + + dxt1_solution_coordinates_vec::const_iterator p_last = std::unique(coords_to_try.begin(), coords_to_try.end()); + uint num_coords_to_try = (uint)(p_last - coords_to_try.begin()); + + for (uint i = 0; i < num_coords_to_try; i++) + { + color_quad_u8 block_colors[4]; + uint16 l = coords_to_try[i].m_low_color; + uint16 h = coords_to_try[i].m_high_color; + if (l < h) + utils::swap(l, h); + else if (l == h) + { + color_quad_u8 lc(dxt1_block::unpack_color(l, false)); + color_quad_u8 hc(dxt1_block::unpack_color(h, false)); + + bool retry = false; + if ((selector_hist[0] + selector_hist[2]) > (selector_hist[1] + selector_hist[3])) + { + // l affects the output more than h, so muck with h + if (hc[2] != 0) + hc[2]--; + else if (hc[0] != 0) + hc[0]--; + else if (hc[1] != 0) + hc[1]--; + else + retry = true; + } 
+ else + { + // h affects the output more than l, so muck with l + if (lc[2] != 31) + lc[2]++; + else if (lc[0] != 31) + lc[0]++; + else if (lc[1] != 63) + lc[1]++; + else + retry = true; + } + + if (retry) + { + if (l == 0) + l++; + else + h--; + } + else + { + l = dxt1_block::pack_color(lc, false); + h = dxt1_block::pack_color(hc, false); + } + + CRNLIB_ASSERT(l > h); + } + + dxt1_block::get_block_colors4(block_colors, l, h); + + uint total_error = 0; + + for (uint j = 0; j < m_pParams->m_num_pixels; j++) + { + const color_quad_u8& c = block_colors[m_pParams->m_pSelectors[j]]; + total_error += color::color_distance(m_pParams->m_perceptual, c, m_pParams->m_pPixels[j], false); + + if (total_error > m_pResults->m_error) + break; + } + + if (total_error < m_pResults->m_error) + { + m_pResults->m_error = total_error; + m_pResults->m_low_color = l; + m_pResults->m_high_color = h; + CRNLIB_ASSERT(l > h); + if (m_pResults->m_error == 0) + return; + } + } + + if (m_pResults->m_error == initial_error) + break; + + c.m_low_color = m_pResults->m_low_color; + c.m_high_color = m_pResults->m_high_color; + } + + } + +} // namespace crnlib + diff --git a/crnlib/crn_dxt_endpoint_refiner.h b/crnlib/crn_dxt_endpoint_refiner.h new file mode 100644 index 00000000..3a97e132 --- /dev/null +++ b/crnlib/crn_dxt_endpoint_refiner.h @@ -0,0 +1,62 @@ +// File: crn_dxt_endpoint_refiner.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt.h" + +namespace crnlib +{ + // TODO: Experimental/Not fully implemented + class dxt_endpoint_refiner + { + public: + dxt_endpoint_refiner(); + + struct params + { + params() : + m_block_index(0), + m_pPixels(NULL), + m_num_pixels(0), + m_pSelectors(NULL), + m_alpha_comp_index(0), + m_error_to_beat(UINT64_MAX), + m_dxt1_selectors(true), + m_perceptual(true), + m_highest_quality(true) + { + } + + uint m_block_index; + + const color_quad_u8* m_pPixels; + uint m_num_pixels; + + const uint8* m_pSelectors; + + uint 
m_alpha_comp_index; + + uint64 m_error_to_beat; + + bool m_dxt1_selectors; + bool m_perceptual; + bool m_highest_quality; + }; + + struct results + { + uint16 m_low_color; + uint16 m_high_color; + uint64 m_error; + }; + + bool refine(const params& p, results& r); + + private: + const params* m_pParams; + results* m_pResults; + + void optimize_dxt1(vec3F low_color, vec3F high_color); + void optimize_dxt5(vec3F low_color, vec3F high_color); + }; + +} // namespace crnlib diff --git a/crnlib/crn_dxt_fast.cpp b/crnlib/crn_dxt_fast.cpp new file mode 100644 index 00000000..fffdcb9d --- /dev/null +++ b/crnlib/crn_dxt_fast.cpp @@ -0,0 +1,916 @@ +// File: crn_dxt_fast.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +// Parts of this module are derived from RYG's excellent public domain DXTx compressor. +#include "crn_core.h" +#include "crn_dxt_fast.h" +#include "crn_ryg_dxt.hpp" + +namespace crnlib +{ + namespace dxt_fast + { + static inline int mul_8bit(int a, int b) + { + int t = a * b + 128; + return (t + (t >> 8)) >> 8; + } + + static inline color_quad_u8& unpack_color(color_quad_u8& c, uint v) + { + uint rv = (v & 0xf800) >> 11; + uint gv = (v & 0x07e0) >> 5; + uint bv = (v & 0x001f) >> 0; + + c.r = ryg_dxt::Expand5[rv]; + c.g = ryg_dxt::Expand6[gv]; + c.b = ryg_dxt::Expand5[bv]; + c.a = 0; + + return c; + } + + static inline uint pack_color(const color_quad_u8& c) + { + return (mul_8bit(c.r, 31) << 11) + (mul_8bit(c.g, 63) << 5) + mul_8bit(c.b, 31); + } + + static inline void lerp_color(color_quad_u8& result, const color_quad_u8& p1, const color_quad_u8& p2, uint f) + { + CRNLIB_ASSERT(f <= 255); + + result.r = static_cast(p1.r + mul_8bit(p2.r - p1.r, f)); + result.g = static_cast(p1.g + mul_8bit(p2.g - p1.g, f)); + result.b = static_cast(p1.b + mul_8bit(p2.b - p1.b, f)); + } + + static inline void eval_colors(color_quad_u8* pColors, uint c0, uint c1) + { + unpack_color(pColors[0], c0); + unpack_color(pColors[1], c1); + +#if 0 + 
lerp_color(pColors[2], pColors[0], pColors[1], 0x55); + lerp_color(pColors[3], pColors[0], pColors[1], 0xAA); +#else + pColors[2].r = (pColors[0].r*2+pColors[1].r)/3; + pColors[2].g = (pColors[0].g*2+pColors[1].g)/3; + pColors[2].b = (pColors[0].b*2+pColors[1].b)/3; + + pColors[3].r = (pColors[1].r*2+pColors[0].r)/3; + pColors[3].g = (pColors[1].g*2+pColors[0].g)/3; + pColors[3].b = (pColors[1].b*2+pColors[0].b)/3; +#endif + } + + // false if all selectors equal + static bool match_block_colors(uint n, const color_quad_u8* pBlock, const color_quad_u8* pColors, uint8* pSelectors) + { + int dirr = pColors[0].r - pColors[1].r; + int dirg = pColors[0].g - pColors[1].g; + int dirb = pColors[0].b - pColors[1].b; + + int stops[4]; + for(int i = 0; i < 4; i++) + stops[i] = pColors[i].r*dirr + pColors[i].g*dirg + pColors[i].b*dirb; + + // 0 2 3 1 + int c0Point = stops[1] + stops[3]; + int halfPoint = stops[3] + stops[2]; + int c3Point = stops[2] + stops[0]; + + //dirr *= 2; + //dirg *= 2; + //dirb *= 2; + c0Point >>= 1; + halfPoint >>= 1; + c3Point >>= 1; + + bool status = false; + for (uint i = 0; i < n; i++) + { + int dot = pBlock[i].r*dirr + pBlock[i].g*dirg + pBlock[i].b*dirb; + + uint8 s; + if (dot < halfPoint) + s = (dot < c0Point) ? 1 : 3; + else + s = (dot < c3Point) ? 
2 : 0; + + pSelectors[i] = s; + + if (s != pSelectors[0]) + status = true; + } + + return status; + } + + static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3]) + { + int min[3], max[3]; + + for(uint ch = 0; ch < 3; ch++) + { + const uint8 *bp = ((const uint8 *) block) + ch; + int minv, maxv; + + int64 muv = bp[0]; + minv = maxv = bp[0]; + + const uint l = n << 2; + for (uint i = 4; i < l; i += 4) + { + muv += bp[i]; + minv = math::minimum(minv, bp[i]); + maxv = math::maximum(maxv, bp[i]); + } + + ave_color[ch] = static_cast((muv + (n / 2)) / n); + min[ch] = minv; + max[ch] = maxv; + } + + if ((min[0] == max[0]) && (min[1] == max[1]) && (min[2] == max[2])) + return false; + + // determine covariance matrix + double cov[6]; + for(int i=0;i<6;i++) + cov[i] = 0; + + for(uint i=0;i(vfr); + v_g = static_cast(vfg); + v_b = static_cast(vfb); + + axis[0] = (float)vfr; + axis[1] = (float)vfg; + axis[2] = (float)vfb; + } + + int mind = block[0].r * v_r + block[0].g * v_g + block[0].b * v_b; + int maxd = mind; + color_quad_u8 minp(block[0]); + color_quad_u8 maxp(block[0]); + + for(uint i = 1; i < n; i++) + { + int dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b; + + if (dot < mind) + { + mind = dot; + minp = block[i]; + } + + if (dot > maxd) + { + maxd = dot; + maxp = block[i]; + } + } + + max16 = pack_color(maxp); + min16 = pack_color(minp); + + return true; + } + + // The refinement function. (Clever code, part 2) + // Tries to optimize colors to suit block contents better. 
+ // (By solving a least squares system via normal equations+Cramer's rule) + static bool refine_block(uint n, const color_quad_u8 *block, uint &max16, uint &min16, const uint8* pSelectors) + { + static const int w1Tab[4] = { 3,0,2,1 }; + + static const int prods_0[4] = { 0x00,0x00,0x02,0x02 }; + static const int prods_1[4] = { 0x00,0x09,0x01,0x04 }; + static const int prods_2[4] = { 0x09,0x00,0x04,0x01 }; + + double akku_0 = 0; + double akku_1 = 0; + double akku_2 = 0; + double At1_r, At1_g, At1_b; + double At2_r, At2_g, At2_b; + + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for(uint i = 0; i < n; i++) + { + double r = block[i].r; + double g = block[i].g; + double b = block[i].b; + int step = pSelectors[i]; + + int w1 = w1Tab[step]; + + akku_0 += prods_0[step]; + akku_1 += prods_1[step]; + akku_2 += prods_2[step]; + At1_r += w1*r; + At1_g += w1*g; + At1_b += w1*b; + At2_r += r; + At2_g += g; + At2_b += b; + } + + At2_r = 3*At2_r - At1_r; + At2_g = 3*At2_g - At1_g; + At2_b = 3*At2_b - At1_b; + + double xx = akku_2; + double yy = akku_1; + double xy = akku_0; + + double t = xx * yy - xy * xy; + if (!yy || !xx || (fabs(t) < .0000125f)) + return false; + + double frb = (3.0f * 31.0f / 255.0f) / t; + double fg = frb * (63.0f / 31.0f); + + uint oldMin = min16; + uint oldMax = max16; + + // solve. 
+ max16 = math::clamp(static_cast((At1_r*yy - At2_r*xy)*frb+0.5f),0,31) << 11; + max16 |= math::clamp(static_cast((At1_g*yy - At2_g*xy)*fg +0.5f),0,63) << 5; + max16 |= math::clamp(static_cast((At1_b*yy - At2_b*xy)*frb+0.5f),0,31) << 0; + + min16 = math::clamp(static_cast((At2_r*xx - At1_r*xy)*frb+0.5f),0,31) << 11; + min16 |= math::clamp(static_cast((At2_g*xx - At1_g*xy)*fg +0.5f),0,63) << 5; + min16 |= math::clamp(static_cast((At2_b*xx - At1_b*xy)*frb+0.5f),0,31) << 0; + + return (oldMin != min16) || (oldMax != max16); + } + + // false if all selectors equal + static bool determine_selectors(uint n, const color_quad_u8* block, uint min16, uint max16, uint8* pSelectors) + { + color_quad_u8 color[4]; + + if (max16 != min16) + { + eval_colors(color, min16, max16); + + return match_block_colors(n, block, color, pSelectors); + } + + memset(pSelectors, 0, n); + return false; + } + + static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error) + { + color_quad_u8 color[4]; + + eval_colors(color, min16, max16); + + int dirr = color[0].r - color[1].r; + int dirg = color[0].g - color[1].g; + int dirb = color[0].b - color[1].b; + + int stops[4]; + for(int i = 0; i < 4; i++) + stops[i] = color[i].r*dirr + color[i].g*dirg + color[i].b*dirb; + + // 0 2 3 1 + int c0Point = stops[1] + stops[3]; + int halfPoint = stops[3] + stops[2]; + int c3Point = stops[2] + stops[0]; + + c0Point >>= 1; + halfPoint >>= 1; + c3Point >>= 1; + + uint64 total_error = 0; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& a = block[i]; + + uint s = 0; + if (min16 != max16) + { + int dot = a.r*dirr + a.g*dirg + a.b*dirb; + + if (dot < halfPoint) + s = (dot < c0Point) ? 1 : 3; + else + s = (dot < c3Point) ? 
2 : 0; + } + + const color_quad_u8& b = color[s]; + + int e = a[0] - b[0]; + total_error += e * e; + + e = a[1] - b[1]; + total_error += e * e; + + e = a[2] - b[2]; + total_error += e * e; + + if (total_error >= early_out_error) + break; + } + + return total_error; + } + + static bool refine_endpoints(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors) + { + bool optimized = false; + + const int limits[3] = { 31, 63, 31 }; + + for (uint trial = 0; trial < 2; trial++) + { + color_quad_u8 color[4]; + eval_colors(color, low16, high16); + + uint64 total_error[3] = { 0, 0, 0 }; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& a = pBlock[i]; + + const uint s = pSelectors[i]; + const color_quad_u8& b = color[s]; + + int e = a[0] - b[0]; + total_error[0] += e * e; + + e = a[1] - b[1]; + total_error[1] += e * e; + + e = a[2] - b[2]; + total_error[2] += e * e; + } + + color_quad_u8 endpoints[2]; + endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); + endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); + + color_quad_u8 expanded_endpoints[2]; + expanded_endpoints[0] = dxt1_block::unpack_color((uint16)low16, true); + expanded_endpoints[1] = dxt1_block::unpack_color((uint16)high16, true); + + bool trial_optimized = false; + + for (uint axis = 0; axis < 3; axis++) + { + if (!total_error[axis]) + continue; + + const sU8* const pExpand = (axis == 1) ? 
ryg_dxt::Expand6 : ryg_dxt::Expand5; + + for (uint e = 0; e < 2; e++) + { + uint v[4]; + v[e^1] = expanded_endpoints[e^1][axis]; + + for (int t = -1; t <= 1; t += 2) + { + int a = endpoints[e][axis] + t; + if ((a < 0) || (a > limits[axis])) + continue; + + v[e] = pExpand[a]; + + //int delta = v[1] - v[0]; + //v[2] = v[0] + mul_8bit(delta, 0x55); + //v[3] = v[0] + mul_8bit(delta, 0xAA); + + v[2] = (v[0] * 2 + v[1]) / 3; + v[3] = (v[0] + v[1] * 2) / 3; + + uint64 axis_error = 0; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& p = pBlock[i]; + + int e = v[pSelectors[i]] - p[axis]; + + axis_error += e * e; + + if (axis_error >= total_error[axis]) + break; + } + + if (axis_error < total_error[axis]) + { + //total_error[axis] = axis_error; + + endpoints[e][axis] = (uint8)a; + expanded_endpoints[e][axis] = (uint8)v[e]; + + if (e) + high16 = dxt1_block::pack_color(endpoints[1], false); + else + low16 = dxt1_block::pack_color(endpoints[0], false); + + determine_selectors(n, pBlock, low16, high16, pSelectors); + + eval_colors(color, low16, high16); + + utils::zero_object(total_error); + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& a = pBlock[i]; + + const uint s = pSelectors[i]; + const color_quad_u8& b = color[s]; + + int e = a[0] - b[0]; + total_error[0] += e * e; + + e = a[1] - b[1]; + total_error[1] += e * e; + + e = a[2] - b[2]; + total_error[2] += e * e; + } + + trial_optimized = true; + } + + } // t + + } // e + } // axis + + if (!trial_optimized) + break; + + optimized = true; + + } // for ( ; ; ) + + return optimized; + } + + static void refine_endpoints2(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors, float axis[3]) + { + uint64 orig_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); + if (!orig_error) + return; + + float l = 1.0f / sqrt(axis[0]*axis[0] + axis[1]*axis[1] + axis[2]*axis[2]); + vec3F principle_axis(axis[0] * l, axis[1] * l, axis[2] * l); + + const float dist_per_trial = 
0.027063293f; + + const uint cMaxProbeRange = 8; + uint probe_low[cMaxProbeRange * 2 + 1]; + uint probe_high[cMaxProbeRange * 2 + 1]; + + int probe_range = 8; + uint num_iters = 4; + + const uint num_trials = probe_range * 2 + 1; + + vec3F scaled_principle_axis(principle_axis * dist_per_trial); + scaled_principle_axis[0] *= 31.0f; + scaled_principle_axis[1] *= 63.0f; + scaled_principle_axis[2] *= 31.0f; + vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); + initial_ofs[0] += .5f; + initial_ofs[1] += .5f; + initial_ofs[2] += .5f; + + uint64 cur_error = orig_error; + + for (uint iter = 0; iter < num_iters; iter++) + { + color_quad_u8 endpoints[2]; + + endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); + endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); + + vec3F low_color(endpoints[0][0], endpoints[0][1], endpoints[0][2]); + vec3F high_color(endpoints[1][0], endpoints[1][1], endpoints[1][2]); + + vec3F probe_low_color(low_color + initial_ofs); + for (uint i = 0; i < num_trials; i++) + { + int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); + probe_low[i] = b | (g << 5U) | (r << 11U); + + probe_low_color += scaled_principle_axis; + } + + vec3F probe_high_color(high_color + initial_ofs); + for (uint i = 0; i < num_trials; i++) + { + int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); + int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); + int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); + probe_high[i] = b | (g << 5U) | (r << 11U); + + probe_high_color += scaled_principle_axis; + } + + uint best_l = low16; + uint best_h = high16; + + enum { cMaxHash = 4 }; + uint64 hash[cMaxHash]; + for (uint i = 0; i < cMaxHash; i++) + hash[i] = 0; + + uint c = best_l | (best_h << 16); + c = fast_hash(&c, sizeof(c)); + hash[(c >> 6) & 3] = 1ULL << (c & 63); + + for (uint i = 0; i < 
num_trials; i++) + { + for (uint j = 0; j < num_trials; j++) + { + uint l = probe_low[i]; + uint h = probe_high[j]; + if (l < h) + utils::swap(l, h); + + uint c = l | (h << 16); + c = fast_hash(&c, sizeof(c)); + uint64 mask = 1ULL << (c & 63); + uint ofs = (c >> 6) & 3; + if (hash[ofs] & mask) + continue; + + hash[ofs] |= mask; + + uint64 new_error = determine_error(n, pBlock, l, h, cur_error); + if (new_error < cur_error) + { + best_l = l; + best_h = h; + cur_error = new_error; + } + } + } + + bool improved = false; + + if ((best_l != low16) || (best_h != high16)) + { + low16 = best_l; + high16 = best_h; + + determine_selectors(n, pBlock, low16, high16, pSelectors); + improved = true; + } + + if (refine_endpoints(n, pBlock, low16, high16, pSelectors)) + { + improved = true; + + uint64 cur_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); + if (!cur_error) + return; + } + + if (!improved) + break; + + } // iter + + //uint64 end_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); + //if (end_error > orig_error) DebugBreak(); + } + + static void compress_solid_block(uint n, uint ave_color[3], uint& low16, uint& high16, uint8* pSelectors) + { + uint r = ave_color[0]; + uint g = ave_color[1]; + uint b = ave_color[2]; + + memset(pSelectors, 2, n); + + low16 = (ryg_dxt::OMatch5[r][0]<<11) | (ryg_dxt::OMatch6[g][0]<<5) | ryg_dxt::OMatch5[b][0]; + high16 = (ryg_dxt::OMatch5[r][1]<<11) | (ryg_dxt::OMatch6[g][1]<<5) | ryg_dxt::OMatch5[b][1]; + } + + void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine) + { + CRNLIB_ASSERT((n & 15) == 0); + + uint ave_color[3]; + float axis[3]; + + if (!optimize_block_colors(n, block, low16, high16, ave_color, axis)) + { + compress_solid_block(n, ave_color, low16, high16, pSelectors); + } + else + { + if (!determine_selectors(n, block, low16, high16, pSelectors)) + compress_solid_block(n, ave_color, low16, high16, pSelectors); + else + { + if 
(refine_block(n, block, low16, high16, pSelectors)) + determine_selectors(n, block, low16, high16, pSelectors); + + if (refine) + refine_endpoints2(n, block, low16, high16, pSelectors, axis); + } + } + + if (low16 < high16) + { + utils::swap(low16, high16); + for (uint i = 0; i < n; i++) + pSelectors[i] ^= 1; + } + } + + void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine) + { + uint8 color_selectors[16]; + uint low16, high16; + dxt_fast::compress_color_block(16, pBlock, low16, high16, color_selectors, refine); + + pDXT1_block->set_low_color(static_cast(low16)); + pDXT1_block->set_high_color(static_cast(high16)); + + uint mask = 0; + for (int i = 15; i >= 0; i--) + { + mask <<= 2; + mask |= color_selectors[i]; + } + + pDXT1_block->m_selectors[0] = (uint8)(mask & 0xFF); + pDXT1_block->m_selectors[1] = (uint8)((mask >> 8) & 0xFF); + pDXT1_block->m_selectors[2] = (uint8)((mask >> 16) & 0xFF); + pDXT1_block->m_selectors[3] = (uint8)((mask >> 24) & 0xFF); + } + + void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index) + { + int min, max; + min = max = block[0][comp_index]; + + for (uint i = 1; i < n; i++) + { + min = math::minimum(min, block[i][comp_index]); + max = math::maximum(max, block[i][comp_index]); + } + + low8 = max; + high8 = min; + + int dist = max-min; + int bias = min*7 - (dist >> 1); + int dist4 = dist*4; + int dist2 = dist*2; + + for (uint i = 0; i < n; i++) + { + int a = block[i][comp_index]*7 - bias; + int ind,t; + + t = (dist4 - a) >> 31; ind = t & 4; a -= dist4 & t; + t = (dist2 - a) >> 31; ind += t & 2; a -= dist2 & t; + t = (dist - a) >> 31; ind += t & 1; + + ind = -ind & 7; + ind ^= (2 > ind); + + pSelectors[i] = static_cast(ind); + } + } + + void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index) + { + uint8 selectors[16]; + uint low8, high8; + + compress_alpha_block(16, pBlock, low8, high8, 
selectors, comp_index); + + pDXT5_block->set_low_alpha(low8); + pDXT5_block->set_high_alpha(high8); + + uint mask = 0; + uint bits = 0; + uint8* pDst = pDXT5_block->m_selectors; + + for (uint i = 0; i < 16; i++) + { + mask |= (selectors[i] << bits); + + if ((bits += 3) >= 8) + { + *pDst++ = static_cast(mask); + mask >>= 8; + bits -= 8; + } + } + } + + void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi) + { + uint64 ave64[3]; + ave64[0] = 0; + ave64[1] = 0; + ave64[2] = 0; + + for (uint i = 0; i < n; i++) + { + ave64[0] += pBlock[i].r; + ave64[1] += pBlock[i].g; + ave64[2] += pBlock[i].b; + } + + uint ave[3]; + ave[0] = static_cast((ave64[0] + (n / 2)) / n); + ave[1] = static_cast((ave64[1] + (n / 2)) / n); + ave[2] = static_cast((ave64[2] + (n / 2)) / n); + + int furthest_dist = -1; + uint furthest_index = 0; + for (uint i = 0; i < n; i++) + { + int r = pBlock[i].r - ave[0]; + int g = pBlock[i].g - ave[1]; + int b = pBlock[i].b - ave[2]; + int dist = r*r + g*g + b*b; + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest_index = i; + } + } + + color_quad_u8 lo_color(pBlock[furthest_index]); + + int opp_dist = -1; + uint opp_index = 0; + for (uint i = 0; i < n; i++) + { + int r = pBlock[i].r - lo_color.r; + int g = pBlock[i].g - lo_color.g; + int b = pBlock[i].b - lo_color.b; + int dist = r*r + g*g + b*b; + if (dist > opp_dist) + { + opp_dist = dist; + opp_index = i; + } + } + + color_quad_u8 hi_color(pBlock[opp_index]); + + for (uint i = 0; i < 3; i++) + { + lo_color[i] = static_cast((lo_color[i] + ave[i]) >> 1); + hi_color[i] = static_cast((hi_color[i] + ave[i]) >> 1); + } + + const uint cMaxIters = 4; + for (uint iter_index = 0; iter_index < cMaxIters; iter_index++) + { + if ((lo_color[0] == hi_color[0]) && (lo_color[1] == hi_color[1]) && (lo_color[2] == hi_color[2])) + break; + + uint64 new_color[2][3]; + uint weight[2]; + + utils::zero_object(new_color); + utils::zero_object(weight); + + int 
vec_r = hi_color[0] - lo_color[0]; + int vec_g = hi_color[1] - lo_color[1]; + int vec_b = hi_color[2] - lo_color[2]; + + int lo_dot = vec_r * lo_color[0] + vec_g * lo_color[1] + vec_b * lo_color[2]; + int hi_dot = vec_r * hi_color[0] + vec_g * hi_color[1] + vec_b * hi_color[2]; + int mid_dot = lo_dot + hi_dot; + + vec_r *= 2; + vec_g *= 2; + vec_b *= 2; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& c = pBlock[i]; + + const int dot = c[0] * vec_r + c[1] * vec_g + c[2] * vec_b; + const uint match_index = (dot > mid_dot); + + new_color[match_index][0] += c.r; + new_color[match_index][1] += c.g; + new_color[match_index][2] += c.b; + weight[match_index]++; + } + + if ((!weight[0]) || (!weight[1])) + break; + + uint8 new_color8[2][3]; + + for (uint j = 0; j < 2; j++) + for (uint i = 0; i < 3; i++) + new_color8[j][i] = static_cast((new_color[j][i] + (weight[j] / 2)) / weight[j]); + + if ((new_color8[0][0] == lo_color[0]) && (new_color8[0][1] == lo_color[1]) && (new_color8[0][2] == lo_color[2]) && + (new_color8[1][0] == hi_color[0]) && (new_color8[1][1] == hi_color[1]) && (new_color8[1][2] == hi_color[2])) + break; + + for (uint i = 0; i < 3; i++) + { + lo_color[i] = new_color8[0][i]; + hi_color[i] = new_color8[1][i]; + } + } + + uint energy[2] = { 0, 0 }; + for (uint i = 0; i < 3; i++) + { + energy[0] += lo_color[i] * lo_color[i]; + energy[1] += hi_color[i] * hi_color[i]; + } + + if (energy[0] > energy[1]) + utils::swap(lo_color, hi_color); + + lo = lo_color; + hi = hi_color; + } + + } // namespace dxt_fast + +} // namespace crnlib + + + + + + + + + + + + + + + diff --git a/crnlib/crn_dxt_fast.h b/crnlib/crn_dxt_fast.h new file mode 100644 index 00000000..07346a3f --- /dev/null +++ b/crnlib/crn_dxt_fast.h @@ -0,0 +1,21 @@ +// File: crn_dxt_fast.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_color.h" +#include "crn_dxt.h" + +namespace crnlib +{ + namespace dxt_fast + { + void compress_color_block(uint n, const 
color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine = false); + void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine = false); + + void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index); + void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index); + + void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi); + + } // namespace dxt_fast + +} // namespace crnlib diff --git a/crnlib/crn_dxt_hc.cpp b/crnlib/crn_dxt_hc.cpp new file mode 100644 index 00000000..1d92f97a --- /dev/null +++ b/crnlib/crn_dxt_hc.cpp @@ -0,0 +1,2544 @@ +// File: crn_dxt_hc.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dxt_hc.h" +#include "crn_image_utils.h" +#include "crn_console.h" +#include "crn_dxt_fast.h" + +#define CRNLIB_USE_FAST_DXT 1 +#define CRNLIB_ENABLE_DEBUG_MESSAGES 0 + +namespace crnlib +{ + static color_quad_u8 g_tile_layout_colors[cNumChunkTileLayouts] = + { + color_quad_u8(255,90,32,255), + color_quad_u8(64,210,192,255), + color_quad_u8(128,16,225,255), + color_quad_u8(255,192,200,255), + + color_quad_u8(255,128,200,255), + + color_quad_u8(255,0,0,255), + color_quad_u8(0,255,0,255), + color_quad_u8(0,0,255,255), + color_quad_u8(255,0,255,255) + }; + + dxt_hc::dxt_hc() : + m_num_chunks(0), + m_pChunks(NULL), + m_num_alpha_blocks(0), + m_has_color_blocks(false), + m_has_alpha0_blocks(false), + m_has_alpha1_blocks(false), + m_main_thread_id(get_current_thread_id()), + m_canceled(false), + m_pTask_pool(NULL), + m_prev_phase_index(-1), + m_prev_percentage_complete(-1) + { + utils::zero_object(m_encoding_hist); + } + + dxt_hc::~dxt_hc() + { + } + + void dxt_hc::clear() + { + m_num_chunks = 0; + m_pChunks = NULL; + + m_chunk_encoding.clear(); + + m_num_alpha_blocks = 0; + m_has_color_blocks = false; + 
m_has_alpha0_blocks = false; + m_has_alpha1_blocks = false; + + m_color_selectors.clear(); + + m_alpha_selectors.clear(); + for (uint i = 0; i < cNumCompressedChunkVecs; i++) + m_compressed_chunks[i].clear(); + + utils::zero_object(m_encoding_hist); + + m_total_tiles = 0; + + m_color_clusters.clear(); + m_alpha_clusters.clear(); + m_color_selectors.clear(); + m_alpha_selectors.clear(); + + m_chunk_blocks_using_color_selectors.clear(); + m_chunk_blocks_using_alpha_selectors.clear(); + + m_color_endpoints.clear(); + m_alpha_endpoints.clear(); + + m_dbg_chunk_pixels.clear(); + m_dbg_chunk_pixels_tile_vis.clear(); + m_dbg_chunk_pixels_color_quantized.clear(); + m_dbg_chunk_pixels_alpha_quantized.clear(); + + m_dbg_chunk_pixels_quantized_color_selectors.clear(); + m_dbg_chunk_pixels_orig_color_selectors.clear(); + m_dbg_chunk_pixels_final_color_selectors.clear(); + m_dbg_chunk_pixels_final_alpha_selectors.clear(); + + m_dbg_chunk_pixels_quantized_alpha_selectors.clear(); + m_dbg_chunk_pixels_orig_alpha_selectors.clear(); + m_dbg_chunk_pixels_final_alpha_selectors.clear(); + + m_dbg_chunk_pixels_final.clear(); + + m_canceled = false; + + m_prev_phase_index = -1; + m_prev_percentage_complete = -1; + } + + bool dxt_hc::compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool) + { + m_pTask_pool = &task_pool; + m_main_thread_id = get_current_thread_id(); + + bool result = compress_internal(p, num_chunks, pChunks); + + m_pTask_pool = NULL; + + return result; + } + + bool dxt_hc::compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks) + { + if ((!num_chunks) || (!pChunks)) + return false; + if ((m_params.m_format == cDXT1A) || (m_params.m_format == cDXT3)) + return false; + + clear(); + + m_params = p; + + m_num_chunks = num_chunks; + m_pChunks = pChunks; + + switch (m_params.m_format) + { + case cDXT1: + { + m_has_color_blocks = true; + break; + } + case cDXT5: + { + m_has_color_blocks = true; + m_has_alpha0_blocks = 
true; + m_num_alpha_blocks = 1; + break; + } + case cDXT5A: + { + m_has_alpha0_blocks = true; + m_num_alpha_blocks = 1; + break; + } + case cDXN_XY: + case cDXN_YX: + { + m_has_alpha0_blocks = true; + m_has_alpha1_blocks = true; + m_num_alpha_blocks = 2; + break; + } + default: + { + return false; + } + } + + determine_compressed_chunks(); + + if (m_has_color_blocks) + { + if (!determine_color_endpoint_clusters()) + return false; + if (!determine_color_endpoint_codebook()) + return false; + } + + if (m_num_alpha_blocks) + { + if (!determine_alpha_endpoint_clusters()) + return false; + if (!determine_alpha_endpoint_codebook()) + return false; + } + + create_quantized_debug_images(); + + if (m_has_color_blocks) + { + if (!create_selector_codebook(false)) + return false; + } + + if (m_num_alpha_blocks) + { + if (!create_selector_codebook(true)) + return false; + } + + if (m_has_color_blocks) + { + if (!refine_quantized_color_selectors()) + return false; + + if (!refine_quantized_color_endpoints()) + return false; + } + + if (m_num_alpha_blocks) + { + if (!refine_quantized_alpha_endpoints()) + return false; + + if (!refine_quantized_alpha_selectors()) + return false; + } + + create_final_debug_image(); + + if (!create_chunk_encodings()) + return false; + + return true; + } + + void dxt_hc::compress_dxt1_block( + dxt1_endpoint_optimizer::results& results, + uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, + uint8* pColor_Selectors) + { + chunk_index; + + color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < height; y++) + for (uint x = 0; x < width; x++) + pixels[x + y * width] = chunk(x_ofs + x, y_ofs + y); + + //double s = image_utils::compute_std_dev(width * height, pixels, 0, 3); + +#if CRNLIB_USE_FAST_DXT + uint low16, high16; + dxt_fast::compress_color_block(width * height, pixels, low16, high16, pColor_Selectors); + results.m_low_color = static_cast(low16); + results.m_high_color = 
static_cast(high16); + results.m_alpha_block = false; + results.m_error = INT_MAX; + results.m_pSelectors = pColor_Selectors; +#else + dxt1_endpoint_optimizer optimizer; + + dxt1_endpoint_optimizer::params params; + params.m_block_index = chunk_index; + params.m_pPixels = pixels; + params.m_num_pixels = width * height; + params.m_pixels_have_alpha = false; + params.m_use_alpha_blocks = false; + params.m_perceptual = m_params.m_perceptual; + params.m_highest_quality = false;//false; + params.m_endpoint_caching = false; + + results.m_pSelectors = pColor_Selectors; + + optimizer.compute(params, results); +#endif + } + + void dxt_hc::compress_dxt5_block( + dxt5_endpoint_optimizer::results& results, + uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index, + uint8* pAlpha_selectors) + { + chunk_index; + + color_quad_u8 pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < height; y++) + for (uint x = 0; x < width; x++) + pixels[x + y * width] = chunk(x_ofs + x, y_ofs + y); + +#if 0 //CRNLIB_USE_FAST_DXT + uint low, high; + dxt_fast::compress_alpha_block(width * height, pixels, low, high, pAlpha_selectors, component_index); + results.m_pSelectors = pAlpha_selectors; + results.m_error = INT_MAX; + results.m_first_endpoint = static_cast(low); + results.m_second_endpoint = static_cast(high); + results.m_block_type = 0; +#else + dxt5_endpoint_optimizer optimizer; + dxt5_endpoint_optimizer::params params; + params.m_block_index = chunk_index; + params.m_pPixels = pixels; + params.m_num_pixels = width * height; + params.m_comp_index = component_index; + params.m_use_both_block_types = false; + params.m_quality = cCRNDXTQualityNormal; + + results.m_pSelectors = pAlpha_selectors; + + optimizer.compute(params, results); +#endif + } + + void dxt_hc::determine_compressed_chunks_task(uint64 data, void* pData_ptr) + { + pData_ptr; + const uint thread_index = static_cast(data); + + image_u8 orig_chunk; + image_u8 
decomp_chunk[cNumChunkEncodings]; + + orig_chunk.resize(cChunkPixelWidth, cChunkPixelHeight); + for (uint i = 0; i < cNumChunkEncodings; i++) + decomp_chunk[i].resize(cChunkPixelWidth, cChunkPixelHeight); + + image_utils::error_metrics color_error_metrics[cNumChunkEncodings]; + dxt1_endpoint_optimizer::results color_optimizer_results[cNumChunkTileLayouts]; + uint8 layout_color_selectors[cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight]; + + image_utils::error_metrics alpha_error_metrics[2][cNumChunkEncodings]; + dxt5_endpoint_optimizer::results alpha_optimizer_results[2][cNumChunkTileLayouts]; + uint8 layout_alpha_selectors[2][cNumChunkTileLayouts][cChunkPixelWidth * cChunkPixelHeight]; + + uint first_layout = 0; + uint last_layout = cNumChunkTileLayouts; + + uint first_encoding = 0; + uint last_encoding = cNumChunkEncodings; + + if (!m_params.m_hierarchical) + { + first_layout = cFirst4x4ChunkTileLayout; + first_encoding = cNumChunkEncodings - 1; + } + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if (m_canceled) + return; + + if ((get_current_thread_id() == m_main_thread_id) && ((chunk_index & 511) == 0)) + { + if (!update_progress(0, chunk_index, m_num_chunks)) + return; + } + + if (m_pTask_pool->get_num_threads()) + { + if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + uint level_index = 0; + for (uint i = 0; i < m_params.m_num_levels; i++) + { + if ((chunk_index >= m_params.m_levels[i].m_first_chunk) && (chunk_index < m_params.m_levels[i].m_first_chunk + m_params.m_levels[i].m_num_chunks)) + { + level_index = i; + break; + } + } + + for (uint cy = 0; cy < cChunkPixelHeight; cy++) + for (uint cx = 0; cx < cChunkPixelWidth; cx++) + orig_chunk(cx, cy) = m_pChunks[chunk_index](cx, cy); + + if (m_has_color_blocks) + { + for (uint l = first_layout; l < last_layout; l++) + { + utils::zero_object(layout_color_selectors[l]); + + compress_dxt1_block( + color_optimizer_results[l], 
chunk_index, + orig_chunk, + g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs, + g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height, + layout_color_selectors[l]); + } + } + + float alpha_layout_std_dev[2][cNumChunkTileLayouts]; + utils::zero_object(alpha_layout_std_dev); + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + for (uint l = first_layout; l < last_layout; l++) + { + utils::zero_object(layout_alpha_selectors[a][l]); + + compress_dxt5_block( + alpha_optimizer_results[a][l], chunk_index, + orig_chunk, + g_chunk_tile_layouts[l].m_x_ofs, g_chunk_tile_layouts[l].m_y_ofs, + g_chunk_tile_layouts[l].m_width, g_chunk_tile_layouts[l].m_height, + m_params.m_alpha_component_indices[a], + layout_alpha_selectors[a][l]); + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + float mean = 0.0f; + float variance = 0.0f; + + for (uint cy = 0; cy < g_chunk_tile_layouts[l].m_height; cy++) + { + for (uint cx = 0; cx < g_chunk_tile_layouts[l].m_width; cx++) + { + uint s = orig_chunk(cx + g_chunk_tile_layouts[l].m_x_ofs, cy + g_chunk_tile_layouts[l].m_y_ofs)[m_params.m_alpha_component_indices[a]]; + + mean += s; + variance += s * s; + } // cx + } //cy + + float scale = 1.0f / (g_chunk_tile_layouts[l].m_width * g_chunk_tile_layouts[l].m_height); + + mean *= scale; + variance *= scale; + + variance -= mean * mean; + + alpha_layout_std_dev[a][l] = sqrt(variance); + + } //a + } + } + + for (uint e = first_encoding; e < last_encoding; e++) + { + for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) + { + const uint layout_index = g_chunk_encodings[e].m_tiles[t].m_layout_index; + CRNLIB_ASSERT( (layout_index >= first_layout) && (layout_index < last_layout) ); + + if (m_has_color_blocks) + { + const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index]; + const uint8* pColor_selectors = layout_color_selectors[layout_index]; + + color_quad_u8 block_colors[cDXT1SelectorValues]; + 
CRNLIB_ASSERT(color_results.m_low_color >= color_results.m_high_color); + // it's okay if color_results.m_low_color == color_results.m_high_color, because in this case only selector 0 should be used + dxt1_block::get_block_colors4(block_colors, color_results.m_low_color, color_results.m_high_color); + + for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) + { + for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) + { + uint s = pColor_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width]; + CRNLIB_ASSERT(s < cDXT1SelectorValues); + + decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs) = block_colors[s]; + } + } + } + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index]; + const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index]; + + uint block_values[cDXT5SelectorValues]; + CRNLIB_ASSERT(alpha_results.m_first_endpoint >= alpha_results.m_second_endpoint); + dxt5_block::get_block_values8(block_values, alpha_results.m_first_endpoint, alpha_results.m_second_endpoint); + + for (uint cy = 0; cy < g_chunk_encodings[e].m_tiles[t].m_height; cy++) + { + for (uint cx = 0; cx < g_chunk_encodings[e].m_tiles[t].m_width; cx++) + { + uint s = pAlpha_selectors[cx + cy * g_chunk_encodings[e].m_tiles[t].m_width]; + CRNLIB_ASSERT(s < cDXT5SelectorValues); + + decomp_chunk[e](cx + g_chunk_encodings[e].m_tiles[t].m_x_ofs, cy + g_chunk_encodings[e].m_tiles[t].m_y_ofs)[m_params.m_alpha_component_indices[a]] = + static_cast(block_values[s]); + } + } + + } + } // t + + if (m_params.m_hierarchical) + { + if (m_has_color_blocks) + color_error_metrics[e].compute(decomp_chunk[e], orig_chunk, 0, 3); + + for (uint a = 0; a < m_num_alpha_blocks; a++) + alpha_error_metrics[a][e].compute(decomp_chunk[e], orig_chunk, m_params.m_alpha_component_indices[a], 1); + } + } // e + + uint 
best_encoding = cNumChunkEncodings - 1; + + if (m_params.m_hierarchical) + { + float quality[cNumChunkEncodings]; + utils::zero_object(quality); + + float best_quality = 0.0f; + + best_encoding = 0; + + for (uint e = 0; e < cNumChunkEncodings; e++) + { + if (m_has_color_blocks) + { + float adaptive_tile_color_psnr_derating = m_params.m_adaptive_tile_color_psnr_derating; + if ((level_index) && (adaptive_tile_color_psnr_derating > .25f)) + { + //adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, (level_index - 1) / math::maximum(1.0f, float(m_params.m_num_levels - 2))); + adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast(level_index))); + } + + float color_derating = math::lerp( 0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f ); + quality[e] = (float)math::maximum(color_error_metrics[e].mPeakSNR - color_derating, 0.0f); + } + + if (m_num_alpha_blocks) + { + quality[e] *= m_params.m_adaptive_tile_color_alpha_weighting_ratio; + float alpha_derating = math::lerp( 0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f ); + + float max_std_dev = 0.0f; + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + quality[e] += (float)math::maximum(alpha_error_metrics[a][e].mPeakSNR - alpha_derating, 0.0f); + + for (uint t = 0; t < g_chunk_encodings[e].m_num_tiles; t++) + { + float std_dev = alpha_layout_std_dev[a][ g_chunk_encodings[e].m_tiles[t].m_layout_index ]; + max_std_dev = math::maximum(max_std_dev, std_dev); + } + } + +#if 0 +// rg [4/28/09] - disabling this because it's fucking up dxt5_xgbr normal maps + const float l = 6.0f; + const float k = .5f; + + if (max_std_dev > l) + { + float s = max_std_dev - l; + quality[e] -= (k * s); + } +#endif + } + + if (quality[e] > best_quality) + { + best_quality = quality[e]; + best_encoding = e; + } + } + } + + 
interlocked_increment32(&m_encoding_hist[best_encoding]); + + interlocked_exchange_add32(&m_total_tiles, g_chunk_encodings[best_encoding].m_num_tiles); + + for (uint q = 0; q < cNumCompressedChunkVecs; q++) + { + if (q == cColorChunks) + { + if (!m_has_color_blocks) + continue; + } + else if (q > m_num_alpha_blocks) + continue; + + compressed_chunk& output = m_compressed_chunks[q][chunk_index]; + + output.m_encoding_index = static_cast(best_encoding); + output.m_num_tiles = static_cast(g_chunk_encodings[best_encoding].m_num_tiles); + + for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) + { + const uint layout_index = g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index; + + output.m_tiles[t].m_layout_index = static_cast(layout_index); + output.m_tiles[t].m_pixel_width = static_cast(g_chunk_encodings[best_encoding].m_tiles[t].m_width); + output.m_tiles[t].m_pixel_height = static_cast(g_chunk_encodings[best_encoding].m_tiles[t].m_height); + + if (q == cColorChunks) + { + const dxt1_endpoint_optimizer::results& color_results = color_optimizer_results[layout_index]; + const uint8* pColor_selectors = layout_color_selectors[layout_index]; + + output.m_tiles[t].m_endpoint_cluster_index = 0; + output.m_tiles[t].m_first_endpoint = color_results.m_low_color; + output.m_tiles[t].m_second_endpoint = color_results.m_high_color; + + memcpy(output.m_tiles[t].m_selectors, pColor_selectors, cChunkPixelWidth * cChunkPixelHeight); + output.m_tiles[t].m_alpha_encoding = color_results.m_alpha_block; + } + else + { + const uint a = q - cAlpha0Chunks; + + const dxt5_endpoint_optimizer::results& alpha_results = alpha_optimizer_results[a][layout_index]; + const uint8* pAlpha_selectors = layout_alpha_selectors[a][layout_index]; + + output.m_tiles[t].m_endpoint_cluster_index = 0; + output.m_tiles[t].m_first_endpoint = alpha_results.m_first_endpoint; + output.m_tiles[t].m_second_endpoint = alpha_results.m_second_endpoint; + + memcpy(output.m_tiles[t].m_selectors, 
pAlpha_selectors, cChunkPixelWidth * cChunkPixelHeight); + output.m_tiles[t].m_alpha_encoding = alpha_results.m_block_type != 0; + } + } // t + } // q + + if (m_params.m_debugging) + { + for (uint y = 0; y < cChunkPixelHeight; y++) + for (uint x = 0; x < cChunkPixelWidth; x++) + m_dbg_chunk_pixels[chunk_index](x, y) = decomp_chunk[best_encoding](x, y); + + for (uint t = 0; t < g_chunk_encodings[best_encoding].m_num_tiles; t++) + { + const uint layout_index = g_chunk_encodings[best_encoding].m_tiles[t].m_layout_index; + + const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[layout_index]; + + for (uint ty = 0; ty < tile_desc.m_height; ty++) + for (uint tx = 0; tx < tile_desc.m_width; tx++) + m_dbg_chunk_pixels_tile_vis[chunk_index](tile_desc.m_x_ofs + tx, tile_desc.m_y_ofs + ty) = g_tile_layout_colors[layout_index]; + } + } + + } // chunk_index + } + + bool dxt_hc::determine_compressed_chunks() + { + utils::zero_object(m_encoding_hist); + + for (uint i = 0; i < cNumCompressedChunkVecs; i++) + m_compressed_chunks[i].clear(); + + if (m_has_color_blocks) + m_compressed_chunks[cColorChunks].resize(m_num_chunks); + + for (uint a = 0; a < m_num_alpha_blocks; a++) + m_compressed_chunks[cAlpha0Chunks + a].resize(m_num_chunks); + + if (m_params.m_debugging) + { + m_dbg_chunk_pixels.resize(m_num_chunks); + m_dbg_chunk_pixels_tile_vis.resize(m_num_chunks); + + for (uint i = 0; i < m_num_chunks; i++) + { + m_dbg_chunk_pixels[i].clear(); + m_dbg_chunk_pixels_tile_vis[i].clear(); + } + } + + m_total_tiles = 0; + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_compressed_chunks_task, i); + + m_pTask_pool->join(); + if (m_canceled) + return false; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + { + console::info(L"Total Pixels: %u, Chunks: %u, Blocks: %u, Adapted Tiles: %u", m_num_chunks * cChunkPixelWidth * cChunkPixelHeight, m_num_chunks, m_num_chunks * cChunkBlockWidth * 
cChunkBlockHeight, m_total_tiles); + + console::info(L"Chunk encoding type symbol_histogram: "); + for (uint e = 0; e < cNumChunkEncodings; e++) + console::info(L"%u ", m_encoding_hist[e]); + + console::info(L"Blocks per chunk encoding type: "); + for (uint e = 0; e < cNumChunkEncodings; e++) + console::info(L"%u ", m_encoding_hist[e] * cChunkBlockWidth * cChunkBlockHeight); + } +#endif + + return true; + } + + void dxt_hc::assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr) + { + const uint thread_index = (uint)data; + assign_color_endpoint_clusters_state& state = *static_cast(pData_ptr); + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if (m_canceled) + return; + + if ((get_current_thread_id() == m_main_thread_id) && ((chunk_index & 63) == 0)) + { + if (!update_progress(2, chunk_index, m_num_chunks)) + return; + } + + if (m_pTask_pool->get_num_threads()) + { + if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + uint cluster_index = state.m_vq.find_best_codebook_entry_fs(state.m_training_vecs[chunk_index][tile_index]); + + chunk.m_endpoint_cluster_index[tile_index] = static_cast(cluster_index); + } + } + } + + bool dxt_hc::determine_color_endpoint_clusters() + { + if (!m_has_color_blocks) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Generating color training vectors"); +#endif + + const float r_scale = .5f; + const float b_scale = .25f; + + vec6F_tree_vq vq; + + crnlib::vector< crnlib::vector > training_vecs; + + training_vecs.resize(m_num_chunks); + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if ((chunk_index & 255) == 0) + { + if (!update_progress(1, chunk_index, m_num_chunks)) + return false; + } + + const compressed_chunk& chunk = 
m_compressed_chunks[cColorChunks][chunk_index]; + + training_vecs[chunk_index].resize(chunk.m_num_tiles); + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const compressed_tile& tile = chunk.m_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index]; + + tree_clusterizer palettizer; + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + const color_quad_u8& c = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y); + + vec3F v; + if (m_params.m_perceptual) + { + v.set(c[0] * 1.0f/255.0f, c[1] * 1.0f/255.0f, c[2] * 1.0f/255.0f); + v[0] *= r_scale; + v[2] *= b_scale; + } + else + { + v.set(c[0] * 1.0f/255.0f, c[1] * 1.0f/255.0f, c[2] * 1.0f/255.0f); + } + + palettizer.add_training_vec(v, 1); + } + } + + palettizer.generate_codebook(2); + + uint tile_weight = tile.m_pixel_width * tile.m_pixel_height; + tile_weight = static_cast(tile_weight * m_pChunks[chunk_index].m_weight); + + vec3F v[2]; + utils::zero_object(v); + + for (uint i = 0; i < palettizer.get_codebook_size(); i++) + v[i] = palettizer.get_codebook_entry(i); + + if (palettizer.get_codebook_size() == 1) + v[1] = v[0]; + if (v[0].length() > v[1].length()) + utils::swap(v[0], v[1]); + + vec6F vv; + for (uint i = 0; i < 2; i++) + { + vv[i*3+0] = v[i][0]; + vv[i*3+1] = v[i][1]; + vv[i*3+2] = v[i][2]; + } + + vq.add_training_vec(vv, tile_weight); + + training_vecs[chunk_index][tile_index] = vv; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Begin color cluster analysis"); + timer t; + t.start(); +#endif + + uint codebook_size = math::minimum(m_total_tiles, m_params.m_color_endpoint_codebook_size); + vq.generate_codebook(codebook_size); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + { + double total_time = t.get_elapsed_secs(); + console::info(L"Codebook gen time: %3.3fs, Total color clusters: %u", total_time, 
vq.get_codebook_size()); + } +#endif + + m_color_clusters.resize(vq.get_codebook_size()); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Begin color cluster assignment"); +#endif + + assign_color_endpoint_clusters_state state(vq, training_vecs); + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &dxt_hc::assign_color_endpoint_clusters_task, i, &state); + + m_pTask_pool->join(); + if (m_canceled) + return false; + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + uint cluster_index = chunk.m_endpoint_cluster_index[tile_index]; + + m_color_clusters[cluster_index].m_tiles.push_back( std::make_pair(chunk_index, tile_index) ); + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Completed color cluster assignment"); +#endif + + return true; + } + + void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); + const determine_alpha_endpoint_clusters_state& state = *static_cast(pData_ptr); + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if (m_canceled) + return; + + if ((get_current_thread_id() == m_main_thread_id) && ((chunk_index & 63) == 0)) + { + if (!update_progress(7, m_num_chunks * a + chunk_index, m_num_chunks * m_num_alpha_blocks)) + return; + } + + if (m_pTask_pool->get_num_threads()) + { + if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + uint cluster_index = 
state.m_vq.find_best_codebook_entry_fs(state.m_training_vecs[a][chunk_index][tile_index]); + + chunk.m_endpoint_cluster_index[tile_index] = static_cast(cluster_index); + } + } + } + } + + bool dxt_hc::determine_alpha_endpoint_clusters() + { + if (!m_num_alpha_blocks) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Generating alpha training vectors"); +#endif + + determine_alpha_endpoint_clusters_state state; + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + state.m_training_vecs[a].resize(m_num_chunks); + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if ((chunk_index & 63) == 0) + { + if (!update_progress(6, m_num_chunks * a + chunk_index, m_num_chunks * m_num_alpha_blocks)) + return false; + } + + const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index]; + + state.m_training_vecs[a][chunk_index].resize(chunk.m_num_tiles); + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const compressed_tile& tile = chunk.m_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index]; + + tree_clusterizer palettizer; + + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + uint c = m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[m_params.m_alpha_component_indices[a]]; + + vec1F v(c * 1.0f/255.0f); + + palettizer.add_training_vec(v, 1); + } + } + palettizer.generate_codebook(2); + + const uint tile_weight = tile.m_pixel_width * tile.m_pixel_height; + + vec1F v[2]; + utils::zero_object(v); + + for (uint i = 0; i < palettizer.get_codebook_size(); i++) + v[i] = palettizer.get_codebook_entry(i); + + if (palettizer.get_codebook_size() == 1) + v[1] = v[0]; + if (v[0] > v[1]) + utils::swap(v[0], v[1]); + + vec2F vv(v[0][0], v[1][0]); + + state.m_vq.add_training_vec(vv, tile_weight); + + state.m_training_vecs[a][chunk_index][tile_index] = vv; + + } // 
tile_index + } // chunk_index + } // a + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Begin alpha cluster analysis"); + timer t; + t.start(); +#endif + + uint codebook_size = math::minimum(m_total_tiles, m_params.m_alpha_endpoint_codebook_size); + state.m_vq.generate_codebook(codebook_size); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + { + double total_time = t.get_elapsed_secs(); + console::info(L"Codebook gen time: %3.3fs, Total alpha clusters: %u", total_time, state.m_vq.get_codebook_size()); + } +#endif + + m_alpha_clusters.resize(state.m_vq.get_codebook_size()); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Begin alpha cluster assignment"); +#endif + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &state); + + m_pTask_pool->join(); + if (m_canceled) + return false; + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const uint cluster_index = chunk.m_endpoint_cluster_index[tile_index]; + + m_alpha_clusters[cluster_index].m_tiles.push_back( std::make_pair(chunk_index, tile_index | (a << 16)) ); + } + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Completed alpha cluster assignment"); +#endif + + return true; + } + + void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr) + { + pData_ptr; + const uint thread_index = static_cast(data); + + if (!m_has_color_blocks) + return; + + crnlib::vector pixels; + pixels.reserve(512); + + crnlib::vector selectors; + + uint total_pixels = 0; + + uint total_empty_clusters = 0; + for (uint cluster_index = 0; cluster_index < 
m_color_clusters.size(); cluster_index++) + { + if (m_canceled) + return; + + if ((get_current_thread_id() == m_main_thread_id) && ((cluster_index & 63) == 0)) + { + if (!update_progress(3, cluster_index, m_color_clusters.size())) + return; + } + + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + tile_cluster& cluster = m_color_clusters[cluster_index]; + if (cluster.m_tiles.empty()) + { + total_empty_clusters++; + continue; + } + + pixels.resize(0); + + for (uint t = 0; t < cluster.m_tiles.size(); t++) + { + const uint chunk_index = cluster.m_tiles[t].first; + const uint tile_index = cluster.m_tiles[t].second; + CRNLIB_ASSERT(chunk_index < m_num_chunks); + CRNLIB_ASSERT(tile_index < cChunkMaxTiles); + + const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + + CRNLIB_ASSERT(tile_index < chunk.m_num_tiles); + const compressed_tile& tile = chunk.m_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index]; + + for (uint y = 0; y < layout.m_height; y++) + for (uint x = 0; x < layout.m_width; x++) + pixels.push_back( m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y) ); + } + + total_pixels += pixels.size(); + + selectors.resize(pixels.size()); + + dxt1_endpoint_optimizer::params params; + params.m_block_index = cluster_index; + params.m_pPixels = &pixels[0]; + params.m_num_pixels = pixels.size(); + params.m_pixels_have_alpha = false; + params.m_use_alpha_blocks = false; + params.m_perceptual = m_params.m_perceptual; + params.m_quality = cCRNDXTQualityUber; + params.m_endpoint_caching = false; + + dxt1_endpoint_optimizer::results results; + results.m_pSelectors = &selectors[0]; + + dxt1_endpoint_optimizer optimizer; + const bool all_transparent = optimizer.compute(params, results); + all_transparent; + + cluster.m_first_endpoint = results.m_low_color; + cluster.m_second_endpoint = results.m_high_color; + 
cluster.m_alpha_encoding = results.m_alpha_block; + cluster.m_error = results.m_error; + + uint pixel_index = 0; + + for (uint t = 0; t < cluster.m_tiles.size(); t++) + { + const uint chunk_index = cluster.m_tiles[t].first; + const uint tile_index = cluster.m_tiles[t].second; + + CRNLIB_ASSERT(chunk_index < m_num_chunks); + + compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + + CRNLIB_ASSERT(tile_index < chunk.m_num_tiles); + + CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index); + + const compressed_tile& tile = chunk.m_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index]; + layout; + + compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + const uint total_pixels = tile.m_pixel_width * tile.m_pixel_height; + + quantized_tile.m_endpoint_cluster_index = cluster_index; + quantized_tile.m_first_endpoint = results.m_low_color; + quantized_tile.m_second_endpoint = results.m_high_color; + //quantized_tile.m_error = results.m_error; + quantized_tile.m_alpha_encoding = results.m_alpha_block; + quantized_tile.m_pixel_width = tile.m_pixel_width; + quantized_tile.m_pixel_height = tile.m_pixel_height; + quantized_tile.m_layout_index = tile.m_layout_index; + + memcpy(quantized_tile.m_selectors, &selectors[pixel_index], total_pixels); + + pixel_index += total_pixels; + } + } + + //CRNLIB_ASSERT(total_pixels == (m_num_chunks * cChunkPixelWidth * cChunkPixelHeight)); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + { + if (total_empty_clusters) + console::warning(L"Total empty color clusters: %u", total_empty_clusters); + } +#endif + } + + bool dxt_hc::determine_color_endpoint_codebook() + { + if (!m_has_color_blocks) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Computing optimal color cluster endpoints"); +#endif + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + 
m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_codebook_task, i, NULL); + + m_pTask_pool->join(); + + return !m_canceled; + } + + void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr) + { + pData_ptr; + + const uint thread_index = static_cast(data); + + crnlib::vector pixels; + pixels.reserve(512); + + crnlib::vector selectors; + selectors.reserve(512); + + uint total_empty_clusters = 0; + for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) + { + if (m_canceled) + return; + + if ((get_current_thread_id() == m_main_thread_id) && ((cluster_index & 63) == 0)) + { + if (!update_progress(8, cluster_index, m_alpha_clusters.size())) + return; + } + + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + tile_cluster& cluster = m_alpha_clusters[cluster_index]; + if (cluster.m_tiles.empty()) + { + total_empty_clusters++; + continue; + } + + pixels.resize(0); + + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU; + const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U; + + CRNLIB_ASSERT(chunk_index < m_num_chunks); + CRNLIB_ASSERT(tile_index < cChunkMaxTiles); + CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks); + + const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index]; + + CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index); + + CRNLIB_ASSERT(tile_index < chunk.m_num_tiles); + const compressed_tile& tile = chunk.m_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index]; + + color_quad_u8 c(cClear); + + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + c[0] = 
m_pChunks[chunk_index](layout.m_x_ofs + x, layout.m_y_ofs + y)[ m_params.m_alpha_component_indices[alpha_index] ]; + + pixels.push_back(c); + } + } + } + + selectors.resize(pixels.size()); + + dxt5_endpoint_optimizer::params params; + params.m_block_index = cluster_index; + params.m_pPixels = &pixels[0]; + params.m_num_pixels = pixels.size(); + params.m_comp_index = 0; + params.m_quality = cCRNDXTQualityUber; + params.m_use_both_block_types = false; + + dxt5_endpoint_optimizer::results results; + results.m_pSelectors = &selectors[0]; + + dxt5_endpoint_optimizer optimizer; + const bool all_transparent = optimizer.compute(params, results); + all_transparent; + + cluster.m_first_endpoint = results.m_first_endpoint; + cluster.m_second_endpoint = results.m_second_endpoint; + cluster.m_alpha_encoding = results.m_block_type != 0; + cluster.m_error = results.m_error; + + uint pixel_index = 0; + + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU; + const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U; + CRNLIB_ASSERT(chunk_index < m_num_chunks); + CRNLIB_ASSERT(tile_index < cChunkMaxTiles); + CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks); + + compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index]; + + CRNLIB_ASSERT(chunk.m_endpoint_cluster_index[tile_index] == cluster_index); + + CRNLIB_ASSERT(tile_index < chunk.m_num_tiles); + const compressed_tile& tile = chunk.m_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[tile.m_layout_index]; + layout; + + compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + const uint total_pixels = tile.m_pixel_width * tile.m_pixel_height; + + quantized_tile.m_endpoint_cluster_index = cluster_index; + quantized_tile.m_first_endpoint = results.m_first_endpoint; + quantized_tile.m_second_endpoint 
= results.m_second_endpoint; + //quantized_tile.m_error = results.m_error; + quantized_tile.m_alpha_encoding = results.m_block_type != 0; + quantized_tile.m_pixel_width = tile.m_pixel_width; + quantized_tile.m_pixel_height = tile.m_pixel_height; + quantized_tile.m_layout_index = tile.m_layout_index; + + memcpy(quantized_tile.m_selectors, &selectors[pixel_index], total_pixels); + + pixel_index += total_pixels; + } + } // cluster_index + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + { + if (total_empty_clusters) + console::warning(L"Total empty alpha clusters: %u", total_empty_clusters); + } +#endif + } + + bool dxt_hc::determine_alpha_endpoint_codebook() + { + if (!m_num_alpha_blocks) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Computing optimal alpha cluster endpoints"); +#endif + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, NULL); + + m_pTask_pool->join(); + + return !m_canceled; + } + + void dxt_hc::create_quantized_debug_images() + { + if (!m_params.m_debugging) + return; + + if (m_has_color_blocks) + { + m_dbg_chunk_pixels_color_quantized.resize(m_num_chunks); + m_dbg_chunk_pixels_quantized_color_selectors.resize(m_num_chunks); + m_dbg_chunk_pixels_orig_color_selectors.resize(m_num_chunks); + + for (uint i = 0; i < m_num_chunks; i++) + { + m_dbg_chunk_pixels_color_quantized[i].clear(); + m_dbg_chunk_pixels_quantized_color_selectors[i].clear(); + m_dbg_chunk_pixels_orig_color_selectors[i].clear(); + } + } + + if (m_num_alpha_blocks) + { + m_dbg_chunk_pixels_alpha_quantized.resize(m_num_chunks); + m_dbg_chunk_pixels_quantized_alpha_selectors.resize(m_num_chunks); + m_dbg_chunk_pixels_orig_alpha_selectors.resize(m_num_chunks); + + for (uint i = 0; i < m_num_chunks; i++) + { + m_dbg_chunk_pixels_alpha_quantized[i].clear(); + m_dbg_chunk_pixels_quantized_alpha_selectors[i].clear(); + 
m_dbg_chunk_pixels_orig_alpha_selectors[i].clear(); + } + } + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if (m_has_color_blocks) + { + pixel_chunk& output_chunk_color_quantized = m_dbg_chunk_pixels_color_quantized[chunk_index]; + pixel_chunk& output_chunk_selectors = m_dbg_chunk_pixels_quantized_color_selectors[chunk_index]; + pixel_chunk& output_chunk_orig_selectors = m_dbg_chunk_pixels_orig_color_selectors[chunk_index]; + + const compressed_chunk& color_chunk = m_compressed_chunks[cColorChunks][chunk_index]; + + for (uint tile_index = 0; tile_index < color_chunk.m_num_tiles; tile_index++) + { + const compressed_tile& quantized_tile = color_chunk.m_quantized_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index]; + + const uint8* pColor_Selectors = quantized_tile.m_selectors; + + color_quad_u8 block_colors[cDXT1SelectorValues]; + CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint); + dxt1_block::get_block_colors(block_colors, static_cast(quantized_tile.m_first_endpoint), static_cast(quantized_tile.m_second_endpoint)); + + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + const uint selector = pColor_Selectors[x + y * layout.m_width]; + + output_chunk_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = selector*255/(cDXT1SelectorValues-1); + + output_chunk_orig_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = color_chunk.m_tiles[tile_index].m_selectors[x + y * layout.m_width] * 255 / (cDXT1SelectorValues-1); + + output_chunk_color_quantized(x + layout.m_x_ofs, y + layout.m_y_ofs) = block_colors[selector]; + } + } + } + } + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + pixel_chunk& output_chunk_alpha_quantized = m_dbg_chunk_pixels_alpha_quantized[chunk_index]; + pixel_chunk& output_chunk_selectors = m_dbg_chunk_pixels_quantized_alpha_selectors[chunk_index]; + pixel_chunk& 
output_chunk_orig_selectors = m_dbg_chunk_pixels_orig_alpha_selectors[chunk_index]; + + const compressed_chunk& alpha_chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index]; + + for (uint tile_index = 0; tile_index < alpha_chunk.m_num_tiles; tile_index++) + { + const compressed_tile& quantized_tile = alpha_chunk.m_quantized_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index]; + + const uint8* pAlpha_selectors = quantized_tile.m_selectors; + + uint block_values[cDXT5SelectorValues]; + CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint); + dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint); + + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + const uint selector = pAlpha_selectors[x + y * layout.m_width]; + + CRNLIB_ASSERT(selector < cDXT5SelectorValues); + + output_chunk_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast(selector*255/(cDXT5SelectorValues-1)); + + output_chunk_orig_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast(alpha_chunk.m_tiles[tile_index].m_selectors[x + y * layout.m_width]*255/(cDXT5SelectorValues-1)); + + output_chunk_alpha_quantized(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast(block_values[selector]); + } + } + } + } // a + + } + } + + void dxt_hc::create_selector_codebook_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); + const create_selector_codebook_state& state = *static_cast(pData_ptr); + + for (uint comp_chunk_index = state.m_comp_index_start; comp_chunk_index <= state.m_comp_index_end; comp_chunk_index++) + { + const uint alpha_index = state.m_alpha_blocks ? (comp_chunk_index - cAlpha0Chunks) : 0; + const uint alpha_pixel_comp = state.m_alpha_blocks ? 
m_params.m_alpha_component_indices[alpha_index] : 0; + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if (m_canceled) + return; + + if ((get_current_thread_id() == m_main_thread_id) && ((chunk_index & 127) == 0)) + { + if (!update_progress(12 + comp_chunk_index, chunk_index, m_num_chunks)) + return; + } + + if (m_pTask_pool->get_num_threads()) + { + if ((chunk_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index]; + + const uint tile_blocks_x = layout.m_width >> 2; + const uint tile_blocks_y = layout.m_height >> 2; + + const uint tile_block_ofs_x = layout.m_x_ofs >> 2; + const uint tile_block_ofs_y = layout.m_y_ofs >> 2; + + if (state.m_alpha_blocks) + { + uint block_values[cDXT5SelectorValues]; + dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint); + + for (uint by = 0; by < tile_blocks_y; by++) + { + for (uint bx = 0; bx < tile_blocks_x; bx++) + { + #if 0 + uint best_index = selector_vq.find_best_codebook_entry_fs(training_vecs[comp_chunk_index][(tile_block_ofs_x+bx)+(tile_block_ofs_y+by)*2][chunk_index]); + #else + const dxt_pixel_block& block = m_pChunks[chunk_index].m_blocks[tile_block_ofs_y + by][tile_block_ofs_x + bx]; + + uint best_error = UINT_MAX; + uint best_index = 0; + + for (uint i = 0; i < state.m_selectors_cb.size(); i++) + { + const selectors& s = state.m_selectors_cb[i]; + + uint total_error = 0; + + for (uint y = 0; y < cBlockPixelHeight; y++) + { + for (uint x = 0; x < cBlockPixelWidth; x++) + { + int a = block.m_pixels[y][x][alpha_pixel_comp]; + int b = block_values[s.m_selectors[y][x]]; + int error = a - b; + 
error *= error; + + total_error += error; + if (total_error > best_error) + goto early_out; + } // x + } //y + + early_out: + if (total_error < best_error) + { + best_error = total_error; + best_index = i; + + if (best_error == 0) + break; + } + } // i + #endif + + CRNLIB_ASSERT( (tile_block_ofs_x + bx) < 2 ); + CRNLIB_ASSERT( (tile_block_ofs_y + by) < 2 ); + + chunk.m_selector_cluster_index[tile_block_ofs_y + by][tile_block_ofs_x + bx] = static_cast(best_index); + + { + scoped_spinlock lock(state.m_chunk_blocks_using_selectors_lock); + state.m_chunk_blocks_using_selectors[best_index].push_back( block_id(chunk_index, alpha_index, tile_index, tile_block_ofs_x + bx, tile_block_ofs_y + by ) ); + } + // std::make_pair(chunk_index, (tile_index << 16) | ((tile_block_ofs_y + by) << 8) | (tile_block_ofs_x + bx) ) ); + + } // bx + } // by + + } + else + { + color_quad_u8 block_colors[cDXT1SelectorValues]; + dxt1_block::get_block_colors4(block_colors, static_cast(quantized_tile.m_first_endpoint), static_cast(quantized_tile.m_second_endpoint)); + + const bool block_with_alpha = quantized_tile.m_first_endpoint == quantized_tile.m_second_endpoint; + + for (uint by = 0; by < tile_blocks_y; by++) + { + for (uint bx = 0; bx < tile_blocks_x; bx++) + { + const dxt_pixel_block& block = m_pChunks[chunk_index].m_blocks[tile_block_ofs_y + by][tile_block_ofs_x + bx]; + + uint best_error = UINT_MAX; + uint best_index = 0; + + for (uint i = 0; i < state.m_selectors_cb.size(); i++) + { + const selectors& s = state.m_selectors_cb[i]; + + uint total_error = 0; + + for (uint y = 0; y < cBlockPixelHeight; y++) + { + for (uint x = 0; x < cBlockPixelWidth; x++) + { + const color_quad_u8& a = block.m_pixels[y][x]; + + uint selector_index = s.m_selectors[y][x]; + if ((block_with_alpha) && (selector_index == 3)) + total_error += 999999; + + const color_quad_u8& b = block_colors[selector_index]; + + uint error = color::color_distance(m_params.m_perceptual, a, b, false); + + total_error += error; + if 
(total_error > best_error) + goto early_out2; + } // x + } //y + + early_out2: + if (total_error < best_error) + { + best_error = total_error; + best_index = i; + + if (best_error == 0) + break; + } + } // i + + CRNLIB_ASSERT( (tile_block_ofs_x + bx) < 2 ); + CRNLIB_ASSERT( (tile_block_ofs_y + by) < 2 ); + + chunk.m_selector_cluster_index[tile_block_ofs_y + by][tile_block_ofs_x + bx] = static_cast(best_index); + + { + scoped_spinlock lock(state.m_chunk_blocks_using_selectors_lock); + state.m_chunk_blocks_using_selectors[best_index].push_back( block_id(chunk_index, 0, tile_index, tile_block_ofs_x + bx, tile_block_ofs_y + by ) ); + } + // std::make_pair(chunk_index, (tile_index << 16) | ((tile_block_ofs_y + by) << 8) | (tile_block_ofs_x + bx) ) ); + + } // bx + } // by + + } // if alpha_blocks + + } // tile_index + + } // chunk_index + + } // comp_chunk_index + } + + bool dxt_hc::create_selector_codebook(bool alpha_blocks) + { +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Computing selector training vectors"); +#endif + + const uint cColorDistToWeight = 2000; + const uint cAlphaErrorToWeight = 8; + + vec16F_tree_vq selector_vq; + + uint comp_index_start = cColorChunks; + uint comp_index_end = cColorChunks; + if (alpha_blocks) + { + comp_index_start = cAlpha0Chunks; + comp_index_end = cAlpha0Chunks + m_num_alpha_blocks - 1; + } + + crnlib::vector training_vecs[cNumCompressedChunkVecs][4]; + + for (uint comp_chunk_index = comp_index_start; comp_chunk_index <= comp_index_end; comp_chunk_index++) + { + for (uint i = 0; i < 4; i++) + training_vecs[comp_chunk_index][i].resize(m_num_chunks); + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + if ((chunk_index & 63) == 0) + { + if (!update_progress(9 + comp_chunk_index, chunk_index, m_num_chunks)) + return false; + } + + const compressed_chunk& chunk = m_compressed_chunks[comp_chunk_index][chunk_index]; + + uint8 
block_selectors[cChunkBlockWidth][cChunkBlockHeight][cBlockPixelWidth * cBlockPixelHeight]; + uint block_weight[cChunkBlockWidth][cChunkBlockHeight]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + uint weight; + if (comp_chunk_index == cColorChunks) + { + const color_quad_u8 first_color(dxt1_block::unpack_color(static_cast(quantized_tile.m_first_endpoint), true)); + const color_quad_u8 second_color(dxt1_block::unpack_color(static_cast(quantized_tile.m_second_endpoint), true)); + const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false); + + weight = dist / cColorDistToWeight; + + weight = static_cast(weight * m_pChunks[chunk_index].m_weight); + } + else + { + int first_endpoint = quantized_tile.m_first_endpoint; + int second_endpoint = quantized_tile.m_second_endpoint; + int error = first_endpoint - second_endpoint; + error = error * error; + + weight = static_cast(error / cAlphaErrorToWeight); + } + + const uint cMaxWeight = 2048; + + weight = math::clamp(weight, 1U, cMaxWeight); + + // umm, this is a hack + float f = math::lerp(1.15f, 1.0f, chunk.m_encoding_index / float(cNumChunkEncodings - 1)); + weight = (uint)(weight * f); + + const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index]; + + for (uint y = 0; y < (layout.m_height >> 2); y++) + for (uint x = 0; x < (layout.m_width >> 2); x++) + block_weight[x + (layout.m_x_ofs >> 2)][y + (layout.m_y_ofs >> 2)] = weight; + + const uint8* pSelectors = quantized_tile.m_selectors; + + for (uint y = 0; y < layout.m_height; y++) + { + const uint cy = y + layout.m_y_ofs; + + for (uint x = 0; x < layout.m_width; x++) + { + const uint selector = pSelectors[x + y * layout.m_width]; + + if (comp_chunk_index == cColorChunks) + CRNLIB_ASSERT(selector < cDXT1SelectorValues); + else + CRNLIB_ASSERT(selector < cDXT5SelectorValues); + + const uint cx = 
x + layout.m_x_ofs; + + block_selectors[cx >> 2][cy >> 2][(cx & 3) + (cy & 3) * 4] = static_cast(selector); + } // x + } // y + } // tile_index + + vec16F v; + for (uint y = 0; y < cChunkBlockHeight; y++) + { + for (uint x = 0; x < cChunkBlockWidth; x++) + { + for (uint i = 0; i < cBlockPixelWidth * cBlockPixelHeight; i++) + { + uint s = block_selectors[x][y][i]; + + float f; + + if (comp_chunk_index == cColorChunks) + { + CRNLIB_ASSERT(s < cDXT1SelectorValues); + f = (g_dxt1_to_linear[s] + .5f) * 1.0f/4.0f; + } + else + { + CRNLIB_ASSERT(s < cDXT5SelectorValues); + f = (g_dxt5_to_linear[s] + .5f) * 1.0f/8.0f; + } + + CRNLIB_ASSERT((f >= 0.0f) && (f <= 1.0f)); + + v[i] = f; + } // i + + selector_vq.add_training_vec(v, block_weight[x][y]); + + training_vecs[comp_chunk_index][x+y*2][chunk_index] = v; + } // x + } // y + + } // chunk_index + + } // comp_chunk_index + + timer t; + t.start(); + + selector_vq.generate_codebook(alpha_blocks ? m_params.m_alpha_selector_codebook_size : m_params.m_color_selector_codebook_size); + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + { + double total_time = t.get_elapsed_secs(); + console::info(L"Codebook gen time: %3.3fs, Selector codebook size: %u", total_time, selector_vq.get_codebook_size()); + } +#endif + + selectors_vec& selectors_cb = alpha_blocks ? m_alpha_selectors : m_color_selectors; + + selectors_cb.resize(selector_vq.get_codebook_size()); + + for (uint i = 0; i < selector_vq.get_codebook_size(); i++) + { + const vec16F& v = selector_vq.get_codebook_entry(i); + + for (uint j = 0; j < cBlockPixelWidth * cBlockPixelHeight; j++) + { + int s; + if (alpha_blocks) + { + s = math::clamp(static_cast(v[j] * 8.0f), 0, 7); + s = g_dxt5_from_linear[s]; + } + else + { + s = math::clamp(static_cast(v[j] * 4.0f), 0, 3); + s = g_dxt1_from_linear[s]; + } + + selectors_cb[i].m_selectors[j >> 2][j & 3] = static_cast(s); + } // j + } // i + + chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors = alpha_blocks ? 
m_chunk_blocks_using_alpha_selectors : m_chunk_blocks_using_color_selectors; + + chunk_blocks_using_selectors.clear(); + chunk_blocks_using_selectors.resize(selectors_cb.size()); + + create_selector_codebook_state state(*this, alpha_blocks, comp_index_start, comp_index_end, selector_vq, chunk_blocks_using_selectors, selectors_cb); + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &dxt_hc::create_selector_codebook_task, i, &state); + + m_pTask_pool->join(); + + return !m_canceled; + } + + bool dxt_hc::refine_quantized_color_selectors() + { + if (!m_has_color_blocks) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Refining quantized color selectors"); +#endif + + uint total_refined_selectors = 0; + uint total_refined_pixels = 0; + uint total_selectors = 0; + + for (uint selector_index = 0; selector_index < m_color_selectors.size(); selector_index++) + { + if ((selector_index & 255) == 0) + { + if (!update_progress(15, selector_index, m_color_selectors.size())) + return false; + } + + if (m_chunk_blocks_using_color_selectors[selector_index].empty()) + continue; + + selectors& sel = m_color_selectors[selector_index]; + + for (uint y = 0; y < cBlockPixelHeight; y++) + { + for (uint x = 0; x < cBlockPixelWidth; x++) + { + uint best_s = 0; + uint best_error = UINT_MAX; + + for (uint s = 0; s < cDXT1SelectorValues; s++) + { + uint total_error = 0; + + for (uint block_iter = 0; block_iter < m_chunk_blocks_using_color_selectors[selector_index].size(); block_iter++) + { + const block_id& id = m_chunk_blocks_using_color_selectors[selector_index][block_iter]; + const uint chunk_index = id.m_chunk_index; + const uint tile_index = id.m_tile_index; + const uint chunk_block_x = id.m_block_x; + const uint chunk_block_y = id.m_block_y; + + CRNLIB_ASSERT((chunk_block_x < cChunkBlockWidth) && (chunk_block_y < cChunkBlockHeight)); + + const compressed_chunk& chunk = 
m_compressed_chunks[cColorChunks][chunk_index]; + CRNLIB_ASSERT(tile_index < chunk.m_num_tiles); + + CRNLIB_ASSERT(chunk.m_selector_cluster_index[chunk_block_y][chunk_block_x] == selector_index); + + const compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + //const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[tile.m_layout_index]; + + color_quad_u8 block_colors[cDXT1SelectorValues]; + CRNLIB_ASSERT(tile.m_first_endpoint >= tile.m_second_endpoint); + dxt1_block::get_block_colors4(block_colors, static_cast(tile.m_first_endpoint), static_cast(tile.m_second_endpoint)); + + if ((tile.m_first_endpoint == tile.m_second_endpoint) && (s == 3)) + total_error += 999999; + + const color_quad_u8& orig_pixel = m_pChunks[chunk_index](chunk_block_x * cBlockPixelWidth + x, chunk_block_y * cBlockPixelHeight + y); + const color_quad_u8& quantized_pixel = block_colors[s]; + + const uint error = color::color_distance(m_params.m_perceptual, orig_pixel, quantized_pixel, false); + total_error += error; + + } // block_iter + + if (total_error < best_error) + { + best_error = total_error; + best_s = s; + } + + } // s + + if (sel.m_selectors[y][x] != best_s) + { + total_refined_selectors++; + total_refined_pixels += m_chunk_blocks_using_color_selectors[selector_index].size(); + sel.m_selectors[y][x] = static_cast(best_s); + } + + total_selectors++; + + } //x + + } //y + + } // selector_index + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Total refined pixels: %u, selectors: %u out of %u", total_refined_pixels, total_refined_selectors, total_selectors); +#endif + + return true; + } + + bool dxt_hc::refine_quantized_alpha_selectors() + { + if (!m_num_alpha_blocks) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Refining quantized alpha selectors"); +#endif + + uint total_refined_selectors = 0; + uint total_refined_pixels = 0; + uint total_selectors = 0; + + for (uint selector_index = 0; 
selector_index < m_alpha_selectors.size(); selector_index++) + { + if ((selector_index & 255) == 0) + { + if (!update_progress(16, selector_index, m_alpha_selectors.size())) + return false; + } + + if (m_chunk_blocks_using_alpha_selectors[selector_index].empty()) + continue; + + selectors& sel = m_alpha_selectors[selector_index]; + + for (uint y = 0; y < cBlockPixelHeight; y++) + { + for (uint x = 0; x < cBlockPixelWidth; x++) + { + uint best_s = 0; + uint best_error = UINT_MAX; + + for (uint s = 0; s < cDXT5SelectorValues; s++) + { + uint total_error = 0; + + for (uint block_iter = 0; block_iter < m_chunk_blocks_using_alpha_selectors[selector_index].size(); block_iter++) + { + const block_id& id = m_chunk_blocks_using_alpha_selectors[selector_index][block_iter]; + const uint chunk_index = id.m_chunk_index; + const uint tile_index = id.m_tile_index; + const uint chunk_block_x = id.m_block_x; + const uint chunk_block_y = id.m_block_y; + const uint alpha_index = id.m_alpha_index; + CRNLIB_ASSERT(alpha_index < m_num_alpha_blocks); + + CRNLIB_ASSERT((chunk_block_x < cChunkBlockWidth) && (chunk_block_y < cChunkBlockHeight)); + + const compressed_chunk& chunk = m_compressed_chunks[alpha_index + cAlpha0Chunks][chunk_index]; + CRNLIB_ASSERT(tile_index < chunk.m_num_tiles); + + CRNLIB_ASSERT(chunk.m_selector_cluster_index[chunk_block_y][chunk_block_x] == selector_index); + + const compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + //const chunk_tile_desc& tile_desc = g_chunk_tile_layouts[tile.m_layout_index]; + + uint block_values[cDXT5SelectorValues]; + CRNLIB_ASSERT(tile.m_first_endpoint >= tile.m_second_endpoint); + dxt5_block::get_block_values(block_values, tile.m_first_endpoint, tile.m_second_endpoint); + + int orig_value = m_pChunks[chunk_index](chunk_block_x * cBlockPixelWidth + x, chunk_block_y * cBlockPixelHeight + y)[m_params.m_alpha_component_indices[alpha_index]]; + int quantized_value = block_values[s]; + + int error = (orig_value - 
quantized_value); + error *= error; + + total_error += error; + + } // block_iter + + if (total_error < best_error) + { + best_error = total_error; + best_s = s; + } + + } // s + + if (sel.m_selectors[y][x] != best_s) + { + total_refined_selectors++; + total_refined_pixels += m_chunk_blocks_using_alpha_selectors[selector_index].size(); + sel.m_selectors[y][x] = static_cast(best_s); + } + + total_selectors++; + + } //x + + } //y + + } // selector_index + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Total refined pixels: %u, selectors: %u out of %u", total_refined_pixels, total_refined_selectors, total_selectors); +#endif + + return true; + } + + bool dxt_hc::refine_quantized_color_endpoints() + { + if (!m_has_color_blocks) + return true; + + uint total_refined_tiles = 0; + uint total_refined_pixels = 0; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Refining quantized color endpoints"); +#endif + + for (uint cluster_index = 0; cluster_index < m_color_clusters.size(); cluster_index++) + { + if ((cluster_index & 255) == 0) + { + if (!update_progress(17, cluster_index, m_color_clusters.size())) + return false; + } + + tile_cluster& cluster = m_color_clusters[cluster_index]; + + uint total_pixels = 0; + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second; + + compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint); + CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint); + + total_pixels += (tile.m_pixel_width * tile.m_pixel_height); + } + + if (!total_pixels) + continue; + + crnlib::vector pixels; + crnlib::vector selectors; + + pixels.reserve(total_pixels); + selectors.reserve(total_pixels); + + 
for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second; + + compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + const pixel_chunk& src_pixels = m_pChunks[chunk_index]; + + CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint); + CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint); + + const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index]; + + for (uint y = 0; y < tile.m_pixel_height; y++) + { + for (uint x = 0; x < tile.m_pixel_width; x++) + { + selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]); + + pixels.push_back(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs)); + } + } + } + + dxt_endpoint_refiner refiner; + dxt_endpoint_refiner::params p; + dxt_endpoint_refiner::results r; + + p.m_perceptual = m_params.m_perceptual; + p.m_pSelectors = &selectors[0]; + p.m_pPixels = &pixels[0]; + p.m_num_pixels = total_pixels; + p.m_dxt1_selectors = true; + p.m_error_to_beat = cluster.m_error; + p.m_block_index = cluster_index; + + if (!refiner.refine(p, r)) + continue; + + total_refined_tiles++; + total_refined_pixels += total_pixels; + + cluster.m_error = r.m_error; + + cluster.m_first_endpoint = r.m_low_color; + cluster.m_second_endpoint = r.m_high_color; + + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second; + + compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + tile.m_first_endpoint = r.m_low_color; + tile.m_second_endpoint = r.m_high_color; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + 
console::info(L"Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_color_clusters.size()); +#endif + + return true; + } + + bool dxt_hc::refine_quantized_alpha_endpoints() + { + if (!m_num_alpha_blocks) + return true; + + uint total_refined_tiles = 0; + uint total_refined_pixels = 0; +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Refining quantized alpha endpoints"); +#endif + + for (uint cluster_index = 0; cluster_index < m_alpha_clusters.size(); cluster_index++) + { + if ((cluster_index & 255) == 0) + { + if (!update_progress(18, cluster_index, m_alpha_clusters.size())) + return false; + } + + tile_cluster& cluster = m_alpha_clusters[cluster_index]; + + uint total_pixels = 0; + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU; + const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U; + + compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index]; + compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint); + CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint); + + total_pixels += (tile.m_pixel_width * tile.m_pixel_height); + } + + if (!total_pixels) + continue; + + crnlib::vector pixels; + crnlib::vector selectors; + + pixels.reserve(total_pixels); + selectors.reserve(total_pixels); + + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU; + const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U; + + compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index]; + compressed_tile& tile = 
chunk.m_quantized_tiles[tile_index]; + + const pixel_chunk& src_pixels = m_pChunks[chunk_index]; + + CRNLIB_ASSERT(tile.m_first_endpoint == cluster.m_first_endpoint); + CRNLIB_ASSERT(tile.m_second_endpoint == cluster.m_second_endpoint); + + const chunk_tile_desc& tile_layout = g_chunk_tile_layouts[tile.m_layout_index]; + + for (uint y = 0; y < tile.m_pixel_height; y++) + { + for (uint x = 0; x < tile.m_pixel_width; x++) + { + selectors.push_back(tile.m_selectors[x + y * tile.m_pixel_width]); + + pixels.push_back(color_quad_u8(src_pixels(x + tile_layout.m_x_ofs, y + tile_layout.m_y_ofs)[m_params.m_alpha_component_indices[alpha_index]])); + } + } + } + + dxt_endpoint_refiner refiner; + dxt_endpoint_refiner::params p; + dxt_endpoint_refiner::results r; + + p.m_perceptual = m_params.m_perceptual; + p.m_pSelectors = &selectors[0]; + p.m_pPixels = &pixels[0]; + p.m_num_pixels = total_pixels; + p.m_dxt1_selectors = false; + p.m_error_to_beat = cluster.m_error; + p.m_block_index = cluster_index; + + if (!refiner.refine(p, r)) + continue; + + total_refined_tiles++; + total_refined_pixels += total_pixels; + + cluster.m_error = r.m_error; + + cluster.m_first_endpoint = r.m_low_color; + cluster.m_second_endpoint = r.m_high_color; + + for (uint tile_iter = 0; tile_iter < cluster.m_tiles.size(); tile_iter++) + { + const uint chunk_index = cluster.m_tiles[tile_iter].first; + const uint tile_index = cluster.m_tiles[tile_iter].second & 0xFFFFU; + const uint alpha_index = cluster.m_tiles[tile_iter].second >> 16U; + + compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + alpha_index][chunk_index]; + compressed_tile& tile = chunk.m_quantized_tiles[tile_index]; + + tile.m_first_endpoint = r.m_low_color; + tile.m_second_endpoint = r.m_high_color; + } + } + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + console::info(L"Total refined pixels: %u, endpoints: %u out of %u", total_refined_pixels, total_refined_tiles, m_alpha_clusters.size()); +#endif + + return true; 
+ } + + void dxt_hc::create_final_debug_image() + { + if (!m_params.m_debugging) + return; + + m_dbg_chunk_pixels_final.resize(m_num_chunks); + for (uint i = 0; i < m_num_chunks; i++) + m_dbg_chunk_pixels_final[i].clear(); + + if (m_has_color_blocks) + { + m_dbg_chunk_pixels_final_color_selectors.resize(m_num_chunks); + for (uint i = 0; i < m_num_chunks; i++) + m_dbg_chunk_pixels_final_color_selectors[i].clear(); + } + + if (m_num_alpha_blocks) + { + m_dbg_chunk_pixels_final_alpha_selectors.resize(m_num_chunks); + for (uint i = 0; i < m_num_chunks; i++) + m_dbg_chunk_pixels_final_alpha_selectors[i].clear(); + } + + for (uint chunk_index = 0; chunk_index < m_num_chunks; chunk_index++) + { + pixel_chunk& output_chunk_final = m_dbg_chunk_pixels_final[chunk_index]; + + if (m_has_color_blocks) + { + const compressed_chunk& chunk = m_compressed_chunks[cColorChunks][chunk_index]; + + pixel_chunk& output_chunk_quantized_color_selectors = m_dbg_chunk_pixels_final_color_selectors[chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index]; + + color_quad_u8 block_colors[cDXT1SelectorValues]; + dxt1_block::get_block_colors(block_colors, static_cast(quantized_tile.m_first_endpoint), static_cast(quantized_tile.m_second_endpoint)); + + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + const uint chunk_x_ofs = x + layout.m_x_ofs; + const uint chunk_y_ofs = y + layout.m_y_ofs; + const uint block_x = chunk_x_ofs >> 2; + const uint block_y = chunk_y_ofs >> 2; + const selectors& s = m_color_selectors[chunk.m_selector_cluster_index[block_y][block_x]]; + + uint selector = s.m_selectors[chunk_y_ofs & 3][chunk_x_ofs & 3]; + + output_chunk_final(x + layout.m_x_ofs, y + layout.m_y_ofs) = block_colors[selector]; + 
output_chunk_quantized_color_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs) = g_tile_layout_colors[selector]; + } + } + } + } + + if (m_num_alpha_blocks) + { + pixel_chunk& output_chunk_quantized_alpha_selectors = m_dbg_chunk_pixels_final_alpha_selectors[chunk_index]; + + for (uint a = 0; a < m_num_alpha_blocks; a++) + { + const compressed_chunk& chunk = m_compressed_chunks[cAlpha0Chunks + a][chunk_index]; + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + const chunk_tile_desc& layout = g_chunk_tile_layouts[quantized_tile.m_layout_index]; + + uint block_values[cDXT5SelectorValues]; + + // purposely call the general version to debug single color alpah6 blocks + CRNLIB_ASSERT(quantized_tile.m_first_endpoint >= quantized_tile.m_second_endpoint); + dxt5_block::get_block_values(block_values, quantized_tile.m_first_endpoint, quantized_tile.m_second_endpoint); + + for (uint y = 0; y < layout.m_height; y++) + { + for (uint x = 0; x < layout.m_width; x++) + { + const uint chunk_x_ofs = x + layout.m_x_ofs; + const uint chunk_y_ofs = y + layout.m_y_ofs; + const uint block_x = chunk_x_ofs >> 2; + const uint block_y = chunk_y_ofs >> 2; + const selectors& s = m_alpha_selectors[chunk.m_selector_cluster_index[block_y][block_x]]; + + uint selector = s.m_selectors[chunk_y_ofs & 3][chunk_x_ofs & 3]; + + CRNLIB_ASSERT(selector < cDXT5SelectorValues); + + output_chunk_final(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast(block_values[selector]); + + output_chunk_quantized_alpha_selectors(x + layout.m_x_ofs, y + layout.m_y_ofs)[m_params.m_alpha_component_indices[a]] = static_cast(selector*255/(cDXT5SelectorValues-1)); + } //x + } // y + } // tile_index + + } // a + } + } // chunk_index + + } + + bool dxt_hc::create_chunk_encodings() + { + m_chunk_encoding.resize(m_num_chunks); + + for (uint chunk_index = 0; chunk_index < 
m_num_chunks; chunk_index++) + { + if ((chunk_index & 255) == 0) + { + if (!update_progress(19, chunk_index, m_num_chunks)) + return false; + } + + chunk_encoding& encoding = m_chunk_encoding[chunk_index]; + + for (uint q = 0; q < cNumCompressedChunkVecs; q++) + { + bool skip = true; + if (q == cColorChunks) + { + if (m_has_color_blocks) + skip = false; + } + else if (q <= m_num_alpha_blocks) + skip = false; + + if (skip) + continue; + + CRNLIB_ASSERT(!m_compressed_chunks[q].empty()); + const compressed_chunk& chunk = m_compressed_chunks[q][chunk_index]; + + CRNLIB_ASSERT(chunk.m_encoding_index < cNumChunkEncodings); + encoding.m_encoding_index = static_cast(chunk.m_encoding_index); + + CRNLIB_ASSERT(chunk.m_num_tiles <= cChunkMaxTiles); + encoding.m_num_tiles = static_cast(chunk.m_num_tiles); + + for (uint tile_index = 0; tile_index < chunk.m_num_tiles; tile_index++) + { + const compressed_tile& quantized_tile = chunk.m_quantized_tiles[tile_index]; + + if (!q) + { + CRNLIB_ASSERT(quantized_tile.m_endpoint_cluster_index < m_color_clusters.size()); + } + else + { + CRNLIB_ASSERT(quantized_tile.m_endpoint_cluster_index < m_alpha_clusters.size()); + } + + encoding.m_endpoint_indices[q][tile_index] = static_cast(quantized_tile.m_endpoint_cluster_index); + } + + for (uint y = 0; y < cChunkBlockHeight; y++) + { + for (uint x = 0; x < cChunkBlockWidth; x++) + { + const uint selector_index = chunk.m_selector_cluster_index[y][x]; + + if (!q) + { + CRNLIB_ASSERT(selector_index < m_color_selectors.size()); + } + else + { + CRNLIB_ASSERT(selector_index < m_alpha_selectors.size()); + } + + encoding.m_selector_indices[q][y][x] = static_cast(selector_index); + } + } + + } // q + + } // chunk_index + + if (m_has_color_blocks) + { + m_color_endpoints.resize(m_color_clusters.size()); + for (uint i = 0; i < m_color_clusters.size(); i++) + m_color_endpoints[i] = dxt1_block::pack_endpoints(m_color_clusters[i].m_first_endpoint, m_color_clusters[i].m_second_endpoint); + } + + if 
(m_num_alpha_blocks) + { + m_alpha_endpoints.resize(m_alpha_clusters.size()); + for (uint i = 0; i < m_alpha_clusters.size(); i++) + m_alpha_endpoints[i] = dxt5_block::pack_endpoints(m_alpha_clusters[i].m_first_endpoint, m_alpha_clusters[i].m_second_endpoint); + } + + return true; + } + + /* Assembles per-chunk pixel data into a single debug image. img is resized to (num_chunks_x * cChunkPixelWidth) by (num_chunks_y * cChunkPixelHeight). The chunk shown at grid cell (x, y) is chunks[x + y * num_chunks_x], or the horizontally mirrored index on odd rows when serpentine_scan is set. When comp_index >= 0 only that component of each source pixel is assigned to the destination pixel; otherwise the whole pixel is copied. When pChunk_encodings is non-NULL a gray box is drawn around every tile of each chunk's encoding. When chunks is empty, img is filled with black without being resized and the function returns. */ void dxt_hc::create_debug_image_from_chunks(uint num_chunks_x, uint num_chunks_y, const pixel_chunk_vec& chunks, const chunk_encoding_vec *pChunk_encodings, image_u8& img, bool serpentine_scan, int comp_index) + { + if (chunks.empty()) + { + img.set_all(color_quad_u8::make_black()); + return; + } + + img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); + + for (uint y = 0; y < num_chunks_y; y++) + { + for (uint x = 0; x < num_chunks_x; x++) + { + /* c is the source chunk index for grid cell (x, y); odd rows are read right-to-left in serpentine mode. */ uint c = x + y * num_chunks_x; + if ((serpentine_scan) && (y & 1)) + c = (num_chunks_x - 1 - x) + y * num_chunks_x; + + if (comp_index >= 0) + { + for (uint cy = 0; cy < cChunkPixelHeight; cy++) + for (uint cx = 0; cx < cChunkPixelWidth; cx++) + img(x * cChunkPixelWidth + cx, y * cChunkPixelHeight + cy) = chunks[c](cx, cy)[comp_index]; + } + else + { + for (uint cy = 0; cy < cChunkPixelHeight; cy++) + for (uint cx = 0; cx < cChunkPixelWidth; cx++) + img(x * cChunkPixelWidth + cx, y * cChunkPixelHeight + cy) = chunks[c](cx, cy); + } + + if (pChunk_encodings) + { + const chunk_encoding& chunk = (*pChunk_encodings)[c]; + const chunk_encoding_desc &encoding_desc = g_chunk_encodings[chunk.m_encoding_index]; + CRNLIB_ASSERT(chunk.m_num_tiles == encoding_desc.m_num_tiles); + for (uint t = 0; t < chunk.m_num_tiles; t++) + { + const chunk_tile_desc &tile_desc = encoding_desc.m_tiles[t]; + + /* The box origin uses the same chunk-size constants as the pixel copy above, so the outlines stay aligned with the pixels they frame. */ img.draw_box( + x * cChunkPixelWidth + tile_desc.m_x_ofs, y * cChunkPixelHeight + tile_desc.m_y_ofs, + tile_desc.m_width + 1, tile_desc.m_height + 1, color_quad_u8(128, 128, 128, 255)); + } + } + } + } + } + + bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) + { + CRNLIB_ASSERT(get_current_thread_id() == m_main_thread_id); + + if
(!m_params.m_pProgress_func) + return true; + +#if CRNLIB_ENABLE_DEBUG_MESSAGES + if (m_params.m_debugging) + return true; +#endif + + const int percentage_complete = (subphase_total > 1) ? ((100 * subphase_index) / (subphase_total - 1)) : 100; + if (((int)phase_index == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete)) + return !m_canceled; + + m_prev_percentage_complete = percentage_complete; + + bool status = (*m_params.m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_params.m_pProgress_func_data) != 0; + if (!status) + { + m_canceled = true; + return false; + } + + return true; + } + +} // namespace crnlib diff --git a/crnlib/crn_dxt_hc.h b/crnlib/crn_dxt_hc.h new file mode 100644 index 00000000..0d0bb26e --- /dev/null +++ b/crnlib/crn_dxt_hc.h @@ -0,0 +1,440 @@ +// File: crn_dxt_hc.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt1.h" +#include "crn_dxt5a.h" +#include "crn_dxt_endpoint_refiner.h" +#include "crn_image.h" +#include "crn_dxt.h" +#include "crn_image.h" +#include "crn_dxt_hc_common.h" +#include "crn_tree_clusterizer.h" +#include "crn_task_pool.h" +#include "crn_spinlock.h" + +#define CRN_NO_FUNCTION_DEFINITIONS +#include "../inc/crnlib.h" + +namespace crnlib +{ + const uint cTotalCompressionPhases = 25; + + class dxt_hc + { + public: + dxt_hc(); + ~dxt_hc(); + + struct pixel_chunk + { + pixel_chunk() { clear(); } + + dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth]; + + const color_quad_u8& operator() (uint cx, uint cy) const + { + CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight)); + + return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels + [cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)]; + } + + color_quad_u8& operator() (uint cx, uint cy) + { + CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight)); + + return m_blocks[cy >> 
// Settings for one dxt_hc::compress() call. The constructor below establishes the defaults.
struct params
{
   params() :
      m_color_endpoint_codebook_size(3072),
      m_color_selector_codebook_size(3072),
      m_alpha_endpoint_codebook_size(3072),
      m_alpha_selector_codebook_size(3072),
      m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f
      m_adaptive_tile_alpha_psnr_derating(2.0f),
      m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
      m_num_levels(0),
      m_format(cDXT1),
      m_hierarchical(true),
      m_perceptual(true),
      m_debugging(false),
      m_pProgress_func(NULL),
      m_pProgress_func_data(NULL)
   {
      // Default source components for the (up to two) alpha blocks: component 3, then component 0.
      m_alpha_component_indices[0] = 3;
      m_alpha_component_indices[1] = 0;

      // No mip level table by default (m_num_levels is 0); zero every slot anyway.
      for (uint i = 0; i < cCRNMaxLevels; i++)
      {
         m_levels[i].m_first_chunk = 0;
         m_levels[i].m_num_chunks = 0;
      }
   }

   // Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
   uint m_color_endpoint_codebook_size;
   uint m_color_selector_codebook_size;

   uint m_alpha_endpoint_codebook_size;
   uint m_alpha_selector_codebook_size;

   // Higher values cause 8x4, 4x8, and 4x4 tiles to be utilized less often (lower quality/smaller files).
   // Lower values cause the encoder to use large tiles less often (better quality/larger files).
   // Valid range: [0.0,100.0].
   // A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur no quality loss.
   float m_adaptive_tile_color_psnr_derating;

   // Same kind of derating value, for alpha tiles.
   float m_adaptive_tile_alpha_psnr_derating;

   // NOTE(review): presumably weights color against alpha error when choosing a tile layout - confirm in the encoder.
   float m_adaptive_tile_color_alpha_weighting_ratio;

   // Source component index for alpha block 0 and alpha block 1.
   uint m_alpha_component_indices[2];

   // Range of chunks belonging to one mip level.
   struct miplevel_desc
   {
      uint m_first_chunk;
      uint m_num_chunks;
   };
   // The mip level data is optional!
   miplevel_desc m_levels[cCRNMaxLevels];
   uint m_num_levels;

   dxt_format m_format;

   // If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
   bool m_hierarchical;

   // If m_perceptual is true, perceptual color metrics will be used by the encoder.
   bool m_perceptual;

   bool m_debugging;

   // Optional progress callback and the opaque pointer that is passed back to it.
   crn_progress_callback_func m_pProgress_func;
   void* m_pProgress_func_data;
};
codebook_index) const { return m_color_endpoints[codebook_index]; } + const crnlib::vector& get_color_endpoint_vec() const { return m_color_endpoints; } + + // Color selectors + uint get_color_selector_codebook_size() const { return m_color_selectors.size(); } + const selectors& get_color_selectors(uint codebook_index) const { return m_color_selectors[codebook_index]; } + const crnlib::vector& get_color_selectors_vec() const { return m_color_selectors; } + + // Alpha endpoints + inline uint get_alpha_endpoint_codebook_size() const { return m_alpha_endpoints.size(); } + inline uint get_alpha_endpoint(uint codebook_index) const { return m_alpha_endpoints[codebook_index]; } + const crnlib::vector& get_alpha_endpoint_vec() const { return m_alpha_endpoints; } + + // Alpha selectors + uint get_alpha_selector_codebook_size() const { return m_alpha_selectors.size(); } + const selectors& get_alpha_selectors(uint codebook_index) const { return m_alpha_selectors[codebook_index]; } + const crnlib::vector& get_alpha_selectors_vec() const { return m_alpha_selectors; } + + // Debug images + const pixel_chunk_vec& get_compressed_chunk_pixels() const { return m_dbg_chunk_pixels; } + const pixel_chunk_vec& get_compressed_chunk_pixels_tile_vis() const { return m_dbg_chunk_pixels_tile_vis; } + const pixel_chunk_vec& get_compressed_chunk_pixels_color_quantized() const { return m_dbg_chunk_pixels_color_quantized; } + const pixel_chunk_vec& get_compressed_chunk_pixels_alpha_quantized() const { return m_dbg_chunk_pixels_alpha_quantized; } + const pixel_chunk_vec& get_compressed_chunk_pixels_final() const { return m_dbg_chunk_pixels_final; } + + const pixel_chunk_vec& get_compressed_chunk_pixels_orig_color_selectors() const { return m_dbg_chunk_pixels_orig_color_selectors; } + const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_color_selectors() const { return m_dbg_chunk_pixels_quantized_color_selectors; } + const pixel_chunk_vec& 
get_compressed_chunk_pixels_final_color_selectors() const { return m_dbg_chunk_pixels_final_color_selectors; } + + const pixel_chunk_vec& get_compressed_chunk_pixels_orig_alpha_selectors() const { return m_dbg_chunk_pixels_orig_alpha_selectors; } + const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_alpha_selectors() const { return m_dbg_chunk_pixels_quantized_alpha_selectors; } + const pixel_chunk_vec& get_compressed_chunk_pixels_final_alpha_selectors() const { return m_dbg_chunk_pixels_final_alpha_selectors; } + + static void create_debug_image_from_chunks(uint num_chunks_x, uint num_chunks_y, const pixel_chunk_vec& chunks, const chunk_encoding_vec *pChunk_encodings, image_u8& img, bool serpentine_scan, int comp_index = -1); + + private: + params m_params; + + uint m_num_chunks; + const pixel_chunk* m_pChunks; + + chunk_encoding_vec m_chunk_encoding; + + uint m_num_alpha_blocks; // 0, 1, or 2 + bool m_has_color_blocks; + bool m_has_alpha0_blocks; + bool m_has_alpha1_blocks; + + struct compressed_tile + { + uint m_endpoint_cluster_index; + uint m_first_endpoint; + uint m_second_endpoint; + + uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; + + void set_selector(uint x, uint y, uint s) + { + CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height)); + m_selectors[x + y * m_pixel_width] = static_cast(s); + } + + uint get_selector(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height)); + return m_selectors[x + y * m_pixel_width]; + } + + uint8 m_pixel_width; + uint8 m_pixel_height; + + uint8 m_layout_index; + + bool m_alpha_encoding; + }; + + struct compressed_chunk + { + compressed_chunk() { utils::zero_object(*this); } + + uint8 m_encoding_index; + + uint8 m_num_tiles; + + compressed_tile m_tiles[cChunkMaxTiles]; + compressed_tile m_quantized_tiles[cChunkMaxTiles]; + + uint16 m_endpoint_cluster_index[cChunkMaxTiles]; + uint16 m_selector_cluster_index[cChunkBlockHeight][cChunkBlockWidth]; + }; + + typedef 
crnlib::vector compressed_chunk_vec; + enum + { + cColorChunks = 0, + cAlpha0Chunks = 1, + cAlpha1Chunks = 2, + + cNumCompressedChunkVecs = 3 + }; + compressed_chunk_vec m_compressed_chunks[cNumCompressedChunkVecs]; + + int32 m_encoding_hist[cNumChunkEncodings]; + + int32 m_total_tiles; + + void compress_dxt1_block( + dxt1_endpoint_optimizer::results& results, + uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, + uint8* pSelectors); + + void compress_dxt5_block( + dxt5_endpoint_optimizer::results& results, + uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index, + uint8* pAlpha_selectors); + + void determine_compressed_chunks_task(uint64 data, void* pData_ptr); + bool determine_compressed_chunks(); + + struct tile_cluster + { + tile_cluster() : m_first_endpoint(0), m_second_endpoint(0), m_error(0), m_alpha_encoding(false) { } + + // first = chunk, second = tile + // if an alpha tile, second's upper 16 bits contains the alpha index (0 or 1) + crnlib::vector< std::pair > m_tiles; + + uint m_first_endpoint; + uint m_second_endpoint; + uint64 m_error; + + bool m_alpha_encoding; + }; + + typedef crnlib::vector tile_cluster_vec; + + tile_cluster_vec m_color_clusters; + tile_cluster_vec m_alpha_clusters; + + selectors_vec m_color_selectors; + selectors_vec m_alpha_selectors; + + // For each selector, this array indicates every chunk/tile/tile block that use this color selector. 
+ struct block_id + { + block_id() { utils::zero_object(*this); } + + block_id(uint chunk_index, uint alpha_index, uint tile_index, uint block_x, uint block_y) : + m_chunk_index(chunk_index), m_alpha_index((uint8)alpha_index), m_tile_index((uint8)tile_index), m_block_x((uint8)block_x), m_block_y((uint8)block_y) { } + + uint m_chunk_index; + uint8 m_alpha_index; + uint8 m_tile_index; + uint8 m_block_x; + uint8 m_block_y; + }; + + typedef crnlib::vector< crnlib::vector< block_id > > chunk_blocks_using_selectors_vec; + chunk_blocks_using_selectors_vec m_chunk_blocks_using_color_selectors; + chunk_blocks_using_selectors_vec m_chunk_blocks_using_alpha_selectors; // second's upper 16 bits contain alpha index! + + crnlib::vector m_color_endpoints; // not valid until end, only for user access + crnlib::vector m_alpha_endpoints; // not valid until end, only for user access + + // Debugging + pixel_chunk_vec m_dbg_chunk_pixels; + pixel_chunk_vec m_dbg_chunk_pixels_tile_vis; + pixel_chunk_vec m_dbg_chunk_pixels_color_quantized; + pixel_chunk_vec m_dbg_chunk_pixels_alpha_quantized; + + pixel_chunk_vec m_dbg_chunk_pixels_orig_color_selectors; + pixel_chunk_vec m_dbg_chunk_pixels_quantized_color_selectors; + pixel_chunk_vec m_dbg_chunk_pixels_final_color_selectors; + + pixel_chunk_vec m_dbg_chunk_pixels_orig_alpha_selectors; + pixel_chunk_vec m_dbg_chunk_pixels_quantized_alpha_selectors; + pixel_chunk_vec m_dbg_chunk_pixels_final_alpha_selectors; + + pixel_chunk_vec m_dbg_chunk_pixels_final; + + uint32 m_main_thread_id; + bool m_canceled; + task_pool* m_pTask_pool; + + int m_prev_phase_index; + int m_prev_percentage_complete; + + typedef vec<6, float> vec6F; + typedef vec<16, float> vec16F; + typedef tree_clusterizer vec2F_tree_vq; + typedef tree_clusterizer vec6F_tree_vq; + typedef tree_clusterizer vec16F_tree_vq; + + struct assign_color_endpoint_clusters_state + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(assign_color_endpoint_clusters_state); + + 
// Bundle of inputs/outputs for building a selector codebook.
// Everything except the flags and the component range is held BY REFERENCE, so the referenced
// objects must outlive this state object.
// NOTE(review): presumably handed to create_selector_codebook_task() through its pData_ptr argument - confirm.
struct create_selector_codebook_state
{
   CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(create_selector_codebook_state);

   // Binds the references and copies the scalar settings; does no other work.
   create_selector_codebook_state(dxt_hc& hc, bool alpha_blocks, uint comp_index_start, uint comp_index_end, vec16F_tree_vq& selector_vq, chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors, selectors_vec& selectors_cb) :
      m_hc(hc),
      m_alpha_blocks(alpha_blocks),
      m_comp_index_start(comp_index_start),
      m_comp_index_end(comp_index_end),
      m_selector_vq(selector_vq),
      m_chunk_blocks_using_selectors(chunk_blocks_using_selectors),
      m_selectors_cb(selectors_cb)
   {
   }

   dxt_hc& m_hc;
   bool m_alpha_blocks;
   // NOTE(review): whether the end index is inclusive is not visible here - check the task function.
   uint m_comp_index_start;
   uint m_comp_index_end;
   vec16F_tree_vq& m_selector_vq;
   chunk_blocks_using_selectors_vec& m_chunk_blocks_using_selectors;
   selectors_vec& m_selectors_cb;

   // mutable so it can be taken through a const reference to the state.
   // NOTE(review): by its name it guards m_chunk_blocks_using_selectors - confirm at the use sites.
   mutable spinlock m_chunk_blocks_using_selectors_lock;
};
// The ways an 8x8 chunk can be split into tiles.
// Each row is { number of tiles, { tile, ... } } and each tile is
// { x offset, y offset, width, height, layout index } (see chunk_tile_desc), all in pixels.
// Every tile listed here has the same rectangle as the g_chunk_tile_layouts entry with its layout index.
// Rows with fewer than four tiles leave the remaining m_tiles slots zero-initialized.
chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] =
{
   // One 8x8 tile.
   { 1, { { 0, 0, 8, 8, 0 } } },

   // Two 8x4 tiles (y = 0 and y = 4), then two 4x8 tiles (x = 0 and x = 4).
   { 2, { { 0, 0, 8, 4, 1 }, { 0, 4, 8, 4, 2 } } },
   { 2, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 8, 4 } } },

   // One 8x4 tile plus two 4x4 tiles filling the other half.
   { 3, { { 0, 0, 8, 4, 1 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } },
   { 3, { { 0, 4, 8, 4, 2 }, { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 } } },

   // One 4x8 tile plus two 4x4 tiles filling the other half.
   { 3, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 4, 6 }, { 4, 4, 4, 4, 8 } } },
   { 3, { { 4, 0, 4, 8, 4 }, { 0, 0, 4, 4, 5 }, { 0, 4, 4, 4, 7 } } },

   // Four 4x4 tiles.
   { 4, { { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } }
};
namespace crnlib
{
   // Block geometry: a block is 4x4 pixels. The sizes are derived from the shifts so the two cannot drift apart.
   const uint cBlockPixelWidthShift = 2;
   const uint cBlockPixelHeightShift = 2;

   const uint cBlockPixelWidth = 1U << cBlockPixelWidthShift;
   const uint cBlockPixelHeight = 1U << cBlockPixelHeightShift;

   // Chunk geometry: a chunk is 2x2 blocks, i.e. 8x8 pixels.
   const uint cChunkBlockWidth = 2;
   const uint cChunkBlockHeight = 2;

   const uint cChunkPixelWidth = cChunkBlockWidth * cBlockPixelWidth;
   const uint cChunkPixelHeight = cChunkBlockHeight * cBlockPixelHeight;

   // The finest split of a chunk is one tile per block.
   const uint cChunkMaxTiles = cChunkBlockWidth * cChunkBlockHeight;

   // Rectangle of one tile inside a chunk, plus the index of the matching g_chunk_tile_layouts entry.
   struct chunk_tile_desc
   {
      // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight.
      uint m_x_ofs;
      uint m_y_ofs;
      uint m_width;
      uint m_height;
      uint m_layout_index;
   };

   // One way of splitting a chunk into tiles; only the first m_num_tiles entries of m_tiles are meaningful.
   struct chunk_encoding_desc
   {
      uint m_num_tiles;
      chunk_tile_desc m_tiles[cChunkMaxTiles]; // was a bare 4
   };

   const uint cNumChunkEncodings = 8;
   extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings];

   // Layouts are grouped by size; entries from cFirst4x4ChunkTileLayout onward are the 4x4 tiles.
   const uint cNumChunkTileLayouts = 9;
   const uint cFirst4x4ChunkTileLayout = 5;
   extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts];

} // namespace crnlib
m_bytes_per_block(0), + m_format(cDXTInvalid) + { + utils::zero_object(m_element_type); + utils::zero_object(m_element_component_index); + } + + dxt_image::dxt_image(const dxt_image& other) : + m_pElements(NULL) + { + *this = other; + } + + dxt_image& dxt_image::operator= (const dxt_image& rhs) + { + if (this == &rhs) + return *this; + + clear(); + + m_width = rhs.m_width; + m_height = rhs.m_height; + m_blocks_x = rhs.m_blocks_x; + m_blocks_y = rhs.m_blocks_y; + m_num_elements_per_block = rhs.m_num_elements_per_block; + m_bytes_per_block = rhs.m_bytes_per_block; + m_format = rhs.m_format; + m_total_blocks = rhs.m_total_blocks; + m_total_elements = rhs.m_total_elements; + m_pElements = NULL; + memcpy(m_element_type, rhs.m_element_type, sizeof(m_element_type)); + memcpy(m_element_component_index, rhs.m_element_component_index, sizeof(m_element_component_index)); + + if (rhs.m_pElements) + { + m_elements.resize(m_total_elements); + memcpy(&m_elements[0], rhs.m_pElements, sizeof(element) * m_total_elements); + m_pElements = &m_elements[0]; + } + + return *this; + } + + void dxt_image::clear() + { + m_elements.clear(); + m_width = 0; + m_height = 0; + m_blocks_x = 0; + m_blocks_y = 0; + m_num_elements_per_block = 0; + m_bytes_per_block = 0; + m_format = cDXTInvalid; + utils::zero_object(m_element_type); + utils::zero_object(m_element_component_index); + m_total_blocks = 0; + m_total_elements = 0; + m_pElements = NULL; + } + + bool dxt_image::init_internal(dxt_format fmt, uint width, uint height) + { + CRNLIB_ASSERT((fmt != cDXTInvalid) && (width > 0) && (height > 0)); + + clear(); + + m_width = width; + m_height = height; + + m_blocks_x = (m_width + 3) >> cDXTBlockShift; + m_blocks_y = (m_height + 3) >> cDXTBlockShift; + + m_num_elements_per_block = 2; + if ((fmt == cDXT1) || (fmt == cDXT1A) || (fmt == cDXT5A)) + m_num_elements_per_block = 1; + + m_total_blocks = m_blocks_x * m_blocks_y; + m_total_elements = m_total_blocks * m_num_elements_per_block; + + 
m_bytes_per_block = cDXT1BytesPerBlock * m_num_elements_per_block; + + m_format = fmt; + + switch (m_format) + { + case cDXT1: + case cDXT1A: + { + m_element_type[0] = cColor; + m_element_component_index[0] = -1; + break; + } + case cDXT3: + { + m_element_type[0] = cAlpha3; + m_element_type[1] = cColor; + m_element_component_index[0] = 3; + m_element_component_index[1] = -1; + break; + } + case cDXT5: + { + m_element_type[0] = cAlpha5; + m_element_type[1] = cColor; + m_element_component_index[0] = 3; + m_element_component_index[1] = -1; + break; + } + case cDXT5A: + { + m_element_type[0] = cAlpha5; + m_element_component_index[0] = 3; + break; + } + case cDXN_XY: + { + m_element_type[0] = cAlpha5; + m_element_type[1] = cAlpha5; + m_element_component_index[0] = 0; + m_element_component_index[1] = 1; + break; + } + case cDXN_YX: + { + m_element_type[0] = cAlpha5; + m_element_type[1] = cAlpha5; + m_element_component_index[0] = 1; + m_element_component_index[1] = 0; + break; + } + default: + { + CRNLIB_ASSERT(0); + clear(); + return false; + } + } + + return true; + } + + bool dxt_image::init(dxt_format fmt, uint width, uint height, bool clear_elements) + { + if (!init_internal(fmt, width, height)) + return false; + + m_elements.resize(m_total_elements); + m_pElements = &m_elements[0]; + + if (clear_elements) + memset(m_pElements, 0, sizeof(element) * m_total_elements); + + return true; + } + + bool dxt_image::init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy) + { + CRNLIB_ASSERT(num_elements && pElements); + + if (!init_internal(fmt, width, height)) + return false; + + if (num_elements != m_total_elements) + { + clear(); + return false; + } + + if (create_copy) + { + m_elements.resize(m_total_elements); + m_pElements = &m_elements[0]; + + memcpy(m_pElements, pElements, m_total_elements * sizeof(element)); + } + else + m_pElements = pElements; + + return true; + } + + struct init_task_params + { + dxt_format m_fmt; + 
const image_u8* m_pImg; + const dxt_image::pack_params* m_pParams; + uint32 m_main_thread; + int32 m_canceled; + }; + + void dxt_image::init_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); + init_task_params* pInit_params = static_cast(pData_ptr); + + const image_u8& img = *pInit_params->m_pImg; + const pack_params& p = *pInit_params->m_pParams; + const bool is_main_thread = (get_current_thread_id() == pInit_params->m_main_thread); + + uint block_index = 0; + + dxt1_endpoint_optimizer dxt1_optimizer; + dxt5_endpoint_optimizer dxt5_optimizer; + int prev_progress_percentage = -1; + + for (uint block_y = 0; block_y < m_blocks_y; block_y++) + { + const uint pixel_ofs_y = block_y * cDXTBlockSize; + + for (uint block_x = 0; block_x < m_blocks_x; block_x++, block_index++) + { + if (pInit_params->m_canceled) + return; + + if (p.m_pProgress_callback && is_main_thread && ((block_index & 63) == 63)) + { + const uint progress_percentage = p.m_progress_start + ((block_index * p.m_progress_range + get_total_blocks() / 2) / get_total_blocks()); + if ((int)progress_percentage != prev_progress_percentage) + { + prev_progress_percentage = progress_percentage; + if (!(p.m_pProgress_callback)(progress_percentage, p.m_pProgress_callback_user_data_ptr)) + { + interlocked_exchange32(&pInit_params->m_canceled, CRNLIB_TRUE); + return; + } + } + } + + if (p.m_num_helper_threads) + { + if ((block_index % (p.m_num_helper_threads + 1)) != thread_index) + continue; + } + + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + + const uint pixel_ofs_x = block_x * cDXTBlockSize; + + for (uint y = 0; y < cDXTBlockSize; y++) + { + const uint iy = math::minimum(pixel_ofs_y + y, img.get_height() - 1); + + for (uint x = 0; x < cDXTBlockSize; x++) + { + const uint ix = math::minimum(pixel_ofs_x + x, img.get_width() - 1); + + pixels[x + y * cDXTBlockSize] = img(ix, iy); + } + } + + set_block_pixels(block_x, block_y, pixels, p, dxt1_optimizer, dxt5_optimizer); + } + 
} + } + +#if CRNLIB_SUPPORT_ATI_COMPRESS + bool dxt_image::init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p) + { + image_u8 tmp_img(img); + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + color_quad_u8 c(img(x, y)); + std::swap(c.r, c.b); + tmp_img(x, y) = c; + } + } + + ATI_TC_Texture src_tex; + utils::zero_object(src_tex); + src_tex.dwSize = sizeof(ATI_TC_Texture); + src_tex.dwWidth = tmp_img.get_width(); + src_tex.dwHeight = tmp_img.get_height(); + src_tex.dwPitch = tmp_img.get_pitch_in_bytes(); + src_tex.format = ATI_TC_FORMAT_ARGB_8888; + src_tex.dwDataSize = src_tex.dwPitch * tmp_img.get_height(); + src_tex.pData = (ATI_TC_BYTE*)tmp_img.get_ptr(); + + ATI_TC_Texture dst_tex; + utils::zero_object(dst_tex); + dst_tex.dwSize = sizeof(ATI_TC_Texture); + dst_tex.dwWidth = tmp_img.get_width(); + dst_tex.dwHeight = tmp_img.get_height(); + dst_tex.dwDataSize = get_size_in_bytes(); + dst_tex.pData = (ATI_TC_BYTE*)get_element_ptr(); + + switch (fmt) + { + case cDXT1: + case cDXT1A: + dst_tex.format = ATI_TC_FORMAT_DXT1; + break; + case cDXT3: + dst_tex.format = ATI_TC_FORMAT_DXT3; + break; + case cDXT5: + dst_tex.format = ATI_TC_FORMAT_DXT5; + break; + case cDXT5A: + dst_tex.format = ATI_TC_FORMAT_ATI1N; + break; + case cDXN_XY: + dst_tex.format = ATI_TC_FORMAT_ATI2N_XY; + break; + case cDXN_YX: + dst_tex.format = ATI_TC_FORMAT_ATI2N; + break; + default: + { + CRNLIB_ASSERT(false); + return false; + } + } + + ATI_TC_CompressOptions options; + utils::zero_object(options); + options.dwSize = sizeof(ATI_TC_CompressOptions); + + if (fmt == cDXT1A) + { + options.bDXT1UseAlpha = TRUE; + options.nAlphaThreshold = (ATI_TC_BYTE)p.m_dxt1a_alpha_threshold; + } + options.bDisableMultiThreading = (p.m_num_helper_threads == 0); + switch (p.m_quality) + { + case cCRNDXTQualityFast: + options.nCompressionSpeed = ATI_TC_Speed_Fast; + break; + case cCRNDXTQualitySuperFast: + options.nCompressionSpeed = 
ATI_TC_Speed_SuperFast; + break; + default: + options.nCompressionSpeed = ATI_TC_Speed_Normal; + break; + } + + if (p.m_perceptual) + { + options.bUseChannelWeighting = TRUE; + options.fWeightingRed = .212671f; + options.fWeightingGreen = .715160f; + options.fWeightingBlue = .072169f; + } + + ATI_TC_ERROR err = ATI_TC_ConvertTexture(&src_tex, &dst_tex, &options, NULL, NULL, NULL); + return err == ATI_TC_OK; + } +#endif + + bool dxt_image::init(dxt_format fmt, const image_u8& img, const pack_params& p) + { + if (!init(fmt, img.get_width(), img.get_height(), false)) + return false; + +#if CRNLIB_SUPPORT_ATI_COMPRESS + if (p.m_compressor == cCRNDXTCompressorATI) + return init_ati_compress(fmt, img, p); +#endif + + task_pool *pPool = p.m_pTask_pool; + + task_pool tmp_pool; + if (!pPool) + { + if (!tmp_pool.init(p.m_num_helper_threads)) + return false; + pPool = &tmp_pool; + } + + init_task_params init_params; + init_params.m_fmt = fmt; + init_params.m_pImg = &img; + init_params.m_pParams = &p; + init_params.m_main_thread = get_current_thread_id(); + init_params.m_canceled = false; + + for (uint i = 0; i <= p.m_num_helper_threads; i++) + pPool->queue_object_task(this, &dxt_image::init_task, i, &init_params); + + pPool->join(); + + if (init_params.m_canceled) + return false; + + return true; + } + + bool dxt_image::unpack(image_u8& img) const + { + if (!m_total_elements) + return false; + + img.resize(m_width, m_height); + + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pixels[i].set(0, 0, 0, 255); + + for (uint block_y = 0; block_y < m_blocks_y; block_y++) + { + const uint pixel_ofs_y = block_y * cDXTBlockSize; + const uint limit_y = math::minimum(cDXTBlockSize, img.get_height() - pixel_ofs_y); + + for (uint block_x = 0; block_x < m_blocks_x; block_x++) + { + get_block_pixels(block_x, block_y, pixels); + + const uint pixel_ofs_x = block_x * cDXTBlockSize; + + const uint limit_x = 
math::minimum(cDXTBlockSize, img.get_width() - pixel_ofs_x); + + for (uint y = 0; y < limit_y; y++) + { + const uint iy = pixel_ofs_y + y; + + for (uint x = 0; x < limit_x; x++) + { + const uint ix = pixel_ofs_x + x; + + img(ix, iy) = pixels[x + (y << cDXTBlockShift)]; + } + } + } + } + + img.reset_comp_flags(); + img.set_component_valid(0, false); + img.set_component_valid(1, false); + img.set_component_valid(2, false); + for (uint i = 0; i < m_num_elements_per_block; i++) + { + if (m_element_component_index[i] < 0) + { + img.set_component_valid(0, true); + img.set_component_valid(1, true); + img.set_component_valid(2, true); + } + else + img.set_component_valid(m_element_component_index[i], true); + } + + img.set_component_valid(3, get_dxt_format_has_alpha(m_format)); + + return true; + } + + void dxt_image::endian_swap() + { + utils::endian_switch_words(reinterpret_cast(m_elements.get_ptr()), m_elements.size_in_bytes() / sizeof(uint16)); + } + + const dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) const + { + CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); + return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; + } + + dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) + { + CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); + return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; + } + + bool dxt_image::has_alpha() const + { + switch (m_format) + { + case cDXT1: + { + for (uint i = 0; i < m_total_elements; i++) + { + const dxt1_block& blk = *(dxt1_block*)&m_pElements[i]; + + if (blk.get_low_color() <= blk.get_high_color()) + { + for (uint y = 0; y < cDXTBlockSize; y++) + for (uint x = 0; x < cDXTBlockSize; x++) + if (blk.get_selector(x, y) == 3) + return true; + } + } + + 
break; + } + case cDXT1A: + case cDXT3: + case cDXT5: + case cDXT5A: + return true; + default: break; + } + + return false; + } + + color_quad_u8 dxt_image::get_pixel(uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + const element* pElement = reinterpret_cast(&get_element(block_x, block_y, 0)); + + color_quad_u8 result(0, 0, 0, 255); + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColor: + { + const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = pBlock->get_low_color(); + const uint h = pBlock->get_high_color(); + + color_quad_u8 c0(dxt1_block::unpack_color(static_cast(l), true)); + color_quad_u8 c1(dxt1_block::unpack_color(static_cast(h), true)); + + const uint s = pBlock->get_selector(x & 3, y & 3); + + if (l > h) + { + switch (s) + { + case 0: result.set_noclamp_rgb(c0.r, c0.g, c0.b); break; + case 1: result.set_noclamp_rgb(c1.r, c1.g, c1.b); break; + case 2: result.set_noclamp_rgb( (c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3); break; + case 3: result.set_noclamp_rgb( (c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3); break; + } + } + else + { + switch (s) + { + case 0: result.set_noclamp_rgb(c0.r, c0.g, c0.b); break; + case 1: result.set_noclamp_rgb(c1.r, c1.g, c1.b); break; + case 2: result.set_noclamp_rgb( (c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U); break; + case 3: + { + if (m_format <= cDXT1A) + result.set_noclamp_rgba(0, 0, 0, 0); + else + result.set_noclamp_rgb(0, 0, 0); + break; + } + } + } + + break; + } + case cAlpha5: + { + const int comp_index = m_element_component_index[element_index]; + + const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = 
pBlock->get_low_alpha(); + const uint h = pBlock->get_high_alpha(); + + const uint s = pBlock->get_selector(x & 3, y & 3); + + if (l > h) + { + switch (s) + { + case 0: result[comp_index] = static_cast(l); break; + case 1: result[comp_index] = static_cast(h); break; + case 2: result[comp_index] = static_cast((l * 6 + h ) / 7); break; + case 3: result[comp_index] = static_cast((l * 5 + h * 2) / 7); break; + case 4: result[comp_index] = static_cast((l * 4 + h * 3) / 7); break; + case 5: result[comp_index] = static_cast((l * 3 + h * 4) / 7); break; + case 6: result[comp_index] = static_cast((l * 2 + h * 5) / 7); break; + case 7: result[comp_index] = static_cast((l + h * 6) / 7); break; + } + } + else + { + switch (s) + { + case 0: result[comp_index] = static_cast(l); break; + case 1: result[comp_index] = static_cast(h); break; + case 2: result[comp_index] = static_cast((l * 4 + h ) / 5); break; + case 3: result[comp_index] = static_cast((l * 3 + h * 2) / 5); break; + case 4: result[comp_index] = static_cast((l * 2 + h * 3) / 5); break; + case 5: result[comp_index] = static_cast((l + h * 4) / 5); break; + case 6: result[comp_index] = 0; break; + case 7: result[comp_index] = 255; break; + } + } + + break; + } + case cAlpha3: + { + const int comp_index = m_element_component_index[element_index]; + + const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + result[comp_index] = static_cast(pBlock->get_alpha(x & 3, y & 3, true)); + + break; + } + default: break; + } + } + + return result; + } + + uint dxt_image::get_pixel_alpha(uint x, uint y, uint element_index) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height) && (element_index < m_num_elements_per_block)); + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + switch (m_element_type[element_index]) + { + case cColor: + { + if (m_format <= cDXT1A) + { + const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, 
element_index)); + + const uint l = pBlock->get_low_color(); + const uint h = pBlock->get_high_color(); + + if (l <= h) + { + uint s = pBlock->get_selector(x & 3, y & 3); + + return (s == 3) ? 0 : 255; + } + else + { + return 255; + } + } + + break; + } + case cAlpha5: + { + const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + const uint l = pBlock->get_low_alpha(); + const uint h = pBlock->get_high_alpha(); + + const uint s = pBlock->get_selector(x & 3, y & 3); + + if (l > h) + { + switch (s) + { + case 0: return l; + case 1: return h; + case 2: return (l * 6 + h ) / 7; + case 3: return (l * 5 + h * 2) / 7; + case 4: return (l * 4 + h * 3) / 7; + case 5: return (l * 3 + h * 4) / 7; + case 6: return (l * 2 + h * 5) / 7; + case 7: return (l + h * 6) / 7; + } + } + else + { + switch (s) + { + case 0: return l; + case 1: return h; + case 2: return (l * 4 + h ) / 5; + case 3: return (l * 3 + h * 2) / 5; + case 4: return (l * 2 + h * 3) / 5; + case 5: return (l + h * 4) / 5; + case 6: return 0; + case 7: return 255; + } + } + } + case cAlpha3: + { + const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); + + return pBlock->get_alpha(x & 3, y & 3, true); + } + default: break; + } + + return 255; + } + + void dxt_image::set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + element* pElement = &get_element(block_x, block_y, 0); + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColor: + { + dxt1_block* pDXT1_block = reinterpret_cast(pElement); + + color_quad_u8 colors[cDXT1SelectorValues]; + const uint n = pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); 
+ + if ((m_format == cDXT1A) && (c.a < 128)) + pDXT1_block->set_selector(x & 3, y & 3, 3); + else + { + uint best_error = UINT_MAX; + uint best_selector = 0; + + for (uint i = 0; i < n; i++) + { + uint error = color::color_distance(perceptual, colors[i], c, false); + if (error < best_error) + { + best_error = error; + best_selector = i; + } + } + + pDXT1_block->set_selector(x & 3, y & 3, best_selector); + } + + break; + } + case cAlpha5: + { + dxt5_block* pDXT5_block = reinterpret_cast(pElement); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + + const int comp_index = m_element_component_index[element_index]; + + uint best_error = UINT_MAX; + uint best_selector = 0; + + for (uint i = 0; i < cDXT5SelectorValues; i++) + { + uint error = labs(values[i] - c[comp_index]); // no need to square + + if (error < best_error) + { + best_error = error; + best_selector = i; + } + } + + pDXT5_block->set_selector(x & 3, y & 3, best_selector); + + break; + } + case cAlpha3: + { + const int comp_index = m_element_component_index[element_index]; + + dxt3_block* pDXT3_block = reinterpret_cast(pElement); + + pDXT3_block->set_alpha(x & 3, y & 3, c[comp_index], true); + + break; + } + default: break; + } + } // element_index + } + + void dxt_image::get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const + { + const element* pElement = &get_element(block_x, block_y, 0); + + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColor: + { + const dxt1_block* pDXT1_block = reinterpret_cast(pElement); + + color_quad_u8 colors[cDXT1SelectorValues]; + pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + uint s = pDXT1_block->get_selector(i & 3, 
i >> 2); + + pPixels[i].r = colors[s].r; + pPixels[i].g = colors[s].g; + pPixels[i].b = colors[s].b; + + if (m_format <= cDXT1A) + pPixels[i].a = colors[s].a; + } + + break; + } + case cAlpha5: + { + const dxt5_block* pDXT5_block = reinterpret_cast(pElement); + + uint values[cDXT5SelectorValues]; + dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); + + const int comp_index = m_element_component_index[element_index]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + uint s = pDXT5_block->get_selector(i & 3, i >> 2); + + pPixels[i][comp_index] = static_cast(values[s]); + } + + break; + } + case cAlpha3: + { + const dxt3_block* pDXT3_block = reinterpret_cast(pElement); + + const int comp_index = m_element_component_index[element_index]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); + + pPixels[i][comp_index] = static_cast(a); + } + + break; + } + default: break; + } + } // element_index + } + + void dxt_image::set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p) + { + dxt1_endpoint_optimizer dxt1_optimizer; + dxt5_endpoint_optimizer dxt5_optimizer; + set_block_pixels(block_x, block_y, pPixels, p, dxt1_optimizer, dxt5_optimizer); + } + + void dxt_image::set_block_pixels( + uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, + dxt1_endpoint_optimizer& dxt1_optimizer, dxt5_endpoint_optimizer& dxt5_optimizer) + { + element* pElement = &get_element(block_x, block_y, 0); + +#if CRNLIB_SUPPORT_SQUISH + if ((p.m_compressor == cCRNDXTCompressorSquish) && ((m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cDXT5A))) + { + uint squish_flags = 0; + if ((m_format == cDXT1) || (m_format == cDXT1A)) + squish_flags = squish::kDxt1; + else if (m_format == cDXT3) + squish_flags = squish::kDxt3; + else if (m_format == 
cDXT5A) + squish_flags = squish::kDxt5A; + else + squish_flags = squish::kDxt5; + + if (p.m_perceptual) + squish_flags |= squish::kColourMetricPerceptual; + else + squish_flags |= squish::kColourMetricUniform; + + if (p.m_quality >= cCRNDXTQualityBetter) + squish_flags |= squish::kColourIterativeClusterFit; + else if (p.m_quality == cCRNDXTQualitySuperFast) + squish_flags |= squish::kColourRangeFit; + + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + + memcpy(pixels, pPixels, sizeof(color_quad_u8) * cDXTBlockSize * cDXTBlockSize); + + if (m_format == cDXT1) + { + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pixels[i].a = 255; + } + else if (m_format == cDXT1A) + { + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + if (pixels[i].a < p.m_dxt1a_alpha_threshold) + pixels[i].a = 0; + else + pixels[i].a = 255; + } + + squish::Compress(reinterpret_cast(pixels), pElement, squish_flags); + } + + else +#endif // CRNLIB_SUPPORT_SQUISH + // RYG doesn't support DXT1A + if ((p.m_compressor == cCRNDXTCompressorRYG) && ((m_format == cDXT1) || (m_format == cDXT5) || (m_format == cDXT5A))) + { + color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + pixels[i].r = pPixels[i].b; + pixels[i].g = pPixels[i].g; + pixels[i].b = pPixels[i].r; + + if (m_format == cDXT1) + pixels[i].a = 255; + else + pixels[i].a = pPixels[i].a; + } + + if (m_format == cDXT5A) + ryg_dxt::sCompressDXT5ABlock((sU8*)pElement, (const sU32*)pixels, 0); + else + ryg_dxt::sCompressDXTBlock((sU8*)pElement, (const sU32*)pixels, m_format == cDXT5, 0); + } + else if ((p.m_compressor == cCRNDXTCompressorCRNF) && (m_format != cDXT1A)) + { + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColor: + { + dxt1_block* pDXT1_block = reinterpret_cast(pElement); + dxt_fast::compress_color_block(pDXT1_block, pPixels, p.m_quality 
>= cCRNDXTQualityNormal); + + break; + } + case cAlpha5: + { + dxt5_block* pDXT5_block = reinterpret_cast(pElement); + dxt_fast::compress_alpha_block(pDXT5_block, pPixels, m_element_component_index[element_index]); + + break; + } + case cAlpha3: + { + const int comp_index = m_element_component_index[element_index]; + + dxt3_block* pDXT3_block = reinterpret_cast(pElement); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); + + break; + } + default: break; + } + } + } + else + { + for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) + { + switch (m_element_type[element_index]) + { + case cColor: + { + dxt1_block* pDXT1_block = reinterpret_cast(pElement); + + bool pixels_have_alpha = false; + if (m_format == cDXT1A) + { + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + if (pPixels[i].a < p.m_dxt1a_alpha_threshold) + { + pixels_have_alpha = true; + break; + } + } + + dxt1_endpoint_optimizer::results results; + uint8 selectors[cDXTBlockSize * cDXTBlockSize]; + results.m_pSelectors = selectors; + + dxt1_endpoint_optimizer::params params; + params.m_block_index = block_x + block_y * m_blocks_x; + params.m_quality = p.m_quality; + params.m_perceptual = p.m_perceptual; + params.m_grayscale_sampling = p.m_grayscale_sampling; + params.m_pixels_have_alpha = pixels_have_alpha; + params.m_use_alpha_blocks = p.m_use_both_block_types; + params.m_use_transparent_indices_for_black = p.m_use_transparent_indices_for_black; + params.m_dxt1a_alpha_threshold = p.m_dxt1a_alpha_threshold; + params.m_pPixels = pPixels; + params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; + params.m_endpoint_caching = p.m_endpoint_caching; + params.m_color_weights[0] = p.m_color_weights[0]; + params.m_color_weights[1] = p.m_color_weights[1]; + params.m_color_weights[2] = p.m_color_weights[2]; + + if ((m_format != cDXT1) && (m_format != cDXT1A)) + 
params.m_use_alpha_blocks = false; + + if (!dxt1_optimizer.compute(params, results)) + { + CRNLIB_ASSERT(0); + break; + } + + pDXT1_block->set_low_color(results.m_low_color); + pDXT1_block->set_high_color(results.m_high_color); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pDXT1_block->set_selector(i & 3, i >> 2, selectors[i]); + + break; + } + case cAlpha5: + { + dxt5_block* pDXT5_block = reinterpret_cast(pElement); + + dxt5_endpoint_optimizer::results results; + + uint8 selectors[cDXTBlockSize * cDXTBlockSize]; + results.m_pSelectors = selectors; + + dxt5_endpoint_optimizer::params params; + params.m_block_index = block_x + block_y * m_blocks_x; + params.m_pPixels = pPixels; + params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; + params.m_comp_index = m_element_component_index[element_index]; + params.m_quality = p.m_quality; + params.m_use_both_block_types = p.m_use_both_block_types; + + if (!dxt5_optimizer.compute(params, results)) + { + CRNLIB_ASSERT(0); + break; + } + + pDXT5_block->set_low_alpha(results.m_first_endpoint); + pDXT5_block->set_high_alpha(results.m_second_endpoint); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pDXT5_block->set_selector(i & 3, i >> 2, selectors[i]); + + break; + } + case cAlpha3: + { + const int comp_index = m_element_component_index[element_index]; + + dxt3_block* pDXT3_block = reinterpret_cast(pElement); + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); + + break; + } + default: break; + } + } + } + } + + void dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const + { + const element& block = get_element(block_x, block_y, element_index); + + switch (m_element_type[element_index]) + { + case cColor: + { + const dxt1_block& block1 = *reinterpret_cast(&block); + + packed_low_endpoint = block1.get_low_color(); + packed_high_endpoint 
= block1.get_high_color(); + + break; + } + case cAlpha5: + { + const dxt5_block& block5 = *reinterpret_cast(&block); + + packed_low_endpoint = block5.get_low_alpha(); + packed_high_endpoint = block5.get_high_alpha(); + + break; + } + case cAlpha3: + { + packed_low_endpoint = 0; + packed_high_endpoint = 255; + + break; + } + default: break; + } + } + + int dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled) const + { + uint l, h; + get_block_endpoints(block_x, block_y, element_index, l, h); + + switch (m_element_type[element_index]) + { + case cColor: + { + uint r, g, b; + + dxt1_block::unpack_color(r, g, b, static_cast(l), scaled); + low_endpoint.r = static_cast(r); + low_endpoint.g = static_cast(g); + low_endpoint.b = static_cast(b); + + dxt1_block::unpack_color(r, g, b, static_cast(h), scaled); + high_endpoint.r = static_cast(r); + high_endpoint.g = static_cast(g); + high_endpoint.b = static_cast(b); + + return -1; + } + case cAlpha5: + { + const int component = m_element_component_index[element_index]; + + low_endpoint[component] = static_cast(l); + high_endpoint[component] = static_cast(h); + + return component; + } + case cAlpha3: + { + const int component = m_element_component_index[element_index]; + + low_endpoint[component] = static_cast(l); + high_endpoint[component] = static_cast(h); + + return component; + } + default: break; + } + + return 0; + } + + uint dxt_image::get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors) + { + const element& block = get_element(block_x, block_y, element_index); + + switch (m_element_type[element_index]) + { + case cColor: + { + const dxt1_block& block1 = *reinterpret_cast(&block); + return dxt1_block::get_block_colors(pColors, static_cast(block1.get_low_color()), static_cast(block1.get_high_color())); + } + case cAlpha5: + { + const dxt5_block& block5 = *reinterpret_cast(&block); + + uint 
values[cDXT5SelectorValues]; + + const uint n = dxt5_block::get_block_values(values, block5.get_low_alpha(), block5.get_high_alpha()); + + const int comp_index = m_element_component_index[element_index]; + for (uint i = 0; i < n; i++) + pColors[i][comp_index] = static_cast(values[i]); + + return n; + } + case cAlpha3: + { + const int comp_index = m_element_component_index[element_index]; + for (uint i = 0; i < 16; i++) + pColors[i][comp_index] = static_cast((i << 4) | i); + + return 16; + } + default: break; + } + + return 0; + } + + uint dxt_image::get_selector(uint x, uint y, uint element_index) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + + const uint block_x = x >> cDXTBlockShift; + const uint block_y = y >> cDXTBlockShift; + + const element& block = get_element(block_x, block_y, element_index); + + switch (m_element_type[element_index]) + { + case cColor: + { + const dxt1_block& block1 = *reinterpret_cast(&block); + return block1.get_selector(x & 3, y & 3); + } + case cAlpha5: + { + const dxt5_block& block5 = *reinterpret_cast(&block); + return block5.get_selector(x & 3, y & 3); + } + case cAlpha3: + { + const dxt3_block& block3 = *reinterpret_cast(&block); + return block3.get_alpha(x & 3, y & 3, false); + } + default: break; + } + + return 0; + } + + void dxt_image::change_dxt1_to_dxt1a() + { + if (m_format == cDXT1) + m_format = cDXT1A; + } + +} // namespace crnlib + + + + diff --git a/crnlib/crn_dxt_image.h b/crnlib/crn_dxt_image.h new file mode 100644 index 00000000..9c68dc9d --- /dev/null +++ b/crnlib/crn_dxt_image.h @@ -0,0 +1,218 @@ +// File: crn_dxt_image.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt1.h" +#include "crn_dxt5a.h" +#include "crn_image.h" + +#define CRNLIB_SUPPORT_ATI_COMPRESS 0 + +namespace crnlib +{ + class task_pool; + + class dxt_image + { + public: + dxt_image(); + dxt_image(const dxt_image& other); + dxt_image& operator= (const dxt_image& rhs); + + void clear(); + 
+ inline bool is_valid() const { return m_blocks_x > 0; } + + uint get_width() const { return m_width; } + uint get_height() const { return m_height; } + + uint get_blocks_x() const { return m_blocks_x; } + uint get_blocks_y() const { return m_blocks_y; } + uint get_total_blocks() const { return m_blocks_x * m_blocks_y; } + + uint get_elements_per_block() const { return m_num_elements_per_block; } + uint get_bytes_per_block() const { return m_bytes_per_block; } + + dxt_format get_format() const { return m_format; } + + bool has_color() const { return (m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5); } + + // Will be pretty slow if the image is DXT1, as this method scans for alpha blocks/selectors. + bool has_alpha() const; + + enum element_type + { + cUnused = 0, + + cColor, + + cAlpha3, + cAlpha5, + }; + + element_type get_element_type(uint element_index) const { CRNLIB_ASSERT(element_index < m_num_elements_per_block); return m_element_type[element_index]; } + + //Returns -1 for RGB, or [0,3] + int8 get_element_component_index(uint element_index) const { CRNLIB_ASSERT(element_index < m_num_elements_per_block); return m_element_component_index[element_index]; } + + struct element + { + uint8 m_bytes[8]; + + uint get_le_word(uint index) const { CRNLIB_ASSERT(index < 4); return m_bytes[index*2] | (m_bytes[index * 2 + 1] << 8); } + uint get_be_word(uint index) const { CRNLIB_ASSERT(index < 4); return m_bytes[index*2 + 1] | (m_bytes[index * 2] << 8); } + + void set_le_word(uint index, uint val) { CRNLIB_ASSERT((index < 4) && (val <= UINT16_MAX)); m_bytes[index*2] = static_cast(val & 0xFF); m_bytes[index * 2 + 1] = static_cast((val >> 8) & 0xFF); } + void set_be_word(uint index, uint val) { CRNLIB_ASSERT((index < 4) && (val <= UINT16_MAX)); m_bytes[index*2+1] = static_cast(val & 0xFF); m_bytes[index * 2] = static_cast((val >> 8) & 0xFF); } + + void clear() + { + memset(this, 0, sizeof(*this)); + } + }; + + typedef crnlib::vector 
element_vec; + + bool init(dxt_format fmt, uint width, uint height, bool clear_elements); + bool init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy); + + struct pack_params + { + pack_params() + { + clear(); + } + + void clear() + { + m_quality = cCRNDXTQualityUber; + m_perceptual = true; + m_grayscale_sampling = false; + m_use_both_block_types = true; + m_endpoint_caching = true; + m_compressor = cCRNDXTCompressorCRN; + m_pProgress_callback = NULL; + m_pProgress_callback_user_data_ptr = NULL; + m_dxt1a_alpha_threshold = 128; + m_num_helper_threads = 0; + m_progress_start = 0; + m_progress_range = 100; + m_use_transparent_indices_for_black = false; + m_pTask_pool = NULL; + m_color_weights[0] = 1; + m_color_weights[1] = 1; + m_color_weights[2] = 1; + } + + void init(const crn_comp_params ¶ms) + { + m_perceptual = (params.m_flags & cCRNCompFlagPerceptual) != 0; + m_num_helper_threads = params.m_num_helper_threads; + m_use_both_block_types = (params.m_flags & cCRNCompFlagUseBothBlockTypes) != 0; + m_use_transparent_indices_for_black = (params.m_flags & cCRNCompFlagUseTransparentIndicesForBlack) != 0; + m_dxt1a_alpha_threshold = params.m_dxt1a_alpha_threshold; + m_quality = params.m_dxt_quality; + m_endpoint_caching = (params.m_flags & cCRNCompFlagDisableEndpointCaching) == 0; + m_grayscale_sampling = (params.m_flags & cCRNCompFlagGrayscaleSampling) != 0; + m_compressor = params.m_dxt_compressor_type; + } + + uint m_dxt1a_alpha_threshold; + + uint m_num_helper_threads; + + crn_dxt_quality m_quality; + + crn_dxt_compressor_type m_compressor; + + bool m_perceptual; + bool m_grayscale_sampling; + bool m_use_both_block_types; + bool m_endpoint_caching; + bool m_use_transparent_indices_for_black; + + typedef bool (*progress_callback_func)(uint percentage_complete, void* pUser_data_ptr); + progress_callback_func m_pProgress_callback; + void* m_pProgress_callback_user_data_ptr; + + uint m_progress_start; + uint 
m_progress_range; + + task_pool *m_pTask_pool; + + int m_color_weights[3]; + }; + + bool init(dxt_format fmt, const image_u8& img, const pack_params& p = dxt_image::pack_params()); + + bool unpack(image_u8& img) const; + + void endian_swap(); + + uint get_num_elements() const { return m_elements.size(); } + + const element_vec& get_element_vec() const { return m_elements; } + element_vec& get_element_vec() { return m_elements; } + + const element& get_element(uint block_x, uint block_y, uint element_index) const; + element& get_element(uint block_x, uint block_y, uint element_index); + + const element* get_element_ptr() const { return m_pElements; } + element* get_element_ptr() { return m_pElements; } + + uint get_size_in_bytes() const { return m_elements.size() * sizeof(element); } + uint get_row_pitch_in_bytes() const { return m_blocks_x * m_bytes_per_block; } + + color_quad_u8 get_pixel(uint x, uint y) const; + uint get_pixel_alpha(uint x, uint y, uint element_index) const; + + void set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual = true); + + // get_block_pixels() only sets those components stored in the image! + void get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const; + + void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, dxt1_endpoint_optimizer& dxt1_optimizer, dxt5_endpoint_optimizer& dxt5_optimizer); + void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p); + + void get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const; + + // Returns a value representing the component(s) that where actually set, where -1 = RGB. + // This method does not always set every component! 
+ int get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled = true) const; + + // pColors should point to a 16 entry array, to handle DXT3. + // Returns the number of block colors: 3, 4, 6, 8, or 16. + uint get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors); + + uint get_selector(uint x, uint y, uint element_index) const; + + void change_dxt1_to_dxt1a(); + + private: + element_vec m_elements; + element* m_pElements; + + uint m_width; + uint m_height; + + uint m_blocks_x; + uint m_blocks_y; + uint m_total_blocks; + uint m_total_elements; + + uint m_num_elements_per_block; // 1 or 2 + uint m_bytes_per_block; // 8 or 16 + + int8 m_element_component_index[2]; + element_type m_element_type[2]; + + dxt_format m_format; // DXT1, 1A, 3, 5, N/3DC, or A + + bool init_internal(dxt_format fmt, uint width, uint height); + void init_task(uint64 data, void* pData_ptr); + +#if CRNLIB_SUPPORT_ATI_COMPRESS + bool init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p); +#endif + }; + +} // namespace crnlib diff --git a/crnlib/crn_dynamic_stream.h b/crnlib/crn_dynamic_stream.h new file mode 100644 index 00000000..b5a10763 --- /dev/null +++ b/crnlib/crn_dynamic_stream.h @@ -0,0 +1,206 @@ +// File: crn_dynamic_stream.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_data_stream.h" + +namespace crnlib +{ + class dynamic_stream : public data_stream + { + public: + dynamic_stream(uint initial_size, const wchar_t* pName = L"dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : + data_stream(pName, attribs), + m_ofs(0) + { + open(initial_size, pName, attribs); + } + + dynamic_stream(const void* pBuf, uint size, const wchar_t* pName = L"dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : + data_stream(pName, 
attribs), + m_ofs(0) + { + open(pBuf, size, pName, attribs); + } + + dynamic_stream() : + data_stream(), + m_ofs(0) + { + open(); + } + + virtual ~dynamic_stream() + { + } + + bool open(uint initial_size = 0, const wchar_t* pName = L"dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) + { + close(); + + m_opened = true; + m_buf.clear(); + m_buf.resize(initial_size); + m_ofs = 0; + m_name.set(pName ? pName : L"dynamic_stream"); + m_attribs = static_cast(attribs); + return true; + } + + bool reopen(const wchar_t* pName, uint attribs) + { + if (!m_opened) + { + return open(0, pName, attribs); + } + + m_name.set(pName ? pName : L"dynamic_stream"); + m_attribs = static_cast(attribs); + return true; + } + + bool open(const void* pBuf, uint size, const wchar_t* pName = L"dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) + { + if (!m_opened) + { + m_opened = true; + m_buf.resize(size); + if (size) + { + CRNLIB_ASSERT(pBuf); + memcpy(&m_buf[0], pBuf, size); + } + m_ofs = 0; + m_name.set(pName ? pName : L"dynamic_stream"); + m_attribs = static_cast(attribs); + return true; + } + + return false; + } + + virtual bool close() + { + if (m_opened) + { + m_opened = false; + m_buf.clear(); + m_ofs = 0; + return true; + } + + return false; + } + + const crnlib::vector& get_buf() const { return m_buf; } + crnlib::vector& get_buf() { return m_buf; } + + void reserve(uint size) + { + if (m_opened) + { + m_buf.reserve(size); + } + } + + virtual const void* get_ptr() const { return m_buf.empty() ? 
NULL : &m_buf[0]; } + + virtual uint read(void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_readable()) || (!len)) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_buf.size()); + + uint bytes_left = m_buf.size() - m_ofs; + + len = math::minimum(len, bytes_left); + + if (len) + memcpy(pBuf, &m_buf[m_ofs], len); + + m_ofs += len; + + return len; + } + + virtual uint write(const void* pBuf, uint len) + { + CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); + + if ((!m_opened) || (!is_writable()) || (!len)) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_buf.size()); + + uint new_ofs = m_ofs + len; + if (new_ofs > m_buf.size()) + m_buf.resize(new_ofs); + + memcpy(&m_buf[m_ofs], pBuf, len); + m_ofs = new_ofs; + + return len; + } + + virtual bool flush() + { + if (!m_opened) + return false; + + return true; + } + + virtual uint64 get_size() + { + if (!m_opened) + return 0; + + return m_buf.size(); + } + + virtual uint64 get_remaining() + { + if (!m_opened) + return 0; + + CRNLIB_ASSERT(m_ofs <= m_buf.size()); + + return m_buf.size() - m_ofs; + } + + virtual uint64 get_ofs() + { + if (!m_opened) + return 0; + + return m_ofs; + } + + virtual bool seek(int64 ofs, bool relative) + { + if ((!m_opened) || (!is_seekable())) + return false; + + int64 new_ofs = relative ? 
(m_ofs + ofs) : ofs; + + if (new_ofs < 0) + return false; + else if (new_ofs > m_buf.size()) + return false; + + m_ofs = static_cast(new_ofs); + + post_seek(); + + return true; + } + + private: + crnlib::vector m_buf; + uint m_ofs; + }; + +} // namespace crnlib + diff --git a/crnlib/crn_dynamic_string.cpp b/crnlib/crn_dynamic_string.cpp new file mode 100644 index 00000000..464ef986 --- /dev/null +++ b/crnlib/crn_dynamic_string.cpp @@ -0,0 +1,695 @@ +// File: crn_dynamic_string.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dynamic_string.h" +#include "crn_dynamic_wstring.h" +#include "crn_winhdr.h" +#include + +namespace crnlib +{ + dynamic_string g_empty_dynamic_string; + + dynamic_string::dynamic_string(eVarArg dummy, const char* p, ...) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + dummy; + + CRNLIB_ASSERT(p); + + va_list args; + va_start(args, p); + format_args(p, args); + va_end(args); + } + + dynamic_string::dynamic_string(const char* p) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + CRNLIB_ASSERT(p); + set(p); + } + + dynamic_string::dynamic_string(const char* p, uint len) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + CRNLIB_ASSERT(p); + set_from_buf(p, len); + } + + dynamic_string::dynamic_string(const dynamic_string& other) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + set(other); + } + + dynamic_string::dynamic_string(const wchar_t* pStr) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + set(pStr); + } + + dynamic_string& dynamic_string::set(const wchar_t *pStr) + { + uint len = static_cast(wcslen(pStr)); + if (!len) + { + clear(); + return *this; + } + + const uint num_needed = WideCharToMultiByte(CP_ACP, 0, pStr, len, NULL, 0, NULL, NULL); + if (num_needed <= 0) + { + clear(); + return *this; + } + + if (!ensure_buf(num_needed, false)) + { + clear(); + return *this; + } + + const uint num_written = WideCharToMultiByte(CP_ACP, 0, pStr, len, get_ptr_raw(), num_needed, NULL, NULL); + 
CRNLIB_ASSERT(num_written == num_needed); + + get_ptr_raw()[num_written] = 0; + m_len = static_cast(num_written); + + check(); + + return *this; + } + + dynamic_wstring& dynamic_string::as_utf16(dynamic_wstring &buf) + { + buf.set(get_ptr()); + return buf; + } + + void dynamic_string::clear() + { + check(); + + if (m_pStr) + { + crnlib_delete_array(m_pStr); + m_pStr = NULL; + + m_len = 0; + m_buf_size = 0; + } + } + + void dynamic_string::empty() + { + truncate(0); + } + + void dynamic_string::optimize() + { + if (!m_len) + clear(); + else + { + uint min_buf_size = math::next_pow2((uint)m_len + 1); + if (m_buf_size > min_buf_size) + { + char* p = crnlib_new_array(min_buf_size); + memcpy(p, m_pStr, m_len + 1); + + crnlib_delete_array(m_pStr); + m_pStr = p; + + m_buf_size = static_cast(min_buf_size); + + check(); + } + } + } + + int dynamic_string::compare(const char* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + + const int result = (case_sensitive ? strcmp : _stricmp)(get_ptr_priv(), p); + + if (result < 0) + return -1; + else if (result > 0) + return 1; + + return 0; + } + + int dynamic_string::compare(const dynamic_string& rhs, bool case_sensitive) const + { + return compare(rhs.get_ptr_priv(), case_sensitive); + } + + dynamic_string& dynamic_string::set(const char* p, uint max_len) + { + CRNLIB_ASSERT(p); + + const uint len = math::minimum(max_len, static_cast(strlen(p))); + CRNLIB_ASSERT(len < UINT16_MAX); + + if ((!len) || (len >= UINT16_MAX)) + clear(); + else if ((m_pStr) && (p >= m_pStr) && (p < (m_pStr + m_buf_size))) + { + if (m_pStr != p) + memmove(m_pStr, p, len); + m_pStr[len] = '\0'; + m_len = static_cast(len); + } + else if (ensure_buf(len, false)) + { + m_len = static_cast(len); + memcpy(m_pStr, p, m_len + 1); + } + + check(); + + return *this; + } + + dynamic_string& dynamic_string::set(const dynamic_string& other, uint max_len) + { + if (this == &other) + { + if (max_len < m_len) + { + m_pStr[max_len] = '\0'; + m_len = 
static_cast(max_len); + } + } + else + { + const uint len = math::minimum(max_len, other.m_len); + + if (!len) + clear(); + else if (ensure_buf(len, false)) + { + m_len = static_cast(len); + memcpy(m_pStr, other.get_ptr_priv(), m_len); + m_pStr[len] = '\0'; + } + } + + check(); + + return *this; + } + + bool dynamic_string::set_len(uint new_len, char fill_char) + { + if ((new_len >= UINT16_MAX) || (!fill_char)) + return false; + + uint cur_len = m_len; + + if (ensure_buf(new_len, true)) + { + if (new_len > cur_len) + memset(m_pStr + cur_len, fill_char, new_len - cur_len); + + m_pStr[new_len] = 0; + + m_len = static_cast(new_len); + + check(); + } + + return true; + } + + dynamic_string& dynamic_string::set_from_buf(const void* pBuf, uint buf_size) + { + CRNLIB_ASSERT(pBuf); + + if (buf_size >= UINT16_MAX) + { + clear(); + return *this; + } + + if ((buf_size) && (memchr(pBuf, 0, buf_size) != NULL)) + { + CRNLIB_ASSERT(0); + clear(); + return *this; + } + + if (ensure_buf(buf_size, false)) + { + if (buf_size) + memcpy(m_pStr, pBuf, buf_size); + + m_pStr[buf_size] = 0; + + m_len = static_cast(buf_size); + + check(); + } + + return *this; + } + + dynamic_string& dynamic_string::set_char(uint index, char c) + { + CRNLIB_ASSERT(index <= m_len); + + if (!c) + truncate(index); + else if (index < m_len) + { + m_pStr[index] = c; + + check(); + } + else if (index == m_len) + append_char(c); + + return *this; + } + + dynamic_string& dynamic_string::append_char(char c) + { + if (ensure_buf(m_len + 1)) + { + m_pStr[m_len] = c; + m_pStr[m_len + 1] = '\0'; + m_len++; + check(); + } + + return *this; + } + + dynamic_string& dynamic_string::truncate(uint new_len) + { + if (new_len < m_len) + { + m_pStr[new_len] = '\0'; + m_len = static_cast(new_len); + check(); + } + return *this; + } + + dynamic_string& dynamic_string::tolower() + { + if (m_len) + { +#ifdef _MSC_VER + _strlwr_s(get_ptr_priv(), m_buf_size); +#else + strlwr(get_ptr_priv()); +#endif + } + return *this; + } + + 
dynamic_string& dynamic_string::toupper() + { + if (m_len) + { +#ifdef _MSC_VER + _strupr_s(get_ptr_priv(), m_buf_size); +#else + strupr(get_ptr_priv()); +#endif + } + return *this; + } + + dynamic_string& dynamic_string::append(const char* p) + { + CRNLIB_ASSERT(p); + + uint len = static_cast(strlen(p)); + uint new_total_len = m_len + len; + if ((new_total_len) && ensure_buf(new_total_len)) + { + memcpy(m_pStr + m_len, p, len + 1); + m_len = static_cast(m_len + len); + check(); + } + + return *this; + } + + dynamic_string& dynamic_string::append(const dynamic_string& other) + { + uint len = other.m_len; + uint new_total_len = m_len + len; + if ((new_total_len) && ensure_buf(new_total_len)) + { + memcpy(m_pStr + m_len, other.get_ptr_priv(), len + 1); + m_len = static_cast(m_len + len); + check(); + } + + return *this; + } + + dynamic_string operator+ (const char* p, const dynamic_string& a) + { + return dynamic_string(p).append(a); + } + + dynamic_string operator+ (const dynamic_string& a, const char* p) + { + return dynamic_string(a).append(p); + } + + dynamic_string operator+ (const dynamic_string& a, const dynamic_string& b) + { + return dynamic_string(a).append(b); + } + + dynamic_string& dynamic_string::format_args(const char* p, va_list args) + { + CRNLIB_ASSERT(p); + + const uint cBufSize = 4096; + char buf[cBufSize]; + +#ifdef _MSC_VER + int l = vsnprintf_s(buf, cBufSize, _TRUNCATE, p, args); +#else + int l = vsnprintf(buf, cBufSize, p, args); +#endif + if (l <= 0) + clear(); + else if (ensure_buf(l, false)) + { + memcpy(m_pStr, buf, l + 1); + + m_len = static_cast(l); + + check(); + } + + return *this; + } + + dynamic_string& dynamic_string::format(const char* p, ...) 
+ { + CRNLIB_ASSERT(p); + + va_list args; + va_start(args, p); + format_args(p, args); + va_end(args); + return *this; + } + + dynamic_string& dynamic_string::crop(uint start, uint len) + { + if (start >= m_len) + { + clear(); + return *this; + } + + len = math::minimum(len, m_len - start); + + if (start) + memmove(get_ptr_priv(), get_ptr_priv() + start, len); + + m_pStr[len] = '\0'; + + m_len = static_cast(len); + + check(); + + return *this; + } + + dynamic_string& dynamic_string::substring(uint start, uint end) + { + CRNLIB_ASSERT(start <= end); + if (start > end) + return *this; + return crop(start, end - start); + } + + dynamic_string& dynamic_string::left(uint len) + { + return substring(0, len); + } + + dynamic_string& dynamic_string::mid(uint start, uint len) + { + return crop(start, len); + } + + dynamic_string& dynamic_string::right(uint start) + { + return substring(start, get_len()); + } + + dynamic_string& dynamic_string::tail(uint num) + { + return substring(math::maximum(static_cast(get_len()) - static_cast(num), 0), get_len()); + } + + dynamic_string& dynamic_string::unquote() + { + if (m_len >= 2) + { + if ( ((*this)[0] == '\"') && ((*this)[m_len - 1] == '\"') ) + { + return mid(1, m_len - 2); + } + } + + return *this; + } + + int dynamic_string::find_left(const char* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + + const int p_len = (int)strlen(p); + + for (int i = 0; i <= (m_len - p_len); i++) + if ((case_sensitive ? 
strncmp : _strnicmp)(p, &m_pStr[i], p_len) == 0) + return i; + + return -1; + } + + bool dynamic_string::contains(const char* p, bool case_sensitive) const + { + return find_left(p, case_sensitive) >= 0; + } + + uint dynamic_string::count_char(char c) const + { + uint count = 0; + for (uint i = 0; i < m_len; i++) + if (m_pStr[i] == c) + count++; + return count; + } + + int dynamic_string::find_left(char c) const + { + for (uint i = 0; i < m_len; i++) + if (m_pStr[i] == c) + return i; + return -1; + } + + int dynamic_string::find_right(char c) const + { + for (int i = (int)m_len - 1; i >= 0; i--) + if (m_pStr[i] == c) + return i; + return -1; + } + + int dynamic_string::find_right(const char* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + const int p_len = (int)strlen(p); + + for (int i = m_len - p_len; i >= 0; i--) + if ((case_sensitive ? strncmp : _strnicmp)(p, &m_pStr[i], p_len) == 0) + return i; + + return -1; + } + + dynamic_string& dynamic_string::trim() + { + int s, e; + for (s = 0; s < (int)m_len; s++) + if (!isspace(m_pStr[s])) + break; + + for (e = m_len - 1; e > s; e--) + if (!isspace(m_pStr[e])) + break; + + return crop(s, e - s + 1); + } + + dynamic_string& dynamic_string::trim_crlf() + { + int s = 0, e; + + for (e = m_len - 1; e > s; e--) + if ((m_pStr[e] != 13) && (m_pStr[e] != 10)) + break; + + return crop(s, e - s + 1); + } + + dynamic_string& dynamic_string::remap(int from_char, int to_char) + { + for (uint i = 0; i < m_len; i++) + if (m_pStr[i] == from_char) + m_pStr[i] = (char)to_char; + return *this; + } + +#ifdef CRNLIB_BUILD_DEBUG + void dynamic_string::check() const + { + if (!m_pStr) + { + CRNLIB_ASSERT(!m_buf_size && !m_len); + } + else + { + CRNLIB_ASSERT(m_buf_size); + CRNLIB_ASSERT((m_buf_size == UINT16_MAX) || math::is_power_of_2((uint32)m_buf_size)); + CRNLIB_ASSERT(m_len < m_buf_size); + CRNLIB_ASSERT(strlen(m_pStr) == m_len); + } + } +#endif + + bool dynamic_string::ensure_buf(uint len, bool preserve_contents) + { + uint 
buf_size_needed = len + 1; + + CRNLIB_ASSERT(buf_size_needed <= UINT16_MAX); + + if (buf_size_needed <= UINT16_MAX) + { + if (buf_size_needed > m_buf_size) + expand_buf(buf_size_needed, preserve_contents); + } + + return m_buf_size >= buf_size_needed; + } + + bool dynamic_string::expand_buf(uint new_buf_size, bool preserve_contents) + { + new_buf_size = math::minimum(UINT16_MAX, math::next_pow2(math::maximum(m_buf_size, new_buf_size))); + + if (new_buf_size != m_buf_size) + { + char* p = crnlib_new_array(new_buf_size); + + if (preserve_contents) + memcpy(p, get_ptr_priv(), m_len + 1); + + crnlib_delete_array(m_pStr); + m_pStr = p; + + m_buf_size = static_cast(new_buf_size); + + if (preserve_contents) + check(); + } + + return m_buf_size >= new_buf_size; + } + + void dynamic_string::swap(dynamic_string& other) + { + utils::swap(other.m_buf_size, m_buf_size); + utils::swap(other.m_len, m_len); + utils::swap(other.m_pStr, m_pStr); + } + + int dynamic_string::serialize(void* pBuf, uint buf_size, bool little_endian) const + { + uint buf_left = buf_size; + + if (m_len > UINT16_MAX) + return -1; + + if (!utils::write_val((uint16)m_len, pBuf, buf_left, little_endian)) + return -1; + + if (buf_left < m_len) + return -1; + + memcpy(pBuf, get_ptr(), m_len); + + buf_left -= m_len; + + return buf_size - buf_left; + } + + int dynamic_string::deserialize(const void* pBuf, uint buf_size, bool little_endian) + { + uint buf_left = buf_size; + + if (buf_left < sizeof(uint16)) return -1; + + uint16 l; + if (!utils::read_obj(l, pBuf, buf_left, little_endian)) + return -1; + + if (buf_left < l) + return -1; + + set_from_buf(pBuf, l); + + buf_left -= l; + + return buf_size - buf_left; + } + + void dynamic_string::translate_lf_to_crlf() + { + if (find_left(0x0A) < 0) + return; + + dynamic_string tmp; + tmp.ensure_buf(m_len + 2); + + // normal sequence is 0x0D 0x0A (CR LF, \r\n) + + int prev_char = -1; + for (uint i = 0; i < get_len(); i++) + { + const int cur_char = (*this)[i]; + + if 
((cur_char == 0x0A) && (prev_char != 0x0D)) + tmp.append_char(0x0D); + + tmp.append_char(cur_char); + + prev_char = cur_char; + } + + swap(tmp); + } + + dynamic_string& dynamic_string::operator= (const dynamic_wstring& rhs) + { + return set(rhs.get_ptr()); + } + +} // namespace crnlib diff --git a/crnlib/crn_dynamic_string.h b/crnlib/crn_dynamic_string.h new file mode 100644 index 00000000..f0d6f369 --- /dev/null +++ b/crnlib/crn_dynamic_string.h @@ -0,0 +1,163 @@ +// File: crn_dynamic_string.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + class dynamic_wstring; + + class dynamic_string + { + friend class dynamic_wstring; + + public: + inline dynamic_string() : m_buf_size(0), m_len(0), m_pStr(NULL) { } + dynamic_string(eVarArg dummy, const char* p, ...); + dynamic_string(const char* p); + dynamic_string(const char* p, uint len); + dynamic_string(const dynamic_string& other); + + inline ~dynamic_string() { if (m_pStr) crnlib_delete_array(m_pStr); } + + explicit dynamic_string(const wchar_t* pStr); + dynamic_string& set(const wchar_t *pStr); + dynamic_wstring& as_utf16(dynamic_wstring &buf); + + // Truncates the string to 0 chars and frees the buffer. + void clear(); + void optimize(); + + // Truncates the string to 0 chars, but does not free the buffer. + void empty(); + + inline uint get_len() const { return m_len; } + inline bool is_empty() const { return !m_len; } + + inline const char* get_ptr() const { return m_pStr ? 
m_pStr : ""; } + + inline const char* get_ptr_raw() const { return m_pStr; } + inline char* get_ptr_raw() { return m_pStr; } + + inline char operator[] (uint i) const { CRNLIB_ASSERT(i <= m_len); return get_ptr()[i]; } + + inline operator size_t() const { return fast_hash(get_ptr(), m_len) ^ fast_hash(&m_len, sizeof(m_len)); } + + int compare(const char* p, bool case_sensitive = false) const; + int compare(const dynamic_string& rhs, bool case_sensitive = false) const; + + inline bool operator== (const dynamic_string& rhs) const { return compare(rhs) == 0; } + inline bool operator== (const char* p) const { return compare(p) == 0; } + + inline bool operator!= (const dynamic_string& rhs) const { return compare(rhs) != 0; } + inline bool operator!= (const char* p) const { return compare(p) != 0; } + + inline bool operator< (const dynamic_string& rhs) const { return compare(rhs) < 0; } + inline bool operator< (const char* p) const { return compare(p) < 0; } + + inline bool operator> (const dynamic_string& rhs) const { return compare(rhs) > 0; } + inline bool operator> (const char* p) const { return compare(p) > 0; } + + inline bool operator<= (const dynamic_string& rhs) const { return compare(rhs) <= 0; } + inline bool operator<= (const char* p) const { return compare(p) <= 0; } + + inline bool operator>= (const dynamic_string& rhs) const { return compare(rhs) >= 0; } + inline bool operator>= (const char* p) const { return compare(p) >= 0; } + + friend inline bool operator== (const char* p, const dynamic_string& rhs) { return rhs.compare(p) == 0; } + + dynamic_string& set(const char* p, uint max_len = UINT_MAX); + dynamic_string& set(const dynamic_string& other, uint max_len = UINT_MAX); + + bool set_len(uint new_len, char fill_char = ' '); + + // Set from non-zero terminated buffer. 
+ dynamic_string& set_from_buf(const void* pBuf, uint buf_size); + + dynamic_string& operator= (const dynamic_string& rhs) { return set(rhs); } + dynamic_string& operator= (const dynamic_wstring& rhs); + dynamic_string& operator= (const char* p) { return set(p); } + + dynamic_string& set_char(uint index, char c); + dynamic_string& append_char(char c); + dynamic_string& append_char(int c) { CRNLIB_ASSERT((c >= 0) && (c <= 255)); return append_char(static_cast(c)); } + dynamic_string& truncate(uint new_len); + dynamic_string& tolower(); + dynamic_string& toupper(); + + dynamic_string& append(const char* p); + dynamic_string& append(const dynamic_string& other); + dynamic_string& operator += (const char* p) { return append(p); } + dynamic_string& operator += (const dynamic_string& other) { return append(other); } + + friend dynamic_string operator+ (const char* p, const dynamic_string& a); + friend dynamic_string operator+ (const dynamic_string& a, const char* p); + friend dynamic_string operator+ (const dynamic_string& a, const dynamic_string& b); + + dynamic_string& format_args(const char* p, va_list args); + dynamic_string& format(const char* p, ...); + + dynamic_string& crop(uint start, uint len); + dynamic_string& substring(uint start, uint end); + dynamic_string& left(uint len); + dynamic_string& mid(uint start, uint len); + dynamic_string& right(uint start); + dynamic_string& tail(uint num); + + dynamic_string& unquote(); + + uint count_char(char c) const; + + int find_left(const char* p, bool case_sensitive = false) const; + int find_left(char c) const; + + int find_right(char c) const; + int find_right(const char* p, bool case_sensitive = false) const; + + bool contains(const char* p, bool case_sensitive = false) const; + + dynamic_string& trim(); + dynamic_string& trim_crlf(); + + dynamic_string& remap(int from_char, int to_char); + + void swap(dynamic_string& other); + + // Returns -1 on failure, or the number of bytes written. 
+ int serialize(void* pBuf, uint buf_size, bool little_endian) const; + + // Returns -1 on failure, or the number of bytes read. + int deserialize(const void* pBuf, uint buf_size, bool little_endian); + + void translate_lf_to_crlf(); + + private: + uint16 m_buf_size; + uint16 m_len; + char* m_pStr; + +#ifdef CRNLIB_BUILD_DEBUG + void check() const; +#else + inline void check() const { } +#endif + + bool expand_buf(uint new_buf_size, bool preserve_contents); + + const char* get_ptr_priv() const { return m_pStr ? m_pStr : ""; } + char* get_ptr_priv() { return (char*)(m_pStr ? m_pStr : ""); } + + bool ensure_buf(uint len, bool preserve_contents = true); + }; + + typedef crnlib::vector dynamic_string_array; + + extern dynamic_string g_empty_dynamic_string; + + CRNLIB_DEFINE_BITWISE_MOVABLE(dynamic_string); + + inline void swap (dynamic_string& a, dynamic_string& b) + { + a.swap(b); + } + +} // namespace crnlib diff --git a/crnlib/crn_dynamic_wstring.cpp b/crnlib/crn_dynamic_wstring.cpp new file mode 100644 index 00000000..3696a6a7 --- /dev/null +++ b/crnlib/crn_dynamic_wstring.cpp @@ -0,0 +1,715 @@ +// File: crn_dynamic_wstring.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_dynamic_wstring.h" +#include "crn_winhdr.h" + +namespace crnlib +{ + dynamic_wstring g_empty_dynamic_wstring; + + dynamic_wstring::dynamic_wstring(eVarArg dummy, const wchar_t* p, ...) 
: + m_buf_size(0), m_len(0), m_pStr(NULL) + { + dummy; + + CRNLIB_ASSERT(p); + + va_list args; + va_start(args, p); + format_args(p, args); + va_end(args); + } + + dynamic_wstring::dynamic_wstring(const wchar_t* p) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + CRNLIB_ASSERT(p); + set(p); + } + + dynamic_wstring::dynamic_wstring(const wchar_t* p, uint len) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + CRNLIB_ASSERT(p); + set_from_buf(p, len); + } + + dynamic_wstring::dynamic_wstring(const dynamic_wstring& other) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + set(other); + } + + void dynamic_wstring::clear() + { + check(); + + if (m_pStr) + { + crnlib_delete_array(m_pStr); + m_pStr = NULL; + + m_len = 0; + m_buf_size = 0; + } + } + + void dynamic_wstring::empty() + { + truncate(0); + } + + void dynamic_wstring::optimize() + { + if (!m_len) + clear(); + else + { + uint min_buf_size = math::next_pow2((uint)m_len + 1); + if (m_buf_size > min_buf_size) + { + wchar_t* p = crnlib_new_array(min_buf_size); + memcpy(p, m_pStr, (m_len + 1) * sizeof(wchar_t)); + + crnlib_delete_array(m_pStr); + m_pStr = p; + + m_buf_size = static_cast(min_buf_size); + + check(); + } + } + } + + int dynamic_wstring::compare(const wchar_t* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + + const int result = (case_sensitive ? 
wcscmp : _wcsicmp)(get_ptr_priv(), p); + + if (result < 0) + return -1; + else if (result > 0) + return 1; + + return 0; + } + + int dynamic_wstring::compare(const dynamic_wstring& rhs, bool case_sensitive) const + { + return compare(rhs.get_ptr_priv(), case_sensitive); + } + + dynamic_wstring& dynamic_wstring::set(const wchar_t* p, uint max_len) + { + CRNLIB_ASSERT(p); + + const uint len = math::minimum(max_len, static_cast(wcslen(p))); + CRNLIB_ASSERT(len < UINT16_MAX); + + if ((!len) || (len >= UINT16_MAX)) + clear(); + else if ((m_pStr) && (p >= m_pStr) && (p < (m_pStr + m_buf_size))) + { + if (m_pStr != p) + memmove(m_pStr, p, len * sizeof(wchar_t)); + m_pStr[len] = L'\0'; + m_len = static_cast(len); + } + else if (ensure_buf(len, false)) + { + m_len = static_cast(len); + memcpy(m_pStr, p, (m_len + 1) * sizeof(wchar_t)); + } + + check(); + + return *this; + } + + dynamic_wstring& dynamic_wstring::set(const dynamic_wstring& other, uint max_len) + { + if (this == &other) + { + if (max_len < m_len) + { + m_pStr[max_len] = L'\0'; + m_len = static_cast(max_len); + } + } + else + { + const uint len = math::minimum(max_len, other.m_len); + + if (!len) + clear(); + else if (ensure_buf(len, false)) + { + m_len = static_cast(len); + memcpy(m_pStr, other.get_ptr_priv(), m_len * sizeof(wchar_t)); + m_pStr[len] = L'\0'; + } + } + + check(); + + return *this; + } + + bool dynamic_wstring::set_len(uint new_len, wchar_t fill_char) + { + if ((new_len >= UINT16_MAX) || (!fill_char)) + return false; + + uint cur_len = m_len; + + if (ensure_buf(new_len, true)) + { + if (new_len > cur_len) + { + for (uint i = 0; i < (new_len - cur_len); i++) + m_pStr[cur_len + i] = fill_char; + } + + m_pStr[new_len] = L'\0'; + + m_len = static_cast(new_len); + + check(); + } + + return true; + } + + dynamic_wstring& dynamic_wstring::set_from_buf(const void* pBuf, uint buf_size, bool little_endian) + { + CRNLIB_ASSERT(pBuf); + + if (buf_size >= UINT16_MAX) + { + clear(); + return *this; + } + + for 
(uint i = 0; i < buf_size; i++) + { + if (static_cast(pBuf)[i] == L'\0') + { + CRNLIB_ASSERT(0); + clear(); + return *this; + } + } + + if (ensure_buf(buf_size, false)) + { + utils::copy_words(reinterpret_cast(m_pStr), reinterpret_cast(pBuf), buf_size, c_crnlib_little_endian_platform != little_endian); + + m_pStr[buf_size] = L'\0'; + + m_len = static_cast(buf_size); + + check(); + } + + return *this; + } + + dynamic_wstring& dynamic_wstring::set_char(uint index, wchar_t c) + { + CRNLIB_ASSERT(index <= m_len); + + if (!c) + truncate(index); + else if (index < m_len) + { + m_pStr[index] = c; + + check(); + } + else if (index == m_len) + append_char(c); + + return *this; + } + + dynamic_wstring& dynamic_wstring::append_char(wchar_t c) + { + if (ensure_buf(m_len + 1)) + { + m_pStr[m_len] = c; + m_pStr[m_len + 1] = L'\0'; + m_len++; + check(); + } + + return *this; + } + + dynamic_wstring& dynamic_wstring::truncate(uint new_len) + { + if (new_len < m_len) + { + m_pStr[new_len] = L'\0'; + m_len = static_cast(new_len); + check(); + } + return *this; + } + + dynamic_wstring& dynamic_wstring::tolower() + { + if (m_len) + { +#ifdef _MSC_VER + _wcslwr_s(get_ptr_priv(), m_buf_size); +#else + _wcslwr(get_ptr_priv()); +#endif + } + return *this; + } + + dynamic_wstring& dynamic_wstring::toupper() + { + if (m_len) + { +#ifdef _MSC_VER + _wcsupr_s(get_ptr_priv(), m_buf_size); +#else + _wcsupr(get_ptr_priv()); +#endif + } + return *this; + } + + dynamic_wstring& dynamic_wstring::append(const wchar_t* p) + { + CRNLIB_ASSERT(p); + + uint len = static_cast(wcslen(p)); + uint new_total_len = m_len + len; + if ((new_total_len) && ensure_buf(new_total_len)) + { + memcpy(m_pStr + m_len, p, (len + 1) * sizeof(wchar_t)); + m_len = static_cast(m_len + len); + check(); + } + + return *this; + } + + dynamic_wstring& dynamic_wstring::append(const dynamic_wstring& other) + { + uint len = other.m_len; + uint new_total_len = m_len + len; + if ((new_total_len) && ensure_buf(new_total_len)) + { + 
memcpy(m_pStr + m_len, other.get_ptr_priv(), (len + 1) * sizeof(wchar_t)); + m_len = static_cast(m_len + len); + check(); + } + + return *this; + } + + dynamic_wstring operator+ (const wchar_t* p, const dynamic_wstring& a) + { + return dynamic_wstring(p).append(a); + } + + dynamic_wstring operator+ (const dynamic_wstring& a, const wchar_t* p) + { + return dynamic_wstring(a).append(p); + } + + dynamic_wstring operator+ (const dynamic_wstring& a, const dynamic_wstring& b) + { + return dynamic_wstring(a).append(b); + } + + dynamic_wstring& dynamic_wstring::format_args(const wchar_t* p, va_list args) + { + CRNLIB_ASSERT(p); + + const uint cBufSize = 4096; + wchar_t buf[cBufSize]; + +#ifdef _MSC_VER + int l = _vsnwprintf_s(buf, cBufSize, _TRUNCATE, p, args); +#else + int l = _vsnwprintf(buf, cBufSize, p, args); +#endif + if (l <= 0) + clear(); + else if (ensure_buf(l, false)) + { + memcpy(m_pStr, buf, (l + 1) * sizeof(wchar_t)); + + m_len = static_cast(l); + + check(); + } + + return *this; + } + + dynamic_wstring& dynamic_wstring::format(const wchar_t* p, ...) 
+ { + CRNLIB_ASSERT(p); + + va_list args; + va_start(args, p); + format_args(p, args); + va_end(args); + return *this; + } + + dynamic_wstring& dynamic_wstring::crop(uint start, uint len) + { + if (start >= m_len) + { + clear(); + return *this; + } + + len = math::minimum(len, m_len - start); + + if (start) + memmove(get_ptr_priv(), get_ptr_priv() + start, len * sizeof(wchar_t)); + + m_pStr[len] = L'\0'; + + m_len = static_cast(len); + + check(); + + return *this; + } + + dynamic_wstring& dynamic_wstring::substring(uint start, uint end) + { + CRNLIB_ASSERT(start <= end); + if (start > end) + return *this; + return crop(start, end - start); + } + + dynamic_wstring& dynamic_wstring::left(uint len) + { + return substring(0, len); + } + + dynamic_wstring& dynamic_wstring::mid(uint start, uint len) + { + return crop(start, len); + } + + dynamic_wstring& dynamic_wstring::right(uint start) + { + return substring(start, get_len()); + } + + dynamic_wstring& dynamic_wstring::tail(uint num) + { + return substring(math::maximum(static_cast(get_len()) - static_cast(num), 0), get_len()); + } + + dynamic_wstring& dynamic_wstring::unquote() + { + if (m_len >= 2) + { + if ( ((*this)[0] == L'\"') && ((*this)[m_len - 1] == L'\"') ) + { + return mid(1, m_len - 2); + } + } + + return *this; + } + + int dynamic_wstring::find_left(const wchar_t* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + + const int p_len = (int)wcslen(p); + + for (int i = 0; i <= (m_len - p_len); i++) + if ((case_sensitive ? 
wcsncmp : _wcsnicmp)(p, &m_pStr[i], p_len) == 0) + return i; + + return -1; + } + + bool dynamic_wstring::contains(const wchar_t* p, bool case_sensitive) const + { + return find_left(p, case_sensitive) >= 0; + } + + uint dynamic_wstring::count_char(wchar_t c) const + { + uint count = 0; + for (uint i = 0; i < m_len; i++) + if (m_pStr[i] == c) + count++; + return count; + } + + int dynamic_wstring::find_left(wchar_t c) const + { + for (uint i = 0; i < m_len; i++) + if (m_pStr[i] == c) + return i; + return -1; + } + + int dynamic_wstring::find_right(wchar_t c) const + { + for (int i = (int)m_len - 1; i >= 0; i--) + if (m_pStr[i] == c) + return i; + return -1; + } + + int dynamic_wstring::find_right(const wchar_t* p, bool case_sensitive) const + { + CRNLIB_ASSERT(p); + const int p_len = (int)wcslen(p); + + for (int i = m_len - p_len; i >= 0; i--) + if ((case_sensitive ? wcsncmp : _wcsnicmp)(p, &m_pStr[i], p_len) == 0) + return i; + + return -1; + } + + dynamic_wstring& dynamic_wstring::trim() + { + int s, e; + for (s = 0; s < (int)m_len; s++) + if (!iswspace(m_pStr[s])) + break; + + for (e = m_len - 1; e > s; e--) + if (!iswspace(m_pStr[e])) + break; + + return crop(s, e - s + 1); + } + + dynamic_wstring& dynamic_wstring::trim_crlf() + { + int s = 0, e; + + for (e = m_len - 1; e > s; e--) + if ((m_pStr[e] != 13) && (m_pStr[e] != 10)) + break; + + return crop(s, e - s + 1); + } + + dynamic_wstring& dynamic_wstring::remap(int from_char, int to_char) + { + for (uint i = 0; i < m_len; i++) + if (m_pStr[i] == from_char) + m_pStr[i] = (wchar_t)to_char; + return *this; + } + +#ifdef CRNLIB_BUILD_DEBUG + void dynamic_wstring::check() const + { + if (!m_pStr) + { + CRNLIB_ASSERT(!m_buf_size && !m_len); + } + else + { + CRNLIB_ASSERT(m_buf_size); + CRNLIB_ASSERT((m_buf_size == UINT16_MAX) || math::is_power_of_2((uint32)m_buf_size)); + CRNLIB_ASSERT(m_len < m_buf_size); + CRNLIB_ASSERT(wcslen(m_pStr) == m_len); + } + } +#endif + + bool dynamic_wstring::ensure_buf(uint len, bool 
preserve_contents) + { + uint buf_size_needed = len + 1; + + CRNLIB_ASSERT(buf_size_needed <= UINT16_MAX); + + if (buf_size_needed <= UINT16_MAX) + { + if (buf_size_needed > m_buf_size) + expand_buf(buf_size_needed, preserve_contents); + } + + return m_buf_size >= buf_size_needed; + } + + bool dynamic_wstring::expand_buf(uint new_buf_size, bool preserve_contents) + { + new_buf_size = math::minimum(UINT16_MAX, math::next_pow2(math::maximum(m_buf_size, new_buf_size))); + + if (new_buf_size != m_buf_size) + { + wchar_t* p = crnlib_new_array(new_buf_size); + + if (preserve_contents) + memcpy(p, get_ptr_priv(), (m_len + 1) * sizeof(wchar_t)); + + crnlib_delete_array(m_pStr); + m_pStr = p; + + m_buf_size = static_cast(new_buf_size); + + if (preserve_contents) + check(); + } + + return m_buf_size >= new_buf_size; + } + + void dynamic_wstring::swap(dynamic_wstring& other) + { + utils::swap(other.m_buf_size, m_buf_size); + utils::swap(other.m_len, m_len); + utils::swap(other.m_pStr, m_pStr); + } + + int dynamic_wstring::serialize(void* pBuf, uint buf_size, bool little_endian) const + { + CRNLIB_ASSERT(pBuf); + + uint buf_left = buf_size; + + if (m_len > UINT16_MAX) + return -1; + + if (!utils::write_val((uint16)m_len, pBuf, buf_left, little_endian)) + return -1; + + if (buf_left < (m_len * sizeof(wchar_t))) + return -1; + + utils::copy_words(reinterpret_cast(pBuf), reinterpret_cast(get_ptr_priv()), m_len, little_endian != c_crnlib_little_endian_platform); + + buf_left -= m_len * sizeof(wchar_t); + + return buf_size - buf_left; + } + + int dynamic_wstring::deserialize(const void* pBuf, uint buf_size, bool little_endian) + { + CRNLIB_ASSERT(pBuf); + + uint buf_left = buf_size; + + if (buf_left < sizeof(uint16)) return -1; + + uint16 l; + if (!utils::read_obj(l, pBuf, buf_left, little_endian)) + return -1; + + if (buf_left < (l * sizeof(wchar_t))) + return -1; + + set_from_buf(pBuf, l, little_endian); + + buf_left -= l * sizeof(wchar_t); + + return buf_size - buf_left; + } + + 
dynamic_wstring::dynamic_wstring(const char* p) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + set(p); + } + + dynamic_wstring::dynamic_wstring(const dynamic_string& s) : + m_buf_size(0), m_len(0), m_pStr(NULL) + { + set(s.get_ptr()); + } + + dynamic_wstring& dynamic_wstring::set(const char* p) + { + CRNLIB_ASSERT(p); + if (!p) + { + clear(); + return *this; + } + + uint l = static_cast(strlen(p)); + if (!l) + { + clear(); + return *this; + } + + const uint num_needed = static_cast(MultiByteToWideChar(CP_ACP, 0, p, l, NULL, 0)); + if (!num_needed) + { + clear(); + return *this; + } + + if (!ensure_buf(num_needed, false)) + { + clear(); + return *this; + } + + const uint num_written = static_cast(MultiByteToWideChar(CP_ACP, 0, p, l, m_pStr, num_needed)); + CRNLIB_ASSERT(num_needed == num_written); + + m_pStr[num_written] = L'\0'; + m_len = static_cast(num_written); + + check(); + + return *this; + } + + dynamic_string& dynamic_wstring::as_ansi(dynamic_string& buf) + { + if (!m_len) + { + buf.clear(); + return buf; + } + + const uint num_needed = WideCharToMultiByte(CP_ACP, 0, m_pStr, m_len, NULL, 0, NULL, NULL); + if (num_needed <= 0) + { + buf.clear(); + return buf; + } + + if (!buf.ensure_buf(num_needed, false)) + { + buf.clear(); + return buf; + } + + const uint num_written = WideCharToMultiByte(CP_ACP, 0, m_pStr, m_len, buf.get_ptr_raw(), num_needed, NULL, NULL); + CRNLIB_ASSERT(num_written == num_needed); + + buf.get_ptr_raw()[num_written] = 0; + buf.m_len = static_cast(num_written); + + buf.check(); + + return buf; + } + + dynamic_wstring& dynamic_wstring::operator= (const dynamic_string& rhs) + { + return set(rhs.get_ptr()); + } + +} // namespace crnlib diff --git a/crnlib/crn_dynamic_wstring.h b/crnlib/crn_dynamic_wstring.h new file mode 100644 index 00000000..ca7ce317 --- /dev/null +++ b/crnlib/crn_dynamic_wstring.h @@ -0,0 +1,159 @@ +// File: crn_dynamic_wstring.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace 
crnlib +{ + // UCS-2 string class (plane 0 characters only) + class dynamic_wstring + { + public: + inline dynamic_wstring() : m_buf_size(0), m_len(0), m_pStr(NULL) { } + dynamic_wstring(eVarArg dummy, const wchar_t* p, ...); + dynamic_wstring(const wchar_t* p); + dynamic_wstring(const wchar_t* p, uint len); + dynamic_wstring(const dynamic_wstring& other); + + // Conversion from UCS-2 to ANSI and vice versa + explicit dynamic_wstring(const char* p); + explicit dynamic_wstring(const dynamic_string& s); + dynamic_wstring& set(const char* p); + dynamic_string& as_ansi(dynamic_string& buf); + + inline ~dynamic_wstring() { CRNLIB_ASSUME(sizeof(wchar_t) == sizeof(uint16)); if (m_pStr) crnlib_delete_array(m_pStr); } + + // Truncates the string to 0 chars and frees the buffer. + void clear(); + void optimize(); + + // Truncates the string to 0 chars, but does not free the buffer. + void empty(); + + inline uint get_len() const { return m_len; } + inline bool is_empty() const { return !m_len; } + + inline const wchar_t* get_ptr() const { return m_pStr ? 
m_pStr : L""; } + + inline const wchar_t* get_ptr_raw() const { return m_pStr; } + inline wchar_t* get_ptr_raw() { return m_pStr; } + + inline wchar_t operator[] (uint i) const { CRNLIB_ASSERT(i <= m_len); return get_ptr()[i]; } + + inline operator size_t() const { return fast_hash(get_ptr(), m_len * sizeof(wchar_t)) ^ fast_hash(&m_len, sizeof(m_len)); } + + int compare(const wchar_t* p, bool case_sensitive = false) const; + int compare(const dynamic_wstring& rhs, bool case_sensitive = false) const; + + inline bool operator== (const dynamic_wstring& rhs) const { return compare(rhs) == 0; } + inline bool operator== (const wchar_t* p) const { return compare(p) == 0; } + + inline bool operator!= (const dynamic_wstring& rhs) const { return compare(rhs) != 0; } + inline bool operator!= (const wchar_t* p) const { return compare(p) != 0; } + + inline bool operator< (const dynamic_wstring& rhs) const { return compare(rhs) < 0; } + inline bool operator< (const wchar_t* p) const { return compare(p) < 0; } + + inline bool operator> (const dynamic_wstring& rhs) const { return compare(rhs) > 0; } + inline bool operator> (const wchar_t* p) const { return compare(p) > 0; } + + inline bool operator<= (const dynamic_wstring& rhs) const { return compare(rhs) <= 0; } + inline bool operator<= (const wchar_t* p) const { return compare(p) <= 0; } + + inline bool operator>= (const dynamic_wstring& rhs) const { return compare(rhs) >= 0; } + inline bool operator>= (const wchar_t* p) const { return compare(p) >= 0; } + + friend inline bool operator== (const wchar_t* p, const dynamic_wstring& rhs) { return rhs.compare(p) == 0; } + + dynamic_wstring& set(const wchar_t* p, uint max_len = UINT_MAX); + dynamic_wstring& set(const dynamic_wstring& other, uint max_len = UINT_MAX); + + bool set_len(uint new_len, wchar_t fill_char = ' '); + + // Set from non-zero terminated buffer. 
+ // little_endian is the endianness of the buffer's data + dynamic_wstring& set_from_buf(const void* pBuf, uint buf_size, bool little_endian = c_crnlib_little_endian_platform); + + dynamic_wstring& operator= (const dynamic_wstring& rhs) { return set(rhs); } + dynamic_wstring& operator= (const dynamic_string& rhs); + dynamic_wstring& operator= (const wchar_t* p) { return set(p); } + dynamic_wstring& operator= (const char* p) { return set(p); } + + dynamic_wstring& set_char(uint index, wchar_t c); + dynamic_wstring& append_char(wchar_t c); + dynamic_wstring& append_char(int c) { CRNLIB_ASSERT((c >= 0) && (c <= 0xFFFF)); return append_char(static_cast(c)); } + dynamic_wstring& truncate(uint new_len); + dynamic_wstring& tolower(); + dynamic_wstring& toupper(); + + dynamic_wstring& append(const wchar_t* p); + dynamic_wstring& append(const dynamic_wstring& other); + dynamic_wstring& operator += (const wchar_t* p) { return append(p); } + dynamic_wstring& operator += (const dynamic_wstring& other) { return append(other); } + + friend dynamic_wstring operator+ (const wchar_t* p, const dynamic_wstring& a); + friend dynamic_wstring operator+ (const dynamic_wstring& a, const wchar_t* p); + friend dynamic_wstring operator+ (const dynamic_wstring& a, const dynamic_wstring& b); + + dynamic_wstring& format_args(const wchar_t* p, va_list args); + dynamic_wstring& format(const wchar_t* p, ...); + + dynamic_wstring& crop(uint start, uint len); + dynamic_wstring& substring(uint start, uint end); + dynamic_wstring& left(uint len); + dynamic_wstring& mid(uint start, uint len); + dynamic_wstring& right(uint start); + dynamic_wstring& tail(uint num); + + dynamic_wstring& unquote(); + + uint count_char(wchar_t c) const; + + int find_left(const wchar_t* p, bool case_sensitive = false) const; + int find_left(wchar_t c) const; + + int find_right(wchar_t c) const; + int find_right(const wchar_t* p, bool case_sensitive = false) const; + + bool contains(const wchar_t* p, bool case_sensitive = 
false) const; + + dynamic_wstring& trim(); + dynamic_wstring& trim_crlf(); + + dynamic_wstring& remap(int from_char, int to_char); + + void swap(dynamic_wstring& other); + + int serialize(void* pBuf, uint buf_size, bool little_endian) const; + int deserialize(const void* pBuf, uint buf_size, bool little_endian); + + private: + // These values are in characters, not bytes! + uint16 m_buf_size; + uint16 m_len; + wchar_t* m_pStr; + +#ifdef CRNLIB_BUILD_DEBUG + void check() const; +#else + void check() const { } +#endif + + bool ensure_buf(uint len, bool preserve_contents = true); + bool expand_buf(uint new_buf_size, bool preserve_contents); + + const wchar_t* get_ptr_priv() const { return m_pStr ? m_pStr : L""; } + wchar_t* get_ptr_priv() { return (wchar_t*)(m_pStr ? m_pStr : L""); } + }; + + typedef crnlib::vector dynamic_wstring_array; + + extern dynamic_wstring g_empty_dynamic_wstring; + + CRNLIB_DEFINE_BITWISE_MOVABLE(dynamic_wstring); + + inline void swap (dynamic_wstring& a, dynamic_wstring& b) + { + a.swap(b); + } + +} // namespace crnlib diff --git a/crnlib/crn_event.h b/crnlib/crn_event.h new file mode 100644 index 00000000..28ec6742 --- /dev/null +++ b/crnlib/crn_event.h @@ -0,0 +1,27 @@ +// File: crn_event.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + class event + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(event); + + public: + event(bool manual_reset = false, bool initial_state = false, const char* pName = NULL); + ~event(); + + inline void *get_handle(void) const { return m_handle; } + + void set(void); + void reset(void); + void pulse(void); + bool wait(uint32 milliseconds = UINT32_MAX); + + private: + void *m_handle; + }; + +} // namespace crnlib + diff --git a/crnlib/crn_hash.cpp b/crnlib/crn_hash.cpp new file mode 100644 index 00000000..d26dc8d9 --- /dev/null +++ b/crnlib/crn_hash.cpp @@ -0,0 +1,68 @@ +// File: crn_hash.cpp +// See Paul Hsieh's page at: http://www.azillionmonkeys.com/qed/hash.html 
+// Also see http://www.concentric.net/~Ttwang/tech/inthash.htm, +// http://burtleburtle.net/bob/hash/integer.html +#include "crn_core.h" + +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16 *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32)(((const uint8 *)(d))[1])) << 8)\ + +(uint32)(((const uint8 *)(d))[0]) ) +#endif + +namespace crnlib +{ + uint32 fast_hash (const void* p, int len) + { + const char * data = static_cast(p); + + uint32 hash = len, tmp; + int rem; + + if (len <= 0 || data == NULL) return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (;len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (uint16); + hash += hash >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= data[sizeof (uint16)] << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += *data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; + } + +} // namespace crnlib diff --git a/crnlib/crn_hash.h b/crnlib/crn_hash.h new file mode 100644 index 00000000..db09c51e --- /dev/null +++ b/crnlib/crn_hash.h @@ -0,0 +1,34 @@ +// File: crn_hash.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + uint32 fast_hash (const void* p, int len); + + // 4-byte integer hash, full avalanche + inline uint32 bitmix32c(uint32 a) + { + a = (a+0x7ed55d16) + (a<<12); + a = (a^0xc761c23c) ^ (a>>19); + a = (a+0x165667b1) + (a<<5); + a = (a+0xd3a2646c) 
^ (a<<9); + a = (a+0xfd7046c5) + (a<<3); + a = (a^0xb55a4f09) ^ (a>>16); + return a; + } + + // 4-byte integer hash, full avalanche, no constants + inline uint32 bitmix32(uint32 a) + { + a -= (a<<6); + a ^= (a>>17); + a -= (a<<9); + a ^= (a<<4); + a -= (a<<3); + a ^= (a<<10); + a ^= (a>>15); + return a; + } + +} // namespace crnlib diff --git a/crnlib/crn_hash_map.cpp b/crnlib/crn_hash_map.cpp new file mode 100644 index 00000000..47b2e8ca --- /dev/null +++ b/crnlib/crn_hash_map.cpp @@ -0,0 +1,155 @@ +// File: crn_hash_map.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_hash_map.h" +#include "crn_rand.h" + +namespace crnlib +{ +#if 0 + class counted_obj + { + public: + counted_obj(uint v = 0) : + m_val(v) + { + m_count++; + } + + counted_obj(const counted_obj& obj) : + m_val(obj.m_val) + { + m_count++; + } + + ~counted_obj() + { + CRNLIB_ASSERT(m_count > 0); + m_count--; + } + + static uint m_count; + + uint m_val; + + operator size_t() const { return m_val; } + + bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; } + bool operator== (const uint rhs) const { return m_val == rhs; } + + }; + + uint counted_obj::m_count; + + void hash_map_test() + { + random r0, r1; + + uint seed = 0; + for ( ; ; ) + { + seed++; + + typedef crnlib::hash_map my_hash_map; + my_hash_map m; + + const uint n = r0.irand(1, 100000); + + printf("%u\n", n); + + r1.seed(seed); + + crnlib::vector q; + + uint count = 0; + for (uint i = 0; i < n; i++) + { + uint v = r1.urand32() & 0x7FFFFFFF; + my_hash_map::insert_result res = m.insert(counted_obj(v), counted_obj(v ^ 0xdeadbeef)); + if (res.second) + { + count++; + q.push_back(v); + } + } + + CRNLIB_VERIFY(m.size() == count); + + r1.seed(seed); + + my_hash_map cm(m); + m.clear(); + m = cm; + cm.reset(); + + for (uint i = 0; i < n; i++) + { + uint v = r1.urand32() & 0x7FFFFFFF; + my_hash_map::const_iterator it = m.find(counted_obj(v)); + CRNLIB_VERIFY(it != m.end()); 
+ CRNLIB_VERIFY(it->first == v); + CRNLIB_VERIFY(it->second == (v ^ 0xdeadbeef)); + } + + for (uint t = 0; t < 2; t++) + { + const uint nd = r0.irand(1, q.size() + 1); + for (uint i = 0; i < nd; i++) + { + uint p = r0.irand(0, q.size()); + + int k = q[p]; + if (k >= 0) + { + q[p] = -k - 1; + + bool s = m.erase(counted_obj(k)); + CRNLIB_VERIFY(s); + } + } + + typedef crnlib::hash_map uint_hash_set; + uint_hash_set s; + + for (uint i = 0; i < q.size(); i++) + { + int v = q[i]; + + if (v >= 0) + { + my_hash_map::const_iterator it = m.find(counted_obj(v)); + CRNLIB_VERIFY(it != m.end()); + CRNLIB_VERIFY(it->first == (uint)v); + CRNLIB_VERIFY(it->second == ((uint)v ^ 0xdeadbeef)); + + s.insert(v); + } + else + { + my_hash_map::const_iterator it = m.find(counted_obj(-v - 1)); + CRNLIB_VERIFY(it == m.end()); + } + } + + uint found_count = 0; + for (my_hash_map::const_iterator it = m.begin(); it != m.end(); ++it) + { + CRNLIB_VERIFY(it->second == ((uint)it->first ^ 0xdeadbeef)); + + uint_hash_set::const_iterator fit(s.find((uint)it->first)); + CRNLIB_VERIFY(fit != s.end()); + + CRNLIB_VERIFY(fit->first == it->first); + + found_count++; + } + + CRNLIB_VERIFY(found_count == s.size()); + } + + CRNLIB_VERIFY(counted_obj::m_count == m.size() * 2); + } + } +#endif + +} // namespace crnlib diff --git a/crnlib/crn_hash_map.h b/crnlib/crn_hash_map.h new file mode 100644 index 00000000..b736d0e1 --- /dev/null +++ b/crnlib/crn_hash_map.h @@ -0,0 +1,871 @@ +// File: crn_hash_map.h +// See Copyright Notice and license at the end of inc/crnlib.h +// +// Notes: +// Hash function ref: http://www.brpreiss.com/books/opus4/html/page215.html +// Compared for speed against VC9's std::hash_map. +// Linear probing, auto resizes on ~50% load factor. +// Uses Knuth's multiplicative method (Fibonacci hashing). 
+#pragma once +#include "crn_sparse_array.h" +#include "crn_sparse_bit_array.h" +#include "crn_hash.h" + +namespace crnlib +{ + template + struct hasher + { + inline size_t operator() (const T& key) const { return static_cast(key); } + }; + + template + struct bit_hasher + { + inline size_t operator() (const T& key) const { return static_cast(fast_hash(&key, sizeof(key))); } + }; + + template + struct equal_to + { + inline bool operator()(const T& a, const T& b) const { return a == b; } + }; + + // Important: The Hasher and Equals objects must be bitwise movable! + template, typename Equals = equal_to > + class hash_map + { + friend class iterator; + friend class const_iterator; + + enum state + { + cStateInvalid = 0, + cStateValid = 1 + }; + + enum + { + cMinHashSize = 4U + }; + + public: + typedef hash_map hash_map_type; + typedef std::pair value_type; + typedef Key key_type; + typedef Value referent_type; + typedef Hasher hasher_type; + typedef Equals equals_type; + + hash_map() : + m_hash_shift(32), m_num_valid(0), m_grow_threshold(0) + { + } + + hash_map(const hash_map& other) : + m_values(other.m_values), + m_hash_shift(other.m_hash_shift), + m_hasher(other.m_hasher), + m_equals(other.m_equals), + m_num_valid(other.m_num_valid), + m_grow_threshold(other.m_grow_threshold) + { + } + + hash_map& operator= (const hash_map& other) + { + if (this == &other) + return *this; + + clear(); + + m_values = other.m_values; + m_hash_shift = other.m_hash_shift; + m_num_valid = other.m_num_valid; + m_grow_threshold = other.m_grow_threshold; + m_hasher = other.m_hasher; + m_equals = other.m_equals; + + return *this; + } + + inline ~hash_map() + { + clear(); + } + + const Equals& get_equals() const { return m_equals; } + Equals& get_equals() { return m_equals; } + + void set_equals(const Equals& equals) { m_equals = equals; } + + const Hasher& get_hasher() const { return m_hasher; } + Hasher& get_hasher() { return m_hasher; } + + void set_hasher(const Hasher& hasher) { 
m_hasher = hasher; } + + inline void clear() + { + if (!m_values.empty()) + { + if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_values.clear_no_destruction(); + + m_hash_shift = 32; + m_num_valid = 0; + m_grow_threshold = 0; + } + } + + inline void reset() + { + if (!m_num_valid) + return; + + if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + destruct_value_type(p); + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + else if (sizeof(node) <= 32) + { + memset(&m_values[0], 0, m_values.size_in_bytes()); + } + else + { + node* p = &get_node(0); + node* p_end = p + m_values.size(); + + uint num_remaining = m_num_valid; + while (p != p_end) + { + if (p->state) + { + p->state = cStateInvalid; + + num_remaining--; + if (!num_remaining) + break; + } + + p++; + } + } + + m_num_valid = 0; + } + + inline uint size() + { + return m_num_valid; + } + + inline uint get_table_size() + { + return m_values.size(); + } + + inline bool empty() + { + return !m_num_valid; + } + + inline void reserve(uint new_capacity) + { + uint new_hash_size = math::maximum(1U, new_capacity); + + new_hash_size = new_hash_size * 2U; + + if (!math::is_power_of_2(new_hash_size)) + new_hash_size = math::next_pow2(new_hash_size); + + new_hash_size = math::maximum(cMinHashSize, new_hash_size); + + if (new_hash_size > m_values.size()) + rehash(new_hash_size); + } + + class const_iterator; + + class iterator + { + friend class hash_map; + friend class hash_map::const_iterator; + + public: + inline iterator() : m_pTable(NULL), 
m_index(0) { } + inline iterator(hash_map_type& table, uint index) : m_pTable(&table), m_index(index) { } + inline iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline iterator operator++(int) + { + iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline iterator& operator++() + { + probe(); + return *this; + } + + inline value_type& operator*() const { return *get_cur(); } + inline value_type* operator->() const { return get_cur(); } + + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + + private: + hash_map_type* m_pTable; + uint m_index; + + inline value_type* get_cur() const + { + CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); + CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + CRNLIB_ASSERT(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + class const_iterator + { + friend class hash_map; + friend class hash_map::iterator; + + public: + inline const_iterator() : m_pTable(NULL), m_index(0) { } + inline const_iterator(const hash_map_type& table, uint index) : m_pTable(&table), m_index(index) { } + inline const_iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + inline const_iterator(const const_iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) { } + + inline const_iterator& operator= (const const_iterator& other) + { + 
m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + inline const_iterator& operator= (const iterator& other) + { + m_pTable = other.m_pTable; + m_index = other.m_index; + return *this; + } + + // post-increment + inline const_iterator operator++(int) + { + const_iterator result(*this); + ++*this; + return result; + } + + // pre-increment + inline const_iterator& operator++() + { + probe(); + return *this; + } + + inline const value_type& operator*() const { return *get_cur(); } + inline const value_type* operator->() const { return get_cur(); } + + inline bool operator == (const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const const_iterator& b) const { return !(*this == b); } + inline bool operator == (const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } + inline bool operator != (const iterator& b) const { return !(*this == b); } + + private: + const hash_map_type* m_pTable; + uint m_index; + + inline const value_type* get_cur() const + { + CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); + CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); + + return &m_pTable->get_node(m_index); + } + + inline void probe() + { + CRNLIB_ASSERT(m_pTable); + m_index = m_pTable->find_next(m_index); + } + }; + + inline const_iterator begin() const + { + if (!m_num_valid) + return end(); + + return const_iterator(*this, find_next(-1)); + } + + inline const_iterator end() const + { + return const_iterator(*this, m_values.size()); + } + + inline iterator begin() + { + if (!m_num_valid) + return end(); + + return iterator(*this, find_next(-1)); + } + + inline iterator end() + { + return iterator(*this, m_values.size()); + } + + typedef std::pair insert_result; + + inline insert_result insert(const Key& k, const Value& v = Value()) + { + insert_result result; + if (!insert_no_grow(result, k, v)) + { + grow(); + + // This must 
succeed. + if (!insert_no_grow(result, k, v)) + { + CRNLIB_FAIL("insert() failed"); + } + } + + return result; + } + + inline insert_result insert(const value_type& v) + { + return insert(v.first, v.second); + } + + inline const_iterator find(const Key& k) const + { + return const_iterator(*this, find_index(k)); + } + + inline iterator find(const Key& k) + { + return iterator(*this, find_index(k)); + } + + inline bool erase(const Key& k) + { + int i = find_index(k); + + if (i >= static_cast(m_values.size())) + return false; + + node* pDst = &get_node(i); + destruct_value_type(pDst); + pDst->state = cStateInvalid; + + m_num_valid--; + + for ( ; ; ) + { + int r, j = i; + + node* pSrc = pDst; + + do + { + if (!i) + { + i = m_values.size() - 1; + pSrc = &get_node(i); + } + else + { + i--; + pSrc--; + } + + if (!pSrc->state) + return true; + + r = hash_key(pSrc->first); + + } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); + + move_node(pDst, pSrc); + + pDst = pSrc; + } + } + + inline void swap(hash_map_type& other) + { + m_values.swap(other.m_values); + utils::swap(m_hash_shift, other.m_hash_shift); + utils::swap(m_num_valid, other.m_num_valid); + utils::swap(m_grow_threshold, other.m_grow_threshold); + utils::swap(m_hasher, other.m_hasher); + utils::swap(m_equals, other.m_equals); + } + + private: + struct node : public value_type + { + uint8 state; + }; + + static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) + { + if (CRNLIB_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &k, sizeof(Key)); + else + scalar_type::construct(&pDst->first, k); + + if (CRNLIB_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &v, sizeof(Value)); + else + scalar_type::construct(&pDst->second, v); + } + + static inline void construct_value_type(value_type* pDst, const value_type* pSrc) + { + if ((CRNLIB_IS_BITWISE_COPYABLE(Key)) && (CRNLIB_IS_BITWISE_COPYABLE(Value))) + { + memcpy(pDst, pSrc, sizeof(value_type)); + } + else + { + if 
(CRNLIB_IS_BITWISE_COPYABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + scalar_type::construct(&pDst->first, pSrc->first); + + if (CRNLIB_IS_BITWISE_COPYABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + scalar_type::construct(&pDst->second, pSrc->second); + } + } + + static inline void destruct_value_type(value_type* p) + { + scalar_type::destruct(&p->first); + scalar_type::destruct(&p->second); + } + + static inline void move_node(node* pDst, node* pSrc) + { + CRNLIB_ASSERT(!pDst->state); + + if (CRNLIB_IS_BITWISE_MOVABLE(Key) && CRNLIB_IS_BITWISE_MOVABLE(Value)) + { + memcpy(pDst, pSrc, sizeof(node)); + } + else + { + if (CRNLIB_IS_BITWISE_MOVABLE(Key)) + memcpy(&pDst->first, &pSrc->first, sizeof(Key)); + else + { + scalar_type::construct(&pDst->first, pSrc->first); + scalar_type::destruct(&pSrc->first); + } + + if (CRNLIB_IS_BITWISE_MOVABLE(Value)) + memcpy(&pDst->second, &pSrc->second, sizeof(Value)); + else + { + scalar_type::construct(&pDst->second, pSrc->second); + scalar_type::destruct(&pSrc->second); + } + + pDst->state = cStateValid; + } + + pSrc->state = cStateInvalid; + } + + struct raw_node + { + inline raw_node() + { + node* p = reinterpret_cast(this); + p->state = cStateInvalid; + } + + inline ~raw_node() + { + node* p = reinterpret_cast(this); + if (p->state) + hash_map_type::destruct_value_type(p); + } + + inline raw_node(const raw_node& other) + { + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&other); + + if (pSrc->state) + { + hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + else + pDst->state = cStateInvalid; + } + + inline raw_node& operator= (const raw_node& rhs) + { + if (this == &rhs) + return *this; + + node* pDst = reinterpret_cast(this); + const node* pSrc = reinterpret_cast(&rhs); + + if (pSrc->state) + { + if (pDst->state) + { + pDst->first = pSrc->first; + pDst->second = pSrc->second; + } + else + { + 
hash_map_type::construct_value_type(pDst, pSrc); + pDst->state = cStateValid; + } + } + else if (pDst->state) + { + hash_map_type::destruct_value_type(pDst); + pDst->state = cStateInvalid; + } + + return *this; + } + + uint8 m_bits[sizeof(node)]; + }; + + typedef crnlib::vector node_vector; + + node_vector m_values; + uint m_hash_shift; + + Hasher m_hasher; + Equals m_equals; + + uint m_num_valid; + + uint m_grow_threshold; + + inline int hash_key(const Key& k) const + { + CRNLIB_ASSERT((1U << (32U - m_hash_shift)) == m_values.size()); + + uint hash = static_cast(m_hasher(k)); + + // Fibonacci hashing + hash = (2654435769U * hash) >> m_hash_shift; + + CRNLIB_ASSERT(hash < m_values.size()); + return hash; + } + + inline const node& get_node(uint index) const + { + return *reinterpret_cast(&m_values[index]); + } + + inline node& get_node(uint index) + { + return *reinterpret_cast(&m_values[index]); + } + + inline state get_node_state(uint index) const + { + return static_cast(get_node(index).state); + } + + inline void set_node_state(uint index, bool valid) + { + get_node(index).state = valid; + } + + inline void grow() + { + rehash(math::maximum(cMinHashSize, m_values.size() * 2U)); + } + + inline void rehash(uint new_hash_size) + { + CRNLIB_ASSERT(new_hash_size >= m_num_valid); + CRNLIB_ASSERT(math::is_power_of_2(new_hash_size)); + + if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size())) + return; + + hash_map new_map; + new_map.m_values.resize(new_hash_size); + new_map.m_hash_shift = 32U - math::floor_log2i(new_hash_size); + CRNLIB_ASSERT(new_hash_size == (1U << (32U - new_map.m_hash_shift))); + new_map.m_grow_threshold = UINT_MAX; + + node* pNode = reinterpret_cast(m_values.begin()); + node* pNode_end = pNode + m_values.size(); + + while (pNode != pNode_end) + { + if (pNode->state) + { + new_map.move_into(pNode); + + if (new_map.m_num_valid == m_num_valid) + break; + } + + pNode++; + } + + new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U; + 
+ m_values.clear_no_destruction(); + m_hash_shift = 32; + + swap(new_map); + } + + inline uint find_next(int index) const + { + index++; + + if (index >= static_cast(m_values.size())) + return index; + + const node* pNode = &get_node(index); + + for ( ; ; ) + { + if (pNode->state) + break; + + if (++index >= static_cast(m_values.size())) + break; + + pNode++; + } + + return index; + } + + inline uint find_index(const Key& k) const + { + if (m_num_valid) + { + int index = hash_key(k); + const node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + return index; + + const int orig_index = index; + + for ( ; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (index == orig_index) + break; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + return index; + } + } + } + + return m_values.size(); + } + + inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v = Value()) + { + if (!m_values.size()) + return false; + + int index = hash_key(k); + node* pNode = &get_node(index); + + if (pNode->state) + { + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + + const int orig_index = index; + + for ( ; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pNode = &get_node(index); + } + else + { + index--; + pNode--; + } + + if (orig_index == index) + return false; + + if (!pNode->state) + break; + + if (m_equals(pNode->first, k)) + { + result.first = iterator(*this, index); + result.second = false; + return true; + } + } + } + + if (m_num_valid >= m_grow_threshold) + return false; + + construct_value_type(pNode, k, v); + + pNode->state = cStateValid; + + m_num_valid++; + CRNLIB_ASSERT(m_num_valid <= m_values.size()); + + result.first = iterator(*this, index); + result.second = true; + + return true; + } + + inline void move_into(node* pNode) + { + 
int index = hash_key(pNode->first); + node* pDst_node = &get_node(index); + + if (pDst_node->state) + { + const int orig_index = index; + + for ( ; ; ) + { + if (!index) + { + index = m_values.size() - 1; + pDst_node = &get_node(index); + } + else + { + index--; + pDst_node--; + } + + if (index == orig_index) + { + CRNLIB_ASSERT(false); + return; + } + + if (!pDst_node->state) + break; + } + } + + move_node(pDst_node, pNode); + + m_num_valid++; + } + }; + + template + struct bitwise_movable< hash_map > { enum { cFlag = true }; }; + + template + inline void swap(hash_map& a, hash_map& b) + { + a.swap(b); + } + + extern void hash_map_test(); + +} // namespace crnlib diff --git a/crnlib/crn_helpers.h b/crnlib/crn_helpers.h new file mode 100644 index 00000000..8a63176a --- /dev/null +++ b/crnlib/crn_helpers.h @@ -0,0 +1,64 @@ +// File: crn_helpers.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#define CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(c) c(const c&); c& operator= (const c&); +#define CRNLIB_NO_HEAP_ALLOC() private: static void* operator new(size_t); static void* operator new[](size_t); + +namespace crnlib +{ + namespace helpers + { + template struct rel_ops + { + friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } + friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } + }; + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + void construct_array(T* p, uint n) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T; + } + + template + void construct_array(T* p, uint n, const U& init) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { 
+ p; + p->~T(); + } + + template inline void destruct_array(T* p, uint n) + { + T* q = p + n; + for ( ; p != q; ++p) + p->~T(); + } + + } // namespace helpers + +} // namespace crnlib diff --git a/crnlib/crn_huffman_codes.cpp b/crnlib/crn_huffman_codes.cpp new file mode 100644 index 00000000..2ebdc168 --- /dev/null +++ b/crnlib/crn_huffman_codes.cpp @@ -0,0 +1,387 @@ +// File: crn_huffman_codes.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_huffman_codes.h" + +namespace crnlib +{ + struct sym_freq + { + uint m_freq; + uint16 m_left; + uint16 m_right; + + inline bool operator< (const sym_freq& other) const + { + return m_freq > other.m_freq; + } + }; + + static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1) + { + const uint cMaxPasses = 2; + uint hist[256 * cMaxPasses]; + + memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses); + + sym_freq* p = syms0; + sym_freq* q = syms0 + (num_syms >> 1) * 2; + + for ( ; p != q; p += 2) + { + const uint freq0 = p[0].m_freq; + const uint freq1 = p[1].m_freq; + + hist[ freq0 & 0xFF]++; + hist[256 + ((freq0 >> 8) & 0xFF)]++; + + hist[ freq1 & 0xFF]++; + hist[256 + ((freq1 >> 8) & 0xFF)]++; + } + + if (num_syms & 1) + { + const uint freq = p->m_freq; + + hist[ freq & 0xFF]++; + hist[256 + ((freq >> 8) & 0xFF)]++; + } + + sym_freq* pCur_syms = syms0; + sym_freq* pNew_syms = syms1; + + for (uint pass = 0; pass < cMaxPasses; pass++) + { + const uint* pHist = &hist[pass << 8]; + + uint offsets[256]; + + uint cur_ofs = 0; + for (uint i = 0; i < 256; i += 2) + { + offsets[i] = cur_ofs; + cur_ofs += pHist[i]; + + offsets[i+1] = cur_ofs; + cur_ofs += pHist[i+1]; + } + + const uint pass_shift = pass << 3; + + sym_freq* p = pCur_syms; + sym_freq* q = pCur_syms + (num_syms >> 1) * 2; + + for ( ; p != q; p += 2) + { + uint c0 = p[0].m_freq; + uint c1 = p[1].m_freq; + + if (pass) + { + c0 >>= 8; + c1 >>= 8; + } + + c0 &= 0xFF; + c1 &= 0xFF; + + if (c0 == 
c1) + { + uint dst_offset0 = offsets[c0]; + + offsets[c0] = dst_offset0 + 2; + + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset0 + 1] = p[1]; + } + else + { + uint dst_offset0 = offsets[c0]++; + uint dst_offset1 = offsets[c1]++; + + pNew_syms[dst_offset0] = p[0]; + pNew_syms[dst_offset1] = p[1]; + } + } + + if (num_syms & 1) + { + uint c = ((p->m_freq) >> pass_shift) & 0xFF; + + uint dst_offset = offsets[c]; + offsets[c] = dst_offset + 1; + + pNew_syms[dst_offset] = *p; + } + + sym_freq* t = pCur_syms; + pCur_syms = pNew_syms; + pNew_syms = t; + } + +#ifdef CRNLIB_ASSERTS_ENABLED + uint prev_freq = 0; + for (uint i = 0; i < num_syms; i++) + { + CRNLIB_ASSERT(!(pCur_syms[i].m_freq < prev_freq)); + prev_freq = pCur_syms[i].m_freq; + } +#endif + + return pCur_syms; + } + + struct huffman_work_tables + { + enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms }; + + sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; + sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; + + uint16 queue[cMaxInternalNodes]; + }; + + void* create_generate_huffman_codes_tables() + { + return crnlib_new(); + } + + void free_generate_huffman_codes_tables(void* p) + { + crnlib_delete(static_cast(p)); + } + +#if USE_CALCULATE_MINIMUM_REDUNDANCY + /* calculate_minimum_redundancy() written by + Alistair Moffat, alistair@cs.mu.oz.au, + Jyrki Katajainen, jyrki@diku.dk + November 1996. 
+ */ + static void calculate_minimum_redundancy(int A[], int n) { + int root; /* next root node to be used */ + int leaf; /* next leaf to be used */ + int next; /* next value to be assigned */ + int avbl; /* number of available nodes */ + int used; /* number of internal nodes */ + int dpth; /* current depth of leaves */ + + /* check for pathological cases */ + if (n==0) { return; } + if (n==1) { A[0] = 0; return; } + + /* first pass, left to right, setting parent pointers */ + A[0] += A[1]; root = 0; leaf = 2; + for (next=1; next < n-1; next++) { + /* select first item for a pairing */ + if (leaf>=n || A[root]=n || (root=0; next--) + A[next] = A[A[next]]+1; + + /* third pass, right to left, setting leaf depths */ + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) { + while (root>=0 && A[root]==dpth) { + used++; root--; + } + while (avbl>used) { + A[next--] = dpth; avbl--; + } + avbl = 2*used; dpth++; used = 0; + } + } +#endif + + bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret) + { + if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms)) + return false; + + huffman_work_tables& state = *static_cast(pContext);; + + uint max_freq = 0; + uint total_freq = 0; + + uint num_used_syms = 0; + for (uint i = 0; i < num_syms; i++) + { + uint freq = pFreq[i]; + + if (!freq) + pCodesizes[i] = 0; + else + { + total_freq += freq; + max_freq = math::maximum(max_freq, freq); + + sym_freq& sf = state.syms0[num_used_syms]; + sf.m_left = (uint16)i; + sf.m_right = UINT16_MAX; + sf.m_freq = freq; + num_used_syms++; + } + } + + total_freq_ret = total_freq; + + if (num_used_syms == 1) + { + pCodesizes[state.syms0[0].m_left] = 1; + return true; + } + + sym_freq* syms = radix_sort_syms(num_used_syms, state.syms0, state.syms1); + +#if USE_CALCULATE_MINIMUM_REDUNDANCY + int x[cHuffmanMaxSupportedSyms]; + for (uint i = 0; i < num_used_syms; i++) + x[i] = state.syms0[i].m_freq; + + 
calculate_minimum_redundancy(x, num_used_syms); + + uint max_len = 0; + for (uint i = 0; i < num_used_syms; i++) + { + uint len = x[i]; + max_len = math::maximum(len, max_len); + pCodesizes[state.syms0[i].m_left] = static_cast(len); + } + + return true; +#else + // Dummy node + sym_freq& sf = state.syms0[num_used_syms]; + sf.m_left = UINT16_MAX; + sf.m_right = UINT16_MAX; + sf.m_freq = UINT_MAX; + + uint next_internal_node = num_used_syms + 1; + + uint queue_front = 0; + uint queue_end = 0; + + uint next_lowest_sym = 0; + + uint num_nodes_remaining = num_used_syms; + do + { + uint left_freq = syms[next_lowest_sym].m_freq; + uint left_child = next_lowest_sym; + + if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < left_freq)) + { + left_child = state.queue[queue_front]; + left_freq = syms[left_child].m_freq; + + queue_front++; + } + else + next_lowest_sym++; + + uint right_freq = syms[next_lowest_sym].m_freq; + uint right_child = next_lowest_sym; + + if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < right_freq)) + { + right_child = state.queue[queue_front]; + right_freq = syms[right_child].m_freq; + + queue_front++; + } + else + next_lowest_sym++; + + const uint internal_node_index = next_internal_node; + next_internal_node++; + + CRNLIB_ASSERT(next_internal_node < CRNLIB_ARRAYSIZE(state.syms0)); + + syms[internal_node_index].m_freq = left_freq + right_freq; + syms[internal_node_index].m_left = static_cast(left_child); + syms[internal_node_index].m_right = static_cast(right_child); + + CRNLIB_ASSERT(queue_end < huffman_work_tables::cMaxInternalNodes); + state.queue[queue_end] = static_cast(internal_node_index); + queue_end++; + + num_nodes_remaining--; + + } while (num_nodes_remaining > 1); + + CRNLIB_ASSERT(next_lowest_sym == num_used_syms); + CRNLIB_ASSERT((queue_end - queue_front) == 1); + + uint cur_node_index = state.queue[queue_front]; + + uint32* pStack = (syms == state.syms0) ? 
(uint32*)state.syms1 : (uint32*)state.syms0; + uint32* pStack_top = pStack; + + uint max_level = 0; + + for ( ; ; ) + { + uint level = cur_node_index >> 16; + uint node_index = cur_node_index & 0xFFFF; + + uint left_child = syms[node_index].m_left; + uint right_child = syms[node_index].m_right; + + uint next_level = (cur_node_index + 0x10000) & 0xFFFF0000; + + if (left_child < num_used_syms) + { + max_level = math::maximum(max_level, level); + + pCodesizes[syms[left_child].m_left] = static_cast(level + 1); + + if (right_child < num_used_syms) + { + pCodesizes[syms[right_child].m_left] = static_cast(level + 1); + + if (pStack == pStack_top) break; + cur_node_index = *--pStack; + } + else + { + cur_node_index = next_level | right_child; + } + } + else + { + if (right_child < num_used_syms) + { + max_level = math::maximum(max_level, level); + + pCodesizes[syms[right_child].m_left] = static_cast(level + 1); + + cur_node_index = next_level | left_child; + } + else + { + *pStack++ = next_level | left_child; + + cur_node_index = next_level | right_child; + } + } + } + + max_code_size = max_level + 1; +#endif + + return true; + } + +} // namespace crnlib + diff --git a/crnlib/crn_huffman_codes.h b/crnlib/crn_huffman_codes.h new file mode 100644 index 00000000..473d09d9 --- /dev/null +++ b/crnlib/crn_huffman_codes.h @@ -0,0 +1,14 @@ +// File: crn_huffman_codes.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + const uint cHuffmanMaxSupportedSyms = 8192; + + void* create_generate_huffman_codes_tables(); + void free_generate_huffman_codes_tables(void* p); + + bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); + +} // namespace crnlib diff --git a/crnlib/crn_image.h b/crnlib/crn_image.h new file mode 100644 index 00000000..5aeecafa --- /dev/null +++ b/crnlib/crn_image.h @@ -0,0 +1,612 @@ +// File: crn_image.h +// See Copyright Notice 
and license at the end of inc/crnlib.h +#pragma once +#include "crn_color.h" +#include "crn_vec.h" +#include "crn_pixel_format.h" + +namespace crnlib +{ + template + class image + { + public: + typedef color_type color_t; + + typedef crnlib::vector pixel_buf_t; + + image() : + m_width(0), + m_height(0), + m_pitch(0), + m_total(0), + m_comp_flags(pixel_format_helpers::cDefaultCompFlags), + m_pPixels(NULL) + { + } + + image(uint width, uint height, uint pitch = UINT_MAX, const color_type& background = color_type::make_black(), uint flags = pixel_format_helpers::cDefaultCompFlags) : + m_comp_flags(flags) + { + CRNLIB_ASSERT((width > 0) && (height > 0)); + if (pitch == UINT_MAX) + pitch = width; + + m_pixel_buf.resize(pitch * height); + + m_width = width; + m_height = height; + m_pitch = pitch; + m_total = m_pitch * m_height; + + m_pPixels = &m_pixel_buf.front(); + + set_all(background); + } + + image(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) + { + alias(pPixels, width, height, pitch, flags); + } + + image& operator= (const image& other) + { + if (this == &other) + return *this; + + if (other.m_pixel_buf.empty()) + { + // This doesn't look very safe - let's make a new instance. 
+ //m_pixel_buf.clear(); + //m_pPixels = other.m_pPixels; + + const uint total_pixels = other.m_pitch * other.m_height; + if ((total_pixels) && (other.m_pPixels)) + { + m_pixel_buf.resize(total_pixels); + m_pixel_buf.insert(0, other.m_pPixels, m_pixel_buf.size()); + m_pPixels = &m_pixel_buf.front(); + } + else + { + m_pixel_buf.clear(); + m_pPixels = NULL; + } + } + else + { + m_pixel_buf = other.m_pixel_buf; + m_pPixels = &m_pixel_buf.front(); + } + + m_width = other.m_width; + m_height = other.m_height; + m_pitch = other.m_pitch; + m_total = other.m_total; + m_comp_flags = other.m_comp_flags; + + return *this; + } + + image(const image& other) : + m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(NULL) + { + *this = other; + } + + void alias(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) + { + m_pixel_buf.clear(); + + m_pPixels = pPixels; + + m_width = width; + m_height = height; + m_pitch = (pitch == UINT_MAX) ? 
width : pitch; + m_total = m_pitch * m_height; + m_comp_flags = flags; + } + + void clear() + { + m_pPixels = NULL; + m_pixel_buf.clear(); + m_width = 0; + m_height = 0; + m_pitch = 0; + m_total = 0; + m_comp_flags = pixel_format_helpers::cDefaultCompFlags; + } + + inline bool is_valid() const { return m_total > 0; } + + inline pixel_format_helpers::component_flags get_comp_flags() const { return static_cast(m_comp_flags); } + inline void set_comp_flags(pixel_format_helpers::component_flags new_flags) { m_comp_flags = new_flags; } + inline void reset_comp_flags() { m_comp_flags = pixel_format_helpers::cDefaultCompFlags; } + + inline bool is_component_valid(uint index) const { CRNLIB_ASSERT(index < 4U); return utils::is_flag_set(m_comp_flags, index); } + inline void set_component_valid(uint index, bool state) { CRNLIB_ASSERT(index < 4U); utils::set_flag(m_comp_flags, index, state); } + + inline bool has_rgb() const { return is_component_valid(0) || is_component_valid(1) || is_component_valid(2); } + inline bool has_alpha() const { return is_component_valid(3); } + + inline bool is_grayscale() const { return utils::is_bit_set(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale); } + inline void set_grayscale(bool state) { utils::set_bit(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale, state); } + + void set_all(const color_type& c) + { + for (uint i = 0; i < m_total; i++) + m_pPixels[i] = c; + } + + void convert_to_grayscale() + { + for (uint y = 0; y < m_height; y++) + for (uint x = 0; x < m_width; x++) + { + color_type c((*this)(x, y)); + typename color_type::component_t l = static_cast< typename color_type::component_t >(c.get_luma()); + c.r = l; + c.g = l; + c.b = l; + (*this)(x, y) = c; + } + + set_grayscale(true); + } + + void swizzle(uint r, uint g, uint b, uint a) + { + for (uint y = 0; y < m_height; y++) + for (uint x = 0; x < m_width; x++) + { + const color_type& c = (*this)(x, y); + + (*this)(x, y) = color_type(c[r], c[g], c[b], c[a]); + } + } + 
+ void set_alpha_to_luma() + { + for (uint y = 0; y < m_height; y++) + for (uint x = 0; x < m_width; x++) + { + color_type c((*this)(x, y)); + typename color_type::component_t l = static_cast< typename color_type::component_t >(c.get_luma()); + c.a = l; + (*this)(x, y) = c; + } + + set_component_valid(3, true); + } + + bool extract_block(color_type* pDst, uint x, uint y, uint w, uint h, bool flip_xy = false) const + { + if ((x >= m_width) || (y >= m_height)) + return false; + + if (flip_xy) + { + for (uint y_ofs = 0; y_ofs < h; y_ofs++) + for (uint x_ofs = 0; x_ofs < w; x_ofs++) + pDst[x_ofs * 4 + y_ofs] = get_clamped(x_ofs + x, y_ofs + y); + } + else if (((x + w) > m_width) || ((y + h) > m_height)) + { + for (uint y_ofs = 0; y_ofs < h; y_ofs++) + for (uint x_ofs = 0; x_ofs < w; x_ofs++) + *pDst++ = get_clamped(x_ofs + x, y_ofs + y); + } + else + { + const color_type* pSrc = get_scanline(y) + x; + + for (uint i = h; i; i--) + { + memcpy(pDst, pSrc, w * sizeof(color_type)); + pDst += w; + + pSrc += m_pitch; + } + } + + return true; + } + + void fill(uint x, uint y, uint w, uint h, const color_type& c) + { + CRNLIB_ASSERT((x + w) <= m_width); + CRNLIB_ASSERT((y + h) <= m_height); + + color_type* p = get_scanline(y) + x; + + for (uint i = h; i; i--) + { + color_type* q = p; + for (uint j = w; j; j--) + *q++ = c; + p += m_pitch; + } + } + + void draw_box(int x, int y, uint width, uint height, const color_type& c) + { + draw_line(x, y, x + width - 1, y, c); + draw_line(x, y, x, y + height - 1, c); + draw_line(x + width - 1, y, x + width - 1, y + height - 1, c); + draw_line(x, y + height - 1, x + width - 1, y + height - 1, c); + } + + // No clipping! 
+ bool copy(uint src_x, uint src_y, uint src_w, uint src_h, uint dst_x, uint dst_y, const image& src) + { + if ( ((src_x + src_w) > src.get_width()) || ((src_y + src_h) > src.get_height()) ) + return false; + + if ( ((dst_x + src_w) > get_width()) || ((dst_y + src_h) > get_height()) ) + return false; + + const color_type* pS = &src(src_x, src_y); + color_type* pD = &(*this)(dst_x, dst_y); + + const uint bytes_to_copy = src_w * sizeof(color_type); + for (uint i = src_h; i; i--) + { + memcpy(pD, pS, bytes_to_copy); + + pS += src.get_pitch(); + pD += get_pitch(); + } + + return true; + } + + // With clipping. + void blit(int dst_x, int dst_y, const image& src) + { + uint src_x = 0; + uint src_y = 0; + + if (dst_x < 0) + { + src_x = -dst_x; + if (src_x >= src.get_width()) + return; + dst_x = 0; + } + + if (dst_y < 0) + { + src_y = -dst_y; + if (src_y >= src.get_height()) + return; + dst_y = 0; + } + + if ((dst_x >= (int)m_width) || (dst_y >= (int)m_height)) + return; + + uint width = math::minimum(m_width - dst_x, src.get_width() - src_x); + uint height = math::minimum(m_height - dst_y, src.get_height() - src_y); + + bool success = copy(src_x, src_y, width, height, dst_x, dst_y, src); + success; + CRNLIB_ASSERT(success); + } + + bool resize(uint new_width, uint new_height, uint new_pitch = UINT_MAX, const color_type background = color_type::make_black()) + { + if (new_pitch == UINT_MAX) + new_pitch = new_width; + + if ((new_width == m_width) && (new_height == m_height) && (new_pitch == m_pitch)) + return true; + + if ((!new_width) || (!new_height) || (!new_pitch)) + { + clear(); + return false; + } + + pixel_buf_t existing_pixels; + existing_pixels.swap(m_pixel_buf); + + if (!m_pixel_buf.try_resize(new_height * new_pitch)) + { + clear(); + return false; + } + + for (uint y = 0; y < new_height; y++) + { + for (uint x = 0; x < new_width; x++) + { + if ((x < m_width) && (y < m_height)) + m_pixel_buf[x + y * new_pitch] = existing_pixels[x + y * m_pitch]; + else + 
m_pixel_buf[x + y * new_pitch] = background; + } + } + + m_width = new_width; + m_height = new_height; + m_pitch = new_pitch; + m_total = new_pitch * new_height; + m_pPixels = &m_pixel_buf.front(); + + return true; + } + + inline uint get_width() const { return m_width; } + inline uint get_height() const { return m_height; } + inline uint get_total_pixels() const { return m_width * m_height; } + + inline uint get_pitch() const { return m_pitch; } + inline uint get_pitch_in_bytes() const { return m_pitch * sizeof(color_type); } + + // Returns pitch * height, NOT width * height! + inline uint get_total() const { return m_total; } + + inline uint get_block_width(uint block_size) const { return (m_width + block_size - 1) / block_size; } + inline uint get_block_height(uint block_size) const { return (m_height + block_size - 1) / block_size; } + inline uint get_total_blocks(uint block_size) const { return get_block_width(block_size) * get_block_height(block_size); } + + inline uint get_size_in_bytes() const { return sizeof(color_type) * m_total; } + + inline const color_type* get_pixels() const { return m_pPixels; } + inline color_type* get_pixels() { return m_pPixels; } + + inline const color_type& operator() (uint x, uint y) const + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_pPixels[x + y * m_pitch]; + } + + inline color_type& operator() (uint x, uint y) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + return m_pPixels[x + y * m_pitch]; + } + + inline const color_type& get_clamped (int x, int y) const + { + x = math::clamp(x, 0, m_width - 1); + y = math::clamp(y, 0, m_height - 1); + return (*this)((uint)x, (uint)y); + } + + // Sample image with bilinear filtering. + // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are (0,width] and (0,height]. 
+ void get_filtered(float x, float y, color_type& result) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floor(x); + int iy = (int)floor(y); + float wx = x - ix; + float wy = y - iy; + + color_type a(get_clamped(ix, iy)); + color_type b(get_clamped(ix + 1, iy)); + color_type c(get_clamped(ix, iy + 1)); + color_type d(get_clamped(ix + 1, iy + 1)); + + for (uint i = 0; i < 4; i++) + { + double top = math::lerp(a[i], b[i], wx); + double bot = math::lerp(c[i], d[i], wx); + double m = math::lerp(top, bot, wy); + + if (!color_type::component_traits::cFloat) + m += .5f; + + result.set_component(i, static_cast< typename color_type::parameter_t >(m)); + } + } + + void get_filtered(float x, float y, vec4F& result) const + { + x -= .5f; + y -= .5f; + + int ix = (int)floor(x); + int iy = (int)floor(y); + float wx = x - ix; + float wy = y - iy; + + color_type a(get_clamped(ix, iy)); + color_type b(get_clamped(ix + 1, iy)); + color_type c(get_clamped(ix, iy + 1)); + color_type d(get_clamped(ix + 1, iy + 1)); + + for (uint i = 0; i < 4; i++) + { + float top = math::lerp(a[i], b[i], wx); + float bot = math::lerp(c[i], d[i], wx); + float m = math::lerp(top, bot, wy); + + result[i] = m; + } + } + + inline void set_pixel(uint x, uint y, const color_type& c) + { + CRNLIB_ASSERT((x < m_width) && (y < m_height)); + m_pPixels[x + y * m_pitch] = c; + } + + inline void set_pixel_clipped(int x, int y, const color_type& c) + { + if ((x < 0) || (x >= (int)m_width) || (y < 0) || (y >= (int)m_height)) + return; + + m_pPixels[x + y * m_pitch] = c; + } + + inline const color_type* get_scanline(uint y) const + { + CRNLIB_ASSERT(y < m_height); + return &m_pPixels[y * m_pitch]; + } + + inline color_type* get_scanline(uint y) + { + CRNLIB_ASSERT(y < m_height); + return &m_pPixels[y * m_pitch]; + } + + inline const color_type* get_ptr() const + { + return m_pPixels; + } + + inline color_type* get_ptr() + { + return m_pPixels; + } + + inline void swap(image& other) + { + utils::swap(m_width, 
other.m_width); + utils::swap(m_height, other.m_height); + utils::swap(m_pitch, other.m_pitch); + utils::swap(m_total, other.m_total); + utils::swap(m_comp_flags, other.m_comp_flags); + utils::swap(m_pPixels, other.m_pPixels); + m_pixel_buf.swap(other.m_pixel_buf); + } + + void draw_line(int xs, int ys, int xe, int ye, const color_type& color) + { + if (xs > xe) + { + utils::swap(xs, xe); + utils::swap(ys, ye); + } + + int dx = xe - xs, dy = ye - ys; + + if (!dx) + { + if (ys > ye) + utils::swap(ys, ye); + for (int i = ys ; i <= ye ; i++) + set_pixel_clipped(xs, i, color); + } + else if (!dy) + { + for (int i = xs ; i < xe ; i++) + set_pixel_clipped(i, ys, color); + } + else if (dy > 0) + { + if (dy <= dx) + { + int e = 2 * dy - dx; + int e_no_inc = 2 * dy; + int e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy; + int e_no_inc = 2 * dx; + int e_inc = 2 * (dx - dy); + rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); + } + } + else + { + dy = -dy; + + if (dy <= dx) + { + int e = 2 * dy - dx; + int e_no_inc = 2 * dy; + int e_inc = 2 * (dy - dx); + rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); + } + else + { + int e = 2 * dx - dy; + int e_no_inc = (2 * dx); + int e_inc = 2 * (dx - dy); + rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); + } + } + } + + const pixel_buf_t& get_pixel_buf() const { return m_pixel_buf; } + pixel_buf_t& get_pixel_buf() { return m_pixel_buf; } + + private: + uint m_width; + uint m_height; + uint m_pitch; + uint m_total; + uint m_comp_flags; + + color_type* m_pPixels; + + pixel_buf_t m_pixel_buf; + + void rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_type& color) + { + int start, end, var; + + if (pred) + { + start = ys; + end = ye; + var = xs; + + for (int i = start; i <= end; i++) + { + set_pixel_clipped(var, i, color); + + if (e < 0) + e += 
e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } + else + { + start = xs; + end = xe; + var = ys; + + for (int i = start; i <= end; i++) + { + set_pixel_clipped(i, var, color); + + if (e < 0) + e += e_no_inc; + else + { + var += inc_dec; + e += e_inc; + } + } + } + } + }; + + typedef image image_u8; + typedef image image_i16; + typedef image image_u16; + typedef image image_i32; + typedef image image_u32; + typedef image image_f; + + template + inline void swap(image& a, image& b) + { + a.swap(b); + } + +} // namespace crnlib diff --git a/crnlib/crn_image_utils.cpp b/crnlib/crn_image_utils.cpp new file mode 100644 index 00000000..60e05a70 --- /dev/null +++ b/crnlib/crn_image_utils.cpp @@ -0,0 +1,1169 @@ +// File: crn_image_utils.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_image_utils.h" +#include "crn_console.h" +#include "crn_resampler.h" +#include "crn_threaded_resampler.h" +#include "crn_strutils.h" + +#define STBI_HEADER_FILE_ONLY +#include "crn_stb_image.cpp" + +#include "crn_pixel_format.h" + +namespace crnlib +{ + const float cInfinitePSNR = 999999.0f; + + namespace image_utils + { + bool load_from_file_stb(const wchar_t* pFilename, image_u8& img) + { + int x = 0, y = 0, n = 0; + unsigned char* pData = stbi_load_w(pFilename, &x, &y, &n, 4); + + if (!pData) + return false; + + if ((x > 8192) || (y > 8192)) + { + stbi_image_free(pData); + return false; + } + + const bool has_alpha = ((n == 2) || (n == 4)); + + img.resize(x, y); + + bool grayscale = true; + + for (int py = 0; py < y; py++) + { + const color_quad_u8* pSrc = reinterpret_cast(pData) + (py * x); + color_quad_u8* pDst = img.get_scanline(py); + color_quad_u8* pDst_end = pDst + x; + + while (pDst != pDst_end) + { + color_quad_u8 c(*pSrc++); + if (!has_alpha) + c.a = 255; + + if (!c.is_grayscale()) + grayscale = false; + + *pDst++ = c; + } + } + + stbi_image_free(pData); + + img.reset_comp_flags(); + 
img.set_grayscale(grayscale); + img.set_component_valid(3, has_alpha); + + return true; + } + + bool save_to_file_stb(const wchar_t* pFilename, const image_u8& img, uint save_flags, int comp_index) + { + if (!img.get_width()) + return false; + + bool bSaveBMP = false; + dynamic_wstring ext(pFilename); + if (get_extension(ext)) + { + if (ext == L"bmp") + bSaveBMP = true; + else if (ext != L"tga") + { + console::error(L"crnlib::image_utils::save_to_file_stb: Can only write .BMP or .TGA files!\n"); + return false; + } + } + + if ((img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) || (save_flags & image_utils::cSaveGrayscale)) + { + CRNLIB_ASSERT(comp_index < 4); + if (comp_index > 3) comp_index = 3; + + crnlib::vector temp(img.get_total_pixels()); + + for (uint y = 0; y < img.get_height(); y++) + { + const color_quad_u8* pSrc = img.get_scanline(y); + const color_quad_u8* pSrc_end = pSrc + img.get_width(); + uint8* pDst = &temp[y * img.get_width()]; + + if (img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) + { + while (pSrc != pSrc_end) + *pDst++ = (*pSrc++)[1]; + } + else if (comp_index < 0) + { + while (pSrc != pSrc_end) + *pDst++ = static_cast((*pSrc++).get_luma()); + } + else + { + while (pSrc != pSrc_end) + *pDst++ = (*pSrc++)[comp_index]; + } + } + + return (bSaveBMP ? stbi_write_bmp_w : stbi_write_tga_w)(pFilename, img.get_width(), img.get_height(), 1, &temp[0]) == CRNLIB_TRUE; + } + else if ((!img.is_component_valid(3)) || (save_flags & cSaveIgnoreAlpha)) + { + crnlib::vector temp(img.get_total_pixels() * 3); + + for (uint y = 0; y < img.get_height(); y++) + { + const color_quad_u8* pSrc = img.get_scanline(y); + const color_quad_u8* pSrc_end = pSrc + img.get_width(); + uint8* pDst = &temp[y * img.get_width() * 3]; + + while (pSrc != pSrc_end) + { + const color_quad_u8 c(*pSrc++); + + pDst[0] = c.r; + pDst[1] = c.g; + pDst[2] = c.b; + + pDst += 3; + } + } + + return (bSaveBMP ? 
stbi_write_bmp_w : stbi_write_tga_w)(pFilename, img.get_width(), img.get_height(), 3, &temp[0]) == CRNLIB_TRUE; + } + else + { + return (bSaveBMP ? stbi_write_bmp_w : stbi_write_tga_w)(pFilename, img.get_width(), img.get_height(), 4, img.get_ptr()) == CRNLIB_TRUE; + } + } + + bool load_from_file(image_u8& dest, const wchar_t* pFilename, int flags) + { + flags; + return image_utils::load_from_file_stb(pFilename, dest); + } + + bool save_to_grayscale_file(const wchar_t* pFilename, const image_u8& src, int component, int flags) + { + flags; + return image_utils::save_to_file_stb(pFilename, src, image_utils::cSaveGrayscale, component); + } + + bool save_to_file(const wchar_t* pFilename, const image_u8& src, int flags, bool ignore_alpha) + { + if (src.is_grayscale()) + return save_to_grayscale_file(pFilename, src, cSaveLuma, flags); + else + { + uint save_flags = 0; + if (ignore_alpha) + save_flags |= image_utils::cSaveIgnoreAlpha; + return image_utils::save_to_file_stb(pFilename, src, save_flags); + } + } + + bool has_alpha(const image_u8& img) + { + for (uint y = 0; y < img.get_height(); y++) + for (uint x = 0; x < img.get_width(); x++) + if (img(x, y).a < 255) + return true; + + return false; + } + + void renorm_normal_map(image_u8& img) + { + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + color_quad_u8& c = img(x, y); + if ((c.r == 128) && (c.g == 128) && (c.b == 128)) + continue; + + vec3F v(c.r, c.g, c.b); + v *= 1.0f/255.0f; + v *= 2.0f; + v -= vec3F(1.0f); + v.clamp(-1.0f, 1.0f); + + float length = v.length(); + if (length < .077f) + c.set(128, 128, 128, c.a); + else if (fabs(length - 1.0f) > .077f) + { + if (length) + v /= length; + + for (uint i = 0; i < 3; i++) + c[i] = static_cast(math::clamp(floor((v[i] + 1.0f) * .5f * 255.0f + .5f), 0.0f, 255.0f)); + + if ((c.r == 128) && (c.g == 128)) + { + if (c.b < 128) + c.b = 0; + else + c.b = 255; + } + } + } + } + } + + bool is_normal_map(const image_u8& img, const 
wchar_t* pFilename) + { + float score = 0.0f; + + uint num_invalid_pixels = 0; + + // TODO: Derive better score from pixel mean, eigenvecs/vals + //crnlib::vector pixels; + + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + const color_quad_u8& c = img(x, y); + + if (c.b < 123) + { + num_invalid_pixels++; + continue; + } + else if ((c.r != 128) || (c.g != 128) || (c.b != 128)) + { + vec3F v(c.r, c.g, c.b); + v -= vec3F(128.0f); + v /= vec3F(127.0f); + //pixels.push_back(v); + v.clamp(-1.0f, 1.0f); + + float norm = v.norm(); + if ((norm < 0.83f) || (norm > 1.29f)) + num_invalid_pixels++; + } + } + } + + score -= math::clamp(float(num_invalid_pixels) / (img.get_width() * img.get_height()) - .026f, 0.0f, 1.0f) * 5.0f; + + if (pFilename) + { + dynamic_wstring str(pFilename); + str.tolower(); + + if (str.contains(L"normal") || str.contains(L"local") || str.contains(L"nmap")) + score += 1.0f; + + if (str.contains(L"diffuse") || str.contains(L"spec") || str.contains(L"gloss")) + score -= 1.0f; + } + + return score >= 0.0f; + } + + bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params) + { + const uint src_width = src.get_width(); + const uint src_height = src.get_height(); + + if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + const int cMaxComponents = 4; + if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) + return false; + + const uint dst_width = params.m_dst_width; + const uint dst_height = params.m_dst_height; + + if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + if ((src_width == dst_width) && (src_height == dst_height)) + { + dst = src; + return true; + } + + dst.clear(); + dst.resize(params.m_dst_width, 
params.m_dst_height); + + // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. + const float source_gamma = params.m_source_gamma;//1.75f; + + float srgb_to_linear[256]; + if (params.m_srgb) + { + for (int i = 0; i < 256; ++i) + srgb_to_linear[i] = (float)pow(i * 1.0f/255.0f, source_gamma); + } + + const int linear_to_srgb_table_size = 8192; + unsigned char linear_to_srgb[linear_to_srgb_table_size]; + + const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; + const float inv_source_gamma = 1.0f / source_gamma; + + if (params.m_srgb) + { + for (int i = 0; i < linear_to_srgb_table_size; ++i) + { + int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); + if (k < 0) k = 0; else if (k > 255) k = 255; + linear_to_srgb[i] = (unsigned char)k; + } + } + + Resampler* resamplers[cMaxComponents]; + crnlib::vector samples[cMaxComponents]; + + resamplers[0] = crnlib_new(src_width, src_height, dst_width, dst_height, + params.m_wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + params.m_pFilter, (Resampler::Contrib_List*)NULL, (Resampler::Contrib_List*)NULL, params.m_filter_scale, params.m_filter_scale); + samples[0].resize(src_width); + + for (uint i = 1; i < params.m_num_comps; i++) + { + resamplers[i] = crnlib_new(src_width, src_height, dst_width, dst_height, + params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, + params.m_pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), params.m_filter_scale, params.m_filter_scale); + samples[i].resize(src_width); + } + + uint dst_y = 0; + + for (uint src_y = 0; src_y < src_height; src_y++) + { + const color_quad_u8* pSrc = src.get_scanline(src_y); + + for (uint x = 0; x < src_width; x++) + { + for (uint c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + const uint8 v = (*pSrc)[comp_index]; + + if (!params.m_srgb || (comp_index == 3)) + samples[c][x] = v * (1.0f/255.0f); + else + samples[c][x] = srgb_to_linear[v]; + } + + pSrc++; + } + + for (uint c = 0; c < params.m_num_comps; c++) + { + if (!resamplers[c]->put_line(&samples[c][0])) + { + for (uint i = 0; i < params.m_num_comps; i++) + crnlib_delete(resamplers[i]); + return false; + } + } + + for ( ; ; ) + { + uint c; + for (c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + + const float* pOutput_samples = resamplers[c]->get_line(); + if (!pOutput_samples) + break; + + const bool linear = !params.m_srgb || (comp_index == 3); + CRNLIB_ASSERT(dst_y < dst_height); + color_quad_u8* pDst = dst.get_scanline(dst_y); + + for (uint x = 0; x < dst_width; x++) + { + if (linear) + { + int c = (int)(255.0f * pOutput_samples[x] + .5f); + if (c < 0) c = 0; else if (c > 255) c = 255; + (*pDst)[comp_index] = (unsigned char)c; + } + else + { + int j = (int)(linear_to_srgb_table_size * pOutput_samples[x] + .5f); + if (j < 0) j = 0; else if (j >= linear_to_srgb_table_size) j = linear_to_srgb_table_size - 1; + (*pDst)[comp_index] = linear_to_srgb[j]; + } + + pDst++; + } + } + if (c < params.m_num_comps) + break; + + dst_y++; + } + } + + for (uint i = 0; i < params.m_num_comps; i++) + crnlib_delete(resamplers[i]); + + return true; + } + + bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params) + { + 
const uint src_width = src.get_width(); + const uint src_height = src.get_height(); + + if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + const int cMaxComponents = 4; + if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) + return false; + + const uint dst_width = params.m_dst_width; + const uint dst_height = params.m_dst_height; + + if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) + { + printf("Image is too large!\n"); + return EXIT_FAILURE; + } + + if ((src_width == dst_width) && (src_height == dst_height)) + { + dst = src; + return true; + } + + dst.clear(); + + // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. + const float source_gamma = params.m_source_gamma;//1.75f; + + float srgb_to_linear[256]; + if (params.m_srgb) + { + for (int i = 0; i < 256; ++i) + srgb_to_linear[i] = (float)pow(i * 1.0f/255.0f, source_gamma); + } + + const int linear_to_srgb_table_size = 8192; + unsigned char linear_to_srgb[linear_to_srgb_table_size]; + + const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; + const float inv_source_gamma = 1.0f / source_gamma; + + if (params.m_srgb) + { + for (int i = 0; i < linear_to_srgb_table_size; ++i) + { + int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); + if (k < 0) k = 0; else if (k > 255) k = 255; + linear_to_srgb[i] = (unsigned char)k; + } + } + + task_pool tp; + tp.init(g_number_of_processors - 1); + + threaded_resampler resampler(tp); + threaded_resampler::params p; + p.m_src_width = src_width; + p.m_src_height = src_height; + p.m_dst_width = dst_width; + p.m_dst_height = dst_height; + p.m_sample_low = 0.0f; + p.m_sample_high = 1.0f; + p.m_boundary_op = params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP; + p.m_Pfilter_name = params.m_pFilter; + p.m_filter_x_scale = params.m_filter_scale; + p.m_filter_y_scale = params.m_filter_scale; + + uint resampler_comps = 4; + if (params.m_num_comps == 1) + { + p.m_fmt = threaded_resampler::cPF_Y_F32; + resampler_comps = 1; + } + else if (params.m_num_comps <= 3) + p.m_fmt = threaded_resampler::cPF_RGBX_F32; + else + p.m_fmt = threaded_resampler::cPF_RGBA_F32; + + crnlib::vector src_samples; + crnlib::vector dst_samples; + + if (!src_samples.try_resize(src_width * src_height * resampler_comps)) + return false; + + if (!dst_samples.try_resize(dst_width * dst_height * resampler_comps)) + return false; + + p.m_pSrc_pixels = src_samples.get_ptr(); + p.m_src_pitch = src_width * resampler_comps * sizeof(float); + p.m_pDst_pixels = dst_samples.get_ptr(); + p.m_dst_pitch = dst_width * resampler_comps * sizeof(float); + + for (uint src_y = 0; src_y < src_height; src_y++) + { + const color_quad_u8* pSrc = src.get_scanline(src_y); + float* pDst = src_samples.get_ptr() + src_width * resampler_comps * src_y; + + for (uint x = 0; x < src_width; x++) + { + for (uint c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + const uint8 v = (*pSrc)[comp_index]; + + if (!params.m_srgb || (comp_index == 3)) + pDst[c] = v * (1.0f/255.0f); + else + pDst[c] = srgb_to_linear[v]; + } + + pSrc++; + pDst += resampler_comps; + } + } + + if (!resampler.resample(p)) + return false; + + src_samples.clear(); + + if (!dst.resize(params.m_dst_width, params.m_dst_height)) + return false; + + for (uint dst_y = 0; dst_y < dst_height; dst_y++) + { + const float* pSrc = dst_samples.get_ptr() + dst_width * resampler_comps * dst_y; + color_quad_u8* pDst = dst.get_scanline(dst_y); + + for (uint x = 0; x < dst_width; x++) + { + color_quad_u8 dst(0, 0, 0, 255); + + for (uint c = 0; c < params.m_num_comps; c++) + { + const uint comp_index = params.m_first_comp + c; + const float v = 
pSrc[c]; + + if ((!params.m_srgb) || (comp_index == 3)) + { + int c = static_cast(255.0f * v + .5f); + if (c < 0) c = 0; else if (c > 255) c = 255; + dst[comp_index] = (unsigned char)c; + } + else + { + int j = static_cast(linear_to_srgb_table_size * v + .5f); + if (j < 0) j = 0; else if (j >= linear_to_srgb_table_size) j = linear_to_srgb_table_size - 1; + dst[comp_index] = linear_to_srgb[j]; + } + } + + *pDst++ = dst; + + pSrc += resampler_comps; + } + } + + return true; + } + + bool resample(const image_u8& src, image_u8& dst, const resample_params& params) + { + if ((params.m_multithreaded) && (g_number_of_processors > 1)) + return resample_multithreaded(src, dst, params); + else + return resample_single_thread(src, dst, params); + } + + bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale) + { + if ( (a.get_width() != b.get_width()) || (a.get_height() != b.get_height()) ) + return false; + + dest.resize(a.get_width(), b.get_height()); + + for (uint y = 0; y < a.get_height(); y++) + { + for (uint x = 0; x < a.get_width(); x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + color_quad_u8 cd; + for (uint c = 0; c < 4; c++) + { + int d = (ca[c] - cb[c]) * scale + 128; + d = math::clamp(d, 0, 255); + cd[c] = static_cast(d); + } + + dest(x, y) = cd; + } + } + + return true; + } + + // FIXME: Totally hack-ass computation. + // Perhaps port http://www.lomont.org/Software/Misc/SSIM/SSIM.html? 
+      // Computes an SSIM-style similarity score between two blocks of t 8-bit samples.
+      //
+      // t      - number of samples in each block.
+      // pX, pY - the two blocks; each must hold at least t samples.
+      //
+      // Uses the means, the (t - 1)-normalized variances and the covariance of the blocks, with
+      // stabilizing constants c1 = (255*.01)^2 and c2 = (255*.03)^2. Identical blocks score 1.
+      // Returns 0 for an empty block; a single-sample block is scored from its means alone.
+      double compute_block_ssim(uint t, const uint8* pX, const uint8* pY)
+      {
+         // Nothing to compare; without this guard the averages below would be 0/0.
+         if (!t)
+            return 0.0;
+
+         double ave_x = 0.0;
+         double ave_y = 0.0;
+         for (uint i = 0; i < t; i++)
+         {
+            ave_x += pX[i];
+            ave_y += pY[i];
+         }
+
+         ave_x /= t;
+         ave_y /= t;
+
+         // Accumulate both variances and the covariance in a single pass.
+         double var_x = 0.0;
+         double var_y = 0.0;
+         double covar_xy = 0.0;
+         for (uint i = 0; i < t; i++)
+         {
+            const double dx = pX[i] - ave_x;
+            const double dy = pY[i] - ave_y;
+
+            var_x += dx * dx;
+            var_y += dy * dy;
+            covar_xy += dx * dy;
+         }
+
+         // Sample (t - 1) normalization. A single sample has zero spread, so divide by 1 instead of 0.
+         const double norm = (t > 1U) ? static_cast<double>(t - 1U) : 1.0;
+         var_x /= norm;
+         var_y /= norm;
+         covar_xy /= norm;
+
+         const double c1 = 6.5025; //(255*.01)^2
+         const double c2 = 58.5225; //(255*.03)^2
+
+         // The formula needs the variances themselves, so no square root is taken.
+         const double n = (2.0 * ave_x * ave_y + c1) * (2.0 * covar_xy + c2);
+         const double d = (ave_x * ave_x + ave_y * ave_y + c1) * (var_x + var_y + c2);
+
+         // d is always positive: c1 and c2 are positive and every other term is non-negative.
+         return n / d;
+      }
+
+      // Returns the mean of compute_block_ssim() over all 6x6 blocks of image a compared against the
+      // same blocks of image b.
+      //
+      // channel_index - component to compare (0..3), or a negative value to compare luma.
+      //
+      // Blocks are enumerated over a's dimensions; samples are fetched with get_clamped() from both
+      // images. Returns 0 if a has no pixels.
+      double compute_ssim(const image_u8& a, const image_u8& b, int channel_index)
+      {
+         const uint N = 6;
+         uint8 sx[N*N], sy[N*N];
+
+         CRNLIB_ASSERT(channel_index < 4);
+         const bool use_luma = (channel_index < 0);
+
+         double total_ssim = 0.0;
+         uint total_blocks = 0;
+
+         //image_u8 yimg((a.get_width() + N - 1) / N, (a.get_height() + N - 1) / N);
+
+         for (uint y = 0; y < a.get_height(); y += N)
+         {
+            for (uint x = 0; x < a.get_width(); x += N)
+            {
+               // Gather one NxN block of samples from each image.
+               for (uint iy = 0; iy < N; iy++)
+               {
+                  for (uint ix = 0; ix < N; ix++)
+                  {
+                     const uint ofs = ix + iy * N;
+
+                     if (use_luma)
+                     {
+                        sx[ofs] = (uint8)a.get_clamped(x+ix, y+iy).get_luma();
+                        sy[ofs] = (uint8)b.get_clamped(x+ix, y+iy).get_luma();
+                     }
+                     else
+                     {
+                        sx[ofs] = (uint8)a.get_clamped(x+ix, y+iy)[channel_index];
+                        sy[ofs] = (uint8)b.get_clamped(x+ix, y+iy)[channel_index];
+                     }
+                  }
+               }
+
+               double ssim = compute_block_ssim(N*N, sx, sy);
+               total_ssim += ssim;
+               total_blocks++;
+
+               //uint ssim_c = (uint)math::clamp(ssim * 127.0f + 128.0f, 0, 255);
+               //yimg(x / N, y / N).set(ssim_c, ssim_c, ssim_c, 255);
+            }
+         }
+
+         if (!total_blocks)
+            return 0.0;
+
+         //save_to_file_stb(L"ssim.tga", yimg, cSaveGrayscale);
+
+         return total_ssim / total_blocks;
+      }
+
+ void print_ssim(const image_u8& src_img, const image_u8& dst_img) + { + double y_ssim = compute_ssim(src_img, dst_img, -1); + console::printf(L"Luma MSSIM: %f, Scaled: %f", y_ssim, (y_ssim - .8f) / .2f); + + //double r_ssim = compute_ssim(src_img, dst_img, 0); + //console::printf(L" R MSSIM: %f", r_ssim); + + //double g_ssim = compute_ssim(src_img, dst_img, 1); + //console::printf(L" G MSSIM: %f", g_ssim); + + //double b_ssim = compute_ssim(src_img, dst_img, 2); + //console::printf(L" B MSSIM: %f", b_ssim); + } + + void error_metrics::print(const wchar_t* pName) const + { + if (mPeakSNR >= cInfinitePSNR) + console::printf(L"%s Error: Max: %3u, Mean: %3.3f, RMS: %3.3f, PSNR: Infinite", pName, mMax, mMean, mRootMeanSquared); + else + console::printf(L"%s Error: Max: %3u, Mean: %3.3f, RMS: %3.3f, PSNR: %3.3f", pName, mMax, mMean, mRootMeanSquared, mPeakSNR); + } + + bool error_metrics::compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error) + { + //if ( (!a.get_width()) || (!b.get_height()) || (a.get_width() != b.get_width()) || (a.get_height() != b.get_height()) ) + // return false; + + const uint width = math::minimum(a.get_width(), b.get_width()); + const uint height = math::minimum(a.get_height(), b.get_height()); + + CRNLIB_ASSERT((first_channel < 4U) && (first_channel + num_channels <= 4U)); + + // Histogram approach due to Charles Bloom. 
+ double hist[256]; + utils::zero_object(hist); + + for (uint y = 0; y < height; y++) + { + for (uint x = 0; x < width; x++) + { + const color_quad_u8& ca = a(x, y); + const color_quad_u8& cb = b(x, y); + + if (!num_channels) + hist[labs(ca.get_luma() - cb.get_luma())]++; + else + { + for (uint c = 0; c < num_channels; c++) + hist[labs(ca[first_channel + c] - cb[first_channel + c])]++; + } + } + } + + mMax = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint i = 0; i < 256; i++) + { + if (!hist[i]) + continue; + + mMax = math::maximum(mMax, i); + + double x = i * hist[i]; + + sum += x; + sum2 += i * x; + } + + // See http://bmrc.berkeley.edu/courseware/cs294/fall97/assignment/psnr.html + double total_values = width * height; + + if (average_component_error) + total_values *= math::clamp(num_channels, 1, 4); + + mMean = math::clamp(sum / total_values, 0.0f, 255.0f); + mMeanSquared = math::clamp(sum2 / total_values, 0.0f, 255.0f*255.0f); + + mRootMeanSquared = sqrt(mMeanSquared); + + if (!mRootMeanSquared) + mPeakSNR = cInfinitePSNR; + else + mPeakSNR = math::clamp(log10(255.0f / mRootMeanSquared) * 20.0f, 0.0f, 500.0f); + + return true; + } + + void print_image_metrics(const image_u8& src_img, const image_u8& dst_img) + { + if ( (!src_img.get_width()) || (!dst_img.get_height()) || (src_img.get_width() != dst_img.get_width()) || (src_img.get_height() != dst_img.get_height()) ) + console::printf(L"print_image_metrics: Image resolutions don't match exactly (%ux%u) vs. 
(%ux%u)", src_img.get_width(), src_img.get_height(), dst_img.get_width(), dst_img.get_height()); + + image_utils::error_metrics error_metrics; + + if (src_img.has_rgb() || dst_img.has_rgb()) + { + error_metrics.compute(src_img, dst_img, 0, 3, false); + error_metrics.print(L"RGB Total "); + + error_metrics.compute(src_img, dst_img, 0, 3, true); + error_metrics.print(L"RGB Average"); + + error_metrics.compute(src_img, dst_img, 0, 0); + error_metrics.print(L"Luma "); + + error_metrics.compute(src_img, dst_img, 0, 1); + error_metrics.print(L"Red "); + + error_metrics.compute(src_img, dst_img, 1, 1); + error_metrics.print(L"Green "); + + error_metrics.compute(src_img, dst_img, 2, 1); + error_metrics.print(L"Blue "); + } + + if (src_img.has_alpha() || dst_img.has_alpha()) + { + error_metrics.compute(src_img, dst_img, 3, 1); + error_metrics.print(L"Alpha "); + } + } + + static uint8 regen_z(uint x, uint y) + { + float vx = math::clamp((x - 128.0f) * 1.0f/127.0f, -1.0f, 1.0f); + float vy = math::clamp((y - 128.0f) * 1.0f/127.0f, -1.0f, 1.0f); + float vz = sqrt(math::clamp(1.0f - vx * vx - vy * vy, 0.0f, 1.0f)); + + vz = vz * 127.0f + 128.0f; + + if (vz < 128.0f) + vz -= .5f; + else + vz += .5f; + + int ib = math::float_to_int(vz); + + return static_cast(math::clamp(ib, 0, 255)); + } + + void convert_image(image_u8& img, image_utils::conversion_type conv_type) + { + switch (conv_type) + { + case image_utils::cConversion_To_CCxY: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagLumaChroma)); + break; + } + case image_utils::cConversion_From_CCxY: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid)); + break; + } + case image_utils::cConversion_To_xGxR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | 
pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_From_xGxR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_To_xGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_To_AGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_From_xGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_From_AGBR: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case image_utils::cConversion_XY_to_XYZ: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); + break; + } + case cConversion_Y_To_A: + { + img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagAValid)); + break; + } + case cConversion_A_To_RGBA: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | 
pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid)); + break; + } + case cConversion_Y_To_RGB: + { + img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagGrayscale | (img.has_alpha() ? pixel_format_helpers::cCompFlagAValid : 0))); + break; + } + default: + { + CRNLIB_ASSERT(false); + return; + } + } + + for (uint y = 0; y < img.get_height(); y++) + { + for (uint x = 0; x < img.get_width(); x++) + { + color_quad_u8 src(img(x, y)); + color_quad_u8 dst; + + switch (conv_type) + { + case image_utils::cConversion_To_CCxY: + { + color::RGB_to_YCC(dst, src); + break; + } + case image_utils::cConversion_From_CCxY: + { + color::YCC_to_RGB(dst, src); + break; + } + case image_utils::cConversion_To_xGxR: + { + dst.r = 0; + dst.g = src.g; + dst.b = 0; + dst.a = src.r; + break; + } + case image_utils::cConversion_From_xGxR: + { + dst.r = src.a; + dst.g = src.g; + // This is kinda iffy, we're assuming the image is a normal map here. + dst.b = regen_z(src.a, src.g); + dst.a = 255; + break; + } + case image_utils::cConversion_To_xGBR: + { + dst.r = 0; + dst.g = src.g; + dst.b = src.b; + dst.a = src.r; + break; + } + case image_utils::cConversion_To_AGBR: + { + dst.r = src.a; + dst.g = src.g; + dst.b = src.b; + dst.a = src.r; + break; + } + case image_utils::cConversion_From_xGBR: + { + dst.r = src.a; + dst.g = src.g; + dst.b = src.b; + dst.a = 255; + break; + } + case image_utils::cConversion_From_AGBR: + { + dst.r = src.a; + dst.g = src.g; + dst.b = src.b; + dst.a = src.r; + break; + } + case image_utils::cConversion_XY_to_XYZ: + { + dst.r = src.r; + dst.g = src.g; + // This is kinda iffy, we're assuming the image is a normal map here. 
+ dst.b = regen_z(src.r, src.g); + dst.a = 255; + break; + } + case image_utils::cConversion_Y_To_A: + { + dst.r = src.r; + dst.g = src.g; + dst.b = src.b; + dst.a = static_cast(src.get_luma()); + break; + } + case image_utils::cConversion_Y_To_RGB: + { + uint8 y = static_cast(src.get_luma()); + dst.r = y; + dst.g = y; + dst.b = y; + dst.a = src.a; + break; + } + case image_utils::cConversion_A_To_RGBA: + { + dst.r = src.a; + dst.g = src.a; + dst.b = src.a; + dst.a = src.a; + break; + } + default: + { + CRNLIB_ASSERT(false); + dst = src; + break; + } + } + + img(x, y) = dst; + } + } + } + + image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt) + { + image_utils::conversion_type conv_type = image_utils::cConversion_Invalid; + + if (cooking) + { + switch (fmt) + { + case PIXEL_FMT_DXT5_CCxY: + { + conv_type = image_utils::cConversion_To_CCxY; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + conv_type = image_utils::cConversion_To_xGxR; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + conv_type = image_utils::cConversion_To_xGBR; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + conv_type = image_utils::cConversion_To_AGBR; + break; + } + default: break; + } + } + else + { + switch (fmt) + { + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + { + conv_type = image_utils::cConversion_XY_to_XYZ; + break; + } + case PIXEL_FMT_DXT5_CCxY: + { + conv_type = image_utils::cConversion_From_CCxY; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + conv_type = image_utils::cConversion_From_xGxR; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + conv_type = image_utils::cConversion_From_xGBR; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + conv_type = image_utils::cConversion_From_AGBR; + break; + } + default: break; + } + } + + return conv_type; + } + + image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT5_CCxY: return image_utils::cConversion_To_CCxY; + case cCRNFmtDXT5_xGxR: return 
image_utils::cConversion_To_xGxR; + case cCRNFmtDXT5_xGBR: return image_utils::cConversion_To_xGBR; + case cCRNFmtDXT5_AGBR: return image_utils::cConversion_To_AGBR; + default: break; + } + return image_utils::cConversion_Invalid; + } + + double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels) + { + if (!n) + return 0.0f; + + double sum = 0.0f; + double sum2 = 0.0f; + + for (uint i = 0; i < n; i++) + { + const color_quad_u8& cp = pPixels[i]; + + if (!num_channels) + { + uint l = cp.get_luma(); + sum += l; + sum2 += l*l; + } + else + { + for (uint c = 0; c < num_channels; c++) + { + uint l = cp[first_channel + c]; + sum += l; + sum2 += l*l; + } + } + } + + double w = math::maximum(1U, num_channels) * n; + sum /= w; + sum2 /= w; + + double var = sum2 - sum * sum; + var = math::maximum(var, 0.0f); + + return sqrt(var); + } + + } // namespace image_utils + +} // namespace crnlib diff --git a/crnlib/crn_image_utils.h b/crnlib/crn_image_utils.h new file mode 100644 index 00000000..ac50f08d --- /dev/null +++ b/crnlib/crn_image_utils.h @@ -0,0 +1,141 @@ +// File: crn_image_utils.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_image.h" + +namespace crnlib +{ + enum pixel_format; + + namespace image_utils + { + bool load_from_file_stb(const wchar_t* pFilename, image_u8& img); + + enum + { + cSaveIgnoreAlpha = 1, + cSaveGrayscale = 2 + }; + + const int cSaveLuma = -1; + + bool save_to_file_stb(const wchar_t* pFilename, const image_u8& img, uint save_flags = 0, int comp_index = cSaveLuma); + + bool load_from_file(image_u8& dest, const wchar_t* pFilename, int flags = 0); + + bool save_to_grayscale_file(const wchar_t* pFilename, const image_u8& src, int component, int flags = 0); + + bool save_to_file(const wchar_t* pFilename, const image_u8& src, int flags = 0, bool ignore_alpha = false); + + bool has_alpha(const image_u8& img); + bool is_normal_map(const image_u8& img, const wchar_t* 
pFilename = NULL); + void renorm_normal_map(image_u8& img); + + struct resample_params + { + resample_params() : + m_dst_width(0), + m_dst_height(0), + m_pFilter("lanczos4"), + m_filter_scale(1.0f), + m_srgb(true), + m_wrapping(false), + m_first_comp(0), + m_num_comps(4), + m_source_gamma(2.2f), // 1.75f + m_multithreaded(true) + { + } + + uint m_dst_width; + uint m_dst_height; + const char* m_pFilter; + float m_filter_scale; + bool m_srgb; + bool m_wrapping; + uint m_first_comp; + uint m_num_comps; + float m_source_gamma; + bool m_multithreaded; + }; + + bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params); + bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params); + bool resample(const image_u8& src, image_u8& dst, const resample_params& params); + + bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale = 2); + + class error_metrics + { + public: + error_metrics() { utils::zero_this(this); } + + void print(const wchar_t* pName) const; + + // If num_channels==0, luma error is computed. + // If pHist != NULL, it must point to a 256 entry array. 
+ bool compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error = true); + + uint mMax; + double mMean; + double mMeanSquared; + double mRootMeanSquared; + double mPeakSNR; + + inline bool operator== (const error_metrics& other) const + { + return mPeakSNR == other.mPeakSNR; + } + + inline bool operator< (const error_metrics& other) const + { + return mPeakSNR < other.mPeakSNR; + } + + inline bool operator> (const error_metrics& other) const + { + return mPeakSNR > other.mPeakSNR; + } + }; + + void print_image_metrics(const image_u8& src_img, const image_u8& dst_img); + + double compute_block_ssim(uint n, const uint8* pX, const uint8* pY); + double compute_ssim(const image_u8& a, const image_u8& b, int channel_index); + void print_ssim(const image_u8& src_img, const image_u8& dst_img); + + enum conversion_type + { + cConversion_Invalid = -1, + + cConversion_To_CCxY, + cConversion_From_CCxY, + + cConversion_To_xGxR, + cConversion_From_xGxR, + + cConversion_To_xGBR, + cConversion_From_xGBR, + + cConversion_To_AGBR, + cConversion_From_AGBR, + + cConversion_XY_to_XYZ, + + cConversion_Y_To_A, + + cConversion_A_To_RGBA, + cConversion_Y_To_RGB, + + cConversionTotal + }; + + void convert_image(image_u8& img, conversion_type conv_type); + + image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt); + + image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt); + + double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels); + } +} diff --git a/crnlib/crn_intersect.h b/crnlib/crn_intersect.h new file mode 100644 index 00000000..a773dd23 --- /dev/null +++ b/crnlib/crn_intersect.h @@ -0,0 +1,123 @@ +// File: crn_intersect.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_ray.h" + +namespace crnlib +{ + namespace intersection + { + enum result + { + cBackfacing = -1, + cFailure = 0, + 
cSuccess, + cParallel, + cInside, + }; + + // Returns cInside, cSuccess, or cFailure. + // Algorithm: Graphics Gems 1 + template + result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + enum + { + cNumDim = vector_type::num_elements, + cRight = 0, + cLeft = 1, + cMiddle = 2 + }; + + bool inside = true; + int quadrant[cNumDim]; + scalar_type candidate_plane[cNumDim]; + + for (int i = 0; i < cNumDim; i++) + { + if (ray.get_origin()[i] < box[0][i]) + { + quadrant[i] = cLeft; + candidate_plane[i] = box[0][i]; + inside = false; + } + else if (ray.get_origin()[i] > box[1][i]) + { + quadrant[i] = cRight; + candidate_plane[i] = box[1][i]; + inside = false; + } + else + { + quadrant[i] = cMiddle; + } + } + + if (inside) + { + coord = ray.get_origin(); + t = 0.0f; + return cInside; + } + + scalar_type max_t[cNumDim]; + for (int i = 0; i < cNumDim; i++) + { + if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) + max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; + else + max_t[i] = -1.0f; + } + + int which_plane = 0; + for (int i = 1; i < cNumDim; i++) + if (max_t[which_plane] < max_t[i]) + which_plane = i; + + if (max_t[which_plane] < 0.0f) + return cFailure; + + for (int i = 0; i < cNumDim; i++) + { + if (i != which_plane) + { + coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; + + if ( (coord[i] < box[0][i]) || (coord[i] > box[1][i]) ) + return cFailure; + } + else + { + coord[i] = candidate_plane[i]; + } + + CRNLIB_ASSERT(coord[i] >= box[0][i] && coord[i] <= box[1][i]); + } + + t = max_t[which_plane]; + return cSuccess; + } + + template + result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) + { + if (!box.contains(ray.get_origin())) + { + started_within = false; + return ray_aabb(coord, t, ray, box); + } + + started_within = true; + + float diag_dist = box.diagonal_length() * 1.5f; + ray_type 
outside_ray(ray.eval(diag_dist), -ray.get_direction()); + + result res(ray_aabb(coord, t, outside_ray, box)); + if (res != cSuccess) + return res; + + t = math::maximum(0.0f, diag_dist - t); + return cSuccess; + } + } +} diff --git a/crnlib/crn_lzma_codec.cpp b/crnlib/crn_lzma_codec.cpp new file mode 100644 index 00000000..31148796 --- /dev/null +++ b/crnlib/crn_lzma_codec.cpp @@ -0,0 +1,137 @@ +// File: crn_lzma_codec.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_lzma_codec.h" +#include "crn_strutils.h" +#include "crn_checksum.h" +#include "lzma_lzmalib.h" + +namespace crnlib +{ + lzma_codec::lzma_codec() : + m_pCompress(LzmaCompress), + m_pUncompress(LzmaUncompress) + { + CRNLIB_ASSUME(cLZMAPropsSize == LZMA_PROPS_SIZE); + } + + lzma_codec::~lzma_codec() + { + } + + bool lzma_codec::pack(const void* p, uint n, crnlib::vector& buf) + { + if (n > 1024U*1024U*1024U) + return false; + + uint max_comp_size = n + math::maximum(128, n >> 8); + buf.resize(sizeof(header) + max_comp_size); + + header* pHDR = reinterpret_cast(&buf[0]); + uint8* pComp_data = &buf[sizeof(header)]; + + utils::zero_object(*pHDR); + + pHDR->m_uncomp_size = n; + pHDR->m_adler32 = adler32(p, n); + + if (n) + { + size_t destLen = 0; + size_t outPropsSize = 0; + int status = SZ_ERROR_INPUT_EOF; + + for (uint trial = 0; trial < 3; trial++) + { + destLen = max_comp_size; + outPropsSize = cLZMAPropsSize; + + status = (*m_pCompress)(pComp_data, &destLen, reinterpret_cast(p), n, + pHDR->m_lzma_props, &outPropsSize, + -1, /* 0 <= level <= 9, default = 5 */ + 0, /* default = (1 << 24) */ + -1, /* 0 <= lc <= 8, default = 3 */ + -1, /* 0 <= lp <= 4, default = 0 */ + -1, /* 0 <= pb <= 4, default = 2 */ + -1, /* 5 <= fb <= 273, default = 32 */ + (g_number_of_processors > 1) ? 
2 : 1 + ); + + if (status != SZ_ERROR_OUTPUT_EOF) + break; + + max_comp_size += ((n+1)/2); + buf.resize(sizeof(header) + max_comp_size); + pHDR = reinterpret_cast(&buf[0]); + pComp_data = &buf[sizeof(header)]; + } + + if (status != SZ_OK) + { + buf.clear(); + return false; + } + + pHDR->m_comp_size = static_cast(destLen); + + buf.resize(CRNLIB_SIZEOF_U32(header) + static_cast(destLen)); + } + + pHDR->m_sig = header::cSig; + pHDR->m_checksum = static_cast(adler32((uint8*)pHDR + header::cChecksumSkipBytes, sizeof(header) - header::cChecksumSkipBytes)); + + return true; + } + + bool lzma_codec::unpack(const void* p, uint n, crnlib::vector& buf) + { + buf.resize(0); + + if (n < sizeof(header)) + return false; + + const header& hdr = *static_cast(p); + if (hdr.m_sig != header::cSig) + return false; + + if (static_cast(adler32((const uint8*)&hdr + header::cChecksumSkipBytes, sizeof(hdr) - header::cChecksumSkipBytes)) != hdr.m_checksum) + return false; + + if (!hdr.m_uncomp_size) + return true; + + if (!hdr.m_comp_size) + return false; + + if (hdr.m_uncomp_size > 1024U*1024U*1024U) + return false; + + if (!buf.try_resize(hdr.m_uncomp_size)) + return false; + + const uint8* pComp_data = static_cast(p) + sizeof(header); + size_t srcLen = n - sizeof(header); + if (srcLen < hdr.m_comp_size) + return false; + + size_t destLen = hdr.m_uncomp_size; + + int status = (*m_pUncompress)(&buf[0], &destLen, pComp_data, &srcLen, + hdr.m_lzma_props, cLZMAPropsSize); + + if ((status != SZ_OK) || (destLen != hdr.m_uncomp_size)) + { + buf.clear(); + return false; + } + + if (adler32(&buf[0], buf.size()) != hdr.m_adler32) + { + buf.clear(); + return false; + } + + return true; + } + +} // namespace crnlib diff --git a/crnlib/crn_lzma_codec.h b/crnlib/crn_lzma_codec.h new file mode 100644 index 00000000..79ac9967 --- /dev/null +++ b/crnlib/crn_lzma_codec.h @@ -0,0 +1,60 @@ +// File: crn_lzma_codec.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include 
"crn_packed_uint.h" + +namespace crnlib +{ + class lzma_codec + { + public: + lzma_codec(); + ~lzma_codec(); + + // Always available, because we're statically linking in lzmalib now vs. dynamically loading the DLL. + const bool is_initialized() const { return true; } + + bool pack(const void* p, uint n, crnlib::vector& buf); + + bool unpack(const void* p, uint n, crnlib::vector& buf); + + private: + typedef int (__stdcall *LzmaCompressFuncPtr)(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); + + typedef int (__stdcall *LzmaUncompressFuncPtr)(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, + const unsigned char *props, size_t propsSize); + + LzmaCompressFuncPtr m_pCompress; + LzmaUncompressFuncPtr m_pUncompress; + + enum { cLZMAPropsSize = 5 }; + +#pragma pack(push) +#pragma pack(1) + struct header + { + enum { cSig = 'L' | ('0' << 8), cChecksumSkipBytes = 3 }; + packed_uint<2> m_sig; + uint8 m_checksum; + + uint8 m_lzma_props[cLZMAPropsSize]; + + packed_uint<4> m_comp_size; + packed_uint<4> m_uncomp_size; + + packed_uint<4> m_adler32; + }; +#pragma pack(pop) + + }; + +} // namespace crnlib diff --git a/crnlib/crn_math.cpp b/crnlib/crn_math.cpp new file mode 100644 index 00000000..3df29c24 --- /dev/null +++ b/crnlib/crn_math.cpp @@ -0,0 +1,76 @@ +// File: crn_math.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" + +namespace crnlib +{ + namespace math + { + uint g_bitmasks[32] = + { + 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, + 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, + 1U << 8U, 
1U << 9U, 1U << 10U, 1U << 11U, + 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, + 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, + 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, + 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, + 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U + }; + + double compute_entropy(const uint8* p, uint n) + { + uint hist[256]; + utils::zero_object(hist); + + for (uint i = 0; i < n; i++) + hist[*p++]++; + + double entropy = 0.0f; + + const double invln2 = 1.0f/log(2.0f); + for (uint i = 0; i < 256; i++) + { + if (!hist[i]) + continue; + + double prob = static_cast(hist[i]) / n; + entropy += (-log(prob) * invln2) * hist[i]; + } + + return entropy; + } + + void compute_lower_pow2_dim(int& width, int& height) + { + const int tex_width = width; + const int tex_height = height; + + width = 1; + for ( ; ; ) + { + if ((width * 2) > tex_width) + break; + width *= 2; + } + + height = 1; + for ( ; ; ) + { + if ((height * 2) > tex_height) + break; + height *= 2; + } + } + + void compute_upper_pow2_dim(int& width, int& height) + { + if (!math::is_power_of_2((uint32)width)) + width = math::next_pow2((uint32)width); + + if (!math::is_power_of_2((uint32)height)) + height = math::next_pow2((uint32)height); + } + + } // namespace math +} // namespace crnlib diff --git a/crnlib/crn_math.h b/crnlib/crn_math.h new file mode 100644 index 00000000..6da79a52 --- /dev/null +++ b/crnlib/crn_math.h @@ -0,0 +1,222 @@ +// File: crn_math.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#if defined(_M_IX86) && defined(_MSC_VER) + #include + #pragma intrinsic(__emulu) + unsigned __int64 __emulu(unsigned int a,unsigned int b ); +#endif + +namespace crnlib +{ + namespace math + { + const float cNearlyInfinite = 1.0e+37f; + + const float cDegToRad = 0.01745329252f; + const float cRadToDeg = 57.29577951f; + + extern uint g_bitmasks[32]; + + template inline bool within_closed_range(T a, T b, T c) { return (a >= b) && (a <= c); } + + template inline bool 
within_open_range(T a, T b, T c) { return (a >= b) && (a < c); } + + // Yes I know these should probably be pass by ref, not val: + // http://www.stepanovpapers.com/notes.pdf + // Just don't use them on non-simple (non built-in) types! + template inline T minimum(T a, T b) { return (a < b) ? a : b; } + + template inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } + + template inline T maximum(T a, T b) { return (a > b) ? a : b; } + + template inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } + + template inline T lerp(T a, T b, U c) { return a + (b - a) * c; } + + template inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } + + template inline T saturate(T value) { return (value < 0.0f) ? 0.0f : ((value > 1.0f) ? 1.0f : value); } + + inline int float_to_int(float f) { return static_cast(f); } + + inline uint float_to_uint(float f) { return static_cast(f); } + + inline int float_to_int(double f) { return static_cast(f); } + + inline uint float_to_uint(double f) { return static_cast(f); } + + inline int float_to_int_round(float f) { return static_cast((f < 0.0f) ? -floor(-f + .5f) : floor(f + .5f)); } + + inline uint float_to_uint_round(float f) { return static_cast((f < 0.0f) ? 0.0f : floor(f + .5f)); } + + template inline int sign(T value) { return (value < 0) ? -1 : ((value > 0) ? 
1 : 0); } + + template inline T square(T value) { return value * value; } + + inline bool is_power_of_2(uint32 x) { return x && ((x & (x - 1U)) == 0U); } + inline bool is_power_of_2(uint64 x) { return x && ((x & (x - 1U)) == 0U); } + + template inline T align_up_value(T x, uint alignment) + { + CRNLIB_ASSERT(is_power_of_2(alignment)); + uint q = static_cast(x); + q = (q + alignment - 1) & (~(alignment - 1)); + return static_cast(q); + } + + template inline T align_down_value(T x, uint alignment) + { + CRNLIB_ASSERT(is_power_of_2(alignment)); + uint q = static_cast(x); + q = q & (~(alignment - 1)); + return static_cast(q); + } + + template inline T get_align_up_value_delta(T x, uint alignment) + { + return align_up_value(x, alignment) - x; + } + + // From "Hackers Delight" + inline uint32 next_pow2(uint32 val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint64 next_pow2(uint64 val) + { + val--; + val |= val >> 32; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + inline uint floor_log2i(uint v) + { + uint l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint ceil_log2i(uint v) + { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + l++; + return l; + } + + // Returns the total number of bits needed to encode v. 
+      inline uint total_bits(uint v)
+      {
+         // Shift right until nothing is left; the number of shifts is the bit length (0 when v == 0).
+         uint l = 0;
+         while (v > 0U)
+         {
+            v >>= 1;
+            l++;
+         }
+         return l;
+      }
+
+      // Actually counts the number of set bits, but hey
+      // Each iteration clears the lowest set bit (mask &= mask - 1), so the loop runs once per set bit.
+      inline uint bitmask_size(uint mask)
+      {
+         uint size = 0;
+         while (mask)
+         {
+            mask &= (mask - 1U);
+            size++;
+         }
+         return size;
+      }
+
+      // Returns the index of the lowest set bit of mask.
+      // An empty mask also returns 0, so callers cannot tell it apart from a mask with bit 0 set.
+      inline uint bitmask_ofs(uint mask)
+      {
+         if (!mask)
+            return 0;
+         uint ofs = 0;
+         while ((mask & 1U) == 0)
+         {
+            mask >>= 1U;
+            ofs++;
+         }
+         return ofs;
+      }
+
+      // See Bit Twiddling Hacks (public domain)
+      // http://www-graphics.stanford.edu/~seander/bithacks.html
+      // Returns the number of consecutive zero bits at the low end of v; 32 when v == 0.
+      // The constants below are written for a 32-bit v.
+      inline uint count_trailing_zero_bits(uint v)
+      {
+         uint c = 32; // c will be the number of zero bits on the right
+
+         static const unsigned int B[] = { 0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF };
+         static const unsigned int S[] = { 1, 2, 4, 8, 16 }; // Our Magic Binary Numbers
+
+         for (int i = 4; i >= 0; --i) // unroll for more speed
+         {
+            if (v & B[i])
+            {
+               v <<= S[i];
+               c -= S[i];
+            }
+         }
+
+         if (v)
+         {
+            c--;
+         }
+
+         return c;
+      }
+
+      // Returns the number of zero bits above the highest set bit of v; 32 when v == 0.
+      // Narrows the search by halves (16, 8, 4, 2, 1), keeping the upper part whenever it is non-zero.
+      inline uint count_leading_zero_bits(uint v)
+      {
+         uint temp;
+         uint result = 32U;
+
+         temp = (v >> 16U); if (temp) { result -= 16U; v = temp; }
+         temp = (v >> 8U); if (temp) { result -= 8U; v = temp; }
+         temp = (v >> 4U); if (temp) { result -= 4U; v = temp; }
+         temp = (v >> 2U); if (temp) { result -= 2U; v = temp; }
+         temp = (v >> 1U); if (temp) { result -= 1U; v = temp; }
+
+         if (v & 1U)
+            result--;
+
+         return result;
+      }
+
+      // Unsigned 32x32 -> 64-bit multiply.
+      // MSVC/x86 builds use the __emulu intrinsic; everywhere else both operands are widened
+      // to uint64 *before* the multiply so the product is not truncated to 32 bits.
+      inline uint64 emulu(uint32 a, uint32 b)
+      {
+#if defined(_M_IX86) && defined(_MSC_VER)
+         return __emulu(a, b);
+#else
+         return static_cast<uint64>(a) * static_cast<uint64>(b);
+#endif
+      }
+
+      double compute_entropy(const uint8* p, uint n);
+
+      void compute_lower_pow2_dim(int& width, int& height);
+      void compute_upper_pow2_dim(int& width, int& height);
+   }
+
+} // namespace crnlib
+
diff --git a/crnlib/crn_matrix.h b/crnlib/crn_matrix.h
new file mode 100644
index 00000000..14b44bf8
--- /dev/null
+++ b/crnlib/crn_matrix.h
@@ -0,0 +1,565 @@
+// File: 
crn_matrix.h
+// See Copyright Notice and license at the end of inc/crnlib.h
+#pragma once
+
+#include "crn_vec.h"
+
+namespace crnlib
+{
+   // result = lhs * rhs.
+   // result must not alias either operand (asserted): it is written while the operands are still being read.
+   template<class X, class Y, class Z> Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs)
+   {
+      CRNLIB_ASSUME(Z::num_rows == X::num_rows);
+      CRNLIB_ASSUME(Z::num_cols == Y::num_cols);
+      CRNLIB_ASSUME(X::num_cols == Y::num_rows);
+      CRNLIB_ASSERT((&result != &lhs) && (&result != &rhs));
+      for (int r = 0; r < X::num_rows; r++)
+         for (int c = 0; c < Y::num_cols; c++)
+         {
+            typename Z::scalar_type s = lhs(r, 0) * rhs(0, c);
+            for (uint i = 1; i < X::num_cols; i++)
+               s += lhs(r, i) * rhs(i, c);
+            result(r, c) = s;
+         }
+      return result;
+   }
+
+   // result = transpose(lhs) * rhs (lhs is indexed as lhs(i, r)).
+   template<class X, class Y, class Z> Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs)
+   {
+      CRNLIB_ASSUME(Z::num_rows == X::num_cols);
+      CRNLIB_ASSUME(Z::num_cols == Y::num_cols);
+      CRNLIB_ASSUME(X::num_rows == Y::num_rows);
+      for (int r = 0; r < X::num_cols; r++)
+         for (int c = 0; c < Y::num_cols; c++)
+         {
+            typename Z::scalar_type s = lhs(0, r) * rhs(0, c);
+            for (uint i = 1; i < X::num_rows; i++)
+               s += lhs(i, r) * rhs(i, c);
+            result(r, c) = s;
+         }
+      return result;
+   }
+
+   // result = lhs * transpose(rhs) (rhs is indexed as rhs(c, i)).
+   template<class X, class Y, class Z> Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs)
+   {
+      CRNLIB_ASSUME(Z::num_rows == X::num_rows);
+      CRNLIB_ASSUME(Z::num_cols == Y::num_rows);
+      CRNLIB_ASSUME(X::num_cols == Y::num_cols);
+      for (int r = 0; r < X::num_rows; r++)
+         for (int c = 0; c < Y::num_rows; c++)
+         {
+            typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0);
+            for (uint i = 1; i < X::num_cols; i++)
+               s += lhs(r, i) * rhs(c, i);
+            result(r, c) = s;
+         }
+      return result;
+   }
+
+   // Fixed-size R x C matrix of T, stored as R row vectors.
+   template<uint R, uint C, typename T>
+   class matrix
+   {
+   public:
+      typedef T scalar_type;
+      enum { num_rows = R, num_cols = C };
+
+      typedef vec<R, T> col_vec;
+      typedef vec<(R > 1) ? (R - 1) : 0, T> subcol_vec;
+
+      typedef vec<C, T> row_vec;
+      typedef vec<(C > 1) ? (C - 1) : 0, T> subrow_vec;
+
+      // Leaves the rows to row_vec's own default constructor.
+      inline matrix() { }
+
+      inline matrix(eClear) { clear(); }
+
+      inline matrix(const T* p) { set(p); }
+
+      inline matrix(const matrix& other)
+      {
+         for (uint i = 0; i < R; i++)
+            m_rows[i] = other.m_rows[i];
+      }
+
+      inline matrix& operator= (const matrix& rhs)
+      {
+         if (this != &rhs)
+            for (uint i = 0; i < R; i++)
+               m_rows[i] = rhs.m_rows[i];
+         return *this;
+      }
+
+      inline matrix(T val00, T val01,
+                    T val10, T val11)
+      {
+         set(val00, val01, val10, val11);
+      }
+
+      inline matrix(T val00, T val01, T val02,
+                    T val10, T val11, T val12,
+                    T val20, T val21, T val22)
+      {
+         set(val00, val01, val02, val10, val11, val12, val20, val21, val22);
+      }
+
+      inline matrix(T val00, T val01, T val02, T val03,
+                    T val10, T val11, T val12, T val13,
+                    T val20, T val21, T val22, T val23,
+                    T val30, T val31, T val32, T val33)
+      {
+         set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23, val30, val31, val32, val33);
+      }
+
+      // Loads the rows from p, advancing C elements per row.
+      // NOTE(review): takes const float* while the pointer constructor passes const T* - confirm for T != float.
+      inline void set(const float* p)
+      {
+         for (uint i = 0; i < R; i++)
+         {
+            m_rows[i].set(p);
+            p += C;
+         }
+      }
+
+      // The fixed-arity set() overloads fill the leading rows and clear any rows beyond those supplied.
+      // Only the row count is checked against R here.
+      inline void set(T val00, T val01,
+                      T val10, T val11)
+      {
+         m_rows[0].set(val00, val01);
+         if (R >= 2)
+         {
+            m_rows[1].set(val10, val11);
+
+            for (uint i = 2; i < R; i++)
+               m_rows[i].clear();
+         }
+      }
+
+      inline void set(T val00, T val01, T val02,
+                      T val10, T val11, T val12,
+                      T val20, T val21, T val22)
+      {
+         m_rows[0].set(val00, val01, val02);
+         if (R >= 2)
+         {
+            m_rows[1].set(val10, val11, val12);
+            if (R >= 3)
+            {
+               m_rows[2].set(val20, val21, val22);
+
+               for (uint i = 3; i < R; i++)
+                  m_rows[i].clear();
+            }
+         }
+      }
+
+      inline void set(T val00, T val01, T val02, T val03,
+                      T val10, T val11, T val12, T val13,
+                      T val20, T val21, T val22, T val23,
+                      T val30, T val31, T val32, T val33)
+      {
+         m_rows[0].set(val00, val01, val02, val03);
+         if (R >= 2)
+         {
+            m_rows[1].set(val10, val11, val12, val13);
+            if (R >= 3)
+            {
+               m_rows[2].set(val20, val21, val22, val23);
+
+               if (R >= 4)
+               {
+                  m_rows[3].set(val30, val31, val32, val33);
+
+                  for (uint i = 4; i < R; i++)
+                     m_rows[i].clear();
+               }
+            }
+         }
+      }
+
+      // Element access by (row, column); indices are bounds-checked by assertion only.
+      inline T operator() (uint r, uint c) const
+      {
+         CRNLIB_ASSERT((r < R) && (c < C));
+         return m_rows[r][c];
+      }
+
+      inline T& operator() (uint r, uint c)
+      {
+         CRNLIB_ASSERT((r < R) && (c < C));
+         return m_rows[r][c];
+      }
+
+      // Row access.
+      inline const row_vec& operator[] (uint r) const
+      {
+         CRNLIB_ASSERT(r < R);
+         return m_rows[r];
+      }
+
+      inline row_vec& operator[] (uint r)
+      {
+         CRNLIB_ASSERT(r < R);
+         return m_rows[r];
+      }
+
+      inline const row_vec& get_row (uint r) const { return (*this)[r]; }
+      inline row_vec& get_row (uint r) { return (*this)[r]; }
+
+      // Columns are not stored contiguously, so a column is gathered into (or scattered from) a temporary.
+      inline col_vec get_col(uint c) const
+      {
+         CRNLIB_ASSERT(c < C);
+         col_vec result;
+         for (uint i = 0; i < R; i++)
+            result[i] = m_rows[i][c];
+         return result;
+      }
+
+      inline void set_col(uint c, const col_vec& col)
+      {
+         CRNLIB_ASSERT(c < C);
+         for (uint i = 0; i < R; i++)
+            m_rows[i][c] = col[i];
+      }
+
+      // Writes the first R - 1 entries of column c from col and zeroes the last entry.
+      inline void set_col(uint c, const subcol_vec& col)
+      {
+         CRNLIB_ASSERT(c < C);
+         for (uint i = 0; i < (R - 1); i++)
+            m_rows[i][c] = col[i];
+
+         m_rows[R - 1][c] = 0.0f;
+      }
+
+      // The translation is kept in the last row.
+      inline const row_vec& get_translate() const
+      {
+         return m_rows[R - 1];
+      }
+
+      inline matrix& set_translate(const row_vec& r)
+      {
+         m_rows[R - 1] = r;
+         return *this;
+      }
+
+      inline matrix& set_translate(const subrow_vec& r)
+      {
+         m_rows[R - 1] = row_vec(r).as_point();
+         return *this;
+      }
+
+      // Raw pointer to the first row's storage.
+      inline const T* get_ptr() const { return reinterpret_cast<const T*>(&m_rows[0]); }
+      inline T* get_ptr() { return reinterpret_cast< T*>(&m_rows[0]); }
+
+      inline matrix& operator+= (const matrix& other)
+      {
+         for (uint i = 0; i < R; i++)
+            m_rows[i] += other.m_rows[i];
+         return *this;
+      }
+
+      inline matrix& operator-= (const matrix& other)
+      {
+         for (uint i = 0; i < R; i++)
+            m_rows[i] -= other.m_rows[i];
+         return *this;
+      }
+
+      inline matrix& operator*= (T val)
+      {
+         for (uint i = 0; i < R; i++)
+            m_rows[i] *= val;
+         return *this;
+      }
+
+      inline matrix& operator/= (T val)
+      {
+         for (uint i = 0; i < R; i++)
+            m_rows[i] /= val;
+         return *this;
+      }
+
+      // Multiplies into a temporary because matrix_mul_helper asserts that result does not alias an operand.
+      inline matrix& operator*= (const matrix& other)
+      {
+         matrix result;
+         matrix_mul_helper(result, *this, other);
+         *this = result;
+         return *this;
+      }
+
+      friend inline matrix operator+ (const matrix& lhs, const matrix& rhs)
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            result[i] = lhs.m_rows[i] + rhs.m_rows[i];
+         return result;
+      }
+
+      friend inline matrix operator- (const matrix& lhs, const matrix& rhs)
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            result[i] = lhs.m_rows[i] - rhs.m_rows[i];
+         return result;
+      }
+
+      friend inline matrix operator* (const matrix& lhs, T val)
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            result[i] = lhs.m_rows[i] * val;
+         return result;
+      }
+
+      friend inline matrix operator/ (const matrix& lhs, T val)
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            result[i] = lhs.m_rows[i] / val;
+         return result;
+      }
+
+      friend inline matrix operator* (T val, const matrix& rhs)
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            result[i] = val * rhs.m_rows[i];
+         return result;
+      }
+
+      friend inline matrix operator* (const matrix& lhs, const matrix& rhs)
+      {
+         matrix result;
+         return matrix_mul_helper(result, lhs, rhs);
+      }
+
+      friend inline row_vec operator* (const col_vec& a, const matrix& b)
+      {
+         return transform(a, b);
+      }
+
+      inline matrix operator+ () const
+      {
+         return *this;
+      }
+
+      inline matrix operator- () const
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            result[i] = -m_rows[i];
+         return result;
+      }
+
+      inline void clear(void)
+      {
+         for (uint i = 0; i < R; i++)
+            m_rows[i].clear();
+      }
+
+      inline void set_zero_matrix()
+      {
+         clear();
+      }
+
+      // NOTE(review): writes m_rows[i][i] for every i < R, so this looks like it needs C >= R - confirm.
+      inline void set_identity_matrix()
+      {
+         for (uint i = 0; i < R; i++)
+         {
+            m_rows[i].clear();
+            m_rows[i][i] = 1.0f;
+         }
+      }
+
+      // Uniform scale: s on the first R - 1 diagonal entries, 1 in the last row's last column.
+      inline matrix& set_scale_matrix(float s)
+      {
+         clear();
+         for (int i = 0; i < (R - 1); i++)
+            m_rows[i][i] = s;
+         m_rows[R - 1][C - 1] = 1.0f;
+         return *this;
+      }
+
+      // Per-axis scale: every diagonal entry comes from s, including the last one.
+      inline matrix& set_scale_matrix(const row_vec& s)
+      {
+         clear();
+         for (uint i = 0; i < R; i++)
+            m_rows[i][i] = s[i];
+         return *this;
+      }
+
+      inline matrix& set_translate_matrix(const row_vec& s)
+      {
+         set_identity_matrix();
+         set_translate(s);
+         return *this;
+      }
+
+      inline matrix& set_translate_matrix(float x, float y)
+      {
+         set_identity_matrix();
+         set_translate(row_vec(x, y).as_point());
+         return *this;
+      }
+
+      inline matrix& set_translate_matrix(float x, float y, float z)
+      {
+         set_identity_matrix();
+         set_translate(row_vec(x, y, z).as_point());
+         return *this;
+      }
+
+      // NOTE(review): reads m_rows[j][i] with j < C and i < R and returns the same matrix type,
+      // so both transpose methods look like they assume R == C - confirm.
+      inline matrix get_transposed(void) const
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            for (uint j = 0; j < C; j++)
+               result.m_rows[i][j] = m_rows[j][i];
+         return result;
+      }
+
+      inline matrix& transpose_in_place(void)
+      {
+         matrix result;
+         for (uint i = 0; i < R; i++)
+            for (uint j = 0; j < C; j++)
+               result.m_rows[i][j] = m_rows[j][i];
+         *this = result;
+         return *this;
+      }
+
+      // This method transforms a column vec by a matrix (D3D-style).
+      static inline row_vec transform(const col_vec& a, const matrix& b)
+      {
+         row_vec result(b[0] * a[0]);
+         for (uint r = 1; r < R; r++)
+            result += b[r] * a[r];
+         return result;
+      }
+
+      // This method transforms a column vec by a matrix. Last component of vec is assumed to be 1.
+      static inline row_vec transform_point(const col_vec& a, const matrix& b)
+      {
+         row_vec result(0);
+         for (int r = 0; r < (R - 1); r++)
+            result += b[r] * a[r];
+         result += b[R - 1];
+         return result;
+      }
+
+      // This method transforms a column vec by a matrix. Last component of vec is assumed to be 0.
+      static inline row_vec transform_vector(const col_vec& a, const matrix& b)
+      {
+         // Accumulates a[r] * row r for the first R - 1 rows only; the last row is never added.
+         row_vec result(0);
+         for (int r = 0; r < (R - 1); r++)
+            result += b[r] * a[r];
+         return result;
+      }
+
+      // Transforms a vec that is one element shorter than a column, as a point:
+      // rows past the end of a are weighted by 1.0f, and only the first C - 1 output columns are computed.
+      static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b)
+      {
+         subcol_vec result(0);
+         for (int r = 0; r < R; r++)
+         {
+            const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f;
+            for (int c = 0; c < (C - 1); c++)
+               result[c] += b[r][c] * s;
+         }
+         return result;
+      }
+
+      // Same as the subcol_vec transform_point, but the last row of b is skipped entirely.
+      static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b)
+      {
+         subcol_vec result(0);
+         for (int r = 0; r < (R - 1); r++)
+         {
+            const T s = a[r];
+            for (int c = 0; c < (C - 1); c++)
+               result[c] += b[r][c] * s;
+         }
+         return result;
+      }
+
+      // This method transforms a column vec by the transpose of a matrix.
+      // Requires a square matrix. NOTE(review): b[r] * a is presumably a vec * vec dot product
+      // (operator defined in crn_vec.h, not visible here) - confirm.
+      static inline col_vec transform_transposed(const matrix& b, const col_vec& a)
+      {
+         CRNLIB_ASSUME(R == C);
+         col_vec result;
+         for (uint r = 0; r < R; r++)
+            result[r] = b[r] * a;
+         return result;
+      }
+
+      // This method transforms a column vec by the transpose of a matrix. Last component of vec is assumed to be 0.
+      // Requires a square matrix; only the first C - 1 columns of each row contribute.
+      static inline col_vec transform_vector_transposed(const matrix& b, const col_vec& a)
+      {
+         CRNLIB_ASSUME(R == C);
+         col_vec result;
+         for (uint r = 0; r < R; r++)
+         {
+            T s = 0;
+            for (uint c = 0; c < (C - 1); c++)
+               s += b[r][c] * a[c];
+
+            result[r] = s;
+         }
+         return result;
+      }
+
+      // This method transforms a matrix by a row vector (OGL style).
+ static inline col_vec transform(const matrix& b, const row_vec& a) + { + col_vec result; + for (int r = 0; r < R; r++) + result[r] = b[r] * a; + return result; + } + + static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) + { + return matrix_mul_helper(result, lhs, rhs); + } + + static inline matrix make_scale_matrix(float s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(const row_vec& s) + { + return matrix().set_scale_matrix(s); + } + + static inline matrix make_scale_matrix(float x, float y) + { + CRNLIB_ASSUME(R >= 3 && C >= 3); + matrix result; + result.clear(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + result.m_rows[2][2] = 1.0f; + return result; + } + + static inline matrix make_scale_matrix(float x, float y, float z) + { + CRNLIB_ASSUME(R >= 4 && C >= 4); + matrix result; + result.clear(); + result.m_rows[0][0] = x; + result.m_rows[1][1] = y; + result.m_rows[2][2] = z; + result.m_rows[3][3] = 1.0f; + return result; + } + + private: + row_vec m_rows[R]; + }; + + typedef matrix<2, 2, float> matrix22F; + typedef matrix<2, 2, double> matrix22D; + + typedef matrix<3, 3, float> matrix33F; + typedef matrix<3, 3, double> matrix33D; + + typedef matrix<4, 4, float> matrix44F; + typedef matrix<4, 4, double> matrix44D; + + typedef matrix<8, 8, float> matrix88F; + +} // namespace crnlib diff --git a/crnlib/crn_mem.cpp b/crnlib/crn_mem.cpp new file mode 100644 index 00000000..02e73fea --- /dev/null +++ b/crnlib/crn_mem.cpp @@ -0,0 +1,289 @@ +// File: crn_mem.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_spinlock.h" +#include "crn_console.h" +#include "../inc/crnlib.h" +#include +#include "crn_winhdr.h" + +#define CRNLIB_MEM_STATS 0 + +#ifndef CRNLIB_USE_WIN32_API +#define _msize malloc_usable_size +#endif + +namespace crnlib +{ +#if CRNLIB_MEM_STATS + #if CRNLIB_64BIT_POINTERS + typedef LONGLONG mem_stat_t; + #define 
CRNLIB_MEM_COMPARE_EXCHANGE InterlockedCompareExchange64 + #else + typedef LONG mem_stat_t; + #define CRNLIB_MEM_COMPARE_EXCHANGE InterlockedCompareExchange + #endif + + static volatile mem_stat_t g_total_blocks; + static volatile mem_stat_t g_total_allocated; + static volatile mem_stat_t g_max_allocated; + + static mem_stat_t update_total_allocated(int block_delta, mem_stat_t byte_delta) + { + mem_stat_t cur_total_blocks; + for ( ; ; ) + { + cur_total_blocks = (mem_stat_t)g_total_blocks; + mem_stat_t new_total_blocks = static_cast(cur_total_blocks + block_delta); + CRNLIB_ASSERT(new_total_blocks >= 0); + if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_blocks, new_total_blocks, cur_total_blocks) == cur_total_blocks) + break; + } + + mem_stat_t cur_total_allocated, new_total_allocated; + for ( ; ; ) + { + cur_total_allocated = g_total_allocated; + new_total_allocated = static_cast(cur_total_allocated + byte_delta); + CRNLIB_ASSERT(new_total_allocated >= 0); + if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_allocated, new_total_allocated, cur_total_allocated) == cur_total_allocated) + break; + } + for ( ; ; ) + { + mem_stat_t cur_max_allocated = g_max_allocated; + mem_stat_t new_max_allocated = CRNLIB_MAX(new_total_allocated, cur_max_allocated); + if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_max_allocated, new_max_allocated, cur_max_allocated) == cur_max_allocated) + break; + } + return new_total_allocated; + } +#endif // CRNLIB_MEM_STATS + + static void* crnlib_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data) + { + pUser_data; + + void* p_new; + + if (!p) + { + p_new = ::malloc(size); + CRNLIB_ASSERT( (reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + + if (pActual_size) + *pActual_size = p_new ? 
::_msize(p_new) : 0; + } + else if (!size) + { + ::free(p); + p_new = NULL; + + if (pActual_size) + *pActual_size = 0; + } + else + { + void* p_final_block = p; +#ifdef WIN32 + p_new = ::_expand(p, size); +#else + + p_new = NULL; +#endif + + if (p_new) + { + CRNLIB_ASSERT( (reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + p_final_block = p_new; + } + else if (movable) + { + p_new = ::realloc(p, size); + + if (p_new) + { + CRNLIB_ASSERT( (reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0 ); + p_final_block = p_new; + } + } + + if (pActual_size) + *pActual_size = ::_msize(p_final_block); + } + + return p_new; + } + + static size_t crnlib_default_msize(void* p, void* pUser_data) + { + pUser_data; + return p ? _msize(p) : 0; + } + + static crn_realloc_func g_pRealloc = crnlib_default_realloc; + static crn_msize_func g_pMSize = crnlib_default_msize; + static void* g_pUser_data; + + void crnlib_mem_error(const char* p_msg) + { + crnlib_assert(p_msg, __FILE__, __LINE__); + } + + void* crnlib_malloc(size_t size, size_t* pActual_size) + { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + size = sizeof(uint32); + + if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) + { + crnlib_mem_error("crnlib_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); + + if (pActual_size) + *pActual_size = actual_size; + + if ((!p_new) || (actual_size < size)) + { + crnlib_mem_error("crnlib_malloc: out of memory"); + return NULL; + } + + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + +#if CRNLIB_MEM_STATS + CRNLIB_ASSERT((*g_pMSize)(p_new, g_pUser_data) == actual_size); + update_total_allocated(1, static_cast(actual_size)); +#endif + + return p_new; + } + + void* crnlib_realloc(void* p, size_t size, size_t* pActual_size, bool movable) + { + if ((ptr_bits_t)p & (CRNLIB_MIN_ALLOC_ALIGNMENT - 
1)) + { + crnlib_mem_error("crnlib_realloc: bad ptr"); + return NULL; + } + + if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) + { + crnlib_mem_error("crnlib_malloc: size too big"); + return NULL; + } + +#if CRNLIB_MEM_STATS + size_t cur_size = p ? (*g_pMSize)(p, g_pUser_data) : 0; + CRNLIB_ASSERT(!p || (cur_size >= sizeof(uint32))); +#endif + if ((size) && (size < sizeof(uint32))) + size = sizeof(uint32); + + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + + if (pActual_size) + *pActual_size = actual_size; + + CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); + +#if CRNLIB_MEM_STATS + CRNLIB_ASSERT(!p_new || ((*g_pMSize)(p_new, g_pUser_data) == actual_size)); + + int num_new_blocks = 0; + if (p) + { + if (!p_new) + num_new_blocks = -1; + } + else if (p_new) + { + num_new_blocks = 1; + } + update_total_allocated(num_new_blocks, static_cast(actual_size) - static_cast(cur_size)); +#endif + + return p_new; + } + + void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size) + { + size_t total = count * size; + void *p = crnlib_malloc(total, pActual_size); + if (p) memset(p, 0, total); + return p; + } + + void crnlib_free(void* p) + { + if (!p) + return; + + if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) + { + crnlib_mem_error("crnlib_free: bad ptr"); + return; + } + +#if CRNLIB_MEM_STATS + size_t cur_size = (*g_pMSize)(p, g_pUser_data); + CRNLIB_ASSERT(cur_size >= sizeof(uint32)); + update_total_allocated(-1, -static_cast(cur_size)); +#endif + + (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); + } + + size_t crnlib_msize(void* p) + { + if (!p) + return 0; + + if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) + { + crnlib_mem_error("crnlib_msize: bad ptr"); + return 0; + } + + return (*g_pMSize)(p, g_pUser_data); + } + + void crnlib_print_mem_stats() + { +#if CRNLIB_MEM_STATS + if (console::is_initialized()) + { + console::debug(L"crnlib_print_mem_stats:"); 
+ console::debug(L"Current blocks: %u, allocated: %I64u, max ever allocated: %I64i", g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); + } + else + { + printf("crnlib_print_mem_stats:\n"); + printf("Current blocks: %u, allocated: %I64u, max ever allocated: %I64i\n", g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); + } +#endif + } + +} // namespace crnlib + +void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data) +{ + if ((!pRealloc) || (!pMSize)) + { + crnlib::g_pRealloc = crnlib::crnlib_default_realloc; + crnlib::g_pMSize = crnlib::crnlib_default_msize; + crnlib::g_pUser_data = NULL; + } + else + { + crnlib::g_pRealloc = pRealloc; + crnlib::g_pMSize = pMSize; + crnlib::g_pUser_data = pUser_data; + } +} diff --git a/crnlib/crn_mem.h b/crnlib/crn_mem.h new file mode 100644 index 00000000..aea30f3e --- /dev/null +++ b/crnlib/crn_mem.h @@ -0,0 +1,185 @@ +// File: crn_mem.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#ifndef CRNLIB_MIN_ALLOC_ALIGNMENT +#define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 +#endif + +namespace crnlib +{ +#if CRNLIB_64BIT_POINTERS + const uint64 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x400000000ULL; +#else + const uint32 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; +#endif + + void* crnlib_malloc(size_t size, size_t* pActual_size = NULL); + void* crnlib_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); + void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size = NULL); + void crnlib_free(void* p); + size_t crnlib_msize(void* p); + void crnlib_print_mem_stats(); + void crnlib_mem_error(const char* p_msg); + + // omfg - there must be a better way + + template + inline T* crnlib_new() + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + if (CRNLIB_IS_SCALAR_TYPE(T)) + return p; + return helpers::construct(p); + } + + template + inline T* crnlib_new(const A& init0) + { + T* p = 
static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0); + } + + template + inline T* crnlib_new(A& init0) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0); + } + + template + inline T* crnlib_new(const A& init0, const B& init1) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& 
init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10); + } + + template + inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10, const L& init11) + { + T* p = static_cast(crnlib_malloc(sizeof(T))); + return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10, init11); + } + + template + inline T* crnlib_new_array(uint32 num) + { + if (!num) num = 1; + + uint64 total = CRNLIB_MIN_ALLOC_ALIGNMENT + sizeof(T) * num; + if (total > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) + { + crnlib_mem_error("crnlib_new_array: Array too large!"); + return NULL; + } + uint8* q = static_cast(crnlib_malloc(static_cast(total))); + + T* p = reinterpret_cast(q + CRNLIB_MIN_ALLOC_ALIGNMENT); + + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; + + if (!CRNLIB_IS_SCALAR_TYPE(T)) + { + helpers::construct_array(p, num); + } + return p; + } + + template + inline void crnlib_delete(T* p) + { + if (p) + { 
+ if (!CRNLIB_IS_SCALAR_TYPE(T)) + { + helpers::destruct(p); + } + crnlib_free(p); + } + } + + template + inline void crnlib_delete_array(T* p) + { + if (p) + { + const uint32 num = reinterpret_cast(p)[-1]; + const uint32 num_check = reinterpret_cast(p)[-2]; + CRNLIB_ASSERT(num && (num == ~num_check)); + if (num == ~num_check) + { + if (!CRNLIB_IS_SCALAR_TYPE(T)) + { + helpers::destruct_array(p, num); + } + + crnlib_free(reinterpret_cast(p) - CRNLIB_MIN_ALLOC_ALIGNMENT); + } + } + } + +} // namespace crnlib diff --git a/crnlib/crn_mutex.h b/crnlib/crn_mutex.h new file mode 100644 index 00000000..f0824dd1 --- /dev/null +++ b/crnlib/crn_mutex.h @@ -0,0 +1,40 @@ +// File: crn_mutex.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + class mutex + { + mutex(const mutex&); + mutex& operator= (const mutex&); + + public: + mutex(unsigned int spin_count = 0); + ~mutex(); + void lock(); + void unlock(); + void set_spin_count(unsigned int count); + + private: + int m_buf[12]; + +#ifdef CRNLIB_BUILD_DEBUG + unsigned int m_lock_count; +#endif + }; + + class scoped_mutex + { + scoped_mutex(const scoped_mutex&); + scoped_mutex& operator= (const scoped_mutex&); + + public: + inline scoped_mutex(mutex& m) : m_mutex(m) { m_mutex.lock(); } + inline ~scoped_mutex() { m_mutex.unlock(); } + + private: + mutex& m_mutex; + }; + +} // namespace crnlib diff --git a/crnlib/crn_packed_uint.h b/crnlib/crn_packed_uint.h new file mode 100644 index 00000000..bde60c17 --- /dev/null +++ b/crnlib/crn_packed_uint.h @@ -0,0 +1,91 @@ +// File: crn_packed_uint +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + template + struct packed_uint + { + inline packed_uint() { } + + inline packed_uint(unsigned int val) { *this = val; } + + inline packed_uint(const packed_uint& other) { *this = other; } + + inline packed_uint& operator= (const packed_uint& rhs) + { + if (this != &rhs) + memcpy(m_buf, 
rhs.m_buf, sizeof(m_buf)); + return *this; + } + + inline packed_uint& operator= (unsigned int val) + { +#ifdef CRNLIB_BUILD_DEBUG + if (N == 1) + { + CRNLIB_ASSERT(val <= 0xFFU); + } + else if (N == 2) + { + CRNLIB_ASSERT(val <= 0xFFFFU); + } + else if (N == 3) + { + CRNLIB_ASSERT(val <= 0xFFFFFFU); + } +#endif + + val <<= (8U * (4U - N)); + + for (unsigned int i = 0; i < N; i++) + { + m_buf[i] = static_cast(val >> 24U); + val <<= 8U; + } + + return *this; + } + + inline operator unsigned int() const + { + switch (N) + { + case 1: return m_buf[0]; + case 2: return (m_buf[0] << 8U) | m_buf[1]; + case 3: return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); + default: return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); + } + } + + unsigned char m_buf[N]; + }; + template + class packed_value + { + public: + packed_value() { } + packed_value(T val) { *this = val; } + + inline operator T() const + { + T result = 0; + for (int i = sizeof(T) - 1; i >= 0; i--) + result = static_cast((result << 8) | m_bytes[i]); + return result; + } + packed_value& operator= (T val) + { + for (int i = 0; i < sizeof(T); i++) + { + m_bytes[i] = static_cast(val); + val >>= 8; + } + return *this; + } + private: + uint8 m_bytes[sizeof(T)]; + }; +} // namespace crnlib + diff --git a/crnlib/crn_pixel_format.cpp b/crnlib/crn_pixel_format.cpp new file mode 100644 index 00000000..9191e20b --- /dev/null +++ b/crnlib/crn_pixel_format.cpp @@ -0,0 +1,317 @@ +// File: crn_pixel_format.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_pixel_format.h" +#include "crn_image.h" + +namespace crnlib +{ + namespace pixel_format_helpers + { + const pixel_format g_all_pixel_formats[] = + { + PIXEL_FMT_DXT1, + PIXEL_FMT_DXT2, + PIXEL_FMT_DXT3, + PIXEL_FMT_DXT4, + PIXEL_FMT_DXT5, + PIXEL_FMT_3DC, + PIXEL_FMT_DXN, + PIXEL_FMT_DXT5A, + PIXEL_FMT_DXT5_CCxY, + PIXEL_FMT_DXT5_xGxR, + PIXEL_FMT_DXT5_xGBR, + PIXEL_FMT_DXT5_AGBR, + 
PIXEL_FMT_DXT1A, + PIXEL_FMT_R8G8B8, + PIXEL_FMT_L8, + PIXEL_FMT_A8, + PIXEL_FMT_A8L8, + PIXEL_FMT_A8R8G8B8 + }; + + uint get_num_formats() + { + return sizeof(g_all_pixel_formats) / sizeof(g_all_pixel_formats[0]); + } + + pixel_format get_pixel_format_by_index(uint index) + { + CRNLIB_ASSERT(index < get_num_formats()); + return g_all_pixel_formats[index]; + } + + const wchar_t* get_pixel_format_string(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_INVALID: return L"INVALID"; + case PIXEL_FMT_DXT1: return L"DXT1"; + case PIXEL_FMT_DXT1A: return L"DXT1A"; + case PIXEL_FMT_DXT2: return L"DXT2"; + case PIXEL_FMT_DXT3: return L"DXT3"; + case PIXEL_FMT_DXT4: return L"DXT4"; + case PIXEL_FMT_DXT5: return L"DXT5"; + case PIXEL_FMT_3DC: return L"3DC"; + case PIXEL_FMT_DXN: return L"DXN"; + case PIXEL_FMT_DXT5A: return L"DXT5A"; + case PIXEL_FMT_DXT5_CCxY: return L"DXT5_CCxY"; + case PIXEL_FMT_DXT5_xGxR: return L"DXT5_xGxR"; + case PIXEL_FMT_DXT5_xGBR: return L"DXT5_xGBR"; + case PIXEL_FMT_DXT5_AGBR: return L"DXT5_AGBR"; + case PIXEL_FMT_R8G8B8: return L"R8G8B8"; + case PIXEL_FMT_A8R8G8B8: return L"A8R8G8B8"; + case PIXEL_FMT_A8: return L"A8"; + case PIXEL_FMT_L8: return L"L8"; + case PIXEL_FMT_A8L8: return L"A8L8"; + default: break; + } + CRNLIB_ASSERT(false); + return L"?"; + } + + const char* get_pixel_format_stringa(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_INVALID: return "INVALID"; + case PIXEL_FMT_DXT1: return "DXT1"; + case PIXEL_FMT_DXT1A: return "DXT1A"; + case PIXEL_FMT_DXT2: return "DXT2"; + case PIXEL_FMT_DXT3: return "DXT3"; + case PIXEL_FMT_DXT4: return "DXT4"; + case PIXEL_FMT_DXT5: return "DXT5"; + case PIXEL_FMT_3DC: return "3DC"; + case PIXEL_FMT_DXN: return "DXN"; + case PIXEL_FMT_DXT5A: return "DXT5A"; + case PIXEL_FMT_DXT5_CCxY: return "DXT5_CCxY"; + case PIXEL_FMT_DXT5_xGxR: return "DXT5_xGxR"; + case PIXEL_FMT_DXT5_xGBR: return "DXT5_xGBR"; + case PIXEL_FMT_DXT5_AGBR: return "DXT5_AGBR"; + case PIXEL_FMT_R8G8B8: return 
"R8G8B8"; + case PIXEL_FMT_A8R8G8B8: return "A8R8G8B8"; + case PIXEL_FMT_A8: return "A8"; + case PIXEL_FMT_L8: return "L8"; + case PIXEL_FMT_A8L8: return "A8L8"; + default: break; + } + CRNLIB_ASSERT(false); + return "?"; + } + + const wchar_t* get_crn_format_string(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: return L"DXT1"; + case cCRNFmtDXT3: return L"DXT3"; + case cCRNFmtDXT5: return L"DXT5"; + case cCRNFmtDXT5_CCxY: return L"DXT5_CCxY"; + case cCRNFmtDXT5_xGBR: return L"DXT5_xGBR"; + case cCRNFmtDXT5_AGBR: return L"DXT5_AGBR"; + case cCRNFmtDXT5_xGxR: return L"DXT5_xGxR"; + case cCRNFmtDXN_XY: return L"DXN_XY"; + case cCRNFmtDXN_YX: return L"DXN_YX"; + case cCRNFmtDXT5A: return L"DXT5A"; + default: break; + } + CRNLIB_ASSERT(false); + return L"?"; + } + + const char* get_crn_format_stringa(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: return "DXT1"; + case cCRNFmtDXT3: return "DXT3"; + case cCRNFmtDXT5: return "DXT5"; + case cCRNFmtDXT5_CCxY: return "DXT5_CCxY"; + case cCRNFmtDXT5_xGBR: return "DXT5_xGBR"; + case cCRNFmtDXT5_AGBR: return "DXT5_AGBR"; + case cCRNFmtDXT5_xGxR: return "DXT5_xGxR"; + case cCRNFmtDXN_XY: return "DXN_XY"; + case cCRNFmtDXN_YX: return "DXN_YX"; + case cCRNFmtDXT5A: return "DXT5A"; + default: break; + } + CRNLIB_ASSERT(false); + return "?"; + } + + component_flags get_component_flags(pixel_format fmt) + { + // These flags are for *uncooked* pixels, i.e. after after adding Z to DXN maps, or converting YCC maps to RGB, etc. 
+ + uint flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; + switch (fmt) + { + case PIXEL_FMT_DXT1: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; + break; + } + case PIXEL_FMT_DXT1A: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT5A: + { + flags = cCompFlagAValid; + break; + } + case PIXEL_FMT_DXT5_CCxY: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagLumaChroma; + break; + } + case PIXEL_FMT_DXT5_xGBR: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_DXT5_AGBR: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_DXT5_xGxR: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_3DC: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_DXN: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; + break; + } + case PIXEL_FMT_R8G8B8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; + break; + } + case PIXEL_FMT_A8R8G8B8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; + break; + } + case PIXEL_FMT_A8: + { + flags = cCompFlagAValid; + break; + } + case PIXEL_FMT_L8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagGrayscale; + break; + } + case PIXEL_FMT_A8L8: + { + flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | 
cCompFlagAValid | cCompFlagGrayscale; + break; + } + default: + { + CRNLIB_ASSERT(0); + break; + } + } + return static_cast(flags); + } + + crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt) + { + crn_format fmt = cCRNFmtDXT1; + switch (crn_fmt) + { + case PIXEL_FMT_DXT1: + case PIXEL_FMT_DXT1A: + fmt = cCRNFmtDXT1; + break; + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + fmt = cCRNFmtDXT5; + break; + case PIXEL_FMT_3DC: + fmt = cCRNFmtDXN_YX; + break; + case PIXEL_FMT_DXN: + fmt = cCRNFmtDXN_XY; + break; + case PIXEL_FMT_DXT5A: + fmt = cCRNFmtDXT5A; + break; + case PIXEL_FMT_R8G8B8: + case PIXEL_FMT_L8: + fmt = cCRNFmtDXT1; + break; + case PIXEL_FMT_A8R8G8B8: + case PIXEL_FMT_A8: + case PIXEL_FMT_A8L8: + fmt = cCRNFmtDXT5; + break; + case PIXEL_FMT_DXT5_CCxY: + fmt = cCRNFmtDXT5_CCxY; + break; + case PIXEL_FMT_DXT5_xGBR: + fmt = cCRNFmtDXT5_xGBR; + break; + case PIXEL_FMT_DXT5_AGBR: + fmt = cCRNFmtDXT5_AGBR; + break; + case PIXEL_FMT_DXT5_xGxR: + fmt = cCRNFmtDXT5_xGxR; + break; + default: + { + CRNLIB_ASSERT(false); + break; + } + } + return fmt; + } + + pixel_format convert_crn_format_to_pixel_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: return PIXEL_FMT_DXT1; + case cCRNFmtDXT3: return PIXEL_FMT_DXT3; + case cCRNFmtDXT5: return PIXEL_FMT_DXT5; + case cCRNFmtDXT5_CCxY: return PIXEL_FMT_DXT5_CCxY; + case cCRNFmtDXT5_xGxR: return PIXEL_FMT_DXT5_xGxR; + case cCRNFmtDXT5_xGBR: return PIXEL_FMT_DXT5_xGBR; + case cCRNFmtDXT5_AGBR: return PIXEL_FMT_DXT5_AGBR; + case cCRNFmtDXN_XY: return PIXEL_FMT_DXN; + case cCRNFmtDXN_YX: return PIXEL_FMT_3DC; + case cCRNFmtDXT5A: return PIXEL_FMT_DXT5A; + default: break; + } + + return PIXEL_FMT_INVALID; + } + + } // namespace pixel_format + +} // namespace crnlib + diff --git a/crnlib/crn_pixel_format.h b/crnlib/crn_pixel_format.h new file mode 100644 index 00000000..c2f6457e --- /dev/null +++ b/crnlib/crn_pixel_format.h @@ -0,0 +1,278 @@ +// 
File: crn_pixel_format.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt.h" +#include "../inc/crnlib.h" +#include "../inc/dds_defs.h" + +namespace crnlib +{ + namespace pixel_format_helpers + { + uint get_num_formats(); + pixel_format get_pixel_format_by_index(uint index); + + const wchar_t* get_pixel_format_string(pixel_format fmt); + const char* get_pixel_format_stringa(pixel_format fmt); + + const wchar_t* get_crn_format_string(crn_format fmt); + const char* get_crn_format_stringa(crn_format fmt); + + inline bool is_grayscale(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_L8: + case PIXEL_FMT_A8L8: + return true; + default: break; + } + return false; + } + + inline bool is_dxt1(pixel_format fmt) + { + return (fmt == PIXEL_FMT_DXT1) || (fmt == PIXEL_FMT_DXT1A); + } + + inline bool has_alpha(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1A: + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + case PIXEL_FMT_DXT5A: + case PIXEL_FMT_A8R8G8B8: + case PIXEL_FMT_A8: + case PIXEL_FMT_A8L8: + return true; + default: break; + } + return false; + } + + inline bool is_alpha_only(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_A8: + case PIXEL_FMT_DXT5A: + return true; + default: break; + } + return false; + } + + inline bool is_normal_map(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_AGBR: + return true; + default: break; + } + return false; + } + + inline int is_dxt(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: + case PIXEL_FMT_DXT1A: + case PIXEL_FMT_DXT2: + case PIXEL_FMT_DXT3: + case PIXEL_FMT_DXT4: + case PIXEL_FMT_DXT5: + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXT5A: + case PIXEL_FMT_DXN: + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + return true; 
+ default: break; + } + return false; + } + + inline int get_fundamental_format(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1A: + return PIXEL_FMT_DXT1; + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + return PIXEL_FMT_DXT5; + default: break; + } + return fmt; + } + + inline dxt_format get_dxt_format(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: return cDXT1; + case PIXEL_FMT_DXT1A: return cDXT1A; + case PIXEL_FMT_DXT2: return cDXT3; + case PIXEL_FMT_DXT3: return cDXT3; + case PIXEL_FMT_DXT4: return cDXT5; + case PIXEL_FMT_DXT5: return cDXT5; + case PIXEL_FMT_3DC: return cDXN_YX; + case PIXEL_FMT_DXT5A: return cDXT5A; + case PIXEL_FMT_DXN: return cDXN_XY; + case PIXEL_FMT_DXT5_CCxY: return cDXT5; + case PIXEL_FMT_DXT5_xGxR: return cDXT5; + case PIXEL_FMT_DXT5_xGBR: return cDXT5; + case PIXEL_FMT_DXT5_AGBR: return cDXT5; + default: break; + } + return cDXTInvalid; + } + + inline pixel_format from_dxt_format(dxt_format dxt_fmt) + { + switch (dxt_fmt) + { + case cDXT1: + return PIXEL_FMT_DXT1; + case cDXT1A: + return PIXEL_FMT_DXT1A; + case cDXT3: + return PIXEL_FMT_DXT3; + case cDXT5: + return PIXEL_FMT_DXT5; + case cDXN_XY: + return PIXEL_FMT_DXN; + case cDXN_YX: + return PIXEL_FMT_3DC; + case cDXT5A: + return PIXEL_FMT_DXT5A; + default: break; + } + CRNLIB_ASSERT(false); + return PIXEL_FMT_INVALID; + } + + inline bool is_pixel_format_non_srgb(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXN: + case PIXEL_FMT_DXT5A: + case PIXEL_FMT_DXT5_CCxY: + case PIXEL_FMT_DXT5_xGxR: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + return true; + default: break; + } + return false; + } + + inline bool is_crn_format_non_srgb(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + case cCRNFmtDXT5A: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return 
true; + default: break; + } + return false; + } + + inline uint get_bpp(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: return 4; + case PIXEL_FMT_DXT1A: return 4; + case PIXEL_FMT_DXT2: return 8; + case PIXEL_FMT_DXT3: return 8; + case PIXEL_FMT_DXT4: return 8; + case PIXEL_FMT_DXT5: return 8; + case PIXEL_FMT_3DC: return 8; + case PIXEL_FMT_DXT5A: return 4; + case PIXEL_FMT_R8G8B8: return 24; + case PIXEL_FMT_A8R8G8B8: return 32; + case PIXEL_FMT_A8: return 8; + case PIXEL_FMT_L8: return 8; + case PIXEL_FMT_A8L8: return 16; + case PIXEL_FMT_DXN: return 8; + case PIXEL_FMT_DXT5_CCxY: return 8; + case PIXEL_FMT_DXT5_xGxR: return 8; + case PIXEL_FMT_DXT5_xGBR: return 8; + case PIXEL_FMT_DXT5_AGBR: return 8; + default: break; + } + CRNLIB_ASSERT(false); + return 0; + }; + + inline uint get_dxt_bytes_per_block(pixel_format fmt) + { + switch (fmt) + { + case PIXEL_FMT_DXT1: return 8; + case PIXEL_FMT_DXT1A: return 8; + case PIXEL_FMT_DXT5A: return 8; + + case PIXEL_FMT_DXT2: return 16; + case PIXEL_FMT_DXT3: return 16; + case PIXEL_FMT_DXT4: return 16; + case PIXEL_FMT_DXT5: return 16; + case PIXEL_FMT_3DC: return 16; + case PIXEL_FMT_DXN: return 16; + case PIXEL_FMT_DXT5_CCxY: return 16; + case PIXEL_FMT_DXT5_xGxR: return 16; + case PIXEL_FMT_DXT5_xGBR: return 16; + case PIXEL_FMT_DXT5_AGBR: return 16; + default: break; + } + CRNLIB_ASSERT(false); + return 0; + } + + enum component_flags + { + cCompFlagRValid = 1, + cCompFlagGValid = 2, + cCompFlagBValid = 4, + cCompFlagAValid = 8, + + cCompFlagGrayscale = 16, + cCompFlagNormalMap = 32, + cCompFlagLumaChroma = 64, + + cDefaultCompFlags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid + }; + + component_flags get_component_flags(pixel_format fmt); + + crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt); + + pixel_format convert_crn_format_to_pixel_format(crn_format fmt); + + } // namespace pixel_format_helpers + +} // namespace crnlib + diff --git 
a/crnlib/crn_platform.cpp b/crnlib/crn_platform.cpp
new file mode 100644
index 00000000..bc720414
--- /dev/null
+++ b/crnlib/crn_platform.cpp
@@ -0,0 +1,19 @@
+// File: crn_platform.cpp
+// See Copyright Notice and license at the end of inc/crnlib.h
+#include "crn_core.h"
+#include "crn_winhdr.h"
+// Returns true when IsDebuggerPresent() reports a nonzero value.
+bool crnlib_is_debugger_present(void)
+{
+   return IsDebuggerPresent() != 0;
+}
+// Forwards to DebugBreak().
+void crnlib_debug_break(void)
+{
+   DebugBreak();
+}
+// Passes the string p to OutputDebugStringA().
+void crnlib_output_debug_string(const char* p)
+{
+   OutputDebugStringA(p);
+}
diff --git a/crnlib/crn_platform.h b/crnlib/crn_platform.h
new file mode 100644
index 00000000..8af90182
--- /dev/null
+++ b/crnlib/crn_platform.h
@@ -0,0 +1,49 @@
+// File: crn_platform.h
+// See Copyright Notice and license at the end of inc/crnlib.h
+#pragma once
+// Endianness is keyed solely off CRNLIB_PLATFORM_PC: little-endian when it is defined, big-endian otherwise.
+#ifdef CRNLIB_PLATFORM_PC
+   const bool c_crnlib_little_endian_platform = true;
+#else
+   const bool c_crnlib_little_endian_platform = false;
+#endif
+// Always the logical negation of c_crnlib_little_endian_platform.
+const bool c_crnlib_big_endian_platform = !c_crnlib_little_endian_platform;
+// Function-style accessors for the two constants above.
+inline bool crnlib_is_little_endian() { return c_crnlib_little_endian_platform; }
+inline bool crnlib_is_big_endian() { return c_crnlib_big_endian_platform; }
+// True when the build defines CRNLIB_PLATFORM_PC.
+inline bool crnlib_is_pc()
+{
+#ifdef CRNLIB_PLATFORM_PC
+   return true;
+#else
+   return false;
+#endif
+}
+// True when the build defines CRNLIB_PLATFORM_PC_X86.
+inline bool crnlib_is_x86()
+{
+#ifdef CRNLIB_PLATFORM_PC_X86
+   return true;
+#else
+   return false;
+#endif
+}
+// True when the build defines CRNLIB_PLATFORM_PC_X64.
+inline bool crnlib_is_x64()
+{
+#ifdef CRNLIB_PLATFORM_PC_X64
+   return true;
+#else
+   return false;
+#endif
+}
+// Debugger helpers; defined in crn_platform.cpp.
+bool crnlib_is_debugger_present(void);
+void crnlib_debug_break(void);
+void crnlib_output_debug_string(const char* p);
+// Assertion/failure reporting hooks.
+// Implemented in crn_assert.cpp, not in crn_platform.cpp.
+void crnlib_assert(const char* pExp, const char* pFile, unsigned line);
+void crnlib_fail(const char* pExp, const char* pFile, unsigned line);
diff --git a/crnlib/crn_prefix_coding.cpp b/crnlib/crn_prefix_coding.cpp
new file mode 100644
index 00000000..6585ab1d
--- /dev/null
+++ b/crnlib/crn_prefix_coding.cpp
@@ -0,0 +1,356
@@ +// File: crn_prefix_coding.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_prefix_coding.h" +//#include "rand.h" + +#ifdef CRNLIB_BUILD_DEBUG + //#define TEST_DECODER_TABLES +#endif + +namespace crnlib +{ + + namespace prefix_coding + { + bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size) + { + const uint cMaxEverCodeSize = 34; + + if ((!num_syms) || (num_syms > cMaxSupportedSyms) || (max_code_size < 1) || (max_code_size > cMaxEverCodeSize)) + return false; + + uint num_codes[cMaxEverCodeSize + 1]; + utils::zero_object(num_codes); + + bool should_limit = false; + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(c <= cMaxEverCodeSize); + + num_codes[c]++; + if (c > max_code_size) + should_limit = true; + } + } + + if (!should_limit) + return true; + + uint ofs = 0; + uint next_sorted_ofs[cMaxEverCodeSize + 1]; + for (uint i = 1; i <= cMaxEverCodeSize; i++) + { + next_sorted_ofs[i] = ofs; + ofs += num_codes[i]; + } + + if ((ofs < 2) || (ofs > cMaxSupportedSyms)) + return true; + + if (ofs > (1U << max_code_size)) + return false; + + for (uint i = max_code_size + 1; i <= cMaxEverCodeSize; i++) + num_codes[max_code_size] += num_codes[i]; + + // Technique of adjusting tree to enforce maximum code size from LHArc. 
+ + uint total = 0; + for (uint i = max_code_size; i; --i) + total += (num_codes[i] << (max_code_size - i)); + + if (total == (1U << max_code_size)) + return true; + + do + { + num_codes[max_code_size]--; + + uint i; + for (i = max_code_size - 1; i; --i) + { + if (!num_codes[i]) + continue; + num_codes[i]--; + num_codes[i + 1] += 2; + break; + } + if (!i) + return false; + + total--; + } while (total != (1U << max_code_size)); + + uint8 new_codesizes[cMaxSupportedSyms]; + uint8* p = new_codesizes; + for (uint i = 1; i <= max_code_size; i++) + { + uint n = num_codes[i]; + if (n) + { + memset(p, i, n); + p += n; + } + } + + for (uint i = 0; i < num_syms; i++) + { + const uint c = pCodesizes[i]; + if (c) + { + uint ofs = next_sorted_ofs[c]; + next_sorted_ofs[c] = ofs + 1; + + pCodesizes[i] = static_cast(new_codesizes[ofs]); + } + } + + return true; + } + + bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes) + { + uint num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(c <= cMaxExpectedCodeSize); + num_codes[c]++; + } + } + + uint code = 0; + + uint next_code[cMaxExpectedCodeSize + 1]; + next_code[0] = 0; + + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + next_code[i] = code; + + code = (code + num_codes[i]) << 1; + } + + if (code != (1 << (cMaxExpectedCodeSize + 1))) + { + uint t = 0; + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + t += num_codes[i]; + if (t > 1) + return false; + } + } + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(next_code[c] <= UINT16_MAX); + pCodes[i] = static_cast(next_code[c]++); + + CRNLIB_ASSERT(math::total_bits(pCodes[i]) <= pCodesizes[i]); + } + } + + return true; + } + + bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits) + { + uint min_codes[cMaxExpectedCodeSize]; + + if 
((!num_syms) || (table_bits > cMaxTableBits)) + return false; + + pTables->m_num_syms = num_syms; + + uint num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + num_codes[c]++; + } + + uint sorted_positions[cMaxExpectedCodeSize + 1]; + + uint code = 0; + + uint total_used_syms = 0; + uint max_code_size = 0; + uint min_code_size = UINT_MAX; + for (uint i = 1; i <= cMaxExpectedCodeSize; i++) + { + const uint n = num_codes[i]; + + if (!n) + pTables->m_max_codes[i - 1] = 0;//UINT_MAX; + else + { + min_code_size = math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, i); + + min_codes[i - 1] = code; + + pTables->m_max_codes[i - 1] = code + n - 1; + pTables->m_max_codes[i - 1] = 1 + ((pTables->m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); + + pTables->m_val_ptrs[i - 1] = total_used_syms; + + sorted_positions[i] = total_used_syms; + + code += n; + total_used_syms += n; + } + + code <<= 1; + } + + pTables->m_total_used_syms = total_used_syms; + + if (total_used_syms > pTables->m_cur_sorted_symbol_order_size) + { + pTables->m_cur_sorted_symbol_order_size = total_used_syms; + + if (!math::is_power_of_2(total_used_syms)) + pTables->m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + + if (pTables->m_sorted_symbol_order) + { + crnlib_delete_array(pTables->m_sorted_symbol_order); + pTables->m_sorted_symbol_order = NULL; + } + + pTables->m_sorted_symbol_order = crnlib_new_array(pTables->m_cur_sorted_symbol_order_size); + } + + pTables->m_min_code_size = static_cast(min_code_size); + pTables->m_max_code_size = static_cast(max_code_size); + + for (uint i = 0; i < num_syms; i++) + { + uint c = pCodesizes[i]; + if (c) + { + CRNLIB_ASSERT(num_codes[c]); + + uint sorted_pos = sorted_positions[c]++; + + CRNLIB_ASSERT(sorted_pos < total_used_syms); + + pTables->m_sorted_symbol_order[sorted_pos] = static_cast(i); 
+ } + } + + if (table_bits <= pTables->m_min_code_size) + table_bits = 0; + pTables->m_table_bits = table_bits; + + if (table_bits) + { + uint table_size = 1 << table_bits; + if (table_size > pTables->m_cur_lookup_size) + { + pTables->m_cur_lookup_size = table_size; + + if (pTables->m_lookup) + { + crnlib_delete_array(pTables->m_lookup); + pTables->m_lookup = NULL; + } + + pTables->m_lookup = crnlib_new_array(table_size); + } + + memset(pTables->m_lookup, 0xFF, static_cast(sizeof(pTables->m_lookup[0])) * (1UL << table_bits)); + + for (uint codesize = 1; codesize <= table_bits; codesize++) + { + if (!num_codes[codesize]) + continue; + + const uint fillsize = table_bits - codesize; + const uint fillnum = 1 << fillsize; + + const uint min_code = min_codes[codesize - 1]; + const uint max_code = pTables->get_unshifted_max_code(codesize); + const uint val_ptr = pTables->m_val_ptrs[codesize - 1]; + + for (uint code = min_code; code <= max_code; code++) + { + const uint sym_index = pTables->m_sorted_symbol_order[ val_ptr + code - min_code ]; + CRNLIB_ASSERT( pCodesizes[sym_index] == codesize ); + + for (uint j = 0; j < fillnum; j++) + { + const uint t = j + (code << fillsize); + + CRNLIB_ASSERT(t < (1U << table_bits)); + + CRNLIB_ASSERT(pTables->m_lookup[t] == UINT32_MAX); + + pTables->m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } + + for (uint i = 0; i < cMaxExpectedCodeSize; i++) + pTables->m_val_ptrs[i] -= min_codes[i]; + + pTables->m_table_max_code = 0; + pTables->m_decode_start_code_size = pTables->m_min_code_size; + + if (table_bits) + { + uint i; + for (i = table_bits; i >= 1; i--) + { + if (num_codes[i]) + { + pTables->m_table_max_code = pTables->m_max_codes[i - 1]; + break; + } + } + if (i >= 1) + { + pTables->m_decode_start_code_size = table_bits + 1; + for (uint i = table_bits + 1; i <= max_code_size; i++) + { + if (num_codes[i]) + { + pTables->m_decode_start_code_size = i; + break; + } + } + } + } + + // sentinels + 
pTables->m_max_codes[cMaxExpectedCodeSize] = UINT_MAX; + pTables->m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + + pTables->m_table_shift = 32 - pTables->m_table_bits; + + return true; + } + + } // namespace prefix_codig + + +} // namespace crnlib + + diff --git a/crnlib/crn_prefix_coding.h b/crnlib/crn_prefix_coding.h new file mode 100644 index 00000000..e07bbee9 --- /dev/null +++ b/crnlib/crn_prefix_coding.h @@ -0,0 +1,116 @@ +// File: crn_prefix_coding.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + namespace prefix_coding + { + const uint cMaxExpectedCodeSize = 16; + const uint cMaxSupportedSyms = 8192; + const uint cMaxTableBits = 11; + + bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size); + + bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes); + + class decoder_tables + { + public: + inline decoder_tables() : + m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + } + + inline decoder_tables(const decoder_tables& other) : + m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + *this = other; + } + + decoder_tables& operator= (const decoder_tables& other) + { + if (this == &other) + return *this; + + clear(); + + memcpy(this, &other, sizeof(*this)); + + if (other.m_lookup) + { + m_lookup = crnlib_new_array(m_cur_lookup_size); + memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } + + if (other.m_sorted_symbol_order) + { + m_sorted_symbol_order = crnlib_new_array(m_cur_sorted_symbol_order_size); + memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + + return *this; + } + + inline void clear() + { + if (m_lookup) 
+ { + crnlib_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) + { + crnlib_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } + + inline ~decoder_tables() + { + if (m_lookup) + crnlib_delete_array(m_lookup); + + if (m_sorted_symbol_order) + crnlib_delete_array(m_sorted_symbol_order); + } + + // DO NOT use any complex classes here - it is bitwise copied. + + uint m_num_syms; + uint m_total_used_syms; + uint m_table_bits; + uint m_table_shift; + uint m_table_max_code; + uint m_decode_start_code_size; + + uint8 m_min_code_size; + uint8 m_max_code_size; + + uint m_max_codes[cMaxExpectedCodeSize + 1]; + int m_val_ptrs[cMaxExpectedCodeSize + 1]; + + uint m_cur_lookup_size; + uint32* m_lookup; + + uint m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; + + inline uint get_unshifted_max_code(uint len) const + { + CRNLIB_ASSERT( (len >= 1) && (len <= cMaxExpectedCodeSize) ); + uint k = m_max_codes[len - 1]; + if (!k) + return UINT_MAX; + return (k - 1) >> (16 - len); + } + }; + + bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits); + + } // namespace prefix_coding + +} // namespace crnlib diff --git a/crnlib/crn_qdxt1.cpp b/crnlib/crn_qdxt1.cpp new file mode 100644 index 00000000..9ef780ac --- /dev/null +++ b/crnlib/crn_qdxt1.cpp @@ -0,0 +1,909 @@ +// File: crn_qdxt.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_qdxt1.h" +#include "crn_dxt1.h" +#include "crn_dxt_fast.h" +#include "crn_image_utils.h" +#include "crn_dxt_hc_common.h" + +#define GENERATE_DEBUG_IMAGES 0 + +namespace crnlib +{ + qdxt1::qdxt1(task_pool& task_pool) : + m_pTask_pool(&task_pool), + m_main_thread_id(0), + m_canceled(false), + m_progress_start(0), + m_progress_range(100), + m_num_blocks(0), + m_pBlocks(NULL), + m_pDst_elements(NULL), + m_elements_per_block(0), 
+ m_max_selector_clusters(0), + m_prev_percentage_complete(-1), + m_selector_clusterizer(task_pool) + { + } + + qdxt1::~qdxt1() + { + } + + void qdxt1::clear() + { + m_main_thread_id = 0; + m_num_blocks = 0; + m_pBlocks = 0; + m_pDst_elements = NULL; + m_elements_per_block = 0; + m_params.clear(); + m_endpoint_clusterizer.clear(); + m_endpoint_cluster_indices.clear(); + m_max_selector_clusters = 0; + m_canceled = false; + m_progress_start = 0; + m_progress_range = 100; + m_selector_clusterizer.clear(); + + for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++) + m_cached_selector_cluster_indices[i].clear(); + + m_cluster_hash.clear(); + + m_prev_percentage_complete = -1; + } + + bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params) + { + clear(); + + CRNLIB_ASSERT(n && pBlocks); + + m_main_thread_id = get_current_thread_id(); + + m_num_blocks = n; + m_pBlocks = pBlocks; + m_params = params; + + m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); + + m_progress_start = 0; + m_progress_range = 75; + + const bool debugging = false; + image_u8 debug_img; + + if ((m_params.m_hierarchical) && (m_params.m_num_mips)) + { + vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); + training_vecs.resize(m_num_blocks); + + uint encoding_hist[cNumChunkEncodings]; + utils::zero_object(encoding_hist); + + uint total_processed_blocks = 0; + uint next_progress_threshold = 512; + + for (uint level = 0; level < m_params.m_num_mips; level++) + { + const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level]; + + const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; + const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; + + const uint level_width = level_desc.m_block_width * 4; + const uint level_height = level_desc.m_block_height * 4; + + if (debugging) + debug_img.resize(num_chunks_x * cChunkPixelWidth, 
num_chunks_y * cChunkPixelHeight); + + for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) + { + for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) + { + color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < cChunkPixelHeight; y++) + { + const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); + + const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); + + for (uint x = 0; x < cChunkPixelWidth; x++) + { + const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); + + const uint block_index = outer_block_index + (pix_x >> 2); + + const dxt_pixel_block& block = m_pBlocks[block_index]; + + const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; + + chunk_pixels[x + y * 8] = p; + } + } + + struct layout_results + { + uint m_low_color; + uint m_high_color; + uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; + uint64 m_error; + //float m_penalty; + }; + layout_results layouts[cNumChunkTileLayouts]; + + for (uint l = 0; l < cNumChunkTileLayouts; l++) + { + const uint width = g_chunk_tile_layouts[l].m_width; + const uint height = g_chunk_tile_layouts[l].m_height; + const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; + const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; + + color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; + for (uint y = 0; y < height; y++) + for (uint x = 0; x < width; x++) + layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; + + const uint n = width * height; + dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors); + + color_quad_u8 c[4]; + dxt1_block::get_block_colors(c, static_cast(layouts[l].m_low_color), static_cast(layouts[l].m_high_color)); + + uint64 error = 0; + for (uint i = 0; i < n; i++) + error += color::elucidian_distance(layout_pixels[i], 
c[layouts[l].m_selectors[i]], false); + + layouts[l].m_error = error; + +#if 0 + if ((width > 4) || (height > 4)) + { + const uint dist = color::elucidian_distance( + dxt1_block::unpack_color(static_cast(layouts[l].m_low_color), true), + dxt1_block::unpack_color(static_cast(layouts[l].m_high_color), true), false); + + layouts[l].m_penalty = math::clamp((sqrt((float)dist) - 75.0f) / 150.0f, 0.0f, 2.0f); + if ((width == 8) && (height == 8)) + layouts[l].m_penalty *= 2.0f; + } + else + { + layouts[l].m_penalty = 0.0f; + } +#endif + } + + double best_peak_snr = -1.0f; + uint best_encoding = 0; + + for (uint e = 0; e < cNumChunkEncodings; e++) + { + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; + + double total_error = 0; + + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; + + //double mean_squared = total_error * (1.0f / (16.0f * 3.0f)); + double mean_squared = total_error * (1.0f / (64.0f * 3.0f)); + double root_mean_squared = sqrt(mean_squared); + + double peak_snr = 999999.0f; + if (mean_squared) + peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); + + float adaptive_tile_color_psnr_derating = 2.4f; + //if (level) + // adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); + if ((level) && (adaptive_tile_color_psnr_derating > .25f)) + { + adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast(level))); + } + + float color_derating = math::lerp( 0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f ); + peak_snr = peak_snr - color_derating; + + //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; + + if (peak_snr > best_peak_snr) + { + best_peak_snr = 
peak_snr; + best_encoding = e; + } + } + + encoding_hist[best_encoding]++; + + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; + + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + { + const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; + + uint layout_index = tile_desc.m_layout_index; + const layout_results& layout = layouts[layout_index]; + color_quad_u8 c[4]; + if (debugging) + dxt1_block::get_block_colors(c, static_cast(layout.m_low_color), static_cast(layout.m_high_color)); + + color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < tile_desc.m_height; y++) + { + const uint pix_y = y + tile_desc.m_y_ofs; + + for (uint x = 0; x < tile_desc.m_width; x++) + { + const uint pix_x = x + tile_desc.m_x_ofs; + + tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth]; + + if (debugging) + debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; + } + } + + color_quad_u8 l, h; + dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); + + //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); + const uint dist = color::elucidian_distance(l, h, false); + + const uint cColorDistToWeight = 5000; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); + + vec6F ev; + + ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2]; + ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2]; + + for (uint y = 0; y < (tile_desc.m_height >> 2); y++) + { + uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); + if (block_y >= level_desc.m_block_height) + continue; + + for (uint x = 0; x < (tile_desc.m_width >> 2); x++) + { + uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); + if (block_x >= level_desc.m_block_width) + break; + + uint block_index = level_desc.m_first_block + block_x + block_y * 
level_desc.m_block_width; + + training_vecs[block_index].first = ev; + training_vecs[block_index].second = weight; + + total_processed_blocks++; + + //if (debugging) + //{ + // debug_img(block_x, block_y) = l; + // debug_img(block_x + level_desc.m_block_width, block_y) = h; + //} + + } // x + } // y + } //t + + if (total_processed_blocks >= next_progress_threshold) + { + next_progress_threshold += 512; + + if (!update_progress(total_processed_blocks, m_num_blocks - 1)) + return false; + } + + } // chunk_x + } // chunk_y + +#if GENERATE_DEBUG_IMAGES + if (debugging) + image_utils::save_to_file_stb(dynamic_wstring(cVarArg, L"debug_%u.tga", level).get_ptr(), debug_img, image_utils::cSaveIgnoreAlpha); +#endif + + } // level + +#if 0 + trace("chunk encoding hist: "); + for (uint i = 0; i < cNumChunkEncodings; i++) + trace("%u ", encoding_hist[i]); + trace("\n"); +#endif + } + else + { + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + return false; + } + + color_quad_u8 l, h; + dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, &m_pBlocks[block_index].m_pixels[0][0], l, h); + + //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); + const uint dist = color::elucidian_distance(l, h, false); + + const uint cColorDistToWeight = 5000; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); + + vec6F ev; + + ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2]; + ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2]; + + m_endpoint_clusterizer.add_training_vec(ev, weight); + } + } + + const uint cMaxEndpointClusters = 65535U; + + m_progress_start = 75; + m_progress_range = 20; + + if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) + return false; + + crnlib::hash_map selector_hash; + + m_progress_start = 95; + m_progress_range = 5; + + for 
(uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + return false; + } + + dxt1_block dxt_blk; + dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]); + + uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24); + + selector_hash.insert(selectors); + } + + m_max_selector_clusters = selector_hash.size() + 128; + +// trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size()); +// trace("max selector clusters: %u\n", m_max_selector_clusters); + + update_progress(1, 1); + + return true; + } + + bool qdxt1::update_progress(uint value, uint max_value) + { + if (!m_params.m_pProgress_func) + return true; + + uint percentage = max_value ? (m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; + if ((int)percentage == m_prev_percentage_complete) + return true; + m_prev_percentage_complete = percentage; + + if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) + { + m_canceled = true; + return false; + } + + return true; + } + + void qdxt1::pack_endpoints_task(uint64 data, void* pData_ptr) + { + pData_ptr; + const uint thread_index = static_cast(data); + + crnlib::vector cluster_pixels; + cluster_pixels.reserve(1024); + + crnlib::vector selectors; + selectors.reserve(1024); + + dxt1_endpoint_optimizer optimizer; + dxt1_endpoint_optimizer::params p; + dxt1_endpoint_optimizer::results r; + + p.m_quality = m_params.m_dxt_quality; + p.m_use_alpha_blocks = m_params.m_use_alpha_blocks; + p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold; + p.m_perceptual = m_params.m_perceptual; + + uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); + cluster_index_progress_mask /= 2; + 
cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); + cluster_index_progress_mask -= 1; + + cluster_id cid; + const crnlib::vector& indices = cid.m_cells; + + for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + return; + + if ((cluster_index & cluster_index_progress_mask) == 0) + { + if (get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) + return; + } + } + + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; + + selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); + + bool found = false; + uint32 found_endpoints = 0; + + cid.set(cluster_indices); + + { + scoped_spinlock lock(m_cluster_hash_lock); + + cluster_hash::const_iterator it(m_cluster_hash.find(cid)); + if (it != m_cluster_hash.end()) + { + CRNLIB_ASSERT(cid == it->first); + + found = true; + found_endpoints = it->second; + } + } + + if (found) + { + const uint16 low_color = static_cast(found_endpoints); + const uint16 high_color = static_cast((found_endpoints >> 16U)); + + color_quad_u8 block_colors[4]; + dxt1_block::get_block_colors(block_colors, low_color, high_color); + + const bool is_alpha_block = (low_color <= high_color); + + for (uint block_iter = 0; block_iter < indices.size(); block_iter++) + { + const uint block_index = indices[block_iter]; + + const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + dxt1_block& dxt_block = get_block(block_index); + + dxt_block.set_low_color(static_cast(low_color)); + dxt_block.set_high_color(static_cast(high_color)); + + uint mask = 0; + for (int i = 15; i >= 0; i--) + { + mask <<= 2; + + const color_quad_u8& c = 
pSrc_pixels[i]; + + uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], false); + uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false); + uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false); + + uint selector = 0, best_dist = dist0; + + if (dist1 < best_dist) { selector = 1; best_dist = dist1; } + if (dist2 < best_dist) { selector = 2; best_dist = dist2; } + + if (!is_alpha_block) + { + uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false); + if (dist3 < best_dist) { selector = 3; } + } + else + { + if (c.a < m_params.m_dxt1a_alpha_threshold) + selector = 3; + } + + mask |= selector; + } + + dxt_block.m_selectors[0] = static_cast(mask & 0xFF); + dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); + dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); + dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); + } + } + } + else + { + cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize); + + color_quad_u8* pDst = &cluster_pixels[0]; + + bool has_alpha_pixels = false; + + for (uint block_iter = 0; block_iter < indices.size(); block_iter++) + { + const uint block_index = indices[block_iter]; + + const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const color_quad_u8& src = pSrc_pixels[i]; + + if (src.a < m_params.m_dxt1a_alpha_threshold) + has_alpha_pixels = true; + + *pDst++ = src; + } + } + + p.m_block_index = cluster_index; + p.m_num_pixels = cluster_pixels.size(); + p.m_pPixels = cluster_pixels.begin(); + + r.m_pSelectors = selectors.begin(); + + uint low_color, high_color; + if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels)) + { + p.m_pixels_have_alpha = has_alpha_pixels; + + optimizer.compute(p, r); + low_color = r.m_low_color; + high_color = r.m_high_color; + } + else + { + 
dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true); + } + + const uint8* pSrc_selectors = selectors.begin(); + + for (uint block_iter = 0; block_iter < indices.size(); block_iter++) + { + const uint block_index = indices[block_iter]; + + dxt1_block& dxt_block = get_block(block_index); + + dxt_block.set_low_color(static_cast(low_color)); + dxt_block.set_high_color(static_cast(high_color)); + + uint mask = 0; + for (int i = 15; i >= 0; i--) + { + mask <<= 2; + mask |= pSrc_selectors[i]; + } + pSrc_selectors += (cDXTBlockSize * cDXTBlockSize); + + dxt_block.m_selectors[0] = static_cast(mask & 0xFF); + dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); + dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); + dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); + + } + + { + scoped_spinlock lock(m_cluster_hash_lock); + + m_cluster_hash.insert(cid, low_color | (high_color << 16)); + } + } + + } + } + + struct optimize_selectors_params + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); + + optimize_selectors_params( + crnlib::vector< crnlib::vector >& selector_cluster_indices) : + m_selector_cluster_indices(selector_cluster_indices) + { + } + + crnlib::vector< crnlib::vector >& m_selector_cluster_indices; + }; + + void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); + + optimize_selectors_params& task_params = *static_cast(pData_ptr); + + crnlib::vector block_categories[2]; + block_categories[0].reserve(2048); + block_categories[1].reserve(2048); + + for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + return; + + if ((cluster_index & 255) == 0) + { + if (get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) + return; + } + } + + if 
(m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; + + if (selector_indices.size() <= 1) + continue; + + block_categories[0].resize(0); + block_categories[1].resize(0); + + for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) + { + const uint block_index = selector_indices[block_iter]; + + const dxt1_block& src_block = get_block(block_index); + + if (!src_block.is_alpha_block()) + block_categories[0].push_back(block_index); + else + { + bool has_alpha_pixels = false; + + if (m_params.m_dxt1a_alpha_threshold > 0) + { + const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const color_quad_u8& src = pSrc_pixels[i]; + if (src.a < m_params.m_dxt1a_alpha_threshold) + { + has_alpha_pixels = true; + break; + } + } + } + + if (has_alpha_pixels) + continue; + + block_categories[1].push_back(block_index); + } + } + + dxt1_block blk; + utils::zero_object(blk); + + for (uint block_type = 0; block_type <= 1; block_type++) + { + const crnlib::vector& block_indices = block_categories[block_type]; + if (block_indices.size() <= 1) + continue; + + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + uint best_s = 0; + uint64 best_error = 0xFFFFFFFFFFULL; + + uint max_s = 4; + if (block_type == 1) + max_s = 3; + + for (uint s = 0; s < max_s; s++) + { + uint64 total_error = 0; + + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; + + const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; + + const dxt1_block& dst_block = get_block(block_index); + + color_quad_u8 colors[4]; + dxt1_block::get_block_colors(colors, static_cast(dst_block.get_low_color()), 
static_cast(dst_block.get_high_color())); + + uint error = color::color_distance(m_params.m_perceptual, orig_color, colors[s], false); + + total_error += error; + } + + if (total_error < best_error) + { + best_error = total_error; + best_s = s; + } + } + + blk.set_selector(x, y, best_s); + + } // x + } // y + + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; + + dxt1_block& dst_block = get_block(block_index); + + memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); + } + } + + } // cluster_index + } + + bool qdxt1::generate_codebook_progress_callback(uint percentage_completed, void* pData) + { + return static_cast(pData)->update_progress(percentage_completed, 100U); + } + + bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector< crnlib::vector >& selector_cluster_indices) + { + m_progress_start = m_progress_range; + m_progress_range = 33; + + weighted_selector_vec_array selector_vecs(m_num_blocks); + + for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) + { + dxt1_block& dxt1_block = get_block(block_iter); + + vec16F sv; + float* pDst = &sv[0]; + + for (uint y = 0; y < 4; y++) + for (uint x = 0; x < 4; x++) + *pDst++ = g_dxt1_to_linear[dxt1_block.get_selector(x, y)]; + + const color_quad_u8 first_color(dxt1_block::unpack_color((uint16)dxt1_block.get_low_color(), true)); + const color_quad_u8 second_color(dxt1_block::unpack_color((uint16)dxt1_block.get_high_color(), true)); + const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false); + + const uint cColorDistToWeight = 2000; + const uint cMaxWeight = 2048; + uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); + + selector_vecs[block_iter].m_vec = sv; + selector_vecs[block_iter].m_weight = weight; + } + + return m_selector_clusterizer.create_clusters( + selector_vecs, max_selector_clusters, 
selector_cluster_indices, generate_codebook_progress_callback, this); + } + + bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul) + { + CRNLIB_ASSERT(m_num_blocks); + + m_main_thread_id = get_current_thread_id(); + m_canceled = false; + + m_pDst_elements = pDst_elements; + m_elements_per_block = elements_per_block; + m_params = params; + if (!m_params.m_use_alpha_blocks) + m_params.m_dxt1a_alpha_threshold = 0; + + m_prev_percentage_complete = -1; + + CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality); + const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality; + const float endpoint_quality = powf(quality, 1.8f * quality_power_mul); + const float selector_quality = powf(quality, 1.65f * quality_power_mul); + + //const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, m_endpoint_clusterizer.get_codebook_size()); + //const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters); + const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size()); + const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters); + + if (quality >= 1.0f) + { + m_endpoint_cluster_indices.resize(m_num_blocks); + for (uint i = 0; i < m_num_blocks; i++) + { + m_endpoint_cluster_indices[i].resize(1); + m_endpoint_cluster_indices[i][0] = i; + } + } + else + m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); + +// trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size()); + + uint total_blocks = 0; + uint max_blocks = 0; + for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) + { + uint num = 
m_endpoint_cluster_indices[i].size(); + total_blocks += num; + max_blocks = math::maximum(max_blocks, num); + } +#if 0 + trace("Num clusters: %u, Average blocks per cluster: %u, Max blocks per cluster: %u\n", + m_endpoint_cluster_indices.size(), + total_blocks / m_endpoint_cluster_indices.size(), + max_blocks); +#endif + + crnlib::vector< crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; + + m_progress_start = 0; + if (quality >= 1.0f) + m_progress_range = 100; + else if (selector_cluster_indices.empty()) + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; + else + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i); + m_pTask_pool->join(); + + if (m_canceled) + return false; + + if (quality >= 1.0f) + return true; + + if (selector_cluster_indices.empty()) + { + create_selector_clusters(max_selector_clusters, selector_cluster_indices); + + if (m_canceled) + { + selector_cluster_indices.clear(); + + return false; + } + } + + m_progress_start += m_progress_range; + m_progress_range = 100 - m_progress_start; + + optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params); + + m_pTask_pool->join(); + + return !m_canceled; + } + +} // namespace crnlib + + diff --git a/crnlib/crn_qdxt1.h b/crnlib/crn_qdxt1.h new file mode 100644 index 00000000..8a03ed31 --- /dev/null +++ b/crnlib/crn_qdxt1.h @@ -0,0 +1,187 @@ +// File: crn_qdxt1.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt.h" +#include "crn_task_pool.h" +#include "crn_spinlock.h" +#include "crn_hash_map.h" +#include 
"crn_clusterizer.h" +#include "crn_hash.h" +#include "crn_threaded_clusterizer.h" +#include "crn_dxt_image.h" + +namespace crnlib +{ + struct qdxt1_params + { + qdxt1_params() + { + clear(); + } + + void clear() + { + m_quality_level = cMaxQuality; + m_dxt_quality = cCRNDXTQualityUber; + m_perceptual = true; + m_dxt1a_alpha_threshold = 0; + m_use_alpha_blocks = true; + m_pProgress_func = NULL; + m_pProgress_data = NULL; + m_num_mips = 0; + m_hierarchical = true; + utils::zero_object(m_mip_desc); + m_progress_start = 0; + m_progress_range = 100; + } + + void init(const dxt_image::pack_params &pp, int quality_level, bool hierarchical) + { + m_dxt_quality = pp.m_quality; + m_hierarchical = hierarchical; + m_perceptual = pp.m_perceptual; + m_use_alpha_blocks = pp.m_use_both_block_types; + m_quality_level = quality_level; + m_dxt1a_alpha_threshold = pp.m_dxt1a_alpha_threshold; + } + + enum { cMaxQuality = cCRNMaxQualityLevel }; + uint m_quality_level; + + uint m_dxt1a_alpha_threshold; + crn_dxt_quality m_dxt_quality; + bool m_perceptual; + bool m_use_alpha_blocks; + bool m_hierarchical; + + struct mip_desc + { + uint m_first_block; + uint m_block_width; + uint m_block_height; + }; + + uint m_num_mips; + enum { cMaxMips = 128 }; + mip_desc m_mip_desc[cMaxMips]; + + typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); + progress_callback_func m_pProgress_func; + void* m_pProgress_data; + uint m_progress_start; + uint m_progress_range; + }; + + class qdxt1 + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt1); + + public: + qdxt1(task_pool& task_pool); + ~qdxt1(); + + void clear(); + + bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params); + + uint get_num_blocks() const { return m_num_blocks; } + const dxt_pixel_block* get_blocks() const { return m_pBlocks; } + + bool pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul); + + private: + task_pool* m_pTask_pool; + 
uint32 m_main_thread_id; + bool m_canceled; + + uint m_progress_start; + uint m_progress_range; + + uint m_num_blocks; + const dxt_pixel_block* m_pBlocks; + + dxt1_block* m_pDst_elements; + uint m_elements_per_block; + qdxt1_params m_params; + + uint m_max_selector_clusters; + + int m_prev_percentage_complete; + + typedef vec<6, float> vec6F; + typedef clusterizer vec6F_clusterizer; + vec6F_clusterizer m_endpoint_clusterizer; + + crnlib::vector< crnlib::vector > m_endpoint_cluster_indices; + + typedef vec<16, float> vec16F; + typedef threaded_clusterizer vec16F_clusterizer; + + typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; + typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; + + vec16F_clusterizer m_selector_clusterizer; + + crnlib::vector< crnlib::vector > m_cached_selector_cluster_indices[qdxt1_params::cMaxQuality + 1]; + + struct cluster_id + { + cluster_id() : m_hash(0) + { + + } + + cluster_id(const crnlib::vector& indices) + { + set(indices); + } + + void set(const crnlib::vector& indices) + { + m_cells.resize(indices.size()); + + for (uint i = 0; i < indices.size(); i++) + m_cells[i] = static_cast(indices[i]); + + std::sort(m_cells.begin(), m_cells.end()); + + m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); + } + + bool operator< (const cluster_id& rhs) const + { + return m_cells < rhs.m_cells; + } + + bool operator== (const cluster_id& rhs) const + { + if (m_hash != rhs.m_hash) + return false; + + return m_cells == rhs.m_cells; + } + + crnlib::vector m_cells; + + size_t m_hash; + + operator size_t() const { return m_hash; } + }; + + typedef crnlib::hash_map cluster_hash; + cluster_hash m_cluster_hash; + spinlock m_cluster_hash_lock; + + static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); + static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); + bool update_progress(uint value, uint max_value); + void 
pack_endpoints_task(uint64 data, void* pData_ptr); + void optimize_selectors_task(uint64 data, void* pData_ptr); + bool create_selector_clusters(uint max_selector_clusters, crnlib::vector< crnlib::vector >& selector_cluster_indices); + + inline dxt1_block& get_block(uint index) const { return m_pDst_elements[index * m_elements_per_block]; } + }; + + CRNLIB_DEFINE_BITWISE_MOVABLE(qdxt1::cluster_id); + +} // namespace crnlib diff --git a/crnlib/crn_qdxt5.cpp b/crnlib/crn_qdxt5.cpp new file mode 100644 index 00000000..b423fed9 --- /dev/null +++ b/crnlib/crn_qdxt5.cpp @@ -0,0 +1,826 @@ +// File: crn_qdxt5.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_qdxt5.h" +#include "crn_dxt5a.h" +#include "crn_image.h" +#include "crn_image_utils.h" +#include "crn_dxt_fast.h" +#include "crn_dxt_hc_common.h" + +#define QDXT5_DEBUGGING 0 + +namespace crnlib +{ + qdxt5::qdxt5(task_pool& task_pool) : + m_pTask_pool(&task_pool), + m_main_thread_id(0), + m_canceled(false), + m_progress_start(0), + m_progress_range(100), + m_num_blocks(0), + m_pBlocks(NULL), + m_pDst_elements(NULL), + m_elements_per_block(0), + m_max_selector_clusters(0), + m_prev_percentage_complete(-1), + m_selector_clusterizer(task_pool) + { + } + + qdxt5::~qdxt5() + { + } + + void qdxt5::clear() + { + m_main_thread_id = 0; + m_num_blocks = 0; + m_pBlocks = 0; + m_pDst_elements = NULL; + m_elements_per_block = 0; + m_params.clear(); + m_endpoint_clusterizer.clear(); + m_endpoint_cluster_indices.clear(); + m_max_selector_clusters = 0; + m_canceled = false; + m_progress_start = 0; + m_progress_range = 100; + m_selector_clusterizer.clear(); + + for (uint i = 0; i <= qdxt5_params::cMaxQuality; i++) + m_cached_selector_cluster_indices[i].clear(); + + m_cluster_hash.clear(); + + m_prev_percentage_complete = -1; + } + + bool qdxt5::init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params) + { + clear(); + + CRNLIB_ASSERT(n && pBlocks); + + 
m_main_thread_id = get_current_thread_id(); + + m_num_blocks = n; + m_pBlocks = pBlocks; + m_params = params; + + m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); + + m_progress_start = 0; + m_progress_range = 75; + + image_u8 debug_img; + + const bool debugging = true; + + if ((m_params.m_hierarchical) && (m_params.m_num_mips)) + { + vec2F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); + training_vecs.resize(m_num_blocks); + + uint encoding_hist[cNumChunkEncodings]; + utils::zero_object(encoding_hist); + + uint total_processed_blocks = 0; + uint next_progress_threshold = 512; + + for (uint level = 0; level < m_params.m_num_mips; level++) + { + const qdxt5_params::mip_desc& level_desc = m_params.m_mip_desc[level]; + + const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; + const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; + + const uint level_width = level_desc.m_block_width * 4; + const uint level_height = level_desc.m_block_height * 4; + + if (debugging) + debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); + + for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) + { + for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) + { + color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < cChunkPixelHeight; y++) + { + const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); + + const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); + + for (uint x = 0; x < cChunkPixelWidth; x++) + { + const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); + + const uint block_index = outer_block_index + (pix_x >> 2); + + const dxt_pixel_block& block = m_pBlocks[block_index]; + + const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; + + chunk_pixels[x + y * 
8] = p; + } + } + + struct layout_results + { + uint m_low_color; + uint m_high_color; + uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; + uint64 m_error; + //float m_penalty; + }; + layout_results layouts[cNumChunkTileLayouts]; + + for (uint l = 0; l < cNumChunkTileLayouts; l++) + { + const uint width = g_chunk_tile_layouts[l].m_width; + const uint height = g_chunk_tile_layouts[l].m_height; + const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; + const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; + + color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; + for (uint y = 0; y < height; y++) + for (uint x = 0; x < width; x++) + layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; + + const uint n = width * height; + dxt_fast::compress_alpha_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors, m_params.m_comp_index); + + uint c[dxt5_block::cMaxSelectorValues]; + dxt5_block::get_block_values(c, layouts[l].m_low_color, layouts[l].m_high_color); + + uint64 error = 0; + for (uint i = 0; i < n; i++) + error += math::square((int)layout_pixels[i][m_params.m_comp_index] - (int)c[layouts[l].m_selectors[i]]); + + layouts[l].m_error = error; + } + + double best_peak_snr = -1.0f; + uint best_encoding = 0; + + for (uint e = 0; e < cNumChunkEncodings; e++) + { + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; + + double total_error = 0; + + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; + + double mean_squared = total_error * (1.0f / 64.0f); + double root_mean_squared = sqrt(mean_squared); + + double peak_snr = 999999.0f; + if (mean_squared) + peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); + + float adaptive_tile_alpha_psnr_derating = 2.4f; + //if (level) + // adaptive_tile_alpha_psnr_derating = math::lerp(adaptive_tile_alpha_psnr_derating * 
.5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); + if ((level) && (adaptive_tile_alpha_psnr_derating > .25f)) + { + adaptive_tile_alpha_psnr_derating = math::maximum(.25f, adaptive_tile_alpha_psnr_derating / powf(3.0f, static_cast(level))); + } + + float alpha_derating = math::lerp( 0.0f, adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f ); + peak_snr = peak_snr - alpha_derating; + + //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; + + if (peak_snr > best_peak_snr) + { + best_peak_snr = peak_snr; + best_encoding = e; + } + } + + encoding_hist[best_encoding]++; + + const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; + + for (uint t = 0; t < encoding_desc.m_num_tiles; t++) + { + const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; + + uint layout_index = tile_desc.m_layout_index; + const layout_results& layout = layouts[layout_index]; + + uint c[dxt5_block::cMaxSelectorValues]; + if (debugging) + dxt5_block::get_block_values(c, layout.m_low_color, layout.m_high_color); + + color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; + + for (uint y = 0; y < tile_desc.m_height; y++) + { + const uint pix_y = y + tile_desc.m_y_ofs; + + for (uint x = 0; x < tile_desc.m_width; x++) + { + const uint pix_x = x + tile_desc.m_x_ofs; + + uint a = chunk_pixels[pix_x + pix_y * cChunkPixelWidth][m_params.m_comp_index]; + + tile_pixels[x + y * tile_desc.m_width].set(a, a, a, 255); + + if (debugging) + debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; + } + } + + color_quad_u8 l, h; + dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); + + const uint dist = math::square((int)l[0] - (int)h[0]); + + const int cAlphaErrorToWeight = 8; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / 
cAlphaErrorToWeight, 1, cMaxWeight); + + vec2F ev; + + ev[0] = l[0]; + ev[1] = h[0]; + + for (uint y = 0; y < (tile_desc.m_height >> 2); y++) + { + uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); + if (block_y >= level_desc.m_block_height) + continue; + + for (uint x = 0; x < (tile_desc.m_width >> 2); x++) + { + uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); + if (block_x >= level_desc.m_block_width) + break; + + uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; + + training_vecs[block_index].first = ev; + training_vecs[block_index].second = weight; + + total_processed_blocks++; + + } // x + } // y + } //t + + if (total_processed_blocks >= next_progress_threshold) + { + next_progress_threshold += 512; + + if (!update_progress(total_processed_blocks, m_num_blocks - 1)) + return false; + } + + } // chunk_x + } // chunk_y + +#if QDXT5_DEBUGGING + if (debugging) + image_utils::save_to_file_stb(dynamic_wstring(cVarArg, L"debug_%u.tga", level).get_ptr(), debug_img, image_utils::cSaveIgnoreAlpha); +#endif + + } // level + +#if 0 + trace("chunk encoding hist: "); + for (uint i = 0; i < cNumChunkEncodings; i++) + trace("%u ", encoding_hist[i]); + trace("\n"); +#endif + } + else + { + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + return false; + } + + color_quad_u8 c[16]; + for (uint y = 0; y < cDXTBlockSize; y++) + for (uint x = 0; x < cDXTBlockSize; x++) + c[x+y*cDXTBlockSize].set(m_pBlocks[block_index].m_pixels[y][x][m_params.m_comp_index], 255); + + color_quad_u8 l, h; + dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, c, l, h); + + const uint dist = math::square((int)l[0] - (int)h[0]); + + const int cAlphaErrorToWeight = 8; + const uint cMaxWeight = 8; + uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); + + vec2F ev; + 
+ ev[0] = l[0]; + ev[1] = h[0]; + + m_endpoint_clusterizer.add_training_vec(ev, weight); + } + } + + const uint cMaxEndpointClusters = 65535U; + + m_progress_start = 75; + m_progress_range = 20; + + if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) + return false; + + crnlib::hash_map selector_hash; + + m_progress_start = 95; + m_progress_range = 5; + + for (uint block_index = 0; block_index < m_num_blocks; block_index++) + { + if ((block_index & 511) == 0) + { + if (!update_progress(block_index, m_num_blocks - 1)) + return false; + } + + dxt5_block dxt_blk; + dxt_fast::compress_alpha_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0], m_params.m_comp_index); + + uint64 selectors = 0; + for (uint i = 0; i < dxt5_block::cNumSelectorBytes; i++) + selectors |= static_cast(dxt_blk.m_selectors[i]) << (i * 8U); + + selector_hash.insert(selectors); + } + + m_max_selector_clusters = selector_hash.size() + 128; + + update_progress(1, 1); + + return true; + } + + bool qdxt5::update_progress(uint value, uint max_value) + { + if (!m_params.m_pProgress_func) + return true; + + uint percentage = max_value ? 
(m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; + if ((int)percentage == m_prev_percentage_complete) + return true; + m_prev_percentage_complete = percentage; + + if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) + { + m_canceled = true; + return false; + } + + return true; + } + + void qdxt5::pack_endpoints_task(uint64 data, void* pData_ptr) + { + pData_ptr; + const uint thread_index = static_cast(data); + + crnlib::vector cluster_pixels; + cluster_pixels.reserve(1024); + + crnlib::vector selectors; + selectors.reserve(1024); + + dxt5_endpoint_optimizer optimizer; + dxt5_endpoint_optimizer::params p; + dxt5_endpoint_optimizer::results r; + + p.m_quality = m_params.m_dxt_quality; + p.m_comp_index = m_params.m_comp_index; + p.m_use_both_block_types = m_params.m_use_both_block_types; + + uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); + cluster_index_progress_mask /= 2; + cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); + cluster_index_progress_mask -= 1; + + for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + return; + + if ((cluster_index & cluster_index_progress_mask) == 0) + { + if (get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) + return; + } + } + + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; + + selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); + + cluster_pixels.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); + + color_quad_u8* pDst = &cluster_pixels[0]; + + for (uint block_iter = 0; block_iter < 
cluster_indices.size(); block_iter++) + { + const uint block_index = cluster_indices[block_iter]; + + const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; + + for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) + { + const color_quad_u8& src = pSrc_pixels[i]; + + *pDst++ = src; + } + } + + p.m_block_index = cluster_index; + p.m_num_pixels = cluster_pixels.size(); + p.m_pPixels = cluster_pixels.begin(); + + r.m_pSelectors = selectors.begin(); + + uint low_color; + uint high_color; + if (m_params.m_dxt_quality != cCRNDXTQualitySuperFast) + { + optimizer.compute(p, r); + low_color = r.m_first_endpoint; + high_color = r.m_second_endpoint; + } + else + { + dxt_fast::compress_alpha_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), m_params.m_comp_index); + } + + const uint8* pSrc_selectors = selectors.begin(); + + for (uint block_iter = 0; block_iter < cluster_indices.size(); block_iter++) + { + const uint block_index = cluster_indices[block_iter]; + + dxt5_block& dxt_block = get_block(block_index); + + dxt_block.set_low_alpha(low_color); + dxt_block.set_high_alpha(high_color); + + for (uint y = 0; y < 4; y++) + for (uint x = 0; x < 4; x++) + dxt_block.set_selector(x, y, *pSrc_selectors++); + } + } + } + + struct optimize_selectors_params + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); + + optimize_selectors_params( + crnlib::vector< crnlib::vector >& selector_cluster_indices) : + m_selector_cluster_indices(selector_cluster_indices) + { + } + + crnlib::vector< crnlib::vector >& m_selector_cluster_indices; + }; + + void qdxt5::optimize_selectors_task(uint64 data, void* pData_ptr) + { + const uint thread_index = static_cast(data); + + optimize_selectors_params& task_params = *static_cast(pData_ptr); + + crnlib::vector block_categories[2]; + block_categories[0].reserve(2048); + block_categories[1].reserve(2048); + + for (uint cluster_index = 0; cluster_index < 
task_params.m_selector_cluster_indices.size(); cluster_index++) + { + if (m_canceled) + return; + + if ((cluster_index & 255) == 0) + { + if (get_current_thread_id() == m_main_thread_id) + { + if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) + return; + } + } + + if (m_pTask_pool->get_num_threads()) + { + if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; + + if (selector_indices.size() <= 1) + continue; + + block_categories[0].resize(0); + block_categories[1].resize(0); + + for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) + { + const uint block_index = selector_indices[block_iter]; + + const dxt5_block& src_block = get_block(block_index); + + block_categories[src_block.is_alpha6_block()].push_back(block_index); + } + + dxt5_block blk; + utils::zero_object(blk); + + for (uint block_type = 0; block_type <= 1; block_type++) + { + const crnlib::vector& block_indices = block_categories[block_type]; + if (block_indices.size() <= 1) + continue; + + for (uint y = 0; y < cDXTBlockSize; y++) + { + for (uint x = 0; x < cDXTBlockSize; x++) + { + uint best_s = 0; + uint64 best_error = 0xFFFFFFFFFFULL; + + for (uint s = 0; s < dxt5_block::cMaxSelectorValues; s++) + { + uint64 total_error = 0; + + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; + + const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; + + const dxt5_block& dst_block = get_block(block_index); + + uint values[dxt5_block::cMaxSelectorValues]; + dxt5_block::get_block_values(values, dst_block.get_low_alpha(), dst_block.get_high_alpha()); + + int error = math::square((int)orig_color[m_params.m_comp_index] - (int)values[s]); + + total_error += error; + } + + if (total_error < best_error) + { + best_error = 
total_error; + best_s = s; + } + } + + blk.set_selector(x, y, best_s); + + } // x + } // y + + for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) + { + const uint block_index = block_indices[block_iter]; + + dxt5_block& dst_block = get_block(block_index); + + memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); + } + } + + } // cluster_index + } + + bool qdxt5::generate_codebook_progress_callback(uint percentage_completed, void* pData) + { + return static_cast(pData)->update_progress(percentage_completed, 100U); + } + + bool qdxt5::create_selector_clusters(uint max_selector_clusters, crnlib::vector< crnlib::vector >& selector_cluster_indices) + { + weighted_selector_vec_array selector_vecs[2]; + crnlib::vector selector_vec_remap[2]; + + for (uint block_type = 0; block_type < 2; block_type++) + { + for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) + { + dxt5_block& dxt5_block = get_block(block_iter); + if ((uint)dxt5_block.is_alpha6_block() != block_type) + continue; + + vec16F sv; + float* pDst = &sv[0]; + + bool uses_absolute_values = false; + + for (uint y = 0; y < 4; y++) + { + for (uint x = 0; x < 4; x++) + { + const uint s = dxt5_block.get_selector(x, y); + + float f; + if (dxt5_block.is_alpha6_block()) + { + if (s >= 6) + { + uses_absolute_values = true; + f = 0.0f; + } + else + f = g_dxt5_alpha6_to_linear[s]; + } + else + f = g_dxt5_to_linear[s]; + + *pDst++ = f; + } + } + + if (uses_absolute_values) + continue; + + int low_alpha = dxt5_block.get_low_alpha(); + int high_alpha = dxt5_block.get_high_alpha(); + int dist = math::square(low_alpha - high_alpha); + + const uint cAlphaDistToWeight = 8; + const uint cMaxWeight = 2048; + uint weight = math::clamp(dist / cAlphaDistToWeight, 1, cMaxWeight); + + selector_vecs[block_type].resize(selector_vecs[block_type].size() + 1); + selector_vecs[block_type].back().m_vec = sv; + selector_vecs[block_type].back().m_weight = weight; + + 
selector_vec_remap[block_type].push_back(block_iter); + } + } + + selector_cluster_indices.clear(); + + for (uint block_type = 0; block_type < 2; block_type++) + { + if (selector_vecs[block_type].empty()) + continue; + + if ((selector_vecs[block_type].size() / (float)m_num_blocks) < .01f) + continue; + uint max_clusters = static_cast((math::emulu(selector_vecs[block_type].size(), max_selector_clusters) + (m_num_blocks - 1)) / m_num_blocks); + max_clusters = math::minimum(math::maximum(64U, max_clusters), selector_vecs[block_type].size()); + if (max_clusters >= selector_vecs[block_type].size()) + continue; + +#if QDXT5_DEBUGGING + trace("max_clusters (%u): %u\n", block_type, max_clusters); +#endif + + crnlib::vector< crnlib::vector > block_type_selector_cluster_indices; + + if (!block_type) + { + m_progress_start = m_progress_range; + m_progress_range = 16; + } + else + { + m_progress_start = m_progress_range + 16; + m_progress_range = 17; + } + + if (!m_selector_clusterizer.create_clusters( + selector_vecs[block_type], max_clusters, block_type_selector_cluster_indices, generate_codebook_progress_callback, this)) + { + return false; + } + + const uint first_cluster = selector_cluster_indices.size(); + selector_cluster_indices.enlarge(block_type_selector_cluster_indices.size()); + + for (uint i = 0; i < block_type_selector_cluster_indices.size(); i++) + { + crnlib::vector& indices = selector_cluster_indices[first_cluster + i]; + indices.swap(block_type_selector_cluster_indices[i]); + + for (uint j = 0; j < indices.size(); j++) + indices.at(j) = selector_vec_remap[block_type][indices.at(j)]; + } + } + + return true; + } + + bool qdxt5::pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params) + { + CRNLIB_ASSERT(m_num_blocks); + + m_main_thread_id = get_current_thread_id(); + m_canceled = false; + + m_pDst_elements = pDst_elements; + m_elements_per_block = elements_per_block; + m_params = params; + + m_prev_percentage_complete = -1; + + 
CRNLIB_ASSERT(m_params.m_quality_level <= qdxt5_params::cMaxQuality); + const float quality = m_params.m_quality_level / (float)qdxt5_params::cMaxQuality; + const float endpoint_quality = powf(quality, 2.1f); + const float selector_quality = powf(quality, 1.65f); + + const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 16U, m_endpoint_clusterizer.get_codebook_size()); + const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 32U, m_max_selector_clusters); + +#if QDXT5_DEBUGGING + trace("max endpoint clusters: %u\n", max_endpoint_clusters); + trace("max selector clusters: %u\n", max_selector_clusters); +#endif + + if (quality >= 1.0f) + { + m_endpoint_cluster_indices.resize(m_num_blocks); + for (uint i = 0; i < m_num_blocks; i++) + { + m_endpoint_cluster_indices[i].resize(1); + m_endpoint_cluster_indices[i][0] = i; + } + } + else + m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); + + uint total_blocks = 0; + uint max_blocks = 0; + for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) + { + uint num = m_endpoint_cluster_indices[i].size(); + total_blocks += num; + max_blocks = math::maximum(max_blocks, num); + } + + crnlib::vector< crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; + + m_progress_start = 0; + if (quality >= 1.0f) + m_progress_range = 100; + else if (selector_cluster_indices.empty()) + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; + else + m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 
10 : 50; + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &qdxt5::pack_endpoints_task, i); + m_pTask_pool->join(); + + if (m_canceled) + return false; + + if (quality >= 1.0f) + return true; + + if (selector_cluster_indices.empty()) + { + create_selector_clusters(max_selector_clusters, selector_cluster_indices); + + if (m_canceled) + { + selector_cluster_indices.clear(); + + return false; + } + } + + m_progress_start += m_progress_range; + m_progress_range = 100 - m_progress_start; + + optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &qdxt5::optimize_selectors_task, i, &optimize_selectors_task_params); + + m_pTask_pool->join(); + + return !m_canceled; + } + +} // namespace crnlib diff --git a/crnlib/crn_qdxt5.h b/crnlib/crn_qdxt5.h new file mode 100644 index 00000000..ce3c6643 --- /dev/null +++ b/crnlib/crn_qdxt5.h @@ -0,0 +1,196 @@ +// File: crn_qdxt5.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_task_pool.h" +#include "crn_spinlock.h" +#include "crn_hash_map.h" +#include "crn_clusterizer.h" +#include "crn_hash.h" +#include "crn_threaded_clusterizer.h" +#include "crn_dxt.h" +#include "crn_dxt_image.h" + +namespace crnlib +{ + struct qdxt5_params + { + qdxt5_params() + { + clear(); + } + + void clear() + { + m_quality_level = cMaxQuality; + m_dxt_quality = cCRNDXTQualityUber; + + m_pProgress_func = NULL; + m_pProgress_data = NULL; + m_num_mips = 0; + m_hierarchical = true; + utils::zero_object(m_mip_desc); + + m_comp_index = 3; + m_progress_start = 0; + m_progress_range = 100; + + m_use_both_block_types = true; + } + + void init(const dxt_image::pack_params &pp, int quality_level, bool hierarchical, int comp_index = 3) + { + m_dxt_quality = pp.m_quality; + m_hierarchical = hierarchical; + m_comp_index = comp_index; + 
m_use_both_block_types = pp.m_use_both_block_types; + m_quality_level = quality_level; + } + + enum { cMaxQuality = cCRNMaxQualityLevel }; + uint m_quality_level; + crn_dxt_quality m_dxt_quality; + bool m_hierarchical; + + struct mip_desc + { + uint m_first_block; + uint m_block_width; + uint m_block_height; + }; + + uint m_num_mips; + enum { cMaxMips = 128 }; + mip_desc m_mip_desc[cMaxMips]; + + typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); + progress_callback_func m_pProgress_func; + void* m_pProgress_data; + uint m_progress_start; + uint m_progress_range; + + uint m_comp_index; + + bool m_use_both_block_types; + }; + + class qdxt5 + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt5); + + public: + qdxt5(task_pool& task_pool); + ~qdxt5(); + + void clear(); + + bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params); + + uint get_num_blocks() const { return m_num_blocks; } + const dxt_pixel_block* get_blocks() const { return m_pBlocks; } + + bool pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params); + + private: + task_pool* m_pTask_pool; + uint32 m_main_thread_id; + bool m_canceled; + + uint m_progress_start; + uint m_progress_range; + + uint m_num_blocks; + const dxt_pixel_block* m_pBlocks; + + dxt5_block* m_pDst_elements; + uint m_elements_per_block; + qdxt5_params m_params; + + uint m_max_selector_clusters; + + int m_prev_percentage_complete; + + typedef vec<2, float> vec2F; + typedef clusterizer vec2F_clusterizer; + vec2F_clusterizer m_endpoint_clusterizer; + + crnlib::vector< crnlib::vector > m_endpoint_cluster_indices; + + typedef vec<16, float> vec16F; + typedef threaded_clusterizer vec16F_clusterizer; + + typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; + typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; + + vec16F_clusterizer m_selector_clusterizer; + + crnlib::vector< crnlib::vector > 
m_cached_selector_cluster_indices[qdxt5_params::cMaxQuality + 1]; + + struct cluster_id + { + cluster_id() : m_hash(0) + { + + } + + cluster_id(const crnlib::vector& indices) + { + set(indices); + } + + void set(const crnlib::vector& indices) + { + m_cells.resize(indices.size()); + + for (uint i = 0; i < indices.size(); i++) + m_cells[i] = static_cast(indices[i]); + + std::sort(m_cells.begin(), m_cells.end()); + + m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); + } + + bool operator< (const cluster_id& rhs) const + { + return m_cells < rhs.m_cells; + } + + bool operator== (const cluster_id& rhs) const + { + if (m_hash != rhs.m_hash) + return false; + + return m_cells == rhs.m_cells; + } + + crnlib::vector m_cells; + + size_t m_hash; + + operator size_t() const { return m_hash; } + }; + + typedef crnlib::hash_map cluster_hash; + cluster_hash m_cluster_hash; + spinlock m_cluster_hash_lock; + + static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); + static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); + bool update_progress(uint value, uint max_value); + void pack_endpoints_task(uint64 data, void* pData_ptr); + void optimize_selectors_task(uint64 data, void* pData_ptr); + bool create_selector_clusters(uint max_selector_clusters, crnlib::vector< crnlib::vector >& selector_cluster_indices); + + inline dxt5_block& get_block(uint index) const { return m_pDst_elements[index * m_elements_per_block]; } + }; + +} // namespace crnlib + + + + + + + + + + diff --git a/crnlib/crn_rand.cpp b/crnlib/crn_rand.cpp new file mode 100644 index 00000000..1e14abcb --- /dev/null +++ b/crnlib/crn_rand.cpp @@ -0,0 +1,365 @@ +// File: crn_rand.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +// See: +// http://www.ciphersbyritter.com/NEWS4/RANDC.HTM +// http://burtleburtle.net/bob/rand/smallprng.html +// http://www.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf +// See GPG7, 
page 120, or http://www.lomont.org/Math/Papers/2008/Lomont_PRNG_2008.pdf +#include "crn_core.h" +#include "crn_rand.h" +#include "crn_hash.h" + +#define znew (z=36969*(z&65535)+(z>>16)) +#define wnew (w=18000*(w&65535)+(w>>16)) +#define MWC ((znew<<16)+wnew ) +#define SHR3 (jsr^=(jsr<<17), jsr^=(jsr>>13), jsr^=(jsr<<5)) +#define CONG (jcong=69069*jcong+1234567) +#define FIB ((b=a+b),(a=b-a)) +#define KISS ((MWC^CONG)+SHR3) +#define LFIB4 (c++,t[c]=t[c]+t[UC(c+58)]+t[UC(c+119)]+t[UC(c+178)]) +#define SWB (c++,bro=(x>(32-(k)))) +#define rot(x,k) CRNLIB_ROTATE_LEFT(x,k) + +namespace crnlib +{ + static const double cNorm = 1.0 / (double)0x100000000ULL; + + kiss99::kiss99() + { + x = 123456789; + y = 362436000; + z = 521288629; + c = 7654321; + } + + void kiss99::seed(uint32 i, uint32 j, uint32 k) + { + x = i; + y = j; + z = k; + c = 7654321; + } + + inline uint32 kiss99::next() + { + x = 69069*x+12345; + + y ^= (y<<13); + y ^= (y>>17); + y ^= (y<<5); + + uint64 t = c; + t += (698769069ULL*z); + c = static_cast(t >> 32); + z = static_cast(t); + + return (x+y+z); + } + + inline uint32 ranctx::next() + { + uint32 e = a - rot(b, 27); + a = b ^ rot(c, 17); + b = c + d; + c = d + e; + d = e + a; + return d; + } + + void ranctx::seed(uint32 seed) + { + a = 0xf1ea5eed, b = c = d = seed; + for (uint32 i=0; i<20; ++i) + next(); + } + + well512::well512() + { + seed(0xDEADBE3F); + } + + void well512::seed(uint32 seed[well512::cStateSize]) + { + memcpy(m_state, seed, sizeof(m_state)); + m_index = 0; + } + + void well512::seed(uint32 seed) + { + uint32 jsr = utils::swap32(seed) ^ 0xAAC29377; + + for (uint i = 0; i < cStateSize; i++) + { + SHR3; + seed = bitmix32c(seed); + + m_state[i] = seed ^ jsr; + } + m_index = 0; + } + + void well512::seed(uint32 seed1, uint32 seed2, uint32 seed3) + { + uint32 jsr = seed2; + uint32 jcong = seed3; + + for (uint i = 0; i < cStateSize; i++) + { + SHR3; + seed1 = bitmix32c(seed1); + CONG; + + m_state[i] = seed1 ^ jsr ^ jcong; + } + m_index = 0; + } 
+ + inline uint32 well512::next() + { + uint32 a, b, c, d; + a = m_state[m_index]; + c = m_state[(m_index+13)&15]; + b = a^c^(a<<16)^(c<<15); + c = m_state[(m_index+9)&15]; + c ^= (c>>11); + a = m_state[m_index] = b^c; + d = a^((a<<5)&0xDA442D20UL); + m_index = (m_index + 15)&15; + a = m_state[m_index]; + m_state[m_index] = a^b^d^(a<<2)^(b<<18)^(c<<28); + return m_state[m_index]; + } + + random::random() + { + seed(12345,65435,34221); + } + + random::random(uint32 i) + { + seed(i); + } + + void random::seed(uint32 i1, uint32 i2, uint32 i3) + { + m_ranctx.seed(i1^i2^i3); + + m_kiss99.seed(i1, i2, i3); + + m_well512.seed(i1, i2, i3); + + for (uint i = 0; i < 100; i++) + urand32(); + } + + void random::seed(uint32 i) + { + uint32 jsr = i; + SHR3; SHR3; + uint32 jcong = utils::swap32(~jsr); + CONG; CONG; + uint32 i1 = SHR3 ^ CONG; + uint32 i2 = SHR3 ^ CONG; + uint32 i3 = SHR3 + CONG; + seed(i1, i2, i3); + } + + uint32 random::urand32() + { + return m_kiss99.next() ^ (m_ranctx.next() + m_well512.next()); + } + + uint32 random::fast_urand32() + { + return m_well512.next(); + } + + uint32 random::bit() + { + uint32 k = urand32(); + return (k ^ (k >> 6) ^ (k >> 10) ^ (k >> 30)) & 1; + } + + double random::drand(double l, double h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + return l; + + return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); + } + + float random::frand(float l, float h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + return l; + + float r = static_cast(l + (h - l) * (urand32() * cNorm)); + + return math::clamp(r, l, h); + } + + int random::irand(int l, int h) + { + CRNLIB_ASSERT(l < h); + if (l >= h) + return l; + + uint32 range = static_cast(h - l); + + uint32 rnd = urand32(); + +#if defined(_M_IX86) && defined(_MSC_VER) + //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); + uint32 x[2]; + *reinterpret_cast(x) = __emulu(range, rnd); + uint32 rnd_range = x[1]; +#else + uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); 
+#endif + + int result = l + rnd_range; + CRNLIB_ASSERT((result >= l) && (result < h)); + return result; + } + + /* + ALGORITHM 712, COLLECTED ALGORITHMS FROM ACM. + THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE, + VOL. 18, NO. 4, DECEMBER, 1992, PP. 434-435. + The function returns a normally distributed pseudo-random number + with a given mean and standard devaiation. Calls are made to a + function subprogram which must return independent random + numbers uniform in the interval (0,1). + The algorithm uses the ratio of uniforms method of A.J. Kinderman + and J.F. Monahan augmented with quadratic bounding curves. + */ + double random::gaussian(double mean, double stddev) + { + double q,u,v,x,y; + + /* + Generate P = (u,v) uniform in rect. enclosing acceptance region + Make sure that any random numbers <= 0 are rejected, since + gaussian() requires uniforms > 0, but RandomUniform() delivers >= 0. + */ + do { + u = drand(0, 1); + v = drand(0, 1); + if (u <= 0.0 || v <= 0.0) { + u = 1.0; + v = 1.0; + } + v = 1.7156 * (v - 0.5); + + /* Evaluate the quadratic form */ + x = u - 0.449871; + y = fabs(v) + 0.386595; + q = x * x + y * (0.19600 * y - 0.25472 * x); + + /* Accept P if inside inner ellipse */ + if (q < 0.27597) + break; + + /* Reject P if outside outer ellipse, or outside acceptance region */ + } while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u)); + + /* Return ratio of P's coordinates as the normal deviate */ + return (mean + stddev * v / u); + } + + void random::test() + { + } + + fast_random::fast_random() : + jsr(0xABCD917A), + jcong(0x17F3DEAD) + { + } + + fast_random::fast_random(const fast_random& other) : + jsr(other.jsr), jcong(other.jcong) + { + } + + fast_random::fast_random(uint32 i) + { + seed(i); + } + + fast_random& fast_random::operator=(const fast_random& other) + { + jsr = other.jsr; + jcong = other.jcong; + return *this; + } + + void fast_random::seed(uint32 i) + { + jsr = i; + SHR3; + SHR3; + jcong = (~i) ^ 0xDEADBEEF; + 
+ SHR3; + CONG; + } + + uint32 fast_random::urand32() + { + return SHR3 ^ CONG; + } + + int fast_random::irand(int l, int h) + { + CRNLIB_ASSERT(l < h); + if (l >= h) + return l; + + uint32 range = static_cast(h - l); + + uint32 rnd = urand32(); + +#if defined(_M_IX86) && defined(_MSC_VER) + //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); + uint32 x[2]; + *reinterpret_cast(x) = __emulu(range, rnd); + uint32 rnd_range = x[1]; +#else + uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); +#endif + + int result = l + rnd_range; + CRNLIB_ASSERT((result >= l) && (result < h)); + return result; + } + + double fast_random::drand(double l, double h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + return l; + + return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); + } + + float fast_random::frand(float l, float h) + { + CRNLIB_ASSERT(l <= h); + if (l >= h) + return l; + + float r = static_cast(l + (h - l) * (urand32() * cNorm)); + + return math::clamp(r, l, h); + } + +} // namespace crnlib + diff --git a/crnlib/crn_rand.h b/crnlib/crn_rand.h new file mode 100644 index 00000000..4cf48e81 --- /dev/null +++ b/crnlib/crn_rand.h @@ -0,0 +1,114 @@ +// File: crn_rand.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + class kiss99 + { + public: + kiss99(); + + void seed(uint32 i, uint32 j, uint32 k); + + inline uint32 next(); + + private: + uint32 x; + uint32 y; + uint32 z; + uint32 c; + }; + + class well512 + { + public: + well512(); + + enum { cStateSize = 16 }; + void seed(uint32 seed[cStateSize]); + void seed(uint32 seed); + void seed(uint32 seed1, uint32 seed2, uint32 seed3); + + inline uint32 next(); + + private: + uint32 m_state[cStateSize]; + uint32 m_index; + }; + + class ranctx + { + public: + ranctx() { seed(0xDE149737); } + + void seed(uint32 seed); + + inline uint32 next(); + + private: + uint32 a; + uint32 b; + uint32 c; + uint32 d; + }; + + class random + { + public: + 
random(); + random(uint32 i); + + void seed(uint32 i); + void seed(uint32 i1, uint32 i2, uint32 i3); + + uint32 urand32(); + + // "Fast" variant uses no multiplies. + uint32 fast_urand32(); + + uint32 bit(); + + // Returns random between [0, 1) + double drand(double l, double h); + + float frand(float l, float h); + + // Returns random between [l, h) + int irand(int l, int h); + + double gaussian(double mean, double stddev); + + void test(); + + private: + ranctx m_ranctx; + kiss99 m_kiss99; + well512 m_well512; + }; + + // Simpler, minimal state PRNG + class fast_random + { + public: + fast_random(); + fast_random(uint32 i); + fast_random(const fast_random& other); + fast_random& operator=(const fast_random& other); + + void seed(uint32 i); + + uint32 urand32(); + + int irand(int l, int h); + + double drand(double l, double h); + + float frand(float l, float h); + + private: + uint32 jsr; + uint32 jcong; + }; + +} // namespace crnlib diff --git a/crnlib/crn_ray.h b/crnlib/crn_ray.h new file mode 100644 index 00000000..b3893a2a --- /dev/null +++ b/crnlib/crn_ray.h @@ -0,0 +1,52 @@ +// File: crn_ray.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_vec.h" + +namespace crnlib +{ + template + class ray + { + public: + typedef vector_type vector_t; + typedef typename vector_type::scalar_type scalar_type; + + inline ray() { } + inline ray(eClear) { clear(); } + inline ray(const vector_type& origin, const vector_type& direction) : m_origin(origin), m_direction(direction) { } + + inline void clear() + { + m_origin.clear(); + m_direction.clear(); + } + + inline const vector_type& get_origin(void) const { return m_origin; } + inline void set_origin(const vector_type& origin) { m_origin = origin; } + + inline const vector_type& get_direction(void) const { return m_direction; } + inline void set_direction(const vector_type& direction) { m_direction = direction; } + + inline scalar_type set_endpoints(const vector_type& start, const 
vector_type& end, const vector_type& def) + { + m_origin = start; + + m_direction = end - start; + return m_direction.normalize(&def); + } + + inline vector_type eval(scalar_type t) const + { + return m_origin + m_direction * t; + } + + private: + vector_type m_origin; + vector_type m_direction; + }; + + typedef ray ray2F; + typedef ray ray3F; + +} // namespace crnlib diff --git a/crnlib/crn_rect.h b/crnlib/crn_rect.h new file mode 100644 index 00000000..8482763f --- /dev/null +++ b/crnlib/crn_rect.h @@ -0,0 +1,81 @@ +// File: crn_rect.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_vec.h" +#include "crn_hash.h" + +namespace crnlib +{ + class rect + { + public: + inline rect() + { + } + + inline rect(eClear) + { + clear(); + } + + inline rect(int left, int top, int right, int bottom) + { + set(left, top, right, bottom); + } + + inline rect(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline rect(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline void clear() + { + m_corner[0].clear(); + m_corner[1].clear(); + } + + inline void set(int left, int top, int right, int bottom) + { + m_corner[0].set(left, top); + m_corner[1].set(right, bottom); + } + + inline void set(const vec2I& lo, const vec2I& hi) + { + m_corner[0] = lo; + m_corner[1] = hi; + } + + inline void set(const vec2I& point) + { + m_corner[0] = point; + m_corner[1].set(point[0] + 1, point[1] + 1); + } + + inline uint get_width() const { return m_corner[1][0] - m_corner[0][0]; } + inline uint get_height() const { return m_corner[1][1] - m_corner[0][1]; } + + inline int get_left() const { return m_corner[0][0]; } + inline int get_top() const { return m_corner[0][1]; } + inline int get_right() const { return m_corner[1][0]; } + inline int get_bottom() const { return m_corner[1][1]; } + + inline bool is_empty() const { return (m_corner[1][0] <= m_corner[0][0]) || 
(m_corner[1][1] <= m_corner[0][1]); } + + inline uint get_dimension(uint axis) const { return m_corner[1][axis] - m_corner[0][axis]; } + + inline const vec2I& operator[] (uint i) const { CRNLIB_ASSERT(i < 2); return m_corner[i]; } + inline vec2I& operator[] (uint i) { CRNLIB_ASSERT(i < 2); return m_corner[i]; } + + private: + vec2I m_corner[2]; + }; + +} // namespace crnlib diff --git a/crnlib/crn_resample_filters.cpp b/crnlib/crn_resample_filters.cpp new file mode 100644 index 00000000..49862e61 --- /dev/null +++ b/crnlib/crn_resample_filters.cpp @@ -0,0 +1,337 @@ +// File: crn_resample_filters.cpp +// RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ +#include "crn_core.h" +#include "crn_resample_filters.h" + +namespace crnlib +{ + #define M_PI 3.14159265358979323846 + + // To add your own filter, insert the new function below and update the filter table. + // There is no need to make the filter function particularly fast, because it's + // only called during initializing to create the X and Y axis contributor tables. 

#define BOX_FILTER_SUPPORT (0.5f)
   // Box kernel (pulse/Fourier window): 1 on the half-open interval [-0.5, 0.5), 0 elsewhere.
   static float box_filter(float t)
   {
      // make_clist() calls the filter function with t inverted (pos = left, neg = right)
      return ((t >= -0.5f) && (t < 0.5f)) ? 1.0f : 0.0f;
   }

#define TENT_FILTER_SUPPORT (1.0f)
   // Tent kernel (box (*) box, bilinear/triangle): 1 - |t| for |t| < 1, otherwise 0.
   static float tent_filter(float t)
   {
      const float dist = (t < 0.0f) ? -t : t;

      return (dist < 1.0f) ? (1.0f - dist) : 0.0f;
   }

#define BELL_SUPPORT (1.5f)
   // Bell kernel (box (*) box (*) box): piecewise quadratic, zero for |t| >= 1.5.
   static float bell_filter(float t)
   {
      const float dist = (t < 0.0f) ? -t : t;

      // Central lobe.
      if (dist < .5f)
         return (.75f - (dist * dist));

      // Outer lobe.
      if (dist < 1.5f)
      {
         const float delta = (dist - 1.5f);
         return (.5f * (delta * delta));
      }

      return (0.0f);
   }

#define B_SPLINE_SUPPORT (2.0f)
   // Cubic B-spline kernel (box (*) box (*) box (*) box): piecewise cubic, zero for |t| >= 2.
   static float B_spline_filter(float t)
   {
      const float dist = (t < 0.0f) ? -t : t;

      if (dist < 1.0f)
      {
         const float dist_sq = dist * dist;
         return ((.5f * dist_sq * dist) - dist_sq + (2.0f / 3.0f));
      }

      if (dist < 2.0f)
      {
         const float rem = 2.0f - dist;
         return ((1.0f / 6.0f) * (rem * rem * rem));
      }

      return (0.0f);
   }

   // Dodgson, N., "Quadratic Interpolation for Image Resampling"
#define QUADRATIC_SUPPORT 1.5f
   // Piecewise quadratic kernel with shape parameter R; zero for |t| >= QUADRATIC_SUPPORT.
   // The three wrappers below select R = 1.0, 0.5 and 0.8.
   static float quadratic(float t, const float R)
   {
      const float dist = (t < 0.0f) ? -t : t;

      // Outside the support (this also catches NaN, exactly as the original comparison did).
      if (!(dist < QUADRATIC_SUPPORT))
         return 0.0f;

      const float dist_sq = dist * dist;

      // Inner segment, |t| <= 0.5.
      if (dist <= .5f)
         return (-2.0f * R) * dist_sq + .5f * (R + 1.0f);

      // Outer segment, 0.5 < |t| < 1.5.
      return (R * dist_sq) + (-2.0f * R - .5f) * dist + (3.0f / 4.0f) * (R + 1.0f);
   }

   // quadratic() with R = 1.0.
   static float quadratic_interp_filter(float t)
   {
      return quadratic(t, 1.0f);
   }

   // quadratic() with R = 0.5.
   static float quadratic_approx_filter(float t)
   {
      return quadratic(t, .5f);
   }

   // quadratic() with R = 0.8.
   static float quadratic_mix_filter(float t)
   {
      return quadratic(t, .8f);
   }

   // Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics."
   // Computer Graphics, Vol. 22, No. 4, pp. 221-228.
+ // (B, C) + // (1/3, 1/3) - Defaults recommended by Mitchell and Netravali + // (1, 0) - Equivalent to the Cubic B-Spline + // (0, 0.5) - Equivalent to the Catmull-Rom Spline + // (0, C) - The family of Cardinal Cubic Splines + // (B, 0) - Duff's tensioned B-Splines. + static float mitchell(float t, const float B, const float C) + { + float tt; + + tt = t * t; + + if(t < 0.0f) + t = -t; + + if(t < 1.0f) + { + t = (((12.0f - 9.0f * B - 6.0f * C) * (t * tt)) + + ((-18.0f + 12.0f * B + 6.0f * C) * tt) + + (6.0f - 2.0f * B)); + + return (t / 6.0f); + } + else if (t < 2.0f) + { + t = (((-1.0f * B - 6.0f * C) * (t * tt)) + + ((6.0f * B + 30.0f * C) * tt) + + ((-12.0f * B - 48.0f * C) * t) + + (8.0f * B + 24.0f * C)); + + return (t / 6.0f); + } + + return (0.0f); + } + +#define MITCHELL_SUPPORT (2.0f) + static float mitchell_filter(float t) + { + return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f); + } + +#define CATMULL_ROM_SUPPORT (2.0f) + static float catmull_rom_filter(float t) + { + return mitchell(t, 0.0f, .5f); + } + + static double sinc(double x) + { + x = (x * M_PI); + + if ((x < 0.01f) && (x > -0.01f)) + return 1.0f + x*x*(-1.0f/6.0f + x*x*1.0f/120.0f); + + return sin(x) / x; + } + + static float clean(double t) + { + const float EPSILON = .0000125f; + if (fabs(t) < EPSILON) + return 0.0f; + return (float)t; + } + + //static double blackman_window(double x) + //{ + // return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x); + //} + + static double blackman_exact_window(double x) + { + return 0.42659071f + 0.49656062f * cos(M_PI*x) + 0.07684867f * cos(2.0f*M_PI*x); + } + +#define BLACKMAN_SUPPORT (3.0f) + static float blackman_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 3.0f) + //return clean(sinc(t) * blackman_window(t / 3.0f)); + return clean(sinc(t) * blackman_exact_window(t / 3.0f)); + else + return (0.0f); + } + +#define GAUSSIAN_SUPPORT (1.25f) + static float gaussian_filter(float t) // with blackman window + { + if (t < 0) + t = -t; + if (t < 
GAUSSIAN_SUPPORT) + return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / GAUSSIAN_SUPPORT)); + else + return 0.0f; + } + + // Windowed sinc -- see "Jimm Blinn's Corner: Dirty Pixels" pg. 26. +#define LANCZOS3_SUPPORT (3.0f) + static float lanczos3_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 3.0f) + return clean(sinc(t) * sinc(t / 3.0f)); + else + return (0.0f); + } + +#define LANCZOS4_SUPPORT (4.0f) + static float lanczos4_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 4.0f) + return clean(sinc(t) * sinc(t / 4.0f)); + else + return (0.0f); + } + +#define LANCZOS6_SUPPORT (6.0f) + static float lanczos6_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 6.0f) + return clean(sinc(t) * sinc(t / 6.0f)); + else + return (0.0f); + } + +#define LANCZOS12_SUPPORT (12.0f) + static float lanczos12_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < 12.0f) + return clean(sinc(t) * sinc(t / 12.0f)); + else + return (0.0f); + } + + static double bessel0(double x) + { + const double EPSILON_RATIO = 1E-16; + double xh, sum, pow, ds; + int k; + + xh = 0.5 * x; + sum = 1.0; + pow = 1.0; + k = 0; + ds = 1.0; + while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? 
+ { + ++k; + pow = pow * (xh / k); + ds = pow * pow; + sum = sum + ds; + } + + return sum; + } + + static const float KAISER_ALPHA = 4.0; + static double kaiser(double alpha, double half_width, double x) + { + const double ratio = (x / half_width); + return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); + } + +#define KAISER_SUPPORT 3 + static float kaiser_filter(float t) + { + if (t < 0.0f) + t = -t; + + if (t < KAISER_SUPPORT) + { + // db atten + const float att = 40.0f; + const float alpha = (float)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96)); + //const float alpha = KAISER_ALPHA; + return (float)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t)); + } + + return 0.0f; + } + + const resample_filter g_resample_filters[] = + { + { "box", box_filter, BOX_FILTER_SUPPORT }, + { "tent", tent_filter, TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BELL_SUPPORT }, + { "b-spline", B_spline_filter, B_SPLINE_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, + { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, + }; + + const int g_num_resample_filters = sizeof(g_resample_filters) / sizeof(g_resample_filters[0]); + + int find_resample_filter(const char* pName) + { + for (int i = 0; i < g_num_resample_filters; i++) + if (_stricmp(pName, g_resample_filters[i].name) == 0) + return i; + return cInvalidIndex; + } + +} // namespace crnlib diff --git 
a/crnlib/crn_resample_filters.h b/crnlib/crn_resample_filters.h new file mode 100644 index 00000000..299fea93 --- /dev/null +++ b/crnlib/crn_resample_filters.h @@ -0,0 +1,21 @@ +// File: crn_resample_filters.h +// RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ +#pragma once + +namespace crnlib +{ + typedef float (*resample_filter_func)(float t); + + struct resample_filter + { + char name[32]; + resample_filter_func func; + float support; + }; + + extern const resample_filter g_resample_filters[]; + extern const int g_num_resample_filters; + + int find_resample_filter(const char* pName); + +} // namespace crnlib diff --git a/crnlib/crn_resampler.cpp b/crnlib/crn_resampler.cpp new file mode 100644 index 00000000..cbe2ab52 --- /dev/null +++ b/crnlib/crn_resampler.cpp @@ -0,0 +1,884 @@ +// File: crn_resampler.h +// RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ +#include "crn_core.h" +#include "crn_resampler.h" +#include "crn_resample_filters.h" + +namespace crnlib +{ + #define resampler_assert CRNLIB_ASSERT + + static inline int resampler_range_check(int v, int h) { h; resampler_assert((v >= 0) && (v < h)); return v; } + + #ifndef max + #define max(a,b) (((a) > (b)) ? (a) : (b)) + #endif + + #ifndef min + #define min(a,b) (((a) < (b)) ? (a) : (b)) + #endif + + #ifndef TRUE + #define TRUE (1) + #endif + + #ifndef FALSE + #define FALSE (0) + #endif + + #define RESAMPLER_DEBUG 0 + + // (x mod y) with special handling for negative x values. + static inline int posmod(int x, int y) + { + if (x >= 0) + return (x % y); + else + { + int m = (-x) % y; + + if (m != 0) + m = y - m; + + return (m); + } + } + + // Float to int cast with truncation. + static inline int cast_to_int(Resample_Real i) + { + return (int)i; + } + + /* Ensure that the contributing source sample is + * within bounds. If not, reflect, clamp, or wrap. 
+ */ + int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op) + { + int n; + + if (j < 0) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = -j; + + if (n >= src_x) + n = src_x - 1; + } + else if (boundary_op == BOUNDARY_WRAP) + n = posmod(j, src_x); + else + n = 0; + } + else if (j >= src_x) + { + if (boundary_op == BOUNDARY_REFLECT) + { + n = (src_x - j) + (src_x - 1); + + if (n < 0) + n = 0; + } + else if (boundary_op == BOUNDARY_WRAP) + n = posmod(j, src_x); + else + n = src_x - 1; + } + else + n = j; + + return n; + } + + // The make_clist() method generates, for all destination samples, + // the list of all source samples with non-zero weighted contributions. + Resampler::Contrib_List* Resampler::make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real (*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs) + { + typedef struct + { + // The center of the range in DISCRETE coordinates (pixel center = 0.0f). + Resample_Real center; + int left, right; + } Contrib_Bounds; + + int i, j, k, n, left, right; + Resample_Real total_weight; + Resample_Real xscale, center, half_width, weight; + Contrib_List* Pcontrib; + Contrib* Pcpool; + Contrib* Pcpool_next; + Contrib_Bounds* Pcontrib_bounds; + + if ((Pcontrib = (Contrib_List*)crnlib_calloc(dst_x, sizeof(Contrib_List))) == NULL) + return NULL; + + Pcontrib_bounds = (Contrib_Bounds*)crnlib_calloc(dst_x, sizeof(Contrib_Bounds)); + if (!Pcontrib_bounds) + { + crnlib_free(Pcontrib); + return (NULL); + } + + const Resample_Real oo_filter_scale = 1.0f / filter_scale; + + const Resample_Real NUDGE = 0.5f; + xscale = dst_x / (Resample_Real)src_x; + + if (xscale < 1.0f) + { + int total; + + /* Handle case when there are fewer destination + * samples than source samples (downsampling/minification). 
+ */ + + // stretched half width of filter + half_width = (filter_support / xscale) * filter_scale; + + // Find the range of source sample(s) that will contribute to each destination sample. + + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; + + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); + + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; + + n += (right - left + 1); + } + + /* Allocate memory for contributors. */ + + if ((n == 0) || ((Pcpool = (Contrib*)crnlib_calloc(n, sizeof(Contrib))) == NULL)) + { + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return NULL; + } + total = n; + + Pcpool_next = Pcpool; + + /* Create the list of source samples which + * contribute to each destination sample. 
+ */ + + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; + + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; + + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + resampler_assert ((Pcpool_next - Pcpool) <= total); + + total_weight = 0; + + for (j = left; j <= right; j++) + total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale); + const Resample_Real norm = static_cast(1.0f / total_weight); + + total_weight = 0; + + #if RESAMPLER_DEBUG + printf("%i: ", i); + #endif + + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm; + if (weight == 0.0f) + continue; + + n = reflect(j, src_x, boundary_op); + + #if RESAMPLER_DEBUG + printf("%i(%f), ", n, weight); + #endif + + /* Increment the number of source + * samples which contribute to the + * current destination sample. + */ + + k = Pcontrib[i].n++; + + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + + total_weight += weight; /* total weight of all contributors */ + + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } + + #if RESAMPLER_DEBUG + printf("\n\n"); + #endif + + //resampler_assert(Pcontrib[i].n); + //resampler_assert(max_k != -1); + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + crnlib_free(Pcpool); + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return NULL; + } + + if (total_weight != 1.0f) + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + else + { + /* Handle case when there are more + * destination samples than source + * samples (upsampling). + */ + + half_width = filter_support * filter_scale; + + // Find the source sample(s) that contribute to each destination sample. 
+ + for (i = 0, n = 0; i < dst_x; i++) + { + // Convert from discrete to continuous coordinates, scale, then convert back to discrete. + center = ((Resample_Real)i + NUDGE) / xscale; + center -= NUDGE; + center += src_ofs; + + left = cast_to_int((Resample_Real)floor(center - half_width)); + right = cast_to_int((Resample_Real)ceil(center + half_width)); + + Pcontrib_bounds[i].center = center; + Pcontrib_bounds[i].left = left; + Pcontrib_bounds[i].right = right; + + n += (right - left + 1); + } + + /* Allocate memory for contributors. */ + + int total = n; + if ((total == 0) || ((Pcpool = (Contrib*)crnlib_calloc(total, sizeof(Contrib))) == NULL)) + { + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return NULL; + } + + Pcpool_next = Pcpool; + + /* Create the list of source samples which + * contribute to each destination sample. + */ + + for (i = 0; i < dst_x; i++) + { + int max_k = -1; + Resample_Real max_w = -1e+20f; + + center = Pcontrib_bounds[i].center; + left = Pcontrib_bounds[i].left; + right = Pcontrib_bounds[i].right; + + Pcontrib[i].n = 0; + Pcontrib[i].p = Pcpool_next; + Pcpool_next += (right - left + 1); + resampler_assert((Pcpool_next - Pcpool) <= total); + + total_weight = 0; + for (j = left; j <= right; j++) + total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale); + + const Resample_Real norm = static_cast(1.0f / total_weight); + + total_weight = 0; + + #if RESAMPLER_DEBUG + printf("%i: ", i); + #endif + + for (j = left; j <= right; j++) + { + weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm; + if (weight == 0.0f) + continue; + + n = reflect(j, src_x, boundary_op); + + #if RESAMPLER_DEBUG + printf("%i(%f), ", n, weight); + #endif + + /* Increment the number of source + * samples which contribute to the + * current destination sample. 
+ */ + + k = Pcontrib[i].n++; + + Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ + Pcontrib[i].p[k].weight = weight; /* store src sample weight */ + + total_weight += weight; /* total weight of all contributors */ + + if (weight > max_w) + { + max_w = weight; + max_k = k; + } + } + + #if RESAMPLER_DEBUG + printf("\n\n"); + #endif + + //resampler_assert(Pcontrib[i].n); + //resampler_assert(max_k != -1); + + if ((max_k == -1) || (Pcontrib[i].n == 0)) + { + crnlib_free(Pcpool); + crnlib_free(Pcontrib); + crnlib_free(Pcontrib_bounds); + return NULL; + } + + if (total_weight != 1.0f) + Pcontrib[i].p[max_k].weight += 1.0f - total_weight; + } + } + + #if RESAMPLER_DEBUG + printf("*******\n"); + #endif + + crnlib_free(Pcontrib_bounds); + + return Pcontrib; + } + + void Resampler::resample_x(Sample* Pdst, const Sample* Psrc) + { + resampler_assert(Pdst); + resampler_assert(Psrc); + + int i, j; + Sample total; + Contrib_List *Pclist = m_Pclist_x; + Contrib *p; + + for (i = m_resample_dst_x; i > 0; i--, Pclist++) + { + #if CRNLIB_RESAMPLER_DEBUG_OPS + total_ops += Pclist->n; + #endif + + for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++) + total += Psrc[p->pixel] * p->weight; + + *Pdst++ = total; + } + } + + void Resampler::scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) + { + int i; + + #if CRNLIB_RESAMPLER_DEBUG_OPS + total_ops += dst_x; + #endif + + // Not += because temp buf wasn't cleared. 
+ for (i = dst_x; i > 0; i--) + *Ptmp++ = *Psrc++ * weight; + } + + void Resampler::scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) + { + #if CRNLIB_RESAMPLER_DEBUG_OPS + total_ops += dst_x; + #endif + + for (int i = dst_x; i > 0; i--) + (*Ptmp++) += *Psrc++ * weight; + } + + void Resampler::clamp(Sample* Pdst, int n) + { + while (n > 0) + { + Sample x = *Pdst; + *Pdst++ = clamp_sample(x); + n--; + } + } + + void Resampler::resample_y(Sample* Pdst) + { + int i, j; + Sample* Psrc; + Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y]; + + Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst; + resampler_assert(Ptmp); + + /* Process each contributor. */ + + for (i = 0; i < Pclist->n; i++) + { + /* locate the contributor's location in the scan + * buffer -- the contributor must always be found! + */ + + for (j = 0; j < MAX_SCAN_BUF_SIZE; j++) + if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel) + break; + + resampler_assert(j < MAX_SCAN_BUF_SIZE); + + Psrc = m_Pscan_buf->scan_buf_l[j]; + + if (!i) + scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + else + scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); + + /* If this source line doesn't contribute to any + * more destination lines then mark the scanline buffer slot + * which holds this source line as free. + * (The max. number of slots used depends on the Y + * axis sampling factor and the scaled filter width.) + */ + + if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0) + { + m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = FALSE; + m_Pscan_buf->scan_buf_y[j] = -1; + } + } + + /* Now generate the destination line */ + + if (m_delay_x_resample) // Was X resampling delayed until after Y resampling? 
+ { + resampler_assert(Pdst != Ptmp); + resample_x(Pdst, Ptmp); + } + else + { + resampler_assert(Pdst == Ptmp); + } + + if (m_lo < m_hi) + clamp(Pdst, m_resample_dst_x); + } + + bool Resampler::put_line(const Sample* Psrc) + { + int i; + + if (m_cur_src_y >= m_resample_src_y) + return false; + + /* Does this source line contribute + * to any destination line? if not, + * exit now. + */ + + if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)]) + { + m_cur_src_y++; + return true; + } + + /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */ + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + if (m_Pscan_buf->scan_buf_y[i] == -1) + break; + + /* If the buffer is full, exit with an error. */ + + if (i == MAX_SCAN_BUF_SIZE) + { + m_status = STATUS_SCAN_BUFFER_FULL; + return false; + } + + m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = TRUE; + m_Pscan_buf->scan_buf_y[i] = m_cur_src_y; + + /* Does this slot have any memory allocated to it? */ + + if (!m_Pscan_buf->scan_buf_l[i]) + { + if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return false; + } + } + + // Resampling on the X axis first? + if (m_delay_x_resample) + { + resampler_assert(m_intermediate_x == m_resample_src_x); + + // Y-X resampling order + memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample)); + } + else + { + resampler_assert(m_intermediate_x == m_resample_dst_x); + + // X-Y resampling order + resample_x(m_Pscan_buf->scan_buf_l[i], Psrc); + } + + m_cur_src_y++; + + return true; + } + + const Resampler::Sample* Resampler::get_line() + { + int i; + + /* If all the destination lines have been + * generated, then always return NULL. + */ + + if (m_cur_dst_y == m_resample_dst_y) + return NULL; + + /* Check to see if all the required + * contributors are present, if not, + * return NULL. 
+ */ + + for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) + if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) + return NULL; + + resample_y(m_Pdst_buf); + + m_cur_dst_y++; + + return m_Pdst_buf; + } + + Resampler::~Resampler() + { + int i; + + #if CRNLIB_RESAMPLER_DEBUG_OPS + printf("actual ops: %i\n", total_ops); + #endif + + crnlib_free(m_Pdst_buf); + m_Pdst_buf = NULL; + + if (m_Ptmp_buf) + { + crnlib_free(m_Ptmp_buf); + m_Ptmp_buf = NULL; + } + + /* Don't deallocate a contibutor list + * if the user passed us one of their own. + */ + + if ((m_Pclist_x) && (!m_clist_x_forced)) + { + crnlib_free(m_Pclist_x->p); + crnlib_free(m_Pclist_x); + m_Pclist_x = NULL; + } + + if ((m_Pclist_y) && (!m_clist_y_forced)) + { + crnlib_free(m_Pclist_y->p); + crnlib_free(m_Pclist_y); + m_Pclist_y = NULL; + } + + crnlib_free(m_Psrc_y_count); + m_Psrc_y_count = NULL; + + crnlib_free(m_Psrc_y_flag); + m_Psrc_y_flag = NULL; + + if (m_Pscan_buf) + { + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + crnlib_free(m_Pscan_buf->scan_buf_l[i]); + + crnlib_free(m_Pscan_buf); + m_Pscan_buf = NULL; + } + } + + void Resampler::restart() + { + if (STATUS_OKAY != m_status) + return; + + m_cur_src_y = m_cur_dst_y = 0; + + int i, j; + for (i = 0; i < m_resample_src_y; i++) + { + m_Psrc_y_count[i] = 0; + m_Psrc_y_flag[i] = FALSE; + } + + for (i = 0; i < m_resample_dst_y; i++) + { + for (j = 0; j < m_Pclist_y[i].n; j++) + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + + crnlib_free(m_Pscan_buf->scan_buf_l[i]); + m_Pscan_buf->scan_buf_l[i] = NULL; + } + } + + Resampler::Resampler(int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op, + Resample_Real sample_low, Resample_Real sample_high, + const char* Pfilter_name, + Contrib_List* Pclist_x, + Contrib_List* Pclist_y, + Resample_Real filter_x_scale, + Resample_Real 
filter_y_scale, + Resample_Real src_x_ofs, + Resample_Real src_y_ofs) + { + int i, j; + Resample_Real support, (*func)(Resample_Real); + + resampler_assert(src_x > 0); + resampler_assert(src_y > 0); + resampler_assert(dst_x > 0); + resampler_assert(dst_y > 0); + + #if CRNLIB_RESAMPLER_DEBUG_OPS + total_ops = 0; + #endif + + m_lo = sample_low; + m_hi = sample_high; + + m_delay_x_resample = false; + m_intermediate_x = 0; + m_Pdst_buf = NULL; + m_Ptmp_buf = NULL; + m_clist_x_forced = false; + m_Pclist_x = NULL; + m_clist_y_forced = false; + m_Pclist_y = NULL; + m_Psrc_y_count = NULL; + m_Psrc_y_flag = NULL; + m_Pscan_buf = NULL; + m_status = STATUS_OKAY; + + m_resample_src_x = src_x; + m_resample_src_y = src_y; + m_resample_dst_x = dst_x; + m_resample_dst_y = dst_y; + + m_boundary_op = boundary_op; + + if ((m_Pdst_buf = (Sample*)crnlib_malloc(m_resample_dst_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + // Find the specified filter. + + if (Pfilter_name == NULL) + Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; + + for (i = 0; i < g_num_resample_filters; i++) + if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0) + break; + + if (i == g_num_resample_filters) + { + m_status = STATUS_BAD_FILTER_NAME; + return; + } + + func = g_resample_filters[i].func; + support = g_resample_filters[i].support; + + /* Create contributor lists, unless the user supplied custom lists. 
*/ + + if (!Pclist_x) + { + m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs); + if (!m_Pclist_x) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_x = Pclist_x; + m_clist_x_forced = true; + } + + if (!Pclist_y) + { + m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs); + if (!m_Pclist_y) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + else + { + m_Pclist_y = Pclist_y; + m_clist_y_forced = true; + } + + if ((m_Psrc_y_count = (int*)crnlib_calloc(m_resample_src_y, sizeof(int))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + if ((m_Psrc_y_flag = (unsigned char*)crnlib_calloc(m_resample_src_y, sizeof(unsigned char))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + /* Count how many times each source line + * contributes to a destination line. + */ + + for (i = 0; i < m_resample_dst_y; i++) + for (j = 0; j < m_Pclist_y[i].n; j++) + m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; + + if ((m_Pscan_buf = (Scan_Buf*)crnlib_malloc(sizeof(Scan_Buf))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + + for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) + { + m_Pscan_buf->scan_buf_y[i] = -1; + m_Pscan_buf->scan_buf_l[i] = NULL; + } + + m_cur_src_y = m_cur_dst_y = 0; + { + // Determine which axis to resample first by comparing the number of multiplies required + // for each possibility. + int x_ops = count_ops(m_Pclist_x, m_resample_dst_x); + int y_ops = count_ops(m_Pclist_y, m_resample_dst_y); + + // Hack 10/2000: Weight Y axis ops a little more than X axis ops. + // (Y axis ops use more cache resources.) 
+ int xy_ops = x_ops * m_resample_src_y + + (4 * y_ops * m_resample_dst_x)/3; + + int yx_ops = (4 * y_ops * m_resample_src_x)/3 + + x_ops * m_resample_dst_y; + + #if CRNLIB_RESAMPLER_DEBUG_OPS + printf("src: %i %i\n", m_resample_src_x, m_resample_src_y); + printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y); + printf("x_ops: %i\n", x_ops); + printf("y_ops: %i\n", y_ops); + printf("xy_ops: %i\n", xy_ops); + printf("yx_ops: %i\n", yx_ops); + #endif + + // Now check which resample order is better. In case of a tie, choose the order + // which buffers the least amount of data. + if ((xy_ops > yx_ops) || + ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x)) + ) + { + m_delay_x_resample = true; + m_intermediate_x = m_resample_src_x; + } + else + { + m_delay_x_resample = false; + m_intermediate_x = m_resample_dst_x; + } + #if CRNLIB_RESAMPLER_DEBUG_OPS + printf("delaying: %i\n", m_delay_x_resample); + #endif + } + + if (m_delay_x_resample) + { + if ((m_Ptmp_buf = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == NULL) + { + m_status = STATUS_OUT_OF_MEMORY; + return; + } + } + } + + void Resampler::get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y) + { + if (ptr_clist_x) + *ptr_clist_x = m_Pclist_x; + + if (ptr_clist_y) + *ptr_clist_y = m_Pclist_y; + } + + int Resampler::get_filter_num() + { + return g_num_resample_filters; + } + + const char* Resampler::get_filter_name(int filter_num) + { + if ((filter_num < 0) || (filter_num >= g_num_resample_filters)) + return NULL; + else + return g_resample_filters[filter_num].name; + } + +} // namespace crnlib diff --git a/crnlib/crn_resampler.h b/crnlib/crn_resampler.h new file mode 100644 index 00000000..160e9943 --- /dev/null +++ b/crnlib/crn_resampler.h @@ -0,0 +1,173 @@ +// File: crn_resampler.h +// RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ +#pragma once + +namespace crnlib +{ + #define 
CRNLIB_RESAMPLER_DEBUG_OPS 0 + #define CRNLIB_RESAMPLER_DEFAULT_FILTER "lanczos4" + + #define CRNLIB_RESAMPLER_MAX_DIMENSION 16384 + + // float or double + typedef float Resample_Real; + + class Resampler + { + public: + typedef Resample_Real Sample; + + struct Contrib + { + Resample_Real weight; + unsigned short pixel; + }; + + struct Contrib_List + { + unsigned short n; + Contrib* p; + }; + + enum Boundary_Op + { + BOUNDARY_WRAP = 0, + BOUNDARY_REFLECT = 1, + BOUNDARY_CLAMP = 2 + }; + + enum Status + { + STATUS_OKAY = 0, + STATUS_OUT_OF_MEMORY = 1, + STATUS_BAD_FILTER_NAME = 2, + STATUS_SCAN_BUFFER_FULL = 3 + }; + + // src_x/src_y - Input dimensions + // dst_x/dst_y - Output dimensions + // boundary_op - How to sample pixels near the image boundaries + // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high + // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler + // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay) + Resampler( + int src_x, int src_y, + int dst_x, int dst_y, + Boundary_Op boundary_op = BOUNDARY_CLAMP, + Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f, + const char* Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER, + Contrib_List* Pclist_x = NULL, + Contrib_List* Pclist_y = NULL, + Resample_Real filter_x_scale = 1.0f, + Resample_Real filter_y_scale = 1.0f, + Resample_Real src_x_ofs = 0.0f, + Resample_Real src_y_ofs = 0.0f); + + ~Resampler(); + + // Reinits resampler so it can handle another frame. + void restart(); + + // false on out of memory. + bool put_line(const Sample* Psrc); + + // NULL if no scanlines are currently available (give the resampler more scanlines!) + const Sample* get_line(); + + Status status() const { return m_status; } + + // Returned contributor lists can be shared with another Resampler. 
+ void get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y); + Contrib_List* get_clist_x() const { return m_Pclist_x; } + Contrib_List* get_clist_y() const { return m_Pclist_y; } + + // Filter accessors. + static int get_filter_num(); + static const char* get_filter_name(int filter_num); + + static Contrib_List* make_clist( + int src_x, int dst_x, Boundary_Op boundary_op, + Resample_Real (*Pfilter)(Resample_Real), + Resample_Real filter_support, + Resample_Real filter_scale, + Resample_Real src_ofs); + + private: + Resampler(); + Resampler(const Resampler& o); + Resampler& operator= (const Resampler& o); + + #ifdef CRNLIB_RESAMPLER_DEBUG_OPS + int total_ops; + #endif + + int m_intermediate_x; + + int m_resample_src_x; + int m_resample_src_y; + int m_resample_dst_x; + int m_resample_dst_y; + + Boundary_Op m_boundary_op; + + Sample* m_Pdst_buf; + Sample* m_Ptmp_buf; + + Contrib_List* m_Pclist_x; + Contrib_List* m_Pclist_y; + + bool m_clist_x_forced; + bool m_clist_y_forced; + + bool m_delay_x_resample; + + int* m_Psrc_y_count; + unsigned char* m_Psrc_y_flag; + + // The maximum number of scanlines that can be buffered at one time. 
+ enum { MAX_SCAN_BUF_SIZE = CRNLIB_RESAMPLER_MAX_DIMENSION }; + + struct Scan_Buf + { + int scan_buf_y[MAX_SCAN_BUF_SIZE]; + Sample* scan_buf_l[MAX_SCAN_BUF_SIZE]; + }; + + Scan_Buf* m_Pscan_buf; + + int m_cur_src_y; + int m_cur_dst_y; + + Status m_status; + + void resample_x(Sample* Pdst, const Sample* Psrc); + void scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x); + void scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x); + void clamp(Sample* Pdst, int n); + void resample_y(Sample* Pdst); + + static int reflect(const int j, const int src_x, const Boundary_Op boundary_op); + + inline int count_ops(Contrib_List* Pclist, int k) + { + int i, t = 0; + for (i = 0; i < k; i++) + t += Pclist[i].n; + return (t); + } + + Resample_Real m_lo; + Resample_Real m_hi; + + inline Resample_Real clamp_sample(Resample_Real f) const + { + if (f < m_lo) + f = m_lo; + else if (f > m_hi) + f = m_hi; + return f; + } + }; + +} // namespace crnlib + diff --git a/crnlib/crn_ryg_dxt.cpp b/crnlib/crn_ryg_dxt.cpp new file mode 100644 index 00000000..fc2d7a52 --- /dev/null +++ b/crnlib/crn_ryg_dxt.cpp @@ -0,0 +1,609 @@ +// File: crn_ryg_dxt.cpp +// RYG's real-time DXT compressor - Public domain. +#include "crn_core.h" +#include "crn_ryg_types.hpp" +#include "crn_ryg_dxt.hpp" + +#ifdef _MSC_VER +#pragma warning (disable: 4244) // conversion from 'a' to 'b', possible loss of data +#endif + +namespace ryg_dxt +{ + // Couple of tables... 
+ sU8 Expand5[32]; + sU8 Expand6[64]; + sU8 OMatch5[256][2]; + sU8 OMatch6[256][2]; + sU8 OMatch5_3[256][2]; + sU8 OMatch6_3[256][2]; + sU8 QuantRBTab[256+16]; + sU8 QuantGTab[256+16]; + + static sInt Mul8Bit(sInt a,sInt b) + { + sInt t = a*b + 128; + return (t + (t >> 8)) >> 8; + } + + union Pixel + { + struct + { + sU8 b,g,r,a; + }; + sU32 v; + + void From16Bit(sU16 v) + { + sInt rv = (v & 0xf800) >> 11; + sInt gv = (v & 0x07e0) >> 5; + sInt bv = (v & 0x001f) >> 0; + + a = 0; + r = Expand5[rv]; + g = Expand6[gv]; + b = Expand5[bv]; + } + + sU16 As16Bit() const + { + return (Mul8Bit(r,31) << 11) + (Mul8Bit(g,63) << 5) + Mul8Bit(b,31); + } + + void LerpRGB(const Pixel &p1,const Pixel &p2,sInt f) + { + r = p1.r + Mul8Bit(p2.r - p1.r,f); + g = p1.g + Mul8Bit(p2.g - p1.g,f); + b = p1.b + Mul8Bit(p2.b - p1.b,f); + } + }; + + /****************************************************************************/ + + static void PrepareOptTable4(sU8 *Table,const sU8 *expand,sInt size) + { + for(sInt i=0;i<256;i++) + { + sInt bestErr = 256; + + for(sInt min=0;min>8); // approx. .03f + + if(err < bestErr) + { + Table[i*2+0] = max; + Table[i*2+1] = min; + bestErr = err; + } + } + } + } + } + + static void PrepareOptTable3(sU8 *Table,const sU8 *expand,sInt size) + { + for(sInt i=0;i<256;i++) + { + sInt bestErr = 256; + + for(sInt min=0;min> 1) - i); + err += ((sAbs(maxe-mine)*8)>>8); // approx. .03f + + if(err < bestErr) + { + Table[i*2+0] = max; + Table[i*2+1] = min; + bestErr = err; + } + } + } + } + } + + static inline void EvalColors(Pixel *color,sU16 c0,sU16 c1) + { + color[0].From16Bit(c0); + color[1].From16Bit(c1); + color[2].LerpRGB(color[0],color[1],0x55); + color[3].LerpRGB(color[0],color[1],0xaa); + } + + // Block dithering function. Simply dithers a block to 565 RGB. 
+ // (Floyd-Steinberg) + /* Works on a 4x4 block of 4-byte Pixels, one byte channel at a time (byte offsets 0..2; the byte at offset 3 is never written to dest). ch == 1 is quantized through QuantGTab, the other two channels through QuantRBTab; the +8 bias on the table pointer leaves room for the diffused error to push the index slightly below 0 or above 255. ep1 receives the current row's quantization errors and ep2 holds the previous row's; the two are swapped after every row. */ + static void DitherBlock(Pixel *dest,const Pixel *block) + { + sInt err[8],*ep1 = err,*ep2 = err+4; + + // process channels separately + for(sInt ch=0;ch<3;ch++) + { + sU8 *bp = (sU8 *) block; + sU8 *dp = (sU8 *) dest; + sU8 *quant = (ch == 1) ? QuantGTab+8 : QuantRBTab+8; + + bp += ch; + dp += ch; + sSetMem(err,0,sizeof(err)); + + for(sInt y=0;y<4;y++) + { + // pixel 0 + dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)]; + ep1[0] = bp[ 0] - dp[ 0]; + + // pixel 1 + dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)]; + ep1[1] = bp[ 4] - dp[ 4]; + + // pixel 2 + dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)]; + ep1[2] = bp[ 8] - dp[ 8]; + + // pixel 3 + dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)]; + ep1[3] = bp[12] - dp[12]; + + // advance to next line + sSwap(ep1,ep2); + bp += 16; + dp += 16; + } + } + } + + // The color matching function + /* Returns sixteen 2-bit palette indices packed into 32 bits, pixel 0 in the lowest bits. Every pixel and each of the four palette entries in color[] is projected onto the color[0]-color[1] direction; the index is chosen by comparing the pixel's projection with the midpoints of the palette projection pairs (1,3), (3,2) and (2,0). */ + static sU32 MatchColorsBlock(const Pixel *block,const Pixel *color,sBool dither) + { + sU32 mask = 0; + sInt dirr = color[0].r - color[1].r; + sInt dirg = color[0].g - color[1].g; + sInt dirb = color[0].b - color[1].b; + + sInt dots[16]; + for(sInt i=0;i<16;i++) + dots[i] = block[i].r*dirr + block[i].g*dirg + block[i].b*dirb; + + sInt stops[4]; + for(sInt i=0;i<4;i++) + stops[i] = color[i].r*dirr + color[i].g*dirg + color[i].b*dirb; + + sInt c0Point = (stops[1] + stops[3]) >> 1; + sInt halfPoint = (stops[3] + stops[2]) >> 1; + sInt c3Point = (stops[2] + stops[0]) >> 1; + + if(!dither) + { + // the version without dithering is straightforward + for(sInt i=15;i>=0;i--) + { + mask <<= 2; + sInt dot = dots[i]; + + if(dot < halfPoint) + mask |= (dot < c0Point) ? 1 : 3; + else + mask |= (dot < c3Point) ?
2 : 0; + } + } + else + { + // with floyd-steinberg dithering (see above) + /* dot carries the pixel projection scaled by 16 plus the weighted error terms, so the three thresholds are scaled by 16 as well; the error stored for a pixel is its unscaled projection minus the projection of the chosen palette entry. */ + sInt err[8],*ep1 = err,*ep2 = err+4; + sInt *dp = dots; + + c0Point <<= 4; + halfPoint <<= 4; + c3Point <<= 4; + for(sInt i=0;i<8;i++) + err[i] = 0; + + for(sInt y=0;y<4;y++) + { + sInt dot,lmask,step; + + // pixel 0 + dot = (dp[0] << 4) + (3*ep2[1] + 5*ep2[0]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + + ep1[0] = dp[0] - stops[step]; + lmask = step; + + // pixel 1 + dot = (dp[1] << 4) + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + + ep1[1] = dp[1] - stops[step]; + lmask |= step<<2; + + // pixel 2 + dot = (dp[2] << 4) + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + + ep1[2] = dp[2] - stops[step]; + lmask |= step<<4; + + // pixel 3 + dot = (dp[3] << 4) + (7*ep1[2] + 5*ep2[3] + ep2[2]); + if(dot < halfPoint) + step = (dot < c0Point) ? 1 : 3; + else + step = (dot < c3Point) ? 2 : 0; + + ep1[3] = dp[3] - stops[step]; + lmask |= step<<6; + + // advance to next line + sSwap(ep1,ep2); + dp += 4; + mask |= lmask << (y*8); + } + } + + return mask; + } + + // The color optimization function.
(Clever code, part 1) + static void OptimizeColorsBlock(const Pixel *block,sU16 &max16,sU16 &min16) + { + static const sInt nIterPower = 4; + + // determine color distribution + sInt mu[3],min[3],max[3]; + + for(sInt ch=0;ch<3;ch++) + { + const sU8 *bp = ((const sU8 *) block) + ch; + sInt muv,minv,maxv; + + muv = minv = maxv = bp[0]; + for(sInt i=4;i<64;i+=4) + { + muv += bp[i]; + minv = sMin(minv,bp[i]); + maxv = sMax(maxv,bp[i]); + } + + mu[ch] = (muv + 8) >> 4; + min[ch] = minv; + max[ch] = maxv; + } + + // determine covariance matrix + sInt cov[6]; + for(sInt i=0;i<6;i++) + cov[i] = 0; + + for(sInt i=0;i<16;i++) + { + sInt r = block[i].r - mu[2]; + sInt g = block[i].g - mu[1]; + sInt b = block[i].b - mu[0]; + + cov[0] += r*r; + cov[1] += r*g; + cov[2] += r*b; + cov[3] += g*g; + cov[4] += g*b; + cov[5] += b*b; + } + + // convert covariance matrix to float, find principal axis via power iter + sF32 covf[6],vfr,vfg,vfb; + for(sInt i=0;i<6;i++) + covf[i] = cov[i] / 255.0f; + + vfr = max[2] - min[2]; + vfg = max[1] - min[1]; + vfb = max[0] - min[0]; + + for(sInt iter=0;iter maxd) + { + maxd = dot; + maxp = block[i]; + } + } + + // Reduce to 16 bit colors + max16 = maxp.As16Bit(); + min16 = minp.As16Bit(); + } + + // The refinement function. (Clever code, part 2) + // Tries to optimize colors to suit block contents better. + // (By solving a least squares system via normal equations+Cramer's rule) + static sBool RefineBlock(const Pixel *block,sU16 &max16,sU16 &min16,sU32 mask) + { + static const sInt w1Tab[4] = { 3,0,2,1 }; + static const sInt prods[4] = { 0x090000,0x000900,0x040102,0x010402 }; + // ^some magic to save a lot of multiplies in the accumulating loop... 
+ + sInt akku = 0; + sInt At1_r,At1_g,At1_b; + sInt At2_r,At2_g,At2_b; + sU32 cm = mask; + + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for(sInt i=0;i<16;i++,cm>>=2) + { + sInt step = cm&3; + sInt w1 = w1Tab[step]; + sInt r = block[i].r; + sInt g = block[i].g; + sInt b = block[i].b; + + akku += prods[step]; + At1_r += w1*r; + At1_g += w1*g; + At1_b += w1*b; + At2_r += r; + At2_g += g; + At2_b += b; + } + + At2_r = 3*At2_r - At1_r; + At2_g = 3*At2_g - At1_g; + At2_b = 3*At2_b - At1_b; + + // extract solutions and decide solvability + sInt xx = akku >> 16; + sInt yy = (akku >> 8) & 0xff; + sInt xy = (akku >> 0) & 0xff; + + if(!yy || !xx || xx*yy == xy*xy) + return sFALSE; + + sF32 frb = 3.0f * 31.0f / 255.0f / (xx*yy - xy*xy); + sF32 fg = frb * 63.0f / 31.0f; + + sU16 oldMin = min16; + sU16 oldMax = max16; + + // solve. + max16 = sClamp((At1_r*yy - At2_r*xy)*frb+0.5f,0,31) << 11; + max16 |= sClamp((At1_g*yy - At2_g*xy)*fg +0.5f,0,63) << 5; + max16 |= sClamp((At1_b*yy - At2_b*xy)*frb+0.5f,0,31) << 0; + + min16 = sClamp((At2_r*xx - At1_r*xy)*frb+0.5f,0,31) << 11; + min16 |= sClamp((At2_g*xx - At1_g*xy)*fg +0.5f,0,63) << 5; + min16 |= sClamp((At2_b*xx - At1_b*xy)*frb+0.5f,0,31) << 0; + + return oldMin != min16 || oldMax != max16; + } + + // Color block compression + static void CompressColorBlock(sU8 *dest,const sU32 *src,sInt quality) + { + const Pixel *block = (const Pixel *) src; + Pixel dblock[16],color[4]; + + // check if block is constant + sU32 min,max; + min = max = block[0].v; + + for(sInt i=1;i<16;i++) + { + min = sMin(min,block[i].v); + max = sMax(max,block[i].v); + } + + // perform block compression + sU16 min16,max16; + sU32 mask; + + if(min != max) // no constant color + { + // first step: compute dithered version for PCA if desired + if(quality) + DitherBlock(dblock,block); + + // second step: pca+map along principal axis + OptimizeColorsBlock(quality ? 
dblock : block,max16,min16); + if(max16 != min16) + { + EvalColors(color,max16,min16); + mask = MatchColorsBlock(block,color,quality != 0); + } + else + mask = 0; + + // third step: refine + if(RefineBlock(quality ? dblock : block,max16,min16,mask)) + { + if(max16 != min16) + { + EvalColors(color,max16,min16); + mask = MatchColorsBlock(block,color,quality != 0); + } + else + mask = 0; + } + + } + else // constant color + { + sInt r = block[0].r; + sInt g = block[0].g; + sInt b = block[0].b; + + mask = 0xaaaaaaaa; + max16 = (OMatch5[r][0]<<11) | (OMatch6[g][0]<<5) | OMatch5[b][0]; + min16 = (OMatch5[r][1]<<11) | (OMatch6[g][1]<<5) | OMatch5[b][1]; + } + + // write the color block + if(max16 < min16) + { + sSwap(max16,min16); + mask ^= 0x55555555; + } + + ((sU16 *) dest)[0] = max16; + ((sU16 *) dest)[1] = min16; + ((sU32 *) dest)[1] = mask; + } + + // Alpha block compression (this is easy for a change) + static void CompressAlphaBlock(sU8 *dest,const sU32 *src,sInt quality) + { + quality; + const Pixel *block = (const Pixel *) src; + + // find min/max color + sInt min,max; + min = max = block[0].a; + + for(sInt i=1;i<16;i++) + { + min = sMin(min,block[i].a); + max = sMax(max,block[i].a); + } + + // encode them + *dest++ = max; + *dest++ = min; + + // determine bias and emit color indices + sInt dist = max-min; + sInt bias = min*7 - (dist >> 1); + sInt dist4 = dist*4; + sInt dist2 = dist*2; + sInt bits = 0,mask=0; + + for(sInt i=0;i<16;i++) + { + sInt a = block[i].a*7 - bias; + sInt ind,t; + + // select index (hooray for bit magic) + t = (dist4 - a) >> 31; ind = t & 4; a -= dist4 & t; + t = (dist2 - a) >> 31; ind += t & 2; a -= dist2 & t; + t = (dist - a) >> 31; ind += t & 1; + + ind = -ind & 7; + ind ^= (2 > ind); + + // write index + mask |= ind << bits; + if((bits += 3) >= 8) + { + *dest++ = mask; + mask >>= 8; + bits -= 8; + } + } + } + + /****************************************************************************/ + + void sInitDXT() + { + for(sInt i=0;i<32;i++) 
+ Expand5[i] = (i<<3)|(i>>2); + + for(sInt i=0;i<64;i++) + Expand6[i] = (i<<2)|(i>>4); + + for(sInt i=0;i<256+16;i++) + { + sInt v = sClamp(i-8,0,255); + QuantRBTab[i] = Expand5[Mul8Bit(v,31)]; + QuantGTab[i] = Expand6[Mul8Bit(v,63)]; + } + + PrepareOptTable4(&OMatch5[0][0],Expand5,32); + PrepareOptTable4(&OMatch6[0][0],Expand6,64); + + PrepareOptTable3(&OMatch5_3[0][0],Expand5,32); + PrepareOptTable3(&OMatch6_3[0][0],Expand6,64); + } + + void sCompressDXTBlock(sU8 *dest,const sU32 *src,sBool alpha,sInt quality) + { + CRNLIB_ASSERT(Expand5[1]); + + // if alpha specified, compress alpha as well + if(alpha) + { + CompressAlphaBlock(dest,src,quality); + dest += 8; + } + + // compress the color part + CompressColorBlock(dest,src,quality); + } + + void sCompressDXT5ABlock(sU8 *dest,const sU32 *src,sInt quality) + { + CRNLIB_ASSERT(Expand5[1]); + + CompressAlphaBlock(dest,src,quality); + } + +} // namespace ryg_dxt + diff --git a/crnlib/crn_semaphore.h b/crnlib/crn_semaphore.h new file mode 100644 index 00000000..e2f71656 --- /dev/null +++ b/crnlib/crn_semaphore.h @@ -0,0 +1,25 @@ +// File: crn_semaphore.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + class semaphore + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); + + public: + semaphore(int32 initialCount = 0, int32 maximumCount = 1, const char* pName = NULL); + ~semaphore(); + + inline void *get_handle(void) const { return m_handle; } + + void release(int32 releaseCount = 1, int32 *pPreviousCount = NULL); + + bool wait(uint32 milliseconds = UINT32_MAX); + + private: + void *m_handle; + }; + +} // namespace crnlib diff --git a/crnlib/crn_sparse_array.h b/crnlib/crn_sparse_array.h new file mode 100644 index 00000000..62a93a7d --- /dev/null +++ b/crnlib/crn_sparse_array.h @@ -0,0 +1,399 @@ +// File: crn_sparse_array.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + template + class sparse_array_traits + { + 
public: + static inline void* alloc_space(uint size) + { + return crnlib_malloc(size); + } + + static inline void free_space(void* p) + { + crnlib_free(p); + } + + static inline void construct_group(T* p) + { + scalar_type::construct_array(p, 1U << Log2N); + } + + static inline void destruct_group(T* p) + { + scalar_type::destruct_array(p, 1U << Log2N); + } + + static inline void construct_element(T* p) + { + scalar_type::construct(p); + } + + static inline void destruct_element(T* p) + { + scalar_type::destruct(p); + } + + static inline void copy_group(T* pDst, const T* pSrc) + { + for (uint j = 0; j < (1U << Log2N); j++) + pDst[j] = pSrc[j]; + } + }; + + template class Traits = sparse_array_traits> + class sparse_array : public Traits + { + public: + enum { N = 1U << Log2N }; + + inline sparse_array() : m_size(0), m_num_active_groups(0) + { + init_default(); + } + + inline sparse_array(uint size) : m_size(0), m_num_active_groups(0) + { + init_default(); + + resize(size); + } + + inline sparse_array(const sparse_array& other) : m_size(0), m_num_active_groups(0) + { + init_default(); + + *this = other; + } + + inline ~sparse_array() + { + for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) + free_group(m_groups[i]); + + deinit_default(); + } + + bool assign(const sparse_array& other) + { + if (this == &other) + return true; + + if (!try_resize(other.size())) + return false; + + for (uint i = 0; i < other.m_groups.size(); i++) + { + const T* p = other.m_groups[i]; + + T* q = m_groups[i]; + + if (p) + { + if (!q) + { + q = alloc_group(true); + if (!q) + return false; + + m_groups[i] = q; + } + + copy_group(q, p); + } + else if (q) + { + free_group(q); + m_groups[i] = NULL; + } + } + + return true; + } + + sparse_array& operator= (const sparse_array& other) + { + if (!assign(other)) + { + CRNLIB_FAIL("Out of memory"); + } + + return *this; + } + + bool operator== (const sparse_array& other) const + { + if (m_size != other.m_size) + return false; + + 
for (uint i = 0; i < m_size; i++) + if (!((*this)[i] == other[i])) + return false; + + return true; + } + + bool operator< (const sparse_array& rhs) const + { + const uint min_size = math::minimum(m_size, rhs.m_size); + + uint i; + for (i = 0; i < min_size; i++) + if (!((*this)[i] == rhs[i])) + break; + + if (i < min_size) + return (*this)[i] < rhs[i]; + + return m_size < rhs.m_size; + } + + void clear() + { + if (m_groups.size()) + { + for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) + free_group(m_groups[i]); + + m_groups.clear(); + } + + m_size = 0; + + CRNLIB_ASSERT(!m_num_active_groups); + } + + bool try_resize(uint size) + { + if (m_size == size) + return true; + + const uint new_num_groups = (size + N - 1) >> Log2N; + if (new_num_groups != m_groups.size()) + { + for (uint i = new_num_groups; i < m_groups.size(); i++) + free_group(m_groups[i]); + + if (!m_groups.try_resize(new_num_groups)) + return false; + } + + m_size = size; + return true; + } + + void resize(uint size) + { + if (!try_resize(size)) + { + CRNLIB_FAIL("Out of memory"); + } + } + + inline uint size() const { return m_size; } + inline bool empty() const { return 0 == m_size; } + + inline uint capacity() const { return m_groups.size(); } + + inline const T& operator[] (uint i) const + { + CRNLIB_ASSERT(i < m_size); + const T* p = m_groups[i >> Log2N]; + const void *t = m_default; + return p ? p[i & (N - 1)] : *reinterpret_cast(t); + } + + inline const T* get(uint i) const + { + CRNLIB_ASSERT(i < m_size); + const T* p = m_groups[i >> Log2N]; + return p ? &p[i & (N - 1)] : NULL; + } + + inline T* get(uint i) + { + CRNLIB_ASSERT(i < m_size); + T* p = m_groups[i >> Log2N]; + return p ? 
&p[i & (N - 1)] : NULL; + } + + inline bool is_present(uint i) const + { + CRNLIB_ASSERT(i < m_size); + return m_groups[i >> Log2N] != NULL; + } + + inline uint get_num_groups() const { return m_groups.size(); } + + inline const T* get_group(uint group_index) const + { + return m_groups[group_index]; + } + + inline T* get_group(uint group_index) + { + return m_groups[group_index]; + } + + inline uint get_group_size() const + { + return N; + } + + /* Returns a pointer to element index, allocating its group if it is absent and growing m_size to index + 1 when needed; returns NULL if a group cannot be allocated or appended. index may be at most m_size. */ + inline T* ensure_valid(uint index) + { + CRNLIB_ASSERT(index <= m_size); + + const uint group_index = index >> Log2N; + + if (group_index >= m_groups.size()) + { + T* p = alloc_group(true); + if (!p) + return NULL; + + if (!m_groups.try_push_back(p)) + { + free_group(p); + return NULL; + } + } + + T* p = m_groups[group_index]; + if (!p) + { + p = alloc_group(true); + if (!p) + return NULL; + + m_groups[group_index] = p; + } + + m_size = math::maximum(index + 1, m_size); + + return p + (index & (N - 1)); + } + + inline bool set(uint index, const T& obj) + { + T* p = ensure_valid(index); + if (!p) + return false; + + *p = obj; + + return true; + } + + inline void push_back(const T& obj) + { + if (!set(m_size, obj)) + { + CRNLIB_FAIL("Out of memory"); + } + } + + inline bool try_push_back(const T& obj) + { + return set(m_size, obj); + } + + inline void pop_back() + { + CRNLIB_ASSERT(m_size); + if (m_size) + resize(m_size - 1); + } + + /* Releases every group that lies entirely inside [start, start + num); elements of a partially covered first or last group are left as they are. start and num are element indices, so the range is checked against the element capacity (number of groups << Log2N) rather than the group count, and the slot that is reset to NULL is the one that was just freed (first_group + i). NOTE(review): assumes math::get_align_up_value_delta(start, N) yields the distance from start up to the next multiple of N - confirm. */ + inline void unset_range(uint start, uint num) + { + if (!num) + return; + + CRNLIB_ASSERT((start + num) <= (m_groups.size() << Log2N)); + + const uint num_to_skip = math::minimum(math::get_align_up_value_delta(start, N), num); + num -= num_to_skip; + + const uint first_group = (start + num_to_skip) >> Log2N; + const uint num_groups = num >> Log2N; + + for (uint i = 0; i < num_groups; i++) + { + T* p = m_groups[first_group + i]; + if (p) + { + free_group(p); + m_groups[first_group + i] = NULL; + } + } + } + + inline void unset_all() + { + unset_range(0, m_groups.size() << Log2N); + } + + inline void swap(sparse_array& other)
+ { + utils::swap(m_size, other.m_size); + m_groups.swap(other.m_groups); + utils::swap(m_num_active_groups, other.m_num_active_groups); + } + + private: + uint m_size; + uint m_num_active_groups; + + crnlib::vector m_groups; + + /* Raw, uint64-aligned storage for one T; it is constructed by init_default() and destroyed by deinit_default(). */ + uint64 m_default[(sizeof(T) + sizeof(uint64) - 1) / sizeof(uint64)]; + + /* Allocates and constructs one group of N elements and increments m_num_active_groups. When the allocation fails, NULL is returned if nofail is true; otherwise the failure is reported through CRNLIB_FAIL. */ + inline T* alloc_group(bool nofail = false) + { + T* p = static_cast(alloc_space(N * sizeof(T))); + + if (!p) + { + if (nofail) + return NULL; + + CRNLIB_FAIL("Out of memory"); + } + + construct_group(p); + + m_num_active_groups++; + + return p; + } + + /* Does nothing for NULL; otherwise destructs and frees the group and decrements m_num_active_groups. */ + inline void free_group(T* p) + { + if (p) + { + CRNLIB_ASSERT(m_num_active_groups); + m_num_active_groups--; + + destruct_group(p); + + free_space(p); + } + } + + inline void init_default() + { + construct_element(reinterpret_cast(m_default)); + } + + inline void deinit_default() + { + destruct_element(reinterpret_cast(m_default)); + } + }; + +} // namespace crnlib diff --git a/crnlib/crn_sparse_bit_array.cpp b/crnlib/crn_sparse_bit_array.cpp new file mode 100644 index 00000000..d0bdcb37 --- /dev/null +++ b/crnlib/crn_sparse_bit_array.cpp @@ -0,0 +1,538 @@ +// File: crn_sparse_bit_array.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_sparse_bit_array.h" + +namespace crnlib +{ + sparse_bit_array::sparse_bit_array() : + m_num_groups(0), m_ppGroups(NULL) + { + } + + sparse_bit_array::sparse_bit_array(uint size) : + m_num_groups(0), m_ppGroups(NULL) + { + resize(size); + } + + /* Deep copy: every group present in other is duplicated with memcpy, absent groups stay NULL. */ + sparse_bit_array::sparse_bit_array(sparse_bit_array& other) + { + m_num_groups = other.m_num_groups; + m_ppGroups = (uint32**)crnlib_malloc(m_num_groups * sizeof(uint32*)); + CRNLIB_VERIFY(m_ppGroups); + + for (uint i = 0; i < m_num_groups; i++) + { + if (other.m_ppGroups[i]) + { + m_ppGroups[i] = alloc_group(false); + memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); + } + else + m_ppGroups[i] = NULL; + } + } + + sparse_bit_array::~sparse_bit_array() + { + clear(); + } + +
sparse_bit_array& sparse_bit_array::operator= (sparse_bit_array& other) + { + if (this == &other) + return *this; + + if (m_num_groups != other.m_num_groups) + { + clear(); + + m_num_groups = other.m_num_groups; + m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); + CRNLIB_VERIFY(m_ppGroups); + } + + for (uint i = 0; i < m_num_groups; i++) + { + if (other.m_ppGroups[i]) + { + if (!m_ppGroups[i]) + m_ppGroups[i] = alloc_group(false); + memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); + } + else if (m_ppGroups[i]) + { + free_group(m_ppGroups[i]); + m_ppGroups[i] = NULL; + } + } + + return *this; + } + + void sparse_bit_array::clear() + { + if (!m_num_groups) + return; + + for (uint i = 0; i < m_num_groups; i++) + free_group(m_ppGroups[i]); + + crnlib_free(m_ppGroups); + m_ppGroups = NULL; + + m_num_groups = 0; + } + + void sparse_bit_array::swap(sparse_bit_array& other) + { + utils::swap(m_ppGroups, other.m_ppGroups); + utils::swap(m_num_groups, other.m_num_groups); + } + + void sparse_bit_array::optimize() + { + for (uint i = 0; i < m_num_groups; i++) + { + uint32* s = m_ppGroups[i]; + if (s) + { + uint j; + for (j = 0; j < cDWORDsPerGroup; j++) + if (s[j]) + break; + if (j == cDWORDsPerGroup) + { + free_group(s); + m_ppGroups[i] = NULL; + } + } + } + } + + void sparse_bit_array::set_bit_range(uint index, uint num) + { + CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + + if (!num) + return; + else if (num == 1) + { + set_bit(index); + return; + } + + while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + const uint group_bit_ofs = index & cBitsPerGroupMask; + + const uint dword_bit_ofs = group_bit_ofs & 31; + const uint max_bits_to_set = 32 - dword_bit_ofs; + + const uint 
bits_to_set = math::minimum(max_bits_to_set, num); + const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + + pGroup[group_bit_ofs >> 5] |= (msk << dword_bit_ofs); + + num -= bits_to_set; + if (!num) + return; + + index += bits_to_set; + } + + while (num >= cBitsPerGroup) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + memset(pGroup, 0xFF, sizeof(uint32) * cDWORDsPerGroup); + + num -= cBitsPerGroup; + index += cBitsPerGroup; + } + + while (num) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + uint group_bit_ofs = index & cBitsPerGroupMask; + + uint bits_to_set = math::minimum(32U, num); + uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + + pGroup[group_bit_ofs >> 5] |= (msk << (group_bit_ofs & 31)); + + num -= bits_to_set; + index += bits_to_set; + } + } + + void sparse_bit_array::clear_all_bits() + { + for (uint i = 0; i < m_num_groups; i++) + { + uint32* pGroup = m_ppGroups[i]; + if (pGroup) + memset(pGroup, 0, sizeof(uint32) * cDWORDsPerGroup); + } + } + + void sparse_bit_array::clear_bit_range(uint index, uint num) + { + CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + + if (!num) + return; + else if (num == 1) + { + clear_bit(index); + return; + } + + while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + const uint group_bit_ofs = index & cBitsPerGroupMask; + + const uint dword_bit_ofs = group_bit_ofs & 31; + const uint max_bits_to_set = 32 - dword_bit_ofs; + + const uint bits_to_set = math::minimum(max_bits_to_set, num); + + uint32* pGroup = 
m_ppGroups[group_index]; + if (pGroup) + { + const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + + pGroup[group_bit_ofs >> 5] &= (~(msk << dword_bit_ofs)); + } + + num -= bits_to_set; + if (!num) + return; + + index += bits_to_set; + } + + while (num >= cBitsPerGroup) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + free_group(pGroup); + m_ppGroups[group_index] = NULL; + } + + num -= cBitsPerGroup; + index += cBitsPerGroup; + } + + while (num) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint bits_to_set = math::minimum(32u, num); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + uint group_bit_ofs = index & cBitsPerGroupMask; + + uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); + + pGroup[group_bit_ofs >> 5] &= (~(msk << (group_bit_ofs & 31))); + } + + num -= bits_to_set; + index += bits_to_set; + } + } + + void sparse_bit_array::resize(uint size) + { + uint num_groups = (size + cBitsPerGroup - 1) >> cBitsPerGroupShift; + if (num_groups == m_num_groups) + return; + + if (!num_groups) + { + clear(); + return; + } + + sparse_bit_array temp; + temp.swap(*this); + + m_num_groups = num_groups; + m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); + CRNLIB_VERIFY(m_ppGroups); + + uint n = math::minimum(temp.m_num_groups, m_num_groups); + for (uint i = 0; i < n; i++) + { + uint32* p = temp.m_ppGroups[i]; + if (p) + { + m_ppGroups[i] = temp.m_ppGroups[i]; + temp.m_ppGroups[i] = NULL; + } + } + } + + sparse_bit_array& sparse_bit_array::operator&= (const sparse_bit_array& other) + { + if (this == &other) + return *this; + + CRNLIB_VERIFY(other.m_num_groups == m_num_groups); + + for (uint i = 0; i < m_num_groups; i++) + { + uint32* d = m_ppGroups[i]; + if (!d) + continue; + uint32* s = other.m_ppGroups[i]; + + if (!s) + { + free_group(d); + m_ppGroups[i] = NULL; + 
} + else + { + uint32 oc = 0; + for (uint j = 0; j < cDWORDsPerGroup; j++) + { + uint32 c = d[j] & s[j]; + d[j] = c; + oc |= c; + } + if (!oc) + { + free_group(d); + m_ppGroups[i] = NULL; + } + } + } + + return *this; + } + + sparse_bit_array& sparse_bit_array::operator|= (const sparse_bit_array& other) + { + if (this == &other) + return *this; + + CRNLIB_VERIFY(other.m_num_groups == m_num_groups); + + for (uint i = 0; i < m_num_groups; i++) + { + uint32* s = other.m_ppGroups[i]; + if (!s) + continue; + + uint32* d = m_ppGroups[i]; + if (!d) + { + d = alloc_group(true); + m_ppGroups[i] = d; + memcpy(d, s, cBytesPerGroup); + } + else + { + uint32 oc = 0; + for (uint j = 0; j < cDWORDsPerGroup; j++) + { + uint32 c = d[j] | s[j]; + d[j] = c; + oc |= c; + } + if (!oc) + { + free_group(d); + m_ppGroups[i] = NULL; + } + } + } + + return *this; + } + + sparse_bit_array& sparse_bit_array::and_not(const sparse_bit_array& other) + { + if (this == &other) + return *this; + + CRNLIB_VERIFY(other.m_num_groups == m_num_groups); + + for (uint i = 0; i < m_num_groups; i++) + { + uint32* d = m_ppGroups[i]; + if (!d) + continue; + uint32* s = other.m_ppGroups[i]; + if (!s) + continue; + + uint32 oc = 0; + for (uint j = 0; j < cDWORDsPerGroup; j++) + { + uint32 c = d[j] & (~s[j]); + d[j] = c; + oc |= c; + } + if (!oc) + { + free_group(d); + m_ppGroups[i] = NULL; + } + } + + return *this; + } + + int sparse_bit_array::find_first_set_bit(uint index, uint num) const + { + CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); + + if (!num) + return -1; + + while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + const uint group_bit_ofs = index & cBitsPerGroupMask; + const uint dword_bit_ofs = group_bit_ofs & 31; + + const uint max_bits_to_examine = 32 - dword_bit_ofs; + const uint bits_to_examine = math::minimum(max_bits_to_examine, num); + + uint32* pGroup = 
m_ppGroups[group_index]; + if (pGroup) + { + const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); + + uint bits = pGroup[group_bit_ofs >> 5] & (msk << dword_bit_ofs); + if (bits) + { + uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + int set_index = num_trailing_zeros + (index & ~31); + CRNLIB_ASSERT(get_bit(set_index)); + return set_index; + } + } + + num -= bits_to_examine; + if (!num) + return -1; + + index += bits_to_examine; + } + + while (num >= cBitsPerGroup) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + for (uint i = 0; i < cDWORDsPerGroup; i++) + { + uint32 bits = pGroup[i]; + if (bits) + { + uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + + int set_index = num_trailing_zeros + index + (i << 5); + CRNLIB_ASSERT(get_bit(set_index)); + return set_index; + } + } + + } + + num -= cBitsPerGroup; + index += cBitsPerGroup; + } + + while (num) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint bits_to_examine = math::minimum(32U, num); + + uint32* pGroup = m_ppGroups[group_index]; + if (pGroup) + { + uint group_bit_ofs = index & cBitsPerGroupMask; + + uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); + + uint32 bits = pGroup[group_bit_ofs >> 5] & (msk << (group_bit_ofs & 31)); + if (bits) + { + uint num_trailing_zeros = math::count_trailing_zero_bits(bits); + + int set_index = num_trailing_zeros + (index & ~31); + CRNLIB_ASSERT(get_bit(set_index)); + return set_index; + } + } + + num -= bits_to_examine; + index += bits_to_examine; + } + + return -1; + } + +} // namespace crnlib + + + + + + + + + + + + diff --git a/crnlib/crn_sparse_bit_array.h b/crnlib/crn_sparse_bit_array.h new file mode 100644 index 00000000..75a26bce --- /dev/null +++ b/crnlib/crn_sparse_bit_array.h @@ -0,0 +1,176 @@ +// File: crn_sparse_bit_array.h +// See Copyright Notice 
and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + /* Bit array stored as an array of optional fixed-size groups of cBitsPerGroup bits; a NULL group pointer stands for a group whose bits are all zero. */ + class sparse_bit_array + { + public: + sparse_bit_array(); + sparse_bit_array(uint size); + sparse_bit_array(sparse_bit_array& other); + ~sparse_bit_array(); + + sparse_bit_array& operator= (sparse_bit_array& other); + + void clear(); + + inline uint get_size() { return (m_num_groups << cBitsPerGroupShift); } + + void resize(uint size); + + sparse_bit_array& operator&= (const sparse_bit_array& other); + sparse_bit_array& operator|= (const sparse_bit_array& other); + sparse_bit_array& and_not(const sparse_bit_array& other); + + void swap(sparse_bit_array& other); + + void optimize(); + + void set_bit_range(uint index, uint num); + void clear_bit_range(uint index, uint num); + + void clear_all_bits(); + + /* Sets bit index, allocating a zero-filled group the first time a bit in that group is touched. */ + inline void set_bit(uint index) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + uint bit_ofs = index & (cBitsPerGroup - 1); + + pGroup[bit_ofs >> 5] |= (1U << (bit_ofs & 31)); + } + + /* Clears bit index. An absent (NULL) group already stands for all-zero bits, so there is nothing to clear and no group is allocated for it. */ + inline void clear_bit(uint index) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + return; + + uint bit_ofs = index & (cBitsPerGroup - 1); + + pGroup[bit_ofs >> 5] &= (~(1U << (bit_ofs & 31))); + } + + inline void set(uint index, bool value) + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + uint bit_ofs = index & (cBitsPerGroup - 1); + + uint bit = (1U << (bit_ofs & 31)); + + uint c = pGroup[bit_ofs >> 5]; + uint mask = (uint)(-(int)value); + + pGroup[bit_ofs >> 5]
= (c & ~bit) | (mask & bit); + } + + inline bool get_bit(uint index) const + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + return 0; + + uint bit_ofs = index & (cBitsPerGroup - 1); + + uint bit = (1U << (bit_ofs & 31)); + + return (pGroup[bit_ofs >> 5] & bit) != 0; + } + + inline uint32 get_uint32(uint index) const + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + return 0; + + uint bit_ofs = index & (cBitsPerGroup - 1); + + return pGroup[bit_ofs >> 5]; + } + + inline void set_uint32(uint index, uint32 value) const + { + uint group_index = index >> cBitsPerGroupShift; + CRNLIB_ASSERT(group_index < m_num_groups); + + uint32* pGroup = m_ppGroups[group_index]; + if (!pGroup) + { + pGroup = alloc_group(true); + m_ppGroups[group_index] = pGroup; + } + + uint bit_ofs = index & (cBitsPerGroup - 1); + + pGroup[bit_ofs >> 5] = value; + } + + int find_first_set_bit(uint index, uint num) const; + + enum + { + cDWORDsPerGroupShift = 4U, + cDWORDsPerGroup = 1U << cDWORDsPerGroupShift, + + cBitsPerGroupShift = cDWORDsPerGroupShift + 5, + cBitsPerGroup = 1U << cBitsPerGroupShift, + cBitsPerGroupMask = cBitsPerGroup - 1U, + + cBytesPerGroup = cDWORDsPerGroup * sizeof(uint32) + }; + + uint get_num_groups() const { return m_num_groups; } + uint32** get_groups() { return m_ppGroups; } + + private: + uint m_num_groups; + uint32** m_ppGroups; + + static inline uint32* alloc_group(bool clear) + { + uint32* p = (uint32*)crnlib_malloc(cBytesPerGroup); + CRNLIB_VERIFY(p); + if (clear) memset(p, 0, cBytesPerGroup); + return p; + } + + static inline void free_group(void* p) + { + if (p) + crnlib_free(p); + } + }; + + +} // namespace crnlib diff --git a/crnlib/crn_spinlock.h b/crnlib/crn_spinlock.h new file mode 100644 index 00000000..d608d335 --- /dev/null +++ 
b/crnlib/crn_spinlock.h @@ -0,0 +1,38 @@ +// File: crn_spinlock.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + // Simple non-recursive spinlock. + class spinlock + { + public: + inline spinlock() : m_flag(0) { } + + void lock(uint32 max_spins = 4096, bool yielding = true, bool memoryBarrier = true); + + inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) { lock(max_spins, yielding, false); } + + void unlock(); + + inline void unlock_no_barrier() { m_flag = CRNLIB_FALSE; } + + private: + volatile int32 m_flag; + }; + + class scoped_spinlock + { + scoped_spinlock(const scoped_spinlock&); + scoped_spinlock& operator= (const scoped_spinlock&); + + public: + inline scoped_spinlock(spinlock& lock) : m_lock(lock) { m_lock.lock(); } + inline ~scoped_spinlock() { m_lock.unlock(); } + + private: + spinlock& m_lock; + }; + +} // namespace crnlib diff --git a/crnlib/crn_stb_image.cpp b/crnlib/crn_stb_image.cpp new file mode 100644 index 00000000..213d028f --- /dev/null +++ b/crnlib/crn_stb_image.cpp @@ -0,0 +1,3952 @@ +/* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c + when you control the images you're loading + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline (no JPEG progressive, no oddball channel decimations) + PNG 8-bit only + BMP non-1bpp, non-RLE + TGA (not sure what subset, if a subset) + PSD (composited view only, no extra channels) + HDR (radiance rgbE format) + writes BMP,TGA (define STBI_NO_WRITE to remove code) + decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) + + TODO: + stbi_info_* + + history: + 1.18 fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one 
too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another stb_malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less + than 16 
available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant +*/ + +#ifdef _MSC_VER +#pragma warning (disable: 4793) // function compiled as native +#endif + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +//// begin header file //////////////////////////////////////////////////// +// +// Limitations: +// - no progressive/interlaced support (jpeg, png) +// - 8-bit samples only (jpeg, png) +// - not threadsafe +// - channel subsampling of at most 2 in each dimension (jpeg) +// - no delayed line count (jpeg) -- IJG doesn't support either +// +// Basic usage (see HDR discussion below): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *comp -- outputs # of image components in image file +// int req_comp -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise. 
+// If req_comp is non-zero, *comp has the number of components that _would_ +// have been output otherwise. E.g. if you set req_comp to 4, you will always +// get RGBA output, but you can check *comp to easily see if it's opaque. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *comp will be unchanged. The function stbi_failure_reason() +// can be queried for an extremely brief, end-user unfriendly explanation +// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid +// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG and BMP images are automatically depalettized. +// +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
// Symbolic names for per-pixel component counts. The numeric value of each
// name is the number of interleaved 8-bit components it stands for.
enum
{
   STBI_default = 0, // only used for req_comp

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4, // red, green, blue, alpha
};
+// returns TRUE on success, FALSE if couldn't open file, error writing file +extern int stbi_write_bmp (char const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_bmp_w (wchar_t const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_tga (char const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_tga_w (wchar_t const *filename, int x, int y, int comp, const void *data); +#endif + +// PRIMARY API - works on images of any type + +// load image by filename, open file, or memory buffer +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_w (wchar_t const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +// for stbi_load_from_file, file pointer is left pointing immediately after image + +#ifndef STBI_NO_HDR +#ifndef STBI_NO_STDIO +extern float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif +extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + +extern void stbi_hdr_to_ldr_gamma(float gamma); +extern void stbi_hdr_to_ldr_scale(float scale); + +extern void stbi_ldr_to_hdr_gamma(float gamma); +extern void stbi_ldr_to_hdr_scale(float scale); + +#endif // STBI_NO_HDR + +// get a VERY brief reason for failure +// NOT THREADSAFE +extern char *stbi_failure_reason (void); + +// free the loaded image -- this is just stb_free() +extern void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding 
+extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +extern int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_is_hdr (char const *filename); +extern int stbi_is_hdr_from_file(FILE *f); +#endif + +// ZLIB client - used by PNG, available for other purposes + +extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +// TYPE-SPECIFIC ACCESS + +// is it a jpeg? +extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_test_file (FILE *f); +extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a png? 
+extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? +extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? 
// define new loaders
// A loader is a table of callbacks: a 'test' function that reports whether
// the data is in the loader's format, and a 'load' function with the same
// parameters as the built-in loaders. The FILE-based pair only exists when
// STBI_NO_STDIO is not defined.
typedef struct
{
   int       (*test_memory)(stbi_uc const *buffer, int len);
   stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp);
   #ifndef STBI_NO_STDIO
   int       (*test_file)(FILE *f);
   stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp);
   #endif
} stbi_loader;

// register a loader by filling out the above structure (you must define ALL functions)
// returns 1 if added or already added, 0 if not added (too many loaders)
// NOT THREADSAFE
extern int stbi_register_loader(stbi_loader *loader);
// Most recent failure message recorded by e(), or NULL if none was recorded.
// this is not threadsafe
static char *failure_reason;

// Returns the message recorded by the most recent failure (NULL if none).
// The pointer refers to a string owned by the library; callers must not
// modify or free it.
char *stbi_failure_reason(void)
{
   return failure_reason;
}

// Records 'str' as the current failure reason and returns 0 so that callers
// can write 'return e(...)' from functions that report failure as 0.
// Takes a const pointer because it is called with string literals, which
// cannot legally convert to 'char *' in C++11 and later. The string is only
// stored, never written through, so casting away const is safe here.
static int e(const char *str)
{
   failure_reason = const_cast<char *>(str);
   return 0;
}
STBI_NO_FAILURE_STRINGS + #define e(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define e(x,y) e(y) +#else + #define e(x,y) e(x) +#endif + +#define epf(x,y) ((float *) (e(x,y)?NULL:NULL)) +#define epuc(x,y) ((unsigned char *) (e(x,y)?NULL:NULL)) + +void stbi_image_free(void *retval_from_stbi_load) +{ + stb_free(retval_from_stbi_load); +} + +#define MAX_LOADERS 32 +stbi_loader *loaders[MAX_LOADERS]; +static int max_loaders = 0; + +int stbi_register_loader(stbi_loader *loader) +{ + int i; + for (i=0; i < MAX_LOADERS; ++i) { + // already present? + if (loaders[i] == loader) + return 1; + // end of the list? + if (loaders[i] == NULL) { + loaders[i] = loader; + max_loaders = i+1; + return 1; + } + } + // no room for it + return 0; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_STDIO +unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_w(wchar_t const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = _wfopen(filename, L"rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_file(f)) + return stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + if (stbi_png_test_file(f)) + return stbi_png_load_from_file(f,x,y,comp,req_comp); + if (stbi_bmp_test_file(f)) + return stbi_bmp_load_from_file(f,x,y,comp,req_comp); + if (stbi_psd_test_file(f)) + return stbi_psd_load_from_file(f,x,y,comp,req_comp); + #ifndef STBI_NO_HDR + 
if (stbi_hdr_test_file(f)) { + float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_file(f)) + return loaders[i]->load_from_file(f,x,y,comp,req_comp); + // test tga last because it's a crappy test! + if (stbi_tga_test_file(f)) + return stbi_tga_load_from_file(f,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_memory(buffer,len)) + return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_png_test_memory(buffer,len)) + return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_bmp_test_memory(buffer,len)) + return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_psd_test_memory(buffer,len)) + return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp); + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) { + float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_memory(buffer,len)) + return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp); + // test tga last because it's a crappy test! 
+ if (stbi_tga_test_memory(buffer,len)) + return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} + +#ifndef STBI_NO_HDR + +#ifndef STBI_NO_STDIO +float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + float *result; + if (!f) return epf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) + return stbi_hdr_load_from_file(f,x,y,comp,req_comp); + #endif + data = stbi_load_from_file(f, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) + return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + #endif + data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +// these is-hdr-or-not is defined independent of whether STBI_NO_HDR is +// defined, for API simplicity; if STBI_NO_HDR is defined, it always +// reports false! 
#ifndef STBI_NO_HDR
// HDR -> LDR conversion constants, stored as inverses of what the user sets.
static float h2l_gamma_i = 1.0f/2.2f;
static float h2l_scale_i = 1.0f;

// LDR -> HDR conversion constants, stored exactly as the user sets them.
static float l2h_gamma = 2.2f;
static float l2h_scale = 1.0f;

// Sets the gamma used when converting HDR data to LDR (stored inverted).
void stbi_hdr_to_ldr_gamma(float gamma)
{
   h2l_gamma_i = 1.0f / gamma;
}

// Sets the scale used when converting HDR data to LDR (stored inverted).
void stbi_hdr_to_ldr_scale(float scale)
{
   h2l_scale_i = 1.0f / scale;
}

// Sets the gamma used when promoting LDR data to HDR.
void stbi_ldr_to_hdr_gamma(float gamma)
{
   l2h_gamma = gamma;
}

// Sets the scale used when promoting LDR data to HDR.
void stbi_ldr_to_hdr_scale(float scale)
{
   l2h_scale = scale;
}
#endif
(s->img_file) { + int c = fgetc(s->img_file); + return c == EOF ? 0 : c; + } +#endif + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + return 0; +} + +__forceinline static int at_eof(stbi *s) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) + return feof(s->img_file); +#endif + return s->img_buffer >= s->img_buffer_end; +} + +__forceinline static uint8 get8u(stbi *s) +{ + return (uint8) get8(s); +} + +static void skip(stbi *s, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) + fseek(s->img_file, n, SEEK_CUR); + else +#endif + s->img_buffer += n; +} + +static int get16(stbi *s) +{ + int z = get8(s); + return (z << 8) + get8(s); +} + +static uint32 get32(stbi *s) +{ + uint32 z = get16(s); + return (z << 16) + get16(s); +} + +static int get16le(stbi *s) +{ + int z = get8(s); + return z + (get8(s) << 8); +} + +static uint32 get32le(stbi *s) +{ + uint32 z = get16le(s); + return z + (get16le(s) << 16); +} + +static void getn(stbi *s, stbi_uc *buffer, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + fread(buffer, 1, n, s->img_file); + return; + } +#endif + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; +} + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). 
png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so stb_malloc a new one and free that one +// only failure mode is stb_malloc failing + +static uint8 compute_y(int r, int g, int b) +{ + return (uint8) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + assert(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stb_malloc(req_comp * x * y); + if (good == NULL) { + stb_free(data); + return epuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define COMBO(a,b) ((a)*8+(b)) + #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch(COMBO(img_n, req_comp)) { + CASE(1,2) dest[0]=src[0], dest[1]=255; break; + CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break; + CASE(2,1) dest[0]=src[0]; break; + CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break; + CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break; + CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break; + CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break; + CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break; + default: assert(0); + } + #undef CASE + } + + stb_free(data); + return good; +} + +#ifndef STBI_NO_HDR +static float 
*ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output = (float *) stb_malloc(x * y * comp * sizeof(float)); + if (output == NULL) { stb_free(data); return epf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale; + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + stb_free(data); + return output; +} + +#define float2int(x) ((int) (x)) +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output = (stbi_uc *) stb_malloc(x * y * comp); + if (output == NULL) { stb_free(data); return epuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = float2int(z); + } + } + stb_free(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation) +// +// simple implementation +// - channel subsampling of at most 2 in each dimension +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good 
upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - uses a lot of intermediate memory, could cache poorly +// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4 +// stb_jpeg: 1.34 seconds (MSVC6, default release build) +// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro) +// IJL11.dll: 1.08 seconds (compiled by intel) +// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) +// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + uint8 fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + uint16 code[256]; + uint8 values[256]; + uint8 size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} huffman; + +typedef struct +{ + #if STBI_SIMD + unsigned short dequant2[4][64]; + #endif + stbi s; + huffman huff_dc[4]; + huffman huff_ac[4]; + uint8 dequant[4][64]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + uint8 *data; + void *raw_data; + uint8 *linebuf; + } img_comp[4]; + + uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int scan_n, order[4]; + int restart_interval, todo; +} jpeg; + +static int build_huffman(huffman *h, int *count) +{ + int i,j,k=0,code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + 
h->size[k++] = (uint8) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (uint16) (code++); + if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (uint8) i; + } + } + } + return 1; +} + +static void grow_buffer_unsafe(jpeg *j) +{ + do { + int b = j->nomore ? 0 : get8(&j->s); + if (b == 0xff) { + int c = get8(&j->s); + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer = (j->code_buffer << 8) | b; + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +__forceinline static int decode(jpeg *j, huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + if (h->size[k] > j->code_bits) + return -1; + j->code_bits -= h->size[k]; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. 
To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + if (j->code_bits < 16) + temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff; + else + temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k]; + assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + return h->values[c]; +} + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. +__forceinline static int extend_receive(jpeg *j, int n) +{ + unsigned int m = 1 << (n-1); + unsigned int k; + if (j->code_bits < n) grow_buffer_unsafe(j); + k = (j->code_buffer >> (j->code_bits - n)) & bmask[n]; + j->code_bits -= n; + // the following test is probably a random branch that won't + // predict well. I tried to table accelerate it but failed. + // maybe it's compiling as a conditional move? + if (k < m) + return (-1 << n) + k + 1; + else + return k; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static uint8 dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b) +{ + int diff,dc,k; + int t = decode(j, hdc); + if (t < 0) return e("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) dc; + + // decode AC components, see JPEG spec + k = 1; + do { + int r,s; + int rs = decode(j, hac); + if (rs < 0) return e("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + data[dezigzag[k++]] = (short) extend_receive(j,s); + } + } while (k < 64); + return 1; +} + +// take a -128..127 value and clamp it and convert to 0..255 +__forceinline static uint8 clamp(int x) +{ + x += 128; + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (uint8) x; +} + +#define f2f(x) (int) (((x) * 4096 + 0.5)) +#define fsh(x) ((x) << 12) + +// derived from jidctint -- DCT_ISLOW +#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * f2f(0.5411961f); \ + t2 = p1 + p3*f2f(-1.847759065f); \ + t3 = p1 + p2*f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = fsh(p2+p3); \ + t1 = fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; 
\ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*f2f( 1.175875602f); \ + t0 = t0*f2f( 0.298631336f); \ + t1 = t1*f2f( 2.053119869f); \ + t2 = t2*f2f( 3.072711026f); \ + t3 = t3*f2f( 1.501321110f); \ + p1 = p5 + p1*f2f(-0.899976223f); \ + p2 = p5 + p2*f2f(-2.562915447f); \ + p3 = p3*f2f(-1.961570560f); \ + p4 = p4*f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +#if !STBI_SIMD +// .344 seconds on 3*anemones.jpg +static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize) +{ + int i,val[64],*v=val; + uint8 *o,*dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // 
we've got an extra 1<<3, so 1<<17 total we need to remove. + x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +#else +static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize) +{ + int i,val[64],*v=val; + uint8 *o; + unsigned short *dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. 
+ x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +static stbi_idct_8x8 stbi_idct_installed = idct_block; + +extern void stbi_install_idct(stbi_idct_8x8 func) +{ + stbi_idct_installed = func; +} +#endif + +#define MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static uint8 get_marker(jpeg *j) +{ + uint8 x; + if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; } + x = get8u(&j->s); + if (x != 0xff) return MARKER_none; + while (x == 0xff) + x = get8u(&j->s); + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, reset the entropy decoder and +// the dc prediction +static void reset(jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->marker = MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int parse_entropy_coded_data(jpeg *z) +{ + reset(z); + if (z->scan_n == 1) { + int i,j; + #if STBI_SIMD + __declspec(align(16)) + #endif + short data[64]; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } else { // interleaved! + int i,j,k,x,y; + short data[64]; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } + return 1; +} + +static int process_marker(jpeg *z, int m) +{ + int L; + switch (m) { + case MARKER_none: // no marker found + return e("expected marker","Corrupt JPEG"); + + case 0xC2: // SOF - progressive + return e("progressive jpeg","JPEG format not supported (progressive)"); + + case 0xDD: // DRI - specify restart interval + if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG"); + z->restart_interval = get16(&z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = get16(&z->s)-2; + while (L > 0) { + int q = get8(&z->s); + int p = q >> 4; + int t = q & 15,i; + if (p != 0) return e("bad DQT type","Corrupt JPEG"); + if (t > 3) return e("bad DQT table","Corrupt JPEG"); + for (i=0; i < 64; ++i) + z->dequant[t][dezigzag[i]] = get8u(&z->s); + #if STBI_SIMD + for (i=0; i < 64; ++i) + z->dequant2[t][i] = z->dequant[t][i]; + #endif + L -= 65; + } + return 
L==0; + + case 0xC4: // DHT - define huffman table + L = get16(&z->s)-2; + while (L > 0) { + uint8 *v; + int sizes[16],i,m=0; + int q = get8(&z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = get8(&z->s); + m += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < m; ++i) + v[i] = get8u(&z->s); + L -= m; + } + return L==0; + } + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + skip(&z->s, get16(&z->s)-2); + return 1; + } + return 0; +} + +// after we see SOS +static int process_scan_header(jpeg *z) +{ + int i; + int Ls = get16(&z->s); + z->scan_n = get8(&z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = get8(&z->s), which; + int q = get8(&z->s); + for (which = 0; which < z->s.img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s.img_n) return 0; + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + get8(&z->s); // should be 63, but might be 0 + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + + return 1; +} + +static int process_frame_header(jpeg *z, int scan) +{ + stbi *s = &z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG + p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit 
only"); // JPEG baseline + s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires + c = get8(s); + if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG"); + + for (i=0; i < s->img_n; ++i) { + z->img_comp[i].id = get8(s); + if (z->img_comp[i].id != i+1) // JFIF requires + if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! + return e("bad component ID","Corrupt JPEG"); + q = get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); + z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); + } + + if (scan != SCAN_load) return 1; + + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. 
for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].raw_data = stb_malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); + if (z->img_comp[i].raw_data == NULL) { + for(--i; i >= 0; --i) { + stb_free(z->img_comp[i].raw_data); + z->img_comp[i].data = NULL; + } + return e("outofmem", "Out of memory"); + } + // align blocks for installable-idct using mmx/sse + z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + z->img_comp[i].linebuf = NULL; + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define DNL(x) ((x) == 0xdc) +#define SOI(x) ((x) == 0xd8) +#define EOI(x) ((x) == 0xd9) +#define SOF(x) ((x) == 0xc0 || (x) == 0xc1) +#define SOS(x) ((x) == 0xda) + +static int decode_jpeg_header(jpeg *z, int scan) +{ + int m; + z->marker = MARKER_none; // initialize cached marker to empty + m = get_marker(z); + if (!SOI(m)) return e("no SOI","Corrupt JPEG"); + if (scan == SCAN_type) return 1; + m = get_marker(z); + while (!SOF(m)) { + if (!process_marker(z,m)) return 0; + m = get_marker(z); + while (m == MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG"); + m = get_marker(z); + } + } + if (!process_frame_header(z, scan)) return 0; + return 1; +} + +static int decode_jpeg_image(jpeg *j) +{ + int m; + j->restart_interval = 0; + if (!decode_jpeg_header(j, SCAN_load)) return 0; + m = get_marker(j); + while (!EOI(m)) { + if (SOS(m)) { + if (!process_scan_header(j)) return 0; + if (!parse_entropy_coded_data(j)) return 0; + } else { + if (!process_marker(j, m)) return 0; + } + m = get_marker(j); + } + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1, + int w, int hs); + +#define div4(x) ((uint8) ((x) >> 2)) + +static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + return in_near; +} + +static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + for (i=0; i < w; ++i) + out[i] = div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + uint8 *input = in_near; + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + 
return out; + } + + out[0] = input[0]; + out[1] = div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = div4(n+input[i-1]); + out[i*2+1] = div4(n+input[i+1]); + } + out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + return out; +} + +#define div16(x) ((uint8) ((x) >> 4)) + +static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = div16(3*t0 + t1 + 8); + out[i*2 ] = div16(3*t1 + t0 + 8); + } + out[w*2-1] = div4(t1+2); + return out; +} + +static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +#define float2fixed(x) ((int) ((x) * 65536 + 0.5)) + +// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) +// VC6 without processor=Pro is generating multiple LEAs per multiply! 
+static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 16) + 32768; // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr*float2fixed(1.40200f); + g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f); + b = y_fixed + cb*float2fixed(1.77200f); + r >>= 16; + g >>= 16; + b >>= 16; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (uint8)r; + out[1] = (uint8)g; + out[2] = (uint8)b; + out[3] = 255; + out += step; + } +} + +#if STBI_SIMD +static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row; + +void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func) +{ + stbi_YCbCr_installed = func; +} +#endif + + +// clean up the temporary component buffers +static void cleanup_jpeg(jpeg *j) +{ + int i; + for (i=0; i < j->s.img_n; ++i) { + if (j->img_comp[i].data) { + stb_free(j->img_comp[i].raw_data); + j->img_comp[i].data = NULL; + } + if (j->img_comp[i].linebuf) { + stb_free(j->img_comp[i].linebuf); + j->img_comp[i].linebuf = NULL; + } + } +} + +typedef struct +{ + resample_row_func resample; + uint8 *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi_resample; + +static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n; + // validate req_comp + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + z->s.img_n = 0; + + // load a jpeg image from whichever source + if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? 
req_comp : z->s.img_n; + + if (z->s.img_n == 3 && n < 3) + decode_n = 1; + else + decode_n = z->s.img_n; + + // resample and color-convert + { + int k; + uint i,j; + uint8 *output; + uint8 *coutput[4]; + + stbi_resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (uint8 *) stb_malloc(z->s.img_x + 3); + if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s.img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2; + else r->resample = resample_row_generic; + } + + // can't error after this so, this is safe + output = (uint8 *) stb_malloc(n * z->s.img_x * z->s.img_y + 1); + if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s.img_y; ++j) { + uint8 *out = output + n * z->s.img_x * j; + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? 
r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + uint8 *y = coutput[0]; + if (z->s.img_n == 3) { + #if STBI_SIMD + stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n); + #else + YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n); + #endif + } else + for (i=0; i < z->s.img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + uint8 *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s.img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + cleanup_jpeg(z); + *out_x = z->s.img_x; + *out_y = z->s.img_y; + if (comp) *comp = z->s.img_n; // report original components, not output + return output; + } +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_file(&j.s, f); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_jpeg_test_file(FILE *f) +{ + int n,r; + jpeg j; + n = ftell(f); + start_file(&j.s, f); + r = decode_jpeg_header(&j, SCAN_type); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_jpeg_test_memory(stbi_uc const *buffer, int len) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return decode_jpeg_header(&j, SCAN_type); +} + +// @TODO: +#ifndef STBI_NO_STDIO +extern int 
stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can stb_malloc/stb_realloc) +// performance +// - fast huffman + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define ZFAST_BITS 9 // accelerate all cases in default tables +#define ZFAST_MASK ((1 << ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + uint16 fast[1 << ZFAST_BITS]; + uint16 firstcode[16]; + int maxcode[17]; + uint16 firstsymbol[16]; + uint8 size[288]; + uint16 value[288]; +} zhuffman; + +__forceinline static int bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +__forceinline static int bit_reverse(int v, int bits) +{ + assert(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 
11 bits, bit reverse and shift away 5 + return bitreverse16(v) >> (16-bits); +} + +static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 255, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + assert(sizes[i] <= (1 << i)); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (uint16) code; + z->firstsymbol[i] = (uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + z->size[c] = (uint8)s; + z->value[c] = (uint16)i; + if (s <= ZFAST_BITS) { + int k = bit_reverse(next_code[s],s); + while (k < (1 << ZFAST_BITS)) { + z->fast[k] = (uint16) c; + k += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + uint8 *zbuffer, *zbuffer_end; + int num_bits; + uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + zhuffman z_length, z_distance; +} zbuf; + +__forceinline static int zget8(zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void fill_bits(zbuf *z) +{ + do { + assert(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +__forceinline 
static unsigned int zreceive(zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z) +{ + int b,s,k; + if (a->num_bits < 16) fill_bits(a); + b = z->fast[a->code_buffer & ZFAST_MASK]; + if (b < 0xffff) { + s = z->size[b]; + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; + } + + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = bit_reverse(a->code_buffer, 16); + for (s=ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + assert(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +static int expand(zbuf *z, int n) // need to make room for n bytes +{ + char *q; + int cur, limit; + if (!z->z_expandable) return e("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) stb_realloc(z->zout_start, limit); + if (q == NULL) return e("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static int length_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static int length_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static int dist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int parse_huffman_block(zbuf *a) +{ + for(;;) { + int z = zhuffman_decode(a, &a->z_length); + 
if (z < 256) { + if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes + if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0; + *a->zout++ = (char) z; + } else { + uint8 *p; + int len,dist; + if (z == 256) return 1; + z -= 257; + len = length_base[z]; + if (length_extra[z]) len += zreceive(a, length_extra[z]); + z = zhuffman_decode(a, &a->z_distance); + if (z < 0) return e("bad huffman code","Corrupt PNG"); + dist = dist_base[z]; + if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); + if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG"); + if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0; + p = (uint8 *) (a->zout - dist); + while (len--) + *a->zout++ = *p++; + } + } +} + +static int compute_huffman_codes(zbuf *a) +{ + static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + zhuffman z_codelength; + uint8 lencodes[286+32+137];//padding for maximum single op + uint8 codelength_sizes[19]; + int i,n; + + int hlit = zreceive(a,5) + 257; + int hdist = zreceive(a,5) + 1; + int hclen = zreceive(a,4) + 4; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (uint8) s; + } + if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < hlit + hdist) { + int c = zhuffman_decode(a, &z_codelength); + assert(c >= 0 && c < 19); + if (c < 16) + lencodes[n++] = (uint8) c; + else if (c == 16) { + c = zreceive(a,2)+3; + memset(lencodes+n, lencodes[n-1], c); + n += c; + } else if (c == 17) { + c = zreceive(a,3)+3; + memset(lencodes+n, 0, c); + n += c; + } else { + assert(c == 18); + c = zreceive(a,7)+11; + memset(lencodes+n, 0, c); + n += c; + } + } + if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG"); + if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; 
+} + +static int parse_uncompressed_block(zbuf *a) +{ + uint8 header[4]; + int len,nlen,k; + if (a->num_bits & 7) + zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns? + a->code_buffer >>= 8; + a->num_bits -= 8; + } + assert(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = (uint8) zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!expand(a, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int parse_zlib_header(zbuf *a) +{ + int cmf = zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = zget8(a); + if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +// @TODO: should statically initialize these for optimal thread safety +static uint8 default_length[288], default_distance[32]; +static void init_defaults(void) +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) default_length[i] = 8; + for ( ; i <= 255; ++i) default_length[i] = 9; + for ( ; i <= 279; ++i) default_length[i] = 7; + for ( ; i <= 287; ++i) default_length[i] = 8; + + for (i=0; i <= 31; ++i) default_distance[i] = 5; +} + +int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... 
I should implement real streaming support instead +static int parse_zlib(zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = zreceive(a,1); + type = zreceive(a,2); + if (type == 0) { + if (!parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!default_distance[31]) init_defaults(); + if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0; + if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0; + } else { + if (!compute_huffman_codes(a)) return 0; + } + if (!parse_huffman_block(a)) return 0; + } + if (stbi_png_partial && a->zout - a->zout_start > 65536) + break; + } while (!final); + return 1; +} + +static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return parse_zlib(a, parse_header); +} + +char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + zbuf a; + char *p = (char *) stb_malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if (do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + stb_free(a.zout_start); + return NULL; + } +} + +char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +char *stbi_zlib_decode_noheader_malloc(char const *buffer, 
int len, int *outlen) +{ + zbuf a; + char *p = (char *) stb_malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer+len; + if (do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + stb_free(a.zout_start); + return NULL; + } +} + +int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + + +typedef struct +{ + uint32 length; + uint32 type; +} chunk; + +#define PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +static chunk get_chunk_header(stbi *s) +{ + chunk c; + c.length = get32(s); + c.type = get32(s); + return c; +} + +static int check_png_header(stbi *s) +{ + static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi s; + uint8 *idata, *expanded, *out; +} png; + + +enum { + F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4, + F_avg_first, F_paeth_first, +}; + +static uint8 first_row_filter[5] = +{ + F_none, F_sub, F_none, F_avg_first, F_paeth_first +}; + +static int paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +// create the png data from 
post-deflated data +static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y) +{ + stbi *s = &a->s; + uint32 i,j,stride = x*out_n; + int k; + int img_n = s->img_n; // copy it into a local for later + assert(out_n == s->img_n || out_n == s->img_n+1); + if (stbi_png_partial) y = 1; + a->out = (uint8 *) stb_malloc(x * y * out_n); + if (!a->out) return e("outofmem", "Out of memory"); + if (!stbi_png_partial) { + if (s->img_x == x && s->img_y == y) + if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + else // interlaced: + if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + } + for (j=0; j < y; ++j) { + uint8 *cur = a->out + stride*j; + uint8 *prior = cur - stride; + int filter = *raw++; + if (filter > 4) return e("invalid filter","Corrupt PNG"); + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + // handle first pixel explicitly + for (k=0; k < img_n; ++k) { + switch(filter) { + case F_none : cur[k] = raw[k]; break; + case F_sub : cur[k] = raw[k]; break; + case F_up : cur[k] = raw[k] + prior[k]; break; + case F_avg : cur[k] = raw[k] + (prior[k]>>1); break; + case F_paeth : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break; + case F_avg_first : cur[k] = raw[k]; break; + case F_paeth_first: cur[k] = raw[k]; break; + } + } + if (img_n != out_n) cur[img_n] = 255; + raw += img_n; + cur += out_n; + prior += out_n; + // this is a little gross, so that we don't switch per-pixel or per-component + if (img_n == out_n) { + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ + for (k=0; k < img_n; ++k) + switch(filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) 
(raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break; + } + #undef CASE + } else { + assert(img_n+1 == out_n); + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ + for (k=0; k < img_n; ++k) + switch(filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break; + } + #undef CASE + } + } + return 1; +} + +static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced) +{ + uint8 *final; + int p; + int save; + if (!interlaced) + return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y); + save = stbi_png_partial; + stbi_png_partial = 0; + + // de-interlacing + final = (uint8 *) stb_malloc(a->s.img_x * a->s.img_y * out_n); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { + stb_free(final); + return 0; + } + for (j=0; j < y; ++j) + for (i=0; i < x; ++i) + memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, + a->out + (j*x+i)*out_n, out_n); + stb_free(a->out); + raw += (x*out_n+1)*y; + raw_len -= (x*out_n+1)*y; + } + } + a->out 
= final; + + stbi_png_partial = save; + return 1; +} + +static int compute_transparency(png *z, uint8 tc[3], int out_n) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + assert(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n) +{ + uint32 i, pixel_count = a->s.img_x * a->s.img_y; + uint8 *p, *temp_out, *orig = a->out; + + p = (uint8 *) stb_malloc(pixel_count * pal_img_n); + if (p == NULL) return e("outofmem", "Out of memory"); + + // between here and stb_free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + stb_free(a->out); + a->out = temp_out; + return 1; +} + +static int parse_png_file(png *z, int scan, int req_comp) +{ + uint8 palette[1024], pal_img_n=0; + uint8 has_trans=0, tc[3]; + uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0; + stbi *s = &z->s; + + if (!check_png_header(s)) return 0; + + if (scan == SCAN_type) return 1; + + for(;;first=0) { + chunk c = get_chunk_header(s); + if (first && c.type != PNG_TYPE('I','H','D','R')) + return e("first not IHDR","Corrupt PNG"); + switch (c.type) { + case PNG_TYPE('I','H','D','R'): { + int depth,color,comp,filter; + if (!first) return e("multiple IHDR","Corrupt PNG"); + if (c.length != 13) return e("bad 
IHDR len","Corrupt PNG"); + s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); + s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); + depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); + color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG"); + comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG"); + filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG"); + interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + if (scan == SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case PNG_TYPE('P','L','T','E'): { + if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = get8u(s); + palette[i*4+1] = get8u(s); + palette[i*4+2] = get8u(s); + palette[i*4+3] = 255; + } + break; + } + + case PNG_TYPE('t','R','N','S'): { + if (z->idata) return e("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = get8u(s); + } else { + if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); + if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG"); + has_trans = 1; + for (k=0; k < s->img_n; ++k) + tc[k] = (uint8) get16(s); // non 8-bit images will be larger + } + break; + } + + case PNG_TYPE('I','D','A','T'): { + if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG"); + if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; } + if (ioff + c.length > idata_limit) { + uint8 *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + p = (uint8 *) stb_realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); + z->idata = p; + } + #ifndef STBI_NO_STDIO + if (s->img_file) + { + if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG"); + } + else + #endif + { + memcpy(z->idata+ioff, s->img_buffer, c.length); + s->img_buffer += c.length; + } + ioff += c.length; + break; + } + + case PNG_TYPE('I','E','N','D'): { + uint32 raw_len; + if (scan != SCAN_load) return 1; + if (z->idata == NULL) return e("no IDAT","Corrupt PNG"); + z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len); + if (z->expanded == NULL) return 0; // zlib should set error + stb_free(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0; + if (has_trans) + if (!compute_transparency(z, tc, s->img_out_n)) return 0; + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!expand_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } + stb_free(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX chunk not known"; + invalid_chunk[0] = (uint8) (c.type >> 24); + invalid_chunk[1] = (uint8) (c.type >> 16); + invalid_chunk[2] = (uint8) (c.type >> 8); + invalid_chunk[3] = (uint8) (c.type >> 0); + #endif + return e(invalid_chunk, "PNG not supported: unknown chunk type"); + } + skip(s, c.length); + break; + } + // end of chunk, read and skip CRC + get32(s); + } +} + +static unsigned 
char *do_png(png *p, int *x, int *y, int *n, int req_comp) +{ + unsigned char *result=NULL; + p->expanded = NULL; + p->idata = NULL; + p->out = NULL; + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + if (parse_png_file(p, SCAN_load, req_comp)) { + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s.img_out_n) { + result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y); + p->s.img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s.img_x; + *y = p->s.img_y; + if (n) *n = p->s.img_n; + } + stb_free(p->out); p->out = NULL; + stb_free(p->expanded); p->expanded = NULL; + stb_free(p->idata); p->idata = NULL; + + return result; +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_file(&p.s, f); + return do_png(&p, x,y,comp,req_comp); +} + +unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_png_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_mem(&p.s, buffer,len); + return do_png(&p, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_png_test_file(FILE *f) +{ + png p; + int n,r; + n = ftell(f); + start_file(&p.s, f); + r = parse_png_file(&p, SCAN_type,STBI_default); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_png_test_memory(stbi_uc const *buffer, int len) +{ + png p; + start_mem(&p.s, buffer, len); + return parse_png_file(&p, SCAN_type,STBI_default); +} + +// TODO: load header from png +#ifndef STBI_NO_STDIO +int stbi_png_info (char const *filename, int *x, int *y, int *comp) +{ + png p; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + start_file(&p.s, f); + if 
(parse_png_file(&p, SCAN_header, 0)) { + if(x) *x = p.s.img_x; + if(y) *y = p.s.img_y; + if (comp) *comp = p.s.img_n; + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// Microsoft/Windows BMP image + +static int bmp_test(stbi *s) +{ + int sz; + if (get8(s) != 'B') return 0; + if (get8(s) != 'M') return 0; + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + get32le(s); // discard data offset + sz = get32le(s); + if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1; + return 0; +} + +#ifndef STBI_NO_STDIO +int stbi_bmp_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = bmp_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_bmp_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_test(&s); +} + +// returns 0..31 for the highest set bit +static int high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +static int shiftsigned(int v, int shift, int bits) +{ + int result; + int z=0; + + if (shift < 0) v <<= -shift; + else v >>= shift; + result = v; + + z = bits; + while (z < 8) { + result += v >> z; + z += bits; + } + return result; +} + +static stbi_uc *bmp_load(stbi *s, int *x, int *y, int 
*comp, int req_comp) +{ + uint8 *out; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; + stbi_uc pal[256][4]; + int psize=0,i,j,compress=0,width; + int bpp, flip_vertically, pad, target, offset, hsz; + if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP"); + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + offset = get32le(s); + hsz = get32le(s); + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown"); + failure_reason = "bad BMP"; + if (hsz == 12) { + s->img_x = get16le(s); + s->img_y = get16le(s); + } else { + s->img_x = get32le(s); + s->img_y = get32le(s); + } + if (get16le(s) != 1) return 0; + bpp = get16le(s); + if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit"); + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + if (hsz == 12) { + if (bpp < 24) + psize = (offset - 14 - 24) / 3; + } else { + compress = get32le(s); + if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE"); + get32le(s); // discard sizeof + get32le(s); // discard hres + get32le(s); // discard vres + get32le(s); // discard colorsused + get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + get32le(s); + get32le(s); + get32le(s); + get32le(s); + } + if (bpp == 16 || bpp == 32) { + mr = mg = mb = 0; + if (compress == 0) { + if (bpp == 32) { + mr = 0xff << 16; + mg = 0xff << 8; + mb = 0xff << 0; + ma = 0xff << 24; + fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 + } else { + mr = 31 << 10; + mg = 31 << 5; + mb = 31 << 0; + } + } else if (compress == 3) { + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (mr == mg && mg == mb) { + // ?!?!? 
+ return NULL; + } + } else + return NULL; + } + } else { + assert(hsz == 108); + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + ma = get32le(s); + get32le(s); // discard color space + for (i=0; i < 12; ++i) + get32le(s); // discard color space parameters + } + if (bpp < 16) + psize = (offset - 14 - hsz) >> 2; + } + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + out = (stbi_uc *) stb_malloc(target * s->img_x * s->img_y); + if (!out) return epuc("outofmem", "Out of memory"); + if (bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { stb_free(out); return epuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = get8(s); + pal[i][1] = get8(s); + pal[i][0] = get8(s); + if (hsz != 12) get8(s); + pal[i][3] = 255; + } + skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); + if (bpp == 4) width = (s->img_x + 1) >> 1; + else if (bpp == 8) width = s->img_x; + else { stb_free(out); return epuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=get8(s),v2=0; + if (bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (bpp == 8) ? 
get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + skip(s, pad); + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + skip(s, offset - 14 - hsz); + if (bpp == 24) width = 3 * s->img_x; + else if (bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (bpp == 24) { + easy = 1; + } else if (bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP"); + // right shift amt to put high bit in position #7 + rshift = high_bit(mr)-7; rcount = bitcount(mr); + gshift = high_bit(mg)-7; gcount = bitcount(mr); + bshift = high_bit(mb)-7; bcount = bitcount(mr); + ashift = high_bit(ma)-7; acount = bitcount(mr); + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + int a; + out[z+2] = get8(s); + out[z+1] = get8(s); + out[z+0] = get8(s); + z += 3; + a = (easy == 2 ? get8(s) : 255); + if (target == 4) out[z++] = a; + } + } else { + for (i=0; i < (int) s->img_x; ++i) { + uint32 v = (bpp == 16 ? get16le(s) : get32le(s)); + int a; + out[z++] = shiftsigned(v & mr, rshift, rcount); + out[z++] = shiftsigned(v & mg, gshift, gcount); + out[z++] = shiftsigned(v & mb, bshift, bcount); + a = (ma ? 
shiftsigned(v & ma, ashift, acount) : 255); + if (target == 4) out[z++] = a; + } + } + skip(s, pad); + } + } + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = target; + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_bmp_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return bmp_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_load(&s, x,y,comp,req_comp); +} + +// Targa Truevision - TGA +// by Jonathan Dummer + +static int tga_test(stbi *s) +{ + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE + get16(s); // discard palette start + get16(s); // discard palette length + get8(s); // discard bits per palette color entry + get16(s); // discard x origin + get16(s); // discard y origin + if( get16(s) < 1 ) return 0; // test width + if( get16(s) < 1 ) return 0; // test height + sz = get8(s); // bits per pixel + if( 
(sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = tga_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_tga_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_test(&s); +} + +static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + // read in the TGA header stuff + int tga_offset = get8u(s); + int tga_indexed = get8u(s); + int tga_image_type = get8u(s); + int tga_is_RLE = 0; + int tga_palette_start = get16le(s); + int tga_palette_len = get16le(s); + int tga_palette_bits = get8u(s); + int tga_x_origin = get16le(s); + int tga_y_origin = get16le(s); + int tga_width = get16le(s); + int tga_height = get16le(s); + int tga_bits_per_pixel = get8u(s); + int tga_inverted = get8u(s); + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4]; + unsigned char trans_data[4]; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + // do a tiny bit of precessing + if( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + /* int tga_alpha_bits = tga_inverted & 15; */ + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // error check + if( //(tga_indexed) || + (tga_width < 1) || (tga_height < 1) || + (tga_image_type < 1) || (tga_image_type > 3) || + ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) && + (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)) + ) + { + return NULL; + } + + // If I'm paletted, then I'll use the number of bits from the palette + if( tga_indexed ) + { + tga_bits_per_pixel = tga_palette_bits; + } + + // tga info + *x = tga_width; + *y = tga_height; + if( (req_comp < 1) || (req_comp > 4) ) + { + // just use whatever the file was + req_comp = 
tga_bits_per_pixel / 8; + *comp = req_comp; + } else + { + // force a new number of components + *comp = tga_bits_per_pixel/8; + } + tga_data = (unsigned char*)stb_malloc( tga_width * tga_height * req_comp ); + + // skip to the data's starting position (offset usually = 0) + skip(s, tga_offset ); + // do I need to load a palette? + if( tga_indexed ) + { + // any data to skip? (offset usually = 0) + skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stb_malloc( tga_palette_len * tga_palette_bits / 8 ); + getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 ); + } + // load the data + for( i = 0; i < tga_width * tga_height; ++i ) + { + // if I'm in RLE mode, do I need to get a RLE chunk? + if( tga_is_RLE ) + { + if( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = get8u(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if( read_next_pixel ) + { + // load however much data we did have + if( tga_indexed ) + { + // read in 1 byte, then perform the lookup + int pal_idx = get8u(s); + if( pal_idx >= tga_palette_len ) + { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_bits_per_pixel / 8; + for( j = 0; j*8 < tga_bits_per_pixel; ++j ) + { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else + { + // read in the data raw + for( j = 0; j*8 < tga_bits_per_pixel; ++j ) + { + raw_data[j] = get8u(s); + } + } + // convert raw to the intermediate format + switch( tga_bits_per_pixel ) + { + case 8: + // Luminous => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 16: + // Luminous,Alpha => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[1]; + break; + 
case 24: + // BGR => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 32: + // BGRA => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[3]; + break; + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + // convert to final format + switch( req_comp ) + { + case 1: + // RGBA => Luminance + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + break; + case 2: + // RGBA => Luminance,Alpha + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + tga_data[i*req_comp+1] = trans_data[3]; + break; + case 3: + // RGBA => RGB + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + break; + case 4: + // RGBA => RGBA + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + tga_data[i*req_comp+3] = trans_data[3]; + break; + } + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if( tga_inverted ) + { + for( j = 0; j*2 < tga_height; ++j ) + { + int index1 = j * tga_width * req_comp; + int index2 = (tga_height - 1 - j) * tga_width * req_comp; + for( i = tga_width * req_comp; i > 0; --i ) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if( tga_palette != NULL ) + { + stb_free( tga_palette ); + } + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_tga_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return tga_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB + +static int psd_test(stbi *s) +{ + if (get32(s) != 0x38425053) return 0; // "8BPS" + else return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_psd_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = psd_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_psd_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_test(&s); +} + +static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + int pixelCount; + int channelCount, compression; + int channel, i, count, len; + int w,h; + uint8 *out; + + // Check identifier + if (get32(s) != 0x38425053) // "8BPS" + return epuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (get16(s) != 1) + return epuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = get16(s); + if (channelCount < 0 || channelCount > 16) + return epuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = get32(s); + w = get32(s); + + // Make sure the depth is 8 bits. + if (get16(s) != 8) + return epuc("unsupported bit depth", "PSD bit depth is not 8 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (get16(s) != 3) + return epuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + skip(s,get32(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + skip(s, get32(s) ); + + // Skip the reserved data. + skip(s, get32(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = get16(s); + if (compression > 1) + return epuc("bad compression", "PSD has an unknown compression format"); + + // Create the destination image. + out = (stbi_uc *) stb_malloc(4 * w*h); + if (!out) return epuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. 
+ for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4; + } else { + // Read the RLE data. + count = 0; + while (count < pixelCount) { + len = get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + count += len; + while (len) { + *p = get8(s); + p += 4; + len--; + } + } else if (len > 128) { + uint32 val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0x0FF; + len += 2; + val = get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out + channel; + if (channel > channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4; + } else { + // Read the data. 
+ count = 0; + for (i = 0; i < pixelCount; i++) + *p = get8(s), p += 4; + } + } + } + + if (req_comp && req_comp != 4) { + out = convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // convert_format frees input on failure + } + + if (comp) *comp = channelCount; + *y = h; + *x = w; + + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_psd_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return psd_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int hdr_test(stbi *s) +{ + char *signature = "#?RADIANCE\n"; + int i; + for (i=0; signature[i]; ++i) + if (get8(s) != signature[i]) + return 0; + return 1; +} + +int stbi_hdr_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return hdr_test(&s); +} + +#ifndef STBI_NO_STDIO +int stbi_hdr_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = hdr_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +#define HDR_BUFLEN 1024 +static char *hdr_gettoken(stbi *z, char *buffer) +{ + int len=0; + char *s = buffer, c = '\0'; + + c = get8(z); + + while (!at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == HDR_BUFLEN-1) { + // flush to end of line + while (!at_eof(z) && get8(z) != '\n') + ; + break; + } + c = get8(z); + } + + buffer[len] = 0; + return 
buffer; +} + +static void hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + + +static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + char buffer[HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + + + // Check identifier + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + return epf("not HDR", "Corrupt HDR image"); + + // Parse header + while(1) { + token = hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return epf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = strtol(token, NULL, 10); + + *x = width; + *y = height; + + *comp = 3; + if (req_comp == 0) req_comp = 3; + + // Read data + hdr_data = (float *) stb_malloc(height * width * req_comp * sizeof(float)); + + // Load image data + // image data is stored as some number of sca + if( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + getn(s, rgbe, 4); + hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = get8(s); + c2 = get8(s); + len = get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4] = { c1,c2,len, get8(s) }; + hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + stb_free(scanline); + goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format + } + len <<= 8; + len |= get8(s); + if (len != width) { stb_free(hdr_data); stb_free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) scanline = (stbi_uc *) stb_malloc(width * 4); + + for (k = 0; k < 4; ++k) { + i = 0; + while (i < width) { + count = get8(s); + if (count > 128) { + // Run + value = get8(s); + count -= 128; + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = get8(s); + } + } + } + for (i=0; i < width; ++i) + 
hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + stb_free(scanline); + } + + return hdr_data; +} + +#ifndef STBI_NO_STDIO +float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return hdr_load(&s,x,y,comp,req_comp); +} +#endif + +float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer, len); + return hdr_load(&s,x,y,comp,req_comp); +} + +#endif // STBI_NO_HDR + +/////////////////////// write image /////////////////////// + +#ifndef STBI_NO_WRITE + +static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); } + +static void writefv(FILE *f, char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { uint8 x = va_arg(v, int); write8(f,x); break; } + case '2': { int16 x = va_arg(v, int); write8(f,x); write8(f,x>>8); break; } + case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; } + default: + assert(0); + va_end(v); + return; + } + } +} + +static void writef(FILE *f, char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + writefv(f,fmt,v); + va_end(v); +} + +static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int write_alpha, int scanline_pad) +{ + uint8 bg[3] = { 255, 0, 255}, px[3]; + uint32 zero = 0; + int i,j,k, j_end; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + uint8 *d = (uint8 *) data + (j*x+i)*comp; + if (write_alpha < 0) + fwrite(&d[comp-1], 1, 1, f); + switch (comp) { + case 1: + case 2: writef(f, "111", d[0],d[0],d[0]); + break; + case 4: + if (!write_alpha) { + for (k=0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255; + writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]); + break; + } + if (write_alpha > 0) + fwrite(&d[comp-1], 1, 1, f); + } + fwrite(&zero,scanline_pad,1,f); + } +} + +static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...) +{ + FILE *f = fopen(filename, "wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +static int outfile_w(wchar_t const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...) 
+{ + FILE *f = _wfopen(filename, L"wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return outfile(filename,-1,-1,x,y,comp,data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_bmp_w(wchar_t const *filename, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return outfile_w(filename,-1,-1,x,y,comp,data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) +{ + int has_alpha = !(comp & 1); + return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0, + "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +int stbi_write_tga_w(wchar_t const *filename, int x, int y, int comp, const void *data) +{ + int has_alpha = !(comp & 1); + return outfile_w(filename, -1,-1, x, y, comp, data, has_alpha, 0, + "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +// any other image formats that do interleaved rgb data? +// PNG: requires adler32,crc32 -- significant amount of code +// PSD: no, channels output separately +// TIFF: no, stripwise-interleaved... 
i think + +#endif // STBI_NO_WRITE + +} + +#endif // STBI_HEADER_FILE_ONLY diff --git a/crnlib/crn_strutils.cpp b/crnlib/crn_strutils.cpp new file mode 100644 index 00000000..3dddbd5f --- /dev/null +++ b/crnlib/crn_strutils.cpp @@ -0,0 +1,1260 @@ +// File: crn_strutils.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_strutils.h" +#include + +namespace crnlib +{ + char* strcpy_safe(char* pDst, uint dst_len, const char* pSrc) + { + CRNLIB_ASSERT(pDst && pSrc && dst_len); + if (!dst_len) + return pDst; + + char* q = pDst; + char c; + + do + { + if (dst_len == 1) + { + *q++ = '\0'; + break; + } + + c = *pSrc++; + *q++ = c; + + dst_len--; + + } while (c); + + CRNLIB_ASSERT((q - pDst) <= (int)dst_len); + + return pDst; + } + + bool int_to_string(int value, char* pDst, uint len) + { + CRNLIB_ASSERT(pDst); + + const uint cBufSize = 16; + char buf[cBufSize]; + + uint j = static_cast((value < 0) ? -value : value); + + char* p = buf + cBufSize - 1; + + *p-- = '\0'; + + do + { + *p-- = static_cast('0' + (j % 10)); + j /= 10; + } while (j); + + if (value < 0) + *p-- = '-'; + + const size_t total_bytes = (buf + cBufSize - 1) - p; + if (total_bytes > len) + return false; + + for (size_t i = 0; i < total_bytes; i++) + pDst[i] = p[1 + i]; + + return true; + } + + bool uint_to_string(uint value, char* pDst, uint len) + { + CRNLIB_ASSERT(pDst); + + const uint cBufSize = 16; + char buf[cBufSize]; + + char* p = buf + cBufSize - 1; + + *p-- = '\0'; + + do + { + *p-- = static_cast('0' + (value % 10)); + value /= 10; + } while (value); + + const size_t total_bytes = (buf + cBufSize - 1) - p; + if (total_bytes > len) + return false; + + for (size_t i = 0; i < total_bytes; i++) + pDst[i] = p[1 + i]; + + return true; + } + + bool string_to_int(const char*& pBuf, int& value) + { + value = 0; + + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; + + while (*p && isspace(*p)) + p++; + + uint result = 0; + bool negative = false; + + if 
(!isdigit(*p)) + { + if (p[0] == '-') + { + negative = true; + p++; + } + else + return false; + } + + while (*p && isdigit(*p)) + { + if (result & 0xE0000000U) + return false; + + const uint result8 = result << 3U; + const uint result2 = result << 1U; + + if (result2 > (0xFFFFFFFFU - result8)) + return false; + + result = result8 + result2; + + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFU - result)) + return false; + + result += c; + + p++; + } + + if (negative) + { + if (result > 0x80000000U) + { + value = 0; + return false; + } + value = -static_cast(result); + } + else + { + if (result > 0x7FFFFFFFU) + { + value = 0; + return false; + } + value = static_cast(result); + } + + pBuf = p; + + return true; + } + + bool string_to_int(const wchar_t*& pBuf, int& value) + { + value = 0; + + CRNLIB_ASSERT(pBuf); + const wchar_t* p = pBuf; + + while (*p && isspace(*p)) + p++; + + uint result = 0; + bool negative = false; + + if (!iswdigit(*p)) + { + if (p[0] == '-') + { + negative = true; + p++; + } + else + return false; + } + + while (*p && iswdigit(*p)) + { + if (result & 0xE0000000U) + return false; + + const uint result8 = result << 3U; + const uint result2 = result << 1U; + + if (result2 > (0xFFFFFFFFU - result8)) + return false; + + result = result8 + result2; + + uint c = p[0] - L'0'; + if (c > (0xFFFFFFFFU - result)) + return false; + + result += c; + + p++; + } + + if (negative) + { + if (result > 0x80000000U) + { + value = 0; + return false; + } + value = -static_cast(result); + } + else + { + if (result > 0x7FFFFFFFU) + { + value = 0; + return false; + } + value = static_cast(result); + } + + pBuf = p; + + return true; + } + + bool string_to_int64(const char*& pBuf, int64& value) + { + value = 0; + + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; + + while (*p && isspace(*p)) + p++; + + uint64 result = 0; + bool negative = false; + + if (!isdigit(*p)) + { + if (p[0] == '-') + { + negative = true; + p++; + } + else + return false; + } + + while (*p && isdigit(*p)) 
+ { + if (result & 0xE000000000000000ULL) + return false; + + const uint64 result8 = result << 3U; + const uint64 result2 = result << 1U; + + if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) + return false; + + result = result8 + result2; + + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFFFFFFFFFULL - result)) + return false; + + result += c; + + p++; + } + + if (negative) + { + if (result > 0x8000000000000000ULL) + { + value = 0; + return false; + } + value = -static_cast(result); + } + else + { + if (result > 0x7FFFFFFFFFFFFFFFULL) + { + value = 0; + return false; + } + value = static_cast(result); + } + + pBuf = p; + + return true; + } + + bool string_to_uint(const char*& pBuf, uint& value) + { + value = 0; + + CRNLIB_ASSERT(pBuf); + const char* p = pBuf; + + while (*p && isspace(*p)) + p++; + + uint result = 0; + + if (!isdigit(*p)) + return false; + + while (*p && isdigit(*p)) + { + if (result & 0xE0000000U) + return false; + + const uint result8 = result << 3U; + const uint result2 = result << 1U; + + if (result2 > (0xFFFFFFFFU - result8)) + return false; + + result = result8 + result2; + + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFU - result)) + return false; + + result += c; + + p++; + } + + value = result; + + pBuf = p; + + return true; + } + + bool string_to_uint(const wchar_t*& pBuf, uint& value) + { + value = 0; + + CRNLIB_ASSERT(pBuf); + const wchar_t* p = pBuf; + + while (*p && iswspace(*p)) + p++; + + uint result = 0; + + if (!iswdigit(*p)) + return false; + + while (*p && iswdigit(*p)) + { + if (result & 0xE0000000U) + return false; + + const uint result8 = result << 3U; + const uint result2 = result << 1U; + + if (result2 > (0xFFFFFFFFU - result8)) + return false; + + result = result8 + result2; + + uint c = p[0] - L'0'; + if (c > (0xFFFFFFFFU - result)) + return false; + + result += c; + + p++; + } + + value = result; + + pBuf = p; + + return true; + } + + bool string_to_uint64(const char*& pBuf, uint64& value) + { + value = 0; + + 
CRNLIB_ASSERT(pBuf); + const char* p = pBuf; + + while (*p && isspace(*p)) + p++; + + uint64 result = 0; + + if (!isdigit(*p)) + return false; + + while (*p && isdigit(*p)) + { + if (result & 0xE000000000000000ULL) + return false; + + const uint64 result8 = result << 3U; + const uint64 result2 = result << 1U; + + if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) + return false; + + result = result8 + result2; + + uint c = p[0] - '0'; + if (c > (0xFFFFFFFFFFFFFFFFULL - result)) + return false; + + result += c; + + p++; + } + + value = result; + + pBuf = p; + + return true; + } + + bool string_to_bool(const char* p, bool& value) + { + CRNLIB_ASSERT(p); + + value = false; + + if (_stricmp(p, "false") == 0) + return true; + + if (_stricmp(p, "true") == 0) + { + value = true; + return true; + } + + const char* q = p; + uint v; + if (string_to_uint(q, v)) + { + if (!v) + return true; + else if (v == 1) + { + value = true; + return true; + } + } + + return false; + } + + bool string_to_bool(const wchar_t* p, bool& value) + { + CRNLIB_ASSERT(p); + + value = false; + + if (_wcsicmp(p, L"false") == 0) + return true; + + if (_wcsicmp(p, L"true") == 0) + { + value = true; + return true; + } + + const wchar_t* q = p; + uint v; + if (string_to_uint(q, v)) + { + if (!v) + return true; + else if (v == 1) + { + value = true; + return true; + } + } + + return false; + } + + bool string_to_float(const char*& p, float& value, uint round_digit) + { + CRNLIB_ASSERT(p); + value = 0; + + enum { AF_BLANK = 1, AF_SIGN = 2, AF_DPOINT = 3, AF_BADCHAR = 4, AF_OVRFLOW = 5, AF_EXPONENT = 6, AF_NODIGITS = 7 }; + + const char* buf = p; + + int status = 0; + + if (round_digit > 10) + round_digit = 10; + + int got_sign_flag = 0; + int got_dp_flag = 0; + int got_num_flag = 0; + + int got_e_flag = 0; + int got_e_sign_flag = 0; + int e_sign = 0; + + uint whole_count = 0; + uint frac_count = 0; + + float whole = 0; + float frac = 0; + float scale = 1; + float exponent = 1; + + while (*buf) + { + if 
(!isspace(*buf)) + break; + + buf++; + } + + while (*buf) + { + int i = *buf++; + + switch (i) + { + case 'e': + case 'E': + { + got_e_flag = 1; + goto exit_while; + } + case '+': + { + if ((got_num_flag) || (got_sign_flag)) + { + status = AF_SIGN; + goto af_exit; + } + + got_sign_flag = 1; + + break; + } + case '-': + { + if ((got_num_flag) || (got_sign_flag)) + { + status = AF_SIGN; + goto af_exit; + } + + got_sign_flag = -1; + + break; + } + case '.': + { + if (got_dp_flag) + { + status = AF_DPOINT; + goto af_exit; + } + + got_dp_flag = 1; + + break; + } + default: + { + if ((i < '0') || (i > '9')) + goto exit_while; + else + { + i -= '0'; + + got_num_flag = 1; + + if (got_dp_flag) + { + if (frac_count < round_digit) + { + frac = frac * 10.0f + i; + + scale = scale * 10.0f; + } + else if (frac_count == round_digit) + { + if (i >= 5) /* check for round */ + frac = frac + 1.0f; + } + + frac_count++; + } + else + { + whole = whole * 10.0f + i; + + whole_count++; + + if (whole > 1e+30f) + { + status = AF_OVRFLOW; + goto af_exit; + } + } + } + + break; + } + } + } + + exit_while: + + if (got_e_flag) + { + if ((got_num_flag == 0) && (got_dp_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + int e = 0; + e_sign = 1; + got_num_flag = 0; + got_e_sign_flag = 0; + + while (*buf) + { + int i = *buf++; + + if (i == '+') + { + if ((got_num_flag) || (got_e_sign_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + e_sign = 1; + got_e_sign_flag = 1; + } + else if (i == '-') + { + if ((got_num_flag) || (got_e_sign_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + e_sign = -1; + got_e_sign_flag = 1; + } + else if ((i >= '0') && (i <= '9')) + { + got_num_flag = 1; + + if ((e = (e * 10) + (i - 48)) > 16) + { + status = AF_EXPONENT; + goto af_exit; + } + } + else + break; + } + + for (int i = 1; i <= e; i++) /* compute 10^e */ + exponent = exponent * 10.0f; + } + + if (((whole_count + frac_count) == 0) && (got_e_flag == 0)) + { + status = AF_NODIGITS; + goto 
af_exit; + } + + if (frac) + whole = whole + (frac / scale); + + if (got_e_flag) + { + if (e_sign > 0) + whole = whole * exponent; + else + whole = whole / exponent; + } + + if (got_sign_flag < 0) + whole = -whole; + + value = whole; + p = buf; + + af_exit: + return (status == 0); + } + + bool string_to_float(const wchar_t*& p, float& value, uint round_digit) + { + CRNLIB_ASSERT(p); + value = 0; + + enum { AF_BLANK = 1, AF_SIGN = 2, AF_DPOINT = 3, AF_BADCHAR = 4, AF_OVRFLOW = 5, AF_EXPONENT = 6, AF_NODIGITS = 7 }; + + const wchar_t* buf = p; + + int status = 0; + + if (round_digit > 10) + round_digit = 10; + + int got_sign_flag = 0; + int got_dp_flag = 0; + int got_num_flag = 0; + + int got_e_flag = 0; + int got_e_sign_flag = 0; + int e_sign = 0; + + uint whole_count = 0; + uint frac_count = 0; + + float whole = 0; + float frac = 0; + float scale = 1; + float exponent = 1; + + while (*buf) + { + if (!iswspace(*buf)) + break; + + buf++; + } + + while (*buf) + { + int i = *buf++; + + switch (i) + { + case L'e': + case L'E': + { + got_e_flag = 1; + goto exit_while; + } + case L'+': + { + if ((got_num_flag) || (got_sign_flag)) + { + status = AF_SIGN; + goto af_exit; + } + + got_sign_flag = 1; + + break; + } + case L'-': + { + if ((got_num_flag) || (got_sign_flag)) + { + status = AF_SIGN; + goto af_exit; + } + + got_sign_flag = -1; + + break; + } + case L'.': + { + if (got_dp_flag) + { + status = AF_DPOINT; + goto af_exit; + } + + got_dp_flag = 1; + + break; + } + default: + { + if ((i < L'0') || (i > L'9')) + goto exit_while; + else + { + i -= L'0'; + + got_num_flag = 1; + + if (got_dp_flag) + { + if (frac_count < round_digit) + { + frac = frac * 10.0f + i; + + scale = scale * 10.0f; + } + else if (frac_count == round_digit) + { + if (i >= 5) /* check for round */ + frac = frac + 1.0f; + } + + frac_count++; + } + else + { + whole = whole * 10.0f + i; + + whole_count++; + + if (whole > 1e+30f) + { + status = AF_OVRFLOW; + goto af_exit; + } + } + } + + break; + } + } + } 
+ + exit_while: + + if (got_e_flag) + { + if ((got_num_flag == 0) && (got_dp_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + int e = 0; + e_sign = 1; + got_num_flag = 0; + got_e_sign_flag = 0; + + while (*buf) + { + int i = *buf++; + + if (i == L'+') + { + if ((got_num_flag) || (got_e_sign_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + e_sign = 1; + got_e_sign_flag = 1; + } + else if (i == L'-') + { + if ((got_num_flag) || (got_e_sign_flag)) + { + status = AF_EXPONENT; + goto af_exit; + } + + e_sign = -1; + got_e_sign_flag = 1; + } + else if ((i >= L'0') && (i <= L'9')) + { + got_num_flag = 1; + + if ((e = (e * 10) + (i - 48)) > 16) + { + status = AF_EXPONENT; + goto af_exit; + } + } + else + break; + } + + for (int i = 1; i <= e; i++) /* compute 10^e */ + exponent = exponent * 10.0f; + } + + if (((whole_count + frac_count) == 0) && (got_e_flag == 0)) + { + status = AF_NODIGITS; + goto af_exit; + } + + if (frac) + whole = whole + (frac / scale); + + if (got_e_flag) + { + if (e_sign > 0) + whole = whole * exponent; + else + whole = whole / exponent; + } + + if (got_sign_flag < 0) + whole = -whole; + + value = whole; + p = buf; + + af_exit: + return (status == 0); + } + + bool split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt) + { + CRNLIB_ASSERT(p); + + char drive_buf[_MAX_DRIVE]; + char dir_buf[_MAX_DIR]; + char fname_buf[_MAX_FNAME]; + char ext_buf[_MAX_EXT]; + +#ifdef _MSC_VER + errno_t error = _splitpath_s(p, + pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); + if (error != 0) + return false; +#else + _splitpath(p, + pDrive ? drive_buf : NULL, + pDir ? dir_buf : NULL, + pFilename ? fname_buf : NULL, + pExt ? 
ext_buf : NULL); +#endif + + if (pDrive) *pDrive = drive_buf; + if (pDir) *pDir = dir_buf; + if (pFilename) *pFilename = fname_buf; + if (pExt) *pExt = ext_buf; + + return true; + } + + bool split_path(const wchar_t* p, dynamic_wstring* pDrive, dynamic_wstring* pDir, dynamic_wstring* pFilename, dynamic_wstring* pExt) + { + CRNLIB_ASSERT(p); + + wchar_t drive_buf[_MAX_DRIVE]; + wchar_t dir_buf[_MAX_DIR]; + wchar_t fname_buf[_MAX_FNAME]; + wchar_t ext_buf[_MAX_EXT]; + +#ifdef _MSC_VER + errno_t error = _wsplitpath_s(p, + pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, + pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, + pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, + pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); + if (error != 0) + return false; +#else + _wsplitpath(p, + pDrive ? drive_buf : NULL, + pDir ? dir_buf : NULL, + pFilename ? fname_buf : NULL, + pExt ? ext_buf : NULL); +#endif + + if (pDrive) *pDrive = drive_buf; + if (pDir) *pDir = dir_buf; + if (pFilename) *pFilename = fname_buf; + if (pExt) *pExt = ext_buf; + + return true; + } + + bool split_path(const char* p, dynamic_string& path, dynamic_string& filename) + { + dynamic_string temp_drive, temp_path, temp_ext; + if (!split_path(p, &temp_drive, &temp_path, &filename, &temp_ext)) + return false; + + filename += temp_ext; + + combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); + return true; + } + + bool split_path(const wchar_t* p, dynamic_wstring& path, dynamic_wstring& filename) + { + dynamic_wstring temp_drive, temp_path, temp_ext; + if (!split_path(p, &temp_drive, &temp_path, &filename, &temp_ext)) + return false; + + filename += temp_ext; + + combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); + return true; + } + + bool get_pathname(const char* p, dynamic_string& path) + { + dynamic_string temp_drive, temp_path; + if (!split_path(p, &temp_drive, &temp_path, NULL, NULL)) + return false; + + combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); + return true; 
+ } + + bool get_pathname(const wchar_t* p, dynamic_wstring& path) + { + dynamic_wstring temp_drive, temp_path; + if (!split_path(p, &temp_drive, &temp_path, NULL, NULL)) + return false; + + combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); + return true; + } + + bool get_filename(const char* p, dynamic_string& filename) + { + dynamic_string temp_ext; + if (!split_path(p, NULL, NULL, &filename, &temp_ext)) + return false; + + filename += temp_ext; + return true; + } + + bool get_filename(const wchar_t* p, dynamic_wstring& filename) + { + dynamic_wstring temp_ext; + if (!split_path(p, NULL, NULL, &filename, &temp_ext)) + return false; + + filename += temp_ext; + return true; + } + + void combine_path(dynamic_string& dst, const char* pA, const char* pB) + { + dynamic_string temp; + temp = pA; + if ((!temp.is_empty()) && (pB[0] != '\\') && (pB[0] != '/')) + { + char c = temp[temp.get_len() - 1]; + if ((c != '\\') && (c != '/')) + { + temp.append_char('\\'); + } + } + temp += pB; + dst.swap(temp); + } + + void combine_path(dynamic_wstring& dst, const wchar_t* pA, const wchar_t* pB) + { + dynamic_wstring temp; + temp = pA; + if ((!temp.is_empty()) && (pB[0] != L'\\') && (pB[0] != L'/')) + { + wchar_t c = temp[temp.get_len() - 1]; + if ((c != L'\\') && (c != L'/')) + { + temp.append_char(L'\\'); + } + } + temp += pB; + dst.swap(temp); + } + + void combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC) + { + combine_path(dst, pA, pB); + combine_path(dst, dst.get_ptr(), pC); + } + + void combine_path(dynamic_wstring& dst, const wchar_t* pA, const wchar_t* pB, const wchar_t* pC) + { + combine_path(dst, pA, pB); + combine_path(dst, dst.get_ptr(), pC); + } + + void combine_path(dynamic_wstring& dst, const wchar_t* pA, const wchar_t* pB, const wchar_t* pC, const wchar_t *pD) + { + combine_path(dst, pA, pB); + combine_path(dst, dst.get_ptr(), pC); + combine_path(dst, dst.get_ptr(), pD); + } + + bool full_path(dynamic_string& path) + { 
+#ifndef _XBOX + char buf[CRNLIB_MAX_PATH]; + + char* p = _fullpath(buf, path.get_ptr(), CRNLIB_MAX_PATH); + if (!p) + return false; + + path.set(buf); +#endif + return true; + } + + bool full_path(dynamic_wstring& path) + { +#ifndef _XBOX + wchar_t buf[CRNLIB_MAX_PATH]; + + wchar_t* p = _wfullpath(buf, path.get_ptr(), CRNLIB_MAX_PATH); + if (!p) + return false; + + path.set(buf); +#endif + return true; + } + + bool get_extension(dynamic_string& filename) + { + int sep = filename.find_right('\\'); + if (sep < 0) + sep = filename.find_right('/'); + + int dot = filename.find_right('.'); + if (dot < sep) + { + filename.clear(); + return false; + } + + filename.right(dot + 1); + + return true; + } + + bool get_extension(dynamic_wstring& filename) + { + int sep = filename.find_right(L'\\'); + if (sep < 0) + sep = filename.find_right(L'/'); + + int dot = filename.find_right(L'.'); + if (dot < sep) + { + filename.clear(); + return false; + } + + filename.right(dot + 1); + + return true; + } + + bool remove_extension(dynamic_string& filename) + { + int sep = filename.find_right('\\'); + if (sep < 0) + sep = filename.find_right('/'); + + int dot = filename.find_right('.'); + if (dot < sep) + return false; + + filename.left(dot); + + return true; + } + + bool remove_extension(dynamic_wstring& filename) + { + int sep = filename.find_right(L'\\'); + if (sep < 0) + sep = filename.find_right(L'/'); + + int dot = filename.find_right(L'.'); + if (dot < sep) + return false; + + filename.left(dot); + + return true; + } + + bool create_path(const dynamic_wstring& path) + { + bool unc = false; + dynamic_wstring cur_path; + + const int l = path.get_len(); + + int n = 0; + while (n < l) + { + const wchar_t c = path.get_ptr()[n]; + + const bool sep = (c == L'/') || (c == L'\\'); + + if ((sep) || (n == (l - 1))) + { + if ((n == (l - 1)) && (!sep)) + cur_path.append_char(c); + + bool valid = false; + if ((cur_path.get_len() > 3) && (cur_path.get_ptr()[1] == L':')) + valid = true; + else if 
(cur_path.get_len() > 2) + { + if (unc) + valid = true; + unc = true; + } + + if (valid) + _wmkdir(cur_path.get_ptr()); + } + + cur_path.append_char(c); + + n++; + } + + return true; + } + + void trim_trailing_seperator(dynamic_wstring& path) + { + if ( (path.get_len()) && ( (path[path.get_len() - 1] == L'\\') || (path[path.get_len() - 1] == L'/') ) ) + path.truncate(path.get_len() - 1); + } + +} // namespace crnlib + + + diff --git a/crnlib/crn_strutils.h b/crnlib/crn_strutils.h new file mode 100644 index 00000000..b734c734 --- /dev/null +++ b/crnlib/crn_strutils.h @@ -0,0 +1,59 @@ +// File: crn_strutils.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + char* strcpy_safe(char* pDst, uint dst_len, const char* pSrc); + + bool int_to_string(int value, char* pDst, uint len); + bool uint_to_string(uint value, char* pDst, uint len); + + bool string_to_int(const char*& pBuf, int& value); + bool string_to_int(const wchar_t*& pBuf, int& value); + + bool string_to_uint(const char*& pBuf, uint& value); + bool string_to_uint(const wchar_t*& pBuf, uint& value); + + bool string_to_int64(const char*& pBuf, int64& value); + bool string_to_uint64(const char*& pBuf, uint64& value); + + bool string_to_bool(const char* p, bool& value); + bool string_to_bool(const wchar_t* p, bool& value); + + bool string_to_float(const char*& p, float& value, uint round_digit = 10U); + bool string_to_float(const wchar_t*& p, float& value, uint round_digit = 10U); + + bool split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt); + bool split_path(const wchar_t* p, dynamic_wstring* pDrive, dynamic_wstring* pDir, dynamic_wstring* pFilename, dynamic_wstring* pExt); + + bool split_path(const char* p, dynamic_string& path, dynamic_string& filename); + bool split_path(const wchar_t* p, dynamic_wstring& path, dynamic_wstring& filename); + + bool get_pathname(const char* p, dynamic_string& 
path); + bool get_pathname(const wchar_t* p, dynamic_wstring& path); + + bool get_filename(const char* p, dynamic_string& filename); + bool get_filename(const wchar_t* p, dynamic_wstring& filename); + + void combine_path(dynamic_string& dst, const char* pA, const char* pB); + void combine_path(dynamic_wstring& dst, const wchar_t* pA, const wchar_t* pB); + + void combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC); + void combine_path(dynamic_wstring& dst, const wchar_t* pA, const wchar_t* pB, const wchar_t* pC); + void combine_path(dynamic_wstring& dst, const wchar_t* pA, const wchar_t* pB, const wchar_t* pC, const wchar_t *pD); + + bool full_path(dynamic_string& path); + bool full_path(dynamic_wstring& path); + + bool get_extension(dynamic_string& filename); + bool get_extension(dynamic_wstring& filename); + + bool remove_extension(dynamic_string& filename); + bool remove_extension(dynamic_wstring& filename); + + bool create_path(const dynamic_wstring& path); + + void trim_trailing_seperator(dynamic_wstring& path); + +} // namespace crnlib diff --git a/crnlib/crn_symbol_codec.cpp b/crnlib/crn_symbol_codec.cpp new file mode 100644 index 00000000..f52d8dc9 --- /dev/null +++ b/crnlib/crn_symbol_codec.cpp @@ -0,0 +1,1901 @@ +// File: crn_symbol_codec.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_symbol_codec.h" +#include "crn_huffman_codes.h" + +namespace crnlib +{ + static float gProbCost[cSymbolCodecArithProbScale]; + + //const uint cArithProbMulLenSigBits = 8; + //const uint cArithProbMulLenSigScale = 1 << cArithProbMulLenSigBits; + + class arith_prob_cost_initializer + { + public: + arith_prob_cost_initializer() + { + const float cInvLn2 = 1.0f / 0.69314718f; + + for (uint i = 0; i < cSymbolCodecArithProbScale; i++) + gProbCost[i] = -logf(i * (1.0f / cSymbolCodecArithProbScale)) * cInvLn2; + } + }; + + static arith_prob_cost_initializer g_prob_cost_initializer; + + double 
symbol_histogram::calc_entropy() const + { + double total = 0.0f; + for (uint i = 0; i < m_hist.size(); i++) + total += m_hist[i]; + if (total == 0.0f) + return 0.0f; + + double entropy = 0.0f; + double neg_inv_log2 = -1.0f / log(2.0f); + double inv_total = 1.0f / total; + for (uint i = 0; i < m_hist.size(); i++) + { + if (m_hist[i]) + { + double bits = log(m_hist[i] * inv_total) * neg_inv_log2; + entropy += bits * m_hist[i]; + } + } + + return entropy; + } + + uint64 symbol_histogram::get_total() const + { + uint64 total = 0; + for (uint i = 0; i < m_hist.size(); i++) + total += m_hist[i]; + return total; + } + + adaptive_huffman_data_model::adaptive_huffman_data_model(bool encoding, uint total_syms) : + m_total_syms(0), + m_update_cycle(0), + m_symbols_until_update(0), + m_total_count(0), + m_pDecode_tables(NULL), + m_decoder_table_bits(0), + m_encoding(encoding) + { + if (total_syms) + init(encoding, total_syms); + } + + adaptive_huffman_data_model::adaptive_huffman_data_model(const adaptive_huffman_data_model& other) : + m_total_syms(0), + m_update_cycle(0), + m_symbols_until_update(0), + m_total_count(0), + m_pDecode_tables(NULL), + m_decoder_table_bits(0), + m_encoding(false) + { + *this = other; + } + + adaptive_huffman_data_model::~adaptive_huffman_data_model() + { + if (m_pDecode_tables) + crnlib_delete(m_pDecode_tables); + } + + adaptive_huffman_data_model& adaptive_huffman_data_model::operator= (const adaptive_huffman_data_model& rhs) + { + if (this == &rhs) + return *this; + + m_total_syms = rhs.m_total_syms; + + m_update_cycle = rhs.m_update_cycle; + m_symbols_until_update = rhs.m_symbols_until_update; + + m_total_count = rhs.m_total_count; + + m_sym_freq = rhs.m_sym_freq; + + m_codes = rhs.m_codes; + m_code_sizes = rhs.m_code_sizes; + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + *m_pDecode_tables = *rhs.m_pDecode_tables; + else + m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); + } + else + { + crnlib_delete(m_pDecode_tables); + 
m_pDecode_tables = NULL; + } + + m_decoder_table_bits = rhs.m_decoder_table_bits; + m_encoding = rhs.m_encoding; + + return *this; + } + + void adaptive_huffman_data_model::clear() + { + m_sym_freq.clear(); + m_codes.clear(); + m_code_sizes.clear(); + + m_total_syms = 0; + m_update_cycle = 0; + m_symbols_until_update = 0; + m_decoder_table_bits = 0; + m_total_count = 0; + + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + } + + void adaptive_huffman_data_model::init(bool encoding, uint total_syms) + { + clear(); + + m_encoding = encoding; + + m_sym_freq.resize(total_syms); + m_code_sizes.resize(total_syms); + + m_total_syms = total_syms; + + if (m_total_syms <= 16) + m_decoder_table_bits = 0; + else + m_decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + + if (m_encoding) + m_codes.resize(total_syms); + else + m_pDecode_tables = crnlib_new(); + + reset(); + } + + void adaptive_huffman_data_model::reset() + { + if (!m_total_syms) + return; + + for (uint i = 0; i < m_total_syms; i++) + m_sym_freq[i] = 1; + + m_total_count = 0; + m_update_cycle = m_total_syms; + + update(); + + m_symbols_until_update = m_update_cycle = 8;//(m_total_syms + 6) >> 1; + } + + void adaptive_huffman_data_model::rescale() + { + uint total_freq = 0; + + for (uint i = 0; i < m_total_syms; i++) + { + uint freq = (m_sym_freq[i] + 1) >> 1; + total_freq += freq; + m_sym_freq[i] = static_cast(freq); + } + + m_total_count = total_freq; + } + + void adaptive_huffman_data_model::update() + { + m_total_count += m_update_cycle; + + if (m_total_count >= 32768) + rescale(); + + void* pTables = create_generate_huffman_codes_tables(); + + uint max_code_size, total_freq; + bool status = generate_huffman_codes(pTables, m_total_syms, &m_sym_freq[0], &m_code_sizes[0], max_code_size, total_freq); + CRNLIB_ASSERT(status); + CRNLIB_ASSERT(total_freq == m_total_count); + + if (max_code_size > 
prefix_coding::cMaxExpectedCodeSize) + prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], prefix_coding::cMaxExpectedCodeSize); + + free_generate_huffman_codes_tables(pTables); + + if (m_encoding) + status = prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0]); + else + status = prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, m_decoder_table_bits); + + CRNLIB_ASSERT(status); + status; + + m_update_cycle = (5 * m_update_cycle) >> 2; + uint max_cycle = (m_total_syms + 6) << 3; // this was << 2 - which is ~12% slower but compresses around .5% better + + if (m_update_cycle > max_cycle) + m_update_cycle = max_cycle; + + m_symbols_until_update = m_update_cycle; + } + + static_huffman_data_model::static_huffman_data_model() : + m_total_syms(0), + m_pDecode_tables(NULL), + m_encoding(false) + { + } + + static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : + m_total_syms(0), + m_pDecode_tables(NULL), + m_encoding(false) + { + *this = other; + } + + static_huffman_data_model::~static_huffman_data_model() + { + if (m_pDecode_tables) + crnlib_delete(m_pDecode_tables); + } + + static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) + { + if (this == &rhs) + return *this; + + m_total_syms = rhs.m_total_syms; + m_codes = rhs.m_codes; + m_code_sizes = rhs.m_code_sizes; + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + *m_pDecode_tables = *rhs.m_pDecode_tables; + else + m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); + } + else + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + m_encoding = rhs.m_encoding; + + return *this; + } + + void static_huffman_data_model::clear() + { + m_total_syms = 0; + m_codes.clear(); + m_code_sizes.clear(); + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + m_encoding = false; + } + + bool 
static_huffman_data_model::init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit) + { + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + m_encoding = encoding; + + m_total_syms = total_syms; + + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + + m_code_sizes.resize(total_syms); + + void* pTables = create_generate_huffman_codes_tables(); + + uint max_code_size = 0, total_freq; + bool status = generate_huffman_codes(pTables, m_total_syms, pSym_freq, &m_code_sizes[0], max_code_size, total_freq); + + free_generate_huffman_codes_tables(pTables); + + if (!status) + return false; + + if (max_code_size > code_size_limit) + { + if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) + return false; + } + + if (m_encoding) + { + m_codes.resize(total_syms); + + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) + return false; + } + else + { + m_codes.clear(); + + if (!m_pDecode_tables) + m_pDecode_tables = crnlib_new(); + + if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) + return false; + } + + return true; + } + + bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit) + { + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + crnlib::vector sym_freq16(total_syms); + + uint max_freq = 0; + for (uint i = 0; i < total_syms; i++) + max_freq = math::maximum(max_freq, pSym_freq[i]); + + if (!max_freq) + return false; + + if (max_freq <= UINT16_MAX) + { + for (uint i = 0; i < total_syms; i++) + sym_freq16[i] = static_cast(pSym_freq[i]); + } + else + { + for (uint i = 0; i < total_syms; i++) + { 
+ uint f = pSym_freq[i]; + if (!f) + continue; + + uint64 fl = f; + + fl = ((fl << 16) - fl) + (max_freq >> 1); + fl /= max_freq; + if (fl < 1) + fl = 1; + + CRNLIB_ASSERT(fl <= UINT16_MAX); + + sym_freq16[i] = static_cast(fl); + } + } + + return init(encoding, total_syms, &sym_freq16[0], code_size_limit); + } + + bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint8* pCode_sizes, uint code_size_limit) + { + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + m_encoding = encoding; + + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + + m_code_sizes.resize(total_syms); + + uint min_code_size = UINT_MAX; + uint max_code_size = 0; + + for (uint i = 0; i < total_syms; i++) + { + uint s = pCode_sizes[i]; + m_code_sizes[i] = static_cast(s); + min_code_size = math::minimum(min_code_size, s); + max_code_size = math::maximum(max_code_size, s); + } + + if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) + return false; + + if (max_code_size > code_size_limit) + { + if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) + return false; + } + + if (m_encoding) + { + m_codes.resize(total_syms); + + if (m_pDecode_tables) + { + crnlib_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) + return false; + } + else + { + m_codes.clear(); + + if (!m_pDecode_tables) + m_pDecode_tables = crnlib_new(); + + if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) + return false; + } + + return true; + } + + bool static_huffman_data_model::init(bool encoding, const symbol_histogram& hist, uint code_size_limit) + { + return init(encoding, hist.size(), hist.get_ptr(), code_size_limit); + } + + bool 
static_huffman_data_model::prepare_decoder_tables() + { + uint total_syms = m_code_sizes.size(); + + CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); + + m_encoding = false; + + m_total_syms = total_syms; + + m_codes.clear(); + + if (!m_pDecode_tables) + m_pDecode_tables = crnlib_new(); + + return prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits()); + } + + uint static_huffman_data_model::compute_decoder_table_bits() const + { + uint decoder_table_bits = 0; + if (m_total_syms > 16) + decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + return decoder_table_bits; + } + + adaptive_bit_model::adaptive_bit_model() + { + clear(); + } + + adaptive_bit_model::adaptive_bit_model(float prob0) + { + set_probability_0(prob0); + } + + adaptive_bit_model::adaptive_bit_model(const adaptive_bit_model& other) : + m_bit_0_prob(other.m_bit_0_prob) + { + } + + adaptive_bit_model& adaptive_bit_model::operator= (const adaptive_bit_model& rhs) + { + m_bit_0_prob = rhs.m_bit_0_prob; + return *this; + } + + void adaptive_bit_model::clear() + { + m_bit_0_prob = 1U << (cSymbolCodecArithProbBits - 1); + } + + void adaptive_bit_model::set_probability_0(float prob0) + { + m_bit_0_prob = static_cast(math::clamp((uint)(prob0 * cSymbolCodecArithProbScale), 1, cSymbolCodecArithProbScale - 1)); + } + + float adaptive_bit_model::get_cost(uint bit) const + { + return gProbCost[bit ? 
(cSymbolCodecArithProbScale - m_bit_0_prob) : m_bit_0_prob]; + } + + void adaptive_bit_model::update(uint bit) + { + if (!bit) + m_bit_0_prob += ((cSymbolCodecArithProbScale - m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + else + m_bit_0_prob -= (m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + CRNLIB_ASSERT(m_bit_0_prob >= 1); + CRNLIB_ASSERT(m_bit_0_prob < cSymbolCodecArithProbScale); + } + + adaptive_arith_data_model::adaptive_arith_data_model(bool encoding, uint total_syms) + { + init(encoding, total_syms); + } + + adaptive_arith_data_model::adaptive_arith_data_model(const adaptive_arith_data_model& other) + { + m_total_syms = other.m_total_syms; + m_probs = other.m_probs; + } + + adaptive_arith_data_model::~adaptive_arith_data_model() + { + } + + adaptive_arith_data_model& adaptive_arith_data_model::operator= (const adaptive_arith_data_model& rhs) + { + m_total_syms = rhs.m_total_syms; + m_probs = rhs.m_probs; + return *this; + } + + void adaptive_arith_data_model::clear() + { + m_total_syms = 0; + m_probs.clear(); + } + + void adaptive_arith_data_model::init(bool encoding, uint total_syms) + { + encoding; + if (!total_syms) + { + clear(); + return; + } + + if ((total_syms < 2) || (!math::is_power_of_2(total_syms))) + total_syms = math::next_pow2(total_syms); + + m_total_syms = total_syms; + + m_probs.resize(m_total_syms); + } + + void adaptive_arith_data_model::reset() + { + for (uint i = 0; i < m_probs.size(); i++) + m_probs[i].clear(); + } + + float adaptive_arith_data_model::get_cost(uint sym) const + { + uint node = 1; + + uint bitmask = m_total_syms; + + float cost = 0.0f; + do + { + bitmask >>= 1; + + uint bit = (sym & bitmask) ? 
1 : 0; + cost += m_probs[node].get_cost(bit); + node = (node << 1) + bit; + + } while (bitmask > 1); + + return cost; + } + + symbol_codec::symbol_codec() + { + clear(); + } + + void symbol_codec::clear() + { + m_pDecode_buf = NULL; + m_pDecode_buf_next = NULL; + m_pDecode_buf_end = NULL; + m_decode_buf_size = 0; + + m_bit_buf = 0; + m_bit_count = 0; + m_total_model_updates = 0; + m_mode = cNull; + m_simulate_encoding = false; + m_total_bits_written = 0; + + m_arith_base = 0; + m_arith_value = 0; + m_arith_length = 0; + m_arith_total_bits = 0; + + m_output_buf.clear(); + m_arith_output_buf.clear(); + m_output_syms.clear(); + } + + void symbol_codec::start_encoding(uint expected_file_size) + { + m_mode = cEncoding; + + m_total_model_updates = 0; + m_total_bits_written = 0; + + put_bits_init(expected_file_size); + + m_output_syms.resize(0); + + arith_start_encoding(); + } + + // Code length encoding symbols: + // 0-16 - actual code lengths + const uint cMaxCodelengthCodes = 21; + + const uint cSmallZeroRunCode = 17; + const uint cLargeZeroRunCode = 18; + const uint cSmallRepeatCode = 19; + const uint cLargeRepeatCode = 20; + + const uint cMinSmallZeroRunSize = 3; + const uint cMaxSmallZeroRunSize = 10; + const uint cMinLargeZeroRunSize = 11; + const uint cMaxLargeZeroRunSize = 138; + + const uint cSmallMinNonZeroRunSize = 3; + const uint cSmallMaxNonZeroRunSize = 6; + const uint cLargeMinNonZeroRunSize = 7; + const uint cLargeMaxNonZeroRunSize = 70; + + const uint cSmallZeroRunExtraBits = 3; + const uint cLargeZeroRunExtraBits = 7; + const uint cSmallNonZeroRunExtraBits = 2; + const uint cLargeNonZeroRunExtraBits = 6; + + static const uint8 g_most_probable_codelength_codes[] = + { + cSmallZeroRunCode, cLargeZeroRunCode, + cSmallRepeatCode, cLargeRepeatCode, + + 0, 8, + 7, 9, + 6, 10, + 5, 11, + 4, 12, + 3, 13, + 2, 14, + 1, 15, + 16 + }; + const uint cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / 
sizeof(g_most_probable_codelength_codes[0]); + + static inline void end_zero_run(uint& size, crnlib::vector& codes) + { + if (!size) + return; + + if (size < cMinSmallZeroRunSize) + { + while (size--) + codes.push_back(0); + } + else if (size <= cMaxSmallZeroRunSize) + codes.push_back( static_cast(cSmallZeroRunCode | ((size - cMinSmallZeroRunSize) << 8)) ); + else + { + CRNLIB_ASSERT((size >= cMinLargeZeroRunSize) && (size <= cMaxLargeZeroRunSize)); + codes.push_back( static_cast(cLargeZeroRunCode | ((size - cMinLargeZeroRunSize) << 8)) ); + } + + size = 0; + } + + static inline void end_nonzero_run(uint& size, uint len, crnlib::vector& codes) + { + if (!size) + return; + + if (size < cSmallMinNonZeroRunSize) + { + while (size--) + codes.push_back(static_cast(len)); + } + else if (size <= cSmallMaxNonZeroRunSize) + { + codes.push_back(static_cast(cSmallRepeatCode | ((size - cSmallMinNonZeroRunSize) << 8))); + } + else + { + CRNLIB_ASSERT((size >= cLargeMinNonZeroRunSize) && (size <= cLargeMaxNonZeroRunSize)); + codes.push_back(static_cast(cLargeRepeatCode | ((size - cLargeMinNonZeroRunSize) << 8))); + } + + size = 0; + } + + uint symbol_codec::encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate = false, static_huffman_data_model* pDeltaModel ) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + uint total_used_syms = 0; + for (uint i = model.m_total_syms; i > 0; i--) + { + if (model.m_code_sizes[i - 1]) + { + total_used_syms = i; + break; + } + } + + if (!total_used_syms) + { + if (!simulate) + { + encode_bits(0, math::total_bits(prefix_coding::cMaxSupportedSyms)); + } + + return math::total_bits(prefix_coding::cMaxSupportedSyms); + } + + crnlib::vector codes; + codes.reserve(model.m_total_syms); + + uint prev_len = UINT_MAX; + uint cur_zero_run_size = 0; + uint cur_nonzero_run_size = 0; + + const uint8* pCodesizes = &model.m_code_sizes[0]; + + crnlib::vector delta_code_sizes; + if ((pDeltaModel) && (pDeltaModel->get_total_syms())) + 
{ + if (pDeltaModel->m_code_sizes.size() < total_used_syms) + return 0; + + delta_code_sizes.resize(total_used_syms); + for (uint i = 0; i < total_used_syms; i++) + { + int delta = (int)model.m_code_sizes[i] - (int)pDeltaModel->m_code_sizes[i]; + if (delta < 0) + delta += 17; + delta_code_sizes[i] = static_cast(delta); + } + + pCodesizes = delta_code_sizes.get_ptr(); + } + + for (uint i = 0; i <= total_used_syms; i++) + { + const uint len = (i < total_used_syms) ? *pCodesizes++ : 0xFF; + CRNLIB_ASSERT((len == 0xFF) || (len <= prefix_coding::cMaxExpectedCodeSize)); + + if (!len) + { + end_nonzero_run(cur_nonzero_run_size, prev_len, codes); + + if (++cur_zero_run_size == cMaxLargeZeroRunSize) + end_zero_run(cur_zero_run_size, codes); + } + else + { + end_zero_run(cur_zero_run_size, codes); + + if (len != prev_len) + { + end_nonzero_run(cur_nonzero_run_size, prev_len, codes); + + if (len != 0xFF) + codes.push_back(static_cast(len)); + } + else if (++cur_nonzero_run_size == cLargeMaxNonZeroRunSize) + end_nonzero_run(cur_nonzero_run_size, prev_len, codes); + } + + prev_len = len; + } + + uint16 hist[cMaxCodelengthCodes]; + utils::zero_object(hist); + + for (uint i = 0; i < codes.size(); i++) + { + uint code = codes[i] & 0xFF; + CRNLIB_ASSERT(code < cMaxCodelengthCodes); + hist[code] = static_cast(hist[code] + 1); + } + + static_huffman_data_model dm; + if (!dm.init(true, cMaxCodelengthCodes, hist, 7)) + return 0; + + uint num_codelength_codes_to_send; + for (num_codelength_codes_to_send = cNumMostProbableCodelengthCodes; num_codelength_codes_to_send > 0; num_codelength_codes_to_send--) + if (dm.get_cost(g_most_probable_codelength_codes[num_codelength_codes_to_send - 1])) + break; + + uint total_bits = math::total_bits(prefix_coding::cMaxSupportedSyms); + total_bits += 5; + total_bits += 3 * num_codelength_codes_to_send; + + if (!simulate) + { + encode_bits(total_used_syms, math::total_bits(prefix_coding::cMaxSupportedSyms)); + + encode_bits(num_codelength_codes_to_send, 
5); + for (uint i = 0; i < num_codelength_codes_to_send; i++) + encode_bits(dm.get_cost(g_most_probable_codelength_codes[i]), 3); + } + + for (uint i = 0; i < codes.size(); i++) + { + uint code = codes[i]; + uint extra = code >> 8; + code &= 0xFF; + + uint extra_bits = 0; + if (code == cSmallZeroRunCode) + extra_bits = cSmallZeroRunExtraBits; + else if (code == cLargeZeroRunCode) + extra_bits = cLargeZeroRunExtraBits; + else if (code == cSmallRepeatCode) + extra_bits = cSmallNonZeroRunExtraBits; + else if (code == cLargeRepeatCode) + extra_bits = cLargeNonZeroRunExtraBits; + + total_bits += dm.get_cost(code); + + if (!simulate) + encode(code, dm); + + if (extra_bits) + { + if (!simulate) + encode_bits(extra, extra_bits); + + total_bits += extra_bits; + } + } + + return total_bits; + } + + void symbol_codec::encode_bits(uint bits, uint num_bits) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + if (!num_bits) + return; + + CRNLIB_ASSERT((num_bits == 32) || (bits <= ((1U << num_bits) - 1))); + + if (num_bits > 16) + { + record_put_bits(bits >> 16, num_bits - 16); + record_put_bits(bits & 0xFFFF, 16); + } + else + record_put_bits(bits, num_bits); + } + + void symbol_codec::encode_align_to_byte() + { + CRNLIB_ASSERT(m_mode == cEncoding); + + if (!m_simulate_encoding) + { + output_symbol sym; + sym.m_bits = 0; + sym.m_num_bits = output_symbol::cAlignToByteSym; + sym.m_arith_prob0 = 0; + m_output_syms.push_back(sym); + } + else + { + // We really don't know how many we're going to write, so just be conservative. 
+ m_total_bits_written += 7; + } + } + + void symbol_codec::encode(uint sym, adaptive_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cEncoding); + CRNLIB_ASSERT(model.m_encoding); + + record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); + + uint freq = model.m_sym_freq[sym]; + freq++; + model.m_sym_freq[sym] = static_cast(freq); + + if (freq == UINT16_MAX) + model.rescale(); + + if (--model.m_symbols_until_update == 0) + { + m_total_model_updates++; + model.update(); + } + } + + void symbol_codec::encode(uint sym, static_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cEncoding); + CRNLIB_ASSERT(model.m_encoding); + + CRNLIB_ASSERT(model.m_code_sizes[sym]); + + record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); + } + + void symbol_codec::encode_truncated_binary(uint v, uint n) + { + CRNLIB_ASSERT((n >= 2) && (v < n)); + + uint k = math::floor_log2i(n); + uint u = (1 << (k + 1)) - n; + + if (v < u) + encode_bits(v, k); + else + encode_bits(v + u, k + 1); + } + + uint symbol_codec::encode_truncated_binary_cost(uint v, uint n) + { + CRNLIB_ASSERT((n >= 2) && (v < n)); + + uint k = math::floor_log2i(n); + uint u = (1 << (k + 1)) - n; + + if (v < u) + return k; + else + return k + 1; + } + + void symbol_codec::encode_golomb(uint v, uint m) + { + CRNLIB_ASSERT(m > 0); + + uint q = v / m; + uint r = v % m; + + while (q > 16) + { + encode_bits(0xFFFF, 16); + q -= 16; + } + + if (q) + encode_bits( (1 << q) - 1, q); + + encode_bits(0, 1); + + encode_truncated_binary(r, m); + } + + void symbol_codec::encode_rice(uint v, uint m) + { + CRNLIB_ASSERT(m > 0); + + uint q = v >> m; + uint r = v & ((1 << m) - 1); + + while (q > 16) + { + encode_bits(0xFFFF, 16); + q -= 16; + } + + if (q) + encode_bits( (1 << q) - 1, q); + + encode_bits(0, 1); + + encode_bits(r, m); + } + + uint symbol_codec::encode_rice_get_cost(uint v, uint m) + { + CRNLIB_ASSERT(m > 0); + + uint q = v >> m; + //uint r = v & ((1 << m) - 1); + + return q + 1 + m; + } + + void 
symbol_codec::arith_propagate_carry() + { + int index = m_arith_output_buf.size() - 1; + while (index >= 0) + { + uint c = m_arith_output_buf[index]; + + if (c == 0xFF) + m_arith_output_buf[index] = 0; + else + { + m_arith_output_buf[index]++; + break; + } + + index--; + } + } + + void symbol_codec::arith_renorm_enc_interval() + { + do + { + m_arith_output_buf.push_back( (m_arith_base >> 24) & 0xFF ); + m_total_bits_written += 8; + + m_arith_base <<= 8; + } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); + } + + void symbol_codec::arith_start_encoding() + { + m_arith_output_buf.resize(0); + + m_arith_base = 0; + m_arith_value = 0; + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_total_bits = 0; + } + + void symbol_codec::encode(uint bit, adaptive_bit_model& model, bool update_model) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + m_arith_total_bits++; + + if (!m_simulate_encoding) + { + output_symbol sym; + sym.m_bits = bit; + sym.m_num_bits = -1; + sym.m_arith_prob0 = model.m_bit_0_prob; + m_output_syms.push_back(sym); + } + + //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; + uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); + + if (!bit) + { + if (update_model) + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + + m_arith_length = x; + } + else + { + if (update_model) + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + + uint orig_base = m_arith_base; + m_arith_base += x; + m_arith_length -= x; + if (orig_base > m_arith_base) + arith_propagate_carry(); + } + + if (m_arith_length < cSymbolCodecArithMinLen) + arith_renorm_enc_interval(); + } + + void symbol_codec::encode(uint sym, adaptive_arith_data_model& model) + { + uint node = 1; + + uint bitmask = model.m_total_syms; + + do + { + bitmask >>= 1; + + uint 
bit = (sym & bitmask) ? 1 : 0; + encode(bit, model.m_probs[node]); + node = (node << 1) + bit; + + } while (bitmask > 1); + } + + void symbol_codec::arith_stop_encoding() + { + if (!m_arith_total_bits) + return; + + uint orig_base = m_arith_base; + + if (m_arith_length > 2 * cSymbolCodecArithMinLen) + { + m_arith_base += cSymbolCodecArithMinLen; + m_arith_length = (cSymbolCodecArithMinLen >> 1); + } + else + { + m_arith_base += (cSymbolCodecArithMinLen >> 1); + m_arith_length = (cSymbolCodecArithMinLen >> 9); + } + + if (orig_base > m_arith_base) + arith_propagate_carry(); + + arith_renorm_enc_interval(); + + while (m_arith_output_buf.size() < 4) + { + m_arith_output_buf.push_back(0); + m_total_bits_written += 8; + } + } + + void symbol_codec::stop_encoding(bool support_arith) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + arith_stop_encoding(); + + if (!m_simulate_encoding) + assemble_output_buf(support_arith); + + m_mode = cNull; + } + + void symbol_codec::record_put_bits(uint bits, uint num_bits) + { + CRNLIB_ASSERT(m_mode == cEncoding); + + CRNLIB_ASSERT(num_bits <= 25); + CRNLIB_ASSERT(m_bit_count >= 25); + + if (!num_bits) + return; + + m_total_bits_written += num_bits; + + if (!m_simulate_encoding) + { + output_symbol sym; + sym.m_bits = bits; + sym.m_num_bits = (uint16)num_bits; + sym.m_arith_prob0 = 0; + m_output_syms.push_back(sym); + } + } + + void symbol_codec::put_bits_init(uint expected_size) + { + m_bit_buf = 0; + m_bit_count = cBitBufSize; + + m_output_buf.resize(0); + m_output_buf.reserve(expected_size); + } + + void symbol_codec::put_bits(uint bits, uint num_bits) + { + CRNLIB_ASSERT(num_bits <= 25); + CRNLIB_ASSERT(m_bit_count >= 25); + + if (!num_bits) + return; + + m_bit_count -= num_bits; + m_bit_buf |= (static_cast(bits) << m_bit_count); + + m_total_bits_written += num_bits; + + while (m_bit_count <= (cBitBufSize - 8)) + { + m_output_buf.push_back(static_cast(m_bit_buf >> (cBitBufSize - 8))); + + m_bit_buf <<= 8; + m_bit_count += 8; + } + } + 
+ void symbol_codec::put_bits_align_to_byte() + { + uint num_bits_in = cBitBufSize - m_bit_count; + if (num_bits_in & 7) + { + put_bits(0, 8 - (num_bits_in & 7)); + } + } + + void symbol_codec::flush_bits() + { + //put_bits(15, 4); // for table look-ahead + //put_bits(3, 3); // for table look-ahead + + put_bits(0, 7); // to ensure the last bits are flushed + } + + void symbol_codec::assemble_output_buf(bool support_arith) + { + m_total_bits_written = 0; + + uint arith_buf_ofs = 0; + + if (support_arith) + { + if (m_arith_output_buf.size()) + { + put_bits(1, 1); + + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_value = 0; + for (uint i = 0; i < 4; i++) + { + const uint c = m_arith_output_buf[arith_buf_ofs++]; + m_arith_value = (m_arith_value << 8) | c; + put_bits(c, 8); + } + } + else + { + put_bits(0, 1); + } + } + + for (uint sym_index = 0; sym_index < m_output_syms.size(); sym_index++) + { + const output_symbol& sym = m_output_syms[sym_index]; + + if (sym.m_num_bits == output_symbol::cAlignToByteSym) + { + put_bits_align_to_byte(); + } + else if (sym.m_num_bits == output_symbol::cArithSym) + { + if (m_arith_length < cSymbolCodecArithMinLen) + { + do + { + const uint c = (arith_buf_ofs < m_arith_output_buf.size()) ? 
m_arith_output_buf[arith_buf_ofs++] : 0; + put_bits(c, 8); + m_arith_value = (m_arith_value << 8) | c; + } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); + } + + //uint x = gArithProbMulTab[sym.m_arith_prob0 >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; + uint x = sym.m_arith_prob0 * (m_arith_length >> cSymbolCodecArithProbBits); + uint bit = (m_arith_value >= x); + + if (bit == 0) + { + m_arith_length = x; + } + else + { + m_arith_value -= x; + m_arith_length -= x; + } + + CRNLIB_VERIFY(bit == sym.m_bits); + } + else + { + put_bits(sym.m_bits, sym.m_num_bits); + } + } + + flush_bits(); + } + + //------------------------------------------------------------------------------------------------------------------ + // Decoding + //------------------------------------------------------------------------------------------------------------------ + + bool symbol_codec::start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag, need_bytes_func_ptr pNeed_bytes_func, void *pPrivate_data) + { + if (!buf_size) + return false; + + m_total_model_updates = 0; + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + m_pDecode_need_bytes_func = pNeed_bytes_func; + m_pDecode_private_data = pPrivate_data; + m_decode_buf_eof = eof_flag; + if (!pNeed_bytes_func) + { + m_decode_buf_eof = true; + } + + m_mode = cDecoding; + + get_bits_init(); + + return true; + } + + uint symbol_codec::decode_bits(uint num_bits) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + if (!num_bits) + return 0; + + if (num_bits > 16) + { + uint a = get_bits(num_bits - 16); + uint b = get_bits(16); + + return (a << 16) | b; + } + else + return get_bits(num_bits); + } + + void symbol_codec::decode_remove_bits(uint num_bits) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + while (num_bits > 16) + { + remove_bits(16); + num_bits -= 16; + } + + 
remove_bits(num_bits); + } + + uint symbol_codec::decode_peek_bits(uint num_bits) + { + CRNLIB_ASSERT(m_mode == cDecoding); + CRNLIB_ASSERT(num_bits <= 25); + + if (!num_bits) + return 0; + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + return static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + } + + uint symbol_codec::decode(adaptive_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cDecoding); + CRNLIB_ASSERT(!model.m_encoding); + + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + while (m_bit_count < (cBitBufSize - 8)) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); + uint sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; + + CRNLIB_ASSERT(t != UINT32_MAX); + sym = t & UINT16_MAX; + len = t >> 16; + + 
CRNLIB_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for ( ; ; ) + { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); + + if (((uint)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRNLIB_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + uint freq = model.m_sym_freq[sym]; + freq++; + model.m_sym_freq[sym] = static_cast(freq); + + if (freq == UINT16_MAX) + model.rescale(); + + if (--model.m_symbols_until_update == 0) + { + m_total_model_updates++; + model.update(); + } + + return sym; + } + + void symbol_codec::decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf_next; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + if (!m_pDecode_need_bytes_func) + m_decode_buf_eof = true; + else + m_decode_buf_eof = eof_flag; + } + + bool symbol_codec::decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel) + { + CRNLIB_ASSERT(m_mode == cDecoding); + + const uint total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); + if (!total_used_syms) + { + model.clear(); + return true; + } + + model.m_code_sizes.resize(total_used_syms); + memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); + + const uint num_codelength_codes_to_send = decode_bits(5); + if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) + return false; + + static_huffman_data_model dm; + dm.m_code_sizes.resize(cMaxCodelengthCodes); + + for (uint i = 0; i < num_codelength_codes_to_send; i++) + 
dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); + + if (!dm.prepare_decoder_tables()) + return false; + + uint ofs = 0; + while (ofs < total_used_syms) + { + const uint num_remaining = total_used_syms - ofs; + + uint code = decode(dm); + if (code <= 16) + model.m_code_sizes[ofs++] = static_cast(code); + else if (code == cSmallZeroRunCode) + { + uint len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if (code == cLargeZeroRunCode) + { + uint len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) + { + uint len; + if (code == cSmallRepeatCode) + len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; + else + len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; + + if ((!ofs) || (len > num_remaining)) + return false; + const uint prev = model.m_code_sizes[ofs - 1]; + if (!prev) + return false; + const uint end = ofs + len; + while (ofs < end) + model.m_code_sizes[ofs++] = static_cast(prev); + } + else + { + CRNLIB_ASSERT(0); + return false; + } + } + + if (ofs != total_used_syms) + return false; + + if ((pDeltaModel) && (pDeltaModel->get_total_syms())) + { + uint n = math::minimum(pDeltaModel->m_code_sizes.size(), total_used_syms); + for (uint i = 0; i < n; i++) + { + int codesize = model.m_code_sizes[i] + pDeltaModel->m_code_sizes[i]; + if (codesize > 16) + codesize -= 17; + model.m_code_sizes[i] = static_cast(codesize); + } + } + + return model.prepare_decoder_tables(); + } + + uint symbol_codec::decode(static_huffman_data_model& model) + { + CRNLIB_ASSERT(m_mode == cDecoding); + CRNLIB_ASSERT(!model.m_encoding); + + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + while (m_bit_count < (cBitBufSize - 8)) + { + uint c = 0; + if (m_pDecode_buf_next == 
m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); + uint sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; + + CRNLIB_ASSERT(t != UINT32_MAX); + sym = t & UINT16_MAX; + len = t >> 16; + + CRNLIB_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for ( ; ; ) + { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); + + if (((uint)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRNLIB_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + return sym; + } + + uint symbol_codec::decode_truncated_binary(uint n) + { + CRNLIB_ASSERT(n >= 2); + + uint k = math::floor_log2i(n); + uint u = (1 << (k + 1)) - n; + + uint i = decode_bits(k); + + if (i >= u) + i = ((i << 1) | decode_bits(1)) - u; + + return i; + } + + uint symbol_codec::decode_golomb(uint m) + { + CRNLIB_ASSERT(m > 1); + + uint q = 0; + + for ( ; ; ) + { + uint k = decode_peek_bits(16); + + uint l = utils::count_leading_zeros16((~k) & 0xFFFF); + q += l; + if (l < 16) + break; + } + + decode_remove_bits(q + 1); + + uint r = decode_truncated_binary(m); + + return (q * m) + r; + } + + uint symbol_codec::decode_rice(uint m) + { + CRNLIB_ASSERT(m > 0); + + uint q = 0; + + for ( ; 
; ) + { + uint k = decode_peek_bits(16); + + uint l = utils::count_leading_zeros16((~k) & 0xFFFF); + + q += l; + + decode_remove_bits(l); + + if (l < 16) + break; + } + + decode_remove_bits(1); + + uint r = decode_bits(m); + + return (q << m) + r; + } + + uint64 symbol_codec::stop_decoding() + { + CRNLIB_ASSERT(m_mode == cDecoding); + + uint64 n = m_pDecode_buf_next - m_pDecode_buf; + + m_mode = cNull; + + return n; + } + + void symbol_codec::get_bits_init() + { + m_bit_buf = 0; + m_bit_count = 0; + } + + uint symbol_codec::get_bits(uint num_bits) + { + CRNLIB_ASSERT(num_bits <= 25); + + if (!num_bits) + return 0; + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + uint result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + + return result; + } + + void symbol_codec::remove_bits(uint num_bits) + { + CRNLIB_ASSERT(num_bits <= 25); + + if (!num_bits) + return; + + while (m_bit_count < (int)num_bits) + { + uint c = 0; + if (m_pDecode_buf_next == m_pDecode_buf_end) + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; + } + } + else + c = 
*m_pDecode_buf_next++; + + m_bit_count += 8; + CRNLIB_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); + } + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + } + + void symbol_codec::decode_align_to_byte() + { + CRNLIB_ASSERT(m_mode == cDecoding); + + if (m_bit_count & 7) + { + remove_bits(m_bit_count & 7); + } + } + + int symbol_codec::decode_remove_byte_from_bit_buf() + { + if (m_bit_count < 8) + return -1; + int result = static_cast(m_bit_buf >> (cBitBufSize - 8)); + m_bit_buf <<= 8; + m_bit_count -= 8; + return result; + } + + uint symbol_codec::decode(adaptive_bit_model& model, bool update_model) + { + if (m_arith_length < cSymbolCodecArithMinLen) + { + uint c = get_bits(8); + m_arith_value = (m_arith_value << 8) | c; + + m_arith_length <<= 8; + CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); + } + + CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); + + //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; + uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); + uint bit = (m_arith_value >= x); + + if (!bit) + { + if (update_model) + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); + + m_arith_length = x; + } + else + { + if (update_model) + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); + + m_arith_value -= x; + m_arith_length -= x; + } + + return bit; + } + + uint symbol_codec::decode(adaptive_arith_data_model& model) + { + uint node = 1; + + do + { + uint bit = decode(model.m_probs[node]); + + node = (node << 1) + bit; + + } while (node < model.m_total_syms); + + return node - model.m_total_syms; + } + + void symbol_codec::start_arith_decoding() + { + CRNLIB_ASSERT(m_mode == cDecoding); + + m_arith_length = cSymbolCodecArithMaxLen; + m_arith_value = 
0; + + if (get_bits(1)) + { + m_arith_value = (get_bits(8) << 24); + m_arith_value |= (get_bits(8) << 16); + m_arith_value |= (get_bits(8) << 8); + m_arith_value |= get_bits(8); + } + } + + void symbol_codec::decode_need_bytes() + { + if (!m_decode_buf_eof) + { + m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); + m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; + m_pDecode_buf_next = m_pDecode_buf; + } + } + +} // namespace crnlib diff --git a/crnlib/crn_symbol_codec.h b/crnlib/crn_symbol_codec.h new file mode 100644 index 00000000..11ef4585 --- /dev/null +++ b/crnlib/crn_symbol_codec.h @@ -0,0 +1,519 @@ +// File: crn_symbol_codec.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_prefix_coding.h" + +namespace crnlib +{ + class symbol_codec; + class adaptive_arith_data_model; + + const uint cSymbolCodecArithMinLen = 0x01000000U; + const uint cSymbolCodecArithMaxLen = 0xFFFFFFFFU; + + const uint cSymbolCodecArithProbBits = 11; + const uint cSymbolCodecArithProbScale = 1 << cSymbolCodecArithProbBits; + const uint cSymbolCodecArithProbMoveBits = 5; + + const uint cSymbolCodecArithProbMulBits = 8; + const uint cSymbolCodecArithProbMulScale = 1 << cSymbolCodecArithProbMulBits; + + class symbol_histogram + { + public: + inline symbol_histogram(uint size = 0) : m_hist(size) { } + + inline void clear() { m_hist.clear(); } + + inline uint size() const { return static_cast(m_hist.size()); } + + inline void inc_freq(uint x, uint amount = 1) + { + uint h = m_hist[x]; + CRNLIB_ASSERT( amount <= (0xFFFFFFFF - h) ); + m_hist[x] = h + amount; + } + + inline void set_all(uint val) { for (uint i = 0; i < m_hist.size(); i++) m_hist[i] = val; } + + inline void resize(uint new_size) { m_hist.resize(new_size); } + + inline const uint* get_ptr() const { return m_hist.empty() ? 
NULL : &m_hist.front(); } + + double calc_entropy() const; + + uint operator[] (uint i) const { return m_hist[i]; } + uint& operator[] (uint i) { return m_hist[i]; } + + uint64 get_total() const; + + private: + crnlib::vector m_hist; + }; + + class adaptive_huffman_data_model + { + public: + adaptive_huffman_data_model(bool encoding = true, uint total_syms = 0); + adaptive_huffman_data_model(const adaptive_huffman_data_model& other); + ~adaptive_huffman_data_model(); + + adaptive_huffman_data_model& operator= (const adaptive_huffman_data_model& rhs); + + void clear(); + + void init(bool encoding, uint total_syms); + void reset(); + + void rescale(); + + uint get_total_syms() const { return m_total_syms; } + uint get_cost(uint sym) const { return m_code_sizes[sym]; } + + public: + uint m_total_syms; + + uint m_update_cycle; + uint m_symbols_until_update; + + uint m_total_count; + + crnlib::vector m_sym_freq; + + crnlib::vector m_codes; + crnlib::vector m_code_sizes; + + prefix_coding::decoder_tables* m_pDecode_tables; + + uint8 m_decoder_table_bits; + bool m_encoding; + + void update(); + + friend class symbol_codec; + }; + + class static_huffman_data_model + { + public: + static_huffman_data_model(); + static_huffman_data_model(const static_huffman_data_model& other); + ~static_huffman_data_model(); + + static_huffman_data_model& operator= (const static_huffman_data_model& rhs); + + void clear(); + + bool init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit); + bool init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit); + bool init(bool encoding, uint total_syms, const uint8* pCode_sizes, uint code_size_limit); + bool init(bool encoding, const symbol_histogram& hist, uint code_size_limit); + + uint get_total_syms() const { return m_total_syms; } + uint get_cost(uint sym) const { return m_code_sizes[sym]; } + + const uint8* get_code_sizes() const { return m_code_sizes.empty() ? 
NULL : &m_code_sizes[0]; } + + private: + uint m_total_syms; + + crnlib::vector m_codes; + crnlib::vector m_code_sizes; + + prefix_coding::decoder_tables* m_pDecode_tables; + + bool m_encoding; + + bool prepare_decoder_tables(); + uint compute_decoder_table_bits() const; + + friend class symbol_codec; + }; + + class adaptive_bit_model + { + public: + adaptive_bit_model(); + adaptive_bit_model(float prob0); + adaptive_bit_model(const adaptive_bit_model& other); + + adaptive_bit_model& operator= (const adaptive_bit_model& rhs); + + void clear(); + void set_probability_0(float prob0); + void update(uint bit); + + float get_cost(uint bit) const; + + public: + uint16 m_bit_0_prob; + + friend class symbol_codec; + friend class adaptive_arith_data_model; + }; + + class adaptive_arith_data_model + { + public: + adaptive_arith_data_model(bool encoding = true, uint total_syms = 0); + adaptive_arith_data_model(const adaptive_arith_data_model& other); + ~adaptive_arith_data_model(); + + adaptive_arith_data_model& operator= (const adaptive_arith_data_model& rhs); + + void clear(); + + void init(bool encoding, uint total_syms); + void reset(); + + uint get_total_syms() const { return m_total_syms; } + float get_cost(uint sym) const; + + private: + uint m_total_syms; + typedef crnlib::vector adaptive_bit_model_vector; + adaptive_bit_model_vector m_probs; + + friend class symbol_codec; + }; + +#if (defined(_XBOX) || defined(_WIN64)) + #define CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER 1 +#else + #define CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER 0 +#endif + + class symbol_codec + { + public: + symbol_codec(); + + void clear(); + + // Encoding + void start_encoding(uint expected_file_size); + uint encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate, static_huffman_data_model* pDelta_model = NULL ); + void encode_bits(uint bits, uint num_bits); + void encode_align_to_byte(); + void encode(uint sym, adaptive_huffman_data_model& model); + void encode(uint 
sym, static_huffman_data_model& model); + void encode_truncated_binary(uint v, uint n); + static uint encode_truncated_binary_cost(uint v, uint n); + void encode_golomb(uint v, uint m); + void encode_rice(uint v, uint m); + static uint encode_rice_get_cost(uint v, uint m); + void encode(uint bit, adaptive_bit_model& model, bool update_model = true); + void encode(uint sym, adaptive_arith_data_model& model); + + inline void encode_enable_simulation(bool enabled) { m_simulate_encoding = enabled; } + inline bool encode_get_simulation() { return m_simulate_encoding; } + inline uint encode_get_total_bits_written() const { return m_total_bits_written; } + + void stop_encoding(bool support_arith); + + const crnlib::vector& get_encoding_buf() const { return m_output_buf; } + crnlib::vector& get_encoding_buf() { return m_output_buf; } + + // Decoding + + typedef void (*need_bytes_func_ptr)(size_t num_bytes_consumed, void *pPrivate_data, const uint8* &pBuf, size_t &buf_size, bool &eof_flag); + + bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = NULL, void *pPrivate_data = NULL); + void decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag = true); + inline uint64 decode_get_bytes_consumed() const { return m_pDecode_buf_next - m_pDecode_buf; } + inline uint64 decode_get_bits_remaining() const { return ((m_pDecode_buf_end - m_pDecode_buf_next) << 3) + m_bit_count; } + void start_arith_decoding(); + bool decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel); + uint decode_bits(uint num_bits); + uint decode_peek_bits(uint num_bits); + void decode_remove_bits(uint num_bits); + void decode_align_to_byte(); + int decode_remove_byte_from_bit_buf(); + uint decode(adaptive_huffman_data_model& model); + uint decode(static_huffman_data_model& model); + uint decode_truncated_binary(uint n); + uint decode_golomb(uint 
m); + uint decode_rice(uint m); + uint decode(adaptive_bit_model& model, bool update_model = true); + uint decode(adaptive_arith_data_model& model); + uint64 stop_decoding(); + + uint get_total_model_updates() const { return m_total_model_updates; } + + public: + const uint8* m_pDecode_buf; + const uint8* m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + size_t m_decode_buf_size; + bool m_decode_buf_eof; + + need_bytes_func_ptr m_pDecode_need_bytes_func; + void* m_pDecode_private_data; + +#if CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER + typedef uint64 bit_buf_t; + enum { cBitBufSize = 64 }; +#else + typedef uint32 bit_buf_t; + enum { cBitBufSize = 32 }; +#endif + + bit_buf_t m_bit_buf; + int m_bit_count; + + uint m_total_model_updates; + + crnlib::vector m_output_buf; + crnlib::vector m_arith_output_buf; + + struct output_symbol + { + uint m_bits; + + enum { cArithSym = -1, cAlignToByteSym = -2 }; + int16 m_num_bits; + + uint16 m_arith_prob0; + }; + crnlib::vector m_output_syms; + + uint m_total_bits_written; + bool m_simulate_encoding; + + uint m_arith_base; + uint m_arith_value; + uint m_arith_length; + uint m_arith_total_bits; + + bool m_support_arith; + + void put_bits_init(uint expected_size); + void record_put_bits(uint bits, uint num_bits); + + void arith_propagate_carry(); + void arith_renorm_enc_interval(); + void arith_start_encoding(); + void arith_stop_encoding(); + + void put_bits(uint bits, uint num_bits); + void put_bits_align_to_byte(); + void flush_bits(); + void assemble_output_buf(bool support_arith); + + void get_bits_init(); + uint get_bits(uint num_bits); + void remove_bits(uint num_bits); + + void decode_need_bytes(); + + enum + { + cNull, + cEncoding, + cDecoding + } m_mode; + }; + +#define CRNLIB_SYMBOL_CODEC_USE_MACROS 1 + +#ifdef _XBOX + #define CRNLIB_READ_BIG_ENDIAN_UINT32(p) *reinterpret_cast(p) +#elif defined(_MSC_VER) + #define CRNLIB_READ_BIG_ENDIAN_UINT32(p) _byteswap_ulong(*reinterpret_cast(p)) +#else + #define 
CRNLIB_READ_BIG_ENDIAN_UINT32(p) utils::swap32(*reinterpret_cast(p)) +#endif + +#if CRNLIB_SYMBOL_CODEC_USE_MACROS + #define CRNLIB_SYMBOL_CODEC_DECODE_DECLARE(codec) \ + uint arith_value; \ + uint arith_length; \ + symbol_codec::bit_buf_t bit_buf; \ + int bit_count; \ + const uint8* pDecode_buf_next; + + #define CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + arith_value = codec.m_arith_value; \ + arith_length = codec.m_arith_length; \ + bit_buf = codec.m_bit_buf; \ + bit_count = codec.m_bit_count; \ + pDecode_buf_next = codec.m_pDecode_buf_next; + + #define CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.m_arith_value = arith_value; \ + codec.m_arith_length = arith_length; \ + codec.m_bit_buf = bit_buf; \ + codec.m_bit_count = bit_count; \ + codec.m_pDecode_buf_next = pDecode_buf_next; + + #define CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) \ + { \ + while (bit_count < (int)(num_bits)) \ + { \ + uint c = 0; \ + if (pDecode_buf_next == codec.m_pDecode_buf_end) \ + { \ + CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.decode_need_bytes(); \ + CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + if (pDecode_buf_next < codec.m_pDecode_buf_end) c = *pDecode_buf_next++; \ + } \ + else \ + c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + result = num_bits ? 
static_cast(bit_buf >> (symbol_codec::cBitBufSize - (num_bits))) : 0; \ + bit_buf <<= (num_bits); \ + bit_count -= (num_bits); \ + } + + #define CRNLIB_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) \ + { \ + if (arith_length < cSymbolCodecArithMinLen) \ + { \ + uint c; \ + CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, c, 8); \ + arith_value = (arith_value << 8) | c; \ + arith_length <<= 8; \ + } \ + uint x = model.m_bit_0_prob * (arith_length >> cSymbolCodecArithProbBits); \ + result = (arith_value >= x); \ + if (!result) \ + { \ + model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); \ + arith_length = x; \ + } \ + else \ + { \ + model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); \ + arith_value -= x; \ + arith_length -= x; \ + } \ + } + +#if CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER + #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ + { \ + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ + if (bit_count < 24) \ + { \ + uint c = 0; \ + pDecode_buf_next += sizeof(uint32); \ + if (pDecode_buf_next >= codec.m_pDecode_buf_end) \ + { \ + pDecode_buf_next -= sizeof(uint32); \ + while (bit_count < 24) \ + { \ + CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.decode_need_bytes(); \ + CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + if (pDecode_buf_next < codec.m_pDecode_buf_end) c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + } \ + else \ + { \ + c = CRNLIB_READ_BIG_ENDIAN_UINT32(pDecode_buf_next - sizeof(uint32)); \ + bit_count += 32; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + } \ + uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ + uint len; \ + if (k <= pTables->m_table_max_code) \ + { \ + uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ + result = t 
& UINT16_MAX; \ + len = t >> 16; \ + } \ + else \ + { \ + len = pTables->m_decode_start_code_size; \ + for ( ; ; ) \ + { \ + if (k <= pTables->m_max_codes[len - 1]) \ + break; \ + len++; \ + } \ + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ + if (((uint)val_ptr >= model.m_total_syms)) val_ptr = 0; \ + result = pTables->m_sorted_symbol_order[val_ptr]; \ + } \ + bit_buf <<= len; \ + bit_count -= len; \ + uint freq = model.m_sym_freq[result]; \ + freq++; \ + model.m_sym_freq[result] = static_cast(freq); \ + if (freq == UINT16_MAX) model.rescale(); \ + if (--model.m_symbols_until_update == 0) \ + { \ + model.update(); \ + } \ + } +#else + #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ + { \ + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ + while (bit_count < (symbol_codec::cBitBufSize - 8)) \ + { \ + uint c = 0; \ + if (pDecode_buf_next == codec.m_pDecode_buf_end) \ + { \ + CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ + codec.decode_need_bytes(); \ + CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ + if (pDecode_buf_next < codec.m_pDecode_buf_end) c = *pDecode_buf_next++; \ + } \ + else \ + c = *pDecode_buf_next++; \ + bit_count += 8; \ + bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ + } \ + uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ + uint len; \ + if (k <= pTables->m_table_max_code) \ + { \ + uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ + result = t & UINT16_MAX; \ + len = t >> 16; \ + } \ + else \ + { \ + len = pTables->m_decode_start_code_size; \ + for ( ; ; ) \ + { \ + if (k <= pTables->m_max_codes[len - 1]) \ + break; \ + len++; \ + } \ + int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ + if (((uint)val_ptr >= model.m_total_syms)) val_ptr = 0; \ + result = 
pTables->m_sorted_symbol_order[val_ptr]; \ + } \ + bit_buf <<= len; \ + bit_count -= len; \ + uint freq = model.m_sym_freq[result]; \ + freq++; \ + model.m_sym_freq[result] = static_cast(freq); \ + if (freq == UINT16_MAX) model.rescale(); \ + if (--model.m_symbols_until_update == 0) \ + { \ + model.update(); \ + } \ + } +#endif + +#else + #define CRNLIB_SYMBOL_CODEC_DECODE_DECLARE(codec) + #define CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) + #define CRNLIB_SYMBOL_CODEC_DECODE_END(codec) + + #define CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) result = codec.decode_bits(num_bits); + #define CRNLIB_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) result = codec.decode(model); + #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) result = codec.decode(model); +#endif + +} // namespace crnlib + diff --git a/crnlib/crn_task_pool.cpp b/crnlib/crn_task_pool.cpp new file mode 100644 index 00000000..930c81f6 --- /dev/null +++ b/crnlib/crn_task_pool.cpp @@ -0,0 +1,243 @@ +// File: crn_task_pool.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_task_pool.h" +#include + +#include "crn_winhdr.h" + +namespace crnlib +{ + task_pool::task_pool() : + m_num_threads(0), + m_num_outstanding_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + } + + task_pool::task_pool(uint num_threads) : + m_num_threads(0), + m_num_outstanding_tasks(0), + m_exit_flag(false) + { + utils::zero_object(m_threads); + bool status = init(num_threads); + CRNLIB_VERIFY(status); + } + + task_pool::~task_pool() + { + deinit(); + } + + bool task_pool::init(uint num_threads) + { + CRNLIB_ASSERT(num_threads <= cMaxThreads); + num_threads = math::minimum(num_threads, cMaxThreads); + + deinit(); + + m_task_condition_var.lock(); + + m_num_threads = num_threads; + + bool succeeded = true; + for (uint i = 0; i < num_threads; i++) + { + m_threads[i] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, 
NULL); + + CRNLIB_ASSERT(m_threads[i] != 0); + if (!m_threads[i]) + { + succeeded = false; + break; + } + } + + m_task_condition_var.unlock(); + + if (!succeeded) + { + deinit(); + return false; + } + return true; + } + + void task_pool::deinit() + { + if (m_num_threads) + { + m_task_condition_var.lock(); + + m_exit_flag = true; + + m_task_condition_var.unlock(); + + for (uint i = 0; i < m_num_threads; i++) + { + if (m_threads[i]) + { + for ( ; ; ) + { + uint32 result = WaitForSingleObject(m_threads[i], 1000); + if (result == WAIT_OBJECT_0) + break; + } + + CloseHandle(m_threads[i]); + + m_threads[i] = NULL; + } + } + + m_num_threads = 0; + + m_exit_flag = false; + } + + m_tasks.clear(); + m_num_outstanding_tasks = 0; + } + + uint task_pool::get_num_threads() const + { + return m_num_threads; + } + + void task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) + { + CRNLIB_ASSERT(pFunc); + + m_task_condition_var.lock(); + + task tsk; + tsk.m_callback = pFunc; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = 0; + m_tasks.push_back(tsk); + + m_num_outstanding_tasks++; + + m_task_condition_var.unlock(); + } + + // It's the object's responsibility to crnlib_delete pObj within the execute_task() method, if needed! 
+ void task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) + { + CRNLIB_ASSERT(pObj); + + m_task_condition_var.lock(); + + task tsk; + tsk.m_pObj = pObj; + tsk.m_data = data; + tsk.m_pData_ptr = pData_ptr; + tsk.m_flags = cTaskFlagObject; + m_tasks.push_back(tsk); + + m_num_outstanding_tasks++; + + m_task_condition_var.unlock(); + } + + bool task_pool::join_condition_func(void* pCallback_data_ptr, uint64 callback_data) + { + callback_data; + + task_pool* pPool = static_cast(pCallback_data_ptr); + + return (!pPool->m_num_outstanding_tasks) || pPool->m_exit_flag; + } + + void task_pool::process_task(task& tsk) + { + if (tsk.m_flags & cTaskFlagObject) + tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); + else + tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); + + m_task_condition_var.lock(); + + m_num_outstanding_tasks--; + + m_task_condition_var.unlock(); + } + + void task_pool::join() + { + for ( ; ; ) + { + m_task_condition_var.lock(); + + if (!m_tasks.empty()) + { + task tsk(m_tasks.front()); + m_tasks.pop_front(); + + m_task_condition_var.unlock(); + + process_task(tsk); + } + else + { + int result = m_task_condition_var.wait(join_condition_func, this); + result; + CRNLIB_ASSERT(result >= 0); + + m_task_condition_var.unlock(); + + break; + } + } + } + + bool task_pool::wait_condition_func(void* pCallback_data_ptr, uint64 callback_data) + { + callback_data; + + task_pool* pPool = static_cast(pCallback_data_ptr); + + return (!pPool->m_tasks.empty()) || pPool->m_exit_flag; + } + + unsigned __stdcall task_pool::thread_func(void* pContext) + { + //set_thread_name(GetCurrentThreadId(), "taskpoolhelper"); + + task_pool* pPool = static_cast(pContext); + + for ( ; ; ) + { + pPool->m_task_condition_var.lock(); + + int result = pPool->m_task_condition_var.wait(wait_condition_func, pPool); + + CRNLIB_ASSERT(result >= 0); + + if ((result < 0) || (pPool->m_exit_flag)) + { + pPool->m_task_condition_var.unlock(); + break; + } + + if 
(pPool->m_tasks.empty()) + pPool->m_task_condition_var.unlock(); + else + { + task tsk(pPool->m_tasks.front()); + pPool->m_tasks.pop_front(); + + pPool->m_task_condition_var.unlock(); + + pPool->process_task(tsk); + } + } + + _endthreadex(0); + return 0; + } + +} // namespace crnlib diff --git a/crnlib/crn_task_pool.h b/crnlib/crn_task_pool.h new file mode 100644 index 00000000..063cc238 --- /dev/null +++ b/crnlib/crn_task_pool.h @@ -0,0 +1,140 @@ +// File: crn_task_pool.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_condition_var.h" +#include + +namespace crnlib +{ + class task_pool + { + public: + task_pool(); + task_pool(uint num_threads); + ~task_pool(); + + enum { cMaxThreads = 16 }; + bool init(uint num_threads); + void deinit(); + + uint get_num_threads() const; + + // C-style task callback + typedef void (*task_callback_func)(uint64 data, void* pData_ptr); + void queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); + + class executable_task + { + public: + virtual void execute_task(uint64 data, void* pData_ptr) = 0; + }; + + // It's the caller's responsibility to crnlib_delete pObj within the execute_task() method, if needed! 
+ void queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); + + template + inline void queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); + + void join(); + + private: + uint m_num_threads; + + uint m_num_outstanding_tasks; + + void* m_threads[cMaxThreads]; + + bool m_exit_flag; + + condition_var m_task_condition_var; + + enum task_flags + { + cTaskFlagObject = 1 + }; + + struct task + { + uint64 m_data; + void* m_pData_ptr; + + union + { + task_callback_func m_callback; + executable_task* m_pObj; + }; + + uint m_flags; + }; + + std::deque m_tasks; + + void process_task(task& tsk); + + static bool join_condition_func(void* pCallback_data_ptr, uint64 callback_data); + static bool wait_condition_func(void* pCallback_data_ptr, uint64 callback_data); + static unsigned __stdcall thread_func(void* pContext); + }; + + enum object_task_flags + { + cObjectTaskFlagDefault = 0, + cObjectTaskFlagDeleteAfterExecution = 1 + }; + + template + class object_task : public task_pool::executable_task + { + public: + object_task(uint flags = cObjectTaskFlagDefault) : + m_pObject(NULL), + m_pMethod(NULL), + m_flags(flags) + { + } + + typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); + + object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : + m_pObject(pObject), + m_pMethod(pMethod), + m_flags(flags) + { + CRNLIB_ASSERT(pObject && pMethod); + } + + void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) + { + CRNLIB_ASSERT(pObject && pMethod); + + m_pObject = pObject; + m_pMethod = pMethod; + m_flags = flags; + } + + T* get_object() const { return m_pObject; } + object_method_ptr get_method() const { return m_pMethod; } + + virtual void execute_task(uint64 data, void* pData_ptr) + { + (m_pObject->*m_pMethod)(data, pData_ptr); + + if (m_flags & cObjectTaskFlagDeleteAfterExecution) + crnlib_delete(this); + } + + protected: + T* m_pObject; + + 
object_method_ptr m_pMethod; + + uint m_flags; + }; + + template + inline void task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) + { + queue_task(crnlib_new< object_task >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution), data, pData_ptr); + } + +} // namespace crnlib diff --git a/crnlib/crn_texture_comp.cpp b/crnlib/crn_texture_comp.cpp new file mode 100644 index 00000000..54254db9 --- /dev/null +++ b/crnlib/crn_texture_comp.cpp @@ -0,0 +1,509 @@ +// File: crn_texture_comp.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_texture_comp.h" +#include "crn_dds_comp.h" +#include "crn_console.h" +#include "crn_rect.h" + +namespace crnlib +{ + static itexture_comp *create_texture_comp(crn_file_type file_type) + { + if (file_type == cCRNFileTypeCRN) + return crnlib_new(); + else if (file_type == cCRNFileTypeDDS) + return crnlib_new(); + else + return NULL; + } + + bool create_compressed_texture(const crn_comp_params ¶ms, crnlib::vector &comp_data, uint32 *pActual_quality_level, float *pActual_bitrate) + { + crn_comp_params local_params(params); + + if (pixel_format_helpers::is_crn_format_non_srgb(local_params.m_format)) + { + if (local_params.get_flag(cCRNCompFlagPerceptual)) + { + //console::warning(L"Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); + + // Destination compressed pixel format is swizzled or not RGB at all, so be sure perceptual colorspace metrics are disabled. 
+ local_params.set_flag(cCRNCompFlagPerceptual, false); + } + } + + if (pActual_quality_level) *pActual_quality_level = 0; + if (pActual_bitrate) *pActual_bitrate = 0.0f; + + comp_data.resize(0); + + itexture_comp *pTexture_comp = create_texture_comp(local_params.m_file_type); + if (!pTexture_comp) + return false; + + if (!pTexture_comp->compress_init(local_params)) + { + crnlib_delete(pTexture_comp); + return false; + } + + if ( (local_params.m_target_bitrate <= 0.0f) || + (local_params.m_format == cCRNFmtDXT3) || + ((local_params.m_file_type == cCRNFileTypeCRN) && ((local_params.m_flags & cCRNCompFlagManualPaletteSizes) != 0)) + ) + { + if (!pTexture_comp->compress_pass(local_params, pActual_bitrate)) + { + crnlib_delete(pTexture_comp); + return false; + } + + comp_data.swap(pTexture_comp->get_comp_data()); + + if ((pActual_quality_level) && (local_params.m_target_bitrate <= 0.0)) + *pActual_quality_level = local_params.m_quality_level; + + crnlib_delete(pTexture_comp); + return true; + } + + // Interpolative search to find closest quality level to target bitrate. 
+ const int cLowestQuality = 0; + const int cHighestQuality = cCRNMaxQualityLevel; + const int cNumQualityLevels = cHighestQuality - cLowestQuality + 1; + + float best_bitrate = 1e+10f; + int best_quality_level = -1; + const uint cMaxIterations = 8; + + for ( ; ; ) + { + int low_quality = cLowestQuality; + int high_quality = cHighestQuality; + + float cached_bitrates[cNumQualityLevels]; + for (int i = 0; i < cNumQualityLevels; i++) + cached_bitrates[i] = -1.0f; + + float highest_bitrate = 0.0f; + + uint iter_count = 0; + bool force_binary_search = false; + + while (low_quality <= high_quality) + { + if (params.m_flags & cCRNCompFlagDebugging) + { + console::debug(L"Quality level bracket: [%u, %u]", low_quality, high_quality); + } + + int trial_quality = (low_quality + high_quality) / 2; + + if ((iter_count) && (!force_binary_search)) + { + int bracket_low = trial_quality; + while ((cached_bitrates[bracket_low] < 0) && (bracket_low > cLowestQuality)) + bracket_low--; + + if (cached_bitrates[bracket_low] < 0) + trial_quality = static_cast(math::lerp((float)low_quality, (float)high_quality, .33f)); + else + { + int bracket_high = trial_quality + 1; + if (bracket_high <= cHighestQuality) + { + while ((cached_bitrates[bracket_high] < 0) && (bracket_high < cHighestQuality)) + bracket_high++; + + if (cached_bitrates[bracket_high] >= 0) + { + float bracket_low_bitrate = cached_bitrates[bracket_low]; + float bracket_high_bitrate = cached_bitrates[bracket_high]; + + if ((bracket_low_bitrate < bracket_high_bitrate) && + (bracket_low_bitrate < local_params.m_target_bitrate) && + (bracket_high_bitrate >= local_params.m_target_bitrate)) + { + int quality = low_quality + static_cast( ((local_params.m_target_bitrate - bracket_low_bitrate) * (high_quality - low_quality)) / (bracket_high_bitrate - bracket_low_bitrate) ); + + if ((quality >= low_quality) && (quality <= high_quality)) + { + trial_quality = quality; + } + } + } + } + } + } + + console::info(L"Compressing to quality 
level %u", trial_quality); + + float bitrate = 0.0f; + + local_params.m_quality_level = trial_quality; + + if (!pTexture_comp->compress_pass(local_params, &bitrate)) + { + crnlib_delete(pTexture_comp); + return false; + } + + cached_bitrates[trial_quality] = bitrate; + + highest_bitrate = math::maximum(highest_bitrate, bitrate); + + console::info(L"\nTried quality level %u, bpp: %3.3f", trial_quality, bitrate); + + if ( (best_quality_level < 0) || + ((bitrate <= local_params.m_target_bitrate) && (best_bitrate > local_params.m_target_bitrate)) || + (((bitrate <= local_params.m_target_bitrate) || (best_bitrate > local_params.m_target_bitrate)) && (fabs(bitrate - local_params.m_target_bitrate) < fabs(best_bitrate - local_params.m_target_bitrate))) + ) + { + best_bitrate = bitrate; + comp_data.swap(pTexture_comp->get_comp_data()); + best_quality_level = trial_quality; + if (params.m_flags & cCRNCompFlagDebugging) + { + console::debug(L"Choose new best quality level"); + } + + if ((best_bitrate <= local_params.m_target_bitrate) && (fabs(best_bitrate - local_params.m_target_bitrate) < .005f)) + break; + } + + if (bitrate > local_params.m_target_bitrate) + high_quality = trial_quality - 1; + else + low_quality = trial_quality + 1; + + iter_count++; + if (iter_count > cMaxIterations) + { + force_binary_search = true; + } + } + + if (((local_params.m_flags & cCRNCompFlagHierarchical) != 0) && + (highest_bitrate < local_params.m_target_bitrate) && + (fabs(best_bitrate - local_params.m_target_bitrate) >= .005f)) + { + console::info(L"Unable to achieve desired bitrate - disabling adaptive block sizes and retrying search."); + + local_params.m_flags &= ~cCRNCompFlagHierarchical; + + crnlib_delete(pTexture_comp); + pTexture_comp = create_texture_comp(local_params.m_file_type); + + if (!pTexture_comp->compress_init(local_params)) + { + crnlib_delete(pTexture_comp); + return false; + } + } + else + break; + } + + crnlib_delete(pTexture_comp); + pTexture_comp = NULL; + + if 
(best_quality_level < 0) + return false; + + if (pActual_quality_level) *pActual_quality_level = best_quality_level; + if (pActual_bitrate) *pActual_bitrate = best_bitrate; + + console::printf(L"Selected quality level %u bpp: %f", best_quality_level, best_bitrate); + + return true; + } + + static bool create_dds_tex(const crn_comp_params ¶ms, dds_texture &dds_tex) + { + image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; + + bool has_alpha = false; + for (uint face_index = 0; face_index < params.m_faces; face_index++) + { + for (uint level_index = 0; level_index < params.m_levels; level_index++) + { + const uint width = math::maximum(1U, params.m_width >> level_index); + const uint height = math::maximum(1U, params.m_height >> level_index); + + if (!params.m_pImages[face_index][level_index]) + return false; + + images[face_index][level_index].alias((color_quad_u8*)params.m_pImages[face_index][level_index], width, height); + if (!has_alpha) + has_alpha = image_utils::has_alpha(images[face_index][level_index]); + } + } + + for (uint face_index = 0; face_index < params.m_faces; face_index++) + for (uint level_index = 0; level_index < params.m_levels; level_index++) + images[face_index][level_index].set_component_valid(3, has_alpha); + + face_vec faces(params.m_faces); + + for (uint face_index = 0; face_index < params.m_faces; face_index++) + { + for (uint level_index = 0; level_index < params.m_levels; level_index++) + { + mip_level *pMip = crnlib_new(); + + image_u8 *pImage = crnlib_new(); + pImage->swap(images[face_index][level_index]); + pMip->assign(pImage); + + faces[face_index].push_back(pMip); + } + } + + dds_tex.assign(faces); + +#ifdef CRNLIB_BUILD_DEBUG + CRNLIB_ASSERT(dds_tex.check()); +#endif + + return true; + } + + bool create_texture_mipmaps(dds_texture &work_tex, const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, bool generate_mipmaps) + { + crn_comp_params new_params(params); + + bool generate_new_mips = false; + + switch 
(mipmap_params.m_mode) + { + case cCRNMipModeUseSourceOrGenerateMips: + { + if (work_tex.get_num_levels() == 1) + generate_new_mips = true; + break; + } + case cCRNMipModeUseSourceMips: + { + break; + } + case cCRNMipModeGenerateMips: + { + generate_new_mips = true; + break; + } + case cCRNMipModeNoMips: + { + work_tex.discard_mipmaps(); + break; + } + default: + { + CRNLIB_ASSERT(0); + break; + } + } + + rect window_rect(mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); + + if (!window_rect.is_empty()) + { + if (work_tex.get_num_faces() > 1) + { + console::warning(L"Can't crop cubemap textures"); + } + else + { + console::info(L"Cropping input texture from window (%ux%u)-(%ux%u)", window_rect.get_left(), window_rect.get_top(), window_rect.get_right(), window_rect.get_bottom()); + + if (!work_tex.crop(window_rect.get_left(), window_rect.get_top(), window_rect.get_width(), window_rect.get_height())) + console::warning(L"Failed cropping window rect"); + } + } + + int new_width = work_tex.get_width(); + int new_height = work_tex.get_height(); + + if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) + { + if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) + { + if (!mipmap_params.m_clamp_scale) + { + if (work_tex.get_num_faces() > 1) + { + console::warning(L"Can't crop cubemap textures"); + } + else + { + new_width = math::minimum(mipmap_params.m_clamp_width, new_width); + new_height = math::minimum(mipmap_params.m_clamp_height, new_height); + console::info(L"Clamping input texture to %ux%u", new_width, new_height); + work_tex.crop(0, 0, new_width, new_height); + } + } + } + } + + if (mipmap_params.m_scale_mode != cCRNSMDisabled) + { + bool is_pow2 = math::is_power_of_2((uint32)new_width) && math::is_power_of_2((uint32)new_height); + + switch (mipmap_params.m_scale_mode) + { + case cCRNSMAbsolute: + { + new_width = 
(uint)mipmap_params.m_scale_x; + new_height = (uint)mipmap_params.m_scale_y; + break; + } + case cCRNSMRelative: + { + new_width = (uint)(mipmap_params.m_scale_x * new_width + .5f); + new_height = (uint)(mipmap_params.m_scale_y * new_height + .5f); + break; + } + case cCRNSMLowerPow2: + { + if (!is_pow2) + math::compute_lower_pow2_dim(new_width, new_height); + break; + } + case cCRNSMNearestPow2: + { + if (!is_pow2) + { + int lwidth = new_width; + int lheight = new_height; + math::compute_lower_pow2_dim(lwidth, lheight); + + int uwidth = new_width; + int uheight = new_height; + math::compute_upper_pow2_dim(uwidth, uheight); + + if (labs(new_width - lwidth) < labs(new_width - uwidth)) + new_width = lwidth; + else + new_width = uwidth; + + if (labs(new_height - lheight) < labs(new_height - uheight)) + new_height = lheight; + else + new_height = uheight; + } + break; + } + case cCRNSMNextPow2: + { + if (!is_pow2) + math::compute_upper_pow2_dim(new_width, new_height); + break; + } + default: break; + } + } + + if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) + { + if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) + { + if (mipmap_params.m_clamp_scale) + { + new_width = math::minimum(mipmap_params.m_clamp_width, new_width); + new_height = math::minimum(mipmap_params.m_clamp_height, new_height); + } + } + } + + new_width = math::clamp(new_width, 1, cCRNMaxLevelResolution); + new_height = math::clamp(new_height, 1, cCRNMaxLevelResolution); + + if ((new_width != (int)work_tex.get_width()) || (new_height != (int)work_tex.get_height())) + { + console::info(L"Resampling input texture to %ux%u", new_width, new_height); + + const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); + + bool srgb = mipmap_params.m_gamma_filtering != 0; + + dds_texture::resample_params res_params; + res_params.m_pFilter = pFilter; + res_params.m_wrapping = mipmap_params.m_tiled != 0; + if 
(work_tex.get_num_faces()) + res_params.m_wrapping = false; + res_params.m_renormalize = mipmap_params.m_renormalize != 0; + res_params.m_filter_scale = 1.0f; + res_params.m_gamma = mipmap_params.m_gamma; + res_params.m_srgb = srgb; + res_params.m_multithreaded = (params.m_num_helper_threads > 0); + + if (!work_tex.resize(new_width, new_height, res_params)) + { + console::error(L"Failed resizing texture!"); + return false; + } + } + + if ((generate_new_mips) && (generate_mipmaps)) + { + bool srgb = mipmap_params.m_gamma_filtering != 0; + + const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); + + dds_texture::generate_mipmap_params gen_params; + gen_params.m_pFilter = pFilter; + gen_params.m_wrapping = mipmap_params.m_tiled != 0; + gen_params.m_renormalize = mipmap_params.m_renormalize != 0; + gen_params.m_filter_scale = mipmap_params.m_blurriness; + gen_params.m_gamma = mipmap_params.m_gamma; + gen_params.m_srgb = srgb; + gen_params.m_multithreaded = params.m_num_helper_threads > 0; + gen_params.m_max_mips = mipmap_params.m_max_levels; + gen_params.m_min_mip_size = mipmap_params.m_min_mip_size; + + console::info(L"Generating mipmaps using filter \"%S\"", pFilter); + + timer tm; + tm.start(); + if (!work_tex.generate_mipmaps(gen_params, true)) + { + console::error(L"Failed generating mipmaps!"); + return false; + } + double t = tm.get_elapsed_secs(); + + console::info(L"Generated %u mipmap levels in %3.3fs", work_tex.get_num_levels() - 1, t); + } + + return true; + } + + bool create_compressed_texture(const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, crnlib::vector &comp_data, uint32 *pActual_quality_level, float *pActual_bitrate) + { + comp_data.resize(0); + if (pActual_bitrate) *pActual_bitrate = 0.0f; + if (pActual_quality_level) *pActual_quality_level = 0; + + dds_texture work_tex; + if (!create_dds_tex(params, work_tex)) + { + console::error(L"Failed creating DDS texture from crn_comp_params!"); + return false; + } + + if 
(!create_texture_mipmaps(work_tex, params, mipmap_params, true)) + return false; + + crn_comp_params new_params(params); + new_params.m_levels = work_tex.get_num_levels(); + memset(new_params.m_pImages, 0, sizeof(new_params.m_pImages)); + + for (uint f = 0; f < work_tex.get_num_faces(); f++) + for (uint l = 0; l < work_tex.get_num_levels(); l++) + new_params.m_pImages[f][l] = (uint32*)work_tex.get_level(f, l)->get_image()->get_ptr(); + + return create_compressed_texture(new_params, comp_data, pActual_quality_level, pActual_bitrate); + } + +} // namespace crnlib + diff --git a/crnlib/crn_texture_comp.h b/crnlib/crn_texture_comp.h new file mode 100644 index 00000000..96242f52 --- /dev/null +++ b/crnlib/crn_texture_comp.h @@ -0,0 +1,33 @@ +// File: crn_texture_comp.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#include "../inc/crnlib.h" + +namespace crnlib +{ + class dds_texture; + + class itexture_comp + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(itexture_comp); + + public: + itexture_comp() { } + virtual ~itexture_comp() { } + + virtual const wchar_t *get_ext() const = 0; + + virtual bool compress_init(const crn_comp_params& params) = 0; + virtual bool compress_pass(const crn_comp_params& params, float *pEffective_bitrate) = 0; + virtual void compress_deinit() = 0; + + virtual const crnlib::vector& get_comp_data() const = 0; + virtual crnlib::vector& get_comp_data() = 0; + }; + + bool create_compressed_texture(const crn_comp_params ¶ms, crnlib::vector &comp_data, uint32 *pActual_quality_level, float *pActual_bitrate); + bool create_texture_mipmaps(dds_texture &work_tex, const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, bool generate_mipmaps); + bool create_compressed_texture(const crn_comp_params ¶ms, const crn_mipmap_params &mipmap_params, crnlib::vector &comp_data, uint32 *pActual_quality_level, float *pActual_bitrate); + +} // namespace crnlib diff --git a/crnlib/crn_texture_conversion.cpp 
b/crnlib/crn_texture_conversion.cpp new file mode 100644 index 00000000..409566c5 --- /dev/null +++ b/crnlib/crn_texture_conversion.cpp @@ -0,0 +1,705 @@ +// File: crn_texture_conversion.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_texture_conversion.h" +#include "crn_console.h" +#include "crn_win32_file_utils.h" +#include "crn_cfile_stream.h" +#include "crn_image_utils.h" +#include "crn_texture_comp.h" +#include "crn_strutils.h" + +namespace crnlib +{ + namespace texture_conversion + { + struct progress_params + { + convert_params* m_pParams; + uint m_start_percentage; + bool m_canceled; + }; + + convert_stats::convert_stats() + { + clear(); + } + + bool convert_stats::init( + const wchar_t* pSrc_filename, + const wchar_t* pDst_filename, + dds_texture& src_tex, + texture_file_types::format dst_file_type, + bool lzma_stats) + { + m_src_filename = pSrc_filename; + m_dst_filename = pDst_filename; + m_dst_file_type = dst_file_type; + + m_pInput_tex = &src_tex; + + win32_file_utils::get_file_size(pSrc_filename, m_input_file_size); + win32_file_utils::get_file_size(pDst_filename, m_output_file_size); + + m_total_input_pixels = 0; + for (uint i = 0; i < src_tex.get_num_levels(); i++) + { + uint width = math::maximum(1, src_tex.get_width() >> i); + uint height = math::maximum(1, src_tex.get_height() >> i); + m_total_input_pixels += width*height*src_tex.get_num_faces(); + } + + m_output_comp_file_size = 0; + + m_total_output_pixels = 0; + + if (lzma_stats) + { + vector dst_tex_bytes; + if (!cfile_stream::read_file_into_array(pDst_filename, dst_tex_bytes)) + { + console::error(L"Failed loading output file: %s", pDst_filename); + return false; + } + if (!dst_tex_bytes.size()) + { + console::error(L"Output file is empty: %s", pDst_filename); + return false; + } + vector cmp_tex_bytes; + lzma_codec lossless_codec; + if (lossless_codec.pack(dst_tex_bytes.get_ptr(), dst_tex_bytes.size(), cmp_tex_bytes)) + { + 
m_output_comp_file_size = cmp_tex_bytes.size(); + } + } + + if (!m_output_tex.load_from_file(pDst_filename, m_dst_file_type)) + { + console::error(L"Failed loading output file: %s", pDst_filename); + return false; + } + + for (uint i = 0; i < m_output_tex.get_num_levels(); i++) + { + uint width = math::maximum(1, m_output_tex.get_width() >> i); + uint height = math::maximum(1, m_output_tex.get_height() >> i); + m_total_output_pixels += width*height*m_output_tex.get_num_faces(); + } + CRNLIB_ASSERT(m_total_output_pixels == m_output_tex.get_total_pixels_in_all_faces_and_mips()); + + return true; + } + + bool convert_stats::print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const wchar_t *pCSVStatsFile) const + { + if (!m_pInput_tex) + return false; + + console::info(L"Input texture: %ux%u, Levels: %u, Faces: %u, Format: %s", + m_pInput_tex->get_width(), + m_pInput_tex->get_height(), + m_pInput_tex->get_num_levels(), + m_pInput_tex->get_num_faces(), + pixel_format_helpers::get_pixel_format_string(m_pInput_tex->get_format())); + + // Just casting the uint64's filesizes to uint32 here to work around gcc issues - it's not even possible to have files that large anyway. 
+ console::info(L"Input pixels: %u, Input file size: %u, Input bits/pixel: %1.3f", + m_total_input_pixels, (uint32)m_input_file_size, (m_input_file_size * 8.0f) / m_total_input_pixels); + + console::info(L"Output texture: %ux%u, Levels: %u, Faces: %u, Format: %s", + m_output_tex.get_width(), + m_output_tex.get_height(), + m_output_tex.get_num_levels(), + m_output_tex.get_num_faces(), + pixel_format_helpers::get_pixel_format_string(m_output_tex.get_format())); + + console::info(L"Output pixels: %u, Output file size: %u, Output bits/pixel: %1.3f", + m_total_output_pixels, (uint32)m_output_file_size, (m_output_file_size * 8.0f) / m_total_output_pixels); + + if (m_output_comp_file_size) + { + console::info(L"LZMA compressed output file size: %u bytes, %1.3f bits/pixel", + (uint32)m_output_comp_file_size, (m_output_comp_file_size * 8.0f) / m_total_output_pixels); + } + if (psnr_metrics) + { + if ( (m_pInput_tex->get_width() != m_output_tex.get_width()) || (m_pInput_tex->get_height() != m_output_tex.get_height()) || (m_pInput_tex->get_num_faces() != m_output_tex.get_num_faces()) ) + { + console::warning(L"Unable to compute image statistics - input/output texture dimensions are different."); + } + else + { + uint num_levels = math::minimum(m_pInput_tex->get_num_levels(), m_output_tex.get_num_levels()); + + if (!mip_stats) + num_levels = 1; + + for (uint level = 0; level < num_levels; level++) + { + image_u8 a, b; + image_u8* pA = m_pInput_tex->get_level_image(0, level, a); + image_u8* pB = m_output_tex.get_level_image(0, level, b); + + if (pA && pB) + { + image_u8 grayscale_a, grayscale_b; + if (grayscale_sampling) + { + grayscale_a = *pA; + grayscale_a.convert_to_grayscale(); + pA = &grayscale_a; + + grayscale_b = *pB; + grayscale_b.convert_to_grayscale(); + pB = &grayscale_b; + } + + console::info(L"Mipmap level %u statistics:", level); + image_utils::print_image_metrics(*pA, *pB); + + if ((pA->has_rgb()) || (pB->has_rgb())) + image_utils::print_ssim(*pA, *pB); + } + } 
+ + if (pCSVStatsFile) + { + // FIXME: This is kind of a hack, and should be combined with the code above. + image_u8 a, b; + image_u8* pA = m_pInput_tex->get_level_image(0, 0, a); + image_u8* pB = m_output_tex.get_level_image(0, 0, b); + if (pA && pB) + { + image_u8 grayscale_a, grayscale_b; + if (grayscale_sampling) + { + grayscale_a = *pA; + grayscale_a.convert_to_grayscale(); + pA = &grayscale_a; + + grayscale_b = *pB; + grayscale_b.convert_to_grayscale(); + pB = &grayscale_b; + } + + image_utils::error_metrics rgb_error; + image_utils::error_metrics luma_error; + if (rgb_error.compute(*pA, *pB, 0, 3, false) && luma_error.compute(*pA, *pB, 0, 0, true)) + { + FILE *pFile = NULL; +#ifdef _MSC_VER + _wfopen_s(&pFile, pCSVStatsFile, L"a"); +#else + pFile = _wfopen(pCSVStatsFile, L"a"); +#endif + if (!pFile) + console::warning(L"Unable to append to CSV stats file: %s\n", pCSVStatsFile); + else + { + dynamic_wstring filename; + split_path(m_src_filename.get_ptr(), NULL, NULL, &filename, NULL); + dynamic_string filenamea; + uint64 effective_output_size = m_output_comp_file_size ? 
m_output_comp_file_size : m_output_file_size; + float bitrate = (effective_output_size * 8.0f) / m_total_output_pixels; + fprintf(pFile, "%s,%u,%u,%u,%f,%f,%u,%f\n", + filename.as_ansi(filenamea).get_ptr(), + pB->get_width(), pB->get_height(), m_output_tex.get_num_levels(), + rgb_error.mRootMeanSquared, luma_error.mRootMeanSquared, + (uint32)effective_output_size, bitrate); + fclose(pFile); + } + } + } + } + } + } + + return true; + } + + void convert_stats::clear() + { + m_src_filename.clear(); + m_dst_filename.clear(); + m_dst_file_type = texture_file_types::cFormatInvalid; + + m_pInput_tex = NULL; + m_output_tex.clear(); + + m_input_file_size = 0; + m_total_input_pixels = 0; + + m_output_file_size = 0; + m_total_output_pixels = 0; + + m_output_comp_file_size = 0; + } + + //----------------------------------------------------------------------- + + static crn_bool crn_progress_callback(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) + { + progress_params& params = *static_cast<progress_params*>(pUser_data_ptr); + + if (params.m_canceled) + return false; + if (!params.m_pParams->m_pProgress_func) + return true; + + int percentage_complete = params.m_start_percentage + (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * (100.0f - params.m_start_percentage) / total_phases); + + percentage_complete = math::clamp(percentage_complete, 0, 100); + + if (!params.m_pParams->m_pProgress_func(percentage_complete, params.m_pParams->m_pProgress_user_data)) + { + params.m_canceled = true; + return false; + } + + return true; + } + + static bool dxt_progress_callback_func(uint percentage_complete, void* pUser_data_ptr) + { + progress_params& params = *static_cast<progress_params*>(pUser_data_ptr); + + if (params.m_canceled) + return false; + + if (!params.m_pParams->m_pProgress_func) + return true; + + int scaled_percentage_complete = params.m_start_percentage + (percentage_complete * (100 - params.m_start_percentage)) 
/ 100; + + scaled_percentage_complete = math::clamp(scaled_percentage_complete, 0, 100); + + if (!params.m_pParams->m_pProgress_func(scaled_percentage_complete, params.m_pParams->m_pProgress_user_data)) + { + params.m_canceled = true; + return false; + } + + return true; + } + + static bool convert_error(const convert_params& params, const wchar_t* pError_msg) + { + params.m_status = false; + params.m_error_message = pError_msg; + + _wremove(params.m_dst_filename.get_ptr()); + + return false; + } + + static pixel_format choose_pixel_format(convert_params& params, const crn_comp_params &comp_params, const dds_texture& src_tex, texture_type tex_type) + { + const bool is_normal_map = (tex_type == cTextureTypeNormalMap); + + if (params.m_dst_file_type == texture_file_types::cFormatCRN) + { + if (is_normal_map) + { + switch (src_tex.get_format()) + { + case PIXEL_FMT_DXN: + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + case PIXEL_FMT_DXT5_xGxR: + return src_tex.get_format(); + default: + return PIXEL_FMT_DXT5_AGBR; + } + } + } + else if (params.m_dst_file_type == texture_file_types::cFormatDDS) + { + if (src_tex.get_source_file_type() != texture_file_types::cFormatCRN) + { + if (is_normal_map) + { + switch (src_tex.get_format()) + { + case PIXEL_FMT_DXN: + case PIXEL_FMT_3DC: + case PIXEL_FMT_DXT5_xGBR: + case PIXEL_FMT_DXT5_AGBR: + case PIXEL_FMT_DXT5_xGxR: + return src_tex.get_format(); + default: + return PIXEL_FMT_DXT5_AGBR; + } + } + else if (pixel_format_helpers::is_grayscale(src_tex.get_format())) + { + if (pixel_format_helpers::has_alpha(src_tex.get_format())) + return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; + else + return PIXEL_FMT_DXT1; + } + else if (pixel_format_helpers::has_alpha(src_tex.get_format())) + return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? 
PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; + else + return PIXEL_FMT_DXT1; + } + } + else + { + // A regular image format. + if (pixel_format_helpers::is_grayscale(src_tex.get_format())) + { + if (pixel_format_helpers::has_alpha(src_tex.get_format())) + return PIXEL_FMT_A8L8; + else + return PIXEL_FMT_L8; + } + else if (pixel_format_helpers::has_alpha(src_tex.get_format())) + return PIXEL_FMT_A8R8G8B8; + else + return PIXEL_FMT_R8G8B8; + } + + return src_tex.get_format(); + } + + static void print_comp_params(const crn_comp_params &comp_params) + { + console::debug(L"\nTexture conversion compression parameters:"); + console::debug(L" Desired bitrate: %3.3f", comp_params.m_target_bitrate); + console::debug(L" CRN Quality: %i", comp_params.m_quality_level); + console::debug(L"CRN C endpoints/selectors: %u %u", comp_params.m_crn_color_endpoint_palette_size, comp_params.m_crn_color_selector_palette_size); + console::debug(L"CRN A endpoints/selectors: %u %u", comp_params.m_crn_alpha_endpoint_palette_size, comp_params.m_crn_alpha_selector_palette_size); + console::debug(L" DXT both block types: %u, Alpha threshold: %u", comp_params.get_flag(cCRNCompFlagUseBothBlockTypes), comp_params.m_dxt1a_alpha_threshold); + console::debug(L" DXT compression quality: %s", crn_get_dxt_quality_string(comp_params.m_dxt_quality)); + console::debug(L" Perceptual: %u, Large Blocks: %u", comp_params.get_flag(cCRNCompFlagPerceptual), comp_params.get_flag(cCRNCompFlagHierarchical)); + console::debug(L" Compressor: %s", get_dxt_compressor_name(comp_params.m_dxt_compressor_type)); + console::debug(L" Disable endpoint caching: %u", comp_params.get_flag(cCRNCompFlagDisableEndpointCaching)); + console::debug(L" Grayscale sampling: %u", comp_params.get_flag(cCRNCompFlagGrayscaleSampling)); + console::debug(L" Max helper threads: %u", comp_params.m_num_helper_threads); + console::debug(L""); + } + + static void print_mipmap_params(const crn_mipmap_params &mipmap_params) + { + console::debug(L"\nTexture 
conversion MIP-map parameters:"); + console::debug(L" Mode: %s", crn_get_mip_mode_name(mipmap_params.m_mode)); + console::debug(L" Filter: %S", crn_get_mip_filter_name(mipmap_params.m_filter)); + console::debug(L"Gamma filtering: %u, Gamma: %2.2f", mipmap_params.m_gamma_filtering, mipmap_params.m_gamma); + console::debug(L" Blurriness: %2.2f", mipmap_params.m_blurriness); + console::debug(L" Renormalize: %u", mipmap_params.m_renormalize); + console::debug(L" Tiled: %u", mipmap_params.m_tiled); + console::debug(L" Max Levels: %u", mipmap_params.m_max_levels); + console::debug(L" Min level size: %u", mipmap_params.m_min_mip_size); + console::debug(L" window: %u %u %u %u", mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); + console::debug(L" scale mode: %s", crn_get_scale_mode_desc(mipmap_params.m_scale_mode)); + console::debug(L" scale: %f %f", mipmap_params.m_scale_x, mipmap_params.m_scale_y); + console::debug(L" clamp: %u %u, clamp_scale: %u", mipmap_params.m_clamp_width, mipmap_params.m_clamp_height, mipmap_params.m_clamp_scale); + console::debug(L""); + } + + void convert_params::print() + { + console::debug(L"\nTexture conversion parameters:"); + console::debug(L" Resolution: %ux%u, Faces: %u, Levels: %u, Format: %s", + m_pInput_texture->get_width(), + m_pInput_texture->get_height(), + m_pInput_texture->get_num_faces(), + m_pInput_texture->get_num_levels(), + pixel_format_helpers::get_pixel_format_string(m_pInput_texture->get_format())); + + console::debug(L" texture_type: %s", get_texture_type_desc(m_texture_type)); + console::debug(L" dst_filename: %s", m_dst_filename.get_ptr()); + console::debug(L"dst_file_type: %s", texture_file_types::get_extension(m_dst_file_type)); + console::debug(L" dst_format: %s", pixel_format_helpers::get_pixel_format_string(m_dst_format)); + console::debug(L" quick: %u", m_quick); + } + + static bool write_compressed_texture( + dds_texture& work_tex, 
convert_params& params, crn_comp_params &comp_params, pixel_format dst_format, progress_params& progress_state, bool perceptual, convert_stats &stats) + { + comp_params.m_file_type = (params.m_dst_file_type == texture_file_types::cFormatCRN) ? cCRNFileTypeCRN : cCRNFileTypeDDS; + + comp_params.m_pProgress_func = crn_progress_callback; + comp_params.m_pProgress_func_data = &progress_state; + comp_params.set_flag(cCRNCompFlagPerceptual, perceptual); + + crn_format crn_fmt = pixel_format_helpers::convert_pixel_format_to_best_crn_format(dst_format); + comp_params.m_format = crn_fmt; + + console::message(L"Writing %s texture to file: \"%s\"", crn_get_format_string(crn_fmt), params.m_dst_filename.get_ptr()); + + uint32 actual_quality_level; + float actual_bitrate; + bool status = work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, &comp_params, &actual_quality_level, &actual_bitrate); + if (!status) + return convert_error(params, L"Failed writing output file!"); + + if (!params.m_no_stats) + { + if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) + { + console::warning(L"Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); + } + } + + return true; + } + + static bool convert_and_write_normal_texture(dds_texture& work_tex, convert_params& params, const crn_comp_params &comp_params, pixel_format dst_format, progress_params& progress_state, bool formats_differ, bool perceptual, convert_stats& stats) + { + if (formats_differ) + { + dxt_image::pack_params pack_params; + + pack_params.m_perceptual = perceptual; + pack_params.m_compressor = comp_params.m_dxt_compressor_type; + pack_params.m_pProgress_callback = dxt_progress_callback_func; + pack_params.m_pProgress_callback_user_data_ptr = &progress_state; + pack_params.m_dxt1a_alpha_threshold = 
comp_params.m_dxt1a_alpha_threshold; + pack_params.m_quality = comp_params.m_dxt_quality; + pack_params.m_endpoint_caching = !comp_params.get_flag(cCRNCompFlagDisableEndpointCaching); + pack_params.m_grayscale_sampling = comp_params.get_flag(cCRNCompFlagGrayscaleSampling); + if ((!comp_params.get_flag(cCRNCompFlagUseBothBlockTypes)) && (!comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) + pack_params.m_use_both_block_types = false; + + pack_params.m_num_helper_threads = comp_params.m_num_helper_threads; + pack_params.m_use_transparent_indices_for_black = comp_params.get_flag(cCRNCompFlagUseTransparentIndicesForBlack); + + console::info(L"Converting texture format from %s to %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); + + timer tm; + tm.start(); + + bool status = work_tex.convert(dst_format, pack_params); + + double t = tm.get_elapsed_secs(); + + console::info(L""); + + if (!status) + { + if (progress_state.m_canceled) + { + params.m_canceled = true; + return false; + } + else + { + return convert_error(params, L"Failed converting texture to output format!"); + } + } + + console::info(L"Texture format conversion took %3.3fs", t); + } + + if (params.m_write_mipmaps_to_multiple_files) + { + for (uint f = 0; f < work_tex.get_num_faces(); f++) + { + for (uint l = 0; l < work_tex.get_num_levels(); l++) + { + dynamic_wstring filename(params.m_dst_filename.get_ptr()); + + dynamic_wstring drv, dir, fn, ext; + if (!split_path(params.m_dst_filename.get_ptr(), &drv, &dir, &fn, &ext)) + return false; + + fn += dynamic_wstring(cVarArg, L"_face%u_mip%u", f, l).get_ptr(); + filename = drv + dir + fn + ext; + + mip_level *pLevel = work_tex.get_level(f, l); + + face_vec face(1); + face[0].push_back(crnlib_new<mip_level>(*pLevel)); + + dds_texture new_tex; + new_tex.assign(face); + + console::info(L"Writing texture face %u mip level %u to file %s", f, l, filename.get_ptr()); + + if 
(!new_tex.write_to_file(filename.get_ptr(), params.m_dst_file_type, NULL, NULL, NULL)) + return convert_error(params, L"Failed writing output file!"); + } + } + } + else + { + console::message(L"Writing texture to file: \"%s\"", params.m_dst_filename.get_ptr()); + + if (!work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, NULL, NULL, NULL)) + return convert_error(params, L"Failed writing output file!"); + + if (!params.m_no_stats) + { + if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) + { + console::warning(L"Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); + } + } + } + + return true; + } + + bool process(convert_params& params, convert_stats& stats) + { + texture_type tex_type = params.m_texture_type; + + crn_comp_params comp_params(params.m_comp_params); + crn_mipmap_params mipmap_params(params.m_mipmap_params); + + progress_params progress_state; + progress_state.m_pParams = &params; + progress_state.m_canceled = false; + progress_state.m_start_percentage = 0; + + params.m_status = false; + params.m_error_message.clear(); + + if (params.m_pIntermediate_texture) + { + crnlib_delete(params.m_pIntermediate_texture); + params.m_pIntermediate_texture = NULL; + } + + params.m_pIntermediate_texture = crnlib_new<dds_texture>(*params.m_pInput_texture); + + dds_texture& work_tex = *params.m_pInput_texture; + + if ((params.m_dst_format != PIXEL_FMT_INVALID) && (pixel_format_helpers::is_alpha_only(params.m_dst_format))) + { + if ((work_tex.get_comp_flags() & pixel_format_helpers::cCompFlagAValid) == 0) + { + console::warning(L"Output format is alpha-only, but input doesn't have alpha, so setting alpha to luminance."); + + work_tex.convert(PIXEL_FMT_A8, crnlib::dxt_image::pack_params()); + + if (tex_type == cTextureTypeNormalMap) + tex_type = cTextureTypeRegularMap; + 
} + } + + pixel_format dst_format = params.m_dst_format; + + if (dst_format == PIXEL_FMT_INVALID) + { + // Caller didn't specify a format to use, so try to pick something reasonable. + // This is actually much trickier than it seems, and the current approach kind of sucks. + dst_format = choose_pixel_format(params, comp_params, work_tex, tex_type); + } + + if ((dst_format == PIXEL_FMT_DXT1) && (comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) + dst_format = PIXEL_FMT_DXT1A; + else if (dst_format == PIXEL_FMT_DXT1A) + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); + + const bool is_normal_map = (tex_type == cTextureTypeNormalMap); + bool perceptual = comp_params.get_flag(cCRNCompFlagPerceptual); + if (is_normal_map) + { + perceptual = false; + mipmap_params.m_gamma_filtering = false; + } + + if (pixel_format_helpers::is_pixel_format_non_srgb(dst_format)) + { + if (perceptual) + { + //console::warning(L"Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); + perceptual = false; + } + } + + if (pixel_format_helpers::is_normal_map(dst_format)) + { + //if (perceptual) + //console::warning(L"Output pixel format is intended for normal maps, disabling perceptual color metrics"); + + perceptual = false; + } + + bool generate_mipmaps = texture_file_types::supports_mipmaps(params.m_dst_file_type); + if ((params.m_write_mipmaps_to_multiple_files) && ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS))) + { + generate_mipmaps = true; + } + + if (params.m_param_debugging) + { + params.print(); + + print_comp_params(comp_params); + print_mipmap_params(mipmap_params); + } + + if (!create_texture_mipmaps(work_tex, comp_params, mipmap_params, generate_mipmaps)) + return convert_error(params, L"Failed creating texture mipmaps!"); + + bool formats_differ = work_tex.get_format() != dst_format; + if (formats_differ) + { + if 
(pixel_format_helpers::is_dxt1(work_tex.get_format()) && pixel_format_helpers::is_dxt1(dst_format)) + formats_differ = false; + } + + bool status = false; + + timer t; + t.start(); + + if ( (params.m_dst_file_type == texture_file_types::cFormatCRN) || + ( (params.m_dst_file_type == texture_file_types::cFormatDDS) && (pixel_format_helpers::is_dxt(dst_format)) && + ((formats_differ) || (comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) + ) + ) + { + status = write_compressed_texture(work_tex, params, comp_params, dst_format, progress_state, perceptual, stats); + } + else + { + status = convert_and_write_normal_texture(work_tex, params, comp_params, dst_format, progress_state, formats_differ, perceptual, stats); + } + + console::progress(L""); + + if (progress_state.m_canceled) + { + params.m_canceled = true; + return false; + } + + double total_write_time = t.get_elapsed_secs(); + + if (status) + { + if (params.m_param_debugging) + console::info(L"Work texture format: %s, desired destination format: %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); + + console::message(L"Texture successfully written in %3.3fs", total_write_time); + } + else + { + dynamic_wstring str; + + if (work_tex.get_last_error().is_empty()) + str.format(L"Failed writing texture to file \"%s\"", params.m_dst_filename.get_ptr()); + else + str.format(L"Failed writing texture to file \"%s\", Reason: %s", params.m_dst_filename.get_ptr(), work_tex.get_last_error().get_ptr()); + + return convert_error(params, str.get_ptr()); + } + + if (params.m_debugging) + { + crnlib_print_mem_stats(); + } + + params.m_status = true; + return true; + } + + } // namespace texture_conversion + +} // namespace crnlib diff --git a/crnlib/crn_texture_conversion.h b/crnlib/crn_texture_conversion.h new file mode 100644 index 00000000..442321e0 --- /dev/null +++ b/crnlib/crn_texture_conversion.h @@ 
-0,0 +1,109 @@ +// File: crn_texture_conversion.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_dxt_image.h" +#include "crn_dds_texture.h" +#include "crn_rect.h" +#include "crn_lzma_codec.h" + +namespace crnlib +{ + namespace texture_conversion + { + class convert_stats + { + public: + convert_stats(); + + bool init( + const wchar_t* pSrc_filename, + const wchar_t* pDst_filename, + dds_texture& src_tex, + texture_file_types::format dst_file_type, + bool lzma_stats); + + bool print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const wchar_t *pCSVStatsFile = NULL) const; + + void clear(); + + dynamic_wstring m_src_filename; + dynamic_wstring m_dst_filename; + texture_file_types::format m_dst_file_type; + + dds_texture* m_pInput_tex; + dds_texture m_output_tex; + + uint64 m_input_file_size; + uint m_total_input_pixels; + + uint64 m_output_file_size; + uint m_total_output_pixels; + + uint64 m_output_comp_file_size; + }; + + class convert_params + { + public: + convert_params() : + m_pInput_texture(NULL), + m_texture_type(cTextureTypeUnknown), + m_dst_file_type(texture_file_types::cFormatInvalid), + m_dst_format(PIXEL_FMT_INVALID), + m_pProgress_func(NULL), + m_pProgress_user_data(NULL), + m_pIntermediate_texture(NULL), + m_write_mipmaps_to_multiple_files(false), + m_quick(false), + m_debugging(false), + m_param_debugging(false), + m_no_stats(false), + m_lzma_stats(false), + m_status(false), + m_canceled(false) + { + } + + ~convert_params() + { + crnlib_delete(m_pIntermediate_texture); + } + + void print(); + + // Input parameters + dds_texture* m_pInput_texture; + + texture_type m_texture_type; + + dynamic_wstring m_dst_filename; + texture_file_types::format m_dst_file_type; + pixel_format m_dst_format; + + crn_comp_params m_comp_params; + crn_mipmap_params m_mipmap_params; + + typedef bool (*progress_callback_func_ptr)(uint percentage_complete, void* pUser_data_ptr); + progress_callback_func_ptr 
m_pProgress_func; + void* m_pProgress_user_data; + + // Return parameters + dds_texture* m_pIntermediate_texture; + mutable dynamic_wstring m_error_message; + + bool m_write_mipmaps_to_multiple_files; + bool m_quick; + bool m_debugging; + bool m_param_debugging; + bool m_no_stats; + + bool m_lzma_stats; + mutable bool m_status; + mutable bool m_canceled; + }; + + bool process(convert_params& params, convert_stats& stats); + + } // namespace texture_conversion + +} // namespace crnlib diff --git a/crnlib/crn_texture_file_types.cpp b/crnlib/crn_texture_file_types.cpp new file mode 100644 index 00000000..2f846da9 --- /dev/null +++ b/crnlib/crn_texture_file_types.cpp @@ -0,0 +1,101 @@ +// File: crn_texture_file_types.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_texture_file_types.h" +#include "crn_strutils.h" + +namespace crnlib +{ + const wchar_t* texture_file_types::get_extension(format fmt) + { + CRNLIB_ASSERT(fmt < cNumFileFormats); + if (fmt >= cNumFileFormats) + return NULL; + + static const wchar_t* extensions[cNumFileFormats] = + { + L"tga", + L"png", + L"jpg", + L"jpeg", + L"bmp", + L"gif", + L"tif", + L"tiff", + L"ppm", + L"pgm", + L"dds", + L"psd", + L"jp2", + L"crn", + L"", + L"" + }; + return extensions[fmt]; + } + + texture_file_types::format texture_file_types::determine_file_format(const wchar_t* pFilename) + { + dynamic_wstring ext; + if (!split_path(pFilename, NULL, NULL, NULL, &ext)) + return cFormatInvalid; + + if (ext.is_empty()) + return cFormatInvalid; + + if (ext[0] == L'.') + ext.right(1); + + for (uint i = 0; i < cNumFileFormats; i++) + if (ext == get_extension(static_cast<format>(i))) + return static_cast<format>(i); + + return cFormatInvalid; + } + + bool texture_file_types::supports_mipmaps(format fmt) + { + switch (fmt) + { + case cFormatCRN: + case cFormatDDS: + return true; + default: break; + } + + return false; + } + + bool texture_file_types::supports_alpha(format fmt) + { + switch (fmt) + 
{ + case cFormatJPG: + case cFormatJPEG: + case cFormatGIF: + case cFormatJP2: + return false; + default: break; + } + + return true; + } + + const wchar_t* get_texture_type_desc(texture_type t) + { + switch (t) + { + case cTextureTypeUnknown: return L"Unknown"; + case cTextureTypeRegularMap: return L"2D map"; + case cTextureTypeNormalMap: return L"Normal map"; + case cTextureTypeVerticalCrossCubemap: return L"Vertical Cross Cubemap"; + case cTextureTypeCubemap: return L"Cubemap"; + default: break; + } + + CRNLIB_ASSERT(false); + + return L"?"; + } + +} // namespace crnlib diff --git a/crnlib/crn_texture_file_types.h b/crnlib/crn_texture_file_types.h new file mode 100644 index 00000000..d45f4786 --- /dev/null +++ b/crnlib/crn_texture_file_types.h @@ -0,0 +1,62 @@ +// File: crn_texture_file_types.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "../inc/crnlib.h" +#include "crn_vec.h" +#include "crn_pixel_format.h" + +namespace crnlib +{ + struct texture_file_types + { + enum format + { + cFormatInvalid = -1, + + cFormatTGA = 0, + cFormatPNG, + cFormatJPG, + cFormatJPEG, + cFormatBMP, + cFormatGIF, + cFormatTIF, + cFormatTIFF, + cFormatPPM, + cFormatPGM, + cFormatDDS, + cFormatPSD, + cFormatJP2, + cFormatCRN, + + cNumRegularFileFormats, + + // Not really a file format + cFormatClipboard = cNumRegularFileFormats, + cFormatDragDrop, + + cNumFileFormats + }; + + static const wchar_t* get_extension(format fmt); + + static format determine_file_format(const wchar_t* pFilename); + + static bool supports_mipmaps(format fmt); + static bool supports_alpha(format fmt); + }; + + enum texture_type + { + cTextureTypeUnknown = 0, + cTextureTypeRegularMap, + cTextureTypeNormalMap, + cTextureTypeVerticalCrossCubemap, + cTextureTypeCubemap, + + cNumTextureTypes + }; + + const wchar_t* get_texture_type_desc(texture_type t); + +} // namespace crnlib + diff --git a/crnlib/crn_threaded_clusterizer.h b/crnlib/crn_threaded_clusterizer.h new file 
mode 100644 index 00000000..a00f7266 --- /dev/null +++ b/crnlib/crn_threaded_clusterizer.h @@ -0,0 +1,361 @@ +// File: crn_threaded_clusterizer.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_clusterizer.h" + +namespace crnlib +{ + template + class threaded_clusterizer + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer); + + public: + threaded_clusterizer(task_pool& tp) : + m_pTask_pool(&tp), + m_pProgress_callback(NULL), + m_pProgress_callback_data(NULL), + m_canceled(false) + { + } + + void clear() + { + for (uint i = 0; i < cMaxClusterizers; i++) + m_clusterizers[i].clear(); + } + + struct weighted_vec + { + weighted_vec() { } + weighted_vec(const VectorType& v, uint w) : m_vec(v), m_weight(w) { } + + VectorType m_vec; + uint m_weight; + }; + typedef crnlib::vector weighted_vec_array; + + typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); + + bool create_clusters( + const weighted_vec_array& weighted_vecs, + uint max_clusters, crnlib::vector< crnlib::vector >& cluster_indices, + progress_callback_func pProgress_callback, + void* pProgress_callback_data) + { + m_main_thread_id = get_current_thread_id(); + m_canceled = false; + m_pProgress_callback = pProgress_callback; + m_pProgress_callback_data = pProgress_callback_data; + + if (max_clusters >= 128) + { + crnlib::vector primary_indices(weighted_vecs.size()); + for (uint i = 0; i < weighted_vecs.size(); i++) + primary_indices[i] = i; + + CRNLIB_ASSUME(cMaxClusterizers == 4); + + crnlib::vector indices[6]; + + compute_split(weighted_vecs, primary_indices, indices[0], indices[1]); + compute_split(weighted_vecs, indices[0], indices[2], indices[3]); + compute_split(weighted_vecs, indices[1], indices[4], indices[5]); + + create_clusters_task_state task_state[4]; + + m_cluster_task_displayed_progress = false; + + uint total_partitions = 0; + for (uint i = 0; i < 4; i++) + { + const uint num_indices = indices[2 + i].size(); + 
if (num_indices) + total_partitions++; + } + + for (uint i = 0; i < 4; i++) + { + const uint num_indices = indices[2 + i].size(); + if (!num_indices) + continue; + + task_state[i].m_pWeighted_vecs = &weighted_vecs; + task_state[i].m_pIndices = &indices[2 + i]; + task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions; + + m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]); + } + + m_pTask_pool->join(); + + if (m_canceled) + return false; + + uint total_clusters = 0; + for (uint i = 0; i < 4; i++) + total_clusters += task_state[i].m_cluster_indices.size(); + + cluster_indices.reserve(total_clusters); + cluster_indices.resize(0); + + for (uint i = 0; i < 4; i++) + { + const uint ofs = cluster_indices.size(); + + cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size()); + + for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++) + { + cluster_indices[ofs + j].swap( task_state[i].m_cluster_indices[j] ); + } + } + } + else + { + m_clusterizers[0].clear(); + m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size()); + + for (uint i = 0; i < weighted_vecs.size(); i++) + { + const weighted_vec& v = weighted_vecs[i]; + + m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight); + } + + m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false);//m_params.m_dxt_quality <= cCRNDXTQualityFast); + + const uint num_clusters = m_clusterizers[0].get_codebook_size(); + + m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices); + } + + return !m_canceled; + } + + private: + task_pool* m_pTask_pool; + + uint32 m_main_thread_id; + + struct create_clusters_task_state + { + create_clusters_task_state() : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0) + { + } + + const weighted_vec_array* m_pWeighted_vecs; + crnlib::vector* m_pIndices; + crnlib::vector< crnlib::vector > m_cluster_indices; + uint 
m_max_clusters; + }; + + typedef clusterizer vector_clusterizer; + + enum { cMaxClusterizers = 4 }; + vector_clusterizer m_clusterizers[cMaxClusterizers]; + bool m_cluster_task_displayed_progress; + + progress_callback_func m_pProgress_callback; + void* m_pProgress_callback_data; + bool m_canceled; + + static bool generate_codebook_progress_callback(uint percentage_completed, void* pData) + { + threaded_clusterizer* pClusterizer = static_cast(pData); + + if (!pClusterizer->m_pProgress_callback) + return true; + + if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data)) + { + pClusterizer->m_canceled = true; + return false; + } + return true; + } + + void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector& indices) + { + const uint N = VectorType::num_elements; + + VectorType centroid(0.0f); + double total_weight = 0.0f; + for (uint i = 0; i < indices.size(); i++) + { + const weighted_vec& v = vecs[indices[i]]; + centroid += v.m_vec * static_cast(v.m_weight); + total_weight += v.m_weight; + } + + if (total_weight == 0.0f) + { + axis_res.clear(); + centroid_res = centroid; + return; + } + + double one_over_total_weight = 1.0f / total_weight; + for (uint i = 0; i < N; i++) + centroid[i] = static_cast(centroid[i] * one_over_total_weight); + + matrix covar; + covar.clear(); + + for (uint i = 0; i < indices.size(); i++) + { + const weighted_vec& weighted_vec = vecs[indices[i]]; + + const VectorType v(weighted_vec.m_vec - centroid); + const VectorType w(v * static_cast(weighted_vec.m_weight)); + + for (uint x = 0; x < N; x++) + for (uint y = x; y < N; y++) + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + + for (uint x = 0; x < N; x++) + for (uint y = x; y < N; y++) + covar[x][y] = static_cast(covar[x][y] * one_over_total_weight); + + for (uint x = 0; x < (N - 1); x++) + for (uint y = x + 1; y < N; y++) + covar[y][x] = covar[x][y]; + + VectorType axis; + for (uint i = 0; i 
< N; i++) + axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1))); + + VectorType prev_axis(axis); + + const uint cMaxIterations = 10; + for (uint iter = 0; iter < cMaxIterations; iter++) + { + VectorType x; + + double max_sum = 0; + + for (uint i = 0; i < N; i++) + { + double sum = 0; + + for (uint j = 0; j < N; j++) + sum += axis[j] * covar[i][j]; + + x[i] = static_cast(sum); + + max_sum = math::maximum(max_sum, fabs(sum)); + } + + if (max_sum != 0.0f) + x *= static_cast(1.0f / max_sum); + + VectorType delta_axis(prev_axis - x); + + prev_axis = axis; + axis = x; + + if (delta_axis.norm() < .0025f) + break; + } + + axis.normalize(); + + axis_res = axis; + centroid_res = centroid; + } + + void compute_division( + const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector& indices, + vector& left_indices, + vector& right_indices) + { + left_indices.resize(0); + right_indices.resize(0); + + for (uint i = 0; i < indices.size(); i++) + { + const uint vec_index = indices[i]; + const VectorType v(vecs[vec_index].m_vec - centroid); + + float t = v * axis; + if (t < 0.0f) + left_indices.push_back(vec_index); + else + right_indices.push_back(vec_index); + } + } + + void compute_split( + const weighted_vec_array& vecs, const vector& indices, + vector& left_indices, + vector& right_indices) + { + VectorType axis, centroid; + compute_pca(axis, centroid, vecs, indices); + + compute_division(axis, centroid, vecs, indices, left_indices, right_indices); + } + + static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData) + { + percentage_completed; + + if (static_cast(pData)->m_canceled) + return false; + + return true; + } + + void create_clusters_task(uint64 data, void* pData_ptr) + { + if (m_canceled) + return; + + const uint partition_index = static_cast(data); + create_clusters_task_state& state = *static_cast(pData_ptr); + + m_clusterizers[partition_index].clear(); + + for (uint i = 0; i < 
state.m_pIndices->size(); i++) + { + const uint index = (*state.m_pIndices)[i]; + const weighted_vec& v = (*state.m_pWeighted_vecs)[index]; + + m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight); + } + + if (m_canceled) + return; + + const bool is_main_thread = (get_current_thread_id() == m_main_thread_id); + + const bool quick = false; + m_clusterizers[partition_index].generate_codebook( + state.m_max_clusters, + (is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback, + this, + quick); + + if (is_main_thread) + m_cluster_task_displayed_progress = true; + + if (m_canceled) + return; + + const uint num_clusters = m_clusterizers[partition_index].get_codebook_size(); + + m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices); + + for (uint i = 0; i < state.m_cluster_indices.size(); i++) + { + crnlib::vector& indices = state.m_cluster_indices[i]; + + for (uint j = 0; j < indices.size(); j++) + indices[j] = (*state.m_pIndices)[indices[j]]; + } + } + + }; + +} // namespace crnlib diff --git a/crnlib/crn_threaded_resampler.cpp b/crnlib/crn_threaded_resampler.cpp new file mode 100644 index 00000000..4d2382ed --- /dev/null +++ b/crnlib/crn_threaded_resampler.cpp @@ -0,0 +1,321 @@ +// File: crn_threaded_resampler.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_threaded_resampler.h" +#include "crn_resample_filters.h" + +namespace crnlib +{ + threaded_resampler::threaded_resampler(task_pool& tp) : + m_pTask_pool(&tp), + m_pParams(NULL), + m_pX_contribs(NULL), + m_pY_contribs(NULL), + m_bytes_per_pixel(0) + { + } + + threaded_resampler::~threaded_resampler() + { + free_contrib_lists(); + } + + void threaded_resampler::free_contrib_lists() + { + if (m_pX_contribs) + { + crnlib_free(m_pX_contribs->p); + m_pX_contribs->p = NULL; + + crnlib_free(m_pX_contribs); + m_pX_contribs = NULL; + } + + 
if (m_pY_contribs) + { + crnlib_free(m_pY_contribs->p); + m_pY_contribs->p = NULL; + + crnlib_free(m_pY_contribs); + m_pY_contribs = NULL; + } + } + + void threaded_resampler::resample_x_task(uint64 data, void* pData_ptr) + { + pData_ptr; + const uint thread_index = (uint)data; + + for (uint src_y = 0; src_y < m_pParams->m_src_height; src_y++) + { + if (m_pTask_pool->get_num_threads()) + { + if ((src_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + const Resampler::Contrib_List* pContribs = m_pX_contribs; + const Resampler::Contrib_List* pContribs_end = m_pX_contribs + m_pParams->m_dst_width; + + switch (m_pParams->m_fmt) + { + case cPF_Y_F32: + { + const float* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); + vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; + + do + { + const Resampler::Contrib* p = pContribs->p; + const Resampler::Contrib* p_end = pContribs->p + pContribs->n; + + vec4F s(0.0f); + + while (p != p_end) + { + const uint src_pixel = p->pixel; + const float src_weight = p->weight; + + s[0] += pSrc[src_pixel] * src_weight; + + p++; + } + + *pDst++ = s; + pContribs++; + } while (pContribs != pContribs_end); + + break; + } + case cPF_RGBX_F32: + { + const vec4F* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); + vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; + + do + { + const Resampler::Contrib* p = pContribs->p; + const Resampler::Contrib* p_end = pContribs->p + pContribs->n; + + vec4F s(0.0f); + + while (p != p_end) + { + const float src_weight = p->weight; + + const vec4F& src_pixel = pSrc[p->pixel]; + + s[0] += src_pixel[0] * src_weight; + s[1] += src_pixel[1] * src_weight; + s[2] += src_pixel[2] * src_weight; + + p++; + } + + *pDst++ = s; + pContribs++; + } while (pContribs != pContribs_end); + + break; + } + case cPF_RGBA_F32: + { + const vec4F* pSrc = 
reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); + vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; + + do + { + Resampler::Contrib* p = pContribs->p; + Resampler::Contrib* p_end = pContribs->p + pContribs->n; + + vec4F s(0.0f); + + while (p != p_end) + { + const float src_weight = p->weight; + + const vec4F& src_pixel = pSrc[p->pixel]; + + s[0] += src_pixel[0] * src_weight; + s[1] += src_pixel[1] * src_weight; + s[2] += src_pixel[2] * src_weight; + s[3] += src_pixel[3] * src_weight; + + p++; + } + + *pDst++ = s; + pContribs++; + } while (pContribs != pContribs_end); + + break; + } + default: break; + } + } + } + + void threaded_resampler::resample_y_task(uint64 data, void* pData_ptr) + { + pData_ptr; + + const uint thread_index = (uint)data; + + crnlib::vector tmp(m_pParams->m_dst_width); + + for (uint dst_y = 0; dst_y < m_pParams->m_dst_height; dst_y++) + { + if (m_pTask_pool->get_num_threads()) + { + if ((dst_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) + continue; + } + + const Resampler::Contrib_List& contribs = m_pY_contribs[dst_y]; + + const vec4F* pSrc; + + if (contribs.n == 1) + { + pSrc = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[0].pixel; + } + else + { + for (uint src_y_iter = 0; src_y_iter < contribs.n; src_y_iter++) + { + const vec4F* p = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[src_y_iter].pixel; + const float weight = contribs.p[src_y_iter].weight; + + if (!src_y_iter) + { + for (uint i = 0; i < m_pParams->m_dst_width; i++) + tmp[i] = p[i] * weight; + } + else + { + for (uint i = 0; i < m_pParams->m_dst_width; i++) + tmp[i] += p[i] * weight; + } + } + + pSrc = tmp.get_ptr(); + } + + const vec4F* pSrc_end = pSrc + m_pParams->m_dst_width; + + const float l = m_pParams->m_sample_low; + const float h = m_pParams->m_sample_high; + + switch (m_pParams->m_fmt) + { + case cPF_Y_F32: + { + float* pDst = 
reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + + do + { + *pDst++ = math::clamp((*pSrc)[0], l, h); + + pSrc++; + + } while (pSrc != pSrc_end); + + break; + } + case cPF_RGBX_F32: + { + vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + + do + { + (*pDst)[0] = math::clamp((*pSrc)[0], l, h); + (*pDst)[1] = math::clamp((*pSrc)[1], l, h); + (*pDst)[2] = math::clamp((*pSrc)[2], l, h); + (*pDst)[3] = h; + + pSrc++; + pDst++; + + } while (pSrc != pSrc_end); + + break; + } + case cPF_RGBA_F32: + { + vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); + + do + { + (*pDst)[0] = math::clamp((*pSrc)[0], l, h); + (*pDst)[1] = math::clamp((*pSrc)[1], l, h); + (*pDst)[2] = math::clamp((*pSrc)[2], l, h); + (*pDst)[3] = math::clamp((*pSrc)[3], l, h); + + pSrc++; + pDst++; + + } while (pSrc != pSrc_end); + + break; + } + default: break; + } + } + } + + bool threaded_resampler::resample(const params& p) + { + free_contrib_lists(); + + m_pParams = &p; + + CRNLIB_ASSERT(m_pParams->m_src_width && m_pParams->m_src_height); + CRNLIB_ASSERT(m_pParams->m_dst_width && m_pParams->m_dst_height); + + switch (p.m_fmt) + { + case cPF_Y_F32: + m_bytes_per_pixel = 4; + break; + case cPF_RGBX_F32: + case cPF_RGBA_F32: + m_bytes_per_pixel = 16; + break; + default: + CRNLIB_ASSERT(false); + return false; + } + + int filter_index = find_resample_filter(p.m_Pfilter_name); + if (filter_index < 0) + return false; + + const resample_filter& filter = g_resample_filters[filter_index]; + + m_pX_contribs = Resampler::make_clist(m_pParams->m_src_width, m_pParams->m_dst_width, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_x_scale, 0.0f); + if (!m_pX_contribs) + return false; + + m_pY_contribs = Resampler::make_clist(m_pParams->m_src_height, m_pParams->m_dst_height, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_y_scale, 
0.0f); + if (!m_pY_contribs) + return false; + + if (!m_tmp_img.try_resize(m_pParams->m_dst_width * m_pParams->m_src_height)) + return false; + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, NULL); + m_pTask_pool->join(); + + for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) + m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, NULL); + m_pTask_pool->join(); + + m_tmp_img.clear(); + free_contrib_lists(); + + return true; + } + +} // namespace crnlib + diff --git a/crnlib/crn_threaded_resampler.h b/crnlib/crn_threaded_resampler.h new file mode 100644 index 00000000..70ad8880 --- /dev/null +++ b/crnlib/crn_threaded_resampler.h @@ -0,0 +1,87 @@ +// File: crn_threaded_resampler.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_resampler.h" +#include "crn_vec.h" +#include "crn_task_pool.h" + +namespace crnlib +{ + class threaded_resampler + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_resampler); + + public: + threaded_resampler(task_pool& tp); + ~threaded_resampler(); + + enum pixel_format + { + cPF_Y_F32, + cPF_RGBX_F32, + cPF_RGBA_F32, + + cPF_Total + }; + + struct params + { + params() + { + clear(); + } + + void clear() + { + utils::zero_object(*this); + + m_boundary_op = Resampler::BOUNDARY_CLAMP; + m_sample_low = 0.0f; + m_sample_high = 255.0f; + m_Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; + m_filter_x_scale = 1.0f; + m_filter_y_scale = 1.0f; + } + + pixel_format m_fmt; + + const void* m_pSrc_pixels; + uint m_src_width; + uint m_src_height; + uint m_src_pitch; + + void* m_pDst_pixels; + uint m_dst_width; + uint m_dst_height; + uint m_dst_pitch; + + Resampler::Boundary_Op m_boundary_op; + + float m_sample_low; + float m_sample_high; + + const char* m_Pfilter_name; + float m_filter_x_scale; + float m_filter_y_scale; + }; + + bool resample(const params& p); + + private: + task_pool* 
m_pTask_pool; + + const params* m_pParams; + + Resampler::Contrib_List* m_pX_contribs; + Resampler::Contrib_List* m_pY_contribs; + uint m_bytes_per_pixel; + + crnlib::vector m_tmp_img; + + void free_contrib_lists(); + + void resample_x_task(uint64 data, void* pData_ptr); + void resample_y_task(uint64 data, void* pData_ptr); + }; + +} // namespace crnlib diff --git a/crnlib/crn_traits.h b/crnlib/crn_traits.h new file mode 100644 index 00000000..81e59de0 --- /dev/null +++ b/crnlib/crn_traits.h @@ -0,0 +1,106 @@ +// File: crn_traits.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + template + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { helpers::construct(p); } + static inline void construct(T* p, const T& init) { helpers::construct(p, init); } + static inline void construct_array(T* p, uint n) { helpers::construct_array(p, n); } + static inline void destruct(T* p) { helpers::destruct(p); } + static inline void destruct_array(T* p, uint n) { helpers::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, uint n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { p; } + static inline void destruct_array(T** p, uint n) { p, n; } + }; + +#define CRNLIB_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, uint n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { p; } \ + static inline void destruct_array(X* p, uint n) { p, n; } }; + + CRNLIB_DEFINE_BUILT_IN_TYPE(bool) + 
CRNLIB_DEFINE_BUILT_IN_TYPE(char) + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned char) + CRNLIB_DEFINE_BUILT_IN_TYPE(short) + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned short) + CRNLIB_DEFINE_BUILT_IN_TYPE(int) + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned int) + CRNLIB_DEFINE_BUILT_IN_TYPE(long) + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long) + CRNLIB_DEFINE_BUILT_IN_TYPE(__int64) + CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned __int64) + CRNLIB_DEFINE_BUILT_IN_TYPE(float) + CRNLIB_DEFINE_BUILT_IN_TYPE(double) + CRNLIB_DEFINE_BUILT_IN_TYPE(long double) + +#undef CRNLIB_DEFINE_BUILT_IN_TYPE + +// See: http://erdani.org/publications/cuj-2004-06.pdf + + template + struct bitwise_movable { enum { cFlag = false }; }; + +// Defines type Q as bitwise movable. +#define CRNLIB_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + template + struct bitwise_copyable { enum { cFlag = false }; }; + + // Defines type Q as bitwise copyable. +#define CRNLIB_DEFINE_BITWISE_COPYABLE(Q) template<> struct bitwise_copyable { enum { cFlag = true }; }; + +#define CRNLIB_IS_POD(T) __is_pod(T) + +#define CRNLIB_IS_SCALAR_TYPE(T) (scalar_type::cFlag) + +#define CRNLIB_IS_BITWISE_COPYABLE(T) ((scalar_type::cFlag) || (bitwise_copyable::cFlag) || CRNLIB_IS_POD(T)) + +#define CRNLIB_IS_BITWISE_MOVABLE(T) (CRNLIB_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) + +#define CRNLIB_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T))) + + // From yasli_traits.h: + // Credit goes to Boost; + // also found in the C++ Templates book by Vandevoorde and Josuttis + + typedef char (&yes_t)[1]; + typedef char (&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + + CRNLIB_DEFINE_BITWISE_COPYABLE(empty_type); + 
CRNLIB_DEFINE_BITWISE_MOVABLE(empty_type); + +} // namespace crnlib diff --git a/crnlib/crn_tree_clusterizer.h b/crnlib/crn_tree_clusterizer.h new file mode 100644 index 00000000..13119a5e --- /dev/null +++ b/crnlib/crn_tree_clusterizer.h @@ -0,0 +1,457 @@ +// File: crn_tree_clusterizer.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_matrix.h" + +namespace crnlib +{ + template + class tree_clusterizer + { + public: + tree_clusterizer() : + m_overall_variance(0.0f) + { + } + + void clear() + { + m_hist.clear(); + m_codebook.clear(); + m_nodes.clear(); + m_overall_variance = 0.0f; + } + + void add_training_vec(const VectorType& v, uint weight) + { + const std::pair insert_result( m_hist.insert( std::make_pair(v, 0U) ) ); + + typename vector_map_type::iterator it(insert_result.first); + + uint max_weight = UINT_MAX - weight; + if (weight > max_weight) + it->second = UINT_MAX; + else + it->second = it->second + weight; + } + + bool generate_codebook(uint max_size) + { + if (m_hist.empty()) + return false; + + double ttsum = 0.0f; + + vq_node root; + root.m_vectors.reserve(static_cast(m_hist.size())); + + for (typename vector_map_type::const_iterator it = m_hist.begin(); it != m_hist.end(); ++it) + { + const VectorType& v = it->first; + const uint weight = it->second; + + root.m_centroid += (v * (float)weight); + root.m_total_weight += weight; + root.m_vectors.push_back( std::make_pair(v, weight) ); + + ttsum += v.dot(v) * weight; + } + + root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); + + root.m_centroid *= (1.0f / root.m_total_weight); + + m_nodes.clear(); + m_nodes.reserve(max_size * 2 + 1); + + m_nodes.push_back(root); + + uint total_leaves = 1; + + while (total_leaves < max_size) + { + int worst_node_index = -1; + float worst_variance = -1.0f; + + for (uint i = 0; i < m_nodes.size(); i++) + { + vq_node& node = m_nodes[i]; + + // Skip internal and unsplittable nodes. 
+ if ((node.m_left != -1) || (node.m_unsplittable)) + continue; + + if (node.m_variance > worst_variance) + { + worst_variance = node.m_variance; + worst_node_index = i; + } + } + + if (worst_variance <= 0.0f) + break; + + split_node(worst_node_index); + total_leaves++; + } + + m_codebook.clear(); + + m_overall_variance = 0.0f; + + for (uint i = 0; i < m_nodes.size(); i++) + { + vq_node& node = m_nodes[i]; + if (node.m_left != -1) + { + CRNLIB_ASSERT(node.m_right != -1); + continue; + } + + CRNLIB_ASSERT((node.m_left == -1) && (node.m_right == -1)); + + node.m_codebook_index = m_codebook.size(); + m_codebook.push_back(node.m_centroid); + + m_overall_variance += node.m_variance; + } + + return true; + } + + inline float get_overall_variance() const { return m_overall_variance; } + + inline uint get_codebook_size() const + { + return m_codebook.size(); + } + + inline const VectorType& get_codebook_entry(uint index) const + { + return m_codebook[index]; + } + + typedef crnlib::vector vector_vec_type; + inline const vector_vec_type& get_codebook() const + { + return m_codebook; + } + + const uint find_best_codebook_entry(const VectorType& v) const + { + uint cur_node_index = 0; + + for ( ; ; ) + { + const vq_node& cur_node = m_nodes[cur_node_index]; + + if (cur_node.m_left == -1) + return cur_node.m_codebook_index; + + const vq_node& left_node = m_nodes[cur_node.m_left]; + const vq_node& right_node = m_nodes[cur_node.m_right]; + + float left_dist = left_node.m_centroid.squared_distance(v); + float right_dist = right_node.m_centroid.squared_distance(v); + + if (left_dist < right_dist) + cur_node_index = cur_node.m_left; + else + cur_node_index = cur_node.m_right; + } + } + + const uint find_best_codebook_entry_fs(const VectorType& v) const + { + float best_dist = math::cNearlyInfinite; + uint best_index = 0; + + for (uint i = 0; i < m_codebook.size(); i++) + { + float dist = m_codebook[i].squared_distance(v); + if (dist < best_dist) + { + best_dist = dist; + best_index 
= i; + if (best_dist == 0.0f) + break; + } + } + + return best_index; + } + + private: + typedef std::map vector_map_type; + + vector_map_type m_hist; + + struct vq_node + { + vq_node() : m_centroid(cClear), m_total_weight(0), m_left(-1), m_right(-1), m_codebook_index(-1), m_unsplittable(false) { } + + VectorType m_centroid; + uint64 m_total_weight; + + float m_variance; + + crnlib::vector< std::pair > m_vectors; + + int m_left; + int m_right; + + int m_codebook_index; + + bool m_unsplittable; + }; + + typedef crnlib::vector node_vec_type; + + node_vec_type m_nodes; + + vector_vec_type m_codebook; + + float m_overall_variance; + + random m_rand; + + void split_node(uint index) + { + vq_node& parent_node = m_nodes[index]; + + if (parent_node.m_vectors.size() == 1) + return; + + VectorType furthest; + double furthest_dist = -1.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = parent_node.m_vectors[i].first; + + double dist = v.squared_distance(parent_node.m_centroid); + if (dist > furthest_dist) + { + furthest_dist = dist; + furthest = v; + } + } + + VectorType opposite; + double opposite_dist = -1.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = parent_node.m_vectors[i].first; + + double dist = v.squared_distance(furthest); + if (dist > opposite_dist) + { + opposite_dist = dist; + opposite = v; + } + } + + VectorType left_child((furthest + parent_node.m_centroid) * .5f); + VectorType right_child((opposite + parent_node.m_centroid) * .5f); + + if (parent_node.m_vectors.size() > 2) + { + const uint N = VectorType::num_elements; + + matrix covar; + covar.clear(); + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType v(parent_node.m_vectors[i].first - parent_node.m_centroid); + const VectorType w(v * (float)parent_node.m_vectors[i].second); + + for (uint x = 0; x < N; x++) + for (uint y = x; y < N; y++) + covar[x][y] = covar[x][y] + v[x] * w[y]; + } + + for 
(uint x = 0; x < N - 1; x++) + for (uint y = x + 1; y < N; y++) + covar[y][x] = covar[x][y]; + + covar /= float(parent_node.m_total_weight); + + VectorType axis(1.0f); + // Starting with an estimate of the principle axis should work better, but doesn't in practice? + //left_child - right_child); + //axis.normalize(); + + for (uint iter = 0; iter < 10; iter++) + { + VectorType x; + + double max_sum = 0; + + for (uint i = 0; i < N; i++) + { + double sum = 0; + + for (uint j = 0; j < N; j++) + sum += axis[j] * covar[i][j]; + + x[i] = (float)sum; + + max_sum = i ? math::maximum(max_sum, sum) : sum; + } + + if (max_sum != 0.0f) + x *= (float)(1.0f / max_sum); + + axis = x; + } + + axis.normalize(); + + VectorType new_left_child(0.0f); + VectorType new_right_child(0.0f); + + double left_weight = 0.0f; + double right_weight = 0.0f; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const float weight = (float)parent_node.m_vectors[i].second; + + const VectorType& v = parent_node.m_vectors[i].first; + + double t = (v - parent_node.m_centroid) * axis; + if (t < 0.0f) + { + new_left_child += v * weight; + left_weight += weight; + } + else + { + new_right_child += v * weight; + right_weight += weight; + } + } + + if ((left_weight > 0.0f) && (right_weight > 0.0f)) + { + left_child = new_left_child * (float)(1.0f/left_weight); + right_child = new_right_child * (float)(1.0f/right_weight); + } + } + + uint64 left_weight = 0; + uint64 right_weight = 0; + + crnlib::vector< std::pair > left_children; + crnlib::vector< std::pair > right_children; + + left_children.reserve(parent_node.m_vectors.size() / 2); + right_children.reserve(parent_node.m_vectors.size() / 2); + + float prev_total_variance = 1e+10f; + + float left_variance = 0.0f; + float right_variance = 0.0f; + + // FIXME: Excessive upper limit + const uint cMaxLoops = 1024; + for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) + { + left_children.resize(0); + right_children.resize(0); + + 
VectorType new_left_child(cClear); + VectorType new_right_child(cClear); + + double left_ttsum = 0.0f; + double right_ttsum = 0.0f; + + left_weight = 0; + right_weight = 0; + + for (uint i = 0; i < parent_node.m_vectors.size(); i++) + { + const VectorType& v = parent_node.m_vectors[i].first; + const uint weight = parent_node.m_vectors[i].second; + + double left_dist2 = left_child.squared_distance(v); + double right_dist2 = right_child.squared_distance(v); + + if (left_dist2 < right_dist2) + { + left_children.push_back(parent_node.m_vectors[i]); + + new_left_child += (v * (float)weight); + left_weight += weight; + + left_ttsum += v.dot(v) * weight; + } + else + { + right_children.push_back(parent_node.m_vectors[i]); + + new_right_child += (v * (float)weight); + right_weight += weight; + + right_ttsum += v.dot(v) * weight; + } + } + + if ((!left_weight) || (!right_weight)) + { + parent_node.m_unsplittable = true; + return; + } + + left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); + right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); + + new_left_child *= (1.0f / left_weight); + new_right_child *= (1.0f / right_weight); + + left_child = new_left_child; + left_weight = left_weight; + + right_child = new_right_child; + right_weight = right_weight; + + float total_variance = left_variance + right_variance; + if (total_variance < .00001f) + break; + + if (((prev_total_variance - total_variance) / total_variance) < .00001f) + break; + + prev_total_variance = total_variance; + } + + const uint left_child_index = m_nodes.size(); + const uint right_child_index = m_nodes.size() + 1; + + parent_node.m_left = m_nodes.size(); + parent_node.m_right = m_nodes.size() + 1; + + m_nodes.resize(m_nodes.size() + 2); + + // parent_node is invalid now, because m_nodes has been changed + + vq_node& left_child_node = m_nodes[left_child_index]; + vq_node& right_child_node = m_nodes[right_child_index]; + + 
left_child_node.m_centroid = left_child; + left_child_node.m_total_weight = left_weight; + left_child_node.m_vectors.swap(left_children); + left_child_node.m_variance = left_variance; + + right_child_node.m_centroid = right_child; + right_child_node.m_total_weight = right_weight; + right_child_node.m_vectors.swap(right_children); + right_child_node.m_variance = right_variance; + } + + }; + +} // namespace crnlib + diff --git a/crnlib/crn_types.h b/crnlib/crn_types.h new file mode 100644 index 00000000..93cc6343 --- /dev/null +++ b/crnlib/crn_types.h @@ -0,0 +1,57 @@ +// File: crn_types.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + typedef unsigned char uint8; + typedef signed char int8; + typedef unsigned short uint16; + typedef signed short int16; + typedef unsigned int uint32; + typedef uint32 uint; + typedef signed int int32; + + typedef unsigned __int64 uint64; + typedef signed __int64 int64; + + const uint8 UINT8_MIN = 0; + const uint8 UINT8_MAX = 0xFFU; + const uint16 UINT16_MIN = 0; + const uint16 UINT16_MAX = 0xFFFFU; + const uint32 UINT32_MIN = 0; + const uint32 UINT32_MAX = 0xFFFFFFFFU; + const uint64 UINT64_MIN = 0; + const uint64 UINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; + + const int8 INT8_MIN = -128; + const int8 INT8_MAX = 127; + const int16 INT16_MIN = -32768; + const int16 INT16_MAX = 32767; + const int32 INT32_MIN = (-2147483647 - 1); + const int32 INT32_MAX = 2147483647; + const int64 INT64_MIN = (int64)0x8000000000000000ULL; //(-9223372036854775807i64 - 1); + const int64 INT64_MAX = (int64)0x7FFFFFFFFFFFFFFFULL; // 9223372036854775807i64; + +#ifdef CRNLIB_PLATFORM_PC_X64 + typedef unsigned __int64 uint_ptr; + typedef unsigned __int64 uint32_ptr; + typedef signed __int64 signed_size_t; + typedef uint64 ptr_bits_t; +#else + typedef unsigned int uint_ptr; + typedef unsigned int uint32_ptr; + typedef signed int signed_size_t; + typedef uint32 ptr_bits_t; +#endif + + enum 
eVarArg { cVarArg }; + enum eClear { cClear }; + enum eNoClamp { cNoClamp }; + enum { cInvalidIndex = -1 }; + + const uint cIntBits = 32; + + struct empty_type { }; + +} // namespace crnlib diff --git a/crnlib/crn_utils.cpp b/crnlib/crn_utils.cpp new file mode 100644 index 00000000..4d772705 --- /dev/null +++ b/crnlib/crn_utils.cpp @@ -0,0 +1,60 @@ +// File: crn_utils.cpp +#include "crn_core.h" +#include "crn_utils.h" + +namespace crnlib +{ + namespace utils + { + void endian_switch_words(uint16* p, uint num) + { + uint16* p_end = p + num; + while (p != p_end) + { + uint16 k = *p; + *p++ = swap16(k); + } + } + + void endian_switch_dwords(uint32* p, uint num) + { + uint32* p_end = p + num; + while (p != p_end) + { + uint32 k = *p; + *p++ = swap32(k); + } + } + + void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch) + { + if (!endian_switch) + memcpy(pDst, pSrc, num << 1U); + else + { + uint16* pDst_end = pDst + num; + while (pDst != pDst_end) + *pDst++ = swap16(*pSrc++); + } + } + + uint compute_max_mips(uint width, uint height) + { + if ((width | height) == 0) + return 0; + + uint num_mips = 1; + + while ((width > 1U) || (height > 1U)) + { + width >>= 1U; + height >>= 1U; + num_mips++; + } + + return num_mips; + } + + } // namespace utils + +} // namespace crnlib diff --git a/crnlib/crn_utils.h b/crnlib/crn_utils.h new file mode 100644 index 00000000..d46d448b --- /dev/null +++ b/crnlib/crn_utils.h @@ -0,0 +1,234 @@ +// File: crn_utils.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#define CRNLIB_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define CRNLIB_MAX(a, b) (((a) < (b)) ? 
(b) : (a)) + +#define CRNLIB_ARRAYSIZE(x) (sizeof(x)/sizeof(x[0])) + +#ifdef _MSC_VER +extern "C" unsigned long __cdecl _lrotl(unsigned long, int); +#pragma intrinsic(_lrotl) + +extern "C" unsigned long __cdecl _lrotr(unsigned long, int); +#pragma intrinsic(_lrotr) +#endif + +//#define CRNLIB_ROTATE_LEFT(x, k) (((x) << (k)) | ((x) >> (32-(k)))) +#define CRNLIB_ROTATE_LEFT(x, k) _lrotl(x, k) + +//#define CRNLIB_ROTATE_RIGHT(x, k) (((x) >> (k)) | ((x) << (32-(k)))) +#define CRNLIB_ROTATE_RIGHT(x, k) _lrotr(x, k) + +template T decay_array_to_subtype(T (&a)[N]); +#define CRNLIB_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) + +#define CRNLIB_SIZEOF_U32(x) static_cast(sizeof(x)) + +namespace crnlib +{ + namespace utils + { + template inline void swap(T& l, T& r) + { + T temp(l); + l = r; + r = temp; + } + + template inline void zero_object(T& obj) + { + memset(&obj, 0, sizeof(obj)); + } + + template inline void zero_this(T* pObj) + { + memset(pObj, 0, sizeof(*pObj)); + } + + inline bool is_bit_set(uint bits, uint mask) + { + return (bits & mask) != 0; + } + + inline void set_bit(uint& bits, uint mask, bool state) + { + if (state) + bits |= mask; + else + bits &= ~mask; + } + + inline bool is_flag_set(uint bits, uint flag) + { + CRNLIB_ASSERT(flag < 32U); + return is_bit_set(bits, 1U << flag); + } + + inline void set_flag(uint& bits, uint flag, bool state) + { + CRNLIB_ASSERT(flag < 32U); + set_bit(bits, 1U << flag, state); + } + + inline void invert_buf(void* pBuf, uint size) + { + uint8* p = static_cast(pBuf); + + const uint half_size = size >> 1; + for (uint i = 0; i < half_size; i++) + utils::swap(p[i], p[size - 1U - i]); + } + + // buffer_is_little_endian is the endianness of the buffer's data + template + inline void write_obj(const T& obj, void* pBuf, bool buffer_is_little_endian) + { + const uint8* pSrc = reinterpret_cast(&obj); + uint8* pDst = static_cast(pBuf); + + if (c_crnlib_little_endian_platform == buffer_is_little_endian) + memcpy(pDst, 
pSrc, sizeof(T)); + else + { + for (uint i = 0; i < sizeof(T); i++) + pDst[i] = pSrc[sizeof(T) - 1 - i]; + } + } + + // buffer_is_little_endian is the endianness of the buffer's data + template + inline void read_obj(T& obj, const void* pBuf, bool buffer_is_little_endian) + { + const uint8* pSrc = reinterpret_cast(pBuf); + uint8* pDst = reinterpret_cast(&obj); + + if (c_crnlib_little_endian_platform == buffer_is_little_endian) + memcpy(pDst, pSrc, sizeof(T)); + else + { + for (uint i = 0; i < sizeof(T); i++) + pDst[i] = pSrc[sizeof(T) - 1 - i]; + } + } + + template + inline bool write_obj(const T& obj, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + if (buf_size < sizeof(T)) + return false; + + utils::write_obj(obj, pBuf, buffer_is_little_endian); + + pBuf = static_cast(pBuf) + sizeof(T); + buf_size -= sizeof(T); + + return true; + } + + inline bool write_val(uint8 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } + inline bool write_val(uint16 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } + inline bool write_val(uint val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } + inline bool write_val(int val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } + inline bool write_val(uint64 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } + inline bool write_val(float val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } + inline bool write_val(double val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } 
+ + template + inline bool read_obj(T& obj, const void*& pBuf, uint& buf_size, bool buffer_is_little_endian) + { + if (buf_size < sizeof(T)) + { + zero_object(obj); + return false; + } + + utils::read_obj(obj, pBuf, buffer_is_little_endian); + + pBuf = static_cast(pBuf) + sizeof(T); + buf_size -= sizeof(T); + + return true; + } + + static inline uint16 swap16(uint16 x) { return static_cast((x << 8U) | (x >> 8U)); } + static inline uint32 swap32(uint32 x) { return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); } + + inline uint16 swap_le16_to_native(uint16 x) { return c_crnlib_little_endian_platform ? x : swap16(x); } + inline uint32 swap_le32_to_native(uint32 x) { return c_crnlib_little_endian_platform ? x : swap32(x); } + inline uint16 swap_be16_to_native(uint16 x) { return c_crnlib_big_endian_platform ? x : swap16(x); } + inline uint32 swap_be32_to_native(uint32 x) { return c_crnlib_big_endian_platform ? x : swap32(x); } + + inline uint32 read_le32(const void* p) { return swap_le32_to_native(*static_cast(p)); } + inline void write_le32(void* p, uint32 x) { *static_cast(p) = swap_le32_to_native(x); } + + inline void fast_memset(void* pDst, int val, size_t size) + { + memset(pDst, val, size); + } + + inline void fast_memcpy(void* pDst, const void* pSrc, size_t size) + { + memcpy(pDst, pSrc, size); + } + + inline uint count_leading_zeros(uint v) + { + uint temp; + uint n = 32; + + temp = v >> 16; + if (temp) { n -= 16; v = temp; } + + temp = v >> 8; + if (temp) { n -= 8; v = temp; } + + temp = v >> 4; + if (temp) { n -= 4; v = temp; } + + temp = v >> 2; + if (temp) { n -= 2; v = temp; } + + temp = v >> 1; + if (temp) { n -= 1; v = temp; } + + if (v & 1) n--; + + return n; + } + + inline uint count_leading_zeros16(uint v) + { + CRNLIB_ASSERT(v < 0x10000); + + uint temp; + uint n = 16; + + temp = v >> 8; + if (temp) { n -= 8; v = temp; } + + temp = v >> 4; + if (temp) { n -= 4; v = temp; } + + temp = v >> 2; + if (temp) { n -= 2; v 
= temp; } + + temp = v >> 1; + if (temp) { n -= 1; v = temp; } + + if (v & 1) n--; + + return n; + } + + void endian_switch_words(uint16* p, uint num); + void endian_switch_dwords(uint32* p, uint num); + void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch); + + uint compute_max_mips(uint width, uint height); + + } // namespace utils + +} // namespace crnlib + diff --git a/crnlib/crn_value.cpp b/crnlib/crn_value.cpp new file mode 100644 index 00000000..bc074771 --- /dev/null +++ b/crnlib/crn_value.cpp @@ -0,0 +1,22 @@ +// File: crn_value.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_value.h" + +namespace crnlib +{ + const wchar_t* gValueDataTypeStrings[cDTTotal + 1] = + { + L"invalid", + L"string", + L"bool", + L"int", + L"uint", + L"float", + L"vec3f", + L"vec3i", + + NULL, + }; + +} // namespace crnlib diff --git a/crnlib/crn_value.h b/crnlib/crn_value.h new file mode 100644 index 00000000..b1a9679e --- /dev/null +++ b/crnlib/crn_value.h @@ -0,0 +1,1025 @@ +// File: crn_value.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_strutils.h" +#include "crn_dynamic_wstring.h" +#include "crn_vec.h" + +namespace crnlib +{ + enum value_data_type + { + cDTInvalid, + cDTString, + cDTBool, + cDTInt, + cDTUInt, + cDTFloat, + cDTVec3F, + cDTVec3I, + + cDTTotal + }; + + extern const wchar_t* gValueDataTypeStrings[cDTTotal + 1]; + + class value + { + public: + value() : + m_type(cDTInvalid) + { + } + + value(const wchar_t* pStr) : + m_pStr(crnlib_new(pStr)), m_type(cDTString) + { + } + + value(const dynamic_wstring& str) : + m_pStr(crnlib_new(str)), m_type(cDTString) + { + } + + explicit value(bool v) : + m_bool(v), m_type(cDTBool) + { + } + + value(int v) : + m_int(v), m_type(cDTInt) + { + } + + value(uint v) : + m_uint(v), m_type(cDTUInt) + { + } + + value(float v) : + m_float(v), m_type(cDTFloat) + { + } + + value(const vec3F& v) : + 
m_pVec3F(crnlib_new(v)), m_type(cDTVec3F) + { + } + + value(const vec3I& v) : + m_pVec3I(crnlib_new(v)), m_type(cDTVec3I) + { + } + + ~value() + { + switch (m_type) + { + case cDTString: + crnlib_delete(m_pStr); + break; + case cDTVec3F: + crnlib_delete(m_pVec3F); + break; + case cDTVec3I: + crnlib_delete(m_pVec3I); + break; + default: + break; + } + } + + value(const value& other) : + m_type(cDTInvalid) + { + *this = other; + } + + value& operator= (const value& other) + { + if (this == &other) + return *this; + + change_type(other.m_type); + + switch (other.m_type) + { + case cDTString: m_pStr->set(*other.m_pStr); break; + case cDTBool: m_bool = other.m_bool; break; + case cDTInt: m_int = other.m_int; break; + case cDTUInt: m_uint = other.m_uint; break; + case cDTFloat: m_float = other.m_float; break; + case cDTVec3F: m_pVec3F->set(*other.m_pVec3F); break; + case cDTVec3I: m_pVec3I->set(*other.m_pVec3I); break; + default: break; + } + return *this; + } + + inline value_data_type get_data_type() const { return m_type; } + + void clear() + { + clear_dynamic(); + + m_type = cDTInvalid; + } + + void set_string(const wchar_t* pStr) + { + set_str(pStr); + } + + void set_int(int v) + { + clear_dynamic(); + m_type = cDTInt; + m_int = v; + } + + void set_uint(uint v) + { + clear_dynamic(); + m_type = cDTUInt; + m_uint = v; + } + + void set_bool(bool v) + { + clear_dynamic(); + m_type = cDTBool; + m_bool = v; + } + + void set_float(float v) + { + clear_dynamic(); + m_type = cDTFloat; + m_float = v; + } + + void set_vec(const vec3F& v) + { + change_type(cDTVec3F); + m_pVec3F->set(v); + } + + void set_vec(const vec3I& v) + { + change_type(cDTVec3I); + m_pVec3I->set(v); + } + + bool parse(const wchar_t* p) + { + if ((!p) || (!p[0])) + { + clear(); + return false; + } + + if (_wcsicmp(p, L"false") == 0) + { + set_bool(false); + return true; + } + else if (_wcsicmp(p, L"true") == 0) + { + set_bool(true); + return true; + } + + if (p[0] == '\"') + { + dynamic_wstring str; + str 
= p + 1; + if (!str.is_empty()) + { + if (str[str.get_len() - 1] == '\"') + { + str.left(str.get_len() - 1); + set_str(str); + + return true; + } + } + } + + if (wcschr(p, L',') != NULL) + { + float fx = 0, fy = 0, fz = 0; +#ifdef _MSC_VER + if (swscanf_s(p, L"%f,%f,%f", &fx, &fy, &fz) == 3) +#else + if (swscanf(p, L"%f,%f,%f", &fx, &fy, &fz) == 3) +#endif + { + bool as_float = true; + int ix = 0, iy = 0, iz = 0; +#ifdef _MSC_VER + if (swscanf_s(p, L"%i,%i,%i", &ix, &iy, &iz) == 3) +#else + if (swscanf(p, L"%i,%i,%i", &ix, &iy, &iz) == 3) +#endif + { + if ((ix == fx) && (iy == fy) && (iz == fz)) + as_float = false; + } + + if (as_float) + set_vec(vec3F(fx, fy, fz)); + else + set_vec(vec3I(ix, iy, iz)); + + return true; + } + } + + const wchar_t* q = p; + bool success = string_to_uint(q, m_uint); + if ((success) && (*q == 0)) + { + set_uint(m_uint); + return true; + } + + q = p; + success = string_to_int(q, m_int); + if ((success) && (*q == 0)) + { + set_int(m_int); + return true; + } + + q = p; + success = string_to_float(q, m_float); + if ((success) && (*q == 0)) + { + set_float(m_float); + return true; + } + + set_string(p); + + return true; + } + + dynamic_wstring& get_as_string(dynamic_wstring& dst) const + { + switch (m_type) + { + case cDTInvalid: dst.clear(); break; + case cDTString: dst = *m_pStr; break; + case cDTBool: dst = m_bool ? 
L"TRUE" : L"FALSE"; break; + case cDTInt: dst.format(L"%i", m_int); break; + case cDTUInt: dst.format(L"%u", m_uint); break; + case cDTFloat: dst.format(L"%f", m_float); break; + case cDTVec3F: dst.format(L"%f,%f,%f", (*m_pVec3F)[0], (*m_pVec3F)[1], (*m_pVec3F)[2]); break; + case cDTVec3I: dst.format(L"%i,%i,%i", (*m_pVec3I)[0], (*m_pVec3I)[1], (*m_pVec3I)[2]); break; + default: break; + } + + return dst; + } + + bool get_as_int(int& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = 0; + return false; + } + case cDTString: + { + const wchar_t* p = m_pStr->get_ptr(); + return string_to_int(p, val); + } + case cDTBool: val = m_bool; break; + case cDTInt: val = m_int; break; + case cDTUInt: + { + if (m_uint > INT_MAX) + { + val = 0; + return false; + } + val = m_uint; + break; + } + case cDTFloat: + { + if ((m_float < INT_MIN) || (m_float > INT_MAX)) + { + val = 0; + return false; + } + val = (int)m_float; + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = 0; + return false; + } + if (((*m_pVec3F)[component] < INT_MIN) || ((*m_pVec3F)[component] > INT_MAX)) + { + val = 0; + return false; + } + val = (int)(*m_pVec3F)[component]; + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = 0; + return false; + } + val = (int)(*m_pVec3I)[component]; + break; + } + default: break; + } + return true; + } + + bool get_as_uint(uint& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = 0; + return false; + } + case cDTString: + { + const wchar_t* p = m_pStr->get_ptr(); + return string_to_uint(p, val); + } + case cDTBool: + { + val = m_bool; + break; + } + case cDTInt: + { + if (m_int < 0) + { + val = 0; + return false; + } + val = (uint)m_int; + break; + } + case cDTUInt: + { + val = m_uint; + break; + } + case cDTFloat: + { + if ((m_float < 0) || (m_float > UINT_MAX)) + { + val = 0; + return false; + } + val = (uint)m_float; + break; + } + case cDTVec3F: + { + if (component > 2) + 
{ + val = 0; + return false; + } + if (((*m_pVec3F)[component] < 0) || ((*m_pVec3F)[component] > UINT_MAX)) + { + val = 0; + return false; + } + val = (uint)(*m_pVec3F)[component]; + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = 0; + return false; + } + if ((*m_pVec3I)[component] < 0) + { + val = 0; + return false; + } + val = (uint)(*m_pVec3I)[component]; + break; + } + default: break; + } + return true; + } + + bool get_as_bool(bool& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = false; + return false; + } + case cDTString: + { + const wchar_t* p = m_pStr->get_ptr(); + return string_to_bool(p, val); + } + case cDTBool: + { + val = m_bool; + break; + } + case cDTInt: + { + val = (m_int != 0); + break; + } + case cDTUInt: + { + val = (m_uint != 0); + break; + } + case cDTFloat: + { + val = (m_float != 0); + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = false; + return false; + } + val = ((*m_pVec3F)[component] != 0); + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = false; + return false; + } + val = ((*m_pVec3I)[component] != 0); + break; + } + default: break; + } + return true; + } + + bool get_as_float(float& val, uint component = 0) const + { + switch (m_type) + { + case cDTInvalid: + { + val = 0; + return false; + } + case cDTString: + { + const wchar_t* p = m_pStr->get_ptr(); + return string_to_float(p, val); + } + case cDTBool: + { + val = m_bool; + break; + } + case cDTInt: + { + val = (float)m_int; + break; + } + case cDTUInt: + { + val = (float)m_uint; + break; + } + case cDTFloat: + { + val = m_float; + break; + } + case cDTVec3F: + { + if (component > 2) + { + val = 0; + return false; + } + val = (*m_pVec3F)[component]; + break; + } + case cDTVec3I: + { + if (component > 2) + { + val = 0; + return false; + } + val = (float)(*m_pVec3I)[component]; + break; + } + default: break; + } + return true; + } + + bool get_as_vec(vec3F& val) const + { + switch (m_type) 
+ { + case cDTInvalid: + { + val.clear(); + return false; + } + case cDTString: + { + const wchar_t* p = m_pStr->get_ptr(); + float x = 0, y = 0, z = 0; +#ifdef _MSC_VER + if (wscanf_s(p, L"%f,%f,%f", &x, &y, &z) == 3) +#else + if (wscanf(p, L"%f,%f,%f", &x, &y, &z) == 3) +#endif + { + val.set(x, y, z); + return true; + } + else + { + val.clear(); + return false; + } + } + case cDTBool: + { + val.set(m_bool); + break; + } + case cDTInt: + { + val.set(static_cast(m_int)); + break; + } + case cDTUInt: + { + val.set(static_cast(m_uint)); + break; + } + case cDTFloat: + { + val.set(m_float); + break; + } + case cDTVec3F: + { + val = *m_pVec3F; + break; + } + case cDTVec3I: + { + val.set((float)(*m_pVec3I)[0], (float)(*m_pVec3I)[1], (float)(*m_pVec3I)[2]); + break; + } + default: break; + } + return true; + } + + bool get_as_vec(vec3I& val) const + { + switch (m_type) + { + case cDTInvalid: + { + val.clear(); + return false; + } + case cDTString: + { + const wchar_t* p = m_pStr->get_ptr(); + float x = 0, y = 0, z = 0; +#ifdef _MSC_VER + if (wscanf_s(p, L"%f,%f,%f", &x, &y, &z) == 3) +#else + if (wscanf(p, L"%f,%f,%f", &x, &y, &z) == 3) +#endif + { + if ((x < INT_MIN) || (x > INT_MAX) || (y < INT_MIN) || (y > INT_MAX) || (z < INT_MIN) || (z > INT_MAX)) + { + val.clear(); + return false; + } + val.set((int)x, (int)y, (int)z); + return true; + } + else + { + val.clear(); + return false; + } + + break; + } + case cDTBool: + { + val.set(m_bool); + break; + } + case cDTInt: + { + val.set(m_int); + break; + } + case cDTUInt: + { + val.set(m_uint); + break; + } + case cDTFloat: + { + val.set((int)m_float); + break; + } + case cDTVec3F: + { + val.set((int)(*m_pVec3F)[0], (int)(*m_pVec3F)[1], (int)(*m_pVec3F)[2]); + break; + } + case cDTVec3I: + { + val = *m_pVec3I; + break; + } + default: break; + } + return true; + } + + bool set_zero() + { + switch (m_type) + { + case cDTInvalid: + { + return false; + } + case cDTString: + { + m_pStr->empty(); + break; + } + case cDTBool: + { 
+ m_bool = false; + break; + } + case cDTInt: + { + m_int = 0; + break; + } + case cDTUInt: + { + m_uint = 0; + break; + } + case cDTFloat: + { + m_float = 0; + break; + } + case cDTVec3F: + { + m_pVec3F->clear(); + break; + } + case cDTVec3I: + { + m_pVec3I->clear(); + break; + } + default: break; + } + return true; + } + + bool is_vector() const + { + switch (m_type) + { + case cDTVec3F: + case cDTVec3I: + return true; + default: break; + } + return false; + } + + uint get_num_components() const + { + switch (m_type) + { + case cDTVec3F: + case cDTVec3I: + return 3; + default: break; + } + return 1; + } + + bool is_numeric() const + { + switch (m_type) + { + case cDTInt: + case cDTUInt: + case cDTFloat: + case cDTVec3F: + case cDTVec3I: + return true; + default: break; + } + return false; + } + + bool is_float() const + { + switch (m_type) + { + case cDTFloat: + case cDTVec3F: + return true; + default: break; + } + return false; + } + + bool is_integer() const + { + switch (m_type) + { + case cDTInt: + case cDTUInt: + case cDTVec3I: + return true; + default: break; + } + return false; + } + + bool is_signed() const + { + switch (m_type) + { + case cDTInt: + case cDTFloat: + case cDTVec3F: + case cDTVec3I: + return true; + default: break; + } + return false; + } + + bool is_string() const + { + return m_type == cDTString; + } + + int serialize(void* pBuf, uint buf_size, bool little_endian) const + { + uint buf_left = buf_size; + + uint8 t = (uint8)m_type; + if (!utils::write_obj(t, pBuf, buf_left, little_endian)) return -1; + + switch (m_type) + { + case cDTString: + { + int bytes_written = m_pStr->serialize(pBuf, buf_left, little_endian); + if (bytes_written < 0) return -1; + + pBuf = static_cast(pBuf) + bytes_written; + buf_left -= bytes_written; + + break; + } + case cDTBool: + { + if (!utils::write_obj(m_bool, pBuf, buf_left, little_endian)) return -1; + break; + } + case cDTInt: + case cDTUInt: + case cDTFloat: + { + if (!utils::write_obj(m_float, pBuf, 
buf_left, little_endian)) return -1; + break; + } + case cDTVec3F: + { + for (uint i = 0; i < 3; i++) + if (!utils::write_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) return -1; + break; + } + case cDTVec3I: + { + for (uint i = 0; i < 3; i++) + if (!utils::write_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) return -1; + break; + } + default: break; + } + + return buf_size - buf_left; + } + + int deserialize(const void* pBuf, uint buf_size, bool little_endian) + { + uint buf_left = buf_size; + + uint8 t; + if (!utils::read_obj(t, pBuf, buf_left, little_endian)) return -1; + + if (t >= cDTTotal) + return -1; + + m_type = static_cast(t); + + switch (m_type) + { + case cDTString: + { + change_type(cDTString); + + int bytes_read = m_pStr->deserialize(pBuf, buf_left, little_endian); + if (bytes_read < 0) return -1; + + pBuf = static_cast(pBuf) + bytes_read; + buf_left -= bytes_read; + + break; + } + case cDTBool: + { + if (!utils::read_obj(m_bool, pBuf, buf_left, little_endian)) return -1; + break; + } + case cDTInt: + case cDTUInt: + case cDTFloat: + { + if (!utils::read_obj(m_float, pBuf, buf_left, little_endian)) return -1; + break; + } + case cDTVec3F: + { + change_type(cDTVec3F); + + for (uint i = 0; i < 3; i++) + if (!utils::read_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) return -1; + break; + } + case cDTVec3I: + { + change_type(cDTVec3I); + + for (uint i = 0; i < 3; i++) + if (!utils::read_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) return -1; + break; + } + default: break; + } + + return buf_size - buf_left; + } + + void swap(value& other) + { + for (uint i = 0; i < cUnionSize; i++) + std::swap(m_union[i], other.m_union[i]); + + std::swap(m_type, other.m_type); + } + + private: + void clear_dynamic() + { + if (m_type == cDTVec3F) + { + crnlib_delete(m_pVec3F); + m_pVec3F = NULL; + + m_type = cDTInvalid; + } + else if (m_type == cDTVec3I) + { + crnlib_delete(m_pVec3I); + m_pVec3I = NULL; + + m_type = cDTInvalid; + } + else if (m_type 
== cDTString) + { + crnlib_delete(m_pStr); + m_pStr = NULL; + + m_type = cDTInvalid; + } + } + + void change_type(value_data_type type) + { + if (type != m_type) + { + clear_dynamic(); + + m_type = type; + + switch (m_type) + { + case cDTString: + m_pStr = crnlib_new(); + break; + case cDTVec3F: + m_pVec3F = crnlib_new(); + break; + case cDTVec3I: + m_pVec3I = crnlib_new(); + break; + default: break; + } + } + } + + void set_str(const dynamic_wstring& s) + { + if (m_type == cDTString) + m_pStr->set(s); + else + { + clear_dynamic(); + + m_type = cDTString; + m_pStr = crnlib_new(s); + } + } + + void set_str(const wchar_t* p) + { + if (m_type == cDTString) + m_pStr->set(p); + else + { + clear_dynamic(); + + m_type = cDTString; + m_pStr = crnlib_new(p); + } + } + + enum { cUnionSize = 1 }; + + union + { + bool m_bool; + int m_int; + uint m_uint; + float m_float; + + vec3F* m_pVec3F; + vec3I* m_pVec3I; + dynamic_wstring* m_pStr; + + uint m_union[cUnionSize]; + }; + + value_data_type m_type; + }; + +} // namespace crnlib + diff --git a/crnlib/crn_vec.h b/crnlib/crn_vec.h new file mode 100644 index 00000000..f380aa20 --- /dev/null +++ b/crnlib/crn_vec.h @@ -0,0 +1,838 @@ +// File: crn_vec.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +#include "crn_core.h" +#include "crn_rand.h" + +namespace crnlib +{ + template class vec : public helpers::rel_ops< vec > + { + public: + typedef T scalar_type; + enum { num_elements = N }; + + inline vec() { } + + inline vec(eClear) { clear(); } + + inline vec(const vec& other) + { + for (uint i = 0; i < N; i++) + m_s[i] = other.m_s[i]; + } + + template + inline vec(const vec& other) + { + set(other); + } + + template + inline vec(const vec& other, T w) + { + *this = other; + m_s[N - 1] = w; + } + + explicit inline vec(T val) + { + set(val); + } + + inline vec(T val0, T val1) + { + set(val0, val1); + } + + inline vec(T val0, T val1, T val2) + { + set(val0, val1, val2); + } + + inline vec(T val0, T val1, T 
val2, T val3) + { + set(val0, val1, val2, val3); + } + + inline void clear() + { + if (N > 4) + memset(m_s, 0, sizeof(m_s)); + else + { + for (uint i = 0; i < N; i++) + m_s[i] = 0; + } + } + + template inline vec& set(const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + const uint m = math::minimum(N, ON); + uint i; + for (i = 0; i < m; i++) + m_s[i] = static_cast(other[i]); + for ( ; i < N; i++) + m_s[i] = 0; + return *this; + } + + inline vec& set_component(uint index, T val) + { + CRNLIB_ASSERT(index < N); + m_s[index] = val; + return *this; + } + + inline vec& set(T val) + { + for (uint i = 0; i < N; i++) + m_s[i] = val; + return *this; + } + + inline vec& set(T val0, T val1) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + for (uint i = 2; i < N; i++) + m_s[i] = 0; + } + return *this; + } + + inline vec& set(T val0, T val1, T val2) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + for (uint i = 3; i < N; i++) + m_s[i] = 0; + } + } + return *this; + } + + inline vec& set(T val0, T val1, T val2, T val3) + { + m_s[0] = val0; + if (N >= 2) + { + m_s[1] = val1; + + if (N >= 3) + { + m_s[2] = val2; + + if (N >= 4) + { + m_s[3] = val3; + + for (uint i = 4; i < N; i++) + m_s[i] = 0; + } + } + } + return *this; + } + + inline vec& set(const T* pValues) + { + for (uint i = 0; i < N; i++) + m_s[i] = pValues[i]; + return *this; + } + + template inline vec& swizzle_set(const vec& other, uint i) + { + return set(static_cast(other[i])); + } + + template inline vec& swizzle_set(const vec& other, uint i, uint j) + { + return set(static_cast(other[i]), static_cast(other[j])); + } + + template inline vec& swizzle_set(const vec& other, uint i, uint j, uint k) + { + return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k])); + } + + template inline vec& swizzle_set(const vec& other, uint i, uint j, uint k, uint l) + { + return set(static_cast(other[i]), static_cast(other[j]), 
static_cast(other[k]), static_cast(other[l])); + } + + inline vec& operator= (const vec& rhs) + { + if (this != &rhs) + { + for (uint i = 0; i < N; i++) + m_s[i] = rhs.m_s[i]; + } + return *this; + } + + template + inline vec& operator= (const vec& other) + { + if ((void*)this == (void*)&other) + return *this; + + uint s = math::minimum(N, O); + + uint i; + for (i = 0; i < s; i++) + m_s[i] = static_cast(other[i]); + + for ( ; i < N; i++) + m_s[i] = 0; + + return *this; + } + + inline bool operator== (const vec& rhs) const + { + for (uint i = 0; i < N; i++) + if (!(m_s[i] == rhs.m_s[i])) + return false; + return true; + } + + inline bool operator< (const vec& rhs) const + { + for (uint i = 0; i < N; i++) + { + if (m_s[i] < rhs.m_s[i]) + return true; + else if (!(m_s[i] == rhs.m_s[i])) + return false; + } + + return false; + } + + inline T operator[] (uint i) const + { + CRNLIB_ASSERT(i < N); + return m_s[i]; + } + + inline T& operator[] (uint i) + { + CRNLIB_ASSERT(i < N); + return m_s[i]; + } + + inline T get_x(void) const { return m_s[0]; } + inline T get_y(void) const { CRNLIB_ASSUME(N >= 2); return m_s[1]; } + inline T get_z(void) const { CRNLIB_ASSUME(N >= 3); return m_s[2]; } + inline T get_w(void) const { CRNLIB_ASSUME(N >= 4); return m_s[3]; } + + inline vec& set_x(T v) { m_s[0] = v; return *this; } + inline vec& set_y(T v) { CRNLIB_ASSUME(N >= 2); m_s[1] = v; return *this; } + inline vec& set_z(T v) { CRNLIB_ASSUME(N >= 3); m_s[2] = v; return *this; } + inline vec& set_w(T v) { CRNLIB_ASSUME(N >= 4); m_s[3] = v; return *this; } + + inline vec as_point() const + { + vec result(*this); + result[N - 1] = 1; + return result; + } + + inline vec as_dir() const + { + vec result(*this); + result[N - 1] = 0; + return result; + } + + inline vec<2, T> select2(uint i, uint j) const + { + CRNLIB_ASSERT((i < N) && (j < N)); + return vec<2, T>(m_s[i], m_s[j]); + } + + inline vec<3, T> select3(uint i, uint j, uint k) const + { + CRNLIB_ASSERT((i < N) && (j < N) && (k < 
N)); + return vec<3, T>(m_s[i], m_s[j], m_s[k]); + } + + inline vec<4, T> select4(uint i, uint j, uint k, uint l) const + { + CRNLIB_ASSERT((i < N) && (j < N) && (k < N) && (l < N)); + return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); + } + + inline bool is_dir() const { return m_s[N - 1] == 0; } + inline bool is_vector() const { return is_dir(); } + inline bool is_point() const { return m_s[N - 1] == 1; } + + inline vec project() const + { + vec result(*this); + if (result[N - 1]) + result /= result[N - 1]; + return result; + } + + inline vec broadcast(unsigned i) const + { + return vec((*this)[i]); + } + + inline vec swizzle(uint i, uint j) const + { + return vec((*this)[i], (*this)[j]); + } + + inline vec swizzle(uint i, uint j, uint k) const + { + return vec((*this)[i], (*this)[j], (*this)[k]); + } + + inline vec swizzle(uint i, uint j, uint k, uint l) const + { + return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); + } + + inline vec operator- () const + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = -m_s[i]; + return result; + } + + inline vec operator+ () const + { + return *this; + } + + inline vec& operator += (const vec& other) + { + for (uint i = 0; i < N; i++) + m_s[i] += other.m_s[i]; + return *this; + } + + inline vec& operator -= (const vec& other) + { + for (uint i = 0; i < N; i++) + m_s[i] -= other.m_s[i]; + return *this; + } + + inline vec& operator *= (const vec& other) + { + for (uint i = 0; i < N; i++) + m_s[i] *= other.m_s[i]; + return *this; + } + + inline vec& operator /= (const vec& other) + { + for (uint i = 0; i < N; i++) + m_s[i] /= other.m_s[i]; + return *this; + } + + inline vec& operator *= (T s) + { + for (uint i = 0; i < N; i++) + m_s[i] *= s; + return *this; + } + + inline vec& operator /= (T s) + { + for (uint i = 0; i < N; i++) + m_s[i] /= s; + return *this; + } + + friend inline T operator* (const vec& lhs, const vec& rhs) + { + T result = lhs.m_s[0] * rhs.m_s[0]; + for (uint i = 1; i < N; i++) + result 
+= lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + friend inline vec operator* (const vec& lhs, T val) + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator* (T val, const vec& lhs) + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] * val; + return result; + } + + friend inline vec operator/ (const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; + return result; + } + + friend inline vec operator/ (const vec& lhs, T val) + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] / val; + return result; + } + + friend inline vec operator+ (const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; + return result; + } + + friend inline vec operator- (const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; + return result; + } + + static inline vec<3, T> cross2(const vec& a, const vec& b) + { + CRNLIB_ASSUME(N >= 2); + return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); + } + + static inline vec<3, T> cross3(const vec& a, const vec& b) + { + CRNLIB_ASSUME(N >= 3); + return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); + } + + static inline vec<3, T> cross(const vec& a, const vec& b) + { + CRNLIB_ASSUME(N >= 2); + + if (N == 2) + return cross2(a, b); + else + return cross3(a, b); + } + + inline T dot(const vec& rhs) const + { + return *this * rhs; + } + + inline T dot2(const vec& rhs) const + { + CRNLIB_ASSUME(N >= 2); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; + } + + inline T dot3(const vec& rhs) const + { + CRNLIB_ASSUME(N >= 3); + return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; + } + + inline T norm(void) const + { + T sum = m_s[0] * m_s[0]; + for (uint i = 1; 
i < N; i++) + sum += m_s[i] * m_s[i]; + return sum; + } + + inline T length(void) const + { + return sqrt(norm()); + } + + inline T squared_distance(const vec& rhs) const + { + T dist2 = 0; + for (uint i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return dist2; + } + + inline T squared_distance(const vec& rhs, T early_out) const + { + T dist2 = 0; + for (uint i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + if (dist2 > early_out) + break; + } + return dist2; + } + + inline T distance(const vec& rhs) const + { + T dist2 = 0; + for (uint i = 0; i < N; i++) + { + T d = m_s[i] - rhs.m_s[i]; + dist2 += d * d; + } + return sqrt(dist2); + } + + inline vec inverse() const + { + vec result; + for (uint i = 0; i < N; i++) + result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; + return result; + } + + inline double normalize(const vec* pDefaultVec = NULL) + { + double n = m_s[0] * m_s[0]; + for (uint i = 1; i < N; i++) + n += m_s[i] * m_s[i]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline double normalize3(const vec* pDefaultVec = NULL) + { + CRNLIB_ASSUME(N >= 3); + + double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; + + if (n != 0) + *this *= static_cast((1.0f / sqrt(n))); + else if (pDefaultVec) + *this = *pDefaultVec; + return n; + } + + inline vec& normalize_in_place(const vec* pDefaultVec = NULL) + { + normalize(pDefaultVec); + return *this; + } + + inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) + { + normalize3(pDefaultVec); + return *this; + } + + inline vec get_normalized(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize(pDefaultVec); + return result; + } + + inline vec get_normalized3(const vec* pDefaultVec = NULL) const + { + vec result(*this); + result.normalize3(pDefaultVec); + return result; + } + + inline vec& clamp(T l, T h) + { + for (uint i = 0; i < N; i++) + m_s[i] = 
static_cast(math::clamp(m_s[i], l, h)); + return *this; + } + + inline vec& clamp(const vec& l, const vec& h) + { + for (uint i = 0; i < N; i++) + m_s[i] = static_cast(math::clamp(m_s[i], l[i], h[i])); + return *this; + } + + inline bool is_within_bounds(const vec& l, const vec& h) const + { + for (uint i = 0; i < N; i++) + if ((m_s[i] < l[i]) || (m_s[i] > h[i])) + return false; + + return true; + } + + inline bool is_within_bounds(T l, T h) const + { + for (uint i = 0; i < N; i++) + if ((m_s[i] < l) || (m_s[i] > h)) + return false; + + return true; + } + + inline uint get_major_axis(void) const + { + T m = fabs(m_s[0]); + uint r = 0; + for (uint i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c > m) + { + m = c; + r = i; + } + } + return r; + } + + inline uint get_minor_axis(void) const + { + T m = fabs(m_s[0]); + uint r = 0; + for (uint i = 1; i < N; i++) + { + const T c = fabs(m_s[i]); + if (c < m) + { + m = c; + r = i; + } + } + return r; + } + + inline T get_absolute_minimum(void) const + { + T result = fabs(m_s[0]); + for (uint i = 1; i < N; i++) + result = math::minimum(result, fabs(m_s[i])); + return result; + } + + inline T get_absolute_maximum(void) const + { + T result = fabs(m_s[0]); + for (uint i = 1; i < N; i++) + result = math::maximum(result, fabs(m_s[i])); + return result; + } + + inline T get_minimum(void) const + { + T result = m_s[0]; + for (uint i = 1; i < N; i++) + result = math::minimum(result, m_s[i]); + return result; + } + + inline T get_maximum(void) const + { + T result = m_s[0]; + for (uint i = 1; i < N; i++) + result = math::maximum(result, m_s[i]); + return result; + } + + inline vec& remove_unit_direction(const vec& dir) + { + T p = *this * dir; + *this -= (p * dir); + return *this; + } + + inline bool all_less(const vec& b) const + { + for (uint i = 0; i < N; i++) + if (m_s[i] >= b.m_s[i]) + return false; + return true; + } + + inline bool all_less_equal(const vec& b) const + { + for (uint i = 0; i < N; i++) + if (m_s[i] > 
b.m_s[i]) + return false; + return true; + } + + inline bool all_greater(const vec& b) const + { + for (uint i = 0; i < N; i++) + if (m_s[i] <= b.m_s[i]) + return false; + return true; + } + + inline bool all_greater_equal(const vec& b) const + { + for (uint i = 0; i < N; i++) + if (m_s[i] < b.m_s[i]) + return false; + return true; + } + + inline vec get_negate_xyz() const + { + vec ret; + + ret[0] = -m_s[0]; + if (N >= 2) ret[1] = -m_s[1]; + if (N >= 3) ret[2] = -m_s[2]; + + for (uint i = 3; i < N; i++) + ret[i] = m_s[i]; + + return ret; + } + + inline vec& invert() + { + for (uint i = 0; i < N; i++) + if (m_s[i] != 0.0f) + m_s[i] = 1.0f / m_s[i]; + return *this; + } + + static inline vec mul_components(const vec& lhs, const vec& rhs) + { + vec result; + for (uint i = 0; i < N; i++) + result[i] = lhs.m_s[i] * rhs.m_s[i]; + return result; + } + + static inline vec make_axis(uint i) + { + vec result; + result.clear(); + result[i] = 1; + return result; + } + + static inline vec component_max(const vec& a, const vec& b) + { + vec ret; + for (uint i = 0; i < N; i++) + ret.m_s[i] = math::maximum(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec component_min(const vec& a, const vec& b) + { + vec ret; + for (uint i = 0; i < N; i++) + ret.m_s[i] = math::minimum(a.m_s[i], b.m_s[i]); + return ret; + } + + static inline vec lerp(const vec& a, const vec& b, float t) + { + vec ret; + for (uint i = 0; i < N; i++) + ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t; + return ret; + } + + static inline vec make_random(random& r, float l, float h) + { + vec result; + for (uint i = 0; i < N; i++) + result[i] = r.frand(l, h); + return result; + } + + static inline vec make_random(fast_random& r, float l, float h) + { + vec result; + for (uint i = 0; i < N; i++) + result[i] = r.frand(l, h); + return result; + } + + static inline vec make_random(random& r, const vec& l, const vec& h) + { + vec result; + for (uint i = 0; i < N; i++) + result[i] = r.frand(l[i], h[i]); + 
return result; + } + + static inline vec make_random(fast_random& r, const vec& l, const vec& h) + { + vec result; + for (uint i = 0; i < N; i++) + result[i] = r.frand(l[i], h[i]); + return result; + } + + private: + T m_s[N]; + }; + + typedef vec<1, double> vec1D; + typedef vec<2, double> vec2D; + typedef vec<3, double> vec3D; + typedef vec<4, double> vec4D; + + typedef vec<1, float> vec1F; + + typedef vec<2, float> vec2F; + typedef crnlib::vector vec2F_array; + + typedef vec<3, float> vec3F; + typedef crnlib::vector vec3F_array; + + typedef vec<4, float> vec4F; + typedef crnlib::vector vec4F_array; + + typedef vec<2, int> vec2I; + typedef vec<3, int> vec3I; + + typedef vec<2, int16> vec2I16; + typedef vec<3, int16> vec3I16; + + template + struct scalar_type< vec > + { + enum { cFlag = true }; + static inline void construct(vec* p) { } + static inline void construct(vec* p, const vec& init) { memcpy(p, &init, sizeof(vec)); } + static inline void construct_array(vec* p, uint n) { p, n; } + static inline void destruct(vec* p) { p; } + static inline void destruct_array(vec* p, uint n) { p, n; } + }; + +} // namespace crnlib + diff --git a/crnlib/crn_vec_interval.h b/crnlib/crn_vec_interval.h new file mode 100644 index 00000000..4adec597 --- /dev/null +++ b/crnlib/crn_vec_interval.h @@ -0,0 +1,35 @@ +// File: crn_vec_interval.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_vec.h" + +namespace crnlib +{ + template + class vec_interval + { + public: + enum { N = T::num_elements }; + typedef typename T::scalar_type scalar_type; + + inline vec_interval(const T& v) { m_bounds[0] = v; m_bounds[1] = v; } + inline vec_interval(const T& low, const T& high) { m_bounds[0] = low; m_bounds[1] = high; } + + inline void clear() { m_bounds[0].clear(); m_bounds[1].clear(); } + + inline const T& operator[] (uint i) const { CRNLIB_ASSERT(i < 2); return m_bounds[i]; } + inline T& operator[] (uint i) { CRNLIB_ASSERT(i < 2); return 
m_bounds[i]; } + + private: + T m_bounds[2]; + }; + + typedef vec_interval vec_interval1F; + typedef vec_interval vec_interval2F; + typedef vec_interval vec_interval3F; + typedef vec_interval vec_interval4F; + + typedef vec_interval2F aabb2F; + typedef vec_interval3F aabb3F; + +} // namespace crnlib diff --git a/crnlib/crn_vector.cpp b/crnlib/crn_vector.cpp new file mode 100644 index 00000000..a589fdc2 --- /dev/null +++ b/crnlib/crn_vector.cpp @@ -0,0 +1,84 @@ +// File: crn_vector.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_vector.h" +#include "crn_rand.h" + +#include "crn_color.h" +#include "crn_vec.h" +#include + +namespace crnlib +{ + bool elemental_vector::increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pMover, bool nofail) + { + CRNLIB_ASSERT(m_size <= m_capacity); +#ifdef CRNLIB_PLATFORM_PC_X64 + CRNLIB_ASSERT(min_new_capacity < (0x400000000ULL / element_size)); +#else + CRNLIB_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); +#endif + + if (m_capacity >= min_new_capacity) + return true; + + size_t new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2(new_capacity))) + new_capacity = math::next_pow2(new_capacity); + + CRNLIB_ASSERT(new_capacity && (new_capacity > m_capacity)); + + const size_t desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) + { + void* new_p = crnlib_realloc(m_p, desired_size, &actual_size, true); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; +#ifdef _MSC_VER + sprintf_s(buf, sizeof(buf), "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size); +#else + sprintf(buf, "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size); +#endif + CRNLIB_FAIL(buf); + } + m_p = new_p; + } + else + { + void* new_p = crnlib_malloc(desired_size, &actual_size); + if (!new_p) + { + if (nofail) + return false; + + char buf[256]; +#ifdef 
_MSC_VER + sprintf_s(buf, sizeof(buf), "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size); +#else + sprintf(buf, "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size); +#endif + CRNLIB_FAIL(buf); + } + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + crnlib_free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = static_cast(new_capacity); + + return true; + } + +} // namespace crnlib diff --git a/crnlib/crn_vector.h b/crnlib/crn_vector.h new file mode 100644 index 00000000..732b0acc --- /dev/null +++ b/crnlib/crn_vector.h @@ -0,0 +1,636 @@ +// File: crn_vector.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + struct elemental_vector + { + void* m_p; + uint m_size; + uint m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint num); + + bool increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pRelocate, bool nofail); + }; + + template + class vector : public helpers::rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + m_size(0), + m_capacity(0) + { + } + + inline vector(uint n, const T& init) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(n, false); + helpers::construct_array(m_p, n, init); + m_size = n; + } + + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + increase_capacity(other.m_size, false); + + m_size = other.m_size; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint i = m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + } + + inline 
explicit vector(uint size) : + m_p(NULL), + m_size(0), + m_capacity(0) + { + resize(size); + } + + inline ~vector() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + crnlib_free(m_p); + } + } + + inline vector& operator= (const vector& other) + { + if (this == &other) + return *this; + + if (m_capacity >= other.m_size) + resize(0); + else + { + clear(); + increase_capacity(other.m_size, false); + } + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint i = other.m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return *this; + } + + inline const T* begin() const { return m_p; } + T* begin() { return m_p; } + + inline const T* end() const { return m_p + m_size; } + T* end() { return m_p + m_size; } + + inline bool empty() const { return !m_size; } + inline uint size() const { return m_size; } + inline uint size_in_bytes() const { return m_size * sizeof(T); } + inline uint capacity() const { return m_capacity; } + + // operator[] will assert on out of range indices, but in final builds there is (and will never be) any range checking on this method. + inline const T& operator[] (uint i) const { CRNLIB_ASSERT(i < m_size); return m_p[i]; } + inline T& operator[] (uint i) { CRNLIB_ASSERT(i < m_size); return m_p[i]; } + + // at() always includes range checking, even in final builds, unlike operator []. + // The first element is returned if the index is out of range. + inline const T& at(uint i) const { CRNLIB_ASSERT(i < m_size); return (i >= m_size) ? m_p[0] : m_p[i]; } + inline T& at(uint i) { CRNLIB_ASSERT(i < m_size); return (i >= m_size) ? 
m_p[0] : m_p[i]; } + + inline const T& front() const { CRNLIB_ASSERT(m_size); return m_p[0]; } + inline T& front() { CRNLIB_ASSERT(m_size); return m_p[0]; } + + inline const T& back() const { CRNLIB_ASSERT(m_size); return m_p[m_size - 1]; } + inline T& back() { CRNLIB_ASSERT(m_size); return m_p[m_size - 1]; } + + inline const T* get_ptr() const { return m_p; } + inline T* get_ptr() { return m_p; } + + inline void clear() + { + if (m_p) + { + scalar_type::destruct_array(m_p, m_size); + crnlib_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void clear_no_destruction() + { + if (m_p) + { + crnlib_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + } + + inline void reserve(uint new_capacity) + { + if (new_capacity > m_capacity) + increase_capacity(new_capacity, false); + else if (new_capacity < m_capacity) + { + // Must work around the lack of a "decrease_capacity()" method. + // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize. 
+ vector tmp; + tmp.increase_capacity(math::maximum(m_size, new_capacity), false); + tmp = *this; + swap(tmp); + } + } + + inline bool try_reserve(uint new_capacity) + { + return increase_capacity(new_capacity, true, true); + } + + inline void resize(uint new_size, bool grow_hint = false) + { + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint); + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + } + + inline bool try_resize(uint new_size, bool grow_hint = false) + { + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true)) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + inline T* enlarge(uint i) + { + uint cur_size = m_size; + resize(cur_size + i, true); + return get_ptr() + cur_size; + } + + inline T* try_enlarge(uint i) + { + uint cur_size = m_size; + if (!try_resize(cur_size + i, true)) + return NULL; + return get_ptr() + cur_size; + } + + inline void push_back(const T& obj) + { + CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + increase_capacity(m_size + 1, true); + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + inline bool try_push_back(const T& obj) + { + CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void push_back_value(T obj) + { + if (m_size >= m_capacity) + 
increase_capacity(m_size + 1, true); + + scalar_type::construct(m_p + m_size, obj); + m_size++; + } + + inline void pop_back() + { + CRNLIB_ASSERT(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline void insert(uint index, const T* p, uint n) + { + CRNLIB_ASSERT(index <= m_size); + if (!n) + return; + + const uint orig_size = m_size; + resize(m_size + n, true); + + const uint num_to_move = orig_size - index; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move); + else + { + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + for (uint i = 0; i < num_to_move; i++) + { + CRNLIB_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + } + + T* pDst = m_p + index; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + memcpy(pDst, p, sizeof(T) * n); + else + { + for (uint i = 0; i < n; i++) + { + CRNLIB_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + } + + // push_front() isn't going to be very fast - it's only here for usability. 
+ inline void push_front(const T& obj) + { + insert(0, &obj, 1); + } + + vector& append(const vector& other) + { + if (other.m_size) + insert(m_size, &other[0], other.m_size); + return *this; + } + + vector& append(const T* p, uint n) + { + if (n) + insert(m_size, p, n); + return *this; + } + + inline void erase(uint start, uint n) + { + CRNLIB_ASSERT((start + n) <= m_size); + if ((start + n) > m_size) + return; + + if (!n) + return; + + const uint num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + + const T* pSrc = m_p + start + n; + + if (CRNLIB_IS_BITWISE_COPYABLE(T)) + memmove(pDst, pSrc, num_to_move * sizeof(T)); + else + { + T* pDst_end = pDst + num_to_move; + + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + scalar_type::destruct_array(pDst_end, n); + } + + m_size -= n; + } + + inline void erase(uint index) + { + erase(index, 1); + } + + inline void erase(T* p) + { + CRNLIB_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(static_cast(p - m_p)); + } + + void erase_unordered(uint index) + { + CRNLIB_ASSERT(index < m_size); + + if ((index + 1) < m_size) + (*this)[index] = back(); + + pop_back(); + } + + inline bool operator== (const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const uint min_size = math::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + inline void swap(vector& other) + { + utils::swap(m_p, other.m_p); + utils::swap(m_size, other.m_size); + 
utils::swap(m_capacity, other.m_capacity); + } + + inline void sort() + { + std::sort(begin(), end()); + } + + inline void unique() + { + if (!empty()) + { + sort(); + + resize(std::unique(begin(), end()) - begin()); + } + } + + inline void reverse() + { + uint j = m_size >> 1; + for (uint i = 0; i < j; i++) + utils::swap(m_p[i], m_p[m_size - 1 - i]); + } + + inline int find(const T& key) const + { + const T* p = m_p; + const T* p_end = m_p + m_size; + + uint index = 0; + + while (p != p_end) + { + if (key == *p) + return index; + + p++; + index++; + } + + return cInvalidIndex; + } + + inline int find_sorted(const T& key) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. + int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for ( ; ; ) + { + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + const T* pKey_i = m_p + i; + int cmp = key < *pKey_i; + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + pKey_i = m_p + i; + cmp = key < *pKey_i; + if ((!cmp) && (key == *pKey_i)) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + } + } + + return cInvalidIndex; + } + + template + inline int find_sorted(const T& key, Q less_than) const + { + if (m_size) + { + // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice. 
+ int i = ((m_size + 1) >> 1) - 1; + int m = m_size; + + for ( ; ; ) + { + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + const T* pKey_i = m_p + i; + int cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + + CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size); + pKey_i = m_p + i; + cmp = less_than(key, *pKey_i); + if ((!cmp) && (!less_than(*pKey_i, key))) return i; + m >>= 1; + if (!m) break; + cmp = -cmp; + i += (((m + 1) >> 1) ^ cmp) - cmp; + } + } + + return cInvalidIndex; + } + + inline uint count_occurences(const T& key) const + { + uint c = 0; + + const T* p = m_p; + const T* p_end = m_p + m_size; + + while (p != p_end) + { + if (key == *p) + c++; + + p++; + } + + return c; + } + + inline void set_all(const T& o) + { + if ((sizeof(T) == 1) && (scalar_type::cFlag)) + memset(m_p, *reinterpret_cast(&o), m_size); + else + { + T* pDst = m_p; + T* pDst_end = pDst + m_size; + while (pDst != pDst_end) + *pDst++ = o; + } + } + + inline void *assume_ownership() + { + T* p = m_p; + m_p = NULL; + m_size = 0; + m_capacity = 0; + return p; + } + + private: + T* m_p; + uint m_size; + uint m_capacity; + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + // placement new + new (static_cast(pDst)) T(*pSrc); + pSrc->~T(); + pSrc++; + pDst++; + } + } + + inline bool increase_capacity(uint min_new_capacity, bool grow_hint, bool nofail = false) + { + return reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + (CRNLIB_IS_BITWISE_MOVABLE(T) || (is_vector::cFlag)) ? 
NULL : object_mover, nofail); + } + }; + + template struct bitwise_movable< vector > { enum { cFlag = true }; }; + + extern void vector_test(); + + template + inline void swap(vector& a, vector& b) + { + a.swap(b); + } + +} // namespace crnlib + diff --git a/crnlib/crn_win32_console.cpp b/crnlib/crn_win32_console.cpp new file mode 100644 index 00000000..4d7e6556 --- /dev/null +++ b/crnlib/crn_win32_console.cpp @@ -0,0 +1,116 @@ +// File: crn_win32_console.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_win32_console.h" +#include "crn_winhdr.h" + +namespace crnlib +{ + void win32_console::init() + { + console::init(); + console::add_console_output_func(console_output_func, NULL); + } + + void win32_console::deinit() + { + console::remove_console_output_func(console_output_func); + console::deinit(); + } + + void win32_console::tick() + { + } + +#ifdef CRNLIB_PLATFORM_PC + bool win32_console::console_output_func(eConsoleMessageType type, const wchar_t* pMsg, void* pData) + { + pData; + + if (console::get_output_disabled()) + return true; + + HANDLE cons = GetStdHandle(STD_OUTPUT_HANDLE); + + DWORD attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE; + switch (type) + { + case cDebugConsoleMessage: attr = FOREGROUND_BLUE | FOREGROUND_INTENSITY; break; + case cMessageConsoleMessage: attr = FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY; break; + case cWarningConsoleMessage: attr = FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; break; + case cErrorConsoleMessage: attr = FOREGROUND_RED | FOREGROUND_INTENSITY; break; + default: break; + } + + if (INVALID_HANDLE_VALUE != cons) + SetConsoleTextAttribute(cons, (WORD)attr); + + if (console::get_prefixes()) + { + switch (type) + { + case cDebugConsoleMessage: + wprintf(L"Debug: %s", pMsg); + break; + case cWarningConsoleMessage: + wprintf(L"Warning: %s", pMsg); + break; + case cErrorConsoleMessage: + wprintf(L"Error: %s", pMsg); + break; + 
default: + wprintf(L"%s", pMsg); + break; + } + } + else + { + wprintf(L"%s", pMsg); + } + + if (console::get_crlf()) + wprintf(L"\n"); + + if (INVALID_HANDLE_VALUE != cons) + SetConsoleTextAttribute(cons, FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE); + + return true; + } +#else + bool win32_console::console_output_func(eConsoleMessageType type, const wchar_t* pMsg, void* pData) + { + if (console::get_output_disabled()) + return true; + + if (console::get_prefixes()) + { + switch (type) + { + case cDebugConsoleMessage: + wprintf(L"Debug: %s", pMsg); + break; + case cWarningConsoleMessage: + wprintf(L"Warning: %s", pMsg); + break; + case cErrorConsoleMessage: + wprintf(L"Error: %s", pMsg); + break; + default: + wprintf(L"%s", pMsg); + break; + } + } + else + { + wprintf(L"%s", pMsg); + } + + if (console::get_crlf()) + wprintf(L"\n"); + + return true; + } +#endif + +} // namespace crnlib + diff --git a/crnlib/crn_win32_console.h b/crnlib/crn_win32_console.h new file mode 100644 index 00000000..b2f12c66 --- /dev/null +++ b/crnlib/crn_win32_console.h @@ -0,0 +1,21 @@ +// File: crn_win32_console.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_console.h" +#include "crn_event.h" + +namespace crnlib +{ + class win32_console + { + public: + static void init(); + static void deinit(); + static void tick(); + + private: + static bool console_output_func(eConsoleMessageType type, const wchar_t* pMsg, void* pData); + + }; + +} // namespace crnlib diff --git a/crnlib/crn_win32_file_utils.cpp b/crnlib/crn_win32_file_utils.cpp new file mode 100644 index 00000000..a076a9a9 --- /dev/null +++ b/crnlib/crn_win32_file_utils.cpp @@ -0,0 +1,69 @@ +// File: crn_win32_file_utils.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_win32_file_utils.h" +#include "crn_winhdr.h" + +namespace crnlib +{ + bool win32_file_utils::does_file_exist(const wchar_t* pFilename) + { + const DWORD 
fullAttributes = GetFileAttributesW(pFilename); + + if (fullAttributes == INVALID_FILE_ATTRIBUTES) + return false; + + if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) + return false; + + return true; + } + + bool win32_file_utils::does_dir_exist(const wchar_t* pDir) + { + //-- Get the file attributes. + DWORD fullAttributes = GetFileAttributesW(pDir); + + if (fullAttributes == INVALID_FILE_ATTRIBUTES) + return false; + + if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) + return true; + + return false; + } + + bool win32_file_utils::get_file_size(const wchar_t* pFilename, uint64& file_size) + { + file_size = 0; + + WIN32_FILE_ATTRIBUTE_DATA attr; + + if (0 == GetFileAttributesExW(pFilename, GetFileExInfoStandard, &attr)) + return false; + + if (attr.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + return false; + + file_size = static_cast(attr.nFileSizeLow) | (static_cast(attr.nFileSizeHigh) << 32U); + + return true; + } + + bool win32_file_utils::get_file_size(const wchar_t* pFilename, uint32& file_size) + { + uint64 file_size64; + if (!get_file_size(pFilename, file_size64)) + { + file_size = 0; + return false; + } + + if (file_size64 > UINT32_MAX) + file_size64 = UINT32_MAX; + + file_size = static_cast(file_size64); + return true; + } + +} // namespace crnlib diff --git a/crnlib/crn_win32_file_utils.h b/crnlib/crn_win32_file_utils.h new file mode 100644 index 00000000..1f720ba5 --- /dev/null +++ b/crnlib/crn_win32_file_utils.h @@ -0,0 +1,15 @@ +// File: crn_win32_file_utils.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + struct win32_file_utils + { + static bool does_file_exist(const wchar_t* pFilename); + static bool does_dir_exist(const wchar_t* pDir); + static bool get_file_size(const wchar_t* pFilename, uint64& file_size); + static bool get_file_size(const wchar_t* pFilename, uint32& file_size); + }; + +} // namespace crnlib diff --git a/crnlib/crn_win32_find_files.cpp b/crnlib/crn_win32_find_files.cpp new 
file mode 100644 index 00000000..e60ddcaf --- /dev/null +++ b/crnlib/crn_win32_find_files.cpp @@ -0,0 +1,176 @@ +// File: crn_win32_find_files.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_win32_find_files.h" +#include "crn_win32_file_utils.h" +#include "crn_strutils.h" + +namespace crnlib +{ + bool find_files::find(const wchar_t* pBasepath, const wchar_t* pFilespec, uint flags) + { + m_last_error = S_OK; + m_files.resize(0); + + return find_internal(pBasepath, L"", pFilespec, flags); + } + + bool find_files::find(const wchar_t* pSpec, uint flags) + { + dynamic_wstring find_name(pSpec); + + if (!full_path(find_name)) + return false; + + dynamic_wstring find_pathname, find_filename; + if (!split_path(find_name.get_ptr(), find_pathname, find_filename)) + return false; + + return find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); + } + + bool find_files::find_internal(const wchar_t* pBasepath, const wchar_t* pRelpath, const wchar_t* pFilespec, uint flags) + { + WIN32_FIND_DATAW find_data; + + dynamic_wstring filename; + + dynamic_wstring_array child_paths; + if (flags & cFlagRecursive) + { + if (wcslen(pRelpath)) + combine_path(filename, pBasepath, pRelpath, L"*"); + else + combine_path(filename, pBasepath, L"*"); + + HANDLE handle = FindFirstFileW(filename.get_ptr(), &find_data); + if (handle == INVALID_HANDLE_VALUE) + { + HRESULT hres = GetLastError(); + if ((hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) + { + m_last_error = hres; + return false; + } + } + else + { + do + { + const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + + bool skip = !is_dir; + if (is_dir) + skip = (wcscmp(find_data.cFileName, L".") == 0) || (wcscmp(find_data.cFileName, L"..") == 0); + + if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) + skip = true; + + if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) + { + if ((flags & cFlagAllowHidden) == 
0) + skip = true; + } + + if (!skip) + { + dynamic_wstring child_path(find_data.cFileName); + if ((!child_path.count_char(L'?')) && (!child_path.count_char(L'*'))) + child_paths.push_back(child_path); + } + + } while (FindNextFileW(handle, &find_data) != 0); + + HRESULT hres = GetLastError(); + + FindClose(handle); + handle = INVALID_HANDLE_VALUE; + + if (hres != ERROR_NO_MORE_FILES) + { + m_last_error = hres; + return false; + } + } + } + + if (wcslen(pRelpath)) + combine_path(filename, pBasepath, pRelpath, pFilespec); + else + combine_path(filename, pBasepath, pFilespec); + + HANDLE handle = FindFirstFileW(filename.get_ptr(), &find_data); + if (handle == INVALID_HANDLE_VALUE) + { + HRESULT hres = GetLastError(); + if ((hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) + { + m_last_error = hres; + return false; + } + } + else + { + do + { + const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; + + bool skip = false; + if (is_dir) + skip = (wcscmp(find_data.cFileName, L".") == 0) || (wcscmp(find_data.cFileName, L"..") == 0); + + if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) + skip = true; + + if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) + { + if ((flags & cFlagAllowHidden) == 0) + skip = true; + } + + if (!skip) + { + if (((is_dir) && (flags & cFlagAllowDirs)) || ((!is_dir) && (flags & cFlagAllowFiles))) + { + m_files.resize(m_files.size() + 1); + file_desc& file = m_files.back(); + file.m_is_dir = is_dir; + file.m_base = pBasepath; + file.m_name = find_data.cFileName; + file.m_rel = pRelpath; + if (wcslen(pRelpath)) + combine_path(file.m_fullname, pBasepath, pRelpath, find_data.cFileName); + else + combine_path(file.m_fullname, pBasepath, find_data.cFileName); + } + } + + } while (FindNextFileW(handle, &find_data) != 0); + + HRESULT hres = GetLastError(); + + FindClose(handle); + + if (hres != ERROR_NO_MORE_FILES) + { + m_last_error = hres; + return false; + } + } + + for (uint i = 0; 
i < child_paths.size(); i++) + { + dynamic_wstring child_path; + if (wcslen(pRelpath)) + combine_path(child_path, pRelpath, child_paths[i].get_ptr()); + else + child_path = child_paths[i]; + + if (!find_internal(pBasepath, child_path.get_ptr(), pFilespec, flags)) + return false; + } + + return true; + } + +} // namespace crnlib diff --git a/crnlib/crn_win32_find_files.h b/crnlib/crn_win32_find_files.h new file mode 100644 index 00000000..84e4e22e --- /dev/null +++ b/crnlib/crn_win32_find_files.h @@ -0,0 +1,55 @@ +// File: crn_win32_find_files.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once +#include "crn_winhdr.h" + +namespace crnlib +{ + class find_files + { + public: + struct file_desc + { + inline file_desc() : m_is_dir(false) { } + + dynamic_wstring m_fullname; + dynamic_wstring m_base; + dynamic_wstring m_rel; + dynamic_wstring m_name; + bool m_is_dir; + + inline bool operator== (const file_desc& other) const { return m_fullname == other.m_fullname; } + inline bool operator< (const file_desc& other) const { return m_fullname < other.m_fullname; } + + inline operator size_t() const { return static_cast(m_fullname); } + }; + + typedef crnlib::vector file_desc_vec; + + find_files() : m_last_error(S_OK) { } + + enum flags + { + cFlagRecursive = 1, + cFlagAllowDirs = 2, + cFlagAllowFiles = 4, + cFlagAllowHidden = 8 + }; + + bool find(const wchar_t* pBasepath, const wchar_t* pFilespec, uint flags = cFlagAllowFiles); + + bool find(const wchar_t* pSpec, uint flags = cFlagAllowFiles); + + inline HRESULT get_last_error() const { return m_last_error; } + + const file_desc_vec& get_files() const { return m_files; } + + private: + file_desc_vec m_files; + HRESULT m_last_error; + + bool find_internal(const wchar_t* pBasepath, const wchar_t* pRelpath, const wchar_t* pFilespec, uint flags); + + }; // class find_files + +} // namespace crnlib diff --git a/crnlib/crn_win32_threading.cpp b/crnlib/crn_win32_threading.cpp new file mode 100644 index 
00000000..30cedfe8 --- /dev/null +++ b/crnlib/crn_win32_threading.cpp @@ -0,0 +1,36 @@ +// File: crn_win32_threading.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_win32_threading.h" +#include "crn_winhdr.h" + +namespace crnlib +{ + uint g_number_of_processors = 1; + + int32 interlocked_compare_exchange32(int32 volatile *Destination, int32 Exchange, int32 Comperand) + { + CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); + return InterlockedCompareExchange((volatile LONG*)Destination, Exchange, Comperand); + } + + int32 interlocked_increment32(int32 volatile *lpAddend) + { + return InterlockedIncrement((volatile LONG*)lpAddend); + } + + int32 interlocked_exchange_add32(int32 volatile *Addend, int32 Value) + { + return InterlockedExchangeAdd((volatile LONG*)Addend, Value); + } + + int32 interlocked_exchange32(int32 volatile *Target, int32 Value) + { + return InterlockedExchange((volatile LONG*)Target, Value); + } + + uint32 get_current_thread_id() + { + return GetCurrentThreadId(); + } +} diff --git a/crnlib/crn_win32_threading.h b/crnlib/crn_win32_threading.h new file mode 100644 index 00000000..4e842791 --- /dev/null +++ b/crnlib/crn_win32_threading.h @@ -0,0 +1,18 @@ +// File: crn_win32_threading.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + // g_number_of_processors defaults to 1. Will be higher on multicore machines. 
+ extern uint g_number_of_processors; + + int32 interlocked_compare_exchange32(int32 volatile *Destination, int32 Exchange, int32 Comperand); + int32 interlocked_increment32(int32 volatile *lpAddend); + int32 interlocked_exchange_add32(int32 volatile *Addend, int32 Value); + int32 interlocked_exchange32(int32 volatile *Target, int32 Value); + uint32 get_current_thread_id(); + +} // namespace crnlib + + diff --git a/crnlib/crn_win32_timer.cpp b/crnlib/crn_win32_timer.cpp new file mode 100644 index 00000000..9c2a40ea --- /dev/null +++ b/crnlib/crn_win32_timer.cpp @@ -0,0 +1,119 @@ +// File: crn_win32_timer.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "crn_win32_timer.h" +#include "crn_winhdr.h" + +namespace crnlib +{ + uint64 timer::g_init_ticks; + uint64 timer::g_freq; + double timer::g_inv_freq; + + timer::timer() : + m_start_time(0), + m_stop_time(0), + m_started(false), + m_stopped(false) + { + if (!g_inv_freq) init(); + } + + timer::timer(timer_ticks start_ticks) + { + if (!g_inv_freq) init(); + + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; + } + + void timer::start(timer_ticks start_ticks) + { + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; + } + + void timer::start() + { + QueryPerformanceCounter((LARGE_INTEGER*)&m_start_time); + + m_started = true; + m_stopped = false; + } + + void timer::stop() + { + CRNLIB_ASSERT(m_started); + + QueryPerformanceCounter((LARGE_INTEGER*)&m_stop_time); + + m_stopped = true; + } + + double timer::get_elapsed_secs() const + { + CRNLIB_ASSERT(m_started); + if (!m_started) + return 0; + + uint64 stop_time = m_stop_time; + if (!m_stopped) + QueryPerformanceCounter((LARGE_INTEGER*)&stop_time); + + uint64 delta = stop_time - m_start_time; + return delta * g_inv_freq; + } + + uint64 timer::get_elapsed_us() const + { + CRNLIB_ASSERT(m_started); + if (!m_started) + return 0; + + uint64 stop_time = m_stop_time; + if (!m_stopped) + 
QueryPerformanceCounter((LARGE_INTEGER*)&stop_time); + + uint64 delta = stop_time - m_start_time; + return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; + } + + void timer::init() + { + if (!g_inv_freq) + { + QueryPerformanceFrequency((LARGE_INTEGER*)&g_freq); + g_inv_freq = 1.0f / g_freq; + + QueryPerformanceCounter((LARGE_INTEGER*)&g_init_ticks); + } + } + + timer_ticks timer::get_init_ticks() + { + if (!g_inv_freq) init(); + + return g_init_ticks; + } + + timer_ticks timer::get_ticks() + { + if (!g_inv_freq) init(); + + timer_ticks ticks; + QueryPerformanceCounter((LARGE_INTEGER*)&ticks); + return ticks; + } + + double timer::ticks_to_secs(timer_ticks ticks) + { + if (!g_inv_freq) init(); + + return ticks * g_inv_freq; + } + +} // namespace crnlib diff --git a/crnlib/crn_win32_timer.h b/crnlib/crn_win32_timer.h new file mode 100644 index 00000000..f950aa24 --- /dev/null +++ b/crnlib/crn_win32_timer.h @@ -0,0 +1,40 @@ +// File: crn_win32_timer.h +// See Copyright Notice and license at the end of inc/crnlib.h +#pragma once + +namespace crnlib +{ + typedef uint64 timer_ticks; + + class timer + { + public: + timer(); + timer(timer_ticks start_ticks); + + void start(); + void start(timer_ticks start_ticks); + + void stop(); + + double get_elapsed_secs() const; + uint64 get_elapsed_us() const; + + static void init(); + static timer_ticks get_init_ticks(); + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + + private: + static uint64 g_init_ticks; + static uint64 g_freq; + static double g_inv_freq; + + uint64 m_start_time; + uint64 m_stop_time; + + bool m_started : 1; + bool m_stopped : 1; + }; + +} // namespace crnlib diff --git a/crnlib/crn_winhdr.h b/crnlib/crn_winhdr.h new file mode 100644 index 00000000..ea60e52c --- /dev/null +++ b/crnlib/crn_winhdr.h @@ -0,0 +1,15 @@ +#pragma once + +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x500 +#endif + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX 
+#define NOMINMAX +#endif + +#include "windows.h" diff --git a/crnlib/crn_zeng.cpp b/crnlib/crn_zeng.cpp new file mode 100644 index 00000000..9f1fa2b2 --- /dev/null +++ b/crnlib/crn_zeng.cpp @@ -0,0 +1,289 @@ +// File: crn_zeng.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +// Modified Zeng's technique for codebook/palette reordering +// Evaluation of some reordering techniques for image VQ index compression, António R. C. Paiva , O J. Pinho +// http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.88.7221 +#include "crn_core.h" +#include "crn_zeng.h" +#include "crn_sparse_array.h" +#include + +#define USE_SPARSE_ARRAY 1 + +namespace crnlib +{ +#if USE_SPARSE_ARRAY + typedef sparse_array hist_type; +#else + typedef crnlib::vector hist_type; +#endif + + static inline void update_hist(hist_type& hist, int i, int j, int n) + { + if (i == j) + return; + + if ((i != -1) && (j != -1) && (i < j)) + { + CRNLIB_ASSERT( (i >= 0) && (i < (int)n) ); + CRNLIB_ASSERT( (j >= 0) && (j < (int)n) ); + + uint index = i * n + j; + +#if USE_SPARSE_ARRAY + uint freq = hist[index]; + freq++; + hist.set(index, freq); +#else + hist[index]++; +#endif + } + } + + static inline uint read_hist(hist_type& hist, int i, int j, int n) + { + if (i > j) + utils::swap(i, j); + + return hist[i * n + j]; + } + + void create_zeng_reorder_table(uint n, uint num_indices, const uint* pIndices, crnlib::vector& remap_table, zeng_similarity_func pFunc, void* pContext, float similarity_func_weight) + { + CRNLIB_ASSERT((n > 0) && (num_indices > 0)); + CRNLIB_ASSERT_CLOSED_RANGE(similarity_func_weight, 0.0f, 1.0f); + +// printf("create_zeng_reorder_table start:\n"); + + remap_table.clear(); + remap_table.resize(n); + + if (num_indices <= 1) + return; + + const uint t = n * n; + hist_type xhist(t); + + for (uint i = 0; i < num_indices; i++) + { + const int prev_val = (i > 0) ? pIndices[i-1] : -1; + const int cur_val = pIndices[i]; + const int next_val = (i < (num_indices - 1)) ? 
pIndices[i+1] : -1; + + update_hist(xhist, cur_val, prev_val, n); + update_hist(xhist, cur_val, next_val, n); + } + +#if 0 + uint total1 = 0, total2 = 0; + for (uint i = 0; i < n; i++) + { + for (uint j = 0; j < n; j++) + { + if (i == j) + continue; + + //uint a = hist[i * n + j]; + //total1 += a; + + uint c = read_hist(xhist, i, j, n); + total2 += c; + } + } + + printf("%u %u\n", total1, total2); +#endif + + uint max_freq = 0; + uint max_index = 0; + for (uint i = 0; i < t; i++) + { + if (xhist[i] > max_freq) + { + max_freq = xhist[i]; + max_index = i; + } + } + + uint x = max_index / n; + uint y = max_index % n; + + crnlib::vector values_chosen; + values_chosen.reserve(n); + + values_chosen.push_back(static_cast(x)); + values_chosen.push_back(static_cast(y)); + + crnlib::vector values_remaining; + if (n > 2) + values_remaining.reserve(n - 2); + for (uint i = 0; i < n; i++) + if ((i != x) && (i != y)) + values_remaining.push_back(static_cast(i)); + + crnlib::vector total_freq_to_chosen_values(n); + for (uint i = 0; i < values_remaining.size(); i++) + { + uint u = values_remaining[i]; + + uint total_freq = 0; + + for (uint j = 0; j < values_chosen.size(); j++) + { + uint l = values_chosen[j]; + + total_freq += read_hist(xhist, u, l, n); //[u * n + l]; + } + + total_freq_to_chosen_values[u] = total_freq; + } + + while (!values_remaining.empty()) + { + double best_freq = 0; + uint best_i = 0; + + for (uint i = 0; i < values_remaining.size(); i++) + { + uint u = values_remaining[i]; + + #if 0 + double total_freq = 0; + + for (uint j = 0; j < values_chosen.size(); j++) + { + uint l = values_chosen[j]; + + total_freq += read_hist(xhist, u, l, n); //[u * n + l]; + } + + CRNLIB_ASSERT(total_freq_to_chosen_values[u] == total_freq); + #else + double total_freq = total_freq_to_chosen_values[u]; + #endif + + if (pFunc) + { + float weight = math::maximum( + (*pFunc)(u, values_chosen.front(), pContext), + (*pFunc)(u, values_chosen.back(), pContext) ); + + 
CRNLIB_ASSERT_CLOSED_RANGE(weight, 0.0f, 1.0f); + + weight = math::lerp(1.0f - similarity_func_weight, 1.0f + similarity_func_weight, weight); + + total_freq = (total_freq + 1.0f) * weight; + } + + if (total_freq > best_freq) + { + best_freq = total_freq; + best_i = i; + } + } + + const uint u = values_remaining[best_i]; + + float side = 0; + int left_freq = 0; + int right_freq = 0; + + for (uint j = 0; j < values_chosen.size(); j++) + { + const uint l = values_chosen[j]; + + int freq = read_hist(xhist, u, l, n); //[u * n + l]; + int scale = (values_chosen.size() + 1 - 2 * (j + 1)); + + side = side + (float)(scale * freq); + + if (scale < 0) + right_freq += -scale * freq; + else + left_freq += scale * freq; + } + + if (pFunc) + { + float weight_left = (*pFunc)(u, values_chosen.front(), pContext); + float weight_right = (*pFunc)(u, values_chosen.back(), pContext); + + weight_left = math::lerp(1.0f - similarity_func_weight, 1.0f + similarity_func_weight, weight_left); + weight_right = math::lerp(1.0f - similarity_func_weight, 1.0f + similarity_func_weight, weight_right); + + side = weight_left * left_freq - weight_right * right_freq; + } + + if (side > 0) + values_chosen.push_front(static_cast(u)); + else + values_chosen.push_back(static_cast(u)); + + values_remaining.erase(values_remaining.begin() + best_i); + + for (uint i = 0; i < values_remaining.size(); i++) + { + const uint r = values_remaining[i]; + + total_freq_to_chosen_values[r] += read_hist(xhist, r, u, n); //[r * n + u]; + } + } + + for (uint i = 0; i < n; i++) + { + uint v = values_chosen[i]; + remap_table[v] = i; + } + + #if 0 + uint before_sum = 0; + uint after_sum = 0; + { + printf("\nBEFORE:\n"); + crnlib::vector delta_hist(n*2); + + int sum = 0; + for (uint i = 1; i < num_indices; i++) + { + int prev = pIndices[i-1]; + int cur = pIndices[i]; + delta_hist[prev-cur+n]++; + sum += labs(prev-cur); + } + + printf("\n"); + for (uint i = 0; i < n*2; i++) + printf("%04u ", delta_hist[i]); + + printf("\nSum: 
%i\n", sum); + before_sum = sum; + } + + { + printf("AFTER:\n"); + crnlib::vector delta_hist(n*2); + + int sum = 0; + for (uint i = 1; i < num_indices; i++) + { + int prev = remap_table[pIndices[i-1]]; + int cur = remap_table[pIndices[i]]; + delta_hist[prev-cur+n]++; + sum += labs(prev-cur); + } + + printf("\n"); + for (uint i = 0; i < n*2; i++) + printf("%04u ", delta_hist[i]); + + printf("\nSum: %i\n", sum); + after_sum = sum; + } + printf("Before sum: %u, After sum: %u\n", before_sum, after_sum); + #endif + +// printf("create_zeng_reorder_table end:\n"); + } + +} // namespace crnlib + diff --git a/crnlib/crn_zeng.h b/crnlib/crn_zeng.h new file mode 100644 index 00000000..bf37bea4 --- /dev/null +++ b/crnlib/crn_zeng.h @@ -0,0 +1,10 @@ +// File: crn_zeng.h +// See Copyright Notice and license at the end of inc/crnlib.h + +namespace crnlib +{ + typedef float (*zeng_similarity_func)(uint index_a, uint index_b, void* pContext); + + void create_zeng_reorder_table(uint n, uint num_indices, const uint* pIndices, crnlib::vector& remap_table, zeng_similarity_func pFunc, void* pContext, float similarity_func_weight); + +} // namespace crnlib diff --git a/crnlib/crnlib.2008.vcproj b/crnlib/crnlib.2008.vcproj new file mode 100644 index 00000000..3eb44696 --- /dev/null +++ b/crnlib/crnlib.2008.vcprojdiff --git a/crnlib/crnlib.cbp b/crnlib/crnlib.cbp new file mode 100644 index 00000000..602d9659 --- /dev/null +++ b/crnlib/crnlib.cbp @@ -0,0 +1,218 @@ + + + + + + diff --git a/crnlib/crnlib.cpp b/crnlib/crnlib.cpp new file mode 100644 index 00000000..86ec2e65 --- /dev/null +++ b/crnlib/crnlib.cpp @@ -0,0 +1,370 @@ +// File: crnlib.cpp +// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" +#include "../inc/crnlib.h" +#include "crn_comp.h" +#include "crn_dds_comp.h" +#include "crn_dynamic_stream.h" +#include "crn_buffer_stream.h" +#include "crn_ryg_dxt.hpp" + +#include "crn_winhdr.h" + +#define CRND_HEADER_FILE_ONLY +#include "../inc/crn_decomp.h" 
+ +namespace crnlib +{ + static void* realloc_func(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data) + { + pUser_data; + return crnlib_realloc(p, size, pActual_size, movable); + } + + static size_t msize_func(void* p, void* pUser_data) + { + pUser_data; + return crnlib_msize(p); + } + + class crnlib_global_initializer + { + public: + crnlib_global_initializer() + { + ryg_dxt::sInitDXT(); + +#ifdef CRNLIB_PLATFORM_PC + SYSTEM_INFO g_system_info; + GetSystemInfo(&g_system_info); + + g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); +#endif + + crnlib_enable_fail_exceptions(true); + + // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. + crnd::crnd_set_memory_callbacks(realloc_func, msize_func, NULL); + } + }; + + crnlib_global_initializer g_crnlib_initializer; +} + +using namespace crnlib; + +const char* crn_get_format_stringa(crn_format fmt) +{ + return pixel_format_helpers::get_crn_format_stringa(fmt); +} + +const wchar_t* crn_get_format_string(crn_format fmt) +{ + return pixel_format_helpers::get_crn_format_string(fmt); +} + +crn_uint32 crn_get_format_fourcc(crn_format fmt) +{ + return crnd::crnd_crn_format_to_fourcc(fmt); +} + +crn_uint32 crn_get_format_bits_per_texel(crn_format fmt) +{ + return crnd::crnd_get_crn_format_bits_per_texel(fmt); +} + +crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt) +{ + return crnd::crnd_get_bytes_per_dxt_block(fmt); +} + +crn_format crn_get_fundamental_dxt_format(crn_format fmt) +{ + return crnd::crnd_get_fundamental_dxt_format(fmt); +} + +const wchar_t* crn_get_file_type_ext(crn_file_type file_type) +{ + switch (file_type) + { + case cCRNFileTypeDDS: return L"dds"; + case cCRNFileTypeCRN: return L"crn"; + default: break; + } + return L"?"; +} + +const char* crn_get_file_type_exta(crn_file_type file_type) +{ + switch (file_type) + { + case cCRNFileTypeDDS: return "dds"; + case cCRNFileTypeCRN: return "crn"; + 
default: break; + } + return "?"; +} + +const wchar_t* crn_get_mip_mode_desc(crn_mip_mode m) +{ + switch (m) + { + case cCRNMipModeUseSourceOrGenerateMips: return L"Use source/generate if none"; + case cCRNMipModeUseSourceMips: return L"Only use source MIP maps (if any)"; + case cCRNMipModeGenerateMips: return L"Always generate new MIP maps"; + case cCRNMipModeNoMips: return L"No MIP maps"; + default: break; + } + return L"?"; +} + +const wchar_t* crn_get_mip_mode_name(crn_mip_mode m) +{ + switch (m) + { + case cCRNMipModeUseSourceOrGenerateMips: return L"UseSourceOrGenerate"; + case cCRNMipModeUseSourceMips: return L"UseSource"; + case cCRNMipModeGenerateMips: return L"Generate"; + case cCRNMipModeNoMips: return L"None"; + default: break; + } + return L"?"; +} + +const char* crn_get_mip_filter_name(crn_mip_filter f) +{ + switch (f) + { + case cCRNMipFilterBox: return "box"; + case cCRNMipFilterTent: return "tent"; + case cCRNMipFilterLanczos4: return "lanczos4"; + case cCRNMipFilterMitchell: return "mitchell"; + case cCRNMipFilterKaiser: return "kaiser"; + default: break; + } + return "?"; +} + +const wchar_t* crn_get_scale_mode_desc(crn_scale_mode sm) +{ + switch (sm) + { + case cCRNSMDisabled: return L"disabled"; + case cCRNSMAbsolute: return L"absolute"; + case cCRNSMRelative: return L"relative"; + case cCRNSMLowerPow2: return L"lowerpow2"; + case cCRNSMNearestPow2: return L"nearestpow2"; + case cCRNSMNextPow2: return L"nextpow2"; + default: break; + } + return L"?"; +} + +const wchar_t* crn_get_dxt_quality_string(crn_dxt_quality q) +{ + switch (q) + { + case cCRNDXTQualitySuperFast: return L"SuperFast"; + case cCRNDXTQualityFast: return L"Fast"; + case cCRNDXTQualityNormal: return L"Normal"; + case cCRNDXTQualityBetter: return L"Better"; + case cCRNDXTQualityUber: return L"Uber"; + default: break; + } + CRNLIB_ASSERT(false); + return L"?"; +} + +const char* crn_get_dxt_quality_stringa(crn_dxt_quality q) +{ + switch (q) + { + case cCRNDXTQualitySuperFast: 
return "SuperFast"; + case cCRNDXTQualityFast: return "Fast"; + case cCRNDXTQualityNormal: return "Normal"; + case cCRNDXTQualityBetter: return "Better"; + case cCRNDXTQualityUber: return "Uber"; + default: break; + } + CRNLIB_ASSERT(false); + return "?"; +} + + +void crn_free_block(void *pBlock) +{ + crnlib_free(pBlock); +} + +/* Validates comp_params, builds the compressed file image with create_compressed_texture() and returns the buffer released from crn_file_data; its size is stored in compressed_size. The optional out-parameters are zeroed first. Every failure path returns NULL with compressed_size left at 0. */ +void *crn_compress(const crn_comp_params &comp_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level, float *pActual_bitrate) +{ + compressed_size = 0; + if (pActual_quality_level) *pActual_quality_level = 0; + if (pActual_bitrate) *pActual_bitrate = 0.0f; + + /* This function returns void*: report invalid parameters with NULL like the other failure path, not with the bool literal false (not a null pointer constant in current C++). */ + if (!comp_params.check()) + return NULL; + + crnlib::vector crn_file_data; + if (!create_compressed_texture(comp_params, crn_file_data, pActual_quality_level, pActual_bitrate)) + return NULL; + + compressed_size = crn_file_data.size(); + return crn_file_data.assume_ownership(); +} + +/* Same as the overload above, but additionally validates mip_params and forwards them to create_compressed_texture(). Returns NULL (compressed_size == 0) on any failure. */ +void *crn_compress(const crn_comp_params &comp_params, const crn_mipmap_params &mip_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level, float *pActual_bitrate) +{ + compressed_size = 0; + if (pActual_quality_level) *pActual_quality_level = 0; + if (pActual_bitrate) *pActual_bitrate = 0.0f; + + /* void* return type: use NULL for the invalid-parameter case instead of false. */ + if ((!comp_params.check()) || (!mip_params.check())) + return NULL; + + crnlib::vector crn_file_data; + if (!create_compressed_texture(comp_params, mip_params, crn_file_data, pActual_quality_level, pActual_bitrate)) + return NULL; + + compressed_size = crn_file_data.size(); + return crn_file_data.assume_ownership(); +} + +void *crn_decompress_crn_to_dds(const void *pCRN_file_data, crn_uint32 &file_size) +{ + dds_texture tex; + if (!tex.load_crn_from_memory(L"from_memory.crn", pCRN_file_data, file_size)) + { + file_size = 0; + return NULL; + } + + file_size = 0; + + dynamic_stream dds_file_data; + dds_file_data.reserve(128*1024); + data_stream_serializer serializer(dds_file_data); + if (!tex.write_dds(serializer)) + return NULL; + dds_file_data.reserve(0); + + file_size =
static_cast(dds_file_data.get_size()); + return dds_file_data.get_buf().assume_ownership(); +} + +bool crn_decompress_dds_to_images(const void *pDDS_file_data, crn_uint32 dds_file_size, crn_uint32 **ppImages, crn_texture_desc &tex_desc) +{ + memset(&tex_desc, 0, sizeof(tex_desc)); + + dds_texture tex; + buffer_stream in_stream(pDDS_file_data, dds_file_size); + data_stream_serializer in_serializer(in_stream); + if (!tex.read_dds(in_serializer)) + return false; + + if (tex.is_packed()) + { + // TODO: Allow the user to disable uncooking of swizzled DXT5 formats? + bool uncook = true; + + if (!tex.unpack_from_dxt(uncook)) + return false; + } + + tex_desc.m_faces = tex.get_num_faces(); + tex_desc.m_width = tex.get_width(); + tex_desc.m_height = tex.get_height(); + tex_desc.m_levels = tex.get_num_levels(); + tex_desc.m_fmt_fourcc = (crn_uint32)tex.get_format(); + + for (uint f = 0; f < tex.get_num_faces(); f++) + { + for (uint l = 0; l < tex.get_num_levels(); l++) + { + mip_level *pLevel = tex.get_level(f, l); + image_u8 *pImg = pLevel->get_image(); + ppImages[l + tex.get_num_levels() * f] = static_cast(pImg->get_pixel_buf().assume_ownership()); + } + } + + return true; +} + +void crn_free_all_images(crn_uint32 **ppImages, const crn_texture_desc &desc) +{ + for (uint f = 0; f < desc.m_faces; f++) + for (uint l = 0; l < desc.m_levels; l++) + crn_free_block(ppImages[l + desc.m_levels * f]); +} + +// Simple low-level DXTn 4x4 block compressor API. +// Basically just a basic wrapper over the crnlib::dxt_image class. 
+ +namespace crnlib +{ + class crn_block_compressor + { + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_block_compressor); + + public: + crn_block_compressor() + { + } + + bool init(const crn_comp_params ¶ms) + { + m_comp_params = params; + + m_pack_params.init(params); + + crn_format basic_crn_fmt = crnd::crnd_get_fundamental_dxt_format(params.m_format); + pixel_format basic_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(basic_crn_fmt); + + if ((params.get_flag(cCRNCompFlagDXT1AForTransparency)) && (basic_pixel_fmt == PIXEL_FMT_DXT1)) + basic_pixel_fmt = PIXEL_FMT_DXT1A; + + if (!m_image.init(pixel_format_helpers::get_dxt_format(basic_pixel_fmt), cDXTBlockSize, cDXTBlockSize, false)) + return false; + + return true; + } + + void compress_block(const crn_uint32 *pPixels, void *pDst_block) + { + if (m_image.is_valid()) + { + m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, m_dxt1_optimizer, m_dxt5_optimizer); + memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block()); + } + } + + private: + dxt_image m_image; + crn_comp_params m_comp_params; + dxt_image::pack_params m_pack_params; + dxt1_endpoint_optimizer m_dxt1_optimizer; + dxt5_endpoint_optimizer m_dxt5_optimizer; + }; +} + +crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params ¶ms) +{ + crn_block_compressor *pComp = crnlib_new(); + if (!pComp->init(params)) + { + crnlib_delete(pComp); + return NULL; + } + return pComp; +} + +void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32 *pPixels, void *pDst_block) +{ + crn_block_compressor *pComp = static_cast(pContext); + pComp->compress_block(pPixels, pDst_block); +} + +void crn_free_block_compressor(crn_block_compressor_context_t pContext) +{ + crnlib_delete(static_cast(pContext)); +} diff --git a/crnlib/lzma_7zBuf.cpp b/crnlib/lzma_7zBuf.cpp new file mode 100644 index 00000000..1645edd4 --- /dev/null +++ b/crnlib/lzma_7zBuf.cpp @@ -0,0 +1,41 @@ +/* 
7zBuf.c -- Byte Buffer +2008-03-28 +Igor Pavlov +Public domain */ +#include "crn_core.h" + +#include "lzma_7zBuf.h" + +namespace crnlib { + +void Buf_Init(CBuf *p) +{ + p->data = 0; + p->size = 0; +} + +int Buf_Create(CBuf *p, size_t size, ISzAlloc *alloc) +{ + p->size = 0; + if (size == 0) + { + p->data = 0; + return 1; + } + p->data = (Byte *)alloc->Alloc(alloc, size); + if (p->data != 0) + { + p->size = size; + return 1; + } + return 0; +} + +void Buf_Free(CBuf *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->data); + p->data = 0; + p->size = 0; +} + +} \ No newline at end of file diff --git a/crnlib/lzma_7zBuf.h b/crnlib/lzma_7zBuf.h new file mode 100644 index 00000000..bad28bce --- /dev/null +++ b/crnlib/lzma_7zBuf.h @@ -0,0 +1,35 @@ +/* 7zBuf.h -- Byte Buffer +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __7Z_BUF_H +#define __7Z_BUF_H + +#include "lzma_Types.h" + +namespace crnlib { + +typedef struct +{ + Byte *data; + size_t size; +} CBuf; + +void Buf_Init(CBuf *p); +int Buf_Create(CBuf *p, size_t size, ISzAlloc *alloc); +void Buf_Free(CBuf *p, ISzAlloc *alloc); + +typedef struct +{ + Byte *data; + size_t size; + size_t pos; +} CDynBuf; + +void DynBuf_Construct(CDynBuf *p); +void DynBuf_SeekToBeg(CDynBuf *p); +int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAlloc *alloc); +void DynBuf_Free(CDynBuf *p, ISzAlloc *alloc); + +} + +#endif diff --git a/crnlib/lzma_7zBuf2.cpp b/crnlib/lzma_7zBuf2.cpp new file mode 100644 index 00000000..2c3ddfe1 --- /dev/null +++ b/crnlib/lzma_7zBuf2.cpp @@ -0,0 +1,50 @@ +/* 7zBuf2.c -- Byte Buffer +2008-10-04 : Igor Pavlov : Public domain */ + +#include "crn_core.h" +#include +#include "lzma_7zBuf.h" + +namespace crnlib { + +void DynBuf_Construct(CDynBuf *p) +{ + p->data = 0; + p->size = 0; + p->pos = 0; +} + +void DynBuf_SeekToBeg(CDynBuf *p) +{ + p->pos = 0; +} + +int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAlloc *alloc) +{ + if (size > p->size - p->pos) + { + size_t newSize = p->pos + 
size; + Byte *data; + newSize += newSize / 4; + data = (Byte *)alloc->Alloc(alloc, newSize); + if (data == 0) + return 0; + p->size = newSize; + memcpy(data, p->data, p->pos); + alloc->Free(alloc, p->data); + p->data = data; + } + memcpy(p->data + p->pos, buf, size); + p->pos += size; + return 1; +} + +void DynBuf_Free(CDynBuf *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->data); + p->data = 0; + p->size = 0; + p->pos = 0; +} + +} diff --git a/crnlib/lzma_7zCrc.cpp b/crnlib/lzma_7zCrc.cpp new file mode 100644 index 00000000..8c53ee18 --- /dev/null +++ b/crnlib/lzma_7zCrc.cpp @@ -0,0 +1,40 @@ +/* 7zCrc.c -- CRC32 calculation +2008-08-05 +Igor Pavlov +Public domain */ +#include "crn_core.h" + +#include "lzma_7zCrc.h" + +namespace crnlib { + +#define kCrcPoly 0xEDB88320 +UInt32 g_CrcTable[256]; + +void MY_FAST_CALL CrcGenerateTable(void) +{ + UInt32 i; + for (i = 0; i < 256; i++) + { + UInt32 r = i; + int j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + g_CrcTable[i] = r; + } +} + +UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size) +{ + const Byte *p = (const Byte *)data; + for (; size > 0 ; size--, p++) + v = CRC_UPDATE_BYTE(v, *p); + return v; +} + +UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) +{ + return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; +} + +} diff --git a/crnlib/lzma_7zCrc.h b/crnlib/lzma_7zCrc.h new file mode 100644 index 00000000..77351d56 --- /dev/null +++ b/crnlib/lzma_7zCrc.h @@ -0,0 +1,28 @@ +/* 7zCrc.h -- CRC32 calculation +2008-03-13 +Igor Pavlov +Public domain */ + +#ifndef __7Z_CRC_H +#define __7Z_CRC_H + +#include + +#include "lzma_Types.h" + +namespace crnlib { + +extern UInt32 g_CrcTable[]; + +void MY_FAST_CALL CrcGenerateTable(void); + +#define CRC_INIT_VAL 0xFFFFFFFF +#define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) +#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) + +UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t 
size); +UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size); + +} + +#endif diff --git a/crnlib/lzma_7zFile.cpp b/crnlib/lzma_7zFile.cpp new file mode 100644 index 00000000..8cdaeacf --- /dev/null +++ b/crnlib/lzma_7zFile.cpp @@ -0,0 +1,267 @@ +/* 7zFile.c -- File IO +2008-11-22 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_7zFile.h" + +#ifndef USE_WINDOWS_FILE + +#include + +#endif + +#ifdef USE_WINDOWS_FILE + +/* + ReadFile and WriteFile functions in Windows have BUG: + If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) + from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES + (Insufficient system resources exist to complete the requested service). + Probably in some version of Windows there are problems with other sizes: + for 32 MB (maybe also for 16 MB). + And message can be "Network connection was lost" +*/ + +#define kChunkSizeMax (1 << 22) + +#endif + +namespace crnlib { + +void File_Construct(CSzFile *p) +{ + #ifdef USE_WINDOWS_FILE + p->handle = INVALID_HANDLE_VALUE; + #else + p->file = NULL; + #endif +} + +static WRes File_Open(CSzFile *p, const char *name, int writeMode) +{ + #ifdef USE_WINDOWS_FILE + p->handle = CreateFileA(name, + writeMode ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, NULL, + writeMode ? CREATE_ALWAYS : OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, NULL); + return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); + #else + p->file = fopen(name, writeMode ? "wb+" : "rb"); + return (p->file != 0) ? 
0 : errno; + #endif +} + +WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); } +WRes OutFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 1); } + +WRes File_Close(CSzFile *p) +{ + #ifdef USE_WINDOWS_FILE + if (p->handle != INVALID_HANDLE_VALUE) + { + if (!CloseHandle(p->handle)) + return GetLastError(); + p->handle = INVALID_HANDLE_VALUE; + } + #else + if (p->file != NULL) + { + int res = fclose(p->file); + if (res != 0) + return res; + p->file = NULL; + } + #endif + return 0; +} + +WRes File_Read(CSzFile *p, void *data, size_t *size) +{ + size_t originalSize = *size; + if (originalSize == 0) + return 0; + + #ifdef USE_WINDOWS_FILE + + *size = 0; + do + { + DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; + DWORD processed = 0; + BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); + data = (void *)((Byte *)data + processed); + originalSize -= processed; + *size += processed; + if (!res) + return GetLastError(); + if (processed == 0) + break; + } + while (originalSize > 0); + return 0; + + #else + + *size = fread(data, 1, originalSize, p->file); + if (*size == originalSize) + return 0; + return ferror(p->file); + + #endif +} + +WRes File_Write(CSzFile *p, const void *data, size_t *size) +{ + size_t originalSize = *size; + if (originalSize == 0) + return 0; + + #ifdef USE_WINDOWS_FILE + + *size = 0; + do + { + DWORD curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : (DWORD)originalSize; + DWORD processed = 0; + BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); + data = (void *)((Byte *)data + processed); + originalSize -= processed; + *size += processed; + if (!res) + return GetLastError(); + if (processed == 0) + break; + } + while (originalSize > 0); + return 0; + + #else + + *size = fwrite(data, 1, originalSize, p->file); + if (*size == originalSize) + return 0; + return ferror(p->file); + + #endif +} + +WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin) +{ + #ifdef USE_WINDOWS_FILE + + LARGE_INTEGER value; + DWORD moveMethod; + value.LowPart = (DWORD)*pos; + value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ + switch (origin) + { + case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; + case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break; + case SZ_SEEK_END: moveMethod = FILE_END; break; + default: return ERROR_INVALID_PARAMETER; + } + value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); + if (value.LowPart == 0xFFFFFFFF) + { + WRes res = GetLastError(); + if (res != NO_ERROR) + return res; + } + *pos = ((Int64)value.HighPart << 32) | value.LowPart; + return 0; + + #else + + int moveMethod; + int res; + switch (origin) + { + case SZ_SEEK_SET: moveMethod = SEEK_SET; break; + case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; + case SZ_SEEK_END: moveMethod = SEEK_END; break; + default: return 1; + } + res = fseek(p->file, (long)*pos, moveMethod); + *pos = ftell(p->file); + return res; + + #endif +} + +WRes File_GetLength(CSzFile *p, UInt64 *length) +{ + #ifdef USE_WINDOWS_FILE + + DWORD sizeHigh; + DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); + if (sizeLow == 0xFFFFFFFF) + { + DWORD res = GetLastError(); + if (res != NO_ERROR) + return res; + } + *length = (((UInt64)sizeHigh) << 32) + sizeLow; + return 0; + + #else + + long pos = ftell(p->file); + int res = fseek(p->file, 0, SEEK_END); + *length = ftell(p->file); + 
fseek(p->file, pos, SEEK_SET); + return res; + + #endif +} + + +/* ---------- FileSeqInStream ---------- */ + +static SRes FileSeqInStream_Read(void *pp, void *buf, size_t *size) +{ + CFileSeqInStream *p = (CFileSeqInStream *)pp; + return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; +} + +void FileSeqInStream_CreateVTable(CFileSeqInStream *p) +{ + p->s.Read = FileSeqInStream_Read; +} + + +/* ---------- FileInStream ---------- */ + +static SRes FileInStream_Read(void *pp, void *buf, size_t *size) +{ + CFileInStream *p = (CFileInStream *)pp; + return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; +} + +static SRes FileInStream_Seek(void *pp, Int64 *pos, ESzSeek origin) +{ + CFileInStream *p = (CFileInStream *)pp; + return File_Seek(&p->file, pos, origin); +} + +void FileInStream_CreateVTable(CFileInStream *p) +{ + p->s.Read = FileInStream_Read; + p->s.Seek = FileInStream_Seek; +} + + +/* ---------- FileOutStream ---------- */ + +static size_t FileOutStream_Write(void *pp, const void *data, size_t size) +{ + CFileOutStream *p = (CFileOutStream *)pp; + File_Write(&p->file, data, &size); + return size; +} + +void FileOutStream_CreateVTable(CFileOutStream *p) +{ + p->s.Write = FileOutStream_Write; +} + +} diff --git a/crnlib/lzma_7zFile.h b/crnlib/lzma_7zFile.h new file mode 100644 index 00000000..d18f2583 --- /dev/null +++ b/crnlib/lzma_7zFile.h @@ -0,0 +1,78 @@ +/* 7zFile.h -- File IO +2008-11-22 : Igor Pavlov : Public domain */ + +#ifndef __7Z_FILE_H +#define __7Z_FILE_H + +#ifdef _WIN32 +#define USE_WINDOWS_FILE +#endif + +#ifdef USE_WINDOWS_FILE +#include +#else +#include +#endif + +#include "lzma_Types.h" + +namespace crnlib { + + +/* ---------- File ---------- */ + +typedef struct +{ + #ifdef USE_WINDOWS_FILE + HANDLE handle; + #else + FILE *file; + #endif +} CSzFile; + +void File_Construct(CSzFile *p); +WRes InFile_Open(CSzFile *p, const char *name); +WRes OutFile_Open(CSzFile *p, const char *name); +WRes File_Close(CSzFile *p); + 
+/* reads min(*size, remaining file size) bytes; on return *size holds the number of bytes actually read */ +WRes File_Read(CSzFile *p, void *data, size_t *size); + +/* writes *size bytes; on return *size holds the number of bytes actually written */ +WRes File_Write(CSzFile *p, const void *data, size_t *size); + +WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin); +WRes File_GetLength(CSzFile *p, UInt64 *length); + + +/* ---------- FileInStream ---------- */ + +typedef struct +{ + ISeqInStream s; + CSzFile file; +} CFileSeqInStream; + +void FileSeqInStream_CreateVTable(CFileSeqInStream *p); + + +typedef struct +{ + ISeekInStream s; + CSzFile file; +} CFileInStream; + +void FileInStream_CreateVTable(CFileInStream *p); + + +typedef struct +{ + ISeqOutStream s; + CSzFile file; +} CFileOutStream; + +void FileOutStream_CreateVTable(CFileOutStream *p); + +} + +#endif diff --git a/crnlib/lzma_7zStream.cpp b/crnlib/lzma_7zStream.cpp new file mode 100644 index 00000000..7aca68ce --- /dev/null +++ b/crnlib/lzma_7zStream.cpp @@ -0,0 +1,173 @@ +/* 7zStream.c -- 7z Stream functions +2008-11-23 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include + +#include "lzma_Types.h" + +namespace crnlib { + +SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(stream->Read(stream, buf, &processed)); + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size) +{ + return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + +SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf) +{ + size_t processed = 1; + RINOK(stream->Read(stream, buf, &processed)); + return (processed == 1) ?
SZ_OK : SZ_ERROR_INPUT_EOF; +} + +SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset) +{ + Int64 t = offset; + return stream->Seek(stream, &t, SZ_SEEK_SET); +} + +SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size) +{ + void *lookBuf; + if (*size == 0) + return SZ_OK; + RINOK(stream->Look(stream, &lookBuf, size)); + memcpy(buf, lookBuf, *size); + return stream->Skip(stream, *size); +} + +SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType) +{ + while (size != 0) + { + size_t processed = size; + RINOK(stream->Read(stream, buf, &processed)); + if (processed == 0) + return errorType; + buf = (void *)((Byte *)buf + processed); + size -= processed; + } + return SZ_OK; +} + +SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size) +{ + return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); +} + +static SRes LookToRead_Look_Lookahead(void *pp, void **buf, size_t *size) +{ + SRes res = SZ_OK; + CLookToRead *p = (CLookToRead *)pp; + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size > 0) + { + p->pos = 0; + size2 = LookToRead_BUF_SIZE; + res = p->realStream->Read(p->realStream, p->buf, &size2); + p->size = size2; + } + if (size2 < *size) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead_Look_Exact(void *pp, void **buf, size_t *size) +{ + SRes res = SZ_OK; + CLookToRead *p = (CLookToRead *)pp; + size_t size2 = p->size - p->pos; + if (size2 == 0 && *size > 0) + { + p->pos = 0; + if (*size > LookToRead_BUF_SIZE) + *size = LookToRead_BUF_SIZE; + res = p->realStream->Read(p->realStream, p->buf, size); + size2 = p->size = *size; + } + if (size2 < *size) + *size = size2; + *buf = p->buf + p->pos; + return res; +} + +static SRes LookToRead_Skip(void *pp, size_t offset) +{ + CLookToRead *p = (CLookToRead *)pp; + p->pos += offset; + return SZ_OK; +} + +static SRes LookToRead_Read(void *pp, void *buf, size_t *size) +{ + CLookToRead *p = (CLookToRead 
*)pp; + size_t rem = p->size - p->pos; + if (rem == 0) + return p->realStream->Read(p->realStream, buf, size); + if (rem > *size) + rem = *size; + memcpy(buf, p->buf + p->pos, rem); + p->pos += rem; + *size = rem; + return SZ_OK; +} + +static SRes LookToRead_Seek(void *pp, Int64 *pos, ESzSeek origin) +{ + CLookToRead *p = (CLookToRead *)pp; + p->pos = p->size = 0; + return p->realStream->Seek(p->realStream, pos, origin); +} + +void LookToRead_CreateVTable(CLookToRead *p, int lookahead) +{ + p->s.Look = lookahead ? + LookToRead_Look_Lookahead : + LookToRead_Look_Exact; + p->s.Skip = LookToRead_Skip; + p->s.Read = LookToRead_Read; + p->s.Seek = LookToRead_Seek; +} + +void LookToRead_Init(CLookToRead *p) +{ + p->pos = p->size = 0; +} + +static SRes SecToLook_Read(void *pp, void *buf, size_t *size) +{ + CSecToLook *p = (CSecToLook *)pp; + return LookInStream_LookRead(p->realStream, buf, size); +} + +void SecToLook_CreateVTable(CSecToLook *p) +{ + p->s.Read = SecToLook_Read; +} + +static SRes SecToRead_Read(void *pp, void *buf, size_t *size) +{ + CSecToRead *p = (CSecToRead *)pp; + return p->realStream->Read(p->realStream, buf, size); +} + +void SecToRead_CreateVTable(CSecToRead *p) +{ + p->s.Read = SecToRead_Read; +} + +} diff --git a/crnlib/lzma_7zVersion.h b/crnlib/lzma_7zVersion.h new file mode 100644 index 00000000..595dec5f --- /dev/null +++ b/crnlib/lzma_7zVersion.h @@ -0,0 +1,7 @@ +#define MY_VER_MAJOR 4 +#define MY_VER_MINOR 63 +#define MY_VER_BUILD 0 +#define MY_VERSION "4.63" +#define MY_DATE "2008-12-31" +#define MY_COPYRIGHT ": Igor Pavlov : Public domain" +#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE diff --git a/crnlib/lzma_Alloc.cpp b/crnlib/lzma_Alloc.cpp new file mode 100644 index 00000000..32b0da87 --- /dev/null +++ b/crnlib/lzma_Alloc.cpp @@ -0,0 +1,131 @@ +/* Alloc.c -- Memory allocation functions +2008-09-24 +Igor Pavlov +Public domain */ +#include "crn_core.h" +#ifdef _WIN32 +#include +#endif +#include + +#include 
"lzma_Alloc.h" + +namespace crnlib { + +/* #define _SZ_ALLOC_DEBUG */ + +/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ +#ifdef _SZ_ALLOC_DEBUG +#include +int g_allocCount = 0; +int g_allocCountMid = 0; +int g_allocCountBig = 0; +#endif + +void *MyAlloc(size_t size) +{ + if (size == 0) + return 0; + #ifdef _SZ_ALLOC_DEBUG + { + void *p = crnlib::crnlib_malloc(size); + fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); + return p; + } + #else + return crnlib::crnlib_malloc(size); + #endif +} + +void MyFree(void *address) +{ + #ifdef _SZ_ALLOC_DEBUG + if (address != 0) + fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); + #endif + crnlib::crnlib_free(address); +} + +#ifdef _WIN32 + +void *MidAlloc(size_t size) +{ + if (size == 0) + return 0; + #ifdef _SZ_ALLOC_DEBUG + fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); + #endif + return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); +} + +void MidFree(void *address) +{ + #ifdef _SZ_ALLOC_DEBUG + if (address != 0) + fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); + #endif + if (address == 0) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#ifndef MEM_LARGE_PAGES +#undef _7ZIP_LARGE_PAGES +#endif + +#ifdef _7ZIP_LARGE_PAGES +SIZE_T g_LargePageSize = 0; +typedef SIZE_T (WINAPI *GetLargePageMinimumP)(); +#endif + +void SetLargePageSize() +{ + #ifdef _7ZIP_LARGE_PAGES + SIZE_T size = 0; + GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) + GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); + if (largePageMinimum == 0) + return; + size = largePageMinimum(); + if (size == 0 || (size & (size - 1)) != 0) + return; + g_LargePageSize = size; + #endif +} + + +void *BigAlloc(size_t size) +{ + if (size == 0) + return 0; + #ifdef _SZ_ALLOC_DEBUG + fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); + #endif + + 
#ifdef _7ZIP_LARGE_PAGES + if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) + { + void *res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), + MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (res != 0) + return res; + } + #endif + return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); +} + +void BigFree(void *address) +{ + #ifdef _SZ_ALLOC_DEBUG + if (address != 0) + fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); + #endif + + if (address == 0) + return; + VirtualFree(address, 0, MEM_RELEASE); +} + +#endif + +} diff --git a/crnlib/lzma_Alloc.h b/crnlib/lzma_Alloc.h new file mode 100644 index 00000000..479f6b04 --- /dev/null +++ b/crnlib/lzma_Alloc.h @@ -0,0 +1,36 @@ +/* Alloc.h -- Memory allocation functions +2008-03-13 +Igor Pavlov +Public domain */ + +#ifndef __COMMON_ALLOC_H +#define __COMMON_ALLOC_H + +#include + +namespace crnlib { + +void *MyAlloc(size_t size); +void MyFree(void *address); + +#ifdef _WIN32 + +void SetLargePageSize(); + +void *MidAlloc(size_t size); +void MidFree(void *address); +void *BigAlloc(size_t size); +void BigFree(void *address); + +#else + +#define MidAlloc(size) MyAlloc(size) +#define MidFree(address) MyFree(address) +#define BigAlloc(size) MyAlloc(size) +#define BigFree(address) MyFree(address) + +#endif + +} + +#endif diff --git a/crnlib/lzma_Bcj2.cpp b/crnlib/lzma_Bcj2.cpp new file mode 100644 index 00000000..4bf75750 --- /dev/null +++ b/crnlib/lzma_Bcj2.cpp @@ -0,0 +1,136 @@ +/* Bcj2.c -- Converter for x86 code (BCJ2) +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_Bcj2.h" + +namespace crnlib { + +#ifdef _LZMA_PROB32 +#define CProb UInt32 +#else +#define CProb UInt16 +#endif + +#define IsJcc(b0, b1) ((b0) == 0x0F && ((b1) & 0xF0) == 0x80) +#define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1)) + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define 
kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + +#define RC_READ_BYTE (*buffer++) +#define RC_TEST { if (buffer == bufferLim) return SZ_ERROR_DATA; } +#define RC_INIT2 code = 0; range = 0xFFFFFFFF; \ + { int i; for (i = 0; i < 5; i++) { RC_TEST; code = (code << 8) | RC_READ_BYTE; }} + +#define NORMALIZE if (range < kTopValue) { RC_TEST; range <<= 8; code = (code << 8) | RC_READ_BYTE; } + +#define IF_BIT_0(p) ttt = *(p); bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); NORMALIZE; +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); NORMALIZE; + +int Bcj2_Decode( + const Byte *buf0, SizeT size0, + const Byte *buf1, SizeT size1, + const Byte *buf2, SizeT size2, + const Byte *buf3, SizeT size3, + Byte *outBuf, SizeT outSize) +{ + CProb p[256 + 2]; + SizeT inPos = 0, outPos = 0; + + const Byte *buffer, *bufferLim; + UInt32 range, code; + Byte prevByte = 0; + + unsigned int i; + for (i = 0; i < sizeof(p) / sizeof(p[0]); i++) + p[i] = kBitModelTotal >> 1; + + buffer = buf3; + bufferLim = buffer + size3; + RC_INIT2 + + if (outSize == 0) + return SZ_OK; + + for (;;) + { + Byte b; + CProb *prob; + UInt32 bound; + UInt32 ttt; + + SizeT limit = size0 - inPos; + if (outSize - outPos < limit) + limit = outSize - outPos; + while (limit != 0) + { + Byte b = buf0[inPos]; + outBuf[outPos++] = b; + if (IsJ(prevByte, b)) + break; + inPos++; + prevByte = b; + limit--; + } + + if (limit == 0 || outPos == outSize) + break; + + b = buf0[inPos++]; + + if (b == 0xE8) + prob = p + prevByte; + else if (b == 0xE9) + prob = p + 256; + else + prob = p + 257; + + IF_BIT_0(prob) + { + UPDATE_0(prob) + prevByte = b; + } + else + { + UInt32 dest; + const Byte *v; + UPDATE_1(prob) + if (b == 0xE8) + { + v = buf1; + if (size1 < 4) + return SZ_ERROR_DATA; + buf1 += 4; + size1 -= 4; + } + else + { + v = buf2; + if (size2 < 
4) + return SZ_ERROR_DATA; + buf2 += 4; + size2 -= 4; + } + dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) | + ((UInt32)v[2] << 8) | ((UInt32)v[3])) - ((UInt32)outPos + 4); + outBuf[outPos++] = (Byte)dest; + if (outPos == outSize) + break; + outBuf[outPos++] = (Byte)(dest >> 8); + if (outPos == outSize) + break; + outBuf[outPos++] = (Byte)(dest >> 16); + if (outPos == outSize) + break; + outBuf[outPos++] = prevByte = (Byte)(dest >> 24); + } + } + return (outPos == outSize) ? SZ_OK : SZ_ERROR_DATA; +} + +} diff --git a/crnlib/lzma_Bcj2.h b/crnlib/lzma_Bcj2.h new file mode 100644 index 00000000..8a8429d0 --- /dev/null +++ b/crnlib/lzma_Bcj2.h @@ -0,0 +1,34 @@ +/* Bcj2.h -- Converter for x86 code (BCJ2) +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __BCJ2_H +#define __BCJ2_H + +#include "lzma_Types.h" + +namespace crnlib { + +/* +Conditions: + outSize <= FullOutputSize, + where FullOutputSize is full size of output stream of x86_2 filter. + +If buf0 overlaps outBuf, there are two required conditions: + 1) (buf0 >= outBuf) + 2) (buf0 + size0 >= outBuf + FullOutputSize). 
+ +Returns: + SZ_OK + SZ_ERROR_DATA - Data error +*/ + +int Bcj2_Decode( + const Byte *buf0, SizeT size0, + const Byte *buf1, SizeT size1, + const Byte *buf2, SizeT size2, + const Byte *buf3, SizeT size3, + Byte *outBuf, SizeT outSize); + +} + +#endif diff --git a/crnlib/lzma_Bra.cpp b/crnlib/lzma_Bra.cpp new file mode 100644 index 00000000..a8755618 --- /dev/null +++ b/crnlib/lzma_Bra.cpp @@ -0,0 +1,137 @@ +/* Bra.c -- Converters for RISC code +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_Bra.h" + +namespace crnlib { + +SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + SizeT i; + if (size < 4) + return 0; + size -= 4; + ip += 8; + for (i = 0; i <= size; i += 4) + { + if (data[i + 3] == 0xEB) + { + UInt32 dest; + UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]); + src <<= 2; + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + dest >>= 2; + data[i + 2] = (Byte)(dest >> 16); + data[i + 1] = (Byte)(dest >> 8); + data[i + 0] = (Byte)dest; + } + } + return i; +} + +SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + SizeT i; + if (size < 4) + return 0; + size -= 4; + ip += 4; + for (i = 0; i <= size; i += 2) + { + if ((data[i + 1] & 0xF8) == 0xF0 && + (data[i + 3] & 0xF8) == 0xF8) + { + UInt32 dest; + UInt32 src = + (((UInt32)data[i + 1] & 0x7) << 19) | + ((UInt32)data[i + 0] << 11) | + (((UInt32)data[i + 3] & 0x7) << 8) | + (data[i + 2]); + + src <<= 1; + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + dest >>= 1; + + data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7)); + data[i + 0] = (Byte)(dest >> 11); + data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7)); + data[i + 2] = (Byte)dest; + i += 2; + } + } + return i; +} + +SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + SizeT i; + if (size < 4) + return 0; + size -= 4; + for (i = 0; i <= size; i += 4) + { + if 
((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1) + { + UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) | + ((UInt32)data[i + 1] << 16) | + ((UInt32)data[i + 2] << 8) | + ((UInt32)data[i + 3] & (~3)); + + UInt32 dest; + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + data[i + 0] = (Byte)(0x48 | ((dest >> 24) & 0x3)); + data[i + 1] = (Byte)(dest >> 16); + data[i + 2] = (Byte)(dest >> 8); + data[i + 3] &= 0x3; + data[i + 3] |= dest; + } + } + return i; +} + +SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + UInt32 i; + if (size < 4) + return 0; + size -= 4; + for (i = 0; i <= size; i += 4) + { + if (data[i] == 0x40 && (data[i + 1] & 0xC0) == 0x00 || + data[i] == 0x7F && (data[i + 1] & 0xC0) == 0xC0) + { + UInt32 src = + ((UInt32)data[i + 0] << 24) | + ((UInt32)data[i + 1] << 16) | + ((UInt32)data[i + 2] << 8) | + ((UInt32)data[i + 3]); + UInt32 dest; + + src <<= 2; + if (encoding) + dest = ip + i + src; + else + dest = src - (ip + i); + dest >>= 2; + + dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000; + + data[i + 0] = (Byte)(dest >> 24); + data[i + 1] = (Byte)(dest >> 16); + data[i + 2] = (Byte)(dest >> 8); + data[i + 3] = (Byte)dest; + } + } + return i; +} + +} diff --git a/crnlib/lzma_Bra.h b/crnlib/lzma_Bra.h new file mode 100644 index 00000000..9cf320ee --- /dev/null +++ b/crnlib/lzma_Bra.h @@ -0,0 +1,64 @@ +/* Bra.h -- Branch converters for executables +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __BRA_H +#define __BRA_H + +#include "lzma_Types.h" + +namespace crnlib { + +/* +These functions convert relative addresses to absolute addresses +in CALL instructions to increase the compression ratio. 
+ + In: + data - data buffer + size - size of data + ip - current virtual Instruction Pinter (IP) value + state - state variable for x86 converter + encoding - 0 (for decoding), 1 (for encoding) + + Out: + state - state variable for x86 converter + + Returns: + The number of processed bytes. If you call these functions with multiple calls, + you must start next call with first byte after block of processed bytes. + + Type Endian Alignment LookAhead + + x86 little 1 4 + ARMT little 2 2 + ARM little 4 0 + PPC big 4 0 + SPARC big 4 0 + IA64 little 16 0 + + size must be >= Alignment + LookAhead, if it's not last block. + If (size < Alignment + LookAhead), converter returns 0. + + Example: + + UInt32 ip = 0; + for () + { + ; size must be >= Alignment + LookAhead, if it's not last block + SizeT processed = Convert(data, size, ip, 1); + data += processed; + size -= processed; + ip += processed; + } +*/ + +#define x86_Convert_Init(state) { state = 0; } +SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding); +SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); +SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding); + +} + +#endif diff --git a/crnlib/lzma_Bra86.cpp b/crnlib/lzma_Bra86.cpp new file mode 100644 index 00000000..51814744 --- /dev/null +++ b/crnlib/lzma_Bra86.cpp @@ -0,0 +1,89 @@ +/* Bra86.c -- Converter for x86 code (BCJ) +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_Bra.h" + +namespace crnlib { + +#define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) + +const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0}; +const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3}; + +SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) +{ + 
SizeT bufferPos = 0, prevPosT; + UInt32 prevMask = *state & 0x7; + if (size < 5) + return 0; + ip += 5; + prevPosT = (SizeT)0 - 1; + + for (;;) + { + Byte *p = data + bufferPos; + Byte *limit = data + size - 4; + for (; p < limit; p++) + if ((*p & 0xFE) == 0xE8) + break; + bufferPos = (SizeT)(p - data); + if (p >= limit) + break; + prevPosT = bufferPos - prevPosT; + if (prevPosT > 3) + prevMask = 0; + else + { + prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; + if (prevMask != 0) + { + Byte b = p[4 - kMaskToBitNumber[prevMask]]; + if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) + { + prevPosT = bufferPos; + prevMask = ((prevMask << 1) & 0x7) | 1; + bufferPos++; + continue; + } + } + } + prevPosT = bufferPos; + + if (Test86MSByte(p[4])) + { + UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); + UInt32 dest; + for (;;) + { + Byte b; + int index; + if (encoding) + dest = (ip + (UInt32)bufferPos) + src; + else + dest = src - (ip + (UInt32)bufferPos); + if (prevMask == 0) + break; + index = kMaskToBitNumber[prevMask] * 8; + b = (Byte)(dest >> (24 - index)); + if (!Test86MSByte(b)) + break; + src = dest ^ ((1 << (32 - index)) - 1); + } + p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); + p[3] = (Byte)(dest >> 16); + p[2] = (Byte)(dest >> 8); + p[1] = (Byte)dest; + bufferPos += 5; + } + else + { + prevMask = ((prevMask << 1) & 0x7) | 1; + bufferPos++; + } + } + prevPosT = bufferPos - prevPosT; + *state = ((prevPosT > 3) ? 
0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); + return bufferPos; +} + +} diff --git a/crnlib/lzma_BraIA64.cpp b/crnlib/lzma_BraIA64.cpp new file mode 100644 index 00000000..65d3425c --- /dev/null +++ b/crnlib/lzma_BraIA64.cpp @@ -0,0 +1,71 @@ +/* BraIA64.c -- Converter for IA-64 code +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_Bra.h" + +namespace crnlib { + +static const Byte kBranchTable[32] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 +}; + +SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) +{ + SizeT i; + if (size < 16) + return 0; + size -= 16; + for (i = 0; i <= size; i += 16) + { + UInt32 instrTemplate = data[i] & 0x1F; + UInt32 mask = kBranchTable[instrTemplate]; + UInt32 bitPos = 5; + int slot; + for (slot = 0; slot < 3; slot++, bitPos += 41) + { + UInt32 bytePos, bitRes; + UInt64 instruction, instNorm; + int j; + if (((mask >> slot) & 1) == 0) + continue; + bytePos = (bitPos >> 3); + bitRes = bitPos & 0x7; + instruction = 0; + for (j = 0; j < 6; j++) + instruction += (UInt64)data[i + j + bytePos] << (8 * j); + + instNorm = instruction >> bitRes; + if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0) + { + UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF); + UInt32 dest; + src |= ((UInt32)(instNorm >> 36) & 1) << 20; + + src <<= 4; + + if (encoding) + dest = ip + (UInt32)i + src; + else + dest = src - (ip + (UInt32)i); + + dest >>= 4; + + instNorm &= ~((UInt64)(0x8FFFFF) << 13); + instNorm |= ((UInt64)(dest & 0xFFFFF) << 13); + instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20)); + + instruction &= (1 << bitRes) - 1; + instruction |= (instNorm << bitRes); + for (j = 0; j < 6; j++) + data[i + j + bytePos] = (Byte)(instruction >> (8 * j)); + } + } + } + return i; +} + +} diff --git a/crnlib/lzma_CpuArch.h b/crnlib/lzma_CpuArch.h new file mode 100644 index 00000000..006361f2 --- /dev/null +++ b/crnlib/lzma_CpuArch.h @@ 
-0,0 +1,69 @@ +/* CpuArch.h +2008-08-05 +Igor Pavlov +Public domain */ + +#ifndef __CPUARCH_H +#define __CPUARCH_H + +/* +LITTLE_ENDIAN_UNALIGN means: + 1) CPU is LITTLE_ENDIAN + 2) it's allowed to make unaligned memory accesses +if LITTLE_ENDIAN_UNALIGN is not defined, it means that we don't know +about these properties of platform. +*/ + +#if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386__) || defined(__x86_64__) +#define LITTLE_ENDIAN_UNALIGN +#endif + +#ifdef LITTLE_ENDIAN_UNALIGN + +#define GetUi16(p) (*(const UInt16 *)(p)) +#define GetUi32(p) (*(const UInt32 *)(p)) +#define GetUi64(p) (*(const UInt64 *)(p)) +#define SetUi32(p, d) *(UInt32 *)(p) = (d); + +#else + +#define GetUi16(p) (((const Byte *)(p))[0] | ((UInt16)((const Byte *)(p))[1] << 8)) + +#define GetUi32(p) ( \ + ((const Byte *)(p))[0] | \ + ((UInt32)((const Byte *)(p))[1] << 8) | \ + ((UInt32)((const Byte *)(p))[2] << 16) | \ + ((UInt32)((const Byte *)(p))[3] << 24)) + +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) + +#define SetUi32(p, d) { UInt32 _x_ = (d); \ + ((Byte *)(p))[0] = (Byte)_x_; \ + ((Byte *)(p))[1] = (Byte)(_x_ >> 8); \ + ((Byte *)(p))[2] = (Byte)(_x_ >> 16); \ + ((Byte *)(p))[3] = (Byte)(_x_ >> 24); } + +#endif + +#if defined(LITTLE_ENDIAN_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) + +#pragma intrinsic(_byteswap_ulong) +#pragma intrinsic(_byteswap_uint64) +#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p)) +#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p)) + +#else + +#define GetBe32(p) ( \ + ((UInt32)((const Byte *)(p))[0] << 24) | \ + ((UInt32)((const Byte *)(p))[1] << 16) | \ + ((UInt32)((const Byte *)(p))[2] << 8) | \ + ((const Byte *)(p))[3] ) + +#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4)) + +#endif + +#define GetBe16(p) (((UInt16)((const Byte *)(p))[0] << 8) | ((const Byte *)(p))[1]) + +#endif diff --git 
a/crnlib/lzma_LzFind.cpp b/crnlib/lzma_LzFind.cpp new file mode 100644 index 00000000..98b2b09f --- /dev/null +++ b/crnlib/lzma_LzFind.cpp @@ -0,0 +1,755 @@ +/* LzFind.c -- Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include + +#include "lzma_LzFind.h" +#include "lzma_LzHash.h" + +namespace crnlib { + +#define kEmptyHashValue 0 +#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) +#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ +#define kNormalizeMask (~(kNormalizeStepMin - 1)) +#define kMaxHistorySize ((UInt32)3 << 30) + +#define kStartMaxLen 3 + +static void LzInWindow_Free(CMatchFinder *p, ISzAlloc *alloc) +{ + if (!p->directInput) + { + alloc->Free(alloc, p->bufferBase); + p->bufferBase = 0; + } +} + +/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ + +static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAlloc *alloc) +{ + UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; + if (p->directInput) + { + p->blockSize = blockSize; + return 1; + } + if (p->bufferBase == 0 || p->blockSize != blockSize) + { + LzInWindow_Free(p, alloc); + p->blockSize = blockSize; + p->bufferBase = (Byte *)alloc->Alloc(alloc, (size_t)blockSize); + } + return (p->bufferBase != 0); +} + +Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +Byte MatchFinder_GetIndexByte(CMatchFinder *p, Int32 index) { return p->buffer[index]; } + +UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } + +void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) +{ + p->posLimit -= subValue; + p->pos -= subValue; + p->streamPos -= subValue; +} + +static void MatchFinder_ReadBlock(CMatchFinder *p) +{ + if (p->streamEndWasReached || p->result != SZ_OK) + return; + for (;;) + { + Byte *dest = p->buffer + (p->streamPos - p->pos); + size_t size = (p->bufferBase + p->blockSize - dest); + if (size == 0) + return; + 
p->result = p->stream->Read(p->stream, dest, &size); + if (p->result != SZ_OK) + return; + if (size == 0) + { + p->streamEndWasReached = 1; + return; + } + p->streamPos += (UInt32)size; + if (p->streamPos - p->pos > p->keepSizeAfter) + return; + } +} + +void MatchFinder_MoveBlock(CMatchFinder *p) +{ + memmove(p->bufferBase, + p->buffer - p->keepSizeBefore, + (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); + p->buffer = p->bufferBase + p->keepSizeBefore; +} + +int MatchFinder_NeedMove(CMatchFinder *p) +{ + /* if (p->streamEndWasReached) return 0; */ + return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); +} + +void MatchFinder_ReadIfRequired(CMatchFinder *p) +{ + if (p->streamEndWasReached) + return; + if (p->keepSizeAfter >= p->streamPos - p->pos) + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) +{ + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); +} + +static void MatchFinder_SetDefaultSettings(CMatchFinder *p) +{ + p->cutValue = 32; + p->btMode = 1; + p->numHashBytes = 4; + /* p->skipModeBits = 0; */ + p->directInput = 0; + p->bigHash = 0; +} + +#define kCrcPoly 0xEDB88320 + +void MatchFinder_Construct(CMatchFinder *p) +{ + UInt32 i; + p->bufferBase = 0; + p->directInput = 0; + p->hash = 0; + MatchFinder_SetDefaultSettings(p); + + for (i = 0; i < 256; i++) + { + UInt32 r = i; + int j; + for (j = 0; j < 8; j++) + r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); + p->crc[i] = r; + } +} + +static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->hash); + p->hash = 0; +} + +void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc) +{ + MatchFinder_FreeThisClassMemory(p, alloc); + LzInWindow_Free(p, alloc); +} + +static CLzRef* AllocRefs(UInt32 num, ISzAlloc *alloc) +{ + size_t sizeInBytes = (size_t)num * sizeof(CLzRef); + if (sizeInBytes / sizeof(CLzRef) != num) + return 0; + return (CLzRef *)alloc->Alloc(alloc, 
sizeInBytes); +} + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAlloc *alloc) +{ + UInt32 sizeReserv; + if (historySize > kMaxHistorySize) + { + MatchFinder_Free(p, alloc); + return 0; + } + sizeReserv = historySize >> 1; + if (historySize > ((UInt32)2 << 30)) + sizeReserv = historySize >> 2; + sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + + p->keepSizeBefore = historySize + keepAddBufferBefore + 1; + p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; + /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ + if (LzInWindow_Create(p, sizeReserv, alloc)) + { + UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1; + UInt32 hs; + p->matchMaxLen = matchMaxLen; + { + p->fixedHashSize = 0; + if (p->numHashBytes == 2) + hs = (1 << 16) - 1; + else + { + hs = historySize - 1; + hs |= (hs >> 1); + hs |= (hs >> 2); + hs |= (hs >> 4); + hs |= (hs >> 8); + hs >>= 1; + /* hs >>= p->skipModeBits; */ + hs |= 0xFFFF; /* don't change it! It's required for Deflate */ + if (hs > (1 << 24)) + { + if (p->numHashBytes == 3) + hs = (1 << 24) - 1; + else + hs >>= 1; + } + } + p->hashMask = hs; + hs++; + if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; + if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; + if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; + hs += p->fixedHashSize; + } + + { + UInt32 prevSize = p->hashSizeSum + p->numSons; + UInt32 newSize; + p->historySize = historySize; + p->hashSizeSum = hs; + p->cyclicBufferSize = newCyclicBufferSize; + p->numSons = (p->btMode ? 
newCyclicBufferSize * 2 : newCyclicBufferSize); + newSize = p->hashSizeSum + p->numSons; + if (p->hash != 0 && prevSize == newSize) + return 1; + MatchFinder_FreeThisClassMemory(p, alloc); + p->hash = AllocRefs(newSize, alloc); + if (p->hash != 0) + { + p->son = p->hash + p->hashSizeSum; + return 1; + } + } + } + MatchFinder_Free(p, alloc); + return 0; +} + +static void MatchFinder_SetLimits(CMatchFinder *p) +{ + UInt32 limit = kMaxValForNormalize - p->pos; + UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; + if (limit2 < limit) + limit = limit2; + limit2 = p->streamPos - p->pos; + if (limit2 <= p->keepSizeAfter) + { + if (limit2 > 0) + limit2 = 1; + } + else + limit2 -= p->keepSizeAfter; + if (limit2 < limit) + limit = limit2; + { + UInt32 lenLimit = p->streamPos - p->pos; + if (lenLimit > p->matchMaxLen) + lenLimit = p->matchMaxLen; + p->lenLimit = lenLimit; + } + p->posLimit = p->pos + limit; +} + +void MatchFinder_Init(CMatchFinder *p) +{ + UInt32 i; + for (i = 0; i < p->hashSizeSum; i++) + p->hash[i] = kEmptyHashValue; + p->cyclicBufferPos = 0; + p->buffer = p->bufferBase; + p->pos = p->streamPos = p->cyclicBufferSize; + p->result = SZ_OK; + p->streamEndWasReached = 0; + MatchFinder_ReadBlock(p); + MatchFinder_SetLimits(p); +} + +static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) +{ + return (p->pos - p->historySize - 1) & kNormalizeMask; +} + +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 numItems) +{ + UInt32 i; + for (i = 0; i < numItems; i++) + { + UInt32 value = items[i]; + if (value <= subValue) + value = kEmptyHashValue; + else + value -= subValue; + items[i] = value; + } +} + +static void MatchFinder_Normalize(CMatchFinder *p) +{ + UInt32 subValue = MatchFinder_GetSubValue(p); + MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); + MatchFinder_ReduceOffsets(p, subValue); +} + +static void MatchFinder_CheckLimits(CMatchFinder *p) +{ + if (p->pos == kMaxValForNormalize) + MatchFinder_Normalize(p); + 
if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) + MatchFinder_CheckAndMoveAndRead(p); + if (p->cyclicBufferPos == p->cyclicBufferSize) + p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); +} + +static UInt32 * Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *distances, UInt32 maxLen) +{ + son[_cyclicBufferPos] = curMatch; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + return distances; + { + const Byte *pb = cur - delta; + curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + if (pb[maxLen] == cur[maxLen] && *pb == *cur) + { + UInt32 len = 0; + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + return distances; + } + } + } + } +} + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *distances, UInt32 maxLen) +{ + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return distances; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + UInt32 len = (len0 < len1 ? 
len0 : len1); + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return distances; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) +{ + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + return; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + UInt32 len = (len0 < len1 ? 
len0 : len1); + if (pb[len] == cur[len]) + { + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + { + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + return; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } +} + +#define MOVE_POS \ + ++p->cyclicBufferPos; \ + p->buffer++; \ + if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + +#define MOVE_POS_RET MOVE_POS return offset; + +static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } + +#define GET_MATCHES_HEADER2(minLen, ret_op) \ + UInt32 lenLimit; UInt32 hashValue; const Byte *cur; UInt32 curMatch; \ + lenLimit = p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ + cur = p->buffer; + +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) +#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue + +#define GET_MATCHES_FOOTER(offset, maxLen) \ + offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ + distances + offset, maxLen) - distances); MOVE_POS_RET; + +#define SKIP_FOOTER \ + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; + +static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 offset; + GET_MATCHES_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 1) +} + +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 offset; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = 0; + GET_MATCHES_FOOTER(offset, 2) +} + +static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 hash2Value, delta2, 
maxLen, offset; + GET_MATCHES_HEADER(3) + + HASH3_CALC; + + delta2 = p->pos - p->hash[hash2Value]; + curMatch = p->hash[kFix3HashSize + hashValue]; + + p->hash[hash2Value] = + p->hash[kFix3HashSize + hashValue] = p->pos; + + + maxLen = 2; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[0] = maxLen; + distances[1] = delta2 - 1; + offset = 2; + if (maxLen == lenLimit) + { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + GET_MATCHES_FOOTER(offset, maxLen) +} + +static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + delta2 = p->pos - p->hash[ hash2Value]; + delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; + curMatch = p->hash[kFix4HashSize + hashValue]; + + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + + maxLen = 1; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = delta2 - 1; + offset = 2; + } + if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) + { + maxLen = 3; + distances[offset + 1] = delta3 - 1; + offset += 2; + delta2 = delta3; + } + if (offset != 0) + { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[offset - 2] = maxLen; + if (maxLen == lenLimit) + { + SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + MOVE_POS_RET; + } + } + if (maxLen < 3) + maxLen = 3; + GET_MATCHES_FOOTER(offset, maxLen) +} + +static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; + GET_MATCHES_HEADER(4) + + HASH4_CALC; + + delta2 = p->pos - p->hash[ 
hash2Value]; + delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; + curMatch = p->hash[kFix4HashSize + hashValue]; + + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + + maxLen = 1; + offset = 0; + if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) + { + distances[0] = maxLen = 2; + distances[1] = delta2 - 1; + offset = 2; + } + if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) + { + maxLen = 3; + distances[offset + 1] = delta3 - 1; + offset += 2; + delta2 = delta3; + } + if (offset != 0) + { + for (; maxLen != lenLimit; maxLen++) + if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) + break; + distances[offset - 2] = maxLen; + if (maxLen == lenLimit) + { + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS_RET; + } + } + if (maxLen < 3) + maxLen = 3; + offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances + offset, maxLen) - (distances)); + MOVE_POS_RET +} + +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +{ + UInt32 offset; + GET_MATCHES_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), + distances, 2) - (distances)); + MOVE_POS_RET +} + +static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(2) + HASH2_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 hash2Value; + SKIP_HEADER(3) + HASH3_CALC; + curMatch = p->hash[kFix3HashSize + hashValue]; + p->hash[hash2Value] = + p->hash[kFix3HashSize + hashValue] = 
p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 hash2Value, hash3Value; + SKIP_HEADER(4) + HASH4_CALC; + curMatch = p->hash[kFix4HashSize + hashValue]; + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = p->pos; + p->hash[kFix4HashSize + hashValue] = p->pos; + SKIP_FOOTER + } + while (--num != 0); +} + +static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + UInt32 hash2Value, hash3Value; + SKIP_HEADER(4) + HASH4_CALC; + curMatch = p->hash[kFix4HashSize + hashValue]; + p->hash[ hash2Value] = + p->hash[kFix3HashSize + hash3Value] = + p->hash[kFix4HashSize + hashValue] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) +{ + do + { + SKIP_HEADER(3) + HASH_ZIP_CALC; + curMatch = p->hash[hashValue]; + p->hash[hashValue] = p->pos; + p->son[p->cyclicBufferPos] = curMatch; + MOVE_POS + } + while (--num != 0); +} + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinder_Init; + vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; + if (!p->btMode) + { + vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; + } + else if (p->numHashBytes == 2) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; + } + else if (p->numHashBytes == 3) + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; + vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; + } + else + { + vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; + 
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; + } +} + +} diff --git a/crnlib/lzma_LzFind.h b/crnlib/lzma_LzFind.h new file mode 100644 index 00000000..e5550e47 --- /dev/null +++ b/crnlib/lzma_LzFind.h @@ -0,0 +1,111 @@ +/* LzFind.h -- Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZFIND_H +#define __LZFIND_H + +#include "lzma_Types.h" + +namespace crnlib { + +typedef UInt32 CLzRef; + +typedef struct _CMatchFinder +{ + Byte *buffer; + UInt32 pos; + UInt32 posLimit; + UInt32 streamPos; + UInt32 lenLimit; + + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ + + UInt32 matchMaxLen; + CLzRef *hash; + CLzRef *son; + UInt32 hashMask; + UInt32 cutValue; + + Byte *bufferBase; + ISeqInStream *stream; + int streamEndWasReached; + + UInt32 blockSize; + UInt32 keepSizeBefore; + UInt32 keepSizeAfter; + + UInt32 numHashBytes; + int directInput; + int btMode; + /* int skipModeBits; */ + int bigHash; + UInt32 historySize; + UInt32 fixedHashSize; + UInt32 hashSizeSum; + UInt32 numSons; + SRes result; + UInt32 crc[256]; +} CMatchFinder; + +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) +#define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) + +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) + +int MatchFinder_NeedMove(CMatchFinder *p); +Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +void MatchFinder_MoveBlock(CMatchFinder *p); +void MatchFinder_ReadIfRequired(CMatchFinder *p); + +void MatchFinder_Construct(CMatchFinder *p); + +/* Conditions: + historySize <= 3 GB + keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB +*/ +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAlloc *alloc); +void MatchFinder_Free(CMatchFinder *p, ISzAlloc *alloc); +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, UInt32 
numItems); +void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); + +UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *distances, UInt32 maxLen); + +/* +Conditions: + Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. + Mf_GetPointerToCurrentPos_Func's result must be used only before any other function +*/ + +typedef void (*Mf_Init_Func)(void *object); +typedef Byte (*Mf_GetIndexByte_Func)(void *object, Int32 index); +typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object); +typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); +typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef void (*Mf_Skip_Func)(void *object, UInt32); + +typedef struct _IMatchFinder +{ + Mf_Init_Func Init; + Mf_GetIndexByte_Func GetIndexByte; + Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; + Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; + Mf_GetMatches_Func GetMatches; + Mf_Skip_Func Skip; +} IMatchFinder; + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); + +void MatchFinder_Init(CMatchFinder *p); +UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); + +} + +#endif diff --git a/crnlib/lzma_LzFindMt.cpp b/crnlib/lzma_LzFindMt.cpp new file mode 100644 index 00000000..ce72e002 --- /dev/null +++ b/crnlib/lzma_LzFindMt.cpp @@ -0,0 +1,797 @@ +/* LzFindMt.c -- multithreaded Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_LzHash.h" + +#include "lzma_LzFindMt.h" + +namespace crnlib { + +void MtSync_Construct(CMtSync *p) +{ + p->wasCreated = False; + p->csWasInitialized = 
False; + p->csWasEntered = False; + Thread_Construct(&p->thread); + Event_Construct(&p->canStart); + Event_Construct(&p->wasStarted); + Event_Construct(&p->wasStopped); + Semaphore_Construct(&p->freeSemaphore); + Semaphore_Construct(&p->filledSemaphore); +} + +void MtSync_GetNextBlock(CMtSync *p) +{ + if (p->needStart) + { + p->numProcessedBlocks = 1; + p->needStart = False; + p->stopWriting = False; + p->exit = False; + Event_Reset(&p->wasStarted); + Event_Reset(&p->wasStopped); + + Event_Set(&p->canStart); + Event_Wait(&p->wasStarted); + } + else + { + CriticalSection_Leave(&p->cs); + p->csWasEntered = False; + p->numProcessedBlocks++; + Semaphore_Release1(&p->freeSemaphore); + } + Semaphore_Wait(&p->filledSemaphore); + CriticalSection_Enter(&p->cs); + p->csWasEntered = True; +} + +/* MtSync_StopWriting must be called if Writing was started */ + +void MtSync_StopWriting(CMtSync *p) +{ + UInt32 myNumBlocks = p->numProcessedBlocks; + if (!Thread_WasCreated(&p->thread) || p->needStart) + return; + p->stopWriting = True; + if (p->csWasEntered) + { + CriticalSection_Leave(&p->cs); + p->csWasEntered = False; + } + Semaphore_Release1(&p->freeSemaphore); + + Event_Wait(&p->wasStopped); + + while (myNumBlocks++ != p->numProcessedBlocks) + { + Semaphore_Wait(&p->filledSemaphore); + Semaphore_Release1(&p->freeSemaphore); + } + p->needStart = True; +} + +void MtSync_Destruct(CMtSync *p) +{ + if (Thread_WasCreated(&p->thread)) + { + MtSync_StopWriting(p); + p->exit = True; + if (p->needStart) + Event_Set(&p->canStart); + Thread_Wait(&p->thread); + Thread_Close(&p->thread); + } + if (p->csWasInitialized) + { + CriticalSection_Delete(&p->cs); + p->csWasInitialized = False; + } + + Event_Close(&p->canStart); + Event_Close(&p->wasStarted); + Event_Close(&p->wasStopped); + Semaphore_Close(&p->freeSemaphore); + Semaphore_Close(&p->filledSemaphore); + + p->wasCreated = False; +} + +#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } + +static SRes 
MtSync_Create2(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks) +{ + if (p->wasCreated) + return SZ_OK; + + RINOK_THREAD(CriticalSection_Init(&p->cs)); + p->csWasInitialized = True; + + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); + RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); + + RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); + RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); + + p->needStart = True; + + RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); + p->wasCreated = True; + return SZ_OK; +} + +static SRes MtSync_Create(CMtSync *p, unsigned (MY_STD_CALL *startAddress)(void *), void *obj, UInt32 numBlocks) +{ + SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); + if (res != SZ_OK) + MtSync_Destruct(p); + return res; +} + +void MtSync_Init(CMtSync *p) { p->needStart = True; } + +#define kMtMaxValForNormalize 0xFFFFFFFF + +#define DEF_GetHeads2(name, v, action) \ +static void GetHeads ## name(const Byte *p, UInt32 pos, \ +UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) \ +{ action; for (; numHeads != 0; numHeads--) { \ +const UInt32 value = (v); p++; *heads++ = pos - hash[value]; hash[value] = pos++; } } + +#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) + +DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc; ) +DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) +DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) +DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) +//DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) + +void HashThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->hashSync; + for (;;) + { + UInt32 numProcessedBlocks = 0; + 
Event_Wait(&p->canStart); + Event_Set(&p->wasStarted); + for (;;) + { + if (p->exit) + return; + if (p->stopWriting) + { + p->numProcessedBlocks = numProcessedBlocks; + Event_Set(&p->wasStopped); + break; + } + + { + CMatchFinder *mf = mt->MatchFinder; + if (MatchFinder_NeedMove(mf)) + { + CriticalSection_Enter(&mt->btSync.cs); + CriticalSection_Enter(&mt->hashSync.cs); + { + const Byte *beforePtr = MatchFinder_GetPointerToCurrentPos(mf); + const Byte *afterPtr; + MatchFinder_MoveBlock(mf); + afterPtr = MatchFinder_GetPointerToCurrentPos(mf); + mt->pointerToCurPos -= beforePtr - afterPtr; + mt->buffer -= beforePtr - afterPtr; + } + CriticalSection_Leave(&mt->btSync.cs); + CriticalSection_Leave(&mt->hashSync.cs); + continue; + } + + Semaphore_Wait(&p->freeSemaphore); + + MatchFinder_ReadIfRequired(mf); + if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) + { + UInt32 subValue = (mf->pos - mf->historySize - 1); + MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); + } + { + UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; + UInt32 num = mf->streamPos - mf->pos; + heads[0] = 2; + heads[1] = num; + if (num >= mf->numHashBytes) + { + num = num - mf->numHashBytes + 1; + if (num > kMtHashBlockSize - 2) + num = kMtHashBlockSize - 2; + mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + heads[0] += num; + } + mf->pos += num; + mf->buffer += num; + } + } + + Semaphore_Release1(&p->filledSemaphore); + } + } +} + +void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) +{ + MtSync_GetNextBlock(&p->hashSync); + p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; + p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; + p->hashNumAvail = p->hashBuf[p->hashBufPos++]; +} + +#define kEmptyHashValue 0 + +/* #define MFMT_GM_INLINE */ + 
+#ifdef MFMT_GM_INLINE + +#define NO_INLINE MY_FAST_CALL + +Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, + UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + UInt32 *_distances, UInt32 _maxLen, const UInt32 *hash, Int32 limit, UInt32 size, UInt32 *posRes) +{ + do + { + UInt32 *distances = _distances + 1; + UInt32 curMatch = pos - *hash++; + + CLzRef *ptr0 = son + (_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + (_cyclicBufferPos << 1); + UInt32 len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + UInt32 maxLen = _maxLen; + for (;;) + { + UInt32 delta = pos - curMatch; + if (cutValue-- == 0 || delta >= _cyclicBufferSize) + { + *ptr0 = *ptr1 = kEmptyHashValue; + break; + } + { + CLzRef *pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + const Byte *pb = cur - delta; + UInt32 len = (len0 < len1 ? len0 : len1); + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + *distances++ = maxLen = len; + *distances++ = delta - 1; + if (len == lenLimit) + { + *ptr1 = pair[0]; + *ptr0 = pair[1]; + break; + } + } + } + if (pb[len] < cur[len]) + { + *ptr1 = curMatch; + ptr1 = pair + 1; + curMatch = *ptr1; + len1 = len; + } + else + { + *ptr0 = curMatch; + ptr0 = pair; + curMatch = *ptr0; + len0 = len; + } + } + } + pos++; + _cyclicBufferPos++; + cur++; + { + UInt32 num = (UInt32)(distances - _distances); + *_distances = num - 1; + _distances += num; + limit -= num; + } + } + while (limit > 0 && --size != 0); + *posRes = pos; + return limit; +} + +#endif + +void BtGetMatches(CMatchFinderMt *p, UInt32 *distances) +{ + UInt32 numProcessed = 0; + UInt32 curPos = 2; + UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + distances[1] = p->hashNumAvail; + while (curPos < limit) + { + if (p->hashBufPos == p->hashBufPosLimit) + { + 
MatchFinderMt_GetNextBlock_Hash(p); + distances[1] = numProcessed + p->hashNumAvail; + if (p->hashNumAvail >= p->numHashBytes) + continue; + for (; p->hashNumAvail != 0; p->hashNumAvail--) + distances[curPos++] = 0; + break; + } + { + UInt32 size = p->hashBufPosLimit - p->hashBufPos; + UInt32 lenLimit = p->matchMaxLen; + UInt32 pos = p->pos; + UInt32 cyclicBufferPos = p->cyclicBufferPos; + if (lenLimit >= p->hashNumAvail) + lenLimit = p->hashNumAvail; + { + UInt32 size2 = p->hashNumAvail - lenLimit + 1; + if (size2 < size) + size = size2; + size2 = p->cyclicBufferSize - cyclicBufferPos; + if (size2 < size) + size = size2; + } + #ifndef MFMT_GM_INLINE + while (curPos < limit && size-- != 0) + { + UInt32 *startDistances = distances + curPos; + UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], + pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + startDistances + 1, p->numHashBytes - 1) - startDistances); + *startDistances = num - 1; + curPos += num; + cyclicBufferPos++; + pos++; + p->buffer++; + } + #else + { + UInt32 posRes; + curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, + distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos) , size, &posRes); + p->hashBufPos += posRes - pos; + cyclicBufferPos += posRes - pos; + p->buffer += posRes - pos; + pos = posRes; + } + #endif + + numProcessed += pos - p->pos; + p->hashNumAvail -= pos - p->pos; + p->pos = pos; + if (cyclicBufferPos == p->cyclicBufferSize) + cyclicBufferPos = 0; + p->cyclicBufferPos = cyclicBufferPos; + } + } + distances[0] = curPos; +} + +void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) +{ + CMtSync *sync = &p->hashSync; + if (!sync->needStart) + { + CriticalSection_Enter(&sync->cs); + sync->csWasEntered = True; + } + + BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); + + if (p->pos > 
kMtMaxValForNormalize - kMtBtBlockSize) + { + UInt32 subValue = p->pos - p->cyclicBufferSize; + MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); + p->pos -= subValue; + } + + if (!sync->needStart) + { + CriticalSection_Leave(&sync->cs); + sync->csWasEntered = False; + } +} + +void BtThreadFunc(CMatchFinderMt *mt) +{ + CMtSync *p = &mt->btSync; + for (;;) + { + UInt32 blockIndex = 0; + Event_Wait(&p->canStart); + Event_Set(&p->wasStarted); + for (;;) + { + if (p->exit) + return; + if (p->stopWriting) + { + p->numProcessedBlocks = blockIndex; + MtSync_StopWriting(&mt->hashSync); + Event_Set(&p->wasStopped); + break; + } + Semaphore_Wait(&p->freeSemaphore); + BtFillBlock(mt, blockIndex++); + Semaphore_Release1(&p->filledSemaphore); + } + } +} + +void MatchFinderMt_Construct(CMatchFinderMt *p) +{ + p->hashBuf = 0; + MtSync_Construct(&p->hashSync); + MtSync_Construct(&p->btSync); +} + +void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->hashBuf); + p->hashBuf = 0; +} + +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc) +{ + MtSync_Destruct(&p->hashSync); + MtSync_Destruct(&p->btSync); + MatchFinderMt_FreeMem(p, alloc); +} + +#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) +#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + +static unsigned MY_STD_CALL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } +static unsigned MY_STD_CALL BtThreadFunc2(void *p) +{ + Byte allocaDummy[0x180]; + int i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)i; + BtThreadFunc((CMatchFinderMt *)p); + return 0; +} + +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc) +{ + CMatchFinder *mf = p->MatchFinder; + p->historySize = historySize; + if (kMtBtBlockSize <= matchMaxLen * 4) + return SZ_ERROR_PARAM; + if (p->hashBuf == 0) + { + p->hashBuf = (UInt32 
*)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); + if (p->hashBuf == 0) + return SZ_ERROR_MEM; + p->btBuf = p->hashBuf + kHashBufferSize; + } + keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); + keepAddBufferAfter += kMtHashBlockSize; + if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) + return SZ_ERROR_MEM; + + RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); + return SZ_OK; +} + +/* Call it after ReleaseStream / SetStream */ +void MatchFinderMt_Init(CMatchFinderMt *p) +{ + CMatchFinder *mf = p->MatchFinder; + p->btBufPos = p->btBufPosLimit = 0; + p->hashBufPos = p->hashBufPosLimit = 0; + MatchFinder_Init(mf); + p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); + p->btNumAvailBytes = 0; + p->lzPos = p->historySize + 1; + + p->hash = mf->hash; + p->fixedHashSize = mf->fixedHashSize; + p->crc = mf->crc; + + p->son = mf->son; + p->matchMaxLen = mf->matchMaxLen; + p->numHashBytes = mf->numHashBytes; + p->pos = mf->pos; + p->buffer = mf->buffer; + p->cyclicBufferPos = mf->cyclicBufferPos; + p->cyclicBufferSize = mf->cyclicBufferSize; + p->cutValue = mf->cutValue; +} + +/* ReleaseStream is required to finish multithreading */ +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) +{ + MtSync_StopWriting(&p->btSync); + /* p->MatchFinder->ReleaseStream(); */ +} + +void MatchFinderMt_Normalize(CMatchFinderMt *p) +{ + MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); + p->lzPos = p->historySize + 1; +} + +void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) +{ + UInt32 blockIndex; + MtSync_GetNextBlock(&p->btSync); + blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); + p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; + p->btBufPosLimit += p->btBuf[p->btBufPos++]; + p->btNumAvailBytes = p->btBuf[p->btBufPos++]; + if 
(p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) + MatchFinderMt_Normalize(p); +} + +const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) +{ + return p->pointerToCurPos; +} + +#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); + +UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) +{ + GET_NEXT_BLOCK_IF_REQUIRED; + return p->btNumAvailBytes; +} + +Byte MatchFinderMt_GetIndexByte(CMatchFinderMt *p, Int32 index) +{ + return p->pointerToCurPos[index]; +} + +UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, curMatch2; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH2_CALC + + curMatch2 = hash[hash2Value]; + hash[hash2Value] = lzPos; + + if (curMatch2 >= matchMinPos) + if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + *distances++ = 2; + *distances++ = lzPos - curMatch2 - 1; + } + return distances; +} + +UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, curMatch2, curMatch3; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH3_CALC + + curMatch2 = hash[ hash2Value]; + curMatch3 = hash[kFix3HashSize + hash3Value]; + + hash[ hash2Value] = + hash[kFix3HashSize + hash3Value] = + lzPos; + + if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch2 - 1; + if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + { + distances[0] = 3; + return distances + 2; + } + distances[0] = 2; + distances += 2; + } + if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + { + *distances++ = 3; + *distances++ = lzPos - curMatch3 - 1; + } + return distances; +} + +/* +UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) +{ + UInt32 hash2Value, hash3Value, hash4Value, curMatch2, 
curMatch3, curMatch4; + UInt32 *hash = p->hash; + const Byte *cur = p->pointerToCurPos; + UInt32 lzPos = p->lzPos; + MT_HASH4_CALC + + curMatch2 = hash[ hash2Value]; + curMatch3 = hash[kFix3HashSize + hash3Value]; + curMatch4 = hash[kFix4HashSize + hash4Value]; + + hash[ hash2Value] = + hash[kFix3HashSize + hash3Value] = + hash[kFix4HashSize + hash4Value] = + lzPos; + + if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch2 - 1; + if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) + { + distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 4 : 3; + return distances + 2; + } + distances[0] = 2; + distances += 2; + } + if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) + { + distances[1] = lzPos - curMatch3 - 1; + if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) + { + distances[0] = 4; + return distances + 2; + } + distances[0] = 3; + distances += 2; + } + + if (curMatch4 >= matchMinPos) + if ( + cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && + cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] + ) + { + *distances++ = 4; + *distances++ = lzPos - curMatch4 - 1; + } + return distances; +} +*/ + +#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; + +UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *distances) +{ + const UInt32 *btBuf = p->btBuf + p->btBufPos; + UInt32 len = *btBuf++; + p->btBufPos += 1 + len; + p->btNumAvailBytes--; + { + UInt32 i; + for (i = 0; i < len; i += 2) + { + *distances++ = *btBuf++; + *distances++ = *btBuf++; + } + } + INCREASE_LZ_POS + return len; +} + +UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *distances) +{ + const UInt32 *btBuf = p->btBuf + p->btBufPos; + UInt32 len = *btBuf++; + p->btBufPos += 1 + len; + + if (len == 0) + { + if (p->btNumAvailBytes-- >= 4) + len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); + } + else + { + /* Condition: there are matches in btBuf 
with length < p->numHashBytes */ + UInt32 *distances2; + p->btNumAvailBytes--; + distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); + do + { + *distances2++ = *btBuf++; + *distances2++ = *btBuf++; + } + while ((len -= 2) != 0); + len = (UInt32)(distances2 - (distances)); + } + INCREASE_LZ_POS + return len; +} + +#define SKIP_HEADER2 do { GET_NEXT_BLOCK_IF_REQUIRED +#define SKIP_HEADER(n) SKIP_HEADER2 if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; +#define SKIP_FOOTER } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0); + +void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER2 { p->btNumAvailBytes--; + SKIP_FOOTER +} + +void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER(2) + UInt32 hash2Value; + MT_HASH2_CALC + hash[hash2Value] = p->lzPos; + SKIP_FOOTER +} + +void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER(3) + UInt32 hash2Value, hash3Value; + MT_HASH3_CALC + hash[kFix3HashSize + hash3Value] = + hash[ hash2Value] = + p->lzPos; + SKIP_FOOTER +} + +/* +void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) +{ + SKIP_HEADER(4) + UInt32 hash2Value, hash3Value, hash4Value; + MT_HASH4_CALC + hash[kFix4HashSize + hash4Value] = + hash[kFix3HashSize + hash3Value] = + hash[ hash2Value] = + p->lzPos; + SKIP_FOOTER +} +*/ + +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) +{ + vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; + vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; + vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; + vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; + switch(p->MatchFinder->numHashBytes) + { + case 2: + p->GetHeadsFunc = GetHeads2; + p->MixMatchesFunc = (Mf_Mix_Matches)0; + 
vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; + vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; + break; + case 3: + p->GetHeadsFunc = GetHeads3; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; + break; + default: + /* case 4: */ + p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; + /* p->GetHeadsFunc = GetHeads4; */ + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; + break; + /* + default: + p->GetHeadsFunc = GetHeads5; + p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; + vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; + break; + */ + } +} + +} diff --git a/crnlib/lzma_LzFindMt.h b/crnlib/lzma_LzFindMt.h new file mode 100644 index 00000000..b815d739 --- /dev/null +++ b/crnlib/lzma_LzFindMt.h @@ -0,0 +1,101 @@ +/* LzFindMt.h -- multithreaded Match finder for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZFINDMT_H +#define __LZFINDMT_H + +#include "lzma_Threads.h" +#include "lzma_LzFind.h" + +namespace crnlib { + +#define kMtHashBlockSize (1 << 13) +#define kMtHashNumBlocks (1 << 3) +#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) + +#define kMtBtBlockSize (1 << 14) +#define kMtBtNumBlocks (1 << 6) +#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) + +typedef struct _CMtSync +{ + Bool wasCreated; + Bool needStart; + Bool exit; + Bool stopWriting; + + CThread thread; + CAutoResetEvent canStart; + CAutoResetEvent wasStarted; + CAutoResetEvent wasStopped; + CSemaphore freeSemaphore; + CSemaphore filledSemaphore; + Bool csWasInitialized; + Bool csWasEntered; + CCriticalSection cs; + UInt32 numProcessedBlocks; +} CMtSync; + +typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); + +/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ +#define kMtCacheLineDummy 128 + +typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, + UInt32 *hash, UInt32 hashMask, 
UInt32 *heads, UInt32 numHeads, const UInt32 *crc); + +typedef struct _CMatchFinderMt +{ + /* LZ */ + const Byte *pointerToCurPos; + UInt32 *btBuf; + UInt32 btBufPos; + UInt32 btBufPosLimit; + UInt32 lzPos; + UInt32 btNumAvailBytes; + + UInt32 *hash; + UInt32 fixedHashSize; + UInt32 historySize; + const UInt32 *crc; + + Mf_Mix_Matches MixMatchesFunc; + + /* LZ + BT */ + CMtSync btSync; + Byte btDummy[kMtCacheLineDummy]; + + /* BT */ + UInt32 *hashBuf; + UInt32 hashBufPos; + UInt32 hashBufPosLimit; + UInt32 hashNumAvail; + + CLzRef *son; + UInt32 matchMaxLen; + UInt32 numHashBytes; + UInt32 pos; + Byte *buffer; + UInt32 cyclicBufferPos; + UInt32 cyclicBufferSize; /* it must be historySize + 1 */ + UInt32 cutValue; + + /* BT + Hash */ + CMtSync hashSync; + /* Byte hashDummy[kMtCacheLineDummy]; */ + + /* Hash */ + Mf_GetHeads GetHeadsFunc; + CMatchFinder *MatchFinder; +} CMatchFinderMt; + +void MatchFinderMt_Construct(CMatchFinderMt *p); +void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAlloc *alloc); +SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, + UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc *alloc); +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); +void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); + +} + +#endif diff --git a/crnlib/lzma_LzHash.h b/crnlib/lzma_LzHash.h new file mode 100644 index 00000000..c9234175 --- /dev/null +++ b/crnlib/lzma_LzHash.h @@ -0,0 +1,54 @@ +/* LzHash.h -- HASH functions for LZ algorithms +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZHASH_H +#define __LZHASH_H + +#define kHash2Size (1 << 10) +#define kHash3Size (1 << 16) +#define kHash4Size (1 << 20) + +#define kFix3HashSize (kHash2Size) +#define kFix4HashSize (kHash2Size + kHash3Size) +#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) + +#define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); + +#define HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + 
hash2Value = temp & (kHash2Size - 1); \ + hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; } + +#define HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; } + +#define HASH5_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ + hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & p->hashMask; \ + hash4Value &= (kHash4Size - 1); } + +/* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ +#define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; + + +#define MT_HASH2_CALC \ + hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + +#define MT_HASH3_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } + +#define MT_HASH4_CALC { \ + UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + hash2Value = temp & (kHash2Size - 1); \ + hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ + hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); } + +#endif diff --git a/crnlib/lzma_LzmaDec.cpp b/crnlib/lzma_LzmaDec.cpp new file mode 100644 index 00000000..fab2ee9a --- /dev/null +++ b/crnlib/lzma_LzmaDec.cpp @@ -0,0 +1,1011 @@ +/* LzmaDec.c -- LZMA Decoder +2008-11-06 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include "lzma_LzmaDec.h" + +#include + +namespace crnlib { + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 + 
+#define RC_INIT_SIZE 5 + +#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); +#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); +#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ + { UPDATE_0(p); i = (i + i); A0; } else \ + { UPDATE_1(p); i = (i + i) + 1; A1; } +#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) + +#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } +#define TREE_DECODE(probs, limit, i) \ + { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } + +/* #define _LZMA_SIZE_OPT */ + +#ifdef _LZMA_SIZE_OPT +#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) +#else +#define TREE_6_DECODE(probs, i) \ + { i = 1; \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + TREE_GET_BIT(probs, i); \ + i -= 0x40; } +#endif + +#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } + +#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) +#define UPDATE_0_CHECK range = bound; +#define UPDATE_1_CHECK range -= bound; code -= bound; +#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ + { UPDATE_0_CHECK; i = (i + i); A0; } else \ + { UPDATE_1_CHECK; i = (i + i) + 1; A1; } +#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) +#define TREE_DECODE_CHECK(probs, limit, i) \ + { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } + + +#define kNumPosBitsMax 4 +#define kNumPosStatesMax (1 << kNumPosBitsMax) + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) 
+#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define LenChoice 0 +#define LenChoice2 (LenChoice + 1) +#define LenLow (LenChoice2 + 1) +#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) +#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) +#define kNumLenProbs (LenHigh + kLenNumHighSymbols) + + +#define kNumStates 12 +#define kNumLitStates 7 + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) + +#define kNumPosSlotBits 6 +#define kNumLenToPosStates 4 + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) + +#define kMatchMinLen 2 +#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) + +#define IsMatch 0 +#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) +#define IsRepG0 (IsRep + kNumStates) +#define IsRepG1 (IsRepG0 + kNumStates) +#define IsRepG2 (IsRepG1 + kNumStates) +#define IsRep0Long (IsRepG2 + kNumStates) +#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) +#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) +#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) +#define LenCoder (Align + kAlignTableSize) +#define RepLenCoder (LenCoder + kNumLenProbs) +#define Literal (RepLenCoder + kNumLenProbs) + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) + +#if Literal != LZMA_BASE_SIZE +StopCompilingDueBUG +#endif + +static const Byte kLiteralNextStates[kNumStates * 2] = +{ + 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, + 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 +}; + +#define LZMA_DIC_MIN (1 << 12) + +/* First LZMA-symbol is always decoded. 
+And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization +Out: + Result: + SZ_OK - OK + SZ_ERROR_DATA - Error + p->remainLen: + < kMatchSpecLenStart : normal remain + = kMatchSpecLenStart : finished + = kMatchSpecLenStart + 1 : Flush marker + = kMatchSpecLenStart + 2 : State Init Marker +*/ + +static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + CLzmaProb *probs = p->probs; + + unsigned state = p->state; + UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; + unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; + unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; + unsigned lc = p->prop.lc; + + Byte *dic = p->dic; + SizeT dicBufSize = p->dicBufSize; + SizeT dicPos = p->dicPos; + + UInt32 processedPos = p->processedPos; + UInt32 checkDicSize = p->checkDicSize; + unsigned len = 0; + + const Byte *buf = p->buf; + UInt32 range = p->range; + UInt32 code = p->code; + + do + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = processedPos & pbMask; + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + unsigned symbol; + UPDATE_0(prob); + prob = probs + Literal; + if (checkDicSize != 0 || processedPos != 0) + prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + + (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); + + if (state < kNumLitStates) + { + symbol = 1; + do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? 
dicBufSize : 0)]; + unsigned offs = 0x100; + symbol = 1; + do + { + unsigned bit; + CLzmaProb *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) + } + while (symbol < 0x100); + } + dic[dicPos++] = (Byte)symbol; + processedPos++; + + state = kLiteralNextStates[state]; + /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */ + continue; + } + else + { + UPDATE_1(prob); + prob = probs + IsRep + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + state += kNumStates; + prob = probs + LenCoder; + } + else + { + UPDATE_1(prob); + if (checkDicSize == 0 && processedPos == 0) + return SZ_ERROR_DATA; + prob = probs + IsRepG0 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0(prob) + { + UPDATE_0(prob); + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; + dicPos++; + processedPos++; + state = state < kNumLitStates ? 9 : 11; + continue; + } + UPDATE_1(prob); + } + else + { + UInt32 distance; + UPDATE_1(prob); + prob = probs + IsRepG1 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep1; + } + else + { + UPDATE_1(prob); + prob = probs + IsRepG2 + state; + IF_BIT_0(prob) + { + UPDATE_0(prob); + distance = rep2; + } + else + { + UPDATE_1(prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < kNumLitStates ? 
8 : 11; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = (1 << kLenNumLowBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenChoice2; + IF_BIT_0(probLen) + { + UPDATE_0(probLen); + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = (1 << kLenNumMidBits); + } + else + { + UPDATE_1(probLen); + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = (1 << kLenNumHighBits); + } + } + TREE_DECODE(probLen, limit, len); + len += offset; + } + + if (state >= kNumStates) + { + UInt32 distance; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); + TREE_6_DECODE(prob, distance); + if (distance >= kStartPosModelIndex) + { + unsigned posSlot = (unsigned)distance; + int numDirectBits = (int)(((distance >> 1) - 1)); + distance = (2 | (distance & 1)); + if (posSlot < kEndPosModelIndex) + { + distance <<= numDirectBits; + prob = probs + SpecPos + distance - posSlot - 1; + { + UInt32 mask = 1; + unsigned i = 1; + do + { + GET_BIT2(prob + i, i, ; , distance |= mask); + mask <<= 1; + } + while (--numDirectBits != 0); + } + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE + range >>= 1; + + { + UInt32 t; + code -= range; + t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */ + distance = (distance << 1) + (t + 1); + code += range & t; + } + /* + distance <<= 1; + if (code >= range) + { + code -= range; + distance |= 1; + } + */ + } + while (--numDirectBits != 0); + prob = probs + Align; + distance <<= kNumAlignBits; + { + unsigned i = 1; + GET_BIT2(prob + i, i, ; , distance |= 1); + GET_BIT2(prob + i, i, ; , distance |= 2); + GET_BIT2(prob + i, i, ; , distance |= 4); + GET_BIT2(prob + i, i, ; , distance |= 8); + } + if (distance == 
(UInt32)0xFFFFFFFF) + { + len += kMatchSpecLenStart; + state -= kNumStates; + break; + } + } + } + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + rep0 = distance + 1; + if (checkDicSize == 0) + { + if (distance >= processedPos) + return SZ_ERROR_DATA; + } + else if (distance >= checkDicSize) + return SZ_ERROR_DATA; + state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; + /* state = kLiteralNextStates[state]; */ + } + + len += kMatchMinLen; + + if (limit == dicPos) + return SZ_ERROR_DATA; + { + SizeT rem = limit - dicPos; + unsigned curLen = ((rem < len) ? (unsigned)rem : len); + SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); + + processedPos += curLen; + + len -= curLen; + if (pos + curLen <= dicBufSize) + { + Byte *dest = dic + dicPos; + ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; + const Byte *lim = dest + curLen; + dicPos += curLen; + do + *(dest) = (Byte)*(dest + src); + while (++dest != lim); + } + else + { + do + { + dic[dicPos++] = dic[pos]; + if (++pos == dicBufSize) + pos = 0; + } + while (--curLen != 0); + } + } + } + } + while (dicPos < limit && buf < bufLimit); + NORMALIZE; + p->buf = buf; + p->range = range; + p->code = code; + p->remainLen = len; + p->dicPos = dicPos; + p->processedPos = processedPos; + p->reps[0] = rep0; + p->reps[1] = rep1; + p->reps[2] = rep2; + p->reps[3] = rep3; + p->state = state; + + return SZ_OK; +} + +static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) +{ + if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) + { + Byte *dic = p->dic; + SizeT dicPos = p->dicPos; + SizeT dicBufSize = p->dicBufSize; + unsigned len = p->remainLen; + UInt32 rep0 = p->reps[0]; + if (limit - dicPos < len) + len = (unsigned)(limit - dicPos); + + if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) + p->checkDicSize = p->prop.dicSize; + + p->processedPos += len; + p->remainLen -= len; + while (len-- != 0) + { + dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < 
rep0) ? dicBufSize : 0)]; + dicPos++; + } + p->dicPos = dicPos; + } +} + +static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) +{ + do + { + SizeT limit2 = limit; + if (p->checkDicSize == 0) + { + UInt32 rem = p->prop.dicSize - p->processedPos; + if (limit - p->dicPos > rem) + limit2 = p->dicPos + rem; + } + RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); + if (p->processedPos >= p->prop.dicSize) + p->checkDicSize = p->prop.dicSize; + LzmaDec_WriteRem(p, limit); + } + while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); + + if (p->remainLen > kMatchSpecLenStart) + { + p->remainLen = kMatchSpecLenStart; + } + return 0; +} + +typedef enum +{ + DUMMY_ERROR, /* unexpected end of input stream */ + DUMMY_LIT, + DUMMY_MATCH, + DUMMY_REP +} ELzmaDummy; + +static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize) +{ + UInt32 range = p->range; + UInt32 code = p->code; + const Byte *bufLimit = buf + inSize; + CLzmaProb *probs = p->probs; + unsigned state = p->state; + ELzmaDummy res; + + { + CLzmaProb *prob; + UInt32 bound; + unsigned ttt; + unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); + + prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK + + /* if (bufLimit - buf >= 7) return DUMMY_LIT; */ + + prob = probs + Literal; + if (p->checkDicSize != 0 || p->processedPos != 0) + prob += (LZMA_LIT_SIZE * + ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + + (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); + + if (state < kNumLitStates) + { + unsigned symbol = 1; + do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); + } + else + { + unsigned matchByte = p->dic[p->dicPos - p->reps[0] + + ((p->dicPos < p->reps[0]) ? 
p->dicBufSize : 0)]; + unsigned offs = 0x100; + unsigned symbol = 1; + do + { + unsigned bit; + CLzmaProb *probLit; + matchByte <<= 1; + bit = (matchByte & offs); + probLit = prob + offs + bit + symbol; + GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) + } + while (symbol < 0x100); + } + res = DUMMY_LIT; + } + else + { + unsigned len; + UPDATE_1_CHECK; + + prob = probs + IsRep + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + state = 0; + prob = probs + LenCoder; + res = DUMMY_MATCH; + } + else + { + UPDATE_1_CHECK; + res = DUMMY_REP; + prob = probs + IsRepG0 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + NORMALIZE_CHECK; + return DUMMY_REP; + } + else + { + UPDATE_1_CHECK; + } + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG1 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + prob = probs + IsRepG2 + state; + IF_BIT_0_CHECK(prob) + { + UPDATE_0_CHECK; + } + else + { + UPDATE_1_CHECK; + } + } + } + state = kNumStates; + prob = probs + RepLenCoder; + } + { + unsigned limit, offset; + CLzmaProb *probLen = prob + LenChoice; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenLow + (posState << kLenNumLowBits); + offset = 0; + limit = 1 << kLenNumLowBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenChoice2; + IF_BIT_0_CHECK(probLen) + { + UPDATE_0_CHECK; + probLen = prob + LenMid + (posState << kLenNumMidBits); + offset = kLenNumLowSymbols; + limit = 1 << kLenNumMidBits; + } + else + { + UPDATE_1_CHECK; + probLen = prob + LenHigh; + offset = kLenNumLowSymbols + kLenNumMidSymbols; + limit = 1 << kLenNumHighBits; + } + } + TREE_DECODE_CHECK(probLen, limit, len); + len += offset; + } + + if (state < 4) + { + unsigned posSlot; + prob = probs + PosSlot + + ((len < kNumLenToPosStates ? 
len : kNumLenToPosStates - 1) << + kNumPosSlotBits); + TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); + if (posSlot >= kStartPosModelIndex) + { + int numDirectBits = ((posSlot >> 1) - 1); + + /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ + + if (posSlot < kEndPosModelIndex) + { + prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; + } + else + { + numDirectBits -= kNumAlignBits; + do + { + NORMALIZE_CHECK + range >>= 1; + code -= range & (((code - range) >> 31) - 1); + /* if (code >= range) code -= range; */ + } + while (--numDirectBits != 0); + prob = probs + Align; + numDirectBits = kNumAlignBits; + } + { + unsigned i = 1; + do + { + GET_BIT_CHECK(prob + i, i); + } + while (--numDirectBits != 0); + } + } + } + } + } + NORMALIZE_CHECK; + return res; +} + + +static void LzmaDec_InitRc(CLzmaDec *p, const Byte *data) +{ + p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]); + p->range = 0xFFFFFFFF; + p->needFlush = 0; +} + +void LzmaDec_InitDicAndState(CLzmaDec *p, Bool initDic, Bool initState) +{ + p->needFlush = 1; + p->remainLen = 0; + p->tempBufSize = 0; + + if (initDic) + { + p->processedPos = 0; + p->checkDicSize = 0; + p->needInitState = 1; + } + if (initState) + p->needInitState = 1; +} + +void LzmaDec_Init(CLzmaDec *p) +{ + p->dicPos = 0; + LzmaDec_InitDicAndState(p, True, True); +} + +static void LzmaDec_InitStateReal(CLzmaDec *p) +{ + UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); + UInt32 i; + CLzmaProb *probs = p->probs; + for (i = 0; i < numProbs; i++) + probs[i] = kBitModelTotal >> 1; + p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; + p->state = 0; + p->needInitState = 0; +} + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen, + ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT inSize = *srcLen; + (*srcLen) = 0; + LzmaDec_WriteRem(p, dicLimit); + + *status = 
LZMA_STATUS_NOT_SPECIFIED; + + while (p->remainLen != kMatchSpecLenStart) + { + int checkEndMarkNow; + + if (p->needFlush != 0) + { + for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) + p->tempBuf[p->tempBufSize++] = *src++; + if (p->tempBufSize < RC_INIT_SIZE) + { + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (p->tempBuf[0] != 0) + return SZ_ERROR_DATA; + + LzmaDec_InitRc(p, p->tempBuf); + p->tempBufSize = 0; + } + + checkEndMarkNow = 0; + if (p->dicPos >= dicLimit) + { + if (p->remainLen == 0 && p->code == 0) + { + *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; + return SZ_OK; + } + if (finishMode == LZMA_FINISH_ANY) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_OK; + } + if (p->remainLen != 0) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + checkEndMarkNow = 1; + } + + if (p->needInitState) + LzmaDec_InitStateReal(p); + + if (p->tempBufSize == 0) + { + SizeT processed; + const Byte *bufLimit; + if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, src, inSize); + if (dummyRes == DUMMY_ERROR) + { + memcpy(p->tempBuf, src, inSize); + p->tempBufSize = (unsigned)inSize; + (*srcLen) += inSize; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + bufLimit = src; + } + else + bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; + p->buf = src; + if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) + return SZ_ERROR_DATA; + processed = (SizeT)(p->buf - src); + (*srcLen) += processed; + src += processed; + inSize -= processed; + } + else + { + unsigned rem = p->tempBufSize, lookAhead = 0; + while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) + p->tempBuf[rem++] = src[lookAhead++]; + p->tempBufSize = rem; + if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) + { + int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, 
rem); + if (dummyRes == DUMMY_ERROR) + { + (*srcLen) += lookAhead; + *status = LZMA_STATUS_NEEDS_MORE_INPUT; + return SZ_OK; + } + if (checkEndMarkNow && dummyRes != DUMMY_MATCH) + { + *status = LZMA_STATUS_NOT_FINISHED; + return SZ_ERROR_DATA; + } + } + p->buf = p->tempBuf; + if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) + return SZ_ERROR_DATA; + lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); + (*srcLen) += lookAhead; + src += lookAhead; + inSize -= lookAhead; + p->tempBufSize = 0; + } + } + if (p->code == 0) + *status = LZMA_STATUS_FINISHED_WITH_MARK; + return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; +} + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) +{ + SizeT outSize = *destLen; + SizeT inSize = *srcLen; + *srcLen = *destLen = 0; + for (;;) + { + SizeT inSizeCur = inSize, outSizeCur, dicPos; + ELzmaFinishMode curFinishMode; + SRes res; + if (p->dicPos == p->dicBufSize) + p->dicPos = 0; + dicPos = p->dicPos; + if (outSize > p->dicBufSize - dicPos) + { + outSizeCur = p->dicBufSize; + curFinishMode = LZMA_FINISH_ANY; + } + else + { + outSizeCur = dicPos + outSize; + curFinishMode = finishMode; + } + + res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); + src += inSizeCur; + inSize -= inSizeCur; + *srcLen += inSizeCur; + outSizeCur = p->dicPos - dicPos; + memcpy(dest, p->dic + dicPos, outSizeCur); + dest += outSizeCur; + outSize -= outSizeCur; + *destLen += outSizeCur; + if (res != 0) + return res; + if (outSizeCur == 0 || outSize == 0) + return SZ_OK; + } +} + +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->probs); + p->probs = 0; +} + +static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->dic); + p->dic = 0; +} + +void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) +{ + LzmaDec_FreeProbs(p, alloc); + LzmaDec_FreeDict(p, alloc); +} + +SRes 
LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size) +{ + UInt32 dicSize; + Byte d; + + if (size < LZMA_PROPS_SIZE) + return SZ_ERROR_UNSUPPORTED; + else + dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24); + + if (dicSize < LZMA_DIC_MIN) + dicSize = LZMA_DIC_MIN; + p->dicSize = dicSize; + + d = data[0]; + if (d >= (9 * 5 * 5)) + return SZ_ERROR_UNSUPPORTED; + + p->lc = d % 9; + d /= 9; + p->pb = d / 5; + p->lp = d % 5; + + return SZ_OK; +} + +static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) +{ + UInt32 numProbs = LzmaProps_GetNumProbs(propNew); + if (p->probs == 0 || numProbs != p->numProbs) + { + LzmaDec_FreeProbs(p, alloc); + p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); + p->numProbs = numProbs; + if (p->probs == 0) + return SZ_ERROR_MEM; + } + return SZ_OK; +} + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +{ + CLzmaProps propNew; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc) +{ + CLzmaProps propNew; + SizeT dicBufSize; + RINOK(LzmaProps_Decode(&propNew, props, propsSize)); + RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); + dicBufSize = propNew.dicSize; + if (p->dic == 0 || dicBufSize != p->dicBufSize) + { + LzmaDec_FreeDict(p, alloc); + p->dic = (Byte *)alloc->Alloc(alloc, dicBufSize); + if (p->dic == 0) + { + LzmaDec_FreeProbs(p, alloc); + return SZ_ERROR_MEM; + } + } + p->dicBufSize = dicBufSize; + p->prop = propNew; + return SZ_OK; +} + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc) +{ + CLzmaDec p; + SRes res; + SizeT inSize = *srcLen; + SizeT 
outSize = *destLen; + *srcLen = *destLen = 0; + if (inSize < RC_INIT_SIZE) + return SZ_ERROR_INPUT_EOF; + + LzmaDec_Construct(&p); + res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); + if (res != 0) + return res; + p.dic = dest; + p.dicBufSize = outSize; + + LzmaDec_Init(&p); + + *srcLen = inSize; + res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); + + if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) + res = SZ_ERROR_INPUT_EOF; + + (*destLen) = p.dicPos; + LzmaDec_FreeProbs(&p, alloc); + return res; +} + +} diff --git a/crnlib/lzma_LzmaDec.h b/crnlib/lzma_LzmaDec.h new file mode 100644 index 00000000..2a86ab8e --- /dev/null +++ b/crnlib/lzma_LzmaDec.h @@ -0,0 +1,227 @@ +/* LzmaDec.h -- LZMA Decoder +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZMADEC_H +#define __LZMADEC_H + +#include "lzma_Types.h" + +namespace crnlib { + +/* #define _LZMA_PROB32 */ +/* _LZMA_PROB32 can increase the speed on some CPUs, + but memory usage for CLzmaDec::probs will be doubled in that case */ + +#ifdef _LZMA_PROB32 +#define CLzmaProb UInt32 +#else +#define CLzmaProb UInt16 +#endif + + +/* ---------- LZMA Properties ---------- */ + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaProps +{ + unsigned lc, lp, pb; + UInt32 dicSize; +} CLzmaProps; + +/* LzmaProps_Decode - decodes properties +Returns: + SZ_OK + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size); + + +/* ---------- LZMA Decoder state ---------- */ + +/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. 
+ Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ + +#define LZMA_REQUIRED_INPUT_MAX 20 + +typedef struct +{ + CLzmaProps prop; + CLzmaProb *probs; + Byte *dic; + const Byte *buf; + UInt32 range, code; + SizeT dicPos; + SizeT dicBufSize; + UInt32 processedPos; + UInt32 checkDicSize; + unsigned state; + UInt32 reps[4]; + unsigned remainLen; + int needFlush; + int needInitState; + UInt32 numProbs; + unsigned tempBufSize; + Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; +} CLzmaDec; + +#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } + +void LzmaDec_Init(CLzmaDec *p); + +/* There are two types of LZMA streams: + 0) Stream with end mark. That end mark adds about 6 bytes to compressed size. + 1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ + +typedef enum +{ + LZMA_FINISH_ANY, /* finish at any point */ + LZMA_FINISH_END /* block must be finished at the end */ +} ELzmaFinishMode; + +/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! + + You must use LZMA_FINISH_END, when you know that current output buffer + covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. + + If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, + and output value of destLen will be less than output buffer size limit. + You can check status result also. + + You can use multiple checks to test data integrity after full decompression: + 1) Check Result and "status" variable. + 2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. + 3) Check that output(srcLen) = compressedSize, if you know real compressedSize. + You must use correct finish mode in that case. */ + +typedef enum +{ + LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */ + LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. 
*/ + LZMA_STATUS_NOT_FINISHED, /* stream was not finished */ + LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */ + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */ +} ELzmaStatus; + +/* ELzmaStatus is used only as output value for function call */ + + +/* ---------- Interfaces ---------- */ + +/* There are 3 levels of interfaces: + 1) Dictionary Interface + 2) Buffer Interface + 3) One Call Interface + You can select any of these interfaces, but don't mix functions from different + groups for same object. */ + + +/* There are two variants to allocate state for Dictionary Interface: + 1) LzmaDec_Allocate / LzmaDec_Free + 2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs + You can use variant 2, if you set dictionary buffer manually. + For Buffer Interface you must always use variant 1. + +LzmaDec_Allocate* can return: + SZ_OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties +*/ + +SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAlloc *alloc); +void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); + +SRes LzmaDec_Allocate(CLzmaDec *state, const Byte *prop, unsigned propsSize, ISzAlloc *alloc); +void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); + +/* ---------- Dictionary Interface ---------- */ + +/* You can use it, if you want to eliminate the overhead for data copying from + dictionary to some other external buffer. + You must work with CLzmaDec variables directly in this interface. + + STEPS: + LzmaDec_Constr() + LzmaDec_Allocate() + for (each new stream) + { + LzmaDec_Init() + while (it needs more decompression) + { + LzmaDec_DecodeToDic() + use data from CLzmaDec::dic and update CLzmaDec::dicPos + } + } + LzmaDec_Free() +*/ + +/* LzmaDec_DecodeToDic + + The decoding to internal dictionary buffer (CLzmaDec::dic). + You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! 
+ +finishMode: + It has meaning only if the decoding reaches output limit (dicLimit). + LZMA_FINISH_ANY - Decode just dicLimit bytes. + LZMA_FINISH_END - Stream must be finished after dicLimit. + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_NEEDS_MORE_INPUT + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error +*/ + +SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- Buffer Interface ---------- */ + +/* It's zlib-like interface. + See LzmaDec_DecodeToDic description for information about STEPS and return results, + but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need + to work with CLzmaDec variables manually. + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). +*/ + +SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, + const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); + + +/* ---------- One Call Interface ---------- */ + +/* LzmaDecode + +finishMode: + It has meaning only if the decoding reaches output limit (*destLen). + LZMA_FINISH_ANY - Decode just destLen bytes. + LZMA_FINISH_END - Stream must be finished after (*destLen). + +Returns: + SZ_OK + status: + LZMA_STATUS_FINISHED_WITH_MARK + LZMA_STATUS_NOT_FINISHED + LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). 
+*/ + +SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, + const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, + ELzmaStatus *status, ISzAlloc *alloc); + +} + +#endif diff --git a/crnlib/lzma_LzmaEnc.cpp b/crnlib/lzma_LzmaEnc.cpp new file mode 100644 index 00000000..3daffcc4 --- /dev/null +++ b/crnlib/lzma_LzmaEnc.cpp @@ -0,0 +1,2279 @@ +/* LzmaEnc.c -- LZMA Encoder +2008-10-04 : Igor Pavlov : Public domain */ +#include "crn_core.h" +#include + +/* #define SHOW_STAT */ +/* #define SHOW_STAT2 */ + +#if defined(SHOW_STAT) || defined(SHOW_STAT2) +#include +#endif + +#include "lzma_LzmaEnc.h" + +#include "lzma_LzFind.h" +#ifdef COMPRESS_MF_MT +#include "lzma_LzFindMt.h" +#endif + +namespace crnlib { + +#ifdef SHOW_STAT +static int ttt = 0; +#endif + +#define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) + +#define kBlockSize (9 << 10) +#define kUnpackBlockSize (1 << 18) +#define kMatchArraySize (1 << 21) +#define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) + +#define kNumMaxDirectBits (31) + +#define kNumTopBits 24 +#define kTopValue ((UInt32)1 << kNumTopBits) + +#define kNumBitModelTotalBits 11 +#define kBitModelTotal (1 << kNumBitModelTotalBits) +#define kNumMoveBits 5 +#define kProbInitValue (kBitModelTotal >> 1) + +#define kNumMoveReducingBits 4 +#define kNumBitPriceShiftBits 4 +#define kBitPrice (1 << kNumBitPriceShiftBits) + +void LzmaEncProps_Init(CLzmaEncProps *p) +{ + p->level = 5; + p->dictSize = p->mc = 0; + p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; + p->writeEndMark = 0; +} + +void LzmaEncProps_Normalize(CLzmaEncProps *p) +{ + int level = p->level; + if (level < 0) level = 5; + p->level = level; + if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? 
(1 << 25) : (1 << 26))); + if (p->lc < 0) p->lc = 3; + if (p->lp < 0) p->lp = 0; + if (p->pb < 0) p->pb = 2; + if (p->algo < 0) p->algo = (level < 5 ? 0 : 1); + if (p->fb < 0) p->fb = (level < 7 ? 32 : 64); + if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1); + if (p->numHashBytes < 0) p->numHashBytes = 4; + if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1); + if (p->numThreads < 0) p->numThreads = ((p->btMode && p->algo) ? 2 : 1); +} +/* Returns the dictionary size LzmaEncProps_Normalize would select; works on a copy, so *props2 is not modified. */ +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) +{ + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + return props.dictSize; +} + +/* #define LZMA_LOG_BSR */ +/* Define it for Intel's CPU */ + + +#ifdef LZMA_LOG_BSR + +#define kDicLogSizeMaxCompress 30 +/* Position slot = 2 * i plus the bit just below bit i, where i is the index written by _BitScanReverse. */ +#define BSR2_RET(pos, res) { unsigned long i; _BitScanReverse(&i, (pos)); res = (i + i) + ((pos >> (i - 1)) & 1); } + +UInt32 GetPosSlot1(UInt32 pos) +{ + UInt32 res; + BSR2_RET(pos, res); + return res; +} +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } + +#else + +#define kNumLogBits (9 + (int)sizeof(size_t) / 2) +#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) +/* Fills g_FastPos: entries 0 and 1 hold 0 and 1; each slot s from 2 to kNumLogBits * 2 - 1 then fills the next 1 << ((s >> 1) - 1) entries with s. */ +void LzmaEnc_FastPosInit(Byte *g_FastPos) +{ + int c = 2, slotFast; + g_FastPos[0] = 0; + g_FastPos[1] = 1; + + for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) + { + UInt32 k = (1 << ((slotFast >> 1) - 1)); + UInt32 j; + for (j = 0; j < k; j++, c++) + g_FastPos[c] = (Byte)slotFast; + } +} +/* Table lookup for large positions: i is 6, or 6 + (kNumLogBits - 1) once pos reaches 1 << (kNumLogBits + 6); the slot is g_FastPos[pos >> i] + 2 * i. Reads p->g_FastPos, so a variable p must be in scope. */ +#define BSR2_RET(pos, res) { UInt32 i = 6 + ((kNumLogBits - 1) & \ + (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \ + res = p->g_FastPos[pos >> i] + (i * 2); } +/* +#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? 
\ + p->g_FastPos[pos >> 6] + 12 : \ + p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; } +*/ + +#define GetPosSlot1(pos) p->g_FastPos[pos] +#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } +#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos]; else BSR2_RET(pos, res); } + +#endif + + +#define LZMA_NUM_REPS 4 + +typedef unsigned CState; +/* One node of the optimal-parse table (CLzmaEnc.opt): best price found so far, how the node was reached (posPrev and backPrev, plus prev1IsChar, prev2, posPrev2 and backPrev2 for combined steps), the coder state and the rep distances at that point. */ +typedef struct _COptimal +{ + UInt32 price; + + CState state; + int prev1IsChar; + int prev2; + + UInt32 posPrev2; + UInt32 backPrev2; + + UInt32 posPrev; + UInt32 backPrev; + UInt32 backs[LZMA_NUM_REPS]; +} COptimal; +/* Number of entries in the CLzmaEnc.opt table. */ +#define kNumOpts (1 << 12) + +#define kNumLenToPosStates 4 +#define kNumPosSlotBits 6 +#define kDicLogSizeMin 0 +#define kDicLogSizeMax 32 +#define kDistTableSizeMax (kDicLogSizeMax * 2) + + +#define kNumAlignBits 4 +#define kAlignTableSize (1 << kNumAlignBits) +#define kAlignMask (kAlignTableSize - 1) + +#define kStartPosModelIndex 4 +#define kEndPosModelIndex 14 +#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex) + +#define kNumFullDistances (1 << (kEndPosModelIndex / 2)) + +#ifdef _LZMA_PROB32 +#define CLzmaProb UInt32 +#else +#define CLzmaProb UInt16 +#endif + +#define LZMA_PB_MAX 4 +#define LZMA_LC_MAX 8 +#define LZMA_LP_MAX 4 + +#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX) + + +#define kLenNumLowBits 3 +#define kLenNumLowSymbols (1 << kLenNumLowBits) +#define kLenNumMidBits 3 +#define kLenNumMidSymbols (1 << kLenNumMidBits) +#define kLenNumHighBits 8 +#define kLenNumHighSymbols (1 << kLenNumHighBits) + +#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) +/* Match lengths therefore range from 2 to 2 + (8 + 8 + 256) - 1 = 273. */ +#define LZMA_MATCH_LEN_MIN 2 +#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1) + +#define kNumStates 12 +/* Length-coder probabilities: choice and choice2 select the low, mid or high tree; low and mid hold one tree per posState, high is shared. */ +typedef struct +{ + CLzmaProb choice; + CLzmaProb choice2; + CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits]; + CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits]; + CLzmaProb 
high[kLenNumHighSymbols]; +} CLenEnc; +/* A CLenEnc plus a cached price row per posState; counters[posState] counts down the encodes left before that row is recomputed (see LenEnc_Encode2). */ +typedef struct +{ + CLenEnc p; + UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal]; + UInt32 tableSize; + UInt32 counters[LZMA_NUM_PB_STATES_MAX]; +} CLenPriceEnc; +/* Range-encoder state with an output buffer [bufBase, bufLim) that is flushed to outStream; res records a failed write and processed counts the bytes already flushed. */ +typedef struct _CRangeEnc +{ + UInt32 range; + Byte cache; + UInt64 low; + UInt64 cacheSize; + Byte *buf; + Byte *bufLim; + Byte *bufBase; + ISeqOutStream *outStream; + UInt64 processed; + SRes res; +} CRangeEnc; +/* In-memory input source for MyRead: data points at the unread bytes and rem is how many remain. */ +typedef struct _CSeqInStreamBuf +{ + ISeqInStream funcTable; + const Byte *data; + SizeT rem; +} CSeqInStreamBuf; +/* Read callback over a CSeqInStreamBuf (passed as pp): copies min(*size, rem) bytes into data, advances the cursor, stores the copied count in *size and always returns SZ_OK. */ +static SRes MyRead(void *pp, void *data, size_t *size) +{ + size_t curSize = *size; + CSeqInStreamBuf *p = (CSeqInStreamBuf *)pp; + if (p->rem < curSize) + curSize = p->rem; + memcpy(data, p->data, curSize); + p->rem -= curSize; + p->data += curSize; + *size = curSize; + return SZ_OK; +} +/* Snapshot of the probability models, length coders, reps and state; written by LzmaEnc_SaveState and read back by LzmaEnc_RestoreState. */ +typedef struct +{ + CLzmaProb *litProbs; + + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + UInt32 reps[LZMA_NUM_REPS]; + UInt32 state; +} CSaveState; +/* Complete encoder instance: match finder, optimal-parse scratch (opt, matches), price tables, probability models, range coder, stream bookkeeping and the saved-state snapshot. */ +typedef struct _CLzmaEnc +{ + IMatchFinder matchFinder; + void *matchFinderObj; + + #ifdef COMPRESS_MF_MT + Bool mtMode; + CMatchFinderMt matchFinderMt; + #endif + + CMatchFinder matchFinderBase; + + #ifdef COMPRESS_MF_MT + Byte pad[128]; + #endif + + UInt32 optimumEndIndex; + UInt32 optimumCurrentIndex; + + UInt32 longestMatchLength; + UInt32 numPairs; + UInt32 numAvail; + COptimal opt[kNumOpts]; + + #ifndef LZMA_LOG_BSR + Byte g_FastPos[1 << kNumLogBits]; + #endif + + UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; + UInt32 
matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + UInt32 numFastBytes; + UInt32 additionalOffset; + UInt32 reps[LZMA_NUM_REPS]; + UInt32 state; + /* Cached price tables read by GetOptimum. */ + UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; + UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; + UInt32 alignPrices[kAlignTableSize]; + UInt32 alignPriceCount; + + UInt32 distTableSize; + + unsigned lc, lp, pb; + unsigned lpMask, pbMask; + /* Adaptive probability models; these are what LzmaEnc_SaveState snapshots. */ + CLzmaProb *litProbs; + + CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX]; + CLzmaProb isRep[kNumStates]; + CLzmaProb isRepG0[kNumStates]; + CLzmaProb isRepG1[kNumStates]; + CLzmaProb isRepG2[kNumStates]; + CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX]; + + CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits]; + CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex]; + CLzmaProb posAlignEncoder[1 << kNumAlignBits]; + + CLenPriceEnc lenEnc; + CLenPriceEnc repLenEnc; + + unsigned lclp; + + Bool fastMode; + + CRangeEnc rc; + + Bool writeEndMark; + UInt64 nowPos64; + UInt32 matchPriceCount; + Bool finished; + Bool multiThread; + + SRes result; + UInt32 dictSize; + UInt32 matchFinderCycles; + + ISeqInStream *inStream; + CSeqInStreamBuf seqBufInStream; + + CSaveState saveState; +} CLzmaEnc; +/* Copies the length coders, state, probability models and reps of the encoder into its embedded saveState; the literal model is copied into the buffer saveState.litProbs points at (0x300 << lclp entries). */ +void LzmaEnc_SaveState(CLzmaEncHandle pp) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + CSaveState *dest = &p->saveState; + int i; + dest->lenEnc = p->lenEnc; + dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + for (i = 0; i < kNumStates; i++) + { + memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); + memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); + } + for (i = 0; i < kNumLenToPosStates; i++) + memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); + memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); + memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); + memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); + memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); + 
memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); + memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); + memcpy(dest->reps, p->reps, sizeof(p->reps)); + memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb)); +} +/* Inverse of LzmaEnc_SaveState: copies every saved model, the length coders, the reps and the state from saveState back into the live encoder. */ +void LzmaEnc_RestoreState(CLzmaEncHandle pp) +{ + CLzmaEnc *dest = (CLzmaEnc *)pp; + const CSaveState *p = &dest->saveState; + int i; + dest->lenEnc = p->lenEnc; + dest->repLenEnc = p->repLenEnc; + dest->state = p->state; + + for (i = 0; i < kNumStates; i++) + { + memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); + memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); + } + for (i = 0; i < kNumLenToPosStates; i++) + memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); + memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); + memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); + memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); + memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); + memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); + memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); + memcpy(dest->reps, p->reps, sizeof(p->reps)); + memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); +} +/* Normalizes a copy of *props2, returns SZ_ERROR_PARAM when lc, lp, pb or dictSize is out of range, otherwise stores the settings in the encoder and its match finder and returns SZ_OK. fb is clamped to [5, LZMA_MATCH_LEN_MAX]; numHashBytes is 4 unless btMode is set, in which case it is clamped to [2, 4]. */ +SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + CLzmaEncProps props = *props2; + LzmaEncProps_Normalize(&props); + /* The limits are built with unsigned shifts: kDicLogSizeMaxCompress is 31 when sizeof(size_t) is 8, which a signed int shift cannot represent. */ + if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX || + props.dictSize > ((UInt32)1 << kDicLogSizeMaxCompress) || props.dictSize > ((UInt32)1 << 30)) + return SZ_ERROR_PARAM; + p->dictSize = props.dictSize; + p->matchFinderCycles = props.mc; + { + unsigned fb = props.fb; + if (fb < 5) + fb = 5; + if (fb > LZMA_MATCH_LEN_MAX) + fb = LZMA_MATCH_LEN_MAX; + p->numFastBytes = fb; + } + p->lc = props.lc; + p->lp = props.lp; + p->pb = props.pb; + p->fastMode = (props.algo == 0); + 
p->matchFinderBase.btMode = props.btMode; + { + UInt32 numHashBytes = 4; + if (props.btMode) + { + if (props.numHashBytes < 2) + numHashBytes = 2; + else if (props.numHashBytes < 4) + numHashBytes = props.numHashBytes; + } + p->matchFinderBase.numHashBytes = numHashBytes; + } + + p->matchFinderBase.cutValue = props.mc; + + p->writeEndMark = props.writeEndMark; + + #ifdef COMPRESS_MF_MT + /* + if (newMultiThread != _multiThread) + { + ReleaseMatchFinder(); + _multiThread = newMultiThread; + } + */ + p->multiThread = (props.numThreads > 1); + #endif + + return SZ_OK; +} +/* State transition tables indexed by the current state (0 .. kNumStates - 1), one table per kind of symbol just coded. */ +static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; +static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; +static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; +static const int kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; +/* True for states 0..6, which are exactly the values kLiteralNextStates can produce. */ +#define IsCharState(s) ((s) < 7) + +#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? 
(len) - 2 : kNumLenToPosStates - 1) +/* Price given to opt-table entries that have not been reached yet. */ +#define kInfinityPrice (1 << 30) +/* Marks the coder as having neither an output stream nor a buffer. */ +static void RangeEnc_Construct(CRangeEnc *p) +{ + p->outStream = 0; + p->bufBase = 0; +} +/* Bytes already flushed, plus bytes waiting in the buffer, plus the pending cacheSize. */ +#define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) +/* RangeEnc_Alloc allocates the RC_BUF_SIZE-byte output buffer if bufBase is still 0; returns 0 when the allocation fails, 1 otherwise. */ +#define RC_BUF_SIZE (1 << 16) +static int RangeEnc_Alloc(CRangeEnc *p, ISzAlloc *alloc) +{ + if (p->bufBase == 0) + { + p->bufBase = (Byte *)alloc->Alloc(alloc, RC_BUF_SIZE); + if (p->bufBase == 0) + return 0; + p->bufLim = p->bufBase + RC_BUF_SIZE; + } + return 1; +} +/* Releases the output buffer through alloc and clears bufBase. */ +static void RangeEnc_Free(CRangeEnc *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->bufBase); + p->bufBase = 0; +} +/* Resets the coder to its start state: low 0, full range, cacheSize 1, empty buffer, nothing processed, no error. */ +static void RangeEnc_Init(CRangeEnc *p) +{ + /* Stream.Init(); */ + p->low = 0; + p->range = 0xFFFFFFFF; + p->cacheSize = 1; + p->cache = 0; + + p->buf = p->bufBase; + + p->processed = 0; + p->res = SZ_OK; +} +/* Writes the buffered bytes to outStream and rewinds the buffer; a short write records SZ_ERROR_WRITE, after which later calls return immediately. */ +static void RangeEnc_FlushStream(CRangeEnc *p) +{ + size_t num; + if (p->res != SZ_OK) + return; + num = p->buf - p->bufBase; + if (num != p->outStream->Write(p->outStream, p->bufBase, num)) + p->res = SZ_ERROR_WRITE; + p->processed += num; + p->buf = p->bufBase; +} +/* Shifts the top byte out of the low 32 bits of low. If that byte is 0xFF and there is no carry (bit 32), it is only counted in cacheSize; otherwise the cached byte and the deferred 0xFF bytes are emitted with the carry added, and the new top byte becomes the cache. */ +static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) +{ + if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) + { + Byte temp = p->cache; + do + { + Byte *buf = p->buf; + *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); + p->buf = buf; + if (buf == p->bufLim) + RangeEnc_FlushStream(p); + temp = 0xFF; + } + while (--p->cacheSize != 0); + p->cache = (Byte)((UInt32)p->low >> 24); + } + p->cacheSize++; + p->low = (UInt32)p->low << 8; +} +/* Pushes out the remaining bytes of low by shifting five times. */ +static void RangeEnc_FlushData(CRangeEnc *p) +{ + int i; + for (i = 0; i < 5; i++) + RangeEnc_ShiftLow(p); +} +/* Encodes the low numBits bits of value, most significant first, halving range for each bit (no adaptive probability). numBits must be at least 1 because the loop body runs before the test. */ +static void RangeEnc_EncodeDirectBits(CRangeEnc *p, UInt32 value, int numBits) +{ + do + { + p->range >>= 1; + p->low += p->range & (0 - ((value >> --numBits) & 1)); + if (p->range < kTopValue) + { + p->range <<= 8; + RangeEnc_ShiftLow(p); + } + } + while (numBits != 0); +} + +static void 
RangeEnc_EncodeBit(CRangeEnc *p, CLzmaProb *prob, UInt32 symbol) +{ /* Codes one bit with the adaptive probability *prob (the share of range given to symbol 0, out of kBitModelTotal), then moves *prob toward the coded symbol by 1 / 32 of the remaining distance. */ + UInt32 ttt = *prob; + UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; + if (symbol == 0) + { + p->range = newBound; + ttt += (kBitModelTotal - ttt) >> kNumMoveBits; + } + else + { + p->low += newBound; + p->range -= newBound; + ttt -= ttt >> kNumMoveBits; + } + *prob = (CLzmaProb)ttt; + if (p->range < kTopValue) + { + p->range <<= 8; + RangeEnc_ShiftLow(p); + } +} +/* Encodes the 8 bits of symbol, most significant first, through a binary tree of probabilities; the node index is the bits coded so far with a leading 1. */ +static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol) +{ + symbol |= 0x100; + do + { + RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); + symbol <<= 1; + } + while (symbol < 0x10000); +} +/* Like LitEnc_Encode, but the probability index also depends on the corresponding bit of matchByte for as long as the coded bits agree with it; offs drops to 0 after the first disagreement. */ +static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 symbol, UInt32 matchByte) +{ + UInt32 offs = 0x100; + symbol |= 0x100; + do + { + matchByte <<= 1; + RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); + symbol <<= 1; + offs &= ~(matchByte ^ symbol); + } + while (symbol < 0x10000); +} +/* Fills the bit-price table: one entry per group of 1 << kNumMoveReducingBits probability values, evaluated at the middle of the group. The repeated squaring with bit counting looks like a fixed-point logarithm (TODO confirm scale). */ +void LzmaEnc_InitPriceTables(UInt32 *ProbPrices) +{ + UInt32 i; + for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) + { + const int kCyclesBits = kNumBitPriceShiftBits; + UInt32 w = i; + UInt32 bitCount = 0; + int j; + for (j = 0; j < kCyclesBits; j++) + { + w = w * w; + bitCount <<= 1; + while (w >= ((UInt32)1 << 16)) + { + w >>= 1; + bitCount++; + } + } + ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); + } +} + +/* Price lookups: for symbol 1 the probability is complemented (XOR with kBitModelTotal - 1) before indexing. GET_PRICE and GET_PRICEa expand with a trailing semicolon, so they can only end a statement. The variants without the a suffix read p->ProbPrices. */ +#define GET_PRICE(prob, symbol) \ + p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICEa(prob, symbol) \ + ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; + +#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] + +#define 
GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] +#define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] +/* Price of coding symbol as a plain literal: sums the bit prices along the same tree walk LitEnc_Encode performs. Nothing is written. */ +static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 symbol, UInt32 *ProbPrices) +{ + UInt32 price = 0; + symbol |= 0x100; + do + { + price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); + symbol <<= 1; + } + while (symbol < 0x10000); + return price; +} +/* Price counterpart of LitEnc_EncodeMatched: same probability indexing, nothing is written. */ +static UInt32 LitEnc_GetPriceMatched(const CLzmaProb *probs, UInt32 symbol, UInt32 matchByte, UInt32 *ProbPrices) +{ + UInt32 price = 0; + UInt32 offs = 0x100; + symbol |= 0x100; + do + { + matchByte <<= 1; + price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); + symbol <<= 1; + offs &= ~(matchByte ^ symbol); + } + while (symbol < 0x10000); + return price; +} + +/* Encodes the low numBitLevels bits of symbol, most significant first, through a bit tree rooted at probs[1]. */ +static void RcTree_Encode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) +{ + UInt32 m = 1; + int i; + for (i = numBitLevels; i != 0;) + { + UInt32 bit; + i--; + bit = (symbol >> i) & 1; + RangeEnc_EncodeBit(rc, probs + m, bit); + m = (m << 1) | bit; + } +} +/* Same tree coding as RcTree_Encode, but the bits are fed least significant first. */ +static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, int numBitLevels, UInt32 symbol) +{ + UInt32 m = 1; + int i; + for (i = 0; i < numBitLevels; i++) + { + UInt32 bit = symbol & 1; + RangeEnc_EncodeBit(rc, probs + m, bit); + m = (m << 1) | bit; + symbol >>= 1; + } +} +/* Price of RcTree_Encode for symbol: walks from the leaf back to the root, adding the price of each bit at its parent node. */ +static UInt32 RcTree_GetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices) +{ + UInt32 price = 0; + symbol |= (1 << numBitLevels); + while (symbol != 1) + { + price += GET_PRICEa(probs[symbol >> 1], symbol & 1); + symbol >>= 1; + } + return price; +} +/* Price of RcTree_ReverseEncode for symbol (root-to-leaf walk over the bits, least significant first). */ +static UInt32 RcTree_ReverseGetPrice(const CLzmaProb *probs, int numBitLevels, UInt32 symbol, UInt32 *ProbPrices) +{ + UInt32 price = 0; + UInt32 m = 1; + int i; + for (i = numBitLevels; i != 0; i--) + { + UInt32 bit = symbol & 1; + symbol >>= 1; + price += GET_PRICEa(probs[m], bit); + m = (m << 1) | bit; + } 
+ return price; +} + +/* Sets every probability of the length coder to kProbInitValue. */ +static void LenEnc_Init(CLenEnc *p) +{ + unsigned i; + p->choice = p->choice2 = kProbInitValue; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) + p->low[i] = kProbInitValue; + for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) + p->mid[i] = kProbInitValue; + for (i = 0; i < kLenNumHighSymbols; i++) + p->high[i] = kProbInitValue; +} +/* Encodes a length symbol: 0..7 as choice 0 plus the low tree of posState, 8..15 as choice 1, choice2 0 plus the mid tree of posState, anything larger as choice 1, choice2 1 plus the shared high tree. */ +static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState) +{ + if (symbol < kLenNumLowSymbols) + { + RangeEnc_EncodeBit(rc, &p->choice, 0); + RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); + } + else + { + RangeEnc_EncodeBit(rc, &p->choice, 1); + if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) + { + RangeEnc_EncodeBit(rc, &p->choice2, 0); + RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); + } + else + { + RangeEnc_EncodeBit(rc, &p->choice2, 1); + RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); + } + } +} +/* Fills prices[0 .. numSymbols - 1] for one posState with the cost of each length symbol, mirroring the branch structure of LenEnc_Encode; stops early when numSymbols ends inside the low or mid range. */ +static void LenEnc_SetPrices(CLenEnc *p, UInt32 posState, UInt32 numSymbols, UInt32 *prices, UInt32 *ProbPrices) +{ + UInt32 a0 = GET_PRICE_0a(p->choice); + UInt32 a1 = GET_PRICE_1a(p->choice); + UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); + UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); + UInt32 i = 0; + for (i = 0; i < kLenNumLowSymbols; i++) + { + if (i >= numSymbols) + return; + prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); + } + for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) + { + if (i >= numSymbols) + return; + prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); + } + for (; i < numSymbols; i++) + prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); +} +/* LenPriceEnc_UpdateTable recomputes the cached price row of posState and resets its countdown to tableSize. */ +static void MY_FAST_CALL LenPriceEnc_UpdateTable(CLenPriceEnc 
*p, UInt32 posState, UInt32 *ProbPrices) +{ + LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); + p->counters[posState] = p->tableSize; +} +/* Refreshes the price rows of posStates 0 .. numPosStates - 1. */ +static void LenPriceEnc_UpdateTables(CLenPriceEnc *p, UInt32 numPosStates, UInt32 *ProbPrices) +{ + UInt32 posState; + for (posState = 0; posState < numPosStates; posState++) + LenPriceEnc_UpdateTable(p, posState, ProbPrices); +} +/* Encodes a length symbol and, when updatePrice is set, decrements the countdown of posState, recomputing that price row when the countdown reaches zero. */ +static void LenEnc_Encode2(CLenPriceEnc *p, CRangeEnc *rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32 *ProbPrices) +{ + LenEnc_Encode(&p->p, rc, symbol, posState); + if (updatePrice) + if (--p->counters[posState] == 0) + LenPriceEnc_UpdateTable(p, posState, ProbPrices); +} + + + +/* Skips num bytes in the match finder and adds them to additionalOffset; does nothing for num == 0. */ +static void MovePos(CLzmaEnc *p, UInt32 num) +{ + #ifdef SHOW_STAT + ttt += num; + printf("\n MovePos %d", num); + #endif + if (num != 0) + { + p->additionalOffset += num; + p->matchFinder.Skip(p->matchFinderObj, num); + } +} +/* Asks the match finder for the (length, distance) pairs at the current position, storing them in p->matches. Returns the length of the last pair (0 when there is none) and writes the number of array entries used to *numDistancePairsRes. A last length equal to numFastBytes is extended byte by byte up to min(numAvail, LZMA_MATCH_LEN_MAX). Also refreshes p->numAvail and increments additionalOffset. */ +static UInt32 ReadMatchDistances(CLzmaEnc *p, UInt32 *numDistancePairsRes) +{ + UInt32 lenRes = 0, numPairs; + p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); + numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + #ifdef SHOW_STAT + printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); + ttt++; + { + UInt32 i; + for (i = 0; i < numPairs; i += 2) + printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); + } + #endif + if (numPairs > 0) + { + lenRes = p->matches[numPairs - 2]; + if (lenRes == p->numFastBytes) + { + const Byte *pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + UInt32 distance = p->matches[numPairs - 1] + 1; + UInt32 numAvail = p->numAvail; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + { + const Byte *pby2 = pby - distance; + for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++); + } + } + } + p->additionalOffset++; + *numDistancePairsRes = numPairs; + return lenRes; +} + +/* backPrev markers for a one-byte step in the opt table: (UInt32)(-1) means literal, 0 means short rep. MakeAsChar and MakeAsShortRep each expand to two statements. */ +#define MakeAsChar(p) (p)->backPrev = (UInt32)(-1); 
(p)->prev1IsChar = False; +#define MakeAsShortRep(p) (p)->backPrev = 0; (p)->prev1IsChar = False; +#define IsShortRep(p) ((p)->backPrev == 0) +/* Price of the two flag bits isRepG0 = 0 and isRep0Long = 0; GetOptimum adds it to repMatchPrice to price a short rep. */ +static UInt32 GetRepLen1Price(CLzmaEnc *p, UInt32 state, UInt32 posState) +{ + return + GET_PRICE_0(p->isRepG0[state]) + + GET_PRICE_0(p->isRep0Long[state][posState]); +} +/* Price of the isRepG0, isRep0Long, isRepG1 and isRepG2 flag bits that select rep index repIndex; the length price is not included. */ +static UInt32 GetPureRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 state, UInt32 posState) +{ + UInt32 price; + if (repIndex == 0) + { + price = GET_PRICE_0(p->isRepG0[state]); + price += GET_PRICE_1(p->isRep0Long[state][posState]); + } + else + { + price = GET_PRICE_1(p->isRepG0[state]); + if (repIndex == 1) + price += GET_PRICE_0(p->isRepG1[state]); + else + { + price += GET_PRICE_1(p->isRepG1[state]); + price += GET_PRICE(p->isRepG2[state], repIndex - 2); + } + } + return price; +} +/* Price of a rep match of length len: cached rep-length price plus GetPureRepPrice. */ +static UInt32 GetRepPrice(CLzmaEnc *p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) +{ + return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] + + GetPureRepPrice(p, repIndex, state, posState); +} +/* Walks the chosen path from opt[cur] back to opt[0], reversing the posPrev and backPrev links so that GetOptimum can replay the path forwards. Entries flagged prev1IsChar (and prev2) are first expanded into their separate literal and match steps. Sets optimumEndIndex to cur, stores opt[0].backPrev in *backRes and returns opt[0].posPrev, which also becomes optimumCurrentIndex. */ +static UInt32 Backward(CLzmaEnc *p, UInt32 *backRes, UInt32 cur) +{ + UInt32 posMem = p->opt[cur].posPrev; + UInt32 backMem = p->opt[cur].backPrev; + p->optimumEndIndex = cur; + do + { + if (p->opt[cur].prev1IsChar) + { + MakeAsChar(&p->opt[posMem]) + p->opt[posMem].posPrev = posMem - 1; + if (p->opt[cur].prev2) + { + p->opt[posMem - 1].prev1IsChar = False; + p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2; + p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2; + } + } + { + UInt32 posPrev = posMem; + UInt32 backCur = backMem; + + backMem = p->opt[posPrev].backPrev; + posMem = p->opt[posPrev].posPrev; + + p->opt[posPrev].backPrev = backCur; + p->opt[posPrev].posPrev = cur; + cur = posPrev; + } + } + while (cur != 0); + *backRes = p->opt[0].backPrev; + p->optimumCurrentIndex = p->opt[0].posPrev; + return p->optimumCurrentIndex; +} +/* Start of the 0x300-entry literal probability set selected by (pos & lpMask) and the top lc bits of prevByte. */ +#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos) & p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 
0x300) +/* Optimal parser. Returns the length of the next symbol to encode and stores its kind in *backRes: (UInt32)(-1) for a literal, a value below LZMA_NUM_REPS for a rep index (0 with length 1 is a short rep), otherwise the match distance plus LZMA_NUM_REPS. While a previously computed path is unfinished, each call just returns its next step. */ +static UInt32 GetOptimum(CLzmaEnc *p, UInt32 position, UInt32 *backRes) +{ + UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur; + UInt32 matchPrice, repMatchPrice, normalMatchPrice; + UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS]; + UInt32 *matches; + const Byte *data; + Byte curByte, matchByte; + if (p->optimumEndIndex != p->optimumCurrentIndex) + { + const COptimal *opt = &p->opt[p->optimumCurrentIndex]; + UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex; + *backRes = opt->backPrev; + p->optimumCurrentIndex = opt->posPrev; + return lenRes; + } + p->optimumCurrentIndex = p->optimumEndIndex = 0; + /* When additionalOffset is nonzero the match list was already read ahead; reuse the saved longest length and pair count. */ + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + if (numAvail < 2) + { + *backRes = (UInt32)(-1); + return 1; + } + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = LZMA_MATCH_LEN_MAX; + /* Measure how far each of the four rep distances matches at the current position; a rep match of at least numFastBytes is taken at once. */ + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + repMaxIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + UInt32 lenTest; + const Byte *data2; + reps[i] = p->reps[i]; + data2 = data - (reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + { + repLens[i] = 0; + continue; + } + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); + repLens[i] = lenTest; + if (lenTest > repLens[repMaxIndex]) + repMaxIndex = i; + } + if (repLens[repMaxIndex] >= p->numFastBytes) + { + UInt32 lenRes; + *backRes = repMaxIndex; + lenRes = repLens[repMaxIndex]; + MovePos(p, lenRes - 1); + return lenRes; + } + /* Likewise take the longest normal match outright when it reaches numFastBytes. */ + matches = p->matches; + if (mainLen >= p->numFastBytes) + { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + /* No match, no rep and no short rep is possible: emit a literal. */ + if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) + { + *backRes = (UInt32)-1; + return 1; + } + + p->opt[0].state = 
(CState)p->state; + /* Seed opt[1] with the cheaper of a literal and, when the bytes agree, a short rep. */ + posState = (position & p->pbMask); + + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) + + (!IsCharState(p->state) ? + LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + MakeAsChar(&p->opt[1]); + + matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); + + if (matchByte == curByte) + { + UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState); + if (shortRepPrice < p->opt[1].price) + { + p->opt[1].price = shortRepPrice; + MakeAsShortRep(&p->opt[1]); + } + } + lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]); + /* No candidate of length 2 or more: return the one-byte choice directly. */ + if (lenEnd < 2) + { + *backRes = p->opt[1].backPrev; + return 1; + } + + p->opt[1].posPrev = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + p->opt[0].backs[i] = reps[i]; + /* Mark opt[2 .. lenEnd] as not reached yet. */ + len = lenEnd; + do + p->opt[len--].price = kInfinityPrice; + while (len >= 2); + /* Offer every rep match at every length from its maximum down to 2. */ + for (i = 0; i < LZMA_NUM_REPS; i++) + { + UInt32 repLen = repLens[i]; + UInt32 price; + if (repLen < 2) + continue; + price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState); + do + { + UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2]; + COptimal *opt = &p->opt[repLen]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = i; + opt->prev1IsChar = False; + } + } + while (--repLen >= 2); + } + /* Offer normal matches for each length above the rep0 length up to mainLen, using the first pair in matches whose length reaches it. */ + normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]); + + len = ((repLens[0] >= 2) ? 
repLens[0] + 1 : 2); + if (len <= mainLen) + { + UInt32 offs = 0; + while (len > matches[offs]) + offs += 2; + for (; ; len++) + { + COptimal *opt; + UInt32 distance = matches[offs + 1]; + + UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN]; + UInt32 lenToPosState = GetLenToPosState(len); + if (distance < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][distance]; + else + { + UInt32 slot; + GetPosSlot2(distance, slot); + curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot]; + } + opt = &p->opt[len]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = 0; + opt->backPrev = distance + LZMA_NUM_REPS; + opt->prev1IsChar = False; + } + if (len == matches[offs]) + { + offs += 2; + if (offs == numPairs) + break; + } + } + } + + cur = 0; + + #ifdef SHOW_STAT2 + if (position >= 0) + { + unsigned i; + printf("\n pos = %4X", position); + for (i = cur; i <= lenEnd; i++) + printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price); + } + #endif + /* Main loop: advance one position per iteration, finalize the state and reps of opt[cur], then try to improve every entry reachable from it. */ + for (;;) + { + UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen; + UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice; + Bool nextIsChar; + Byte curByte, matchByte; + const Byte *data; + COptimal *curOpt; + COptimal *nextOpt; + /* Stop at the end of the reachable range, or when a match of at least numFastBytes shows up (it is saved for the next call). */ + cur++; + if (cur == lenEnd) + return Backward(p, backRes, cur); + + newLen = ReadMatchDistances(p, &numPairs); + if (newLen >= p->numFastBytes) + { + p->numPairs = numPairs; + p->longestMatchLength = newLen; + return Backward(p, backRes, cur); + } + position++; + curOpt = &p->opt[cur]; + posPrev = curOpt->posPrev; + if (curOpt->prev1IsChar) + { + posPrev--; + if (curOpt->prev2) + { + state = p->opt[curOpt->posPrev2].state; + if (curOpt->backPrev2 < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + else + state = p->opt[posPrev].state; + state = kLiteralNextStates[state]; + } + else 
+ state = p->opt[posPrev].state; + if (posPrev == cur - 1) + { + if (IsShortRep(curOpt)) + state = kShortRepNextStates[state]; + else + state = kLiteralNextStates[state]; + } + else + { + UInt32 pos; + const COptimal *prevOpt; + if (curOpt->prev1IsChar && curOpt->prev2) + { + posPrev = curOpt->posPrev2; + pos = curOpt->backPrev2; + state = kRepNextStates[state]; + } + else + { + pos = curOpt->backPrev; + if (pos < LZMA_NUM_REPS) + state = kRepNextStates[state]; + else + state = kMatchNextStates[state]; + } + prevOpt = &p->opt[posPrev]; + if (pos < LZMA_NUM_REPS) + { + UInt32 i; + reps[0] = prevOpt->backs[pos]; + for (i = 1; i <= pos; i++) + reps[i] = prevOpt->backs[i - 1]; + for (; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i]; + } + else + { + UInt32 i; + reps[0] = (pos - LZMA_NUM_REPS); + for (i = 1; i < LZMA_NUM_REPS; i++) + reps[i] = prevOpt->backs[i - 1]; + } + } + curOpt->state = (CState)state; + /* Record the rep distances in effect after the step that led here. */ + curOpt->backs[0] = reps[0]; + curOpt->backs[1] = reps[1]; + curOpt->backs[2] = reps[2]; + curOpt->backs[3] = reps[3]; + + curPrice = curOpt->price; + nextIsChar = False; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + curByte = *data; + matchByte = *(data - (reps[0] + 1)); + + posState = (position & p->pbMask); + /* Option 1: code the current byte as a literal. */ + curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]); + { + const CLzmaProb *probs = LIT_PROBS(position, *(data - 1)); + curAnd1Price += + (!IsCharState(state) ? 
+ LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : + LitEnc_GetPrice(probs, curByte, p->ProbPrices)); + } + + nextOpt = &p->opt[cur + 1]; + + if (curAnd1Price < nextOpt->price) + { + nextOpt->price = curAnd1Price; + nextOpt->posPrev = cur; + MakeAsChar(nextOpt); + nextIsChar = True; + } + /* Option 2: a short rep (one byte taken from rep0), when the bytes agree. */ + matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]); + repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]); + + if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) + { + UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState); + if (shortRepPrice <= nextOpt->price) + { + nextOpt->price = shortRepPrice; + nextOpt->posPrev = cur; + MakeAsShortRep(nextOpt); + nextIsChar = True; + } + } + numAvailFull = p->numAvail; + { + UInt32 temp = kNumOpts - 1 - cur; + if (temp < numAvailFull) + numAvailFull = temp; + } + /* Longer options need at least two available bytes; numAvailFull is also capped so that indices stay inside the opt table. */ + if (numAvailFull < 2) + continue; + numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes); + + if (!nextIsChar && matchByte != curByte) /* speed optimization */ + { + /* try Literal + rep0 */ + UInt32 temp; + UInt32 lenTest2; + const Byte *data2 = data - (reps[0] + 1); + UInt32 limit = p->numFastBytes + 1; + if (limit > numAvailFull) + limit = numAvailFull; + + for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++); + lenTest2 = temp - 1; + if (lenTest2 >= 2) + { + UInt32 state2 = kLiteralNextStates[state]; + UInt32 posStateNext = (position + 1) & p->pbMask; + UInt32 nextRepMatchPrice = curAnd1Price + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + /* for (; lenTest2 >= 2; lenTest2--) */ + { + UInt32 curAndLenPrice; + COptimal *opt; + UInt32 offset = cur + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + 
opt->posPrev = cur + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = False; + } + } + } + } + /* Option 3: rep matches starting at cur, each optionally followed by a literal and a rep0 match. */ + startLen = 2; /* speed optimization */ + { + UInt32 repIndex; + for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) + { + UInt32 lenTest; + UInt32 lenTestTemp; + UInt32 price; + const Byte *data2 = data - (reps[repIndex] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++); + while (lenEnd < cur + lenTest) + p->opt[++lenEnd].price = kInfinityPrice; + lenTestTemp = lenTest; + price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState); + do + { + UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2]; + COptimal *opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = repIndex; + opt->prev1IsChar = False; + } + } + while (--lenTest >= 2); + lenTest = lenTestTemp; + /* Normal matches are only priced for lengths beyond what rep0 already covers. */ + if (repIndex == 0) + startLen = lenTest + 1; + + /* if (_maxMode) */ + { + UInt32 lenTest2 = lenTest + 1; + UInt32 limit = lenTest2 + p->numFastBytes; + UInt32 nextRepMatchPrice; + if (limit > numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) + { + UInt32 state2 = kRepNextStates[state]; + UInt32 posStateNext = (position + lenTest) & p->pbMask; + UInt32 curAndLenCharPrice = + price + p->repLenEnc.prices[posState][lenTest - 2] + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), + data[lenTest], data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (position + lenTest + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + 
UInt32 curAndLenPrice; + COptimal *opt; + UInt32 offset = cur + lenTest + 1 + lenTest2; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = True; + opt->posPrev2 = cur; + opt->backPrev2 = repIndex; + } + } + } + } + } + } + /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */ + if (newLen > numAvail) + { + newLen = numAvail; + for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2); + matches[numPairs] = newLen; + numPairs += 2; + } + if (newLen >= startLen) + { + UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]); + UInt32 offs, curBack, posSlot; + UInt32 lenTest; + while (lenEnd < cur + newLen) + p->opt[++lenEnd].price = kInfinityPrice; + /* Option 4: normal matches of length startLen .. newLen, each pair optionally followed by a literal and a rep0 match. */ + offs = 0; + while (startLen > matches[offs]) + offs += 2; + curBack = matches[offs + 1]; + GetPosSlot2(curBack, posSlot); + for (lenTest = /*2*/ startLen; ; lenTest++) + { + UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN]; + UInt32 lenToPosState = GetLenToPosState(lenTest); + COptimal *opt; + if (curBack < kNumFullDistances) + curAndLenPrice += p->distancesPrices[lenToPosState][curBack]; + else + curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask]; + + opt = &p->opt[cur + lenTest]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur; + opt->backPrev = curBack + LZMA_NUM_REPS; + opt->prev1IsChar = False; + } + + if (/*_maxMode && */lenTest == matches[offs]) + { + /* Try Match + Literal + Rep0 */ + const Byte *data2 = data - (curBack + 1); + UInt32 lenTest2 = lenTest + 1; + UInt32 limit = lenTest2 + p->numFastBytes; + UInt32 nextRepMatchPrice; + if (limit > 
numAvailFull) + limit = numAvailFull; + for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++); + lenTest2 -= lenTest + 1; + if (lenTest2 >= 2) + { + UInt32 state2 = kMatchNextStates[state]; + UInt32 posStateNext = (position + lenTest) & p->pbMask; + UInt32 curAndLenCharPrice = curAndLenPrice + + GET_PRICE_0(p->isMatch[state2][posStateNext]) + + LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]), + data[lenTest], data2[lenTest], p->ProbPrices); + state2 = kLiteralNextStates[state2]; + posStateNext = (posStateNext + 1) & p->pbMask; + nextRepMatchPrice = curAndLenCharPrice + + GET_PRICE_1(p->isMatch[state2][posStateNext]) + + GET_PRICE_1(p->isRep[state2]); + + /* for (; lenTest2 >= 2; lenTest2--) */ + { + UInt32 offset = cur + lenTest + 1 + lenTest2; + UInt32 curAndLenPrice; + COptimal *opt; + while (lenEnd < offset) + p->opt[++lenEnd].price = kInfinityPrice; + curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext); + opt = &p->opt[offset]; + if (curAndLenPrice < opt->price) + { + opt->price = curAndLenPrice; + opt->posPrev = cur + lenTest + 1; + opt->backPrev = 0; + opt->prev1IsChar = True; + opt->prev2 = True; + opt->posPrev2 = cur; + opt->backPrev2 = curBack + LZMA_NUM_REPS; + } + } + } + offs += 2; + if (offs == numPairs) + break; + curBack = matches[offs + 1]; + if (curBack >= kNumFullDistances) + GetPosSlot2(curBack, posSlot); + } + } + } + } +} +/* True when bigDist >> 7 exceeds smallDist, that is when bigDist is at least 128 * (smallDist + 1). */ +#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist)) + +static UInt32 GetOptimumFast(CLzmaEnc *p, UInt32 *backRes) +{ + UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i; + const Byte *data; + const UInt32 *matches; + + if (p->additionalOffset == 0) + mainLen = ReadMatchDistances(p, &numPairs); + else + { + mainLen = p->longestMatchLength; + numPairs = p->numPairs; + } + + numAvail = p->numAvail; + *backRes = (UInt32)-1; + if (numAvail < 2) + return 1; + if (numAvail > LZMA_MATCH_LEN_MAX) + numAvail = 
LZMA_MATCH_LEN_MAX; + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + + repLen = repIndex = 0; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + UInt32 len; + const Byte *data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || data[1] != data2[1]) + continue; + for (len = 2; len < numAvail && data[len] == data2[len]; len++); + if (len >= p->numFastBytes) + { + *backRes = i; + MovePos(p, len - 1); + return len; + } + if (len > repLen) + { + repIndex = i; + repLen = len; + } + } + + matches = p->matches; + if (mainLen >= p->numFastBytes) + { + *backRes = matches[numPairs - 1] + LZMA_NUM_REPS; + MovePos(p, mainLen - 1); + return mainLen; + } + + mainDist = 0; /* for GCC */ + if (mainLen >= 2) + { + mainDist = matches[numPairs - 1]; + while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) + { + if (!ChangePair(matches[numPairs - 3], mainDist)) + break; + numPairs -= 2; + mainLen = matches[numPairs - 2]; + mainDist = matches[numPairs - 1]; + } + if (mainLen == 2 && mainDist >= 0x80) + mainLen = 1; + } + + if (repLen >= 2 && ( + (repLen + 1 >= mainLen) || + (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || + (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) + { + *backRes = repIndex; + MovePos(p, repLen - 1); + return repLen; + } + + if (mainLen < 2 || numAvail <= 2) + return 1; + + p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); + if (p->longestMatchLength >= 2) + { + UInt32 newDistance = matches[p->numPairs - 1]; + if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || + (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || + (p->longestMatchLength > mainLen + 1) || + (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) + return 1; + } + + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; + for (i = 0; i < LZMA_NUM_REPS; i++) + { + UInt32 len, limit; + const Byte *data2 = data - (p->reps[i] + 1); + if (data[0] != data2[0] || 
data[1] != data2[1]) + continue; + limit = mainLen - 1; + for (len = 2; len < limit && data[len] == data2[len]; len++); + if (len >= limit) + return 1; + } + *backRes = mainDist + LZMA_NUM_REPS; + MovePos(p, mainLen - 2); + return mainLen; +} + +static void WriteEndMarker(CLzmaEnc *p, UInt32 posState) +{ + UInt32 len; + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + len = LZMA_MATCH_LEN_MIN; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); + RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); +} + +static SRes CheckErrors(CLzmaEnc *p) +{ + if (p->result != SZ_OK) + return p->result; + if (p->rc.res != SZ_OK) + p->result = SZ_ERROR_WRITE; + if (p->matchFinderBase.result != SZ_OK) + p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) + p->finished = True; + return p->result; +} + +static SRes Flush(CLzmaEnc *p, UInt32 nowPos) +{ + /* ReleaseMFStream(); */ + p->finished = True; + if (p->writeEndMark) + WriteEndMarker(p, nowPos & p->pbMask); + RangeEnc_FlushData(&p->rc); + RangeEnc_FlushStream(&p->rc); + return CheckErrors(p); +} + +static void FillAlignPrices(CLzmaEnc *p) +{ + UInt32 i; + for (i = 0; i < kAlignTableSize; i++) + p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); + p->alignPriceCount = 0; +} + +static void FillDistancesPrices(CLzmaEnc *p) +{ + UInt32 tempPrices[kNumFullDistances]; + UInt32 i, lenToPosState; + for (i = kStartPosModelIndex; i < kNumFullDistances; i++) + { + UInt32 posSlot = GetPosSlot1(i); + UInt32 footerBits = ((posSlot >> 1) - 1); + UInt32 base = ((2 | (posSlot & 1)) << footerBits); + 
tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); + } + + for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) + { + UInt32 posSlot; + const CLzmaProb *encoder = p->posSlotEncoder[lenToPosState]; + UInt32 *posSlotPrices = p->posSlotPrices[lenToPosState]; + for (posSlot = 0; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); + for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) + posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); + + { + UInt32 *distancesPrices = p->distancesPrices[lenToPosState]; + UInt32 i; + for (i = 0; i < kStartPosModelIndex; i++) + distancesPrices[i] = posSlotPrices[i]; + for (; i < kNumFullDistances; i++) + distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; + } + } + p->matchPriceCount = 0; +} + +void LzmaEnc_Construct(CLzmaEnc *p) +{ + RangeEnc_Construct(&p->rc); + MatchFinder_Construct(&p->matchFinderBase); + #ifdef COMPRESS_MF_MT + MatchFinderMt_Construct(&p->matchFinderMt); + p->matchFinderMt.MatchFinder = &p->matchFinderBase; + #endif + + { + CLzmaEncProps props; + LzmaEncProps_Init(&props); + LzmaEnc_SetProps(p, &props); + } + + #ifndef LZMA_LOG_BSR + LzmaEnc_FastPosInit(p->g_FastPos); + #endif + + LzmaEnc_InitPriceTables(p->ProbPrices); + p->litProbs = 0; + p->saveState.litProbs = 0; +} + +CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc) +{ + void *p; + p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); + if (p != 0) + LzmaEnc_Construct((CLzmaEnc *)p); + return p; +} + +void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAlloc *alloc) +{ + alloc->Free(alloc, p->litProbs); + alloc->Free(alloc, p->saveState.litProbs); + p->litProbs = 0; + p->saveState.litProbs = 0; +} + +void LzmaEnc_Destruct(CLzmaEnc *p, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + #ifdef COMPRESS_MF_MT + MatchFinderMt_Destruct(&p->matchFinderMt, 
allocBig); + #endif + MatchFinder_Free(&p->matchFinderBase, allocBig); + LzmaEnc_FreeLits(p, alloc); + RangeEnc_Free(&p->rc, alloc); +} + +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); + alloc->Free(alloc, p); +} + +static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) +{ + UInt32 nowPos32, startPos32; + if (p->inStream != 0) + { + p->matchFinderBase.stream = p->inStream; + p->matchFinder.Init(p->matchFinderObj); + p->inStream = 0; + } + + if (p->finished) + return p->result; + RINOK(CheckErrors(p)); + + nowPos32 = (UInt32)p->nowPos64; + startPos32 = nowPos32; + + if (p->nowPos64 == 0) + { + UInt32 numPairs; + Byte curByte; + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + return Flush(p, nowPos32); + ReadMatchDistances(p, &numPairs); + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); + p->state = kLiteralNextStates[p->state]; + curByte = p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); + LitEnc_Encode(&p->rc, p->litProbs, curByte); + p->additionalOffset--; + nowPos32++; + } + + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) + for (;;) + { + UInt32 pos, len, posState; + + if (p->fastMode) + len = GetOptimumFast(p, &pos); + else + len = GetOptimum(p, nowPos32, &pos); + + #ifdef SHOW_STAT2 + printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); + #endif + + posState = nowPos32 & p->pbMask; + if (len == 1 && pos == (UInt32)-1) + { + Byte curByte; + CLzmaProb *probs; + const Byte *data; + + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); + data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; + curByte = *data; + probs = LIT_PROBS(nowPos32, *(data - 1)); + if (IsCharState(p->state)) + LitEnc_Encode(&p->rc, probs, curByte); + else + LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); + 
p->state = kLiteralNextStates[p->state]; + } + else + { + RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); + if (pos < LZMA_NUM_REPS) + { + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); + if (pos == 0) + { + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); + RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 0 : 1)); + } + else + { + UInt32 distance = p->reps[pos]; + RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); + if (pos == 1) + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); + else + { + RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); + RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); + if (pos == 3) + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + } + p->reps[1] = p->reps[0]; + p->reps[0] = distance; + } + if (len == 1) + p->state = kShortRepNextStates[p->state]; + else + { + LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + p->state = kRepNextStates[p->state]; + } + } + else + { + UInt32 posSlot; + RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); + p->state = kMatchNextStates[p->state]; + LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); + pos -= LZMA_NUM_REPS; + GetPosSlot(pos, posSlot); + RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); + + if (posSlot >= kStartPosModelIndex) + { + UInt32 footerBits = ((posSlot >> 1) - 1); + UInt32 base = ((2 | (posSlot & 1)) << footerBits); + UInt32 posReduced = pos - base; + + if (posSlot < kEndPosModelIndex) + RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); + else + { + RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); + RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); + p->alignPriceCount++; + } + } + p->reps[3] = p->reps[2]; + p->reps[2] = p->reps[1]; + 
p->reps[1] = p->reps[0]; + p->reps[0] = pos; + p->matchPriceCount++; + } + } + p->additionalOffset -= len; + nowPos32 += len; + if (p->additionalOffset == 0) + { + UInt32 processed; + if (!p->fastMode) + { + if (p->matchPriceCount >= (1 << 7)) + FillDistancesPrices(p); + if (p->alignPriceCount >= kAlignTableSize) + FillAlignPrices(p); + } + if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) + break; + processed = nowPos32 - startPos32; + if (useLimits) + { + if (processed + kNumOpts + 300 >= maxUnpackSize || + RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) + break; + } + else if (processed >= (1 << 15)) + { + p->nowPos64 += nowPos32 - startPos32; + return CheckErrors(p); + } + } + } + p->nowPos64 += nowPos32 - startPos32; + return Flush(p, nowPos32); +} + +#define kBigHashDicLimit ((UInt32)1 << 24) + +static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + UInt32 beforeSize = kNumOpts; + Bool btMode; + if (!RangeEnc_Alloc(&p->rc, alloc)) + return SZ_ERROR_MEM; + btMode = (p->matchFinderBase.btMode != 0); + #ifdef COMPRESS_MF_MT + p->mtMode = (p->multiThread && !p->fastMode && btMode); + #endif + + { + unsigned lclp = p->lc + p->lp; + if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) + { + LzmaEnc_FreeLits(p, alloc); + p->litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); + p->saveState.litProbs = (CLzmaProb *)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); + if (p->litProbs == 0 || p->saveState.litProbs == 0) + { + LzmaEnc_FreeLits(p, alloc); + return SZ_ERROR_MEM; + } + p->lclp = lclp; + } + } + + p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); + + if (beforeSize + p->dictSize < keepWindowSize) + beforeSize = keepWindowSize - p->dictSize; + + #ifdef COMPRESS_MF_MT + if (p->mtMode) + { + RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); + 
p->matchFinderObj = &p->matchFinderMt; + MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); + } + else + #endif + { + if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + return SZ_ERROR_MEM; + p->matchFinderObj = &p->matchFinderBase; + MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + } + return SZ_OK; +} + +void LzmaEnc_Init(CLzmaEnc *p) +{ + UInt32 i; + p->state = 0; + for (i = 0 ; i < LZMA_NUM_REPS; i++) + p->reps[i] = 0; + + RangeEnc_Init(&p->rc); + + + for (i = 0; i < kNumStates; i++) + { + UInt32 j; + for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) + { + p->isMatch[i][j] = kProbInitValue; + p->isRep0Long[i][j] = kProbInitValue; + } + p->isRep[i] = kProbInitValue; + p->isRepG0[i] = kProbInitValue; + p->isRepG1[i] = kProbInitValue; + p->isRepG2[i] = kProbInitValue; + } + + { + UInt32 num = 0x300 << (p->lp + p->lc); + for (i = 0; i < num; i++) + p->litProbs[i] = kProbInitValue; + } + + { + for (i = 0; i < kNumLenToPosStates; i++) + { + CLzmaProb *probs = p->posSlotEncoder[i]; + UInt32 j; + for (j = 0; j < (1 << kNumPosSlotBits); j++) + probs[j] = kProbInitValue; + } + } + { + for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) + p->posEncoders[i] = kProbInitValue; + } + + LenEnc_Init(&p->lenEnc.p); + LenEnc_Init(&p->repLenEnc.p); + + for (i = 0; i < (1 << kNumAlignBits); i++) + p->posAlignEncoder[i] = kProbInitValue; + + p->optimumEndIndex = 0; + p->optimumCurrentIndex = 0; + p->additionalOffset = 0; + + p->pbMask = (1 << p->pb) - 1; + p->lpMask = (1 << p->lp) - 1; +} + +void LzmaEnc_InitPrices(CLzmaEnc *p) +{ + if (!p->fastMode) + { + FillDistancesPrices(p); + FillAlignPrices(p); + } + + p->lenEnc.tableSize = + p->repLenEnc.tableSize = + p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; + LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); + LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); +} + +static SRes LzmaEnc_AllocAndInit(CLzmaEnc 
*p, UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + UInt32 i; + for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) + if (p->dictSize <= ((UInt32)1 << i)) + break; + p->distTableSize = i * 2; + + p->finished = False; + p->result = SZ_OK; + RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + p->nowPos64 = 0; + return SZ_OK; +} + +static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream *inStream, ISeqOutStream *outStream, + ISzAlloc *alloc, ISzAlloc *allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->inStream = inStream; + p->rc.outStream = outStream; + return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); +} + +SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, + ISeqInStream *inStream, UInt32 keepWindowSize, + ISzAlloc *alloc, ISzAlloc *allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + p->inStream = inStream; + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) +{ + p->seqBufInStream.funcTable.Read = MyRead; + p->seqBufInStream.data = src; + p->seqBufInStream.rem = srcLen; +} + +SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, + UInt32 keepWindowSize, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + LzmaEnc_SetInputBuf(p, src, srcLen); + p->inStream = &p->seqBufInStream.funcTable; + return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); +} + +void LzmaEnc_Finish(CLzmaEncHandle pp) +{ + #ifdef COMPRESS_MF_MT + CLzmaEnc *p = (CLzmaEnc *)pp; + if (p->mtMode) + MatchFinderMt_ReleaseStream(&p->matchFinderMt); + #else + pp = pp; + #endif +} + +typedef struct _CSeqOutStreamBuf +{ + ISeqOutStream funcTable; + Byte *data; + SizeT rem; + Bool overflow; +} CSeqOutStreamBuf; + +static size_t MyWrite(void *pp, const void *data, size_t size) +{ + CSeqOutStreamBuf *p = (CSeqOutStreamBuf *)pp; + if (p->rem < size) + { + size = p->rem; + p->overflow = True; + } + 
memcpy(p->data, data, size); + p->rem -= size; + p->data += size; + return size; +} + + +UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); +} + +const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) +{ + const CLzmaEnc *p = (CLzmaEnc *)pp; + return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; +} + +SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, + Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + UInt64 nowPos64; + SRes res; + CSeqOutStreamBuf outStream; + + outStream.funcTable.Write = MyWrite; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = False; + p->finished = False; + p->result = SZ_OK; + + if (reInit) + LzmaEnc_Init(p); + LzmaEnc_InitPrices(p); + nowPos64 = p->nowPos64; + RangeEnc_Init(&p->rc); + p->rc.outStream = &outStream.funcTable; + + res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); + + *unpackSize = (UInt32)(p->nowPos64 - nowPos64); + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + + return res; +} + +SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, + ISzAlloc *alloc, ISzAlloc *allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + SRes res = SZ_OK; + + #ifdef COMPRESS_MF_MT + Byte allocaDummy[0x300]; + int i = 0; + for (i = 0; i < 16; i++) + allocaDummy[i] = (Byte)i; + #endif + + RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig)); + + for (;;) + { + res = LzmaEnc_CodeOneBlock(p, False, 0, 0); + if (res != SZ_OK || p->finished != 0) + break; + if (progress != 0) + { + res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); + if (res != SZ_OK) + { + res = SZ_ERROR_PROGRESS; + break; + } + } + } + LzmaEnc_Finish(pp); + return res; +} + 
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) +{ + CLzmaEnc *p = (CLzmaEnc *)pp; + int i; + UInt32 dictSize = p->dictSize; + if (*size < LZMA_PROPS_SIZE) + return SZ_ERROR_PARAM; + *size = LZMA_PROPS_SIZE; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + for (i = 11; i <= 30; i++) + { + if (dictSize <= ((UInt32)2 << i)) + { + dictSize = (2 << i); + break; + } + if (dictSize <= ((UInt32)3 << i)) + { + dictSize = (3 << i); + break; + } + } + + for (i = 0; i < 4; i++) + props[1 + i] = (Byte)(dictSize >> (8 * i)); + return SZ_OK; +} + +SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + SRes res; + CLzmaEnc *p = (CLzmaEnc *)pp; + + CSeqOutStreamBuf outStream; + + LzmaEnc_SetInputBuf(p, src, srcLen); + + outStream.funcTable.Write = MyWrite; + outStream.data = dest; + outStream.rem = *destLen; + outStream.overflow = False; + + p->writeEndMark = writeEndMark; + res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable, + progress, alloc, allocBig); + + *destLen -= outStream.rem; + if (outStream.overflow) + return SZ_ERROR_OUTPUT_EOF; + return res; +} + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig) +{ + CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); + SRes res; + if (p == 0) + return SZ_ERROR_MEM; + + res = LzmaEnc_SetProps(p, props); + if (res == SZ_OK) + { + res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); + if (res == SZ_OK) + res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, + writeEndMark, progress, alloc, allocBig); + } + + LzmaEnc_Destroy(p, alloc, allocBig); + return res; +} + +} diff --git a/crnlib/lzma_LzmaEnc.h b/crnlib/lzma_LzmaEnc.h new file mode 100644 index 
00000000..8b547508 --- /dev/null +++ b/crnlib/lzma_LzmaEnc.h @@ -0,0 +1,76 @@ +/* LzmaEnc.h -- LZMA Encoder +2008-10-04 : Igor Pavlov : Public domain */ + +#ifndef __LZMAENC_H +#define __LZMAENC_H + +#include "lzma_Types.h" + +namespace crnlib { + +#define LZMA_PROPS_SIZE 5 + +typedef struct _CLzmaEncProps +{ + int level; /* 0 <= level <= 9 */ + UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version + (1 << 12) <= dictSize <= (1 << 30) for 64-bit version + default = (1 << 24) */ + int lc; /* 0 <= lc <= 8, default = 3 */ + int lp; /* 0 <= lp <= 4, default = 0 */ + int pb; /* 0 <= pb <= 4, default = 2 */ + int algo; /* 0 - fast, 1 - normal, default = 1 */ + int fb; /* 5 <= fb <= 273, default = 32 */ + int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ + int numHashBytes; /* 2, 3 or 4, default = 4 */ + UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ + unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ + int numThreads; /* 1 or 2, default = 2 */ +} CLzmaEncProps; + +void LzmaEncProps_Init(CLzmaEncProps *p); +void LzmaEncProps_Normalize(CLzmaEncProps *p); +UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2); + + +/* ---------- CLzmaEncHandle Interface ---------- */ + +/* LzmaEnc_* functions can return the following exit codes: +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter in props + SZ_ERROR_WRITE - Write callback error. 
+ SZ_ERROR_PROGRESS - some break from progress callback + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +typedef void * CLzmaEncHandle; + +CLzmaEncHandle LzmaEnc_Create(ISzAlloc *alloc); +void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc *alloc, ISzAlloc *allocBig); +SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props); +SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); +SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); +SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + int writeEndMark, ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +/* ---------- One Call Interface ---------- */ + +/* LzmaEncode +Return code: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, + const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, + ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig); + +} + +#endif diff --git a/crnlib/lzma_LzmaLib.cpp b/crnlib/lzma_LzmaLib.cpp new file mode 100644 index 00000000..9b8eb156 --- /dev/null +++ b/crnlib/lzma_LzmaLib.cpp @@ -0,0 +1,50 @@ +/* LzmaLib.c -- LZMA library wrapper +2008-08-05 +Igor Pavlov +Public domain */ +#include "crn_core.h" +#include "lzma_LzmaEnc.h" +#include "lzma_LzmaDec.h" +#include "lzma_Alloc.h" +#include "lzma_LzmaLib.h" + +namespace crnlib { + +static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); } +static void SzFree(void *p, void *address) { p = p; MyFree(address); } +static ISzAlloc g_Alloc = { SzAlloc, SzFree }; + +MY_STDAPI LzmaCompress(unsigned char *dest, 
size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ +) +{ + CLzmaEncProps props; + LzmaEncProps_Init(&props); + props.level = level; + props.dictSize = dictSize; + props.lc = lc; + props.lp = lp; + props.pb = pb; + props.fb = fb; + props.numThreads = numThreads; + + return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, + NULL, &g_Alloc, &g_Alloc); +} + + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, + const unsigned char *props, size_t propsSize) +{ + ELzmaStatus status; + return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); +} + +} diff --git a/crnlib/lzma_LzmaLib.h b/crnlib/lzma_LzmaLib.h new file mode 100644 index 00000000..ee18e6d3 --- /dev/null +++ b/crnlib/lzma_LzmaLib.h @@ -0,0 +1,146 @@ +/* LzmaLib.h -- LZMA library interface +2008-08-05 +Igor Pavlov +Public domain */ + +#ifndef __LZMALIB_H +#define __LZMALIB_H + +#include "lzma_Types.h" + +namespace crnlib { + +#if 0 +#ifdef __cplusplus + #define MY_EXTERN_C extern "C" +#else + #define MY_EXTERN_C extern +#endif + +#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL +#else +#define MY_STDAPI int MY_STD_CALL +#endif + +#define LZMA_PROPS_SIZE 5 + +/* +RAM requirements for LZMA: + for compression: (dictSize * 11.5 + 6 MB) + state_size + for decompression: dictSize + state_size + state_size = (4 + (1.5 << (lc + lp))) KB + by default (lc=3, lp=0), state_size = 16 KB. + +LZMA properties (5 bytes) format + Offset Size Description + 0 1 lc, lp and pb in encoded form. 
+ 1 4 dictSize (little endian). +*/ + +/* +LzmaCompress +------------ + +outPropsSize - + In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. + + LZMA Encoder will use default values for any parameter, if it is + -1 for any from: level, lc, lp, pb, fb, numThreads + 0 for dictSize + +level - compression level: 0 <= level <= 9; + + level dictSize algo fb + 0: 16 KB 0 32 + 1: 64 KB 0 32 + 2: 256 KB 0 32 + 3: 1 MB 0 32 + 4: 4 MB 0 32 + 5: 16 MB 1 32 + 6: 32 MB 1 32 + 7+: 64 MB 1 64 + + The default value for "level" is 5. + + algo = 0 means fast method + algo = 1 means normal method + +dictSize - The dictionary size in bytes. The maximum value is + 128 MB = (1 << 27) bytes for 32-bit version + 1 GB = (1 << 30) bytes for 64-bit version + The default value is 16 MB = (1 << 24) bytes. + It's recommended to use the dictionary that is larger than 4 KB and + that can be calculated as (1 << N) or (3 << N) sizes. + +lc - The number of literal context bits (high bits of previous literal). + It can be in the range from 0 to 8. The default value is 3. + Sometimes lc=4 gives the gain for big files. + +lp - The number of literal pos bits (low bits of current position for literals). + It can be in the range from 0 to 4. The default value is 0. + The lp switch is intended for periodical data when the period is equal to 2^lp. + For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's + better to set lc=0, if you change lp switch. + +pb - The number of pos bits (low bits of current position). + It can be in the range from 0 to 4. The default value is 2. + The pb switch is intended for periodical data when the period is equal 2^pb. + +fb - Word size (the number of fast bytes). + It can be in the range from 5 to 273. The default value is 32. 
+ Usually, a big number gives a little bit better compression ratio and + slower compression process. + +numThreads - The number of threads. 1 or 2. The default value is 2. + Fast mode (algo = 0) can use only 1 thread. + +Out: + destLen - processed output size +Returns: + SZ_OK - OK + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_PARAM - Incorrect parameter + SZ_ERROR_OUTPUT_EOF - output buffer overflow + SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) +*/ + +MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, + unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ + int level, /* 0 <= level <= 9, default = 5 */ + unsigned dictSize, /* default = (1 << 24) */ + int lc, /* 0 <= lc <= 8, default = 3 */ + int lp, /* 0 <= lp <= 4, default = 0 */ + int pb, /* 0 <= pb <= 4, default = 2 */ + int fb, /* 5 <= fb <= 273, default = 32 */ + int numThreads /* 1 or 2, default = 2 */ + ); + +/* +LzmaUncompress +-------------- +In: + dest - output data + destLen - output data size + src - input data + srcLen - input data size +Out: + destLen - processed output size + srcLen - processed input size +Returns: + SZ_OK - OK + SZ_ERROR_DATA - Data error + SZ_ERROR_MEM - Memory allocation error + SZ_ERROR_UNSUPPORTED - Unsupported properties + SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) +*/ + +MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, + const unsigned char *props, size_t propsSize); + +#define LZMA_COMPRESS_FUNC_EXPORT "LzmaCompress" +#define LZMA_UNCOMPRESS_FUNC_EXPORT "LzmaUncompress" + +} + +#endif diff --git a/crnlib/lzma_MyVersion.h b/crnlib/lzma_MyVersion.h new file mode 100644 index 00000000..07e27917 --- /dev/null +++ b/crnlib/lzma_MyVersion.h @@ -0,0 +1,8 @@ +#define MY_VER_MAJOR 4 +#define MY_VER_MINOR 63 +#define MY_VER_BUILD 0 +#define MY_VERSION "4.63" +#define MY_7ZIP_VERSION "7-Zip 4.63" 
+#define MY_DATE "2008-12-31" +#define MY_COPYRIGHT "Copyright (c) 1999-2008 Igor Pavlov" +#define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " " MY_DATE diff --git a/crnlib/lzma_Threads.cpp b/crnlib/lzma_Threads.cpp new file mode 100644 index 00000000..8d556724 --- /dev/null +++ b/crnlib/lzma_Threads.cpp @@ -0,0 +1,116 @@ +/* Threads.c -- multithreading library +2008-08-05 +Igor Pavlov +Public domain */ +#include "crn_core.h" +#include "lzma_Threads.h" +#include + +namespace crnlib { + +static WRes GetError() /* Returns GetLastError(), substituting 1 when it reports 0, so a failure is never reported as the success value 0. */ +{ + DWORD res = GetLastError(); + return (res) ? (WRes)(res) : 1; +} + +WRes HandleToWRes(HANDLE h) { return (h != 0) ? 0 : GetError(); } /* 0 for a non-null handle, otherwise the error from GetError() */ +WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); } /* 0 when v is true, otherwise the error from GetError() */ + +static WRes MyCloseHandle(HANDLE *h) /* Closes *h when it is non-NULL and then resets it to NULL; if CloseHandle fails the error is returned and *h is left unchanged. */ +{ + if (*h != NULL) + if (!CloseHandle(*h)) + return GetError(); + *h = NULL; + return 0; +} + +WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter) +{ + unsigned threadId; /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + thread->handle = + /* CreateThread(0, 0, startAddress, parameter, 0, &threadId); */ + (HANDLE)_beginthreadex(NULL, 0, startAddress, parameter, 0, &threadId); + /* maybe we must use errno here, but probably GetLastError() is also OK. */ + return HandleToWRes(thread->handle); +} + +WRes WaitObject(HANDLE h) /* Waits with an INFINITE timeout and returns the raw WaitForSingleObject result cast to WRes. */ +{ + return (WRes)WaitForSingleObject(h, INFINITE); +} + +WRes Thread_Wait(CThread *thread) /* Returns 1 without waiting when the thread handle is NULL. */ +{ + if (thread->handle == NULL) + return 1; + return WaitObject(thread->handle); +} + +WRes Thread_Close(CThread *thread) +{ + return MyCloseHandle(&thread->handle); +} + +WRes Event_Create(CEvent *p, BOOL manualReset, int initialSignaled) +{ + p->handle = CreateEvent(NULL, manualReset, (initialSignaled ? 
TRUE : FALSE), NULL); + return HandleToWRes(p->handle); +} + +WRes ManualResetEvent_Create(CManualResetEvent *p, int initialSignaled) + { return Event_Create(p, TRUE, initialSignaled); } +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) + { return ManualResetEvent_Create(p, 0); } + +WRes AutoResetEvent_Create(CAutoResetEvent *p, int initialSignaled) + { return Event_Create(p, FALSE, initialSignaled); } +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) + { return AutoResetEvent_Create(p, 0); } + +WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(p->handle)); } +WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(p->handle)); } +WRes Event_Wait(CEvent *p) { return WaitObject(p->handle); } +WRes Event_Close(CEvent *p) { return MyCloseHandle(&p->handle); } + + +WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount) +{ + p->handle = CreateSemaphore(NULL, (LONG)initiallyCount, (LONG)maxCount, NULL); + return HandleToWRes(p->handle); +} + +WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount) /* NOTE(review): has external linkage but no declaration in lzma_Threads.h as added by this patch; only the ReleaseN/Release1 wrappers are declared there. */ +{ + return BOOLToWRes(ReleaseSemaphore(p->handle, releaseCount, previousCount)); +} +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) +{ + return Semaphore_Release(p, (LONG)releaseCount, NULL); +} +WRes Semaphore_Release1(CSemaphore *p) +{ + return Semaphore_ReleaseN(p, 1); +} + +WRes Semaphore_Wait(CSemaphore *p) { return WaitObject(p->handle); } +WRes Semaphore_Close(CSemaphore *p) { return MyCloseHandle(&p->handle); } + +WRes CriticalSection_Init(CCriticalSection *p) /* Returns 0 on success; in _MSC_VER builds returns 1 when InitializeCriticalSection raises a structured exception. */ +{ +#ifdef _MSC_VER + /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ + __try + { + InitializeCriticalSection(p); + /* InitializeCriticalSectionAndSpinCount(p, 0); */ + } + __except (EXCEPTION_EXECUTE_HANDLER) { return 1; } +#else + InitializeCriticalSection(p); +#endif + return 0; +} + +} diff --git a/crnlib/lzma_Threads.h b/crnlib/lzma_Threads.h new file mode 100644 index 00000000..e0aa9141 --- 
/dev/null +++ b/crnlib/lzma_Threads.h @@ -0,0 +1,72 @@ +/* Threads.h -- multithreading library +2008-11-22 : Igor Pavlov : Public domain */ + +#ifndef __7Z_THRESDS_H +#define __7Z_THRESDS_H /* NOTE(review): guard name is spelled THRESDS (sic); harmless as long as both guard lines agree. */ + +#include "lzma_Types.h" + +namespace crnlib { + +typedef struct _CThread +{ + HANDLE handle; +} CThread; + +#define Thread_Construct(thread) (thread)->handle = NULL /* marks the thread as not created; Thread_WasCreated tests for exactly this state */ +#define Thread_WasCreated(thread) ((thread)->handle != NULL) + +typedef unsigned THREAD_FUNC_RET_TYPE; +#define THREAD_FUNC_CALL_TYPE MY_STD_CALL +#define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE + +WRes Thread_Create(CThread *thread, THREAD_FUNC_RET_TYPE (THREAD_FUNC_CALL_TYPE *startAddress)(void *), LPVOID parameter); +WRes Thread_Wait(CThread *thread); +WRes Thread_Close(CThread *thread); + +typedef struct _CEvent +{ + HANDLE handle; +} CEvent; + +typedef CEvent CAutoResetEvent; /* same struct as CManualResetEvent; the reset mode is decided by which *_Create function is called */ +typedef CEvent CManualResetEvent; + +#define Event_Construct(event) (event)->handle = NULL +#define Event_IsCreated(event) ((event)->handle != NULL) + +WRes ManualResetEvent_Create(CManualResetEvent *event, int initialSignaled); +WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *event); +WRes AutoResetEvent_Create(CAutoResetEvent *event, int initialSignaled); +WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *event); +WRes Event_Set(CEvent *event); +WRes Event_Reset(CEvent *event); +WRes Event_Wait(CEvent *event); +WRes Event_Close(CEvent *event); + + +typedef struct _CSemaphore +{ + HANDLE handle; +} CSemaphore; + +#define Semaphore_Construct(p) (p)->handle = NULL + +WRes Semaphore_Create(CSemaphore *p, UInt32 initiallyCount, UInt32 maxCount); +WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); +WRes Semaphore_Release1(CSemaphore *p); +WRes Semaphore_Wait(CSemaphore *p); +WRes Semaphore_Close(CSemaphore *p); + + +typedef CRITICAL_SECTION CCriticalSection; + +WRes CriticalSection_Init(CCriticalSection *p); +#define CriticalSection_Delete(p) DeleteCriticalSection(p) +#define CriticalSection_Enter(p) 
EnterCriticalSection(p) +#define CriticalSection_Leave(p) LeaveCriticalSection(p) + +} + +#endif + diff --git a/crnlib/lzma_Types.h b/crnlib/lzma_Types.h new file mode 100644 index 00000000..99f1e77f --- /dev/null +++ b/crnlib/lzma_Types.h @@ -0,0 +1,219 @@ +/* Types.h -- Basic types +2008-11-23 : Igor Pavlov : Public domain */ + +#ifndef __7Z_TYPES_H +#define __7Z_TYPES_H + +#define COMPRESS_MF_MT /* presumably enables the multithreaded match finder; confirm against the encoder sources */ + +#include + +#ifdef _XBOX +#include +#elif defined( _WIN32 ) +#include +#else +#error Unknown platform +#endif + +namespace crnlib { + +#define SZ_OK 0 + +#define SZ_ERROR_DATA 1 +#define SZ_ERROR_MEM 2 +#define SZ_ERROR_CRC 3 +#define SZ_ERROR_UNSUPPORTED 4 +#define SZ_ERROR_PARAM 5 +#define SZ_ERROR_INPUT_EOF 6 +#define SZ_ERROR_OUTPUT_EOF 7 +#define SZ_ERROR_READ 8 +#define SZ_ERROR_WRITE 9 +#define SZ_ERROR_PROGRESS 10 +#define SZ_ERROR_FAIL 11 +#define SZ_ERROR_THREAD 12 + +#define SZ_ERROR_ARCHIVE 16 +#define SZ_ERROR_NO_ARCHIVE 17 + +typedef int SRes; + +#ifdef _WIN32 +typedef DWORD WRes; +#else +typedef int WRes; +#endif + +#ifndef RINOK +#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } /* evaluates x once and returns its value from the enclosing function when it is non-zero */ +#endif + +typedef unsigned char Byte; +typedef short Int16; +typedef unsigned short UInt16; + +#ifdef _LZMA_UINT32_IS_ULONG +typedef long Int32; +typedef unsigned long UInt32; +#else +typedef int Int32; +typedef unsigned int UInt32; +#endif + +#ifdef _SZ_NO_INT_64 + +/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. + NOTES: Some code will work incorrectly in that case! 
*/ + +typedef long Int64; +typedef unsigned long UInt64; + +#else + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef __int64 Int64; +typedef unsigned __int64 UInt64; +#else +typedef long long int Int64; +typedef unsigned long long int UInt64; +#endif + +#endif + +#ifdef _LZMA_NO_SYSTEM_SIZE_T +typedef UInt32 SizeT; +#else +typedef size_t SizeT; +#endif + +typedef int Bool; +#define True 1 +#define False 0 + + +#ifdef _MSC_VER + +#if _MSC_VER >= 1300 +#define MY_NO_INLINE __declspec(noinline) +#else +#define MY_NO_INLINE +#endif + +#define MY_CDECL __cdecl +#define MY_STD_CALL __stdcall +#define MY_FAST_CALL MY_NO_INLINE __fastcall + +#else + +#define MY_CDECL +#define MY_STD_CALL +#define MY_FAST_CALL + +#endif + + +/* The following interfaces use first parameter as pointer to structure */ + +typedef struct +{ + SRes (*Read)(void *p, void *buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. + (output(*size) < input(*size)) is allowed */ +} ISeqInStream; + +/* it can return SZ_ERROR_INPUT_EOF */ +SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); +SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); +SRes SeqInStream_ReadByte(ISeqInStream *stream, Byte *buf); + +typedef struct +{ + size_t (*Write)(void *p, const void *buf, size_t size); + /* Returns: result - the number of actually written bytes. + (result < size) means error */ +} ISeqOutStream; + +typedef enum +{ + SZ_SEEK_SET = 0, + SZ_SEEK_CUR = 1, + SZ_SEEK_END = 2 +} ESzSeek; + +typedef struct +{ + SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ + SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); +} ISeekInStream; + +typedef struct +{ + SRes (*Look)(void *p, void **buf, size_t *size); + /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
+ (output(*size) > input(*size)) is not allowed + (output(*size) < input(*size)) is allowed */ + SRes (*Skip)(void *p, size_t offset); + /* offset must be <= output(*size) of Look */ + + SRes (*Read)(void *p, void *buf, size_t *size); + /* reads directly (without buffer). It's same as ISeqInStream::Read */ + SRes (*Seek)(void *p, Int64 *pos, ESzSeek origin); +} ILookInStream; + +SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); +SRes LookInStream_SeekTo(ILookInStream *stream, UInt64 offset); + +/* reads via ILookInStream::Read */ +SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); +SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); + +#define LookToRead_BUF_SIZE (1 << 14) + +typedef struct +{ + ILookInStream s; + ISeekInStream *realStream; + size_t pos; + size_t size; + Byte buf[LookToRead_BUF_SIZE]; +} CLookToRead; + +void LookToRead_CreateVTable(CLookToRead *p, int lookahead); +void LookToRead_Init(CLookToRead *p); + +typedef struct +{ + ISeqInStream s; + ILookInStream *realStream; +} CSecToLook; + +void SecToLook_CreateVTable(CSecToLook *p); + +typedef struct +{ + ISeqInStream s; + ILookInStream *realStream; +} CSecToRead; + +void SecToRead_CreateVTable(CSecToRead *p); + +typedef struct +{ + SRes (*Progress)(void *p, UInt64 inSize, UInt64 outSize); + /* Returns: result. (result != SZ_OK) means break. + Value (UInt64)(Int64)-1 for size means unknown value. 
*/ +} ICompressProgress; + +typedef struct +{ + void *(*Alloc)(void *p, size_t size); + void (*Free)(void *p, void *address); /* address can be 0 */ +} ISzAlloc; + +#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) +#define IAlloc_Free(p, a) (p)->Free((p), a) + +} + +#endif + diff --git a/crunch/crunch.2008.vcproj b/crunch/crunch.2008.vcproj new file mode 100644 index 00000000..0d8d174f --- /dev/null +++ b/crunch/crunch.2008.vcproj @@ -0,0 +1,373 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/crunch/crunch.cbp b/crunch/crunch.cbp new file mode 100644 index 00000000..fd9d5c26 --- /dev/null +++ b/crunch/crunch.cbp @@ -0,0 +1,49 @@ + + + + + + diff --git a/crunch/crunch.cpp b/crunch/crunch.cpp new file mode 100644 index 00000000..2aea6dc2 --- /dev/null +++ b/crunch/crunch.cpp @@ -0,0 +1,1304 @@ +// File: crunch.cpp - Command line tool for DDS/CRN texture compression/decompression. +// This tool exposes all of crnlib's functionality. It also uses a bunch of internal crlib +// classes that aren't directly exposed in the main crnlib.h header. The actual tool is +// implemented as a single class "crunch" which in theory is reusable. Most of the heavy +// lifting is actually done by functions in the crnlib::texture_conversion namespace, +// which are mostly wrappers over the public crnlib.h functions. 
+// See Copyright Notice and license at the end of inc/crnlib.h +#include "crn_core.h" + +#include +#include + +#include "crn_win32_console.h" +#include "crn_win32_find_files.h" +#include "crn_win32_file_utils.h" +#include "crn_command_line_params.h" + +#include "crn_dxt.h" +#include "crn_cfile_stream.h" +#include "crn_texture_conversion.h" + +#define CRND_HEADER_FILE_ONLY +#include "crn_decomp.h" + +using namespace crnlib; + +const int cDefaultCRNQualityLevel = 128; + +class crunch +{ + CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crunch); + + cfile_stream m_log_stream; + + uint m_num_processed; + uint m_num_failed; + uint m_num_succeeded; + uint m_num_skipped; + +public: + crunch() : + m_num_processed(0), + m_num_failed(0), + m_num_succeeded(0), + m_num_skipped(0) + { + } + + ~crunch() + { + } + + enum convert_status + { + cCSFailed, + cCSSucceeded, + cCSSkipped, + cCSBadParam, + }; + + inline uint get_num_processed() const { return m_num_processed; } + inline uint get_num_failed() const { return m_num_failed; } + inline uint get_num_succeeded() const { return m_num_succeeded; } + inline uint get_num_skipped() const { return m_num_skipped; } + + static void print_usage() + { + console::message(L"\nCommand line usage:"); + console::printf(L"crunch [options] -file filename"); + console::printf(L"-file filename - Required input filename, wildcards, multiple /file params OK."); + console::printf(L"-file @list.txt - List of files to convert."); + console::printf(L"Supported source file formats: dds,crn,tga,bmp,png,jpg/jpeg,psd"); + console::printf(L"Note: Some file format variants are unsupported, such as progressive JPEG's."); + console::printf(L"See the docs for stb_image.c: http://www.nothings.org/stb_image.c"); + + console::message(L"\nPath/file related parameters:"); + console::printf(L"/out filename - Output filename"); + console::printf(L"/outdir dir - Output directory"); + console::printf(L"/outsamedir - Write output file to input directory"); + console::printf(L"/deep - 
Recurse subdirectories, default=false"); + console::printf(L"/nooverwrite - Don't overwrite existing files"); + console::printf(L"/timestamp - Update only changed files"); + console::printf(L"/forcewrite - Overwrite read-only files"); + console::printf(L"/recreate - Recreate directory structure"); + console::printf(L"/fileformat [dds,crn,tga,bmp] - Output file format, default=crn or dds"); + + console::message(L"\nModes:"); + console::printf(L"/compare - Compare input and output files (no output files are written)."); + console::printf(L"/info - Only display input file statistics (no output files are written)."); + + console::message(L"\nMisc. options:"); + console::printf(L"/helperThreads # - Set number of helper threads, 0-16, default=(# of CPU's)-1"); + console::printf(L"/noprogress - Disable progress output"); + console::printf(L"/quiet - Disable all console output"); + console::printf(L"/ignoreerrors - Continue processing files after errors. Note: The default"); + console::printf(L" behavior is to immediately exit whenever an error occurs."); + console::printf(L"/logfile filename - Append output to log file"); + console::printf(L"/pause - Wait for keypress on error"); + console::printf(L"/window - Crop window before processing"); + console::printf(L"/clamp - Crop image if larger than width/height"); + console::printf(L"/clampscale - Scale image if larger than width/height"); + console::printf(L"/nostats - Disable all output file statistics (faster)"); + console::printf(L"/imagestats - Print various image qualilty statistics"); + console::printf(L"/mipstats - Print statistics for each mipmap, not just the top mip"); + console::printf(L"/lzmastats - Print size of output file compressed with LZMA codec"); + console::printf(L"/split - Write faces/mip levels to multiple separate output files"); + + console::message(L"\nImage rescaling (mutually exclusive options)"); + console::printf(L"/rescale - Rescale image to specified resolution"); + 
console::printf(L"/relscale - Rescale image to specified relative resolution"); + console::printf(L"/rescalemode - Auto-rescale non-power of two images"); + console::printf(L" nearest - Use nearest power of 2, hi - Use next, lo - Use previous"); + + console::message(L"\nDDS/CRN compression quality control:"); + console::printf(L"/quality # (or /q #) - Set Clustered DDS/CRN quality factor [0-255] 255=best"); + console::printf(L" DDS default quality is best possible."); + console::printf(L" CRN default quality is %u.", cDefaultCRNQualityLevel); + console::printf(L"/bitrate # - Set the desired output bitrate of DDS or CRN output files."); + console::printf(L" This option causes crunch to find the quality factor"); + console::printf(L" closest to the desired bitrate using a binary search."); + + console::message(L"\nLow-level CRN specific options:"); + console::printf(L"/c # - Color endpoint palette size, 32-8192, default=3072"); + console::printf(L"/s # - Color selector palette size, 32-8192, default=3072"); + console::printf(L"/ca # - Alpha endpoint palette size, 32-8192, default=3072"); + console::printf(L"/sa # - Alpha selector palette size, 32-8192, default=3072"); + + console::message(L"\nMipmap filtering options:"); + console::printf(L"/mipMode [UseSourceOrGenerate,UseSource,Generate,None]"); + console::printf(L" Default mipMode is UseSourceOrGenerate"); + console::printf(L"/mipFilter [box,tent,lanczos4,mitchell,kaiser], default=kaiser"); + console::printf(L"/gamma # - Mipmap gamma correction value, default=2.2, use 1.0 for linear"); + console::printf(L"/blurriness # - Scale filter kernel, >1=blur, <1=sharpen, .01-8, default=.9"); + console::printf(L"/wrap - Assume texture is tiled when filtering, default=clamping"); + console::printf(L"/renormalize - Renormalize filtered normal map texels, default=disabled"); + console::printf(L"/maxmips # - Limit number of generated texture mipmap levels, 1-16, default=16"); + console::printf(L"/minmipsize # - Smallest 
allowable mipmap resolution, default=1"); + + console::message(L"\nCompression options:"); + console::printf(L"/alphaThreshold # - Set DXT1A alpha threshold, 0-255, default=128"); + console::printf(L" Note: /alphaThreshold also changes the compressor's behavior to"); + console::printf(L" prefer DXT1A over DXT5 for images with alpha channels (.DDS only)."); + console::printf(L"/uniformMetrics - Use uniform color metrics, default=use perceptual metrics"); + console::printf(L"/noAdaptiveBlocks - Disable adaptive block sizes (i.e. disable macroblocks)."); + console::printf(L"/compressor [CRN,CRNF,RYG] - Set DXTn compressor, default=CRN"); + console::printf(L"/dxtQuality [superfast,fast,normal,better,uber] - Endpoint optimizer speed."); + console::printf(L" Sets endpoint optimizer's max iteration depth. Default=uber."); + console::printf(L"/noendpointcaching - Don't try reusing previous DXT endpoint solutions."); + console::printf(L"/grayscalsampling - Assume shader will convert fetched results to luma (Y)."); + console::printf(L"/forceprimaryencoding - Only use DXT1 color4 and DXT5 alpha8 block encodings."); + console::printf(L"/usetransparentindicesforblack - Try DXT1 transparent indices for dark pixels."); + + console::message(L"\nAll supported texture formats (Note: .CRN only supports DXTn pixel formats):"); + for (uint i = 0; i < pixel_format_helpers::get_num_formats(); i++) + { + pixel_format fmt = pixel_format_helpers::get_pixel_format_by_index(i); + console::printf(L"/%s", pixel_format_helpers::get_pixel_format_string(fmt)); + } + } + + bool convert(const wchar_t* pCommand_line) + { + m_num_processed = 0; + m_num_failed = 0; + m_num_succeeded = 0; + m_num_skipped = 0; + + command_line_params::param_desc std_params[] = + { + { L"file", 1, true }, + + { L"out", 1 }, + { L"outdir", 1 }, + { L"outsamedir" }, + { L"deep" }, + { L"fileformat", 1 }, + + { L"helperThreads", 1 }, + { L"noprogress" }, + { L"quiet" }, + { L"ignoreerrors" }, + { L"logfile", 1 }, + + { L"q", 
1 }, + { L"quality", 1 }, + + { L"c", 1 }, + { L"s", 1 }, + { L"ca", 1 }, + { L"sa", 1 }, + + { L"mipMode", 1 }, + { L"mipFilter", 1 }, + { L"gamma", 1 }, + { L"blurriness", 1 }, + { L"wrap" }, + { L"renormalize" }, + { L"noprogress" }, + { L"paramdebug" }, + { L"debug" }, + { L"quick" }, + { L"imagestats" }, + { L"nostats" }, + { L"mipstats" }, + + { L"alphaThreshold", 1 }, + { L"uniformMetrics" }, + { L"noAdaptiveBlocks" }, + { L"compressor", 1 }, + { L"dxtQuality", 1 }, + { L"noendpointcaching" }, + { L"grayscalesampling" }, + { L"converttoluma" }, + { L"setalphatoluma" }, + { L"pause" }, + { L"timestamp" }, + { L"nooverwrite" }, + { L"forcewrite" }, + { L"recreate" }, + { L"compare" }, + { L"info" }, + { L"forceprimaryencoding" }, + { L"usetransparentindicesforblack" }, + + { L"rescalemode", 1 }, + { L"rescale", 2 }, + { L"relrescale", 2 }, + { L"clamp", 2 }, + { L"clampScale", 2 }, + { L"window", 4 }, + + { L"maxmips", 1 }, + { L"minmipsize", 1}, + + { L"bitrate", 1 }, + + { L"lzmastats" }, + { L"split" }, + { L"csvfile", 1 }, + }; + + crnlib::vector params; + params.append(std_params, sizeof(std_params) / sizeof(std_params[0])); + + for (uint i = 0; i < pixel_format_helpers::get_num_formats(); i++) + { + pixel_format fmt = pixel_format_helpers::get_pixel_format_by_index(i); + + command_line_params::param_desc desc; + desc.m_pName = pixel_format_helpers::get_pixel_format_string(fmt); + desc.m_num_values = 0; + desc.m_support_listing_file = false; + params.push_back(desc); + } + + if (!m_params.parse(pCommand_line, params.size(), params.get_ptr(), true)) + { + return false; + } + + if (!m_params.get_num_params()) + { + console::error(L"No command line parameters specified!"); + + print_usage(); + + return false; + } + + if (m_params.get_count(L"")) + { + console::error(L"Unrecognized command line parameter: \"%s\"", m_params.get_value_as_string_or_empty(L"", 0).get_ptr()); + + return false; + } + + if (m_params.get_value_as_bool(L"debug")) + { + 
console::debug(L"Command line parameters:"); + for (command_line_params::param_map_const_iterator it = m_params.begin(); it != m_params.end(); ++it) + { + console::disable_crlf(); + console::debug(L"Key:\"%s\" Values (%u): ", it->first.get_ptr(), it->second.m_values.size()); + for (uint i = 0; i < it->second.m_values.size(); i++) + console::debug(L"\"%s\" ", it->second.m_values[i].get_ptr()); + console::debug(L"\n"); + console::enable_crlf(); + } + } + + dynamic_wstring log_filename; + if (m_params.get_value_as_string(L"logfile", 0, log_filename)) + { + if (!m_log_stream.open(log_filename.get_ptr(), cDataStreamWritable | cDataStreamSeekable, true)) + { + console::error(L"Unable to open log file: \"%s\"", log_filename.get_ptr()); + return false; + } + + console::printf(L"Appending to ANSI log file \"%s\"", log_filename.get_ptr()); + + console::set_log_stream(&m_log_stream); + } + + bool status = convert(); + + if (m_log_stream.is_opened()) + { + console::set_log_stream(NULL); + + m_log_stream.close(); + } + + return status; + } + +private: + command_line_params m_params; + + bool convert() + { + find_files::file_desc_vec files; + + uint total_input_specs = 0; + + command_line_params::param_map_const_iterator begin, end; + m_params.find(L"file", begin, end); + for (command_line_params::param_map_const_iterator it = begin; it != end; ++it) + { + total_input_specs++; + + const dynamic_wstring_array& strings = it->second.m_values; + for (uint i = 0; i < strings.size(); i++) + { + if (!process_input_spec(files, strings[i])) + { + if (!m_params.get_value_as_bool(L"ignoreerrors")) + return false; + } + } + } + + if (!total_input_specs) + { + console::error(L"No input files specified!"); + return false; + } + + if (files.empty()) + { + console::error(L"No files found to process!"); + return false; + } + + std::sort(files.begin(), files.end()); + files.resize((uint)(std::unique(files.begin(), files.end()) - files.begin())); + + timer tm; + tm.start(); + + if 
(!process_files(files)) + { + if (!m_params.get_value_as_bool(L"ignoreerrors")) + return false; + } + + double total_time = tm.get_elapsed_secs(); + + console::printf(L"Total time: %3.3fs", total_time); + + console::printf( + ((m_num_skipped) || (m_num_failed)) ? cWarningConsoleMessage : cInfoConsoleMessage, + L"%u total file(s) successfully processed, %u file(s) skipped, %u file(s) failed.", m_num_succeeded, m_num_skipped, m_num_failed); + + return true; + } + + bool process_input_spec(find_files::file_desc_vec& files, const dynamic_wstring& input_spec) + { + dynamic_wstring find_name(input_spec); + + if ((find_name.is_empty()) || (!full_path(find_name))) + { + console::error(L"Invalid input filename: %s", find_name.get_ptr()); + return false; + } + + const bool deep_flag = m_params.get_value_as_bool(L"deep"); + + dynamic_wstring find_drive, find_path, find_fname, find_ext; + split_path(find_name.get_ptr(), &find_drive, &find_path, &find_fname, &find_ext); + + dynamic_wstring find_pathname; + combine_path(find_pathname, find_drive.get_ptr(), find_path.get_ptr()); + dynamic_wstring find_filename; + find_filename = find_fname + find_ext; + + find_files file_finder; + bool success = file_finder.find(find_pathname.get_ptr(), find_filename.get_ptr(), find_files::cFlagAllowFiles | (deep_flag ? 
find_files::cFlagRecursive : 0)); + if (!success) + { + console::error(L"Failed finding files: %s", find_name.get_ptr()); + return false; + } + if (file_finder.get_files().empty()) + { + console::warning(L"No files found: %s", find_name.get_ptr()); + return true; + } + + files.append(file_finder.get_files()); + + return true; + } + + bool read_only_file_check(const wchar_t* pDst_filename) + { + uint32 dst_file_attribs = GetFileAttributesW(pDst_filename); + if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) + return true; + + if ((dst_file_attribs & FILE_ATTRIBUTE_READONLY) == 0) + return true; + + if (m_params.get_value_as_bool(L"forcewrite")) + { + dst_file_attribs &= ~FILE_ATTRIBUTE_READONLY; + if (SetFileAttributesW(pDst_filename, dst_file_attribs)) + { + console::warning(L"Setting read-only file \"%s\" to writable", pDst_filename); + return true; + } + else + { + console::error(L"Failed setting read-only file \"%s\" to writable!", pDst_filename); + return false; + } + } + + console::error(L"Output file \"%s\" is read-only!", pDst_filename); + return false; + } + + bool process_files(find_files::file_desc_vec& files) + { + const bool compare_mode = m_params.get_value_as_bool(L"compare"); + const bool info_mode = m_params.get_value_as_bool(L"info"); + + for (uint file_index = 0; file_index < files.size(); file_index++) + { + const find_files::file_desc& file_desc = files[file_index]; + const dynamic_wstring& in_filename = file_desc.m_fullname; + + dynamic_wstring in_drive, in_path, in_fname, in_ext; + split_path(in_filename.get_ptr(), &in_drive, &in_path, &in_fname, &in_ext); + + texture_file_types::format out_file_type = texture_file_types::cFormatCRN; + dynamic_wstring fmt; + if (m_params.get_value_as_string(L"fileformat", 0, fmt)) + { + if (fmt == L"tga") + out_file_type = texture_file_types::cFormatTGA; + else if (fmt == L"bmp") + out_file_type = texture_file_types::cFormatBMP; + else if (fmt == L"dds") + out_file_type = texture_file_types::cFormatDDS; + else 
if (fmt == L"crn") + out_file_type = texture_file_types::cFormatCRN; + else + { + console::error(L"Unsupported output file type: %s", fmt.get_ptr()); + return false; + } + } + + if (!m_params.has_key(L"fileformat")) + { + texture_file_types::format input_file_type = texture_file_types::determine_file_format(in_filename.get_ptr()); + if (input_file_type == texture_file_types::cFormatCRN) + { + out_file_type = texture_file_types::cFormatDDS; + } + } + + dynamic_wstring out_filename; + if (m_params.get_value_as_bool(L"outsamedir")) + out_filename.format(L"%s%s%s.%s", in_drive.get_ptr(), in_path.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + else if (m_params.has_key(L"out")) + { + out_filename = m_params.get_value_as_string_or_empty(L"out"); + + if (files.size() > 1) + { + dynamic_wstring out_drive, out_dir, out_name, out_ext; + split_path(out_filename.get_ptr(), &out_drive, &out_dir, &out_name, &out_ext); + + out_name.format(L"%s_%u", out_name.get_ptr(), file_index); + + out_filename.format(L"%s%s%s%s", out_drive.get_ptr(), out_dir.get_ptr(), out_name.get_ptr(), out_ext.get_ptr()); + } + + if (!m_params.has_key(L"fileformat")) + out_file_type = texture_file_types::determine_file_format(out_filename.get_ptr()); + } + else + { + dynamic_wstring out_dir(m_params.get_value_as_string_or_empty(L"outdir")); + + if (m_params.get_value_as_bool(L"recreate")) + { + combine_path(out_dir, out_dir.get_ptr(), file_desc.m_rel.get_ptr()); + } + + if (out_dir.get_len()) + out_filename.format(L"%s\\%s.%s", out_dir.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + else + out_filename.format(L"%s.%s", in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); + + if (m_params.get_value_as_bool(L"recreate")) + { + if (full_path(out_filename)) + { + if ((!compare_mode) && (!info_mode)) + { + dynamic_wstring out_drive, out_path; + split_path(out_filename.get_ptr(), &out_drive, &out_path, NULL, NULL); + 
out_drive += out_path; + create_path(out_drive.get_ptr()); + } + } + } + } + + if ((!compare_mode) && (!info_mode)) + { + WIN32_FILE_ATTRIBUTE_DATA dst_file_attribs; + const BOOL dest_file_exists = GetFileAttributesExW(out_filename.get_ptr(), GetFileExInfoStandard, &dst_file_attribs); + + if (dest_file_exists) + { + if (m_params.get_value_as_bool(L"nooverwrite")) + { + console::warning(L"Skipping already existing file: %s\n", out_filename.get_ptr()); + m_num_skipped++; + continue; + } + + if (m_params.get_value_as_bool(L"timestamp")) + { + WIN32_FILE_ATTRIBUTE_DATA src_file_attribs; + const BOOL src_file_exists = GetFileAttributesExW(in_filename.get_ptr(), GetFileExInfoStandard, &src_file_attribs); + + if (src_file_exists) + { + LONG timeComp = CompareFileTime(&src_file_attribs.ftLastWriteTime, &dst_file_attribs.ftLastWriteTime); + if (timeComp <= 0) + { + console::warning(L"Skipping up to date file: %s\n", out_filename.get_ptr()); + m_num_skipped++; + continue; + } + } + } + } + } + + convert_status status = cCSFailed; + + if (info_mode) + status = display_file_info(file_index, files.size(), in_filename.get_ptr()); + else if (compare_mode) + status = compare_file(file_index, files.size(), in_filename.get_ptr(), out_filename.get_ptr(), out_file_type); + else if (read_only_file_check(out_filename.get_ptr())) + status = convert_file(file_index, files.size(), in_filename.get_ptr(), out_filename.get_ptr(), out_file_type); + + m_num_processed++; + + switch (status) + { + case cCSSucceeded: + { + console::info(L""); + m_num_succeeded++; + break; + } + case cCSSkipped: + { + console::info(L"Skipping file.\n"); + m_num_skipped++; + break; + } + case cCSBadParam: + { + return false; + } + default: + { + if (!m_params.get_value_as_bool(L"ignoreerrors")) + return false; + + console::info(L""); + + m_num_failed++; + break; + } + } + } + + return true; + } + + void print_texture_info(const wchar_t* pTex_desc, texture_conversion::convert_params& params, dds_texture& tex) + { + 
console::info(L"%s: %ux%u, Levels: %u, Faces: %u, Format: %s", + pTex_desc, + tex.get_width(), + tex.get_height(), + tex.get_num_levels(), + tex.get_num_faces(), + pixel_format_helpers::get_pixel_format_string(tex.get_format())); + + console::disable_crlf(); + console::info(L"Apparent type: %s, ", get_texture_type_desc(params.m_texture_type)); + + console::info(L"Flags: "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagRValid) console::info(L"R "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagGValid) console::info(L"G "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagBValid) console::info(L"B "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagAValid) console::info(L"A "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) console::info(L"Grayscale "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagNormalMap) console::info(L"NormalMap "); + if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagLumaChroma) console::info(L"LumaChroma "); + console::info(L"\n"); + console::enable_crlf(); + } + + static bool progress_callback_func(uint percentage_complete, void* pUser_data_ptr) + { + pUser_data_ptr; + + console::disable_crlf(); + + wchar_t buf[8]; + for (uint i = 0; i < 7; i++) + buf[i] = 8; + buf[7] = '\0'; + + for (uint i = 0; i < 130/8; i++) + console::progress(buf); + + console::progress(L"Processing: %u%%", percentage_complete); + + for (uint i = 0; i < 7; i++) + buf[i] = L' '; + console::progress(buf); + console::progress(buf); + + for (uint i = 0; i < 7; i++) + buf[i] = 8; + console::progress(buf); + console::progress(buf); + + console::enable_crlf(); + + return true; + } + + bool parse_mipmap_params(crn_mipmap_params& mip_params) + { + dynamic_wstring val; + + if (m_params.get_value_as_string(L"mipMode", 0, val)) + { + uint i; + for (i = 0; i < cCRNMipModeTotal; i++) + { + if (val == crn_get_mip_mode_name( static_cast(i) )) + { + mip_params.m_mode = static_cast(i); + 
break; + } + } + if (i == cCRNMipModeTotal) + { + console::error(L"Invalid MipMode: \"%s\"", val.get_ptr()); + return false; + } + } + + if (m_params.get_value_as_string(L"mipFilter", 0, val)) + { + uint i; + for (i = 0; i < cCRNMipFilterTotal; i++) + { + if (val == dynamic_wstring(crn_get_mip_filter_name( static_cast(i) )) ) + { + mip_params.m_filter = static_cast(i); + break; + } + } + + if (i == cCRNMipFilterTotal) + { + console::error(L"Invalid MipFilter: \"%s\"", val.get_ptr()); + return false; + } + + if (i == cCRNMipFilterBox) + mip_params.m_blurriness = 1.0f; + } + + mip_params.m_gamma = m_params.get_value_as_float(L"gamma", 0, mip_params.m_gamma, .1f, 8.0f); + mip_params.m_gamma_filtering = (mip_params.m_gamma != 1.0f); + + mip_params.m_blurriness = m_params.get_value_as_float(L"blurriness", 0, mip_params.m_blurriness, .01f, 8.0f); + + mip_params.m_renormalize = m_params.get_value_as_bool(L"renormalize", 0, mip_params.m_renormalize != 0); + mip_params.m_tiled = m_params.get_value_as_bool(L"wrap"); + + mip_params.m_max_levels = m_params.get_value_as_int(L"maxmips", 0, cCRNMaxLevels, 1, cCRNMaxLevels); + mip_params.m_min_mip_size = m_params.get_value_as_int(L"minmipsize", 0, 1, 1, cCRNMaxLevelResolution); + + return true; + } + + bool parse_scale_params(crn_mipmap_params &mipmap_params) + { + if (m_params.has_key(L"rescale")) + { + int w = m_params.get_value_as_int(L"rescale", 0, -1, 1, cCRNMaxLevelResolution, 0); + int h = m_params.get_value_as_int(L"rescale", 0, -1, 1, cCRNMaxLevelResolution, 1); + + mipmap_params.m_scale_mode = cCRNSMAbsolute; + mipmap_params.m_scale_x = (float)w; + mipmap_params.m_scale_y = (float)h; + } + else if (m_params.has_key(L"relrescale")) + { + float w = m_params.get_value_as_float(L"relrescale", 0, 1, 1, 256, 0); + float h = m_params.get_value_as_float(L"relrescale", 0, 1, 1, 256, 1); + + mipmap_params.m_scale_mode = cCRNSMRelative; + mipmap_params.m_scale_x = w; + mipmap_params.m_scale_y = h; + } + else if 
(m_params.has_key(L"rescalemode")) + { + // nearest | hi | lo + + dynamic_wstring mode_str(m_params.get_value_as_string_or_empty(L"rescalemode")); + if (mode_str == L"nearest") + mipmap_params.m_scale_mode = cCRNSMNearestPow2; + else if (mode_str == L"hi") + mipmap_params.m_scale_mode = cCRNSMNextPow2; + else if (mode_str == L"lo") + mipmap_params.m_scale_mode = cCRNSMLowerPow2; + else + { + console::error(L"Invalid rescale mode: \"%s\"", mode_str.get_ptr()); + return false; + } + } + + if (m_params.has_key(L"clamp")) + { + uint w = m_params.get_value_as_int(L"clamp", 0, 1, 1, cCRNMaxLevelResolution, 0); + uint h = m_params.get_value_as_int(L"clamp", 0, 1, 1, cCRNMaxLevelResolution, 1); + + mipmap_params.m_clamp_scale = false; + mipmap_params.m_clamp_width = w; + mipmap_params.m_clamp_height = h; + } + else if (m_params.has_key(L"clampScale")) + { + uint w = m_params.get_value_as_int(L"clampscale", 0, 1, 1, cCRNMaxLevelResolution, 0); + uint h = m_params.get_value_as_int(L"clampscale", 0, 1, 1, cCRNMaxLevelResolution, 1); + + mipmap_params.m_clamp_scale = true; + mipmap_params.m_clamp_width = w; + mipmap_params.m_clamp_height = h; + } + + if (m_params.has_key(L"window")) + { + uint xl = m_params.get_value_as_int(L"window", 0, 0, 0, cCRNMaxLevelResolution, 0); + uint yl = m_params.get_value_as_int(L"window", 0, 0, 0, cCRNMaxLevelResolution, 1); + uint xh = m_params.get_value_as_int(L"window", 0, 0, 0, cCRNMaxLevelResolution, 2); + uint yh = m_params.get_value_as_int(L"window", 0, 0, 0, cCRNMaxLevelResolution, 3); + + mipmap_params.m_window_left = math::minimum(xl, xh); + mipmap_params.m_window_top = math::minimum(yl, yh); + mipmap_params.m_window_right = math::maximum(xl, xh); + mipmap_params.m_window_bottom = math::maximum(yl, yh); + } + + return true; + } + + bool parse_comp_params(texture_file_types::format dst_file_format, crn_comp_params &comp_params) + { + if (dst_file_format == texture_file_types::cFormatCRN) + comp_params.m_quality_level = 
cDefaultCRNQualityLevel; + + if (m_params.has_key(L"q") || m_params.has_key(L"quality")) + { + const wchar_t *pKeyName = m_params.has_key(L"q") ? L"q" : L"quality"; + + if ((dst_file_format == texture_file_types::cFormatDDS) || (dst_file_format == texture_file_types::cFormatCRN)) + { + uint i = m_params.get_value_as_int(pKeyName, 0, cDefaultCRNQualityLevel, 0, cCRNMaxQualityLevel); + + comp_params.m_quality_level = i; + } + else + { + console::error(L"/quality or /q option is only invalid when writing DDS or CRN files!"); + return false; + } + } + else + { + float desired_bitrate = m_params.get_value_as_float(L"bitrate", 0, 0.0f, .1f, 30.0f); + if (desired_bitrate > 0.0f) + { + comp_params.m_target_bitrate = desired_bitrate; + } + } + + int color_endpoints = m_params.get_value_as_int(L"c", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); + int color_selectors = m_params.get_value_as_int(L"s", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); + int alpha_endpoints = m_params.get_value_as_int(L"ca", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); + int alpha_selectors = m_params.get_value_as_int(L"sa", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); + if ( ((color_endpoints > 0) && (color_selectors > 0)) || + ((alpha_endpoints > 0) && (alpha_selectors > 0)) ) + { + comp_params.set_flag(cCRNCompFlagManualPaletteSizes, true); + comp_params.m_crn_color_endpoint_palette_size = color_endpoints; + comp_params.m_crn_color_selector_palette_size = color_selectors; + comp_params.m_crn_alpha_endpoint_palette_size = alpha_endpoints; + comp_params.m_crn_alpha_selector_palette_size = alpha_selectors; + } + + if (m_params.has_key(L"alphaThreshold")) + { + int dxt1a_alpha_threshold = m_params.get_value_as_int(L"alphaThreshold", 0, 128, 0, 255); + comp_params.m_dxt1a_alpha_threshold = dxt1a_alpha_threshold; + if (dxt1a_alpha_threshold > 0) + { + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); + } + } + + comp_params.set_flag(cCRNCompFlagPerceptual, 
!m_params.get_value_as_bool(L"uniformMetrics")); + comp_params.set_flag(cCRNCompFlagHierarchical, !m_params.get_value_as_bool(L"noAdaptiveBlocks")); + + if (m_params.has_key(L"helperThreads")) + comp_params.m_num_helper_threads = m_params.get_value_as_int(L"helperThreads", 0, cCRNMaxHelperThreads, 0, cCRNMaxHelperThreads); + else if (g_number_of_processors > 1) + comp_params.m_num_helper_threads = g_number_of_processors - 1; + + dynamic_wstring comp_name; + if (m_params.get_value_as_string(L"compressor", 0, comp_name)) + { + uint i; + for (i = 0; i < cCRNTotalDXTCompressors; i++) + { + if (comp_name == get_dxt_compressor_name(static_cast(i))) + { + comp_params.m_dxt_compressor_type = static_cast(i); + break; + } + } + if (i == cCRNTotalDXTCompressors) + { + console::error(L"Invalid compressor: \"%s\"", comp_name.get_ptr()); + return false; + } + } + + dynamic_wstring dxt_quality_str; + if (m_params.get_value_as_string(L"dxtquality", 0, dxt_quality_str)) + { + uint i; + for (i = 0; i < cCRNDXTQualityTotal; i++) + { + if (dxt_quality_str == crn_get_dxt_quality_string(static_cast(i))) + { + comp_params.m_dxt_quality = static_cast(i); + break; + } + } + if (i == cCRNDXTQualityTotal) + { + console::error(L"Invalid DXT quality: \"%s\"", dxt_quality_str.get_ptr()); + return false; + } + } + else + { + comp_params.m_dxt_quality = cCRNDXTQualityUber; + } + + comp_params.set_flag(cCRNCompFlagDisableEndpointCaching, m_params.get_value_as_bool(L"noendpointcaching")); + comp_params.set_flag(cCRNCompFlagGrayscaleSampling, m_params.get_value_as_bool(L"grayscalesampling")); + comp_params.set_flag(cCRNCompFlagUseBothBlockTypes, !m_params.get_value_as_bool(L"forceprimaryencoding")); + if (comp_params.get_flag(cCRNCompFlagUseBothBlockTypes)) + comp_params.set_flag(cCRNCompFlagUseTransparentIndicesForBlack, m_params.get_value_as_bool(L"usetransparentindicesforblack")); + else + comp_params.set_flag(cCRNCompFlagUseTransparentIndicesForBlack, false); + + return true; + } + + 
convert_status display_file_info(uint file_index, uint num_files, const wchar_t* pSrc_filename) + { + if (num_files > 1) + console::message(L"[%u/%u] Source texture: \"%s\"", file_index + 1, num_files, pSrc_filename); + else + console::message(L"Source texture: \"%s\"", pSrc_filename); + + texture_file_types::format src_file_format = texture_file_types::determine_file_format(pSrc_filename); + if (src_file_format == texture_file_types::cFormatInvalid) + { + console::error(L"Unrecognized file type: %s", pSrc_filename); + return cCSFailed; + } + + dds_texture src_tex; + if (!src_tex.load_from_file(pSrc_filename, src_file_format)) + { + if (src_tex.get_last_error().is_empty()) + console::error(L"Failed reading source file: \"%s\"", pSrc_filename); + else + console::error(L"%s", src_tex.get_last_error().get_ptr()); + + return cCSFailed; + } + + uint64 input_file_size; + win32_file_utils::get_file_size(pSrc_filename, input_file_size); + + uint total_in_pixels = 0; + for (uint i = 0; i < src_tex.get_num_levels(); i++) + { + uint width = math::maximum(1, src_tex.get_width() >> i); + uint height = math::maximum(1, src_tex.get_height() >> i); + total_in_pixels += width*height*src_tex.get_num_faces(); + } + + vector src_tex_bytes; + if (!cfile_stream::read_file_into_array(pSrc_filename, src_tex_bytes)) + { + console::error(L"Failed loading source file: %s", pSrc_filename); + return cCSFailed; + } + + if (!src_tex_bytes.size()) + { + console::warning(L"Source file is empty: %s", pSrc_filename); + return cCSSkipped; + } + + uint compressed_size = 0; + if (m_params.has_key(L"lzmastats")) + { + lzma_codec lossless_codec; + vector cmp_tex_bytes; + if (lossless_codec.pack(src_tex_bytes.get_ptr(), src_tex_bytes.size(), cmp_tex_bytes)) + { + compressed_size = cmp_tex_bytes.size(); + } + } + console::info(L"Source texture dimensions: %ux%u, Levels: %u, Faces: %u, Format: %s", + src_tex.get_width(), + src_tex.get_height(), + src_tex.get_num_levels(), + src_tex.get_num_faces(), + 
pixel_format_helpers::get_pixel_format_string(src_tex.get_format())); + + console::info(L"Total pixels: %u, Source file size: %I64i, Source file bits/pixel: %1.3f", + total_in_pixels, input_file_size, (input_file_size * 8.0f) / total_in_pixels); + if (compressed_size) + { + console::info(L"LZMA compressed file size: %u bytes, %1.3f bits/pixel", + compressed_size, compressed_size * 8.0f / total_in_pixels); + } + + double entropy = math::compute_entropy(src_tex_bytes.get_ptr(), src_tex_bytes.size()); + console::info(L"Source file entropy: %3.6f bits per byte", entropy / src_tex_bytes.size()); + + if (src_file_format == texture_file_types::cFormatCRN) + { + crnd::crn_texture_info tex_info; + tex_info.m_struct_size = sizeof(crnd::crn_texture_info); + crn_bool success = crnd::crnd_get_texture_info(src_tex_bytes.get_ptr(), src_tex_bytes.size(), &tex_info); + if (!success) + console::error(L"Failed retrieving CRN texture info!"); + else + { + console::info(L"CRN texture info:"); + + console::info(L"Width: %u, Height: %u, Levels: %u, Faces: %u\nBytes per block: %u, User0: 0x%08X, User1: 0x%08X, CRN Format: %u", + tex_info.m_width, + tex_info.m_height, + tex_info.m_levels, + tex_info.m_faces, + tex_info.m_bytes_per_block, + tex_info.m_userdata0, + tex_info.m_userdata1, + tex_info.m_format); + } + } + + return cCSSucceeded; + } + + void print_stats(texture_conversion::convert_stats &stats, bool force_image_stats = false) + { + dynamic_wstring csv_filename; + const wchar_t *pCSVStatsFilename = m_params.get_value_as_string(L"csvfile", 0, csv_filename) ? 
csv_filename.get_ptr() : NULL; + + bool image_stats = force_image_stats || m_params.get_value_as_bool(L"imagestats") || m_params.get_value_as_bool(L"mipstats") || (pCSVStatsFilename != NULL); + bool mip_stats = m_params.get_value_as_bool(L"mipstats"); + bool grayscale_sampling = m_params.get_value_as_bool(L"grayscalesampling"); + if (!stats.print(image_stats, mip_stats, grayscale_sampling, pCSVStatsFilename)) + { + console::warning(L"Unable to compute/display full output file statistics."); + } + } + + convert_status compare_file(uint file_index, uint num_files, const wchar_t* pSrc_filename, const wchar_t* pDst_filename, texture_file_types::format out_file_type) + { + if (num_files > 1) + console::message(L"[%u/%u] Comparing source texture \"%s\" to output texture \"%s\"", file_index + 1, num_files, pSrc_filename, pDst_filename); + else + console::message(L"Comparing source texture \"%s\" to output texture \"%s\"", pSrc_filename, pDst_filename); + + texture_file_types::format src_file_format = texture_file_types::determine_file_format(pSrc_filename); + if (src_file_format == texture_file_types::cFormatInvalid) + { + console::error(L"Unrecognized file type: %s", pSrc_filename); + return cCSFailed; + } + + dds_texture src_tex; + + if (!src_tex.load_from_file(pSrc_filename, src_file_format)) + { + if (src_tex.get_last_error().is_empty()) + console::error(L"Failed reading source file: \"%s\"", pSrc_filename); + else + console::error(L"%s", src_tex.get_last_error().get_ptr()); + + return cCSFailed; + } + + texture_conversion::convert_stats stats; + if (!stats.init(pSrc_filename, pDst_filename, src_tex, out_file_type, m_params.has_key(L"lzmastats"))) + return cCSFailed; + + print_stats(stats, true); + + return cCSSucceeded; + } + + convert_status convert_file(uint file_index, uint num_files, const wchar_t* pSrc_filename, const wchar_t* pDst_filename, texture_file_types::format out_file_type) + { + timer tim; + + if (num_files > 1) + console::message(L"[%u/%u] Reading 
source texture: \"%s\"", file_index + 1, num_files, pSrc_filename); + else + console::message(L"Reading source texture: \"%s\"", pSrc_filename); + + texture_file_types::format src_file_format = texture_file_types::determine_file_format(pSrc_filename); + if (src_file_format == texture_file_types::cFormatInvalid) + { + console::error(L"Unrecognized file type: %s", pSrc_filename); + return cCSFailed; + } + + dds_texture src_tex; + tim.start(); + if (!src_tex.load_from_file(pSrc_filename, src_file_format)) + { + if (src_tex.get_last_error().is_empty()) + console::error(L"Failed reading source file: \"%s\"", pSrc_filename); + else + console::error(L"%s", src_tex.get_last_error().get_ptr()); + + return cCSFailed; + } + double total_time = tim.get_elapsed_secs(); + console::info(L"Texture successfully loaded in %3.3fs", total_time); + + if (m_params.get_value_as_bool(L"converttoluma")) + src_tex.convert(image_utils::cConversion_Y_To_RGB); + if (m_params.get_value_as_bool(L"setalphatoluma")) + src_tex.convert(image_utils::cConversion_Y_To_A); + + texture_conversion::convert_params params; + + params.m_texture_type = src_tex.determine_texture_type(); + params.m_pInput_texture = &src_tex; + params.m_dst_filename = pDst_filename; + params.m_dst_file_type = out_file_type; + params.m_lzma_stats = m_params.has_key(L"lzmastats"); + params.m_write_mipmaps_to_multiple_files = m_params.has_key(L"split"); + + if ((!m_params.get_value_as_bool(L"noprogress")) && (!m_params.get_value_as_bool(L"quiet"))) + params.m_pProgress_func = progress_callback_func; + + if (m_params.get_value_as_bool(L"debug")) + { + params.m_debugging = true; + params.m_comp_params.set_flag(cCRNCompFlagDebugging, true); + } + + if (m_params.get_value_as_bool(L"paramdebug")) + params.m_param_debugging = true; + + if (m_params.get_value_as_bool(L"quick")) + params.m_quick = true; + + params.m_no_stats = m_params.get_value_as_bool(L"nostats"); + + params.m_dst_format = PIXEL_FMT_INVALID; + + for (uint i = 0; i < 
pixel_format_helpers::get_num_formats(); i++) + { + pixel_format trial_fmt = pixel_format_helpers::get_pixel_format_by_index(i); + if (m_params.has_key(pixel_format_helpers::get_pixel_format_string(trial_fmt))) + { + params.m_dst_format = trial_fmt; + break; + } + } + + if (texture_file_types::supports_mipmaps(src_file_format)) + { + params.m_mipmap_params.m_mode = cCRNMipModeUseSourceMips; + } + + if (!parse_mipmap_params(params.m_mipmap_params)) + return cCSBadParam; + + if (!parse_comp_params(params.m_dst_file_type, params.m_comp_params)) + return cCSBadParam; + + if (!parse_scale_params(params.m_mipmap_params)) + return cCSBadParam; + + print_texture_info(L"Source texture", params, src_tex); + + if (params.m_texture_type == cTextureTypeNormalMap) + { + params.m_comp_params.set_flag(cCRNCompFlagPerceptual, false); + } + + texture_conversion::convert_stats stats; + + tim.start(); + bool status = texture_conversion::process(params, stats); + total_time = tim.get_elapsed_secs(); + + if (!status) + { + if (params.m_error_message.is_empty()) + console::error(L"Failed writing output file: \"%s\"", pDst_filename); + else + console::error(params.m_error_message.get_ptr()); + return cCSFailed; + } + + console::info(L"Texture successfully processed in %3.3fs", total_time); + + if (!m_params.get_value_as_bool(L"nostats")) + print_stats(stats); + + return cCSSucceeded; + } +}; + +//----------------------------------------------------------------------------------------------------------------------- + +static bool check_for_option(int argc, wchar_t *argv[], const wchar_t *pOption) +{ + for (int i = 1; i < argc; i++) + { + if ((argv[i][0] == '/') || (argv[i][0] == '-')) + { + if (_wcsicmp(&argv[i][1], pOption) == 0) + return true; + } + } + return false; +} + +//----------------------------------------------------------------------------------------------------------------------- + +#define Q(x) L##x +#define U(x) Q(x) + +static void print_title() +{ + 
console::printf(L"crunch: Advanced DXTn Texture Compressor"); + console::printf(L"Copyright (c) 2010-2011 Tenacious Software LLC"); + console::printf(L"crnlib version v%u.%02u %s Built %s, %s", CRNLIB_VERSION / 100U, CRNLIB_VERSION % 100U, crnlib_is_x64() ? L"x64" : L"x86", U(__DATE__), U(__TIME__)); + console::printf(L""); +} + +//----------------------------------------------------------------------------------------------------------------------- + +static int wmain_internal(int argc, wchar_t *argv[]) +{ + argc; + argv; + + win32_console::init(); + + if (check_for_option(argc, argv, L"quiet")) + console::disable_output(); + + print_title(); + +#if 0 + if (check_for_option(argc, argv, L"exp")) + return test(argc, argv); +#endif + + crunch converter; + + bool status = converter.convert(GetCommandLineW()); + + win32_console::deinit(); + + crnlib_print_mem_stats(); + + return status ? EXIT_SUCCESS : EXIT_FAILURE; +} + +static void pause(void) +{ + console::enable_output(); + + console::message(L"\nPress a key to continue."); + + for ( ; ; ) + { + if (_getch() != -1) + break; + } +} + +//----------------------------------------------------------------------------------------------------------------------- + +#ifdef _MSC_VER +int wmain(int argc, wchar_t *argv[], wchar_t *envp[]) +#else +int main(int argc, char *argva[]) +#endif +{ +#ifndef _MSC_VER + // FIXME - mingw doesn't support wmain() + wchar_t *first_arg = (wchar_t*)L"crunch.exe"; + wchar_t* argv[1] = { first_arg }; + argc = 1; +#else + envp; +#endif + + int status = EXIT_FAILURE; + + if (IsDebuggerPresent()) + { + status = wmain_internal(argc, argv); + } + else + { +#ifdef _MSC_VER + __try + { + status = wmain_internal(argc, argv); + } + __except(EXCEPTION_EXECUTE_HANDLER) + { + console::error(L"Uncached exception! 
crunch command line tool failed!"); + } +#else + status = wmain_internal(argc, argv); +#endif + } + + console::printf(L"\nExit status: %i", status); + + if (check_for_option(argc, argv, L"pause")) + { + if ((status == EXIT_FAILURE) || (console::get_num_messages(cErrorConsoleMessage))) + pause(); + } + + return status; +} + diff --git a/example1/example1.2008.vcproj b/example1/example1.2008.vcproj new file mode 100644 index 00000000..9184de72 --- /dev/null +++ b/example1/example1.2008.vcproj @@ -0,0 +1,716 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/example1/example1.cpp b/example1/example1.cpp new file mode 100644 index 00000000..dac8c40b --- /dev/null +++ b/example1/example1.cpp @@ -0,0 +1,579 @@ +// File: example1.cpp - Simple command line tool that uses the crnlib lib and the crn_decomp.h header file library +// to compress, transcode/unpack, and inspect CRN/DDS textures. +// See Copyright Notice and license at the end of inc/crnlib.h +#include +#include +#include +#include + +// Public crnlib header. +#include "crnlib.h" + +// CRN transcoder library. +#include "crn_decomp.h" +// .DDS file format definitions. +#include "dds_defs.h" + +// stb_image, for loading/saving image files. +#ifdef _MSC_VER +#pragma warning (disable: 4244) // conversion from 'int' to 'uint8', possible loss of data +#pragma warning (disable: 4100) // unreferenced formal parameter +#endif +#include "stb_image.h" + +// windows.h is only needed here for GetSystemInfo(). 
+#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include "windows.h" + +using namespace crnlib; + +const int cDefaultCRNQualityLevel = 128; + +static int print_usage() +{ + printf("Description: Simple crnlib API example program.\n"); + printf("Copyright (c) 2010-2011 Tenacious Software LLC\n"); + printf("Usage: example1 [mode: i/c/d] [source_file] [options]\n"); + printf("\nModes:\n"); + printf("c: Compress to .DDS or .CRN using the crn_compress() func. in crnlib.h\n"); + printf(" The default output format is .DDS\n"); + printf(" Supported source image formats:\n"); + printf(" Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); + printf("d: Transcodes a .CRN file to .DDS using the crn_decompress_crn_to_dds() func.,\n"); + printf("or unpacks each face and mipmap level in a .DDS file to multiple .TGA files.\n"); + printf("i: Display info about source_file.\n"); + printf("\nOptions:\n"); + printf("-out filename - Force output filename.\n"); + printf("\nCompression mode options:\n"); + printf("-crn - Generate a .CRN file instead of .DDS\n"); + printf("-bitrate # - Specify desired CRN/DDS bits/texel, from [.1-8]\n"); + printf(" When writing .DDS: -bitrate or -quality enable clustered DXTn compression.\n"); + printf("-quality # - Specify CRN/DDS quality level factor, from [0-255]\n"); + printf("-noAdaptiveBlocks - Always use 4x4 blocks instead of up to 8x8 macroblocks\n"); + printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); + printf("-nomips - Don't generate mipmaps\n"); + printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); + printf("-converttoluma - Set RGB to luma before compression.\n"); + printf("-pixelformat fmt - Output file's crn_format: DXT1, DXT1A, DXT3, DXT5_CCxY,\n"); + printf(" DXT5_xGxR, DXT5_xGBR, DXT5_AGBR, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC),\n"); + printf(" DXT5A (ATN1N)\n"); + printf(" If no output format is specified, this example uses either DXT1 or DXT5.\n"); + return EXIT_FAILURE; +} + 
+static int error(const char* pMsg, ...) +{ + va_list args; + va_start(args, pMsg); + char buf[512]; + vsprintf_s(buf, sizeof(buf), pMsg, args); + va_end(args); + printf("%s", buf); + return EXIT_FAILURE; +} + +// Loads an entire file into an allocated memory block. +static crn_uint8 *read_file_into_buffer(const char *pFilename, crn_uint32 &size) +{ + size = 0; + + FILE* pFile = NULL; + fopen_s(&pFile, pFilename, "rb"); + if (!pFile) + return NULL; + + fseek(pFile, 0, SEEK_END); + size = ftell(pFile); + fseek(pFile, 0, SEEK_SET); + + crn_uint8 *pSrc_file_data = static_cast(malloc(std::max(1U, size))); + if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) + { + fclose(pFile); + free(pSrc_file_data); + size = 0; + return NULL; + } + + fclose(pFile); + return pSrc_file_data; +} + +// Cracks a CRN's file header using the helper functions in crn_decomp.h. +static bool print_crn_info(const crn_uint8 *pData, crn_uint32 data_size) +{ + crnd::crn_file_info file_info; + if (!crnd::crnd_validate_file(pData, data_size, &file_info)) + return false; + + printf("crnd_validate_file:\n"); + printf("File size: %u\nActualDataSize: %u\nHeaderSize: %u\nTotalPaletteSize: %u\nTablesSize: %u\nLevels: %u\n", data_size, + file_info.m_actual_data_size, file_info.m_header_size, file_info.m_total_palette_size, file_info.m_tables_size, file_info.m_levels); + + printf("LevelCompressedSize: "); + for (crn_uint32 i = 0; i < cCRNMaxLevels; i++) + printf("%u ", file_info.m_level_compressed_size[i]); + printf("\n"); + + printf("ColorEndpointPaletteSize: %u\n", file_info.m_color_endpoint_palette_entries); + printf("ColorSelectorPaletteSize: %u\n", file_info.m_color_selector_palette_entries); + printf("AlphaEndpointPaletteSize: %u\n", file_info.m_alpha_endpoint_palette_entries); + printf("AlphaSelectorPaletteSize: %u\n", file_info.m_alpha_selector_palette_entries); + + printf("crnd_get_texture_info:\n"); + crnd::crn_texture_info tex_info; + if (!crnd::crnd_get_texture_info(pData, 
data_size, &tex_info)) + return false; + + printf("Dimensions: %ux%u\nLevels: %u\nFaces: %u\nBytesPerBlock: %u\nUserData0: %u\nUserData1: %u\nCrnFormat: %S\n", + tex_info.m_width, tex_info.m_height, tex_info.m_levels, tex_info.m_faces, tex_info.m_bytes_per_block, tex_info.m_userdata0, tex_info.m_userdata1, crn_get_format_string(tex_info.m_format)); + + return true; +} + +// Cracks the DDS header and dump its contents. +static bool print_dds_info(const void *pData, crn_uint32 data_size) +{ + if ((data_size < 128) || (*reinterpret_cast(pData) != crnlib::cDDSFileSignature)) + return false; + + const crnlib::DDSURFACEDESC2 &desc = *reinterpret_cast((reinterpret_cast(pData) + sizeof(crn_uint32))); + if (desc.dwSize != sizeof(crnlib::DDSURFACEDESC2)) + return false; + + printf("DDS file information:\n"); + printf("File size: %u\nDimensions: %ux%u\nPitch/LinearSize: %u\n", data_size, desc.dwWidth, desc.dwHeight, desc.dwLinearSize); + printf("MipMapCount: %u\nAlphaBitDepth: %u\n", desc.dwMipMapCount, desc.dwAlphaBitDepth); + + const char *pDDSDFlagNames[] = + { + "DDSD_CAPS", "DDSD_HEIGHT", "DDSD_WIDTH", "DDSD_PITCH", + NULL, "DDSD_BACKBUFFERCOUNT", "DDSD_ZBUFFERBITDEPTH", "DDSD_ALPHABITDEPTH", + NULL, NULL, NULL, "DDSD_LPSURFACE", + "DDSD_PIXELFORMAT", "DDSD_CKDESTOVERLAY", "DDSD_CKDESTBLT", "DDSD_CKSRCOVERLAY", + "DDSD_CKSRCBLT", "DDSD_MIPMAPCOUNT", "DDSD_REFRESHRATE", "DDSD_LINEARSIZE", + "DDSD_TEXTURESTAGE", "DDSD_FVF", "DDSD_SRCVBHANDLE", "DDSD_DEPTH" + }; + + printf("DDSD Flags: 0x%08X ", desc.dwFlags); + for (int i = 0; i < sizeof(pDDSDFlagNames)/sizeof(pDDSDFlagNames[0]); i++) + if ((pDDSDFlagNames[i]) && (desc.dwFlags & (1 << i))) + printf("%s ", pDDSDFlagNames[i]); + printf("\n\n"); + + printf("ddpfPixelFormat.dwFlags: 0x%08X ", desc.ddpfPixelFormat.dwFlags); + if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) printf("DDPF_ALPHAPIXELS "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHA) printf("DDPF_ALPHA "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) 
printf("DDPF_FOURCC "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) printf("DDPF_PALETTEINDEXED8 "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_RGB) printf("DDPF_RGB "); + if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) printf("DDPF_LUMINANCE "); + printf("\n"); + + printf("ddpfPixelFormat.dwFourCC: 0x%08X '%c' '%c' '%c' '%c'\n", + desc.ddpfPixelFormat.dwFourCC, + std::max(32U, desc.ddpfPixelFormat.dwFourCC & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 8) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 16) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 24) & 0xFF)); + + printf("dwRGBBitCount: %u 0x%08X\n", + desc.ddpfPixelFormat.dwRGBBitCount, desc.ddpfPixelFormat.dwRGBBitCount); + + printf("dwRGBBitCount as FOURCC: '%c' '%c' '%c' '%c'\n", + std::max(32U, desc.ddpfPixelFormat.dwRGBBitCount & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 8) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 16) & 0xFF), + std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 24) & 0xFF)); + + printf("dwRBitMask: 0x%08X\ndwGBitMask: 0x%08X\ndwBBitMask: 0x%08X\ndwRGBAlphaBitMask: 0x%08X\n", + desc.ddpfPixelFormat.dwRBitMask, desc.ddpfPixelFormat.dwGBitMask, desc.ddpfPixelFormat.dwBBitMask, desc.ddpfPixelFormat.dwRGBAlphaBitMask); + + printf("\n"); + printf("ddsCaps.dwCaps: 0x%08X ", desc.ddsCaps.dwCaps); + if (desc.ddsCaps.dwCaps & DDSCAPS_COMPLEX) printf("DDSCAPS_COMPLEX "); + if (desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE) printf("DDSCAPS_TEXTURE "); + if (desc.ddsCaps.dwCaps & DDSCAPS_MIPMAP) printf("DDSCAPS_MIPMAP"); + printf("\n"); + + printf("ddsCaps.dwCaps2: 0x%08X ", desc.ddsCaps.dwCaps2); + const char *pDDCAPS2FlagNames[] = + { + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, "DDSCAPS2_CUBEMAP", "DDSCAPS2_CUBEMAP_POSITIVEX", "DDSCAPS2_CUBEMAP_NEGATIVEX", + "DDSCAPS2_CUBEMAP_POSITIVEY", "DDSCAPS2_CUBEMAP_NEGATIVEY", "DDSCAPS2_CUBEMAP_POSITIVEZ", "DDSCAPS2_CUBEMAP_NEGATIVEZ", + NULL, NULL, 
NULL, NULL, + NULL, "DDSCAPS2_VOLUME" + }; + for (int i = 0; i < sizeof(pDDCAPS2FlagNames)/sizeof(pDDCAPS2FlagNames[0]); i++) + if ((pDDCAPS2FlagNames[i]) && (desc.ddsCaps.dwCaps2 & (1 << i))) + printf("%s ", pDDCAPS2FlagNames[i]); + printf("\n"); + + printf("ddsCaps.dwCaps3: 0x%08X\nddsCaps.dwCaps4: 0x%08X\n", + desc.ddsCaps.dwCaps3, desc.ddsCaps.dwCaps4); + + return true; +} + +// CRN/DDS compression callback function. +static crn_bool progress_callback_func(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) +{ + int percentage_complete = (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * 100.0f) / total_phases; + printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bProcessing: %u%%", std::min(100, std::max(0, percentage_complete))); + return true; +} + +int main(int argc, char *argv[]) +{ + printf("example1 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); + + if (argc < 3) + return print_usage(); + + // Parse command line options + int mode = argv[1][0]; + if ((mode != 'c') && (mode != 'd') && (mode != 'i')) + return error("Invalid mode!\n"); + + const char *pSrc_filename = argv[2]; + char out_filename[FILENAME_MAX] = { '\0' }; + + float bitrate = 0.0f; + int quality_level = -1; + bool srgb_colorspace = true; + bool create_mipmaps = true; + bool output_crn = false; + crn_format fmt = cCRNFmtInvalid; + bool use_adaptive_block_sizes = true; + bool set_alpha_to_luma = false; + bool convert_to_luma = false; + bool enable_dxt1a = false; + + for (int i = 3; i < argc; i++) + { + if (argv[i][0] == '/') + argv[i][0] = '-'; + + if (!_stricmp(argv[i], "-crn")) + { + output_crn = true; + } + else if (!_stricmp(argv[i], "-pixelformat")) + { + if (++i >= argc) + return error("Expected pixel format!"); + + if (!_stricmp(argv[i], "dxt1a")) + { + enable_dxt1a = true; + fmt = cCRNFmtDXT1; + } + else + { + uint f; + for (f = 0; f < cCRNFmtTotal; 
f++) + { + if (!_stricmp(argv[i], crn_get_format_stringa(static_cast(f)))) + { + fmt = static_cast(f); + break; + } + } + if (f == cCRNFmtTotal) + return error("Unrecognized pixel format: %s\n", argv[i]); + } + } + else if (!_stricmp(argv[i], "-bitrate")) + { + if (++i >= argc) + return error("Invalid bitrate!"); + + bitrate = (float)atof(argv[i]); + if ((bitrate < .1f) || (bitrate > 8.0f)) + return error("Invalid bitrate!"); + } + else if (!_stricmp(argv[i], "-quality")) + { + if (++i >= argc) + return error("Invalid quality level!"); + + quality_level = atoi(argv[i]); + if ((quality_level < 0) || (quality_level > cCRNMaxQualityLevel)) + return error("Invalid quality level!"); + } + else if (!_stricmp(argv[i], "-out")) + { + if (++i >= argc) + return error("Expected output filename!"); + + strcpy_s(out_filename, sizeof(out_filename), argv[i]); + } + else if (!_stricmp(argv[i], "-nonsrgb")) + srgb_colorspace = false; + else if (!_stricmp(argv[i], "-nomips")) + create_mipmaps = false; + else if (!_stricmp(argv[i], "-noAdaptiveBlocks")) + use_adaptive_block_sizes = false; + else if (!_stricmp(argv[i], "-setalphatoluma")) + set_alpha_to_luma = true; + else if (!_stricmp(argv[i], "-converttoluma")) + convert_to_luma = true; + else + return error("Invalid option: %s\n", argv[i]); + } + + char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; + if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) + return error("Invalid source filename!\n"); + + // Load the source file into memory. 
+ printf("Loading source file: %s\n", pSrc_filename); + crn_uint32 src_file_size; + crn_uint8 *pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); + if (!pSrc_file_data) + return error("Unable to read source file\n"); + + if (mode == 'i') + { + // Information + if (_stricmp(ext_buf, ".crn") == 0) + { + if (!print_crn_info(pSrc_file_data, src_file_size)) + { + free(pSrc_file_data); + return error("Not a CRN file!\n"); + } + } + else if (_stricmp(ext_buf, ".dds") == 0) + { + if (!print_dds_info(pSrc_file_data, src_file_size)) + { + free(pSrc_file_data); + return error("Not a DDS file!\n"); + } + } + else + { + // Try parsing the source file as a regular image. + int x, y, actual_comps; + stbi_uc *p = stbi_load_from_memory(pSrc_file_data, src_file_size, &x, &y, &actual_comps, 4); + if (!p) + { + free(pSrc_file_data); + return error("Failed reading image file!\n"); + } + stbi_image_free(p); + + printf("File size: %u\nDimensions: %ix%i\nActual Components: %i\n", src_file_size, x, y, actual_comps); + } + } + else if (mode == 'c') + { + // Compression to DDS or CRN. + + // If the user has explicitly specified an output file, check the output file's extension to ensure we write the expected format. + if (out_filename[0]) + { + char out_fname_buf[_MAX_FNAME], out_ext_buf[_MAX_EXT]; + _splitpath_s(out_filename, NULL, 0, NULL, 0, out_fname_buf, _MAX_FNAME, out_ext_buf, _MAX_EXT); + if (!_stricmp(out_ext_buf, ".crn")) + output_crn = true; + else if (!_stricmp(out_ext_buf, ".dds")) + output_crn = false; + } + + // Load source image + int width, height, actual_comps; + crn_uint32 *pSrc_image = (crn_uint32*)stbi_load_from_memory(pSrc_file_data, src_file_size, &width, &height, &actual_comps, 4); + if (!pSrc_image) + { + free(pSrc_file_data); + return error("Failed reading image file!\n"); + } + + printf("Source file size: %u, Dimensions: %ux%u\nActual Components: %u\n", src_file_size, width, height, actual_comps); + + // Fill in compression parameters struct. 
+ bool has_alpha_channel = actual_comps > 3; + + if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) + set_alpha_to_luma = true; + + if ((set_alpha_to_luma) || (convert_to_luma)) + { + for (int i = 0; i < width * height; i++) + { + crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; + // Compute CCIR 601 luma. + crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; + crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; + if (set_alpha_to_luma) a = y; + if (convert_to_luma) { r = y; g = y; b = y; } + pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); + } + } + + crn_comp_params comp_params; + comp_params.m_width = width; + comp_params.m_height = height; + comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, enable_dxt1a && has_alpha_channel); + comp_params.set_flag(cCRNCompFlagHierarchical, use_adaptive_block_sizes); + comp_params.m_file_type = output_crn ? cCRNFileTypeCRN : cCRNFileTypeDDS; + comp_params.m_format = (fmt != cCRNFmtInvalid) ? fmt : (has_alpha_channel ? cCRNFmtDXT5 : cCRNFmtDXT1); + + // Important note: This example only feeds a single source image to the compressor, and it internaly generates mipmaps from that source image. + // If you want, there's nothing stopping you from generating the mipmaps on your own, then feeding the multiple source images + // to the compressor. Just set the crn_mipmap_params::m_mode member (set below) to cCRNMipModeUseSourceMips. + comp_params.m_pImages[0][0] = pSrc_image; + + if (bitrate > 0.0f) + comp_params.m_target_bitrate = bitrate; + else if (quality_level >= 0) + comp_params.m_quality_level = quality_level; + else if (output_crn) + { + // Set a default quality level for CRN, otherwise we'll get the default (highest quality) which leads to huge compressed palettes. 
+ comp_params.m_quality_level = cDefaultCRNQualityLevel; + } + + // Determine the # of helper threads (in addition to the main thread) to use during compression. NumberOfCPU's-1 is reasonable. + SYSTEM_INFO g_system_info; + GetSystemInfo(&g_system_info); + int num_helper_threads = std::max(0, (int)g_system_info.dwNumberOfProcessors - 1); + comp_params.m_num_helper_threads = num_helper_threads; + + comp_params.m_pProgress_func = progress_callback_func; + + // Fill in mipmap parameters struct. + crn_mipmap_params mip_params; + mip_params.m_gamma_filtering = srgb_colorspace; + mip_params.m_mode = create_mipmaps ? cCRNMipModeGenerateMips : cCRNMipModeNoMips; + + crn_uint32 actual_quality_level; + float actual_bitrate; + crn_uint32 output_file_size; + + printf("Compressing to %s\n", crn_get_format_stringa(comp_params.m_format)); + + // Now compress to DDS or CRN. + void *pOutput_file_data = crn_compress(comp_params, mip_params, output_file_size, &actual_quality_level, &actual_bitrate); + printf("\n"); + + if (!pOutput_file_data) + { + stbi_image_free(pSrc_image); + free(pSrc_file_data); + return error("Compression failed!"); + } + + printf("Compressed to %u bytes, quality level: %u, effective bitrate: %f\n", output_file_size, actual_quality_level, actual_bitrate); + + // Write the output file. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s%s", drive_buf, dir_buf, fname_buf, output_crn ? ".crn" : ".dds"); + if (out_filename[0]) strcpy(dst_filename, out_filename); + + printf("Writing %s file: %s\n", output_crn ? 
"CRN" : "DDS", dst_filename); + FILE *pFile = fopen(dst_filename, "wb"); + if ((!pFile) || (fwrite(pOutput_file_data, output_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) + { + free(pSrc_file_data); + crn_free_block(pOutput_file_data); + stbi_image_free(pSrc_image); + return error("Failed writing to output file!\n"); + } + + crn_free_block(pOutput_file_data); + stbi_image_free(pSrc_image); + } + else if (_stricmp(ext_buf, ".crn") == 0) + { + // Decompress/transcode CRN to DDS. + printf("Decompressing CRN to DDS\n"); + + // Transcode the CRN file to a DDS file in memory. + crn_uint32 dds_file_size = src_file_size; + void *pDDS_file_data = crn_decompress_crn_to_dds(pSrc_file_data, dds_file_size); + if (!pDDS_file_data) + { + free(pSrc_file_data); + return error("Failed decompressing CRN file!\n"); + } + + // Now write the DDS file to disk. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); + if (out_filename[0]) strcpy(dst_filename, out_filename); + + printf("Writing file: %s\n", dst_filename); + FILE *pFile = fopen(dst_filename, "wb"); + if ((!pFile) || (fwrite(pDDS_file_data, dds_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) + { + crn_free_block(pDDS_file_data); + free(pSrc_file_data); + return error("Failed writing to output file!\n"); + } + + printf("\n"); + + print_dds_info(pDDS_file_data, dds_file_size); + + crn_free_block(pDDS_file_data); + } + else if (_stricmp(ext_buf, ".dds") == 0) + { + // Unpack DDS to one or more TGA's. 
+ if (out_filename[0]) + _splitpath_s(out_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT); + + crn_texture_desc tex_desc; + crn_uint32 *pImages[cCRNMaxFaces * cCRNMaxLevels]; + if (!crn_decompress_dds_to_images(pSrc_file_data, src_file_size, pImages, tex_desc)) + { + free(pSrc_file_data); + return error("Failed unpacking DDS file!\n"); + } + + printf("Decompressed texture Dimensions: %ux%u, Faces: %u, Levels: %u, FourCC: 0x%08X '%c' '%c' '%c' '%c'\n", + tex_desc.m_width, tex_desc.m_height, tex_desc.m_faces, tex_desc.m_levels, tex_desc.m_fmt_fourcc, + std::max(32U, tex_desc.m_fmt_fourcc & 0xFF), + std::max(32U, (tex_desc.m_fmt_fourcc >> 8) & 0xFF), + std::max(32U, (tex_desc.m_fmt_fourcc >> 16) & 0xFF), + std::max(32U, (tex_desc.m_fmt_fourcc >> 24) & 0xFF)); + + for (crn_uint32 face_index = 0; face_index < tex_desc.m_faces; face_index++) + { + for (crn_uint32 level_index = 0; level_index < tex_desc.m_levels; level_index++) + { + int width = std::max(1U, tex_desc.m_width >> level_index); + int height = std::max(1U, tex_desc.m_height >> level_index); + + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s_face%u_mip%u.tga", drive_buf, dir_buf, fname_buf, face_index, level_index); + + printf("Writing file: %s\n", dst_filename); + if (!stbi_write_tga(dst_filename, width, height, 4, pImages[level_index + face_index * tex_desc.m_levels])) + { + crn_free_all_images(pImages, tex_desc); + free(pSrc_file_data); + + return error("Failed writing output file!\n"); + } + } + } + + crn_free_all_images(pImages, tex_desc); + } + else + { + free(pSrc_file_data); + return error("Decompression mode only supports .dds or .crn files!\n"); + } + + free(pSrc_file_data); + + return EXIT_SUCCESS; +} diff --git a/example1/stb_image.h b/example1/stb_image.h new file mode 100644 index 00000000..6da2b729 --- /dev/null +++ b/example1/stb_image.h @@ -0,0 +1,3942 @@ +/* stbi-1.18 - public domain JPEG/PNG reader - 
http://nothings.org/stb_image.c + when you control the images you're loading + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline (no JPEG progressive, no oddball channel decimations) + PNG 8-bit only + BMP non-1bpp, non-RLE + TGA (not sure what subset, if a subset) + PSD (composited view only, no extra channels) + HDR (radiance rgbE format) + writes BMP,TGA (define STBI_NO_WRITE to remove code) + decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) + + TODO: + stbi_info_* + + history: + 1.18 fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another stb_malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less + than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. 
nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant +*/ + +#pragma warning (disable: 4793) // function compiled as native + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +//// begin header file //////////////////////////////////////////////////// +// +// Limitations: +// - no progressive/interlaced support (jpeg, png) +// - 8-bit samples only (jpeg, png) +// - not threadsafe +// - channel subsampling of at most 2 in each dimension (jpeg) +// - no delayed line count (jpeg) -- IJG doesn't support either +// +// Basic usage (see HDR discussion below): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *comp -- outputs # of image components in image file +// int req_comp -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise. +// If req_comp is non-zero, *comp has the number of components that _would_ +// have been output otherwise. 
E.g. if you set req_comp to 4, you will always +// get RGBA output, but you can check *comp to easily see if it's opaque. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *comp will be unchanged. The function stbi_failure_reason() +// can be queried for an extremely brief, end-user unfriendly explanation +// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid +// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG and BMP images are automatically depalettized. +// +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); + +//#define _CRT_SECURE_NO_WARNINGS + +#ifndef STBI_NO_STDIO +#include +#endif + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for req_comp + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4, +}; + +typedef unsigned char stbi_uc; + +#ifdef __cplusplus +extern "C" { +#endif + +// WRITING API + +#if !defined(STBI_NO_WRITE) && !defined(STBI_NO_STDIO) +// write a BMP/TGA file given tightly packed 'comp' channels (no padding, nor bmp-stride-padding) +// (you must include the appropriate extension in the filename). 
+// returns TRUE on success, FALSE if couldn't open file, error writing file +extern int stbi_write_bmp (char const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_bmp_w (wchar_t const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_tga (char const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_tga_w (wchar_t const *filename, int x, int y, int comp, const void *data); +#endif + +// PRIMARY API - works on images of any type + +// load image by filename, open file, or memory buffer +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_w (wchar_t const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +// for stbi_load_from_file, file pointer is left pointing immediately after image + +#ifndef STBI_NO_HDR +#ifndef STBI_NO_STDIO +extern float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif +extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + +extern void stbi_hdr_to_ldr_gamma(float gamma); +extern void stbi_hdr_to_ldr_scale(float scale); + +extern void stbi_ldr_to_hdr_gamma(float gamma); +extern void stbi_ldr_to_hdr_scale(float scale); + +#endif // STBI_NO_HDR + +// get a VERY brief reason for failure +// NOT THREADSAFE +extern char *stbi_failure_reason (void); + +// free the loaded image -- this is just stb_free() +extern void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding 
+extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +extern int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_is_hdr (char const *filename); +extern int stbi_is_hdr_from_file(FILE *f); +#endif + +// ZLIB client - used by PNG, available for other purposes + +extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +// TYPE-SPECIFIC ACCESS + +// is it a jpeg? +extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_test_file (FILE *f); +extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a png? 
+extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? +extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? 
+extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_psd_test_file (FILE *f); +extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it an hdr? +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); + +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_hdr_test_file (FILE *f); +extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// define new loaders +typedef struct +{ + int (*test_memory)(stbi_uc const *buffer, int len); + stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + #ifndef STBI_NO_STDIO + int (*test_file)(FILE *f); + stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp); + #endif +} stbi_loader; + +// register a loader by filling out the above structure (you must defined ALL functions) +// returns 1 if added or already added, 0 if not added (too many loaders) +// NOT THREADSAFE +extern int stbi_register_loader(stbi_loader *loader); + +// define faster low-level operations (typically SIMD support) +#if STBI_SIMD +typedef void (*stbi_idct_8x8)(uint8 *out, int out_stride, short data[64], unsigned short *dequantize); +// compute an integer IDCT on "input" +// input[x] = data[x] * dequantize[x] +// write results to 'out': 64 samples, each run of 8 spaced by 'out_stride' +// CLAMP results to 0..255 +typedef void (*stbi_YCbCr_to_RGB_run)(uint8 *output, uint8 const *y, uint8 const *cb, uint8 const *cr, int 
count, int step); +// compute a conversion from YCbCr to RGB +// 'count' pixels +// write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B +// y: Y input channel +// cb: Cb input channel; scale/biased to be 0..255 +// cr: Cr input channel; scale/biased to be 0..255 + +extern void stbi_install_idct(stbi_idct_8x8 func); +extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func); +#endif // STBI_SIMD + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifndef STBI_HEADER_FILE_ONLY + +inline void* stb_malloc(size_t c) { return ::malloc(c); } +inline void* stb_realloc(void *p, size_t c) { return ::realloc(p, c); } +inline void stb_free(void *p) { ::free(p); } + +#ifndef STBI_NO_HDR +#include // ldexp +#include // strcmp +#endif + +#ifndef STBI_NO_STDIO +#include +#endif +#include +#include +#include +#include + +#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__MINGW64__) + #ifdef __cplusplus + #define __forceinline inline + #else + #define __forceinline + #endif +#endif + + +// implementation: +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef signed short int16; +typedef unsigned int uint32; +typedef signed int int32; +typedef unsigned int uint; + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(uint32)==4]; + +#if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE) +#define STBI_NO_WRITE +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Generic API that works on all image types +// + +// this is not threadsafe +static char *failure_reason; + +char *stbi_failure_reason(void) +{ + return failure_reason; +} + +static int e(char *str) +{ + failure_reason = str; + return 0; +} + +#ifdef STBI_NO_FAILURE_STRINGS + #define e(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define e(x,y) 
e(y) +#else + #define e(x,y) e(x) +#endif + +#define epf(x,y) ((float *) (e(x,y)?NULL:NULL)) +#define epuc(x,y) ((unsigned char *) (e(x,y)?NULL:NULL)) + +void stbi_image_free(void *retval_from_stbi_load) +{ + stb_free(retval_from_stbi_load); +} + +#define MAX_LOADERS 32 +stbi_loader *loaders[MAX_LOADERS]; +static int max_loaders = 0; + +int stbi_register_loader(stbi_loader *loader) +{ + int i; + for (i=0; i < MAX_LOADERS; ++i) { + // already present? + if (loaders[i] == loader) + return 1; + // end of the list? + if (loaders[i] == NULL) { + loaders[i] = loader; + max_loaders = i+1; + return 1; + } + } + // no room for it + return 0; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_STDIO +unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_w(wchar_t const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = _wfopen(filename, L"rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_file(f)) + return stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + if (stbi_png_test_file(f)) + return stbi_png_load_from_file(f,x,y,comp,req_comp); + if (stbi_bmp_test_file(f)) + return stbi_bmp_load_from_file(f,x,y,comp,req_comp); + if (stbi_psd_test_file(f)) + return stbi_psd_load_from_file(f,x,y,comp,req_comp); + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) { + float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp); + 
return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_file(f)) + return loaders[i]->load_from_file(f,x,y,comp,req_comp); + // test tga last because it's a crappy test! + if (stbi_tga_test_file(f)) + return stbi_tga_load_from_file(f,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_memory(buffer,len)) + return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_png_test_memory(buffer,len)) + return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_bmp_test_memory(buffer,len)) + return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_psd_test_memory(buffer,len)) + return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp); + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) { + float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_memory(buffer,len)) + return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp); + // test tga last because it's a crappy test! 
+ if (stbi_tga_test_memory(buffer,len)) + return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} + +#ifndef STBI_NO_HDR + +#ifndef STBI_NO_STDIO +float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + float *result; + if (!f) return epf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) + return stbi_hdr_load_from_file(f,x,y,comp,req_comp); + #endif + data = stbi_load_from_file(f, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) + return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + #endif + data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +// these is-hdr-or-not is defined independent of whether STBI_NO_HDR is +// defined, for API simplicity; if STBI_NO_HDR is defined, it always +// reports false! 
+ +int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_memory(buffer, len); + #else + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +extern int stbi_is_hdr (char const *filename) +{ + FILE *f = fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +extern int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_file(f); + #else + return 0; + #endif +} + +#endif + +// @TODO: get image dimensions & components without fully decoding +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_HDR +static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f; +static float l2h_gamma=2.2f, l2h_scale=1.0f; + +void stbi_hdr_to_ldr_gamma(float gamma) { h2l_gamma_i = 1/gamma; } +void stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; } + +void stbi_ldr_to_hdr_gamma(float gamma) { l2h_gamma = gamma; } +void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; } +#endif + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + SCAN_load=0, + SCAN_type, + SCAN_header, +}; + +typedef struct +{ + uint32 img_x, img_y; + int img_n, img_out_n; + + #ifndef STBI_NO_STDIO + FILE *img_file; + #endif + uint8 *img_buffer, *img_buffer_end; +} stbi; + +#ifndef STBI_NO_STDIO +static void start_file(stbi *s, FILE *f) +{ + s->img_file = f; +} +#endif + +static void start_mem(stbi *s, uint8 const *buffer, int len) +{ +#ifndef STBI_NO_STDIO + s->img_file = NULL; +#endif + s->img_buffer = (uint8 *) buffer; + s->img_buffer_end = (uint8 *) buffer+len; +} + +__forceinline static int get8(stbi *s) +{ +#ifndef STBI_NO_STDIO + if 
(s->img_file) { + int c = fgetc(s->img_file); + return c == EOF ? 0 : c; + } +#endif + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + return 0; +} + +__forceinline static int at_eof(stbi *s) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) + return feof(s->img_file); +#endif + return s->img_buffer >= s->img_buffer_end; +} + +__forceinline static uint8 get8u(stbi *s) +{ + return (uint8) get8(s); +} + +static void skip(stbi *s, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) + fseek(s->img_file, n, SEEK_CUR); + else +#endif + s->img_buffer += n; +} + +static int get16(stbi *s) +{ + int z = get8(s); + return (z << 8) + get8(s); +} + +static uint32 get32(stbi *s) +{ + uint32 z = get16(s); + return (z << 16) + get16(s); +} + +static int get16le(stbi *s) +{ + int z = get8(s); + return z + (get8(s) << 8); +} + +static uint32 get32le(stbi *s) +{ + uint32 z = get16le(s); + return z + (get16le(s) << 16); +} + +static void getn(stbi *s, stbi_uc *buffer, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + fread(buffer, 1, n, s->img_file); + return; + } +#endif + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; +} + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). 
png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so stb_malloc a new one and free that one +// only failure mode is stb_malloc failing + +static uint8 compute_y(int r, int g, int b) +{ + return (uint8) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + assert(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stb_malloc(req_comp * x * y); + if (good == NULL) { + stb_free(data); + return epuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define COMBO(a,b) ((a)*8+(b)) + #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch(COMBO(img_n, req_comp)) { + CASE(1,2) dest[0]=src[0], dest[1]=255; break; + CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break; + CASE(2,1) dest[0]=src[0]; break; + CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break; + CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break; + CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break; + CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break; + CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break; + default: assert(0); + } + #undef CASE + } + + stb_free(data); + return good; +} + +#ifndef STBI_NO_HDR +static float 
*ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output = (float *) stb_malloc(x * y * comp * sizeof(float)); + if (output == NULL) { stb_free(data); return epf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale; + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + stb_free(data); + return output; +} + +#define float2int(x) ((int) (x)) +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output = (stbi_uc *) stb_malloc(x * y * comp); + if (output == NULL) { stb_free(data); return epuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = float2int(z); + } + } + stb_free(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation) +// +// simple implementation +// - channel subsampling of at most 2 in each dimension +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good 
upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - uses a lot of intermediate memory, could cache poorly +// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4 +// stb_jpeg: 1.34 seconds (MSVC6, default release build) +// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro) +// IJL11.dll: 1.08 seconds (compiled by intel) +// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) +// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + uint8 fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + uint16 code[256]; + uint8 values[256]; + uint8 size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} huffman; + +typedef struct +{ + #if STBI_SIMD + unsigned short dequant2[4][64]; + #endif + stbi s; + huffman huff_dc[4]; + huffman huff_ac[4]; + uint8 dequant[4][64]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + uint8 *data; + void *raw_data; + uint8 *linebuf; + } img_comp[4]; + + uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int scan_n, order[4]; + int restart_interval, todo; +} jpeg; + +static int build_huffman(huffman *h, int *count) +{ + int i,j,k=0,code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + 
h->size[k++] = (uint8) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (uint16) (code++); + if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (uint8) i; + } + } + } + return 1; +} + +static void grow_buffer_unsafe(jpeg *j) +{ + do { + int b = j->nomore ? 0 : get8(&j->s); + if (b == 0xff) { + int c = get8(&j->s); + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer = (j->code_buffer << 8) | b; + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +__forceinline static int decode(jpeg *j, huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + if (h->size[k] > j->code_bits) + return -1; + j->code_bits -= h->size[k]; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. 
To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + if (j->code_bits < 16) + temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff; + else + temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k]; + assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + return h->values[c]; +} + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. +__forceinline static int extend_receive(jpeg *j, int n) +{ + unsigned int m = 1 << (n-1); + unsigned int k; + if (j->code_bits < n) grow_buffer_unsafe(j); + k = (j->code_buffer >> (j->code_bits - n)) & bmask[n]; + j->code_bits -= n; + // the following test is probably a random branch that won't + // predict well. I tried to table accelerate it but failed. + // maybe it's compiling as a conditional move? + if (k < m) + return (-1 << n) + k + 1; + else + return k; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static uint8 dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b) +{ + int diff,dc,k; + int t = decode(j, hdc); + if (t < 0) return e("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) dc; + + // decode AC components, see JPEG spec + k = 1; + do { + int r,s; + int rs = decode(j, hac); + if (rs < 0) return e("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + data[dezigzag[k++]] = (short) extend_receive(j,s); + } + } while (k < 64); + return 1; +} + +// take a -128..127 value and clamp it and convert to 0..255 +__forceinline static uint8 clamp(int x) +{ + x += 128; + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (uint8) x; +} + +#define f2f(x) (int) (((x) * 4096 + 0.5)) +#define fsh(x) ((x) << 12) + +// derived from jidctint -- DCT_ISLOW +#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * f2f(0.5411961f); \ + t2 = p1 + p3*f2f(-1.847759065f); \ + t3 = p1 + p2*f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = fsh(p2+p3); \ + t1 = fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; 
\ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*f2f( 1.175875602f); \ + t0 = t0*f2f( 0.298631336f); \ + t1 = t1*f2f( 2.053119869f); \ + t2 = t2*f2f( 3.072711026f); \ + t3 = t3*f2f( 1.501321110f); \ + p1 = p5 + p1*f2f(-0.899976223f); \ + p2 = p5 + p2*f2f(-2.562915447f); \ + p3 = p3*f2f(-1.961570560f); \ + p4 = p4*f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +#if !STBI_SIMD +// .344 seconds on 3*anemones.jpg +static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize) +{ + int i,val[64],*v=val; + uint8 *o,*dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // 
we've got an extra 1<<3, so 1<<17 total we need to remove. + x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +#else +static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize) +{ + int i,val[64],*v=val; + uint8 *o; + unsigned short *dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. 
+ x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +static stbi_idct_8x8 stbi_idct_installed = idct_block; + +extern void stbi_install_idct(stbi_idct_8x8 func) +{ + stbi_idct_installed = func; +} +#endif + +#define MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static uint8 get_marker(jpeg *j) +{ + uint8 x; + if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; } + x = get8u(&j->s); + if (x != 0xff) return MARKER_none; + while (x == 0xff) + x = get8u(&j->s); + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, reset the entropy decoder and +// the dc prediction +static void reset(jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->marker = MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int parse_entropy_coded_data(jpeg *z) +{ + reset(z); + if (z->scan_n == 1) { + int i,j; + #if STBI_SIMD + __declspec(align(16)) + #endif + short data[64]; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } else { // interleaved! + int i,j,k,x,y; + short data[64]; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } + return 1; +} + +static int process_marker(jpeg *z, int m) +{ + int L; + switch (m) { + case MARKER_none: // no marker found + return e("expected marker","Corrupt JPEG"); + + case 0xC2: // SOF - progressive + return e("progressive jpeg","JPEG format not supported (progressive)"); + + case 0xDD: // DRI - specify restart interval + if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG"); + z->restart_interval = get16(&z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = get16(&z->s)-2; + while (L > 0) { + int q = get8(&z->s); + int p = q >> 4; + int t = q & 15,i; + if (p != 0) return e("bad DQT type","Corrupt JPEG"); + if (t > 3) return e("bad DQT table","Corrupt JPEG"); + for (i=0; i < 64; ++i) + z->dequant[t][dezigzag[i]] = get8u(&z->s); + #if STBI_SIMD + for (i=0; i < 64; ++i) + z->dequant2[t][i] = z->dequant[t][i]; + #endif + L -= 65; + } + return 
L==0; + + case 0xC4: // DHT - define huffman table + L = get16(&z->s)-2; + while (L > 0) { + uint8 *v; + int sizes[16],i,m=0; + int q = get8(&z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = get8(&z->s); + m += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < m; ++i) + v[i] = get8u(&z->s); + L -= m; + } + return L==0; + } + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + skip(&z->s, get16(&z->s)-2); + return 1; + } + return 0; +} + +// after we see SOS +static int process_scan_header(jpeg *z) +{ + int i; + int Ls = get16(&z->s); + z->scan_n = get8(&z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = get8(&z->s), which; + int q = get8(&z->s); + for (which = 0; which < z->s.img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s.img_n) return 0; + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + get8(&z->s); // should be 63, but might be 0 + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + + return 1; +} + +static int process_frame_header(jpeg *z, int scan) +{ + stbi *s = &z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG + p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit 
only"); // JPEG baseline + s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires + c = get8(s); + if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG"); + + for (i=0; i < s->img_n; ++i) { + z->img_comp[i].id = get8(s); + if (z->img_comp[i].id != i+1) // JFIF requires + if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! + return e("bad component ID","Corrupt JPEG"); + q = get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); + z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); + } + + if (scan != SCAN_load) return 1; + + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. 
for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].raw_data = stb_malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); + if (z->img_comp[i].raw_data == NULL) { + for(--i; i >= 0; --i) { + stb_free(z->img_comp[i].raw_data); + z->img_comp[i].data = NULL; + } + return e("outofmem", "Out of memory"); + } + // align blocks for installable-idct using mmx/sse + z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + z->img_comp[i].linebuf = NULL; + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define DNL(x) ((x) == 0xdc) +#define SOI(x) ((x) == 0xd8) +#define EOI(x) ((x) == 0xd9) +#define SOF(x) ((x) == 0xc0 || (x) == 0xc1) +#define SOS(x) ((x) == 0xda) + +static int decode_jpeg_header(jpeg *z, int scan) +{ + int m; + z->marker = MARKER_none; // initialize cached marker to empty + m = get_marker(z); + if (!SOI(m)) return e("no SOI","Corrupt JPEG"); + if (scan == SCAN_type) return 1; + m = get_marker(z); + while (!SOF(m)) { + if (!process_marker(z,m)) return 0; + m = get_marker(z); + while (m == MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG"); + m = get_marker(z); + } + } + if (!process_frame_header(z, scan)) return 0; + return 1; +} + +static int decode_jpeg_image(jpeg *j) +{ + int m; + j->restart_interval = 0; + if (!decode_jpeg_header(j, SCAN_load)) return 0; + m = get_marker(j); + while (!EOI(m)) { + if (SOS(m)) { + if (!process_scan_header(j)) return 0; + if (!parse_entropy_coded_data(j)) return 0; + } else { + if (!process_marker(j, m)) return 0; + } + m = get_marker(j); + } + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1, + int w, int hs); + +#define div4(x) ((uint8) ((x) >> 2)) + +static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + return in_near; +} + +static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + for (i=0; i < w; ++i) + out[i] = div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + uint8 *input = in_near; + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + 
return out; + } + + out[0] = input[0]; + out[1] = div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = div4(n+input[i-1]); + out[i*2+1] = div4(n+input[i+1]); + } + out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + return out; +} + +#define div16(x) ((uint8) ((x) >> 4)) + +static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = div16(3*t0 + t1 + 8); + out[i*2 ] = div16(3*t1 + t0 + 8); + } + out[w*2-1] = div4(t1+2); + return out; +} + +static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +#define float2fixed(x) ((int) ((x) * 65536 + 0.5)) + +// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) +// VC6 without processor=Pro is generating multiple LEAs per multiply! 
+static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 16) + 32768; // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr*float2fixed(1.40200f); + g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f); + b = y_fixed + cb*float2fixed(1.77200f); + r >>= 16; + g >>= 16; + b >>= 16; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (uint8)r; + out[1] = (uint8)g; + out[2] = (uint8)b; + out[3] = 255; + out += step; + } +} + +#if STBI_SIMD +static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row; + +void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func) +{ + stbi_YCbCr_installed = func; +} +#endif + + +// clean up the temporary component buffers +static void cleanup_jpeg(jpeg *j) +{ + int i; + for (i=0; i < j->s.img_n; ++i) { + if (j->img_comp[i].data) { + stb_free(j->img_comp[i].raw_data); + j->img_comp[i].data = NULL; + } + if (j->img_comp[i].linebuf) { + stb_free(j->img_comp[i].linebuf); + j->img_comp[i].linebuf = NULL; + } + } +} + +typedef struct +{ + resample_row_func resample; + uint8 *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi_resample; + +static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n; + // validate req_comp + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + z->s.img_n = 0; + + // load a jpeg image from whichever source + if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? 
req_comp : z->s.img_n; + + if (z->s.img_n == 3 && n < 3) + decode_n = 1; + else + decode_n = z->s.img_n; + + // resample and color-convert + { + int k; + uint i,j; + uint8 *output; + uint8 *coutput[4]; + + stbi_resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (uint8 *) stb_malloc(z->s.img_x + 3); + if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s.img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2; + else r->resample = resample_row_generic; + } + + // can't error after this so, this is safe + output = (uint8 *) stb_malloc(n * z->s.img_x * z->s.img_y + 1); + if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s.img_y; ++j) { + uint8 *out = output + n * z->s.img_x * j; + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? 
r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + uint8 *y = coutput[0]; + if (z->s.img_n == 3) { + #if STBI_SIMD + stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n); + #else + YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n); + #endif + } else + for (i=0; i < z->s.img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + uint8 *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s.img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + cleanup_jpeg(z); + *out_x = z->s.img_x; + *out_y = z->s.img_y; + if (comp) *comp = z->s.img_n; // report original components, not output + return output; + } +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_file(&j.s, f); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_jpeg_test_file(FILE *f) +{ + int n,r; + jpeg j; + n = ftell(f); + start_file(&j.s, f); + r = decode_jpeg_header(&j, SCAN_type); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_jpeg_test_memory(stbi_uc const *buffer, int len) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return decode_jpeg_header(&j, SCAN_type); +} + +// @TODO: +#ifndef STBI_NO_STDIO +extern int 
stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can stb_malloc/stb_realloc) +// performance +// - fast huffman + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define ZFAST_BITS 9 // accelerate all cases in default tables +#define ZFAST_MASK ((1 << ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + uint16 fast[1 << ZFAST_BITS]; + uint16 firstcode[16]; + int maxcode[17]; + uint16 firstsymbol[16]; + uint8 size[288]; + uint16 value[288]; +} zhuffman; + +__forceinline static int bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +__forceinline static int bit_reverse(int v, int bits) +{ + assert(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 
11 bits, bit reverse and shift away 5 + return bitreverse16(v) >> (16-bits); +} + +static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 255, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + assert(sizes[i] <= (1 << i)); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (uint16) code; + z->firstsymbol[i] = (uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + z->size[c] = (uint8)s; + z->value[c] = (uint16)i; + if (s <= ZFAST_BITS) { + int k = bit_reverse(next_code[s],s); + while (k < (1 << ZFAST_BITS)) { + z->fast[k] = (uint16) c; + k += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + uint8 *zbuffer, *zbuffer_end; + int num_bits; + uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + zhuffman z_length, z_distance; +} zbuf; + +__forceinline static int zget8(zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void fill_bits(zbuf *z) +{ + do { + assert(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +__forceinline 
static unsigned int zreceive(zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z) +{ + int b,s,k; + if (a->num_bits < 16) fill_bits(a); + b = z->fast[a->code_buffer & ZFAST_MASK]; + if (b < 0xffff) { + s = z->size[b]; + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; + } + + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = bit_reverse(a->code_buffer, 16); + for (s=ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + assert(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +static int expand(zbuf *z, int n) // need to make room for n bytes +{ + char *q; + int cur, limit; + if (!z->z_expandable) return e("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) stb_realloc(z->zout_start, limit); + if (q == NULL) return e("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static int length_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static int length_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static int dist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int parse_huffman_block(zbuf *a) +{ + for(;;) { + int z = zhuffman_decode(a, &a->z_length); + 
if (z < 256) { + if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes + if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0; + *a->zout++ = (char) z; + } else { + uint8 *p; + int len,dist; + if (z == 256) return 1; + z -= 257; + len = length_base[z]; + if (length_extra[z]) len += zreceive(a, length_extra[z]); + z = zhuffman_decode(a, &a->z_distance); + if (z < 0) return e("bad huffman code","Corrupt PNG"); + dist = dist_base[z]; + if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); + if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG"); + if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0; + p = (uint8 *) (a->zout - dist); + while (len--) + *a->zout++ = *p++; + } + } +} + +static int compute_huffman_codes(zbuf *a) +{ + static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + zhuffman z_codelength; + uint8 lencodes[286+32+137];//padding for maximum single op + uint8 codelength_sizes[19]; + int i,n; + + int hlit = zreceive(a,5) + 257; + int hdist = zreceive(a,5) + 1; + int hclen = zreceive(a,4) + 4; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (uint8) s; + } + if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < hlit + hdist) { + int c = zhuffman_decode(a, &z_codelength); + assert(c >= 0 && c < 19); + if (c < 16) + lencodes[n++] = (uint8) c; + else if (c == 16) { + c = zreceive(a,2)+3; + memset(lencodes+n, lencodes[n-1], c); + n += c; + } else if (c == 17) { + c = zreceive(a,3)+3; + memset(lencodes+n, 0, c); + n += c; + } else { + assert(c == 18); + c = zreceive(a,7)+11; + memset(lencodes+n, 0, c); + n += c; + } + } + if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG"); + if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; 
+} + +static int parse_uncompressed_block(zbuf *a) +{ + uint8 header[4]; + int len,nlen,k; + if (a->num_bits & 7) + zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns? + a->code_buffer >>= 8; + a->num_bits -= 8; + } + assert(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = (uint8) zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!expand(a, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int parse_zlib_header(zbuf *a) +{ + int cmf = zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = zget8(a); + if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +// @TODO: should statically initialize these for optimal thread safety +static uint8 default_length[288], default_distance[32]; +static void init_defaults(void) +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) default_length[i] = 8; + for ( ; i <= 255; ++i) default_length[i] = 9; + for ( ; i <= 279; ++i) default_length[i] = 7; + for ( ; i <= 287; ++i) default_length[i] = 8; + + for (i=0; i <= 31; ++i) default_distance[i] = 5; +} + +int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... 
// I should implement real streaming support instead

// Inflate the whole stream in *a into its output buffer.
// parse_header selects whether a zlib header is expected first.
// Returns 1 on success and 0 on any error; block type 3 is rejected.
static int parse_zlib(zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = zreceive(a,1);
      type = zreceive(a,2);
      if (type == 0) {
         if (!parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         return 0;
      } else {
         if (type == 1) {
            // use fixed code lengths
            // (default_distance[31] is still 0 until init_defaults has run)
            if (!default_distance[31]) init_defaults();
            if (!zbuild_huffman(&a->z_length  , default_length  , 288)) return 0;
            if (!zbuild_huffman(&a->z_distance, default_distance,  32)) return 0;
         } else {
            if (!compute_huffman_codes(a)) return 0;
         }
         if (!parse_huffman_block(a)) return 0;
      }
      // partial mode: stop once more than 64K of output has been produced
      if (stbi_png_partial && a->zout - a->zout_start > 65536)
         break;
   } while (!final);
   return 1;
}

// Point *a at the output buffer obuf/olen and run the decoder.
// exp non-zero allows the buffer to be grown with stb_realloc.
static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->zout_start = obuf;
   a->zout       = obuf;
   a->zout_end   = obuf + olen;
   a->z_expandable = exp;

   return parse_zlib(a, parse_header);
}

// Decode a zlib stream into a growable buffer that starts at initial_size
// bytes. Returns the buffer (its length in *outlen when outlen is non-NULL)
// or NULL on failure.
char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
{
   zbuf a;
   char *p = (char *) stb_malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (uint8 *) buffer;
   a.zbuffer_end = (uint8 *) buffer + len;
   if (do_zlib(&a, p, initial_size, 1, 1)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      // free through a.zout_start: the buffer may have been reallocated
      stb_free(a.zout_start);
      return NULL;
   }
}

// Same as stbi_zlib_decode_malloc_guesssize with a 16384-byte initial guess.
char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}

// Decode a zlib stream into the fixed buffer obuffer/olen.
// Returns the number of bytes written, or -1 on failure.
int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
{
   zbuf a;
   a.zbuffer = (uint8 *) ibuffer;
   a.zbuffer_end = (uint8 *) ibuffer + ilen;
   if (do_zlib(&a, obuffer, olen, 0, 1))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}

// As stbi_zlib_decode_malloc, but the input has no zlib header.
char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
{
   zbuf a;
   char *p = (char *) stb_malloc(16384);
   if (p == NULL) return NULL;
   a.zbuffer = (uint8 *) buffer;
   a.zbuffer_end = (uint8 *) buffer+len;
   if (do_zlib(&a, p, 16384, 1, 0)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      stb_free(a.zout_start);
      return NULL;
   }
}

// As stbi_zlib_decode_buffer, but the input has no zlib header.
int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
{
   zbuf a;
   a.zbuffer = (uint8 *) ibuffer;
   a.zbuffer_end = (uint8 *) ibuffer + ilen;
   if (do_zlib(&a, obuffer, olen, 0, 0))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}

// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
//    simple implementation
//      - only 8-bit samples
//      - no CRC checking
//      - allocates lots of intermediate memory
//        - avoids problem of streaming data between subsystems
//        - avoids explicit window management
//    performance
//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding


// Header of one PNG chunk: payload length and four-character type code.
typedef struct
{
   uint32 length;
   uint32 type;
} chunk;

// Pack four characters into the 32-bit value get_chunk_header reads as a type.
#define PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))

// Read the length and type of the next chunk from s.
static chunk get_chunk_header(stbi *s)
{
   chunk c;
   c.length = get32(s);
   c.type   = get32(s);
   return c;
}

// Consume and verify the 8-byte PNG signature.
static int check_png_header(stbi *s)
{
   static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 };
   int i;
   for (i=0; i < 8; ++i)
      if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG");
   return 1;
}

// Decoder state: the input stream plus the buffers built while decoding.
typedef struct
{
   stbi s;
   uint8 *idata, *expanded, *out;
} png;


// Values 0..4 are the filter bytes stored in the file; the *_first
// variants are substituted on the first row via first_row_filter.
enum {
   F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4,
   F_avg_first, F_paeth_first,
};

// Indexed by the file's filter byte; yields the filter to use on row 0.
static uint8 first_row_filter[5] =
{
   F_none, F_sub, F_none, F_avg_first, F_paeth_first
};

// Return whichever of a, b, c is closest to a + b - c,
// preferring a, then b, on ties.
static int paeth(int a, int b, int c)
{
   int p = a + b - c;
   int pa = abs(p-a);
   int pb = abs(p-b);
   int pc = abs(p-c);
   if (pa <= pb && pa <= pc) return a;
   if (pb <= pc) return b;
   return c;
}

// create the png data from
post-deflated data +static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y) +{ + stbi *s = &a->s; + uint32 i,j,stride = x*out_n; + int k; + int img_n = s->img_n; // copy it into a local for later + assert(out_n == s->img_n || out_n == s->img_n+1); + if (stbi_png_partial) y = 1; + a->out = (uint8 *) stb_malloc(x * y * out_n); + if (!a->out) return e("outofmem", "Out of memory"); + if (!stbi_png_partial) { + if (s->img_x == x && s->img_y == y) + if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + else // interlaced: + if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + } + for (j=0; j < y; ++j) { + uint8 *cur = a->out + stride*j; + uint8 *prior = cur - stride; + int filter = *raw++; + if (filter > 4) return e("invalid filter","Corrupt PNG"); + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + // handle first pixel explicitly + for (k=0; k < img_n; ++k) { + switch(filter) { + case F_none : cur[k] = raw[k]; break; + case F_sub : cur[k] = raw[k]; break; + case F_up : cur[k] = raw[k] + prior[k]; break; + case F_avg : cur[k] = raw[k] + (prior[k]>>1); break; + case F_paeth : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break; + case F_avg_first : cur[k] = raw[k]; break; + case F_paeth_first: cur[k] = raw[k]; break; + } + } + if (img_n != out_n) cur[img_n] = 255; + raw += img_n; + cur += out_n; + prior += out_n; + // this is a little gross, so that we don't switch per-pixel or per-component + if (img_n == out_n) { + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ + for (k=0; k < img_n; ++k) + switch(filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) 
(raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break; + } + #undef CASE + } else { + assert(img_n+1 == out_n); + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ + for (k=0; k < img_n; ++k) + switch(filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break; + } + #undef CASE + } + } + return 1; +} + +static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced) +{ + uint8 *final; + int p; + int save; + if (!interlaced) + return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y); + save = stbi_png_partial; + stbi_png_partial = 0; + + // de-interlacing + final = (uint8 *) stb_malloc(a->s.img_x * a->s.img_y * out_n); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { + stb_free(final); + return 0; + } + for (j=0; j < y; ++j) + for (i=0; i < x; ++i) + memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, + a->out + (j*x+i)*out_n, out_n); + stb_free(a->out); + raw += (x*out_n+1)*y; + raw_len -= (x*out_n+1)*y; + } + } + a->out 
= final; + + stbi_png_partial = save; + return 1; +} + +static int compute_transparency(png *z, uint8 tc[3], int out_n) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + assert(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n) +{ + uint32 i, pixel_count = a->s.img_x * a->s.img_y; + uint8 *p, *temp_out, *orig = a->out; + + p = (uint8 *) stb_malloc(pixel_count * pal_img_n); + if (p == NULL) return e("outofmem", "Out of memory"); + + // between here and stb_free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + stb_free(a->out); + a->out = temp_out; + return 1; +} + +static int parse_png_file(png *z, int scan, int req_comp) +{ + uint8 palette[1024], pal_img_n=0; + uint8 has_trans=0, tc[3]; + uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0; + stbi *s = &z->s; + + if (!check_png_header(s)) return 0; + + if (scan == SCAN_type) return 1; + + for(;;first=0) { + chunk c = get_chunk_header(s); + if (first && c.type != PNG_TYPE('I','H','D','R')) + return e("first not IHDR","Corrupt PNG"); + switch (c.type) { + case PNG_TYPE('I','H','D','R'): { + int depth,color,comp,filter; + if (!first) return e("multiple IHDR","Corrupt PNG"); + if (c.length != 13) return e("bad 
IHDR len","Corrupt PNG"); + s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); + s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); + depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); + color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG"); + comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG"); + filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG"); + interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + if (scan == SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case PNG_TYPE('P','L','T','E'): { + if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = get8u(s); + palette[i*4+1] = get8u(s); + palette[i*4+2] = get8u(s); + palette[i*4+3] = 255; + } + break; + } + + case PNG_TYPE('t','R','N','S'): { + if (z->idata) return e("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = get8u(s); + } else { + if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); + if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG"); + has_trans = 1; + for (k=0; k < s->img_n; ++k) + tc[k] = (uint8) get16(s); // non 8-bit images will be larger + } + break; + } + + case PNG_TYPE('I','D','A','T'): { + if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG"); + if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; } + if (ioff + c.length > idata_limit) { + uint8 *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + p = (uint8 *) stb_realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); + z->idata = p; + } + #ifndef STBI_NO_STDIO + if (s->img_file) + { + if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG"); + } + else + #endif + { + memcpy(z->idata+ioff, s->img_buffer, c.length); + s->img_buffer += c.length; + } + ioff += c.length; + break; + } + + case PNG_TYPE('I','E','N','D'): { + uint32 raw_len; + if (scan != SCAN_load) return 1; + if (z->idata == NULL) return e("no IDAT","Corrupt PNG"); + z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len); + if (z->expanded == NULL) return 0; // zlib should set error + stb_free(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0; + if (has_trans) + if (!compute_transparency(z, tc, s->img_out_n)) return 0; + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!expand_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } + stb_free(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX chunk not known"; + invalid_chunk[0] = (uint8) (c.type >> 24); + invalid_chunk[1] = (uint8) (c.type >> 16); + invalid_chunk[2] = (uint8) (c.type >> 8); + invalid_chunk[3] = (uint8) (c.type >> 0); + #endif + return e(invalid_chunk, "PNG not supported: unknown chunk type"); + } + skip(s, c.length); + break; + } + // end of chunk, read and skip CRC + get32(s); + } +} + +static unsigned 
char *do_png(png *p, int *x, int *y, int *n, int req_comp) +{ + unsigned char *result=NULL; + p->expanded = NULL; + p->idata = NULL; + p->out = NULL; + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + if (parse_png_file(p, SCAN_load, req_comp)) { + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s.img_out_n) { + result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y); + p->s.img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s.img_x; + *y = p->s.img_y; + if (n) *n = p->s.img_n; + } + stb_free(p->out); p->out = NULL; + stb_free(p->expanded); p->expanded = NULL; + stb_free(p->idata); p->idata = NULL; + + return result; +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_file(&p.s, f); + return do_png(&p, x,y,comp,req_comp); +} + +unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_png_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_mem(&p.s, buffer,len); + return do_png(&p, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_png_test_file(FILE *f) +{ + png p; + int n,r; + n = ftell(f); + start_file(&p.s, f); + r = parse_png_file(&p, SCAN_type,STBI_default); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_png_test_memory(stbi_uc const *buffer, int len) +{ + png p; + start_mem(&p.s, buffer, len); + return parse_png_file(&p, SCAN_type,STBI_default); +} + +// TODO: load header from png +#ifndef STBI_NO_STDIO +int stbi_png_info (char const *filename, int *x, int *y, int *comp) +{ + png p; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + start_file(&p.s, f); + if 
(parse_png_file(&p, SCAN_header, 0)) { + if(x) *x = p.s.img_x; + if(y) *y = p.s.img_y; + if (comp) *comp = p.s.img_n; + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// Microsoft/Windows BMP image + +static int bmp_test(stbi *s) +{ + int sz; + if (get8(s) != 'B') return 0; + if (get8(s) != 'M') return 0; + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + get32le(s); // discard data offset + sz = get32le(s); + if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1; + return 0; +} + +#ifndef STBI_NO_STDIO +int stbi_bmp_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = bmp_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_bmp_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_test(&s); +} + +// returns 0..31 for the highest set bit +static int high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +static int shiftsigned(int v, int shift, int bits) +{ + int result; + int z=0; + + if (shift < 0) v <<= -shift; + else v >>= shift; + result = v; + + z = bits; + while (z < 8) { + result += v >> z; + z += bits; + } + return result; +} + +static stbi_uc *bmp_load(stbi *s, int *x, int *y, int 
*comp, int req_comp) +{ + uint8 *out; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; + stbi_uc pal[256][4]; + int psize=0,i,j,compress=0,width; + int bpp, flip_vertically, pad, target, offset, hsz; + if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP"); + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + offset = get32le(s); + hsz = get32le(s); + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown"); + failure_reason = "bad BMP"; + if (hsz == 12) { + s->img_x = get16le(s); + s->img_y = get16le(s); + } else { + s->img_x = get32le(s); + s->img_y = get32le(s); + } + if (get16le(s) != 1) return 0; + bpp = get16le(s); + if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit"); + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + if (hsz == 12) { + if (bpp < 24) + psize = (offset - 14 - 24) / 3; + } else { + compress = get32le(s); + if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE"); + get32le(s); // discard sizeof + get32le(s); // discard hres + get32le(s); // discard vres + get32le(s); // discard colorsused + get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + get32le(s); + get32le(s); + get32le(s); + get32le(s); + } + if (bpp == 16 || bpp == 32) { + mr = mg = mb = 0; + if (compress == 0) { + if (bpp == 32) { + mr = 0xff << 16; + mg = 0xff << 8; + mb = 0xff << 0; + ma = (unsigned int)(0xff << 24); + fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 + } else { + mr = 31 << 10; + mg = 31 << 5; + mb = 31 << 0; + } + } else if (compress == 3) { + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (mr == mg && mg == mb) { + // ?!?!? 
+ return NULL; + } + } else + return NULL; + } + } else { + assert(hsz == 108); + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + ma = get32le(s); + get32le(s); // discard color space + for (i=0; i < 12; ++i) + get32le(s); // discard color space parameters + } + if (bpp < 16) + psize = (offset - 14 - hsz) >> 2; + } + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + out = (stbi_uc *) stb_malloc(target * s->img_x * s->img_y); + if (!out) return epuc("outofmem", "Out of memory"); + if (bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { stb_free(out); return epuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = get8(s); + pal[i][1] = get8(s); + pal[i][0] = get8(s); + if (hsz != 12) get8(s); + pal[i][3] = 255; + } + skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); + if (bpp == 4) width = (s->img_x + 1) >> 1; + else if (bpp == 8) width = s->img_x; + else { stb_free(out); return epuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=get8(s),v2=0; + if (bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (bpp == 8) ? 
get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + skip(s, pad); + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + skip(s, offset - 14 - hsz); + if (bpp == 24) width = 3 * s->img_x; + else if (bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (bpp == 24) { + easy = 1; + } else if (bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP"); + // right shift amt to put high bit in position #7 + rshift = high_bit(mr)-7; rcount = bitcount(mr); + gshift = high_bit(mg)-7; gcount = bitcount(mr); + bshift = high_bit(mb)-7; bcount = bitcount(mr); + ashift = high_bit(ma)-7; acount = bitcount(mr); + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + int a; + out[z+2] = get8(s); + out[z+1] = get8(s); + out[z+0] = get8(s); + z += 3; + a = (easy == 2 ? get8(s) : 255); + if (target == 4) out[z++] = a; + } + } else { + for (i=0; i < (int) s->img_x; ++i) { + uint32 v = (bpp == 16 ? get16le(s) : get32le(s)); + int a; + out[z++] = shiftsigned(v & mr, rshift, rcount); + out[z++] = shiftsigned(v & mg, gshift, gcount); + out[z++] = shiftsigned(v & mb, bshift, bcount); + a = (ma ? 
shiftsigned(v & ma, ashift, acount) : 255); + if (target == 4) out[z++] = a; + } + } + skip(s, pad); + } + } + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = target; + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_bmp_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return bmp_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_load(&s, x,y,comp,req_comp); +} + +// Targa Truevision - TGA +// by Jonathan Dummer + +static int tga_test(stbi *s) +{ + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE + get16(s); // discard palette start + get16(s); // discard palette length + get8(s); // discard bits per palette color entry + get16(s); // discard x origin + get16(s); // discard y origin + if( get16(s) < 1 ) return 0; // test width + if( get16(s) < 1 ) return 0; // test height + sz = get8(s); // bits per pixel + if( 
(sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = tga_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_tga_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_test(&s); +} + +static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + // read in the TGA header stuff + int tga_offset = get8u(s); + int tga_indexed = get8u(s); + int tga_image_type = get8u(s); + int tga_is_RLE = 0; + int tga_palette_start = get16le(s); + int tga_palette_len = get16le(s); + int tga_palette_bits = get8u(s); + int tga_x_origin = get16le(s); + int tga_y_origin = get16le(s); + int tga_width = get16le(s); + int tga_height = get16le(s); + int tga_bits_per_pixel = get8u(s); + int tga_inverted = get8u(s); + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4]; + unsigned char trans_data[4]; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + // do a tiny bit of precessing + if( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + /* int tga_alpha_bits = tga_inverted & 15; */ + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // error check + if( //(tga_indexed) || + (tga_width < 1) || (tga_height < 1) || + (tga_image_type < 1) || (tga_image_type > 3) || + ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) && + (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)) + ) + { + return NULL; + } + + // If I'm paletted, then I'll use the number of bits from the palette + if( tga_indexed ) + { + tga_bits_per_pixel = tga_palette_bits; + } + + // tga info + *x = tga_width; + *y = tga_height; + if( (req_comp < 1) || (req_comp > 4) ) + { + // just use whatever the file was + req_comp = 
tga_bits_per_pixel / 8; + *comp = req_comp; + } else + { + // force a new number of components + *comp = tga_bits_per_pixel/8; + } + tga_data = (unsigned char*)stb_malloc( tga_width * tga_height * req_comp ); + + // skip to the data's starting position (offset usually = 0) + skip(s, tga_offset ); + // do I need to load a palette? + if( tga_indexed ) + { + // any data to skip? (offset usually = 0) + skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stb_malloc( tga_palette_len * tga_palette_bits / 8 ); + getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 ); + } + // load the data + for( i = 0; i < tga_width * tga_height; ++i ) + { + // if I'm in RLE mode, do I need to get a RLE chunk? + if( tga_is_RLE ) + { + if( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = get8u(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if( read_next_pixel ) + { + // load however much data we did have + if( tga_indexed ) + { + // read in 1 byte, then perform the lookup + int pal_idx = get8u(s); + if( pal_idx >= tga_palette_len ) + { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_bits_per_pixel / 8; + for( j = 0; j*8 < tga_bits_per_pixel; ++j ) + { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else + { + // read in the data raw + for( j = 0; j*8 < tga_bits_per_pixel; ++j ) + { + raw_data[j] = get8u(s); + } + } + // convert raw to the intermediate format + switch( tga_bits_per_pixel ) + { + case 8: + // Luminous => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 16: + // Luminous,Alpha => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[1]; + break; + 
case 24: + // BGR => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 32: + // BGRA => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[3]; + break; + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + // convert to final format + switch( req_comp ) + { + case 1: + // RGBA => Luminance + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + break; + case 2: + // RGBA => Luminance,Alpha + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + tga_data[i*req_comp+1] = trans_data[3]; + break; + case 3: + // RGBA => RGB + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + break; + case 4: + // RGBA => RGBA + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + tga_data[i*req_comp+3] = trans_data[3]; + break; + } + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if( tga_inverted ) + { + for( j = 0; j*2 < tga_height; ++j ) + { + int index1 = j * tga_width * req_comp; + int index2 = (tga_height - 1 - j) * tga_width * req_comp; + for( i = tga_width * req_comp; i > 0; --i ) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if( tga_palette != NULL ) + { + stb_free( tga_palette ); + } + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_tga_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return tga_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB + +static int psd_test(stbi *s) +{ + if (get32(s) != 0x38425053) return 0; // "8BPS" + else return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_psd_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = psd_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_psd_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_test(&s); +} + +static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + int pixelCount; + int channelCount, compression; + int channel, i, count, len; + int w,h; + uint8 *out; + + // Check identifier + if (get32(s) != 0x38425053) // "8BPS" + return epuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (get16(s) != 1) + return epuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = get16(s); + if (channelCount < 0 || channelCount > 16) + return epuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = get32(s); + w = get32(s); + + // Make sure the depth is 8 bits. + if (get16(s) != 8) + return epuc("unsupported bit depth", "PSD bit depth is not 8 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (get16(s) != 3) + return epuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + skip(s,get32(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + skip(s, get32(s) ); + + // Skip the reserved data. + skip(s, get32(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = get16(s); + if (compression > 1) + return epuc("bad compression", "PSD has an unknown compression format"); + + // Create the destination image. + out = (stbi_uc *) stb_malloc(4 * w*h); + if (!out) return epuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. 
+ for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4; + } else { + // Read the RLE data. + count = 0; + while (count < pixelCount) { + len = get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + count += len; + while (len) { + *p = get8(s); + p += 4; + len--; + } + } else if (len > 128) { + uint32 val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0x0FF; + len += 2; + val = get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out + channel; + if (channel > channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4; + } else { + // Read the data. 
+ count = 0; + for (i = 0; i < pixelCount; i++) + *p = get8(s), p += 4; + } + } + } + + if (req_comp && req_comp != 4) { + out = convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // convert_format frees input on failure + } + + if (comp) *comp = channelCount; + *y = h; + *x = w; + + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_psd_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return psd_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int hdr_test(stbi *s) +{ + char *signature = "#?RADIANCE\n"; + int i; + for (i=0; signature[i]; ++i) + if (get8(s) != signature[i]) + return 0; + return 1; +} + +int stbi_hdr_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return hdr_test(&s); +} + +#ifndef STBI_NO_STDIO +int stbi_hdr_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = hdr_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +#define HDR_BUFLEN 1024 +static char *hdr_gettoken(stbi *z, char *buffer) +{ + int len=0; + char *s = buffer, c = '\0'; + s; + + c = get8(z); + + while (!at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == HDR_BUFLEN-1) { + // flush to end of line + while (!at_eof(z) && get8(z) != '\n') + ; + break; + } + c = get8(z); + } + + buffer[len] = 0; + return 
buffer; +} + +static void hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + + +static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + char buffer[HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + + + // Check identifier + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + return epf("not HDR", "Corrupt HDR image"); + + // Parse header + while(1) { + token = hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return epf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = strtol(token, NULL, 10); + + *x = width; + *y = height; + + *comp = 3; + if (req_comp == 0) req_comp = 3; + + // Read data + hdr_data = (float *) stb_malloc(height * width * req_comp * sizeof(float)); + + // Load image data + // image data is stored as some number of sca + if( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + getn(s, rgbe, 4); + hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = get8(s); + c2 = get8(s); + len = get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4] = { c1,c2,len, get8(s) }; + hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + stb_free(scanline); + goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format + } + len <<= 8; + len |= get8(s); + if (len != width) { stb_free(hdr_data); stb_free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) scanline = (stbi_uc *) stb_malloc(width * 4); + + for (k = 0; k < 4; ++k) { + i = 0; + while (i < width) { + count = get8(s); + if (count > 128) { + // Run + value = get8(s); + count -= 128; + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = get8(s); + } + } + } + for (i=0; i < width; ++i) + 
hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + stb_free(scanline); + } + + return hdr_data; +} + +#ifndef STBI_NO_STDIO +float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return hdr_load(&s,x,y,comp,req_comp); +} +#endif + +float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer, len); + return hdr_load(&s,x,y,comp,req_comp); +} + +#endif // STBI_NO_HDR + +/////////////////////// write image /////////////////////// + +#ifndef STBI_NO_WRITE + +static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); } + +static void writefv(FILE *f, char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { uint8 x = va_arg(v, int); write8(f,x); break; } + case '2': { int16 x = va_arg(v, int); write8(f,x); write8(f,x>>8); break; } + case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; } + default: + assert(0); + va_end(v); + return; + } + } +} + +static void writef(FILE *f, char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + writefv(f,fmt,v); + va_end(v); +} + +static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int write_alpha, int scanline_pad) +{ + uint8 bg[3] = { 255, 0, 255}, px[3]; + uint32 zero = 0; + int i,j,k, j_end; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + uint8 *d = (uint8 *) data + (j*x+i)*comp; + if (write_alpha < 0) + fwrite(&d[comp-1], 1, 1, f); + switch (comp) { + case 1: + case 2: writef(f, "111", d[0],d[0],d[0]); + break; + case 4: + if (!write_alpha) { + for (k=0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255; + writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]); + break; + } + if (write_alpha > 0) + fwrite(&d[comp-1], 1, 1, f); + } + fwrite(&zero,scanline_pad,1,f); + } +} + +static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...) +{ + FILE *f = fopen(filename, "wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +static int outfile_w(wchar_t const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...) 
+{ + FILE *f = _wfopen(filename, L"wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return outfile(filename,-1,-1,x,y,comp,data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_bmp_w(wchar_t const *filename, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return outfile_w(filename,-1,-1,x,y,comp,data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) +{ + int has_alpha = !(comp & 1); + return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0, + "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +int stbi_write_tga_w(wchar_t const *filename, int x, int y, int comp, const void *data) +{ + int has_alpha = !(comp & 1); + return outfile_w(filename, -1,-1, x, y, comp, data, has_alpha, 0, + "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +// any other image formats that do interleaved rgb data? +// PNG: requires adler32,crc32 -- significant amount of code +// PSD: no, channels output separately +// TIFF: no, stripwise-interleaved... 
i think + +#endif // STBI_NO_WRITE + +#endif // STBI_HEADER_FILE_ONLY + diff --git a/example2/example2.2008.vcproj b/example2/example2.2008.vcproj new file mode 100644 index 00000000..8df1cac6 --- /dev/null +++ b/example2/example2.2008.vcproj @@ -0,0 +1,720 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/example2/example2.cpp b/example2/example2.cpp new file mode 100644 index 00000000..e4b4a61f --- /dev/null +++ b/example2/example2.cpp @@ -0,0 +1,277 @@ +// File: example2.cpp - This example uses the crn_decomp.h stand-alone header file library +// to transcode .CRN files directly to .DDS, with no intermediate recompression step to DXTn. +// This tool does NOT depend on the crnlib library at all. It only needs the low-level +// decompression/transcoding functionality defined in inc/crn_decomp.h. +// This is the basic functionality a game engine would need to employ at runtime to utilize +// .CRN textures. +// See Copyright Notice and license at the end of inc/crnlib.h +#include +#include +#include +#include + +// CRN transcoder library. +#include "crn_decomp.h" +// .DDS file format definitions. +#include "dds_defs.h" + +// A simple high-precision, platform independent timer class. +#include "timer.h" + +using namespace crnlib; + +static int print_usage() +{ + printf("Description: Transcodes .CRN to .DDS files using crn_decomp.h.\n"); + printf("Copyright (c) 2010-2011 Tenacious Software LLC\n"); + printf("Usage: example2 [source_file] [options]\n"); + printf("\nOptions:\n"); + printf("-out filename - Force output filename.\n"); + return EXIT_FAILURE; +} + +static int error(const char* pMsg, ...) 
+{ + va_list args; + va_start(args, pMsg); + char buf[512]; + vsprintf_s(buf, sizeof(buf), pMsg, args); + va_end(args); + printf("%s", buf); + return EXIT_FAILURE; +} + +// Loads an entire file into an allocated memory block. +static crn_uint8 *read_file_into_buffer(const char *pFilename, crn_uint32 &size) +{ + size = 0; + + FILE* pFile = NULL; + fopen_s(&pFile, pFilename, "rb"); + if (!pFile) + return NULL; + + fseek(pFile, 0, SEEK_END); + size = ftell(pFile); + fseek(pFile, 0, SEEK_SET); + + crn_uint8 *pSrc_file_data = static_cast(malloc(std::max(1U, size))); + if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) + { + fclose(pFile); + free(pSrc_file_data); + size = 0; + return NULL; + } + + fclose(pFile); + return pSrc_file_data; +} + +int main(int argc, char *argv[]) +{ + printf("example2 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); + + if (argc < 2) + return print_usage(); + + // Parse command line options + const char *pSrc_filename = argv[1]; + char out_filename[FILENAME_MAX] = { '\0' }; + + for (int i = 2; i < argc; i++) + { + if (argv[i][0] == '/') + argv[i][0] = '-'; + + if (!_stricmp(argv[i], "-out")) + { + if (++i >= argc) + return error("Expected output filename!"); + + strcpy_s(out_filename, sizeof(out_filename), argv[i]); + } + else + return error("Invalid option: %s\n", argv[i]); + } + + // Split the source filename into its various components. + char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; + if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) + return error("Invalid source filename!\n"); + + // Load the source file into memory. 
+ printf("Loading source file: %s\n", pSrc_filename); + crn_uint32 src_file_size; + crn_uint8 *pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); + if (!pSrc_file_data) + return error("Unable to read source file\n"); + + // Decompress/transcode CRN to DDS. + // DDS files are organized in face-major order, like this: + // Face0: Mip0, Mip1, Mip2, etc. + // Face1: Mip0, Mip1, Mip2, etc. + // etc. + // While CRN files are organized in mip-major order, like this: + // Mip0: Face0, Face1, Face2, Face3, Face4, Face5 + // Mip1: Face0, Face1, Face2, Face3, Face4, Face5 + // etc. + printf("Transcoding CRN to DDS\n"); + + crnd::crn_texture_info tex_info; + if (!crnd::crnd_get_texture_info(pSrc_file_data, src_file_size, &tex_info)) + { + free(pSrc_file_data); + return error("crnd_get_texture_info() failed!\n"); + } + + timer tm; + + tm.start(); + crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pSrc_file_data, src_file_size); + double total_unpack_begin_time = tm.get_elapsed_ms(); + + if (!pContext) + { + free(pSrc_file_data); + return error("crnd_unpack_begin() failed!\n"); + } + + // Now create the DDS file. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); + if (out_filename[0]) strcpy(dst_filename, out_filename); + + printf("Writing DDS file: %s\n", dst_filename); + + FILE *pDDS_file = fopen(dst_filename, "wb"); + if (!pDDS_file) + { + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + return error("Failed creating destination file!\n"); + } + + // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). + fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); + + // Prepare the DDS header. 
+ crnlib::DDSURFACEDESC2 dds_desc; + memset(&dds_desc, 0, sizeof(dds_desc)); + dds_desc.dwSize = sizeof(dds_desc); + dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | ((tex_info.m_levels > 1) ? DDSD_MIPMAPCOUNT : 0); + dds_desc.dwWidth = tex_info.m_width; + dds_desc.dwHeight = tex_info.m_height; + dds_desc.dwMipMapCount = (tex_info.m_levels > 1) ? tex_info.m_levels : 0; + + dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); + dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; + crn_format fundamental_fmt = crnd::crnd_get_fundamental_dxt_format(tex_info.m_format); + dds_desc.ddpfPixelFormat.dwFourCC = crnd::crnd_crn_format_to_fourcc(fundamental_fmt); + if (fundamental_fmt != tex_info.m_format) + { + dds_desc.ddpfPixelFormat.dwRGBBitCount = crnd::crnd_crn_format_to_fourcc(tex_info.m_format); + } + + dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + if (tex_info.m_levels > 1) + { + dds_desc.ddsCaps.dwCaps |= (DDSCAPS_COMPLEX | DDSCAPS_MIPMAP); + } + + if (tex_info.m_faces == 6) + { + dds_desc.ddsCaps.dwCaps2 = DDSCAPS2_CUBEMAP | + DDSCAPS2_CUBEMAP_POSITIVEX | DDSCAPS2_CUBEMAP_NEGATIVEX | DDSCAPS2_CUBEMAP_POSITIVEY | + DDSCAPS2_CUBEMAP_NEGATIVEY | DDSCAPS2_CUBEMAP_POSITIVEZ | DDSCAPS2_CUBEMAP_NEGATIVEZ; + } + + // Write the DDS header to the output file. + fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); + + // Now transcode all face and mipmap levels into memory, one mip level at a time. + void *pImages[cCRNMaxFaces][cCRNMaxLevels]; + crn_uint32 image_size_in_bytes[cCRNMaxLevels]; + memset(pImages, 0, sizeof(pImages)); + memset(image_size_in_bytes, 0, sizeof(image_size_in_bytes)); + + crn_uint32 total_unpacked_texels = 0; + + double total_unpack_time = 0.0f; + for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) + { + // Compute the face's width, height, number of DXT blocks per row/col, etc. 
+ const crn_uint32 width = std::max(1U, tex_info.m_width >> level_index); + const crn_uint32 height = std::max(1U, tex_info.m_height >> level_index); + const crn_uint32 blocks_x = std::max(1U, (width + 3) >> 2); + const crn_uint32 blocks_y = std::max(1U, (height + 3) >> 2); + const crn_uint32 row_pitch = blocks_x * crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); + const crn_uint32 total_face_size = row_pitch * blocks_y; + + image_size_in_bytes[level_index] = total_face_size; + + for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + { + void *p = malloc(total_face_size); + if (!p) + { + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + free(pImages[f][l]); + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + return error("Out of memory!"); + } + + pImages[face_index][level_index] = p; + } + + // Prepare the face pointer array needed by crnd_unpack_level(). + void *pDecomp_images[cCRNMaxFaces]; + for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + pDecomp_images[face_index] = pImages[face_index][level_index]; + + // Now transcode the level to raw DXTn + tm.start(); + if (!crnd::crnd_unpack_level(pContext, pDecomp_images, total_face_size, row_pitch, level_index)) + { + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + free(pImages[f][l]); + + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + + return error("Failed transcoding texture!"); + } + + total_unpack_time += tm.get_elapsed_ms(); + total_unpacked_texels += (blocks_x * blocks_y * 16); + } + + printf("crnd_unpack_begin time: %3.3fms\n", total_unpack_begin_time); + printf("Total crnd_unpack_level time: %3.3fms\n", total_unpack_time); + double total_time = total_unpack_begin_time + total_unpack_time; + printf("Total transcode time: %3.3fms\n", total_time); + printf("Total texels transcoded: %u\n", total_unpacked_texels); + printf("Overall transcode 
throughput: %3.3f million texels/sec\n", (total_unpacked_texels / (total_time / 1000.0f)) / 1000000.0f); + + // Now write the DXTn data to the DDS file in face-major order. + for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) + for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) + fwrite(pImages[face_index][level_index], image_size_in_bytes[level_index], 1, pDDS_file); + + for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) + for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) + free(pImages[f][l]); + + crnd::crnd_unpack_end(pContext); + free(pSrc_file_data); + + if (fclose(pDDS_file) == EOF) + { + return error("Failed writing to DDS file!\n"); + } + + return EXIT_SUCCESS; +} diff --git a/example2/timer.cpp b/example2/timer.cpp new file mode 100644 index 00000000..c3627fd9 --- /dev/null +++ b/example2/timer.cpp @@ -0,0 +1,153 @@ +// File: timer.cpp +// A simple high-precision, platform independent timer class. +#include +#include +#include +#include + +#include "timer.h" + +#if defined(WIN32) +#include +#elif defined(_XBOX) +#include +#endif + +unsigned long long timer::g_init_ticks; +unsigned long long timer::g_freq; +double timer::g_inv_freq; + +#if defined(WIN32) || defined(_XBOX) +inline void query_counter(timer_ticks *pTicks) +{ + QueryPerformanceCounter(reinterpret_cast(pTicks)); +} +inline void query_counter_frequency(timer_ticks *pTicks) +{ + QueryPerformanceFrequency(reinterpret_cast(pTicks)); +} +#elif defined(__GNUC__) +#include +inline void query_counter(timer_ticks *pTicks) +{ + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec)*1000000ULL + static_cast(cur_time.tv_usec); +} +inline void query_counter_frequency(timer_ticks *pTicks) +{ + *pTicks = 1000000; +} +#endif + +timer::timer() : + m_start_time(0), + m_stop_time(0), + m_started(false), + m_stopped(false) +{ + if (!g_inv_freq) + init(); +} + +timer::timer(timer_ticks start_ticks) +{ + if (!g_inv_freq) + 
init(); + + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; +} + +void timer::start(timer_ticks start_ticks) +{ + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; +} + +void timer::start() +{ + query_counter(&m_start_time); + + m_started = true; + m_stopped = false; +} + +void timer::stop() +{ + assert(m_started); + + query_counter(&m_stop_time); + + m_stopped = true; +} + +double timer::get_elapsed_secs() const +{ + assert(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return delta * g_inv_freq; +} + +timer_ticks timer::get_elapsed_us() const +{ + assert(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; +} + +void timer::init() +{ + if (!g_inv_freq) + { + query_counter_frequency(&g_freq); + g_inv_freq = 1.0f / g_freq; + + query_counter(&g_init_ticks); + } +} + +timer_ticks timer::get_init_ticks() +{ + if (!g_inv_freq) + init(); + + return g_init_ticks; +} + +timer_ticks timer::get_ticks() +{ + if (!g_inv_freq) + init(); + + timer_ticks ticks; + query_counter(&ticks); + return ticks - g_init_ticks; +} + +double timer::ticks_to_secs(timer_ticks ticks) +{ + if (!g_inv_freq) + init(); + + return ticks * g_inv_freq; +} + diff --git a/example2/timer.h b/example2/timer.h new file mode 100644 index 00000000..6fa3300e --- /dev/null +++ b/example2/timer.h @@ -0,0 +1,41 @@ +// File: timer.h +// A simple high-precision, platform independent timer class. 
+#pragma once + +typedef unsigned long long timer_ticks; + +class timer +{ +public: + timer(); + timer(timer_ticks start_ticks); + + void start(); + void start(timer_ticks start_ticks); + + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } + timer_ticks get_elapsed_us() const; + + static void init(); + static inline timer_ticks get_ticks_per_sec() { return g_freq; } + static timer_ticks get_init_ticks(); + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } + static inline double get_secs() { return ticks_to_secs(get_ticks()); } + static inline double get_ms() { return ticks_to_ms(get_ticks()); } + +private: + static timer_ticks g_init_ticks; + static timer_ticks g_freq; + static double g_inv_freq; + + timer_ticks m_start_time; + timer_ticks m_stop_time; + + bool m_started : 1; + bool m_stopped : 1; +}; diff --git a/example3/example3.2008.vcproj b/example3/example3.2008.vcproj new file mode 100644 index 00000000..e74ffc50 --- /dev/null +++ b/example3/example3.2008.vcproj @@ -0,0 +1,716 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/example3/example3.cpp b/example3/example3.cpp new file mode 100644 index 00000000..e8800ce8 --- /dev/null +++ b/example3/example3.cpp @@ -0,0 +1,278 @@ +// File: example3.cpp - Demonstrates how to use crnlib's simple block compression +// API's to manually pack images to DXTn compressed .DDS files. This example isn't multithreaded +// so it's not going to be fast. 
+// Also note that this sample only demonstrates traditional/vanilla 4x4 DXTn block compression (not CRN). + +// See Copyright Notice and license at the end of inc/crnlib.h +#include +#include +#include +#include + +// CRN transcoder library. +#include "crnlib.h" +// .DDS file format definitions. +#include "dds_defs.h" + +// stb_image, for loading/saving image files. +#ifdef _MSC_VER +#pragma warning (disable: 4244) // conversion from 'int' to 'uint8', possible loss of data +#pragma warning (disable: 4100) // unreferenced formal parameter +#pragma warning (disable: 4127) // conditional expression is constant +#endif +#include "stb_image.h" + +using namespace crnlib; + +const uint cDXTBlockSize = 4; + +static int print_usage() +{ + printf("Description: Simple .DDS DXTn block compression using crnlib.\n"); + printf("Copyright (c) 2010-2011 Tenacious Software LLC\n"); + printf("Usage: example3 [source_file] [options]\n"); + printf("\n"); + printf("Note: This simple example is not multithreaded, so it's not going to be\n"); + printf("particularly fast.\n"); + printf("\n"); + printf("Supported source image formats:\n"); + printf("Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); + printf("\nOptions:\n"); + printf("-out filename - Force output filename (always use .DDS extension).\n"); + printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); + printf("-pixelformat X - Output DXTn format. Supported formats:\n"); + printf("DXT1, DXT3, DXT5, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC), DXT5A (ATN1N)\n"); + printf("If no output pixel format is specified, this example uses either DXT1 or DXT5.\n"); + printf("-dxtquality X - DXTn quality: superfast, fast, normal, better, uber (default)\n"); + printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); + printf("-converttoluma - Set RGB to luma before compression.\n"); + return EXIT_FAILURE; +} + +static int error(const char* pMsg, ...) 
+{ + va_list args; + va_start(args, pMsg); + char buf[512]; + vsprintf_s(buf, sizeof(buf), pMsg, args); + va_end(args); + printf("%s", buf); + return EXIT_FAILURE; +} + +int main(int argc, char *argv[]) +{ + printf("example3 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); + + if (argc < 2) + return print_usage(); + + // Parse command line options + const char *pSrc_filename = argv[1]; + char out_filename[FILENAME_MAX] = { '\0' }; + crn_format fmt = cCRNFmtInvalid; + bool srgb_colorspace = true; + crn_dxt_quality dxt_quality = cCRNDXTQualityUber; // best quality, but slowest + bool set_alpha_to_luma = false; + bool convert_to_luma = false; + + for (int i = 2; i < argc; i++) + { + if (argv[i][0] == '/') + argv[i][0] = '-'; + + if (!_stricmp(argv[i], "-out")) + { + if (++i >= argc) + return error("Expected output filename!"); + + strcpy_s(out_filename, sizeof(out_filename), argv[i]); + } + else if (!_stricmp(argv[i], "-nonsrgb")) + srgb_colorspace = false; + else if (!_stricmp(argv[i], "-pixelformat")) + { + if (++i >= argc) + return error("Expected pixel format!"); + + uint f; + for (f = 0; f < cCRNFmtTotal; f++) + { + crn_format actual_fmt = crn_get_fundamental_dxt_format(static_cast(f)); + if (!_stricmp(argv[i], crn_get_format_stringa(actual_fmt))) + { + fmt = actual_fmt; + break; + } + } + if (f == cCRNFmtTotal) + return error("Unrecognized pixel format: %s\n", argv[i]); + } + else if (!_stricmp(argv[i], "-dxtquality")) + { + if (++i >= argc) + return error("Expected DXTn quality!\n"); + + uint q; + for (q = 0; q < cCRNDXTQualityTotal; q++) + { + if (!_stricmp(argv[i], crn_get_dxt_quality_stringa(static_cast(q)))) + { + dxt_quality = static_cast(q); + break; + } + } + if (q == cCRNDXTQualityTotal) + return error("Unrecognized DXTn quality: %s\n", argv[i]); + } + else if (!_stricmp(argv[i], "-setalphatoluma")) + set_alpha_to_luma = true; + else if (!_stricmp(argv[i], "-converttoluma")) + convert_to_luma = true; + 
else + return error("Invalid option: %s\n", argv[i]); + } + + // Split the source filename into its various components. + char drive_buf[_MAX_DRIVE], dir_buf[_MAX_DIR], fname_buf[_MAX_FNAME], ext_buf[_MAX_EXT]; + if (_splitpath_s(pSrc_filename, drive_buf, _MAX_DRIVE, dir_buf, _MAX_DIR, fname_buf, _MAX_FNAME, ext_buf, _MAX_EXT)) + return error("Invalid source filename!\n"); + + // Load the source image into memory. + printf("Loading source file: %s\n", pSrc_filename); + int width, height, actual_comps; + crn_uint32 *pSrc_image = (crn_uint32*)stbi_load(pSrc_filename, &width, &height, &actual_comps, 4); + if (!pSrc_image) + return error("Unable to read source file\n"); + + if (fmt == cCRNFmtInvalid) + { + // Format not specified - automatically choose the DXTn format. + fmt = (actual_comps > 3) ? cCRNFmtDXT5 : cCRNFmtDXT1; + } + + if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) + set_alpha_to_luma = true; + + if ((set_alpha_to_luma) || (convert_to_luma)) + { + for (int i = 0; i < width * height; i++) + { + crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; + // Compute CCIR 601 luma. 
+ crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; + crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; + if (set_alpha_to_luma) a = y; + if (convert_to_luma) { r = y; g = y; b = y; } + pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); + } + } + + printf("Source Dimensions: %ux%u, Actual Components: %u\n", width, height, actual_comps); + + const uint num_blocks_x = (width + cDXTBlockSize - 1) / cDXTBlockSize; + const uint num_blocks_y = (height + cDXTBlockSize - 1) / cDXTBlockSize; + const uint bytes_per_block = crn_get_bytes_per_dxt_block(fmt); + const uint total_compressed_size = num_blocks_x * num_blocks_y * bytes_per_block; + + printf("Block Dimensions: %ux%u, BytesPerBlock: %u, Total Compressed Size: %u\n", num_blocks_x, num_blocks_y, bytes_per_block, total_compressed_size); + + void *pCompressed_data = malloc(total_compressed_size); + if (!pCompressed_data) + { + stbi_image_free(pSrc_image); + return error("Out of memory!"); + } + + crn_comp_params comp_params; + comp_params.m_format = fmt; + comp_params.m_dxt_quality = dxt_quality; + comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); + comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, actual_comps > 3); + + crn_block_compressor_context_t pContext = crn_create_block_compressor(comp_params); + + printf("Compressing to %s: ", crn_get_format_stringa(fmt)); + + int prev_percentage_complete = -1; + for (crn_uint32 block_y = 0; block_y < num_blocks_y; block_y++) + { + for (crn_uint32 block_x = 0; block_x < num_blocks_x; block_x++) + { + crn_uint32 pixels[cDXTBlockSize * cDXTBlockSize]; + + // Exact block from image, clamping at the sides of non-divisible by 4 images to avoid artifacts. 
+ crn_uint32 *pDst_pixels = pixels; + for (int y = 0; y < cDXTBlockSize; y++) + { + const uint actual_y = min(height - 1U, (block_y * cDXTBlockSize) + y); + for (int x = 0; x < cDXTBlockSize; x++) + { + const uint actual_x = min(width - 1U, (block_x * cDXTBlockSize) + x); + *pDst_pixels++ = pSrc_image[actual_x + actual_y * width]; + } + } + + // Compress the DXTn block. + crn_compress_block(pContext, pixels, static_cast(pCompressed_data) + (block_x + block_y * num_blocks_x) * bytes_per_block); + } + + int percentage_complete = ((block_y + 1) * 100 + (num_blocks_y / 2)) / num_blocks_y; + if (percentage_complete != prev_percentage_complete) + { + printf("\b\b\b\b%3u%%", percentage_complete); + prev_percentage_complete = percentage_complete; + } + } + printf("\n"); + + // Free the block compressor. + crn_free_block_compressor(pContext); + pContext = NULL; + + // Now create the DDS file. + char dst_filename[FILENAME_MAX]; + sprintf_s(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive_buf, dir_buf, fname_buf); + if (out_filename[0]) strcpy(dst_filename, out_filename); + + printf("Writing DDS file: %s\n", dst_filename); + + FILE *pDDS_file = fopen(dst_filename, "wb"); + if (!pDDS_file) + { + free(pCompressed_data); + return error("Failed creating destination file!\n"); + } + + // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). + fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); + + // Prepare the DDS header. 
+ crnlib::DDSURFACEDESC2 dds_desc; + memset(&dds_desc, 0, sizeof(dds_desc)); + dds_desc.dwSize = sizeof(dds_desc); + dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT; + dds_desc.dwWidth = width; + dds_desc.dwHeight = height; + + dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); + dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; + dds_desc.ddpfPixelFormat.dwFourCC = crn_get_format_fourcc(fmt); + dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; + + // Write the DDS header to the output file. + fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); + + // Write the image's compressed data to the output file. + fwrite(pCompressed_data, total_compressed_size, 1, pDDS_file); + free(pCompressed_data); + + stbi_image_free(pSrc_image); + + if (fclose(pDDS_file) == EOF) + { + return error("Failed writing to DDS file!\n"); + } + + return EXIT_SUCCESS; +} diff --git a/example3/stb_image.h b/example3/stb_image.h new file mode 100644 index 00000000..6da2b729 --- /dev/null +++ b/example3/stb_image.h @@ -0,0 +1,3942 @@ +/* stbi-1.18 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c + when you control the images you're loading + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline (no JPEG progressive, no oddball channel decimations) + PNG 8-bit only + BMP non-1bpp, non-RLE + TGA (not sure what subset, if a subset) + PSD (composited view only, no extra channels) + HDR (radiance rgbE format) + writes BMP,TGA (define STBI_NO_WRITE to remove code) + decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) + + TODO: + stbi_info_* + + history: + 1.18 fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one too many pixels + 1.15 initialize some fields for 
thread safety + 1.14 fix threadsafe conversion bug; header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... not sure + fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another stb_malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less + than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. 
nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant +*/ + +#pragma warning (disable: 4793) // function compiled as native + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +//// begin header file //////////////////////////////////////////////////// +// +// Limitations: +// - no progressive/interlaced support (jpeg, png) +// - 8-bit samples only (jpeg, png) +// - not threadsafe +// - channel subsampling of at most 2 in each dimension (jpeg) +// - no delayed line count (jpeg) -- IJG doesn't support either +// +// Basic usage (see HDR discussion below): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *comp -- outputs # of image components in image file +// int req_comp -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise. +// If req_comp is non-zero, *comp has the number of components that _would_ +// have been output otherwise. 
E.g. if you set req_comp to 4, you will always +// get RGBA output, but you can check *comp to easily see if it's opaque. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *comp will be unchanged. The function stbi_failure_reason() +// can be queried for an extremely brief, end-user unfriendly explanation +// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid +// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG and BMP images are automatically depalettized. +// +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); + +//#define _CRT_SECURE_NO_WARNINGS + +#ifndef STBI_NO_STDIO +#include +#endif + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for req_comp + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4, +}; + +typedef unsigned char stbi_uc; + +#ifdef __cplusplus +extern "C" { +#endif + +// WRITING API + +#if !defined(STBI_NO_WRITE) && !defined(STBI_NO_STDIO) +// write a BMP/TGA file given tightly packed 'comp' channels (no padding, nor bmp-stride-padding) +// (you must include the appropriate extension in the filename). 
+// returns TRUE on success, FALSE if couldn't open file, error writing file +extern int stbi_write_bmp (char const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_bmp_w (wchar_t const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_tga (char const *filename, int x, int y, int comp, const void *data); +extern int stbi_write_tga_w (wchar_t const *filename, int x, int y, int comp, const void *data); +#endif + +// PRIMARY API - works on images of any type + +// load image by filename, open file, or memory buffer +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_w (wchar_t const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +// for stbi_load_from_file, file pointer is left pointing immediately after image + +#ifndef STBI_NO_HDR +#ifndef STBI_NO_STDIO +extern float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif +extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + +extern void stbi_hdr_to_ldr_gamma(float gamma); +extern void stbi_hdr_to_ldr_scale(float scale); + +extern void stbi_ldr_to_hdr_gamma(float gamma); +extern void stbi_ldr_to_hdr_scale(float scale); + +#endif // STBI_NO_HDR + +// get a VERY brief reason for failure +// NOT THREADSAFE +extern char *stbi_failure_reason (void); + +// free the loaded image -- this is just stb_free() +extern void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding 
+extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +extern int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_is_hdr (char const *filename); +extern int stbi_is_hdr_from_file(FILE *f); +#endif + +// ZLIB client - used by PNG, available for other purposes + +extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +// TYPE-SPECIFIC ACCESS + +// is it a jpeg? +extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_test_file (FILE *f); +extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a png? 
+extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? +extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? 
+extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_psd_test_file (FILE *f); +extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it an hdr? +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); + +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_hdr_test_file (FILE *f); +extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// define new loaders +typedef struct +{ + int (*test_memory)(stbi_uc const *buffer, int len); + stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + #ifndef STBI_NO_STDIO + int (*test_file)(FILE *f); + stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp); + #endif +} stbi_loader; + +// register a loader by filling out the above structure (you must defined ALL functions) +// returns 1 if added or already added, 0 if not added (too many loaders) +// NOT THREADSAFE +extern int stbi_register_loader(stbi_loader *loader); + +// define faster low-level operations (typically SIMD support) +#if STBI_SIMD +typedef void (*stbi_idct_8x8)(uint8 *out, int out_stride, short data[64], unsigned short *dequantize); +// compute an integer IDCT on "input" +// input[x] = data[x] * dequantize[x] +// write results to 'out': 64 samples, each run of 8 spaced by 'out_stride' +// CLAMP results to 0..255 +typedef void (*stbi_YCbCr_to_RGB_run)(uint8 *output, uint8 const *y, uint8 const *cb, uint8 const *cr, int 
count, int step); +// compute a conversion from YCbCr to RGB +// 'count' pixels +// write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B +// y: Y input channel +// cb: Cb input channel; scale/biased to be 0..255 +// cr: Cr input channel; scale/biased to be 0..255 + +extern void stbi_install_idct(stbi_idct_8x8 func); +extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func); +#endif // STBI_SIMD + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifndef STBI_HEADER_FILE_ONLY + +inline void* stb_malloc(size_t c) { return ::malloc(c); } +inline void* stb_realloc(void *p, size_t c) { return ::realloc(p, c); } +inline void stb_free(void *p) { ::free(p); } + +#ifndef STBI_NO_HDR +#include // ldexp +#include // strcmp +#endif + +#ifndef STBI_NO_STDIO +#include +#endif +#include +#include +#include +#include + +#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__MINGW64__) + #ifdef __cplusplus + #define __forceinline inline + #else + #define __forceinline + #endif +#endif + + +// implementation: +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef signed short int16; +typedef unsigned int uint32; +typedef signed int int32; +typedef unsigned int uint; + +// should produce compiler error if size is wrong +typedef unsigned char validate_uint32[sizeof(uint32)==4]; + +#if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE) +#define STBI_NO_WRITE +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Generic API that works on all image types +// + +// this is not threadsafe +static char *failure_reason; + +char *stbi_failure_reason(void) +{ + return failure_reason; +} + +static int e(char *str) +{ + failure_reason = str; + return 0; +} + +#ifdef STBI_NO_FAILURE_STRINGS + #define e(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define e(x,y) 
e(y) +#else + #define e(x,y) e(x) +#endif + +#define epf(x,y) ((float *) (e(x,y)?NULL:NULL)) +#define epuc(x,y) ((unsigned char *) (e(x,y)?NULL:NULL)) + +void stbi_image_free(void *retval_from_stbi_load) +{ + stb_free(retval_from_stbi_load); +} + +#define MAX_LOADERS 32 +stbi_loader *loaders[MAX_LOADERS]; +static int max_loaders = 0; + +int stbi_register_loader(stbi_loader *loader) +{ + int i; + for (i=0; i < MAX_LOADERS; ++i) { + // already present? + if (loaders[i] == loader) + return 1; + // end of the list? + if (loaders[i] == NULL) { + loaders[i] = loader; + max_loaders = i+1; + return 1; + } + } + // no room for it + return 0; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_STDIO +unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_w(wchar_t const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = _wfopen(filename, L"rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_file(f)) + return stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + if (stbi_png_test_file(f)) + return stbi_png_load_from_file(f,x,y,comp,req_comp); + if (stbi_bmp_test_file(f)) + return stbi_bmp_load_from_file(f,x,y,comp,req_comp); + if (stbi_psd_test_file(f)) + return stbi_psd_load_from_file(f,x,y,comp,req_comp); + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) { + float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp); + 
return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_file(f)) + return loaders[i]->load_from_file(f,x,y,comp,req_comp); + // test tga last because it's a crappy test! + if (stbi_tga_test_file(f)) + return stbi_tga_load_from_file(f,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_memory(buffer,len)) + return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_png_test_memory(buffer,len)) + return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_bmp_test_memory(buffer,len)) + return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_psd_test_memory(buffer,len)) + return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp); + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) { + float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_memory(buffer,len)) + return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp); + // test tga last because it's a crappy test! 
+ if (stbi_tga_test_memory(buffer,len)) + return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} + +#ifndef STBI_NO_HDR + +#ifndef STBI_NO_STDIO +float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + float *result; + if (!f) return epf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) + return stbi_hdr_load_from_file(f,x,y,comp,req_comp); + #endif + data = stbi_load_from_file(f, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) + return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + #endif + data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +// these is-hdr-or-not is defined independent of whether STBI_NO_HDR is +// defined, for API simplicity; if STBI_NO_HDR is defined, it always +// reports false! 
+ +int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_memory(buffer, len); + #else + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +extern int stbi_is_hdr (char const *filename) +{ + FILE *f = fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +extern int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_file(f); + #else + return 0; + #endif +} + +#endif + +// @TODO: get image dimensions & components without fully decoding +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_HDR +static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f; +static float l2h_gamma=2.2f, l2h_scale=1.0f; + +void stbi_hdr_to_ldr_gamma(float gamma) { h2l_gamma_i = 1/gamma; } +void stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; } + +void stbi_ldr_to_hdr_gamma(float gamma) { l2h_gamma = gamma; } +void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; } +#endif + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + SCAN_load=0, + SCAN_type, + SCAN_header, +}; + +typedef struct +{ + uint32 img_x, img_y; + int img_n, img_out_n; + + #ifndef STBI_NO_STDIO + FILE *img_file; + #endif + uint8 *img_buffer, *img_buffer_end; +} stbi; + +#ifndef STBI_NO_STDIO +static void start_file(stbi *s, FILE *f) +{ + s->img_file = f; +} +#endif + +static void start_mem(stbi *s, uint8 const *buffer, int len) +{ +#ifndef STBI_NO_STDIO + s->img_file = NULL; +#endif + s->img_buffer = (uint8 *) buffer; + s->img_buffer_end = (uint8 *) buffer+len; +} + +__forceinline static int get8(stbi *s) +{ +#ifndef STBI_NO_STDIO + if 
(s->img_file) { + int c = fgetc(s->img_file); + return c == EOF ? 0 : c; + } +#endif + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; + return 0; +} + +__forceinline static int at_eof(stbi *s) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) + return feof(s->img_file); +#endif + return s->img_buffer >= s->img_buffer_end; +} + +__forceinline static uint8 get8u(stbi *s) +{ + return (uint8) get8(s); +} + +static void skip(stbi *s, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) + fseek(s->img_file, n, SEEK_CUR); + else +#endif + s->img_buffer += n; +} + +static int get16(stbi *s) +{ + int z = get8(s); + return (z << 8) + get8(s); +} + +static uint32 get32(stbi *s) +{ + uint32 z = get16(s); + return (z << 16) + get16(s); +} + +static int get16le(stbi *s) +{ + int z = get8(s); + return z + (get8(s) << 8); +} + +static uint32 get32le(stbi *s) +{ + uint32 z = get16le(s); + return z + (get16le(s) << 16); +} + +static void getn(stbi *s, stbi_uc *buffer, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + fread(buffer, 1, n, s->img_file); + return; + } +#endif + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; +} + +////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). 
png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so stb_malloc a new one and free that one +// only failure mode is stb_malloc failing + +static uint8 compute_y(int r, int g, int b) +{ + return (uint8) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + assert(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) stb_malloc(req_comp * x * y); + if (good == NULL) { + stb_free(data); + return epuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define COMBO(a,b) ((a)*8+(b)) + #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch(COMBO(img_n, req_comp)) { + CASE(1,2) dest[0]=src[0], dest[1]=255; break; + CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break; + CASE(2,1) dest[0]=src[0]; break; + CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break; + CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break; + CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break; + CASE(4,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break; + CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break; + default: assert(0); + } + #undef CASE + } + + stb_free(data); + return good; +} + +#ifndef STBI_NO_HDR +static float 
*ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output = (float *) stb_malloc(x * y * comp * sizeof(float)); + if (output == NULL) { stb_free(data); return epf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale; + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + stb_free(data); + return output; +} + +#define float2int(x) ((int) (x)) +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output = (stbi_uc *) stb_malloc(x * y * comp); + if (output == NULL) { stb_free(data); return epuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = float2int(z); + } + } + stb_free(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation) +// +// simple implementation +// - channel subsampling of at most 2 in each dimension +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good 
upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - uses a lot of intermediate memory, could cache poorly +// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4 +// stb_jpeg: 1.34 seconds (MSVC6, default release build) +// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro) +// IJL11.dll: 1.08 seconds (compiled by intel) +// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) +// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + uint8 fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + uint16 code[256]; + uint8 values[256]; + uint8 size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} huffman; + +typedef struct +{ + #if STBI_SIMD + unsigned short dequant2[4][64]; + #endif + stbi s; + huffman huff_dc[4]; + huffman huff_ac[4]; + uint8 dequant[4][64]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + uint8 *data; + void *raw_data; + uint8 *linebuf; + } img_comp[4]; + + uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; // flag if we saw a marker so must stop + + int scan_n, order[4]; + int restart_interval, todo; +} jpeg; + +static int build_huffman(huffman *h, int *count) +{ + int i,j,k=0,code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + 
h->size[k++] = (uint8) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (uint16) (code++); + if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (uint8) i; + } + } + } + return 1; +} + +static void grow_buffer_unsafe(jpeg *j) +{ + do { + int b = j->nomore ? 0 : get8(&j->s); + if (b == 0xff) { + int c = get8(&j->s); + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer = (j->code_buffer << 8) | b; + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +__forceinline static int decode(jpeg *j, huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (j->code_bits - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + if (h->size[k] > j->code_bits) + return -1; + j->code_bits -= h->size[k]; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. 
To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + if (j->code_bits < 16) + temp = (j->code_buffer << (16 - j->code_bits)) & 0xffff; + else + temp = (j->code_buffer >> (j->code_bits - 16)) & 0xffff; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (j->code_bits - k)) & bmask[k]) + h->delta[k]; + assert((((j->code_buffer) >> (j->code_bits - h->size[c])) & bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + return h->values[c]; +} + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. +__forceinline static int extend_receive(jpeg *j, int n) +{ + unsigned int m = 1 << (n-1); + unsigned int k; + if (j->code_bits < n) grow_buffer_unsafe(j); + k = (j->code_buffer >> (j->code_bits - n)) & bmask[n]; + j->code_bits -= n; + // the following test is probably a random branch that won't + // predict well. I tried to table accelerate it but failed. + // maybe it's compiling as a conditional move? + if (k < m) + return (-1 << n) + k + 1; + else + return k; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? 
+static uint8 dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b) +{ + int diff,dc,k; + int t = decode(j, hdc); + if (t < 0) return e("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) dc; + + // decode AC components, see JPEG spec + k = 1; + do { + int r,s; + int rs = decode(j, hac); + if (rs < 0) return e("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + data[dezigzag[k++]] = (short) extend_receive(j,s); + } + } while (k < 64); + return 1; +} + +// take a -128..127 value and clamp it and convert to 0..255 +__forceinline static uint8 clamp(int x) +{ + x += 128; + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (uint8) x; +} + +#define f2f(x) (int) (((x) * 4096 + 0.5)) +#define fsh(x) ((x) << 12) + +// derived from jidctint -- DCT_ISLOW +#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * f2f(0.5411961f); \ + t2 = p1 + p3*f2f(-1.847759065f); \ + t3 = p1 + p2*f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = fsh(p2+p3); \ + t1 = fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; 
\ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*f2f( 1.175875602f); \ + t0 = t0*f2f( 0.298631336f); \ + t1 = t1*f2f( 2.053119869f); \ + t2 = t2*f2f( 3.072711026f); \ + t3 = t3*f2f( 1.501321110f); \ + p1 = p5 + p1*f2f(-0.899976223f); \ + p2 = p5 + p2*f2f(-2.562915447f); \ + p3 = p3*f2f(-1.961570560f); \ + p4 = p4*f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +#if !STBI_SIMD +// .344 seconds on 3*anemones.jpg +static void idct_block(uint8 *out, int out_stride, short data[64], uint8 *dequantize) +{ + int i,val[64],*v=val; + uint8 *o,*dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // 
we've got an extra 1<<3, so 1<<17 total we need to remove. + x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +#else +static void idct_block(uint8 *out, int out_stride, short data[64], unsigned short *dequantize) +{ + int i,val[64],*v=val; + uint8 *o; + unsigned short *dq = dequantize; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. 
+ x0 += 65536; x1 += 65536; x2 += 65536; x3 += 65536; + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} +static stbi_idct_8x8 stbi_idct_installed = idct_block; + +extern void stbi_install_idct(stbi_idct_8x8 func) +{ + stbi_idct_installed = func; +} +#endif + +#define MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static uint8 get_marker(jpeg *j) +{ + uint8 x; + if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; } + x = get8u(&j->s); + if (x != 0xff) return MARKER_none; + while (x == 0xff) + x = get8u(&j->s); + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, reset the entropy decoder and +// the dc prediction +static void reset(jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->marker = MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int parse_entropy_coded_data(jpeg *z) +{ + reset(z); + if (z->scan_n == 1) { + int i,j; + #if STBI_SIMD + __declspec(align(16)) + #endif + short data[64]; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } else { // interleaved! + int i,j,k,x,y; + short data[64]; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #if STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } + return 1; +} + +static int process_marker(jpeg *z, int m) +{ + int L; + switch (m) { + case MARKER_none: // no marker found + return e("expected marker","Corrupt JPEG"); + + case 0xC2: // SOF - progressive + return e("progressive jpeg","JPEG format not supported (progressive)"); + + case 0xDD: // DRI - specify restart interval + if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG"); + z->restart_interval = get16(&z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = get16(&z->s)-2; + while (L > 0) { + int q = get8(&z->s); + int p = q >> 4; + int t = q & 15,i; + if (p != 0) return e("bad DQT type","Corrupt JPEG"); + if (t > 3) return e("bad DQT table","Corrupt JPEG"); + for (i=0; i < 64; ++i) + z->dequant[t][dezigzag[i]] = get8u(&z->s); + #if STBI_SIMD + for (i=0; i < 64; ++i) + z->dequant2[t][i] = z->dequant[t][i]; + #endif + L -= 65; + } + return 
L==0; + + case 0xC4: // DHT - define huffman table + L = get16(&z->s)-2; + while (L > 0) { + uint8 *v; + int sizes[16],i,m=0; + int q = get8(&z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = get8(&z->s); + m += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < m; ++i) + v[i] = get8u(&z->s); + L -= m; + } + return L==0; + } + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + skip(&z->s, get16(&z->s)-2); + return 1; + } + return 0; +} + +// after we see SOS +static int process_scan_header(jpeg *z) +{ + int i; + int Ls = get16(&z->s); + z->scan_n = get8(&z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = get8(&z->s), which; + int q = get8(&z->s); + for (which = 0; which < z->s.img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s.img_n) return 0; + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + get8(&z->s); // should be 63, but might be 0 + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + + return 1; +} + +static int process_frame_header(jpeg *z, int scan) +{ + stbi *s = &z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG + p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 8-bit 
only"); // JPEG baseline + s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires + c = get8(s); + if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG"); + + for (i=0; i < s->img_n; ++i) { + z->img_comp[i].id = get8(s); + if (z->img_comp[i].id != i+1) // JFIF requires + if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! + return e("bad component ID","Corrupt JPEG"); + q = get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); + z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); + } + + if (scan != SCAN_load) return 1; + + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. 
for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].raw_data = stb_malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); + if (z->img_comp[i].raw_data == NULL) { + for(--i; i >= 0; --i) { + stb_free(z->img_comp[i].raw_data); + z->img_comp[i].data = NULL; + } + return e("outofmem", "Out of memory"); + } + // align blocks for installable-idct using mmx/sse + z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + z->img_comp[i].linebuf = NULL; + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define DNL(x) ((x) == 0xdc) +#define SOI(x) ((x) == 0xd8) +#define EOI(x) ((x) == 0xd9) +#define SOF(x) ((x) == 0xc0 || (x) == 0xc1) +#define SOS(x) ((x) == 0xda) + +static int decode_jpeg_header(jpeg *z, int scan) +{ + int m; + z->marker = MARKER_none; // initialize cached marker to empty + m = get_marker(z); + if (!SOI(m)) return e("no SOI","Corrupt JPEG"); + if (scan == SCAN_type) return 1; + m = get_marker(z); + while (!SOF(m)) { + if (!process_marker(z,m)) return 0; + m = get_marker(z); + while (m == MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG"); + m = get_marker(z); + } + } + if (!process_frame_header(z, scan)) return 0; + return 1; +} + +static int decode_jpeg_image(jpeg *j) +{ + int m; + j->restart_interval = 0; + if (!decode_jpeg_header(j, SCAN_load)) return 0; + m = get_marker(j); + while (!EOI(m)) { + if (SOS(m)) { + if (!process_scan_header(j)) return 0; + if (!parse_entropy_coded_data(j)) return 0; + } else { + if (!process_marker(j, m)) return 0; + } + m = get_marker(j); + } + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1, + int w, int hs); + +#define div4(x) ((uint8) ((x) >> 2)) + +static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + return in_near; +} + +static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples vertically for every one in input + int i; + for (i=0; i < w; ++i) + out[i] = div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + uint8 *input = in_near; + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + 
return out; + } + + out[0] = input[0]; + out[1] = div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = div4(n+input[i-1]); + out[i*2+1] = div4(n+input[i+1]); + } + out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + return out; +} + +#define div16(x) ((uint8) ((x) >> 4)) + +static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = div16(3*t0 + t1 + 8); + out[i*2 ] = div16(3*t1 + t0 + 8); + } + out[w*2-1] = div4(t1+2); + return out; +} + +static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +#define float2fixed(x) ((int) ((x) * 65536 + 0.5)) + +// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) +// VC6 without processor=Pro is generating multiple LEAs per multiply! 
+static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 16) + 32768; // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr*float2fixed(1.40200f); + g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f); + b = y_fixed + cb*float2fixed(1.77200f); + r >>= 16; + g >>= 16; + b >>= 16; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (uint8)r; + out[1] = (uint8)g; + out[2] = (uint8)b; + out[3] = 255; + out += step; + } +} + +#if STBI_SIMD +static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row; + +void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func) +{ + stbi_YCbCr_installed = func; +} +#endif + + +// clean up the temporary component buffers +static void cleanup_jpeg(jpeg *j) +{ + int i; + for (i=0; i < j->s.img_n; ++i) { + if (j->img_comp[i].data) { + stb_free(j->img_comp[i].raw_data); + j->img_comp[i].data = NULL; + } + if (j->img_comp[i].linebuf) { + stb_free(j->img_comp[i].linebuf); + j->img_comp[i].linebuf = NULL; + } + } +} + +typedef struct +{ + resample_row_func resample; + uint8 *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi_resample; + +static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n; + // validate req_comp + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + z->s.img_n = 0; + + // load a jpeg image from whichever source + if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? 
req_comp : z->s.img_n; + + if (z->s.img_n == 3 && n < 3) + decode_n = 1; + else + decode_n = z->s.img_n; + + // resample and color-convert + { + int k; + uint i,j; + uint8 *output; + uint8 *coutput[4]; + + stbi_resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (uint8 *) stb_malloc(z->s.img_x + 3); + if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s.img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2; + else r->resample = resample_row_generic; + } + + // can't error after this so, this is safe + output = (uint8 *) stb_malloc(n * z->s.img_x * z->s.img_y + 1); + if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s.img_y; ++j) { + uint8 *out = output + n * z->s.img_x * j; + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? 
r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + uint8 *y = coutput[0]; + if (z->s.img_n == 3) { + #if STBI_SIMD + stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n); + #else + YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n); + #endif + } else + for (i=0; i < z->s.img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + uint8 *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s.img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + cleanup_jpeg(z); + *out_x = z->s.img_x; + *out_y = z->s.img_y; + if (comp) *comp = z->s.img_n; // report original components, not output + return output; + } +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_file(&j.s, f); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_jpeg_test_file(FILE *f) +{ + int n,r; + jpeg j; + n = ftell(f); + start_file(&j.s, f); + r = decode_jpeg_header(&j, SCAN_type); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_jpeg_test_memory(stbi_uc const *buffer, int len) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return decode_jpeg_header(&j, SCAN_type); +} + +// @TODO: +#ifndef STBI_NO_STDIO +extern int 
stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can stb_malloc/stb_realloc) +// performance +// - fast huffman + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define ZFAST_BITS 9 // accelerate all cases in default tables +#define ZFAST_MASK ((1 << ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + uint16 fast[1 << ZFAST_BITS]; + uint16 firstcode[16]; + int maxcode[17]; + uint16 firstsymbol[16]; + uint8 size[288]; + uint16 value[288]; +} zhuffman; + +__forceinline static int bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +__forceinline static int bit_reverse(int v, int bits) +{ + assert(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 
11 bits, bit reverse and shift away 5 + return bitreverse16(v) >> (16-bits); +} + +static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 255, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + assert(sizes[i] <= (1 << i)); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (uint16) code; + z->firstsymbol[i] = (uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + z->size[c] = (uint8)s; + z->value[c] = (uint16)i; + if (s <= ZFAST_BITS) { + int k = bit_reverse(next_code[s],s); + while (k < (1 << ZFAST_BITS)) { + z->fast[k] = (uint16) c; + k += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + uint8 *zbuffer, *zbuffer_end; + int num_bits; + uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + zhuffman z_length, z_distance; +} zbuf; + +__forceinline static int zget8(zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void fill_bits(zbuf *z) +{ + do { + assert(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +__forceinline 
static unsigned int zreceive(zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z) +{ + int b,s,k; + if (a->num_bits < 16) fill_bits(a); + b = z->fast[a->code_buffer & ZFAST_MASK]; + if (b < 0xffff) { + s = z->size[b]; + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; + } + + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = bit_reverse(a->code_buffer, 16); + for (s=ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! + // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + assert(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +static int expand(zbuf *z, int n) // need to make room for n bytes +{ + char *q; + int cur, limit; + if (!z->z_expandable) return e("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) stb_realloc(z->zout_start, limit); + if (q == NULL) return e("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static int length_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static int length_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static int dist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int parse_huffman_block(zbuf *a) +{ + for(;;) { + int z = zhuffman_decode(a, &a->z_length); + 
if (z < 256) { + if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes + if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0; + *a->zout++ = (char) z; + } else { + uint8 *p; + int len,dist; + if (z == 256) return 1; + z -= 257; + len = length_base[z]; + if (length_extra[z]) len += zreceive(a, length_extra[z]); + z = zhuffman_decode(a, &a->z_distance); + if (z < 0) return e("bad huffman code","Corrupt PNG"); + dist = dist_base[z]; + if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); + if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG"); + if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0; + p = (uint8 *) (a->zout - dist); + while (len--) + *a->zout++ = *p++; + } + } +} + +static int compute_huffman_codes(zbuf *a) +{ + static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + zhuffman z_codelength; + uint8 lencodes[286+32+137];//padding for maximum single op + uint8 codelength_sizes[19]; + int i,n; + + int hlit = zreceive(a,5) + 257; + int hdist = zreceive(a,5) + 1; + int hclen = zreceive(a,4) + 4; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (uint8) s; + } + if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < hlit + hdist) { + int c = zhuffman_decode(a, &z_codelength); + assert(c >= 0 && c < 19); + if (c < 16) + lencodes[n++] = (uint8) c; + else if (c == 16) { + c = zreceive(a,2)+3; + memset(lencodes+n, lencodes[n-1], c); + n += c; + } else if (c == 17) { + c = zreceive(a,3)+3; + memset(lencodes+n, 0, c); + n += c; + } else { + assert(c == 18); + c = zreceive(a,7)+11; + memset(lencodes+n, 0, c); + n += c; + } + } + if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG"); + if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; 
+} + +static int parse_uncompressed_block(zbuf *a) +{ + uint8 header[4]; + int len,nlen,k; + if (a->num_bits & 7) + zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns? + a->code_buffer >>= 8; + a->num_bits -= 8; + } + assert(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = (uint8) zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!expand(a, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int parse_zlib_header(zbuf *a) +{ + int cmf = zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = zget8(a); + if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +// @TODO: should statically initialize these for optimal thread safety +static uint8 default_length[288], default_distance[32]; +static void init_defaults(void) +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) default_length[i] = 8; + for ( ; i <= 255; ++i) default_length[i] = 9; + for ( ; i <= 279; ++i) default_length[i] = 7; + for ( ; i <= 287; ++i) default_length[i] = 8; + + for (i=0; i <= 31; ++i) default_distance[i] = 5; +} + +int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... 
I should implement real streaming support instead +static int parse_zlib(zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = zreceive(a,1); + type = zreceive(a,2); + if (type == 0) { + if (!parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!default_distance[31]) init_defaults(); + if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0; + if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0; + } else { + if (!compute_huffman_codes(a)) return 0; + } + if (!parse_huffman_block(a)) return 0; + } + if (stbi_png_partial && a->zout - a->zout_start > 65536) + break; + } while (!final); + return 1; +} + +static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return parse_zlib(a, parse_header); +} + +char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + zbuf a; + char *p = (char *) stb_malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if (do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + stb_free(a.zout_start); + return NULL; + } +} + +char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +char *stbi_zlib_decode_noheader_malloc(char const *buffer, 
int len, int *outlen) +{ + zbuf a; + char *p = (char *) stb_malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer+len; + if (do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + stb_free(a.zout_start); + return NULL; + } +} + +int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + + +typedef struct +{ + uint32 length; + uint32 type; +} chunk; + +#define PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +static chunk get_chunk_header(stbi *s) +{ + chunk c; + c.length = get32(s); + c.type = get32(s); + return c; +} + +static int check_png_header(stbi *s) +{ + static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi s; + uint8 *idata, *expanded, *out; +} png; + + +enum { + F_none=0, F_sub=1, F_up=2, F_avg=3, F_paeth=4, + F_avg_first, F_paeth_first, +}; + +static uint8 first_row_filter[5] = +{ + F_none, F_sub, F_none, F_avg_first, F_paeth_first +}; + +static int paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p-a); + int pb = abs(p-b); + int pc = abs(p-c); + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +// create the png data from 
post-deflated data +static int create_png_image_raw(png *a, uint8 *raw, uint32 raw_len, int out_n, uint32 x, uint32 y) +{ + stbi *s = &a->s; + uint32 i,j,stride = x*out_n; + int k; + int img_n = s->img_n; // copy it into a local for later + assert(out_n == s->img_n || out_n == s->img_n+1); + if (stbi_png_partial) y = 1; + a->out = (uint8 *) stb_malloc(x * y * out_n); + if (!a->out) return e("outofmem", "Out of memory"); + if (!stbi_png_partial) { + if (s->img_x == x && s->img_y == y) + if (raw_len != (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + else // interlaced: + if (raw_len < (img_n * x + 1) * y) return e("not enough pixels","Corrupt PNG"); + } + for (j=0; j < y; ++j) { + uint8 *cur = a->out + stride*j; + uint8 *prior = cur - stride; + int filter = *raw++; + if (filter > 4) return e("invalid filter","Corrupt PNG"); + // if first row, use special filter that doesn't sample previous row + if (j == 0) filter = first_row_filter[filter]; + // handle first pixel explicitly + for (k=0; k < img_n; ++k) { + switch(filter) { + case F_none : cur[k] = raw[k]; break; + case F_sub : cur[k] = raw[k]; break; + case F_up : cur[k] = raw[k] + prior[k]; break; + case F_avg : cur[k] = raw[k] + (prior[k]>>1); break; + case F_paeth : cur[k] = (uint8) (raw[k] + paeth(0,prior[k],0)); break; + case F_avg_first : cur[k] = raw[k]; break; + case F_paeth_first: cur[k] = raw[k]; break; + } + } + if (img_n != out_n) cur[img_n] = 255; + raw += img_n; + cur += out_n; + prior += out_n; + // this is a little gross, so that we don't switch per-pixel or per-component + if (img_n == out_n) { + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ + for (k=0; k < img_n; ++k) + switch(filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) 
(raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break; + } + #undef CASE + } else { + assert(img_n+1 == out_n); + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ + for (k=0; k < img_n; ++k) + switch(filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break; + } + #undef CASE + } + } + return 1; +} + +static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced) +{ + uint8 *final; + int p; + int save; + if (!interlaced) + return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y); + save = stbi_png_partial; + stbi_png_partial = 0; + + // de-interlacing + final = (uint8 *) stb_malloc(a->s.img_x * a->s.img_y * out_n); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = (a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { + stb_free(final); + return 0; + } + for (j=0; j < y; ++j) + for (i=0; i < x; ++i) + memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, + a->out + (j*x+i)*out_n, out_n); + stb_free(a->out); + raw += (x*out_n+1)*y; + raw_len -= (x*out_n+1)*y; + } + } + a->out 
= final; + + stbi_png_partial = save; + return 1; +} + +static int compute_transparency(png *z, uint8 tc[3], int out_n) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + assert(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n) +{ + uint32 i, pixel_count = a->s.img_x * a->s.img_y; + uint8 *p, *temp_out, *orig = a->out; + + p = (uint8 *) stb_malloc(pixel_count * pal_img_n); + if (p == NULL) return e("outofmem", "Out of memory"); + + // between here and stb_free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + stb_free(a->out); + a->out = temp_out; + return 1; +} + +static int parse_png_file(png *z, int scan, int req_comp) +{ + uint8 palette[1024], pal_img_n=0; + uint8 has_trans=0, tc[3]; + uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0; + stbi *s = &z->s; + + if (!check_png_header(s)) return 0; + + if (scan == SCAN_type) return 1; + + for(;;first=0) { + chunk c = get_chunk_header(s); + if (first && c.type != PNG_TYPE('I','H','D','R')) + return e("first not IHDR","Corrupt PNG"); + switch (c.type) { + case PNG_TYPE('I','H','D','R'): { + int depth,color,comp,filter; + if (!first) return e("multiple IHDR","Corrupt PNG"); + if (c.length != 13) return e("bad 
IHDR len","Corrupt PNG"); + s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); + s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); + depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); + color = get8(s); if (color > 6) return e("bad ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG"); + comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG"); + filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG"); + interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + if (scan == SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. 
+ s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case PNG_TYPE('P','L','T','E'): { + if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = get8u(s); + palette[i*4+1] = get8u(s); + palette[i*4+2] = get8u(s); + palette[i*4+3] = 255; + } + break; + } + + case PNG_TYPE('t','R','N','S'): { + if (z->idata) return e("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = get8u(s); + } else { + if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); + if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG"); + has_trans = 1; + for (k=0; k < s->img_n; ++k) + tc[k] = (uint8) get16(s); // non 8-bit images will be larger + } + break; + } + + case PNG_TYPE('I','D','A','T'): { + if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG"); + if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; } + if (ioff + c.length > idata_limit) { + uint8 *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + p = (uint8 *) stb_realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); + z->idata = p; + } + #ifndef STBI_NO_STDIO + if (s->img_file) + { + if (fread(z->idata+ioff,1,c.length,s->img_file) != c.length) return e("outofdata","Corrupt PNG"); + } + else + #endif + { + memcpy(z->idata+ioff, s->img_buffer, c.length); + s->img_buffer += c.length; + } + ioff += c.length; + break; + } + + case PNG_TYPE('I','E','N','D'): { + uint32 raw_len; + if (scan != SCAN_load) return 1; + if (z->idata == NULL) return e("no IDAT","Corrupt PNG"); + z->expanded = (uint8 *) stbi_zlib_decode_malloc((char *) z->idata, ioff, (int *) &raw_len); + if (z->expanded == NULL) return 0; // zlib should set error + stb_free(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0; + if (has_trans) + if (!compute_transparency(z, tc, s->img_out_n)) return 0; + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!expand_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } + stb_free(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char invalid_chunk[] = "XXXX chunk not known"; + invalid_chunk[0] = (uint8) (c.type >> 24); + invalid_chunk[1] = (uint8) (c.type >> 16); + invalid_chunk[2] = (uint8) (c.type >> 8); + invalid_chunk[3] = (uint8) (c.type >> 0); + #endif + return e(invalid_chunk, "PNG not supported: unknown chunk type"); + } + skip(s, c.length); + break; + } + // end of chunk, read and skip CRC + get32(s); + } +} + +static unsigned 
char *do_png(png *p, int *x, int *y, int *n, int req_comp) +{ + unsigned char *result=NULL; + p->expanded = NULL; + p->idata = NULL; + p->out = NULL; + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + if (parse_png_file(p, SCAN_load, req_comp)) { + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s.img_out_n) { + result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y); + p->s.img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s.img_x; + *y = p->s.img_y; + if (n) *n = p->s.img_n; + } + stb_free(p->out); p->out = NULL; + stb_free(p->expanded); p->expanded = NULL; + stb_free(p->idata); p->idata = NULL; + + return result; +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_file(&p.s, f); + return do_png(&p, x,y,comp,req_comp); +} + +unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_png_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_mem(&p.s, buffer,len); + return do_png(&p, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_png_test_file(FILE *f) +{ + png p; + int n,r; + n = ftell(f); + start_file(&p.s, f); + r = parse_png_file(&p, SCAN_type,STBI_default); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_png_test_memory(stbi_uc const *buffer, int len) +{ + png p; + start_mem(&p.s, buffer, len); + return parse_png_file(&p, SCAN_type,STBI_default); +} + +// TODO: load header from png +#ifndef STBI_NO_STDIO +int stbi_png_info (char const *filename, int *x, int *y, int *comp) +{ + png p; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + start_file(&p.s, f); + if 
(parse_png_file(&p, SCAN_header, 0)) { + if(x) *x = p.s.img_x; + if(y) *y = p.s.img_y; + if (comp) *comp = p.s.img_n; + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// Microsoft/Windows BMP image + +static int bmp_test(stbi *s) +{ + int sz; + if (get8(s) != 'B') return 0; + if (get8(s) != 'M') return 0; + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + get32le(s); // discard data offset + sz = get32le(s); + if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1; + return 0; +} + +#ifndef STBI_NO_STDIO +int stbi_bmp_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = bmp_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_bmp_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_test(&s); +} + +// returns 0..31 for the highest set bit +static int high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +static int shiftsigned(int v, int shift, int bits) +{ + int result; + int z=0; + + if (shift < 0) v <<= -shift; + else v >>= shift; + result = v; + + z = bits; + while (z < 8) { + result += v >> z; + z += bits; + } + return result; +} + +static stbi_uc *bmp_load(stbi *s, int *x, int *y, int 
*comp, int req_comp) +{ + uint8 *out; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; + stbi_uc pal[256][4]; + int psize=0,i,j,compress=0,width; + int bpp, flip_vertically, pad, target, offset, hsz; + if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP"); + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + offset = get32le(s); + hsz = get32le(s); + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown"); + failure_reason = "bad BMP"; + if (hsz == 12) { + s->img_x = get16le(s); + s->img_y = get16le(s); + } else { + s->img_x = get32le(s); + s->img_y = get32le(s); + } + if (get16le(s) != 1) return 0; + bpp = get16le(s); + if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit"); + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + if (hsz == 12) { + if (bpp < 24) + psize = (offset - 14 - 24) / 3; + } else { + compress = get32le(s); + if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE"); + get32le(s); // discard sizeof + get32le(s); // discard hres + get32le(s); // discard vres + get32le(s); // discard colorsused + get32le(s); // discard max important + if (hsz == 40 || hsz == 56) { + if (hsz == 56) { + get32le(s); + get32le(s); + get32le(s); + get32le(s); + } + if (bpp == 16 || bpp == 32) { + mr = mg = mb = 0; + if (compress == 0) { + if (bpp == 32) { + mr = 0xff << 16; + mg = 0xff << 8; + mb = 0xff << 0; + ma = (unsigned int)(0xff << 24); + fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 + } else { + mr = 31 << 10; + mg = 31 << 5; + mb = 31 << 0; + } + } else if (compress == 3) { + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (mr == mg && mg == mb) { + // ?!?!? 
+ return NULL; + } + } else + return NULL; + } + } else { + assert(hsz == 108); + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + ma = get32le(s); + get32le(s); // discard color space + for (i=0; i < 12; ++i) + get32le(s); // discard color space parameters + } + if (bpp < 16) + psize = (offset - 14 - hsz) >> 2; + } + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + out = (stbi_uc *) stb_malloc(target * s->img_x * s->img_y); + if (!out) return epuc("outofmem", "Out of memory"); + if (bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { stb_free(out); return epuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = get8(s); + pal[i][1] = get8(s); + pal[i][0] = get8(s); + if (hsz != 12) get8(s); + pal[i][3] = 255; + } + skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 3 : 4)); + if (bpp == 4) width = (s->img_x + 1) >> 1; + else if (bpp == 8) width = s->img_x; + else { stb_free(out); return epuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=get8(s),v2=0; + if (bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (bpp == 8) ? 
get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + skip(s, pad); + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + skip(s, offset - 14 - hsz); + if (bpp == 24) width = 3 * s->img_x; + else if (bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (bpp == 24) { + easy = 1; + } else if (bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP"); + // right shift amt to put high bit in position #7 + rshift = high_bit(mr)-7; rcount = bitcount(mr); + gshift = high_bit(mg)-7; gcount = bitcount(mr); + bshift = high_bit(mb)-7; bcount = bitcount(mr); + ashift = high_bit(ma)-7; acount = bitcount(mr); + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + int a; + out[z+2] = get8(s); + out[z+1] = get8(s); + out[z+0] = get8(s); + z += 3; + a = (easy == 2 ? get8(s) : 255); + if (target == 4) out[z++] = a; + } + } else { + for (i=0; i < (int) s->img_x; ++i) { + uint32 v = (bpp == 16 ? get16le(s) : get32le(s)); + int a; + out[z++] = shiftsigned(v & mr, rshift, rcount); + out[z++] = shiftsigned(v & mg, gshift, gcount); + out[z++] = shiftsigned(v & mb, bshift, bcount); + a = (ma ? 
shiftsigned(v & ma, ashift, acount) : 255); + if (target == 4) out[z++] = a; + } + } + skip(s, pad); + } + } + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = target; + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_bmp_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return bmp_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_load(&s, x,y,comp,req_comp); +} + +// Targa Truevision - TGA +// by Jonathan Dummer + +static int tga_test(stbi *s) +{ + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + if( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE + get16(s); // discard palette start + get16(s); // discard palette length + get8(s); // discard bits per palette color entry + get16(s); // discard x origin + get16(s); // discard y origin + if( get16(s) < 1 ) return 0; // test width + if( get16(s) < 1 ) return 0; // test height + sz = get8(s); // bits per pixel + if( 
(sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = tga_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_tga_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_test(&s); +} + +static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + // read in the TGA header stuff + int tga_offset = get8u(s); + int tga_indexed = get8u(s); + int tga_image_type = get8u(s); + int tga_is_RLE = 0; + int tga_palette_start = get16le(s); + int tga_palette_len = get16le(s); + int tga_palette_bits = get8u(s); + int tga_x_origin = get16le(s); + int tga_y_origin = get16le(s); + int tga_width = get16le(s); + int tga_height = get16le(s); + int tga_bits_per_pixel = get8u(s); + int tga_inverted = get8u(s); + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4]; + unsigned char trans_data[4]; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + // do a tiny bit of precessing + if( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + /* int tga_alpha_bits = tga_inverted & 15; */ + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // error check + if( //(tga_indexed) || + (tga_width < 1) || (tga_height < 1) || + (tga_image_type < 1) || (tga_image_type > 3) || + ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) && + (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)) + ) + { + return NULL; + } + + // If I'm paletted, then I'll use the number of bits from the palette + if( tga_indexed ) + { + tga_bits_per_pixel = tga_palette_bits; + } + + // tga info + *x = tga_width; + *y = tga_height; + if( (req_comp < 1) || (req_comp > 4) ) + { + // just use whatever the file was + req_comp = 
tga_bits_per_pixel / 8; + *comp = req_comp; + } else + { + // force a new number of components + *comp = tga_bits_per_pixel/8; + } + tga_data = (unsigned char*)stb_malloc( tga_width * tga_height * req_comp ); + + // skip to the data's starting position (offset usually = 0) + skip(s, tga_offset ); + // do I need to load a palette? + if( tga_indexed ) + { + // any data to skip? (offset usually = 0) + skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)stb_malloc( tga_palette_len * tga_palette_bits / 8 ); + getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 ); + } + // load the data + for( i = 0; i < tga_width * tga_height; ++i ) + { + // if I'm in RLE mode, do I need to get a RLE chunk? + if( tga_is_RLE ) + { + if( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = get8u(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if( read_next_pixel ) + { + // load however much data we did have + if( tga_indexed ) + { + // read in 1 byte, then perform the lookup + int pal_idx = get8u(s); + if( pal_idx >= tga_palette_len ) + { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_bits_per_pixel / 8; + for( j = 0; j*8 < tga_bits_per_pixel; ++j ) + { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else + { + // read in the data raw + for( j = 0; j*8 < tga_bits_per_pixel; ++j ) + { + raw_data[j] = get8u(s); + } + } + // convert raw to the intermediate format + switch( tga_bits_per_pixel ) + { + case 8: + // Luminous => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 16: + // Luminous,Alpha => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[1]; + break; + 
case 24: + // BGR => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 32: + // BGRA => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[3]; + break; + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + // convert to final format + switch( req_comp ) + { + case 1: + // RGBA => Luminance + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + break; + case 2: + // RGBA => Luminance,Alpha + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + tga_data[i*req_comp+1] = trans_data[3]; + break; + case 3: + // RGBA => RGB + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + break; + case 4: + // RGBA => RGBA + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + tga_data[i*req_comp+3] = trans_data[3]; + break; + } + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if( tga_inverted ) + { + for( j = 0; j*2 < tga_height; ++j ) + { + int index1 = j * tga_width * req_comp; + int index2 = (tga_height - 1 - j) * tga_width * req_comp; + for( i = tga_width * req_comp; i > 0; --i ) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if( tga_palette != NULL ) + { + stb_free( tga_palette ); + } + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_tga_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return tga_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicholas Schulz, tweaked by STB + +static int psd_test(stbi *s) +{ + if (get32(s) != 0x38425053) return 0; // "8BPS" + else return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_psd_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = psd_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_psd_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_test(&s); +} + +static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + int pixelCount; + int channelCount, compression; + int channel, i, count, len; + int w,h; + uint8 *out; + + // Check identifier + if (get32(s) != 0x38425053) // "8BPS" + return epuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (get16(s) != 1) + return epuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = get16(s); + if (channelCount < 0 || channelCount > 16) + return epuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = get32(s); + w = get32(s); + + // Make sure the depth is 8 bits. + if (get16(s) != 8) + return epuc("unsupported bit depth", "PSD bit depth is not 8 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (get16(s) != 3) + return epuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + skip(s,get32(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + skip(s, get32(s) ); + + // Skip the reserved data. + skip(s, get32(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = get16(s); + if (compression > 1) + return epuc("bad compression", "PSD has an unknown compression format"); + + // Create the destination image. + out = (stbi_uc *) stb_malloc(4 * w*h); + if (!out) return epuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. 
+ for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4; + } else { + // Read the RLE data. + count = 0; + while (count < pixelCount) { + len = get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + count += len; + while (len) { + *p = get8(s); + p += 4; + len--; + } + } else if (len > 128) { + uint32 val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0x0FF; + len += 2; + val = get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out + channel; + if (channel > channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4; + } else { + // Read the data. 
+ count = 0; + for (i = 0; i < pixelCount; i++) + *p = get8(s), p += 4; + } + } + } + + if (req_comp && req_comp != 4) { + out = convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // convert_format frees input on failure + } + + if (comp) *comp = channelCount; + *y = h; + *x = w; + + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_psd_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return psd_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int hdr_test(stbi *s) +{ + char *signature = "#?RADIANCE\n"; + int i; + for (i=0; signature[i]; ++i) + if (get8(s) != signature[i]) + return 0; + return 1; +} + +int stbi_hdr_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return hdr_test(&s); +} + +#ifndef STBI_NO_STDIO +int stbi_hdr_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = hdr_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +#define HDR_BUFLEN 1024 +static char *hdr_gettoken(stbi *z, char *buffer) +{ + int len=0; + char *s = buffer, c = '\0'; + s; + + c = get8(z); + + while (!at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == HDR_BUFLEN-1) { + // flush to end of line + while (!at_eof(z) && get8(z) != '\n') + ; + break; + } + c = get8(z); + } + + buffer[len] = 0; + return 
buffer; +} + +static void hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + + +static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + char buffer[HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + + + // Check identifier + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + return epf("not HDR", "Corrupt HDR image"); + + // Parse header + while(1) { + token = hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return epf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = strtol(token, NULL, 10); + + *x = width; + *y = height; + + *comp = 3; + if (req_comp == 0) req_comp = 3; + + // Read data + hdr_data = (float *) stb_malloc(height * width * req_comp * sizeof(float)); + + // Load image data + // image data is stored as some number of sca + if( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + getn(s, rgbe, 4); + hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = get8(s); + c2 = get8(s); + len = get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + stbi_uc rgbe[4] = { c1,c2,len, get8(s) }; + hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + stb_free(scanline); + goto main_decode_loop; // yes, this is fucking insane; blame the fucking insane format + } + len <<= 8; + len |= get8(s); + if (len != width) { stb_free(hdr_data); stb_free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) scanline = (stbi_uc *) stb_malloc(width * 4); + + for (k = 0; k < 4; ++k) { + i = 0; + while (i < width) { + count = get8(s); + if (count > 128) { + // Run + value = get8(s); + count -= 128; + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = get8(s); + } + } + } + for (i=0; i < width; ++i) + 
hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + stb_free(scanline); + } + + return hdr_data; +} + +#ifndef STBI_NO_STDIO +float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return hdr_load(&s,x,y,comp,req_comp); +} +#endif + +float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer, len); + return hdr_load(&s,x,y,comp,req_comp); +} + +#endif // STBI_NO_HDR + +/////////////////////// write image /////////////////////// + +#ifndef STBI_NO_WRITE + +static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); } + +static void writefv(FILE *f, char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { uint8 x = va_arg(v, int); write8(f,x); break; } + case '2': { int16 x = va_arg(v, int); write8(f,x); write8(f,x>>8); break; } + case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; } + default: + assert(0); + va_end(v); + return; + } + } +} + +static void writef(FILE *f, char *fmt, ...) 
+{ + va_list v; + va_start(v, fmt); + writefv(f,fmt,v); + va_end(v); +} + +static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int write_alpha, int scanline_pad) +{ + uint8 bg[3] = { 255, 0, 255}, px[3]; + uint32 zero = 0; + int i,j,k, j_end; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + uint8 *d = (uint8 *) data + (j*x+i)*comp; + if (write_alpha < 0) + fwrite(&d[comp-1], 1, 1, f); + switch (comp) { + case 1: + case 2: writef(f, "111", d[0],d[0],d[0]); + break; + case 4: + if (!write_alpha) { + for (k=0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255; + writef(f, "111", px[1-rgb_dir],px[1],px[1+rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + writef(f, "111", d[1-rgb_dir],d[1],d[1+rgb_dir]); + break; + } + if (write_alpha > 0) + fwrite(&d[comp-1], 1, 1, f); + } + fwrite(&zero,scanline_pad,1,f); + } +} + +static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...) +{ + FILE *f = fopen(filename, "wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +static int outfile_w(wchar_t const *filename, int rgb_dir, int vdir, int x, int y, int comp, const void *data, int alpha, int pad, char *fmt, ...) 
+{ + FILE *f = _wfopen(filename, L"wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return outfile(filename,-1,-1,x,y,comp,data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_bmp_w(wchar_t const *filename, int x, int y, int comp, const void *data) +{ + int pad = (-x*3) & 3; + return outfile_w(filename,-1,-1,x,y,comp,data,0,pad, + "11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) +{ + int has_alpha = !(comp & 1); + return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0, + "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +int stbi_write_tga_w(wchar_t const *filename, int x, int y, int comp, const void *data) +{ + int has_alpha = !(comp & 1); + return outfile_w(filename, -1,-1, x, y, comp, data, has_alpha, 0, + "111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +// any other image formats that do interleaved rgb data? +// PNG: requires adler32,crc32 -- significant amount of code +// PSD: no, channels output separately +// TIFF: no, stripwise-interleaved... 
i think + +#endif // STBI_NO_WRITE + +#endif // STBI_HEADER_FILE_ONLY + diff --git a/inc/crn_decomp.h b/inc/crn_decomp.h new file mode 100644 index 00000000..15f88305 --- /dev/null +++ b/inc/crn_decomp.h @@ -0,0 +1,4849 @@ +// File: crn_decomp.h - CRN texture decompressor v.96 +// Copyright (c) 2010-2011 Tenacious Software LLC +// +// This single header file contains *all* of the code necessary to unpack .CRN files to raw DXTn bits. +// It does NOT depend on the crn compression library. +// +// Note: This is a single file, stand-alone C++ library which is controlled by the use of two macros: +// If CRND_INCLUDE_CRND_H is NOT defined, the header is included. +// If CRND_HEADER_FILE_ONLY is NOT defined, the implementation is included. + +// Define PLATFORM_NACL if compiling under native client. +//#define PLATFORM_NACL + +#ifndef CRND_INCLUDE_CRND_H +#define CRND_INCLUDE_CRND_H + +// Include crnlib header - only to bring in some basic some CRN-related types. +#include "crnlib.h" + +#define CRND_VERSION_STRING "00.96" + +#ifdef _DEBUG +#define CRND_BUILD_DEBUG +#else +#define CRND_BUILD_RELEASE +#endif + +// CRN decompression API +namespace crnd +{ + typedef unsigned char uint8; + typedef signed char int8; + typedef unsigned short uint16; + typedef signed short int16; + typedef unsigned int uint32; + typedef uint32 uint32; + typedef unsigned int uint; + typedef signed int int32; +#ifndef PLATFORM_NACL + typedef unsigned __int64 uint64; + typedef signed __int64 int64; +#endif + + // The crnd library assumes all allocation blocks have at least CRND_MIN_ALLOC_ALIGNMENT alignment. + const uint32 CRND_MIN_ALLOC_ALIGNMENT = sizeof(uint32) * 2U; + + // realloc callback: + // Used to allocate, resize, or free memory blocks. + // If p is NULL, the realloc function attempts to allocate a block of at least size bytes. Returns NULL on out of memory. + // *pActual_size must be set to the actual size of the allocated block, which must be greater than or equal to the requested size. 
+ // If p is not NULL, and size is 0, the realloc function frees the specified block, and always returns NULL. *pActual_size should be set to 0. + // If p is not NULL, and size is non-zero, the realloc function attempts to resize the specified block: + // If movable is false, the realloc function attempts to shrink or expand the block in-place. NULL is returned if the block cannot be resized in place, or if the + // underlying heap implementation doesn't support in-place resizing. Otherwise, the pointer to the original block is returned. + // If movable is true, it is permissible to move the block's contents if it cannot be resized in place. NULL is returned if the block cannot be resized in place, and there + // is not enough memory to relocate the block. + // In all cases, *pActual_size must be set to the actual size of the allocated block, whether it was successfully resized or not. + typedef void* (*crnd_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); + + // msize callback: Returns the size of the memory block in bytes, or 0 if the pointer or block is invalid. + typedef size_t (*crnd_msize_func)(void* p, void* pUser_data); + + // crnd_set_memory_callbacks() - Use to override the crnd library's memory allocation functions. + // If any input parameters are NULL, the memory callback functions are reset to the default functions. + // The default functions call malloc(), free(), _msize(), _expand(), etc. 
+ void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data); + + struct crn_file_info + { + inline crn_file_info() : m_struct_size(sizeof(crn_file_info)) { } + + uint32 m_struct_size; + uint32 m_actual_data_size; + uint32 m_header_size; + uint32 m_total_palette_size; + uint32 m_tables_size; + uint32 m_levels; + uint32 m_level_compressed_size[cCRNMaxLevels]; + uint32 m_color_endpoint_palette_entries; + uint32 m_color_selector_palette_entries; + uint32 m_alpha_endpoint_palette_entries; + uint32 m_alpha_selector_palette_entries; + }; + + struct crn_texture_info + { + inline crn_texture_info() : m_struct_size(sizeof(crn_texture_info)) { } + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_levels; + uint32 m_faces; + uint32 m_bytes_per_block; + uint32 m_userdata0; + uint32 m_userdata1; + crn_format m_format; + }; + + struct crn_level_info + { + inline crn_level_info() : m_struct_size(sizeof(crn_level_info)) { } + + uint32 m_struct_size; + uint32 m_width; + uint32 m_height; + uint32 m_faces; + uint32 m_blocks_x; + uint32 m_blocks_y; + uint32 m_bytes_per_block; + crn_format m_format; + }; + + // Returns the FOURCC format code corresponding to the specified CRN format. + uint32 crnd_crn_format_to_fourcc(crn_format fmt); + + // Returns the fundamental GPU format given a potentially swizzled DXT5 crn_format. + crn_format crnd_get_fundamental_dxt_format(crn_format fmt); + + // Returns the size of the crn_format in bits/texel (either 4 or 8). + uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt); + + // Returns the number of bytes per DXTn block (8 or 16). + uint32 crnd_get_bytes_per_dxt_block(crn_format fmt); + + // Validates the entire file by checking the header and data CRC's. + // This is not something you want to be doing much! + // The crn_file_info.m_struct_size field must be set before calling this function. 
+ bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info); + + // The crn_texture_info.m_struct_size field must be set before calling this function. + bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pTexture_info); + + // The crn_level_info.m_struct_size field must be set before calling this function. + bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info); + + typedef void* crnd_unpack_context; + + // crnd_unpack_begin() - Decompresses the texture's decoder tables and endpoint/selector palettes. + // Once you call this function, you may call crnd_unpack_level() to unpack one or more mip levels. + // Don't call this once per mip level (unless you absolutely must)! + // This function allocated enough memory to hold: Huffman decompression tables, and the endpoint/selector palettes (color and/or alpha). + // Worst case allocation is approx. 200k, assuming all palettes contain 8192 entries. + // pData must point to a buffer holding all of the compressed data. + // This buffer must be stable until crnd_unpack_end() is called. + // Returns NULL on out of memory or if any of the input parameters are invalid. + crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size); + + // Returns the compressed data associated with a context. + // Returns false if any of the input parameters are invalid. + bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size); + + // crnd_unpack_level() - Unpacks the specified mipmap level to a destination buffer in cached or write combined memory. + // pContext - Context created by a call to crnd_unpack_begin(). + // ppDst - A pointer to an array of 1 or 6 destination buffer pointers. Cubemaps require an array of 6 pointers, 2D textures require an array of 1 pointer. + // dst_size_in_bytes - Optional size of each destination buffer. 
Only used for debugging - OK to set to UINT32_MAX. + // row_pitch_in_bytes - The pitch in bytes from one row of DXT blocks to the next. Must be a multiple of 4. + // level_index - mipmap level index, where 0 is the largest/first level. + // Returns false if any of the input parameters, or the compressed stream, are invalid. + // This function does not allocate any memory. + bool crnd_unpack_level( + crnd_unpack_context pContext, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + + // crnd_unpack_level_segmented() - Unpacks the specified mipmap level from a "segmented" CRN file. + // See the crnd_create_segmented_file() API below. + // Segmented files allow the user to control where the compressed mipmaps are stored. + bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index); + + // crnd_unpack_end() - Frees the decompress tables and unpacked palettes associated with the specified context. + // Returns false if the context is NULL, or if it points to an invalid context. + // This function frees all memory associated with the context. + bool crnd_unpack_end(crnd_unpack_context pContext); + + // The following API's allow the user to create "segmented" CRN files. A segmented file contains multiple pieces: + // - Base data: Header + compression tables + // - Level data: Individual mipmap levels + // This allows mipmap levels from multiple CRN files to be tightly packed together into single files. + + // Returns a pointer to the level's compressed data, and optionally returns the level's compressed data size if pSize is not NULL. + const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize); + + // Returns the compressed size of the texture's header and compression tables (but no levels). 
+ uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size); + + // Creates a "segmented" CRN texture. The new texture will be created at pBase_data, and will be crnd_get_base_data_size() bytes long. + // base_data_size must be >= crnd_get_base_data_size(). + // The base data will contain the CRN header and compression tables, but no mipmap data. + bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size); + +} // namespace crnd + +// Low-level CRN file header cracking. +namespace crnd +{ + template + struct crn_packed_uint + { + inline crn_packed_uint() { } + + inline crn_packed_uint(unsigned int val) { *this = val; } + + inline crn_packed_uint(const crn_packed_uint& other) { *this = other; } + + inline crn_packed_uint& operator= (const crn_packed_uint& rhs) + { + if (this != &rhs) + memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); + return *this; + } + + inline crn_packed_uint& operator= (unsigned int val) + { + //CRND_ASSERT((N == 4U) || (val < (1U << (N * 8U)))); + + val <<= (8U * (4U - N)); + + for (unsigned int i = 0; i < N; i++) + { + m_buf[i] = static_cast(val >> 24U); + val <<= 8U; + } + + return *this; + } + + inline operator unsigned int() const + { + switch (N) + { + case 1: return m_buf[0]; + case 2: return (m_buf[0] << 8U) | m_buf[1]; + case 3: return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); + default: return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); + } + } + + unsigned char m_buf[N]; + }; + +#pragma pack(push) +#pragma pack(1) + struct crn_palette + { + crn_packed_uint<3> m_ofs; + crn_packed_uint<3> m_size; + crn_packed_uint<2> m_num; + }; + + enum crn_header_flags + { + // If set, the compressed mipmap level data is not located after the file's base data - it will be separately managed by the user instead. 
+ cCRNHeaderFlagSegmented = 1 + }; + + struct crn_header + { + enum { cCRNSigValue = ('H' << 8) | 'x' }; + + crn_packed_uint<2> m_sig; + crn_packed_uint<2> m_header_size; + crn_packed_uint<2> m_header_crc16; + + crn_packed_uint<4> m_data_size; + crn_packed_uint<2> m_data_crc16; + + crn_packed_uint<2> m_width; + crn_packed_uint<2> m_height; + + crn_packed_uint<1> m_levels; + crn_packed_uint<1> m_faces; + + crn_packed_uint<1> m_format; + crn_packed_uint<2> m_flags; + + crn_packed_uint<4> m_reserved; + crn_packed_uint<4> m_userdata0; + crn_packed_uint<4> m_userdata1; + + crn_palette m_color_endpoints; + crn_palette m_color_selectors; + + crn_palette m_alpha_endpoints; + crn_palette m_alpha_selectors; + + crn_packed_uint<2> m_tables_size; + crn_packed_uint<3> m_tables_ofs; + + // m_level_ofs[] is actually an array of offsets: m_level_ofs[m_levels] + crn_packed_uint<4> m_level_ofs[1]; + }; + + const unsigned int cCRNHeaderMinSize = 62U; + +#pragma pack(pop) + +} // namespace crnd + +#endif // CRND_INCLUDE_CRND_H + +// Internal library source follows this line. 
+ +#ifndef CRND_HEADER_FILE_ONLY + +#include +#include +#ifndef PLATFORM_NACL +#include +#else +#include +#include +#endif +#include +#include // needed for placement new, _msize, _expand + +#define CRND_RESTRICT __restrict + +#ifdef _MSC_VER +#include +#pragma intrinsic(_WriteBarrier) +#pragma intrinsic(_ReadWriteBarrier) +#define CRND_WRITE_BARRIER _WriteBarrier(); +#define CRND_FULL_BARRIER _ReadWriteBarrier(); +#else +#define CRND_WRITE_BARRIER +#define CRND_FULL_BARRIER +#endif + +#ifdef _MSC_VER +#pragma warning(disable:4127) // warning C4127: conditional expression is constant +#endif + +#ifdef CRND_DEVEL +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x500 +#endif +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#ifndef +#define NOMINMAX +#endif +#include "windows.h" // only for IsDebuggerPresent(), DebugBreak(), and OutputDebugStringA() +#endif + +// File: crnd_types.h +namespace crnd +{ + const crn_uint8 cUINT8_MIN = 0; + const crn_uint8 cUINT8_MAX = 0xFFU; + const uint16 cUINT16_MIN = 0; + const uint16 cUINT16_MAX = 0xFFFFU; + const uint32 cUINT32_MIN = 0; + const uint32 cUINT32_MAX = 0xFFFFFFFFU; + + const int8 cINT8_MIN = -128; + const int8 cINT8_MAX = 127; + const int16 cINT16_MIN = -32768; + const int16 cINT16_MAX = 32767; + const int32 cINT32_MIN = (-2147483647 - 1); + const int32 cINT32_MAX = 2147483647; + + enum eClear { cClear }; + + const uint32 cIntBits = 32U; + +#ifdef _WIN64 + typedef uint64 ptr_bits; +#else + typedef uint32 ptr_bits; +#endif + + template struct int_traits { enum { cMin = crnd::cINT32_MIN, cMax = crnd::cINT32_MAX, cSigned = true }; }; + + template<> struct int_traits { enum { cMin = crnd::cINT8_MIN, cMax = crnd::cINT8_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = crnd::cINT16_MIN, cMax = crnd::cINT16_MAX, cSigned = true }; }; + template<> struct int_traits { enum { cMin = crnd::cINT32_MIN, cMax = crnd::cINT32_MAX, cSigned = true }; }; + + template<> struct int_traits { enum { cMin = 
0, cMax = crnd::cUINT8_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT16_MAX, cSigned = false }; }; + template<> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT32_MAX, cSigned = false }; }; + + struct empty_type { }; + +} // namespace crnd + +// File: crnd_platform.h +namespace crnd +{ +#ifdef _XBOX + const bool c_crnd_little_endian_platform = false; + const bool c_crnd_big_endian_platform = true; +#define CRND_BIG_ENDIAN_PLATFORM 1 +#else + const bool c_crnd_little_endian_platform = true; + const bool c_crnd_big_endian_platform = false; +#endif + + bool crnd_is_debugger_present(); + void crnd_debug_break(); + void crnd_output_debug_string(const char* p); + + // actually in crnd_assert.cpp + void crnd_assert(const char* pExp, const char* pFile, unsigned line); + void crnd_fail(const char* pExp, const char* pFile, unsigned line); + +} // namespace crnd + +// File: crnd_assert.h +namespace crnd +{ + void crnd_assert(const char* pExp, const char* pFile, unsigned line); + +#ifdef NDEBUG +#define CRND_ASSERT(x) ((void)0) +#undef CRND_ASSERTS_ENABLED +#else +#define CRND_ASSERT(_exp) (void)( (!!(_exp)) || (crnd::crnd_assert(#_exp, __FILE__, __LINE__), 0) ) +#define CRND_ASSERTS_ENABLED +#endif + + void crnd_trace(const char* pFmt, va_list args); + void crnd_trace(const char* pFmt, ...); + +} // namespace crnd + +// File: crnd_helpers.h +namespace crnd +{ + namespace helpers + { + template struct rel_ops + { + friend bool operator!= (const T& x, const T& y) { return (!(x == y)); } + friend bool operator> (const T& x, const T& y) { return (y < x); } + friend bool operator<= (const T& x, const T& y) { return (!(y < x)); } + friend bool operator>= (const T& x, const T& y) { return (!(x < y)); } + }; + + template + inline T* construct(T* p) + { + return new (static_cast(p)) T; + } + + template + inline T* construct(T* p, const U& init) + { + return new (static_cast(p)) T(init); + } + + template + void construct_array(T* 
p, uint32 n) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T; + } + + template + void construct_array(T* p, uint32 n, const U& init) + { + T* q = p + n; + for ( ; p != q; ++p) + new (static_cast(p)) T(init); + } + + template + inline void destruct(T* p) + { + p; + p->~T(); + } + + template inline void destruct_array(T* p, uint32 n) + { + T* q = p + n; + for ( ; p != q; ++p) + p->~T(); + } + + } // namespace helpers + +} // namespace crnd + +// File: crnd_traits.h +namespace crnd +{ + template + struct scalar_type + { + enum { cFlag = false }; + static inline void construct(T* p) { helpers::construct(p); } + static inline void construct(T* p, const T& init) { helpers::construct(p, init); } + static inline void construct_array(T* p, uint32 n) { helpers::construct_array(p, n); } + static inline void destruct(T* p) { helpers::destruct(p); } + static inline void destruct_array(T* p, uint32 n) { helpers::destruct_array(p, n); } + }; + + template struct scalar_type + { + enum { cFlag = true }; + static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } + static inline void construct(T** p, T* init) { *p = init; } + static inline void construct_array(T** p, uint32 n) { memset(p, 0, sizeof(T*) * n); } + static inline void destruct(T** p) { p; } + static inline void destruct_array(T** p, uint32 n) { p, n; } + }; + +#define CRND_DEFINE_BUILT_IN_TYPE(X) \ + template<> struct scalar_type { \ + enum { cFlag = true }; \ + static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ + static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ + static inline void construct_array(X* p, uint32 n) { memset(p, 0, sizeof(X) * n); } \ + static inline void destruct(X* p) { p; } \ + static inline void destruct_array(X* p, uint32 n) { p, n; } }; + + CRND_DEFINE_BUILT_IN_TYPE(bool) + CRND_DEFINE_BUILT_IN_TYPE(char) + CRND_DEFINE_BUILT_IN_TYPE(unsigned char) + CRND_DEFINE_BUILT_IN_TYPE(short) + CRND_DEFINE_BUILT_IN_TYPE(unsigned 
short) + CRND_DEFINE_BUILT_IN_TYPE(int) + CRND_DEFINE_BUILT_IN_TYPE(unsigned int) + CRND_DEFINE_BUILT_IN_TYPE(long) + CRND_DEFINE_BUILT_IN_TYPE(unsigned long) +#ifndef PLATFORM_NACL + CRND_DEFINE_BUILT_IN_TYPE(__int64) + CRND_DEFINE_BUILT_IN_TYPE(unsigned __int64) +#endif + CRND_DEFINE_BUILT_IN_TYPE(float) + CRND_DEFINE_BUILT_IN_TYPE(double) + CRND_DEFINE_BUILT_IN_TYPE(long double) + +#undef CRND_DEFINE_BUILT_IN_TYPE + + // See: http://erdani.org/publications/cuj-2004-06.pdf + + template + struct bitwise_movable { enum { cFlag = false }; }; + + // Defines type Q as bitwise movable. +#define CRND_DEFINE_BITWISE_MOVABLE(Q) template<> struct bitwise_movable { enum { cFlag = true }; }; + + // From yasli_traits.h: + // Credit goes to Boost; + // also found in the C++ Templates book by Vandevoorde and Josuttis + + typedef char (&yes_t)[1]; + typedef char (&no_t)[2]; + + template yes_t class_test(int U::*); + template no_t class_test(...); + + template struct is_class + { + enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; + }; + + template struct is_pointer + { + enum { value = false }; + }; + + template struct is_pointer + { + enum { value = true }; + }; + +#define CRND_IS_POD(T) __is_pod(T) + +} // namespace crnd + +// File: crnd_mem.h +namespace crnd +{ + void* crnd_malloc(size_t size, size_t* pActual_size = NULL); + void* crnd_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); + void crnd_free(void* p); + size_t crnd_msize(void* p); + + template + inline T* crnd_new() + { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; + + return helpers::construct(p); + } + + template + inline T* crnd_new(const T& init) + { + T* p = static_cast(crnd_malloc(sizeof(T))); + if (!p) + return NULL; + + return helpers::construct(p, init); + } + + template + inline T* crnd_new_array(uint32 num) + { + if (!num) num = 1; + + uint8* q = static_cast(crnd_malloc(CRND_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); + if (!q) + return NULL; + 
+ T* p = reinterpret_cast(q + CRND_MIN_ALLOC_ALIGNMENT); + + reinterpret_cast(p)[-1] = num; + reinterpret_cast(p)[-2] = ~num; + + helpers::construct_array(p, num); + return p; + } + + template + inline void crnd_delete(T* p) + { + if (p) + { + helpers::destruct(p); + crnd_free(p); + } + } + + template + inline void crnd_delete_array(T* p) + { + if (p) + { + const uint32 num = reinterpret_cast(p)[-1]; + const uint32 num_check = reinterpret_cast(p)[-2]; + num_check; + CRND_ASSERT(num && (num == ~num_check)); + + helpers::destruct_array(p, num); + + crnd_free(reinterpret_cast(p) - CRND_MIN_ALLOC_ALIGNMENT); + } + } + +} // namespace crnd + +// File: crnd_math.h +namespace crnd +{ + namespace math + { + const float cNearlyInfinite = 1.0e+37f; + + const float cDegToRad = 0.01745329252f; + const float cRadToDeg = 57.29577951f; + + extern uint32 g_bitmasks[32]; + + // Yes I know these should probably be pass by ref, not val: + // http://www.stepanovpapers.com/notes.pdf + // Just don't use them on non-simple (non built-in) types! + template inline T minimum(T a, T b) + { + return (a < b) ? a : b; + } + + template inline T minimum(T a, T b, T c) + { + return minimum(minimum(a, b), c); + } + + template inline T maximum(T a, T b) + { + return (a > b) ? a : b; + } + + template inline T maximum(T a, T b, T c) + { + return maximum(maximum(a, b), c); + } + + template inline T clamp(T value, T low, T high) + { + return (value < low) ? low : ((value > high) ? high : value); + } + + template inline T square(T value) + { + return value * value; + } + + inline bool is_power_of_2(uint32 x) + { + return x && ((x & (x - 1U)) == 0U); + } + + // From "Hackers Delight" + inline int next_pow2(uint32 val) + { + val--; + val |= val >> 16; + val |= val >> 8; + val |= val >> 4; + val |= val >> 2; + val |= val >> 1; + return val + 1; + } + + // Returns the total number of bits needed to encode v. 
+ inline uint32 total_bits(uint32 v) + { + uint32 l = 0; + while (v > 0U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint floor_log2i(uint v) + { + uint l = 0; + while (v > 1U) + { + v >>= 1; + l++; + } + return l; + } + + inline uint ceil_log2i(uint v) + { + uint l = floor_log2i(v); + if ((l != cIntBits) && (v > (1U << l))) + l++; + return l; + } + } +} + +// File: crnd_utils.h +namespace crnd +{ + namespace utils + { + template inline void zero_object(T& obj) + { + memset(&obj, 0, sizeof(obj)); + } + + template inline void zero_this(T* pObj) + { + memset(pObj, 0, sizeof(*pObj)); + } + + template + inline void swap(T& left, T& right) + { + T temp(left); + left = right; + right = temp; + } + + inline void invert_buf(void* pBuf, uint32 size) + { + uint8* p = static_cast(pBuf); + + const uint32 half_size = size >> 1; + for (uint32 i = 0; i < half_size; i++) + swap(p[i], p[size - 1U - i]); + } + + static inline uint16 swap16(uint16 x) { return static_cast((x << 8) | (x >> 8)); } + static inline uint32 swap32(uint32 x) { return ((x << 24) | ((x << 8) & 0x00FF0000) | (( x >> 8) & 0x0000FF00) | (x >> 24)); } + + uint32 compute_max_mips(uint32 width, uint32 height); + + } // namespace utils + +} // namespace crnd + +// File: crnd_vector.h +namespace crnd +{ + struct elemental_vector + { + void* m_p; + uint32 m_size; + uint32 m_capacity; + + typedef void (*object_mover)(void* pDst, void* pSrc, uint32 num); + + bool increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pRelocate); + }; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4127) // warning C4127: conditional expression is constant +#endif + + template + class vector : public helpers::rel_ops< vector > + { + public: + typedef T* iterator; + typedef const T* const_iterator; + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + + inline vector() : + m_p(NULL), + 
m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + } + + inline vector(const vector& other) : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + *this = other; + } + + inline vector(uint32 size) : + m_p(NULL), + m_size(0), + m_capacity(0), + m_alloc_failed(false) + { + resize(size); + } + + inline ~vector() + { + clear(); + } + + // I don't like this. Not at all. But exceptions, or just failing suck worse. + inline bool get_alloc_failed() const { return m_alloc_failed; } + inline void clear_alloc_failed() { m_alloc_failed = false; } + + inline bool assign(const vector& other) + { + if (this == &other) + return true; + + if (m_capacity == other.m_size) + resize(0); + else + { + clear(); + + if (!increase_capacity(other.m_size, false)) + return false; + } + + if (scalar_type::cFlag) + memcpy(m_p, other.m_p, other.m_size * sizeof(T)); + else + { + T* pDst = m_p; + const T* pSrc = other.m_p; + for (uint32 i = other.m_size; i > 0; i--) + helpers::construct(pDst++, *pSrc++); + } + + m_size = other.m_size; + + return true; + } + + inline vector& operator= (const vector& other) + { + assign(other); + return *this; + } + + inline const T* begin() const { return m_p; } + T* begin() { return m_p; } + + inline const T* end() const { return m_p + m_size; } + T* end() { return m_p + m_size; } + + inline bool empty() const { return !m_size; } + inline uint32 size() const { return m_size; } + inline uint32 capacity() const { return m_capacity; } + + inline const T& operator[] (uint32 i) const { CRND_ASSERT(i < m_size); return m_p[i]; } + inline T& operator[] (uint32 i) { CRND_ASSERT(i < m_size); return m_p[i]; } + + inline const T& front() const { CRND_ASSERT(m_size); return m_p[0]; } + inline T& front() { CRND_ASSERT(m_size); return m_p[0]; } + + inline const T& back() const { CRND_ASSERT(m_size); return m_p[m_size - 1]; } + inline T& back() { CRND_ASSERT(m_size); return m_p[m_size - 1]; } + + inline void clear() + { + if (m_p) + { + 
scalar_type::destruct_array(m_p, m_size); + crnd_free(m_p); + m_p = NULL; + m_size = 0; + m_capacity = 0; + } + + m_alloc_failed = false; + } + + inline bool reserve(uint32 new_capacity) + { + if (!increase_capacity(new_capacity, false)) + return false; + + return true; + } + + inline bool resize(uint32 new_size) + { + if (m_size != new_size) + { + if (new_size < m_size) + scalar_type::destruct_array(m_p + new_size, m_size - new_size); + else + { + if (new_size > m_capacity) + { + if (!increase_capacity(new_size, new_size == (m_size + 1))) + return false; + } + + scalar_type::construct_array(m_p + m_size, new_size - m_size); + } + + m_size = new_size; + } + + return true; + } + + inline bool push_back(const T& obj) + { + CRND_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); + + if (m_size >= m_capacity) + { + if (!increase_capacity(m_size + 1, true)) + return false; + } + + scalar_type::construct(m_p + m_size, obj); + m_size++; + + return true; + } + + inline void pop_back() + { + CRND_ASSERT(m_size); + + if (m_size) + { + m_size--; + scalar_type::destruct(&m_p[m_size]); + } + } + + inline void insert(uint32 index, const T* p, uint32 n) + { + CRND_ASSERT(index <= m_size); + if (!n) + return; + + const uint32 orig_size = m_size; + resize(m_size + n); + + const T* pSrc = m_p + orig_size - 1; + T* pDst = const_cast(pSrc) + n; + + const uint32 num_to_move = orig_size - index; + + for (uint32 i = 0; i < num_to_move; i++) + { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst-- = *pSrc--; + } + + pSrc = p; + pDst = m_p + index; + + for (uint32 i = 0; i < n; i++) + { + CRND_ASSERT((pDst - m_p) < (int)m_size); + *pDst++ = *p++; + } + } + + inline void erase(uint32 start, uint32 n) + { + CRND_ASSERT((start + n) <= m_size); + + if (!n) + return; + + const uint32 num_to_move = m_size - (start + n); + + T* pDst = m_p + start; + T* pDst_end = pDst + num_to_move; + const T* pSrc = m_p + start + n; + + while (pDst != pDst_end) + *pDst++ = *pSrc++; + + 
scalar_type::destruct_array(pDst_end, n); + + m_size -= n; + } + + inline void erase(uint32 index) + { + erase(index, 1); + } + + inline void erase(T* p) + { + CRND_ASSERT((p >= m_p) && (p < (m_p + m_size))); + erase(p - m_p); + } + + inline bool operator== (const vector& rhs) const + { + if (m_size != rhs.m_size) + return false; + else if (m_size) + { + if (scalar_type::cFlag) + return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; + else + { + const T* pSrc = m_p; + const T* pDst = rhs.m_p; + for (uint32 i = m_size; i; i--) + if (!(*pSrc++ == *pDst++)) + return false; + } + } + + return true; + } + + inline bool operator< (const vector& rhs) const + { + const uint32 min_size = math::minimum(m_size, rhs.m_size); + + const T* pSrc = m_p; + const T* pSrc_end = m_p + min_size; + const T* pDst = rhs.m_p; + + while ((pSrc < pSrc_end) && (*pSrc == *pDst)) + { + pSrc++; + pDst++; + } + + if (pSrc < pSrc_end) + return *pSrc < *pDst; + + return m_size < rhs.m_size; + } + + void swap(vector& other) + { + utils::swap(m_p, other.m_p); + utils::swap(m_size, other.m_size); + utils::swap(m_capacity, other.m_capacity); + } + + private: + T* m_p; + uint32 m_size; + uint32 m_capacity; + bool m_alloc_failed; + + template struct is_vector { enum { cFlag = false }; }; + template struct is_vector< vector > { enum { cFlag = true }; }; + + static void object_mover(void* pDst_void, void* pSrc_void, uint32 num) + { + T* pSrc = static_cast(pSrc_void); + T* const pSrc_end = pSrc + num; + T* pDst = static_cast(pDst_void); + + while (pSrc != pSrc_end) + { + helpers::construct(pDst, *pSrc); + pSrc->~T(); + pSrc++; + pDst++; + } + } + + inline bool increase_capacity(uint32 min_new_capacity, bool grow_hint) + { + if (!reinterpret_cast(this)->increase_capacity( + min_new_capacity, grow_hint, sizeof(T), + ((scalar_type::cFlag) || (is_vector::cFlag) || (bitwise_movable::cFlag) || CRND_IS_POD(T)) ? 
NULL : object_mover)) + { + m_alloc_failed = true; + return false; + } + return true; + } + }; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + extern void vector_test(); + +} // namespace crnd + +// File: crnd_private.h +namespace crnd +{ + const crn_header* crnd_get_header(crn_header& header, const void* pData, uint32 data_size); + +} // namespace crnd + +// File: checksum.h +namespace crnd +{ + // crc16() intended for small buffers - doesn't use an acceleration table. + const uint16 cInitCRC16 = 0; + uint16 crc16(const void* pBuf, uint32 len, uint16 crc = cInitCRC16); + +} // namespace crnd + +// File: crnd_color.h +namespace crnd +{ + template struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT8_MIN, + cMax = cUINT8_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT16_MIN, + cMax = cINT16_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT16_MIN, + cMax = cUINT16_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = true, + cFloat = false, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = false, + cMin = cUINT32_MIN, + cMax = cUINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + + template<> struct color_quad_component_traits + { + enum + { + cSigned = false, + cFloat = true, + cMin = cINT32_MIN, + cMax = cINT32_MAX + }; + }; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4201) // warning C4201: nonstandard extension used : nameless struct/union +#pragma warning(disable:4127) // warning C4127: conditional expression is constant +#endif + + template + class color_quad : public 
helpers::rel_ops > + { + static parameter_type clamp(parameter_type v) + { + if (component_traits::cFloat) + return v; + else + { + if (v < component_traits::cMin) + return component_traits::cMin; + else if (v > component_traits::cMax) + return component_traits::cMax; + return v; + } + } + + public: + typedef component_type component_t; + typedef parameter_type parameter_t; + typedef color_quad_component_traits component_traits; + + enum { cNumComps = 4 }; + + union + { + struct + { + component_type r; + component_type g; + component_type b; + component_type a; + }; + + component_type c[cNumComps]; + }; + + color_quad() + { + } + + color_quad(eClear) : + r(0), g(0), b(0), a(0) + { + } + + color_quad(const color_quad& other) : + r(other.r), g(other.g), b(other.b), a(other.a) + { + } + + color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) + { + set(y, alpha); + } + + color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + set(red, green, blue, alpha); + } + + template + color_quad(const color_quad& other) : + r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) + { + } + + inline void clear() + { + r = 0; + g = 0; + b = 0; + a = 0; + } + + color_quad& operator= (const color_quad& other) + { + r = other.r; + g = other.g; + b = other.b; + a = other.a; + return *this; + } + + template + color_quad& operator=(const color_quad& other) + { + r = clamp(other.r); + g = clamp(other.g); + b = clamp(other.b); + a = clamp(other.a); + return *this; + } + + color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) + { + y = clamp(y); + r = static_cast(y); + g = static_cast(y); + b = static_cast(y); + a = static_cast(alpha); + return *this; + } + + color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) + { + r = static_cast(clamp(red)); + g = static_cast(clamp(green)); + b = 
static_cast(clamp(blue)); + a = static_cast(clamp(alpha)); + return *this; + } + + color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) + { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + a = static_cast(alpha); + return *this; + } + + color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) + { + r = static_cast(red); + g = static_cast(green); + b = static_cast(blue); + return *this; + } + + static parameter_type get_min_comp() { return component_traits::cMin; } + static parameter_type get_max_comp() { return component_traits::cMax; } + static bool get_comps_are_signed() { return component_traits::cSigned; } + + component_type operator[] (uint32 i) const { CRND_ASSERT(i < cNumComps); return c[i]; } + component_type& operator[] (uint32 i) { CRND_ASSERT(i < cNumComps); return c[i]; } + + color_quad& set_component(uint32 i, parameter_type f) + { + CRND_ASSERT(i < cNumComps); + + c[i] = static_cast(clamp(f)); + + return *this; + } + + color_quad& clamp(const color_quad& l, const color_quad& h) + { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l[i], h[i])); + return *this; + } + + color_quad& clamp(parameter_type l, parameter_type h) + { + for (uint32 i = 0; i < cNumComps; i++) + c[i] = static_cast(math::clamp(c[i], l, h)); + return *this; + } + + // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). + inline parameter_type get_luma() const + { + return static_cast((19595U * r + 38470U * g + 7471U * b + 32768) >> 16U); + } + + // Returns REC 709 luma. + inline parameter_type get_luma_rec709() const + { + return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); + } + + uint32 squared_distance(const color_quad& c, bool alpha = true) const + { + return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? 
math::square(a - c.a) : 0); + } + + bool rgb_equals(const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); + } + + bool operator== (const color_quad& rhs) const + { + return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); + } + + bool operator< (const color_quad& rhs) const + { + for (uint32 i = 0; i < cNumComps; i++) + { + if (c[i] < rhs.c[i]) + return true; + else if (!(c[i] == rhs.c[i])) + return false; + } + return false; + } + + color_quad& operator+= (const color_quad& other) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] + other.c[i])); + return *this; + } + + color_quad& operator-= (const color_quad& other) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] - other.c[i])); + return *this; + } + + color_quad& operator*= (parameter_type v) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(clamp(c[i] * v)); + return *this; + } + + color_quad& operator/= (parameter_type v) + { + for (uint32 i = 0; i < 4; i++) + c[i] = static_cast(c[i] / v); + return *this; + } + + color_quad get_swizzled(uint32 x, uint32 y, uint32 z, uint32 w) const + { + CRND_ASSERT((x | y | z | w) < 4); + return color_quad(c[x], c[y], c[z], c[w]); + } + + friend color_quad operator+ (const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result += rhs; + return result; + } + + friend color_quad operator- (const color_quad& lhs, const color_quad& rhs) + { + color_quad result(lhs); + result -= rhs; + return result; + } + + friend color_quad operator* (const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result *= v; + return result; + } + + friend color_quad operator/ (const color_quad& lhs, parameter_type v) + { + color_quad result(lhs); + result /= v; + return result; + } + + friend color_quad operator* (parameter_type v, const color_quad& rhs) + { + color_quad result(rhs); + result *= v; + return result; + } + + uint32 get_min_component_index(bool alpha = 
true) const + { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] < c[index]) + index = i; + return index; + } + + uint32 get_max_component_index(bool alpha = true) const + { + uint32 index = 0; + uint32 limit = alpha ? cNumComps : (cNumComps - 1); + for (uint32 i = 1; i < limit; i++) + if (c[i] > c[index]) + index = i; + return index; + } + + void get_float4(float* pDst) + { + for (uint32 i = 0; i < 4; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + void get_float3(float* pDst) + { + for (uint32 i = 0; i < 3; i++) + pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); + } + + static color_quad make_black() + { + return color_quad(0, 0, 0, component_traits::cMax); + } + + static color_quad make_white() + { + return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); + } + }; // class color_quad + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + template + struct scalar_type< color_quad > + { + enum { cFlag = true }; + static inline void construct(color_quad* p) { } + static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } + static inline void construct_array(color_quad* p, uint32 n) { p, n; } + static inline void destruct(color_quad* p) { p; } + static inline void destruct_array(color_quad* p, uint32 n) { p, n; } + }; + + typedef color_quad color_quad_u8; + typedef color_quad color_quad_i16; + typedef color_quad color_quad_u16; + typedef color_quad color_quad_i32; + typedef color_quad color_quad_u32; + typedef color_quad color_quad_f; + typedef color_quad color_quad_d; + +} // namespace crnd + +// File: crnd_dxt.h +namespace crnd +{ + enum dxt_format + { + cDXTInvalid = -1, + + // cDXT1/1A must appear first! 
+ cDXT1, + cDXT1A, + + cDXT3, + cDXT5, + cDXT5A, + + cDXN_XY, // inverted relative to standard ATI2, 360's DXN + cDXN_YX // standard ATI2 + }; + + enum dxt_constants + { + cDXTBlockShift = 2U, + cDXTBlockSize = 1U << cDXTBlockShift, + + cDXT1BytesPerBlock = 8U, + cDXT5NBytesPerBlock = 16U, + + cDXT1SelectorBits = 2U, + cDXT1SelectorValues = 1U << cDXT1SelectorBits, + cDXT1SelectorMask = cDXT1SelectorValues - 1U, + + cDXT5SelectorBits = 3U, + cDXT5SelectorValues = 1U << cDXT5SelectorBits, + cDXT5SelectorMask = cDXT5SelectorValues - 1U + }; + + const float cDXT1MaxLinearValue = 3.0f; + const float cDXT1InvMaxLinearValue = 1.0f/3.0f; + + const float cDXT5MaxLinearValue = 7.0f; + const float cDXT5InvMaxLinearValue = 1.0f/7.0f; + + // Converts DXT1 raw color selector index to a linear value. + extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; + + // Converts DXT5 raw alpha selector index to a linear value. + extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; + + // Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). + extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; + + // Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). + extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; + + extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; + extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; + + struct dxt1_block + { + uint8 m_low_color[2]; + uint8 m_high_color[2]; + + enum { cNumSelectorBytes = 4 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + // These methods assume the in-memory rep is in LE byte order. 
+ inline uint32 get_low_color() const + { + return m_low_color[0] | (m_low_color[1] << 8U); + } + + inline uint32 get_high_color() const + { + return m_high_color[0] | (m_high_color[1] << 8U); + } + + inline void set_low_color(uint16 c) + { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline void set_high_color(uint16 c) + { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } + + inline uint32 get_selector(uint32 x, uint32 y) const + { + CRND_ASSERT((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; + } + + inline void set_selector(uint32 x, uint32 y, uint32 val) + { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); + + m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); + m_selectors[y] |= (val << (x * cDXT1SelectorBits)); + } + + static uint16 pack_color(const color_quad_u8& color, bool scaled, uint32 bias = 127U); + static uint16 pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias = 127U); + + static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint32 alpha = 255U); + static void unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled); + + static uint32 get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); + static uint32 get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); + // pDst must point to an array at least cDXT1SelectorValues long. 
+ static uint32 get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); + + static color_quad_u8 unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha = 255U); + static uint32 pack_endpoints(uint32 lo, uint32 hi); + }; + + CRND_DEFINE_BITWISE_MOVABLE(dxt1_block); + + struct dxt3_block + { + enum { cNumAlphaBytes = 8 }; + uint8 m_alpha[cNumAlphaBytes]; + + void set_alpha(uint32 x, uint32 y, uint32 value, bool scaled); + uint32 get_alpha(uint32 x, uint32 y, bool scaled) const; + }; + + CRND_DEFINE_BITWISE_MOVABLE(dxt3_block); + + struct dxt5_block + { + uint8 m_endpoints[2]; + + enum { cNumSelectorBytes = 6 }; + uint8 m_selectors[cNumSelectorBytes]; + + inline void clear() + { + utils::zero_this(this); + } + + inline uint32 get_low_alpha() const + { + return m_endpoints[0]; + } + + inline uint32 get_high_alpha() const + { + return m_endpoints[1]; + } + + inline void set_low_alpha(uint32 i) + { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[0] = static_cast(i); + } + + inline void set_high_alpha(uint32 i) + { + CRND_ASSERT(i <= cUINT8_MAX); + m_endpoints[1] = static_cast(i); + } + + uint32 get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } + + uint32 get_selectors_as_word(uint32 index) { CRND_ASSERT(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); } + + inline uint32 get_selector(uint32 x, uint32 y) const + { + CRND_ASSERT((x < 4U) && (y < 4U)); + + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * cDXT5SelectorBits; + + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; + + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + return (v >> bit_ofs) & 7; + } + + inline void set_selector(uint32 x, uint32 y, uint32 val) + { + CRND_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); + + uint32 selector_index = (y * 4) + x; + uint32 bit_index = selector_index * 
cDXT5SelectorBits; + + uint32 byte_index = bit_index >> 3; + uint32 bit_ofs = bit_index & 7; + + uint32 v = m_selectors[byte_index]; + if (byte_index < (cNumSelectorBytes - 1)) + v |= (m_selectors[byte_index + 1] << 8); + + v &= (~(7 << bit_ofs)); + v |= (val << bit_ofs); + + m_selectors[byte_index] = static_cast(v); + if (byte_index < (cNumSelectorBytes - 1)) + m_selectors[byte_index + 1] = static_cast(v >> 8); + } + + // Results written to alpha channel. + static uint32 get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h); + static uint32 get_block_values(color_quad_u8* pDst, uint32 l, uint32 h); + + static uint32 get_block_values6(uint32* pDst, uint32 l, uint32 h); + static uint32 get_block_values8(uint32* pDst, uint32 l, uint32 h); + // pDst must point to an array at least cDXT5SelectorValues long. + static uint32 get_block_values(uint32* pDst, uint32 l, uint32 h); + + static uint32 unpack_endpoint(uint32 packed, uint32 index); + static uint32 pack_endpoints(uint32 lo, uint32 hi); + }; + + CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); + +} // namespace crnd + +// File: crnd_dxt_hc_common.h +namespace crnd +{ + struct chunk_tile_desc + { + // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight. 
+ uint32 m_x_ofs; + uint32 m_y_ofs; + uint32 m_width; + uint32 m_height; + uint32 m_layout_index; + }; + + struct chunk_encoding_desc + { + uint32 m_num_tiles; + chunk_tile_desc m_tiles[4]; + }; + + const uint32 cChunkPixelWidth = 8; + const uint32 cChunkPixelHeight = 8; + const uint32 cChunkBlockWidth = 2; + const uint32 cChunkBlockHeight = 2; + + const uint32 cChunkMaxTiles = 4; + + const uint32 cBlockPixelWidthShift = 2; + const uint32 cBlockPixelHeightShift = 2; + + const uint32 cBlockPixelWidth = 4; + const uint32 cBlockPixelHeight = 4; + + const uint32 cNumChunkEncodings = 8; + extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings]; + + const uint32 cNumChunkTileLayouts = 9; + const uint32 cFirst4x4ChunkTileLayout = 5; + extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; + +} // namespace crnd + +// File: crnd_prefix_coding.h +#ifdef _XBOX +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 +#else +#define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 0 +#endif + +namespace crnd +{ + namespace prefix_coding + { + const uint32 cMaxExpectedCodeSize = 16; + const uint32 cMaxSupportedSyms = 8192; + const uint32 cMaxTableBits = 11; + + class decoder_tables + { + public: + inline decoder_tables() : + m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + } + + inline decoder_tables(const decoder_tables& other) : + m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) + { + *this = other; + } + + decoder_tables& operator= (const decoder_tables& other) + { + if (this == &other) + return *this; + + clear(); + + memcpy(this, &other, sizeof(*this)); + + if (other.m_lookup) + { + m_lookup = crnd_new_array(m_cur_lookup_size); + if (m_lookup) + memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); + } + + if (other.m_sorted_symbol_order) + { + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if 
(m_sorted_symbol_order) + memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); + } + + return *this; + } + + inline void clear() + { + if (m_lookup) + { + crnd_delete_array(m_lookup); + m_lookup = 0; + m_cur_lookup_size = 0; + } + + if (m_sorted_symbol_order) + { + crnd_delete_array(m_sorted_symbol_order); + m_sorted_symbol_order = NULL; + m_cur_sorted_symbol_order_size = 0; + } + } + + inline ~decoder_tables() + { + if (m_lookup) + crnd_delete_array(m_lookup); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + } + + bool init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits); + + // DO NOT use any complex classes here - it is bitwise copied. + + uint32 m_num_syms; + uint32 m_total_used_syms; + uint32 m_table_bits; + uint32 m_table_shift; + uint32 m_table_max_code; + uint32 m_decode_start_code_size; + + uint8 m_min_code_size; + uint8 m_max_code_size; + + uint32 m_max_codes[cMaxExpectedCodeSize + 1]; + int32 m_val_ptrs[cMaxExpectedCodeSize + 1]; + + uint32 m_cur_lookup_size; + uint32* m_lookup; + + uint32 m_cur_sorted_symbol_order_size; + uint16* m_sorted_symbol_order; + + inline uint32 get_unshifted_max_code(uint32 len) const + { + CRND_ASSERT( (len >= 1) && (len <= cMaxExpectedCodeSize) ); + uint32 k = m_max_codes[len - 1]; + if (!k) + return crnd::cUINT32_MAX; + return (k - 1) >> (16 - len); + } + }; + + } // namespace prefix_coding + +} // namespace crnd + +// File: crnd_symbol_codec.h +namespace crnd +{ + class static_huffman_data_model + { + public: + static_huffman_data_model(); + static_huffman_data_model(const static_huffman_data_model& other); + ~static_huffman_data_model(); + + static_huffman_data_model& operator= (const static_huffman_data_model& rhs); + + bool init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit); + void clear(); + + inline bool is_valid() const { return m_pDecode_tables != NULL; } + + inline uint32 
get_total_syms() const { return m_total_syms; } + + inline uint32 get_code_size(uint32 sym) const { return m_code_sizes[sym]; } + + inline const uint8* get_code_sizes() const { return m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } + + public: + uint32 m_total_syms; + crnd::vector m_code_sizes; + prefix_coding::decoder_tables* m_pDecode_tables; + + private: + bool prepare_decoder_tables(); + uint compute_decoder_table_bits() const; + + friend class symbol_codec; + }; + + class symbol_codec + { + public: + symbol_codec(); + + bool start_decoding(const uint8* pBuf, uint32 buf_size); + bool decode_receive_static_data_model(static_huffman_data_model& model); + + uint32 decode_bits(uint32 num_bits); + uint32 decode(const static_huffman_data_model& model); + +#ifdef PLATFORM_NACL + uint32 stop_decoding(); +#else + uint64 stop_decoding(); +#endif + + public: + const uint8* m_pDecode_buf; + const uint8* m_pDecode_buf_next; + const uint8* m_pDecode_buf_end; + uint32 m_decode_buf_size; + + typedef uint32 bit_buf_type; + enum { cBitBufSize = 32U }; + bit_buf_type m_bit_buf; + + int m_bit_count; + + private: + void get_bits_init(); + uint32 get_bits(uint32 num_bits); + }; + +} // namespace crnd + +#define CRND_HUFF_DECODE_BEGIN(x) +#define CRND_HUFF_DECODE_END(x) +#define CRND_HUFF_DECODE(codec, model, symbol) symbol = codec.decode(model); + +namespace crnd +{ + void crnd_assert(const char* pExp, const char* pFile, unsigned line) + { + char buf[512]; + +#if defined(WIN32) && defined(_MSC_VER) + sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); +#else + sprintf(buf, "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); +#endif + + crnd_output_debug_string(buf); + + printf(buf); + + if (crnd_is_debugger_present()) + crnd_debug_break(); + } + + void crnd_trace(const char* pFmt, va_list args) + { + if (crnd_is_debugger_present()) + { + char buf[512]; +#if defined(WIN32) && defined(_MSC_VER) + vsprintf_s(buf, sizeof(buf), pFmt, args); 
+#else + vsprintf(buf, pFmt, args); +#endif + + crnd_output_debug_string(buf); + } + }; + + void crnd_trace(const char* pFmt, ...) + { + va_list args; + va_start(args, pFmt); + crnd_trace(pFmt, args); + va_end(args); + }; + +} // namespace crnd + +// File: checksum.cpp +// From the public domain stb.h header. +namespace crnd +{ + uint16 crc16(const void* pBuf, uint32 len, uint16 crc) + { + crc = ~crc; + + const uint8* p = reinterpret_cast(pBuf); + while (len) + { + const uint16 q = *p++ ^ (crc >> 8U); + crc <<= 8U; + + uint16 r = (q >> 4U) ^ q; + crc ^= r; + r <<= 5U; + crc ^= r; + r <<= 7U; + crc ^= r; + + len--; + } + + return static_cast(~crc); + } + +} // namespace crnd + + +// File: crnd_vector.cpp +namespace crnd +{ + bool elemental_vector::increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pMover) + { + CRND_ASSERT(m_size <= m_capacity); + CRND_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); + + if (m_capacity >= min_new_capacity) + return true; + + uint32 new_capacity = min_new_capacity; + if ((grow_hint) && (!math::is_power_of_2(new_capacity))) + new_capacity = math::next_pow2(new_capacity); + + CRND_ASSERT(new_capacity && (new_capacity > m_capacity)); + + const uint32 desired_size = element_size * new_capacity; + size_t actual_size; + if (!pMover) + { + void* new_p = crnd_realloc(m_p, desired_size, &actual_size, true); + if (!new_p) + return false; + m_p = new_p; + } + else + { + void* new_p = crnd_malloc(desired_size, &actual_size); + if (!new_p) + return false; + + (*pMover)(new_p, m_p, m_size); + + if (m_p) + crnd_free(m_p); + + m_p = new_p; + } + + if (actual_size > desired_size) + m_capacity = static_cast(actual_size / element_size); + else + m_capacity = new_capacity; + + return true; + } + +} // namespace crnd + +// File: crnd_utils.cpp +namespace crnd +{ + namespace utils + { + uint32 compute_max_mips(uint32 width, uint32 height) + { + if ((width | height) == 0) + return 0; + + uint32 num_mips = 
1; + + while ((width > 1U) || (height > 1U)) + { + width >>= 1U; + height >>= 1U; + num_mips++; + } + + return num_mips; + } + + } // namespace utils + +} // namespace crnd + +// File: crnd_prefix_coding.cpp +namespace crnd +{ + namespace prefix_coding + { + bool decoder_tables::init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits) + { + uint32 min_codes[cMaxExpectedCodeSize]; + if ((!num_syms) || (table_bits > cMaxTableBits)) + return false; + + m_num_syms = num_syms; + + uint32 num_codes[cMaxExpectedCodeSize + 1]; + utils::zero_object(num_codes); + + for (uint32 i = 0; i < num_syms; i++) + { + uint32 c = pCodesizes[i]; + if (c) + num_codes[c]++; + } + + uint32 sorted_positions[cMaxExpectedCodeSize + 1]; + + uint32 cur_code = 0; + + uint32 total_used_syms = 0; + uint32 max_code_size = 0; + uint32 min_code_size = cUINT32_MAX; + for (uint32 i = 1; i <= cMaxExpectedCodeSize; i++) + { + const uint32 n = num_codes[i]; + + if (!n) + m_max_codes[i - 1] = 0;//UINT_MAX; + else + { + min_code_size = math::minimum(min_code_size, i); + max_code_size = math::maximum(max_code_size, i); + + min_codes[i - 1] = cur_code; + + m_max_codes[i - 1] = cur_code + n - 1; + m_max_codes[i - 1] = 1 + ((m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); + + m_val_ptrs[i - 1] = total_used_syms; + + sorted_positions[i] = total_used_syms; + + cur_code += n; + total_used_syms += n; + } + + cur_code <<= 1; + } + + m_total_used_syms = total_used_syms; + + if (total_used_syms > m_cur_sorted_symbol_order_size) + { + m_cur_sorted_symbol_order_size = total_used_syms; + + if (!math::is_power_of_2(total_used_syms)) + m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); + + if (m_sorted_symbol_order) + crnd_delete_array(m_sorted_symbol_order); + + m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); + if (!m_sorted_symbol_order) + return false; + } + + m_min_code_size = static_cast(min_code_size); + m_max_code_size = 
static_cast(max_code_size); + + for (uint32 i = 0; i < num_syms; i++) + { + uint32 c = pCodesizes[i]; + if (c) + { + CRND_ASSERT(num_codes[c]); + + uint32 sorted_pos = sorted_positions[c]++; + + CRND_ASSERT(sorted_pos < total_used_syms); + + m_sorted_symbol_order[sorted_pos] = static_cast(i); + } + } + + if (table_bits <= m_min_code_size) + table_bits = 0; + m_table_bits = table_bits; + + if (table_bits) + { + uint32 table_size = 1 << table_bits; + if (table_size > m_cur_lookup_size) + { + m_cur_lookup_size = table_size; + + if (m_lookup) + crnd_delete_array(m_lookup); + + m_lookup = crnd_new_array(table_size); + if (!m_lookup) + return false; + } + + memset(m_lookup, 0xFF, (uint)sizeof(m_lookup[0]) * (1UL << table_bits)); + + for (uint32 codesize = 1; codesize <= table_bits; codesize++) + { + if (!num_codes[codesize]) + continue; + + const uint32 fillsize = table_bits - codesize; + const uint32 fillnum = 1 << fillsize; + + const uint32 min_code = min_codes[codesize - 1]; + const uint32 max_code = get_unshifted_max_code(codesize); + const uint32 val_ptr = m_val_ptrs[codesize - 1]; + + for (uint32 code = min_code; code <= max_code; code++) + { + const uint32 sym_index = m_sorted_symbol_order[ val_ptr + code - min_code ]; + CRND_ASSERT( pCodesizes[sym_index] == codesize ); + + for (uint32 j = 0; j < fillnum; j++) + { + const uint32 t = j + (code << fillsize); + + CRND_ASSERT(t < (1U << table_bits)); + + CRND_ASSERT(m_lookup[t] == cUINT32_MAX); + + m_lookup[t] = sym_index | (codesize << 16U); + } + } + } + } + + for (uint32 i = 0; i < cMaxExpectedCodeSize; i++) + m_val_ptrs[i] -= min_codes[i]; + + m_table_max_code = 0; + m_decode_start_code_size = m_min_code_size; + + if (table_bits) + { + uint32 i; + for (i = table_bits; i >= 1; i--) + { + if (num_codes[i]) + { + m_table_max_code = m_max_codes[i - 1]; + break; + } + } + if (i >= 1) + { + m_decode_start_code_size = table_bits + 1; + for (uint32 j = table_bits + 1; j <= max_code_size; j++) + { + if (num_codes[j]) + { + 
m_decode_start_code_size = j; + break; + } + } + } + } + + // sentinels + m_max_codes[cMaxExpectedCodeSize] = cUINT32_MAX; + m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; + + m_table_shift = 32 - m_table_bits; + return true; + } + + } // namespace prefix_codig + +} // namespace crnd + +// File: crnd_platform.cpp +namespace crnd +{ + bool crnd_is_debugger_present() + { +#ifdef CRND_DEVEL + return IsDebuggerPresent() != 0; +#else + return false; +#endif + } + + void crnd_debug_break() + { +#ifdef CRND_DEVEL + DebugBreak(); +#endif + } + + void crnd_output_debug_string(const char* p) + { + p; +#ifdef CRND_DEVEL + OutputDebugStringA(p); +#endif + } + +} // namespace crnd + +// File: crnd_mem.cpp +namespace crnd +{ + const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; + + static void* crnd_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data) + { + pUser_data; + + void* p_new; + + if (!p) + { + p_new = ::malloc(size); + + if (pActual_size) +#ifdef PLATFORM_NACL + *pActual_size = p_new ? malloc_usable_size(p_new) : 0; +#else + *pActual_size = p_new ? ::_msize(p_new) : 0; +#endif + } + else if (!size) + { + ::free(p); + p_new = NULL; + + if (pActual_size) + *pActual_size = 0; + } + else + { + void* p_final_block = p; +#ifdef PLATFORM_NACL + p_new = ::realloc(p, size); +#else + p_new = ::_expand(p, size); +#endif + + if (p_new) + p_final_block = p_new; + else if (movable) + { + p_new = ::realloc(p, size); + + if (p_new) + p_final_block = p_new; + } + + if (pActual_size) +#ifdef PLATFORM_NACL + *pActual_size = ::malloc_usable_size(p_final_block); +#else + *pActual_size = ::_msize(p_final_block); +#endif + } + + return p_new; + } + + static size_t crnd_default_msize(void* p, void* pUser_data) + { + pUser_data; +#ifdef PLATFORM_NACL + return p ? malloc_usable_size(p) : 0; +#else + return p ? 
_msize(p) : 0; +#endif + } + + static crnd_realloc_func g_pRealloc = crnd_default_realloc; + static crnd_msize_func g_pMSize = crnd_default_msize; + static void* g_pUser_data; + + void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data) + { + if ((!pRealloc) || (!pMSize)) + { + g_pRealloc = crnd_default_realloc; + g_pMSize = crnd_default_msize; + g_pUser_data = NULL; + } + else + { + g_pRealloc = pRealloc; + g_pMSize = pMSize; + g_pUser_data = pUser_data; + } + } + + static inline void crnd_mem_error(const char* p_msg) + { + crnd_assert(p_msg, __FILE__, __LINE__); + } + + void* crnd_malloc(size_t size, size_t* pActual_size) + { + size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); + if (!size) + size = sizeof(uint32); + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); + + if (pActual_size) + *pActual_size = actual_size; + + if ((!p_new) || (actual_size < size)) + { + crnd_mem_error("crnd_malloc: out of memory"); + return NULL; + } + + CRND_ASSERT(((uint32)p_new & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + return p_new; + } + + void* crnd_realloc(void* p, size_t size, size_t* pActual_size, bool movable) + { + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_realloc: bad ptr"); + return NULL; + } + + if (size > MAX_POSSIBLE_BLOCK_SIZE) + { + crnd_mem_error("crnd_malloc: size too big"); + return NULL; + } + + size_t actual_size = size; + void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); + + if (pActual_size) + *pActual_size = actual_size; + + CRND_ASSERT(((uint32)p_new & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); + + return p_new; + } + + void crnd_free(void* p) + { + if (!p) + return; + + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + 
crnd_mem_error("crnd_free: bad ptr"); + return; + } + + (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); + } + + size_t crnd_msize(void* p) + { + if (!p) + return 0; + + if ((uint32)reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) + { + crnd_mem_error("crnd_msize: bad ptr"); + return 0; + } + + return (*g_pMSize)(p, g_pUser_data); + } + +} // namespace crnd + +// File: crnd_math.cpp +namespace crnd +{ + namespace math + { + uint32 g_bitmasks[32] = + { + 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, + 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, + 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, + 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, + 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, + 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, + 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, + 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U + }; + + } // namespace math +} // namespace crnd + +// File: crnd_info.cpp +namespace crnd +{ +#define CRND_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) + + uint32 crnd_crn_format_to_fourcc(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: return CRND_FOURCC('D', 'X', 'T', '1'); + case cCRNFmtDXT3: return CRND_FOURCC('D', 'X', 'T', '3'); + case cCRNFmtDXT5: return CRND_FOURCC('D', 'X', 'T', '5'); + case cCRNFmtDXN_XY: return CRND_FOURCC('A', '2', 'X', 'Y'); + case cCRNFmtDXN_YX: return CRND_FOURCC('A', 'T', 'I', '2'); + case cCRNFmtDXT5A: return CRND_FOURCC('A', 'T', 'I', '1'); + case cCRNFmtDXT5_CCxY: return CRND_FOURCC('C', 'C', 'x', 'Y'); + case cCRNFmtDXT5_xGxR: return CRND_FOURCC('x', 'G', 'x', 'R'); + case cCRNFmtDXT5_xGBR: return CRND_FOURCC('x', 'G', 'B', 'R'); + case cCRNFmtDXT5_AGBR: return CRND_FOURCC('A', 'G', 'B', 'R'); + default: break; + } + CRND_ASSERT(false); + return 0; + } + + crn_format crnd_get_fundamental_dxt_format(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return cCRNFmtDXT5; + default: break; + } + return fmt; 
+ } + + uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt) + { + switch (fmt) + { + case cCRNFmtDXT1: + case cCRNFmtDXT5A: + return 4; + case cCRNFmtDXT3: + case cCRNFmtDXT5: + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGxR: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + return 8; + default: break; + } + CRND_ASSERT(false); + return 0; + } + + uint32 crnd_get_bytes_per_dxt_block(crn_format fmt) + { + return (crnd_get_crn_format_bits_per_texel(fmt) << 4) >> 3; + } + + // TODO: tmp_header isn't used/This function is a helper to support old headers. + const crn_header* crnd_get_header(crn_header& tmp_header, const void* pData, uint32 data_size) + { + tmp_header; + + if ((!pData) || (data_size < sizeof(crn_header))) + return NULL; + + const crn_header& file_header = *static_cast(pData); + if (file_header.m_sig != crn_header::cCRNSigValue) + return NULL; + + if ((file_header.m_header_size < sizeof(crn_header)) || (data_size < file_header.m_data_size)) + return NULL; + + return &file_header; + } + + bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info) + { + if (pFile_info) + { + if (pFile_info->m_struct_size != sizeof(crn_file_info)) + return false; + + memset(&pFile_info->m_struct_size + 1, 0, sizeof(crn_file_info) - sizeof(pFile_info->m_struct_size)); + } + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + const uint32 header_crc = crc16(&pHeader->m_data_size, (uint32)(pHeader->m_header_size - ((const uint8*)&pHeader->m_data_size - (const uint8*)pHeader))); + if (header_crc != pHeader->m_header_crc16) + return false; + + const uint32 data_crc = crc16((const uint8*)pData + pHeader->m_header_size, pHeader->m_data_size - pHeader->m_header_size); + if (data_crc != pHeader->m_data_crc16) + return false; + + if ((pHeader->m_faces 
!= 1) && (pHeader->m_faces != 6)) + return false; + if ((pHeader->m_width < 1) || (pHeader->m_width > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_height < 1) || (pHeader->m_height > cCRNMaxLevelResolution)) + return false; + if ((pHeader->m_levels < 1) || (pHeader->m_levels > utils::compute_max_mips(pHeader->m_width, pHeader->m_height))) + return false; + if ((pHeader->m_format < cCRNFmtDXT1) || (pHeader->m_format >= cCRNFmtTotal)) + return false; + + if (pFile_info) + { + pFile_info->m_actual_data_size = pHeader->m_data_size; + pFile_info->m_header_size = pHeader->m_header_size; + pFile_info->m_total_palette_size = pHeader->m_color_endpoints.m_size + pHeader->m_color_selectors.m_size + pHeader->m_alpha_endpoints.m_size + pHeader->m_alpha_selectors.m_size; + pFile_info->m_tables_size = pHeader->m_tables_size; + + pFile_info->m_levels = pHeader->m_levels; + + for (uint32 i = 0; i < pHeader->m_levels; i++) + { + uint32 next_ofs = pHeader->m_data_size; + + // assumes the levels are packed together sequentially + if ((i + 1) < pHeader->m_levels) + next_ofs = pHeader->m_level_ofs[i + 1]; + + pFile_info->m_level_compressed_size[i] = next_ofs - pHeader->m_level_ofs[i]; + } + + pFile_info->m_color_endpoint_palette_entries = pHeader->m_color_endpoints.m_num; + pFile_info->m_color_selector_palette_entries = pHeader->m_color_selectors.m_num;; + pFile_info->m_alpha_endpoint_palette_entries = pHeader->m_alpha_endpoints.m_num;; + pFile_info->m_alpha_selector_palette_entries = pHeader->m_alpha_selectors.m_num;; + } + + return true; + } + + bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pInfo) + { + if ((!pData) || (data_size < sizeof(crn_header)) || (!pInfo)) + return false; + + if (pInfo->m_struct_size != sizeof(crn_texture_info)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + pInfo->m_width = pHeader->m_width; + 
pInfo->m_height = pHeader->m_height; + pInfo->m_levels = pHeader->m_levels; + pInfo->m_faces = pHeader->m_faces; + pInfo->m_format = static_cast((uint32)pHeader->m_format); + pInfo->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 8 : 16; + pInfo->m_userdata0 = pHeader->m_userdata0; + pInfo->m_userdata1 = pHeader->m_userdata1; + + return true; + } + + bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info) + { + if ((!pData) || (data_size < cCRNHeaderMinSize) || (!pLevel_info)) + return false; + + if (pLevel_info->m_struct_size != sizeof(crn_level_info)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + if (level_index >= pHeader->m_levels) + return false; + + uint32 width = math::maximum(1U, pHeader->m_width >> level_index); + uint32 height = math::maximum(1U, pHeader->m_height >> level_index); + + pLevel_info->m_width = width; + pLevel_info->m_height = height; + pLevel_info->m_faces = pHeader->m_faces; + pLevel_info->m_blocks_x = (width + 3) >> 2; + pLevel_info->m_blocks_y = (height + 3) >> 2; + pLevel_info->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; + pLevel_info->m_format = static_cast((uint32)pHeader->m_format); + + return true; + } + + const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize) + { + if (pSize) + *pSize = 0; + + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + if (level_index >= pHeader->m_levels) + return false; + + uint32 cur_level_ofs = pHeader->m_level_ofs[level_index]; + + if (pSize) + { + uint32 next_level_ofs = data_size; + if ((level_index + 1) < (pHeader->m_levels)) + next_level_ofs = pHeader->m_level_ofs[level_index + 1]; + + *pSize = next_level_ofs - cur_level_ofs; + } + + return static_cast(pData) + cur_level_ofs; + } + + uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + uint32 size = pHeader->m_header_size; + + size = math::maximum(size, pHeader->m_color_endpoints.m_ofs + pHeader->m_color_endpoints.m_size); + size = math::maximum(size, pHeader->m_color_selectors.m_ofs + pHeader->m_color_selectors.m_size); + size = math::maximum(size, pHeader->m_alpha_endpoints.m_ofs + pHeader->m_alpha_endpoints.m_size); + size = math::maximum(size, pHeader->m_alpha_selectors.m_ofs + pHeader->m_alpha_selectors.m_size); + size = math::maximum(size, pHeader->m_tables_ofs + pHeader->m_tables_size); + + return size; + } + + bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return false; + + crn_header tmp_header; + const crn_header* pHeader = crnd_get_header(tmp_header, pData, data_size); + if (!pHeader) + return false; + + if (pHeader->m_flags & 
cCRNHeaderFlagSegmented) + return false; + + const uint actual_base_data_size = crnd_get_segmented_file_size(pData, data_size); + if (base_data_size < actual_base_data_size) + return false; + + memcpy(pBase_data, pData, actual_base_data_size); + + crn_header& new_header = *static_cast(pBase_data); + new_header.m_flags = new_header.m_flags | cCRNHeaderFlagSegmented; + new_header.m_data_size = actual_base_data_size; + + new_header.m_data_crc16 = crc16((const uint8*)pBase_data + new_header.m_header_size, new_header.m_data_size - new_header.m_header_size); + + new_header.m_header_crc16 = crc16(&new_header.m_data_size, new_header.m_header_size - (uint32)((const uint8*)&new_header.m_data_size - (const uint8*)&new_header)); + + CRND_ASSERT(crnd_validate_file(&new_header, actual_base_data_size, NULL)); + + return true; + } + +} // namespace crnd + +// File: symbol_codec.cpp +namespace crnd +{ + static_huffman_data_model::static_huffman_data_model() : +m_total_syms(0), +m_pDecode_tables(NULL) +{ +} + +static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : +m_total_syms(0), +m_pDecode_tables(NULL) +{ + *this = other; +} + +static_huffman_data_model::~static_huffman_data_model() +{ + if (m_pDecode_tables) + crnd_delete(m_pDecode_tables); +} + +static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) +{ + if (this == &rhs) + return *this; + + m_total_syms = rhs.m_total_syms; + m_code_sizes = rhs.m_code_sizes; + if (m_code_sizes.get_alloc_failed()) + { + clear(); + return *this; + } + + if (rhs.m_pDecode_tables) + { + if (m_pDecode_tables) + *m_pDecode_tables = *rhs.m_pDecode_tables; + else + m_pDecode_tables = crnd_new(*rhs.m_pDecode_tables); + } + else + { + crnd_delete(m_pDecode_tables); + m_pDecode_tables = NULL; + } + + return *this; +} + +void static_huffman_data_model::clear() +{ + m_total_syms = 0; + m_code_sizes.clear(); + if (m_pDecode_tables) + { + crnd_delete(m_pDecode_tables); + 
m_pDecode_tables = NULL; + } +} + +bool static_huffman_data_model::init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit) +{ + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); + + code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); + + if (!m_code_sizes.resize(total_syms)) + return false; + + uint32 min_code_size = cUINT32_MAX; + uint32 max_code_size = 0; + + for (uint32 i = 0; i < total_syms; i++) + { + uint32 s = pCode_sizes[i]; + m_code_sizes[i] = static_cast(s); + min_code_size = math::minimum(min_code_size, s); + max_code_size = math::maximum(max_code_size, s); + } + + if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) + return false; + + if (max_code_size > code_size_limit) + return false; + + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); + + if (!m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits())) + return false; + + return true; +} + +bool static_huffman_data_model::prepare_decoder_tables() +{ + uint32 total_syms = m_code_sizes.size(); + + CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); + + m_total_syms = total_syms; + + if (!m_pDecode_tables) + m_pDecode_tables = crnd_new(); + + return m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits()); +} + +uint static_huffman_data_model::compute_decoder_table_bits() const +{ +#if CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE + return prefix_coding::cMaxTableBits; +#else + uint32 decoder_table_bits = 0; + if (m_total_syms > 16) + decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); + return decoder_table_bits; +#endif +} + +symbol_codec::symbol_codec() : + m_pDecode_buf(NULL), + m_pDecode_buf_next(NULL), + m_pDecode_buf_end(NULL), + m_decode_buf_size(0), + m_bit_buf(0), + m_bit_count(0) +{ +} + +// Code 
length encoding symbols: +// 0-16 - actual code lengths +const uint32 cMaxCodelengthCodes = 21; + +const uint32 cSmallZeroRunCode = 17; +const uint32 cLargeZeroRunCode = 18; +const uint32 cSmallRepeatCode = 19; +const uint32 cLargeRepeatCode = 20; + +const uint32 cMinSmallZeroRunSize = 3; +const uint32 cMaxSmallZeroRunSize = 10; +const uint32 cMinLargeZeroRunSize = 11; +const uint32 cMaxLargeZeroRunSize = 138; + +const uint32 cSmallMinNonZeroRunSize = 3; +const uint32 cSmallMaxNonZeroRunSize = 6; +const uint32 cLargeMinNonZeroRunSize = 7; +const uint32 cLargeMaxNonZeroRunSize = 70; + +const uint32 cSmallZeroRunExtraBits = 3; +const uint32 cLargeZeroRunExtraBits = 7; +const uint32 cSmallNonZeroRunExtraBits = 2; +const uint32 cLargeNonZeroRunExtraBits = 6; + +static const uint8 g_most_probable_codelength_codes[] = +{ + cSmallZeroRunCode, cLargeZeroRunCode, + cSmallRepeatCode, cLargeRepeatCode, + + 0, 8, + 7, 9, + 6, 10, + 5, 11, + 4, 12, + 3, 13, + 2, 14, + 1, 15, + 16 +}; +const uint32 cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); + +bool symbol_codec::decode_receive_static_data_model(static_huffman_data_model& model) +{ + const uint32 total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); + + if (!total_used_syms) + { + model.clear(); + return true; + } + + if (!model.m_code_sizes.resize(total_used_syms)) + return false; + + memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); + + const uint32 num_codelength_codes_to_send = decode_bits(5); + if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) + return false; + + static_huffman_data_model dm; + if (!dm.m_code_sizes.resize(cMaxCodelengthCodes)) + return false; + + for (uint32 i = 0; i < num_codelength_codes_to_send; i++) + dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); + + if 
(!dm.prepare_decoder_tables()) + return false; + + uint32 ofs = 0; + while (ofs < total_used_syms) + { + const uint32 num_remaining = total_used_syms - ofs; + + uint32 code = decode(dm); + if (code <= 16) + model.m_code_sizes[ofs++] = static_cast(code); + else if (code == cSmallZeroRunCode) + { + uint32 len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if (code == cLargeZeroRunCode) + { + uint32 len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; + if (len > num_remaining) + return false; + ofs += len; + } + else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) + { + uint32 len; + if (code == cSmallRepeatCode) + len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; + else + len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; + + if ((!ofs) || (len > num_remaining)) + return false; + const uint32 prev = model.m_code_sizes[ofs - 1]; + if (!prev) + return false; + const uint32 end = ofs + len; + while (ofs < end) + model.m_code_sizes[ofs++] = static_cast(prev); + } + else + { + CRND_ASSERT(0); + return false; + } + } + + if (ofs != total_used_syms) + return false; + + return model.prepare_decoder_tables(); +} + +bool symbol_codec::start_decoding(const uint8* pBuf, uint32 buf_size) +{ + if (!buf_size) + return false; + + m_pDecode_buf = pBuf; + m_pDecode_buf_next = pBuf; + m_decode_buf_size = buf_size; + m_pDecode_buf_end = pBuf + buf_size; + + get_bits_init(); + + return true; +} + +void symbol_codec::get_bits_init() +{ + m_bit_buf = 0; + m_bit_count = 0; +} + +uint32 symbol_codec::decode_bits(uint32 num_bits) +{ + if (!num_bits) + return 0; + + if (num_bits > 16) + { + uint32 a = get_bits(num_bits - 16); + uint32 b = get_bits(16); + + return (a << 16) | b; + } + else + return get_bits(num_bits); +} + +uint32 symbol_codec::get_bits(uint32 num_bits) +{ + CRND_ASSERT(num_bits <= 32U); + + while (m_bit_count < (int)num_bits) 
+ { + bit_buf_type c = 0; + if (m_pDecode_buf_next != m_pDecode_buf_end) + c = *m_pDecode_buf_next++; + + m_bit_count += 8; + CRND_ASSERT(m_bit_count <= cBitBufSize); + + m_bit_buf |= (c << (cBitBufSize - m_bit_count)); + } + + uint32 result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); + + m_bit_buf <<= num_bits; + m_bit_count -= num_bits; + + return result; +} + +uint32 symbol_codec::decode(const static_huffman_data_model& model) +{ + const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; + + if (m_bit_count < 24) + { + if (m_bit_count < 16) + { + uint32 c0 = 0, c1 = 0; + const uint8* p = m_pDecode_buf_next; + if (p < m_pDecode_buf_end) c0 = *p++; + if (p < m_pDecode_buf_end) c1 = *p++; + m_pDecode_buf_next = p; + m_bit_count += 16; + uint32 c = (c0 << 8) | c1; + m_bit_buf |= (c << (32 - m_bit_count)); + } + else + { + uint32 c = (m_pDecode_buf_next < m_pDecode_buf_end) ? *m_pDecode_buf_next++ : 0; + m_bit_count += 8; + m_bit_buf |= (c << (32 - m_bit_count)); + } + } + + uint32 k = (m_bit_buf >> 16) + 1; + uint32 sym, len; + + if (k <= pTables->m_table_max_code) + { + uint32 t = pTables->m_lookup[m_bit_buf >> (32 - pTables->m_table_bits)]; + + CRND_ASSERT(t != cUINT32_MAX); + sym = t & cUINT16_MAX; + len = t >> 16; + + CRND_ASSERT(model.m_code_sizes[sym] == len); + } + else + { + len = pTables->m_decode_start_code_size; + + for ( ; ; ) + { + if (k <= pTables->m_max_codes[len - 1]) + break; + len++; + } + + int val_ptr = pTables->m_val_ptrs[len - 1] + (m_bit_buf >> (32 - len)); + + if (((uint32)val_ptr >= model.m_total_syms)) + { + // corrupted stream, or a bug + CRND_ASSERT(0); + return 0; + } + + sym = pTables->m_sorted_symbol_order[val_ptr]; + } + + m_bit_buf <<= len; + m_bit_count -= len; + + return sym; +} + +#ifdef PLATFORM_NACL + + uint32 symbol_codec::stop_decoding() + { + #if 0 + uint32 i = get_bits(4); + uint32 k = get_bits(3); + i, k; + CRND_ASSERT((i == 15) && (k == 3)); + #endif + + uint32 n = m_pDecode_buf_next - 
m_pDecode_buf; + + return n; + } + +#else + + uint64 symbol_codec::stop_decoding() + { + #if 0 + uint32 i = get_bits(4); + uint32 k = get_bits(3); + i, k; + CRND_ASSERT((i == 15) && (k == 3)); + #endif + + uint64 n = m_pDecode_buf_next - m_pDecode_buf; + + return n; + } +#endif +} // namespace crnd + +// File: crnd_dxt_hc_common.cpp +namespace crnd +{ + chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = + { + { 1, { { 0, 0, 8, 8, 0 } } }, + + { 2, { { 0, 0, 8, 4, 1 }, { 0, 4, 8, 4, 2 } } }, + { 2, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 8, 4 } } }, + + { 3, { { 0, 0, 8, 4, 1 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } }, + { 3, { { 0, 4, 8, 4, 2 }, { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 } } }, + + { 3, { { 0, 0, 4, 8, 3 }, { 4, 0, 4, 4, 6 }, { 4, 4, 4, 4, 8 } } }, + { 3, { { 4, 0, 4, 8, 4 }, { 0, 0, 4, 4, 5 }, { 0, 4, 4, 4, 7 } } }, + + { 4, { { 0, 0, 4, 4, 5 }, { 4, 0, 4, 4, 6 }, { 0, 4, 4, 4, 7 }, { 4, 4, 4, 4, 8 } } } + }; + + chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = + { + // 2x2 + { 0, 0, 8, 8, 0 }, + + // 2x1 + { 0, 0, 8, 4, 1 }, + { 0, 4, 8, 4, 2 }, + + // 1x2 + { 0, 0, 4, 8, 3 }, + { 4, 0, 4, 8, 4 }, + + // 1x1 + { 0, 0, 4, 4, 5 }, + { 4, 0, 4, 4, 6 }, + { 0, 4, 4, 4, 7 }, + { 4, 4, 4, 4, 8 } + }; + +} // namespace crnd + +// File: crnd_dxt.cpp +namespace crnd +{ + const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = { 0U, 3U, 1U, 2U }; + const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = { 0U, 2U, 3U, 1U }; + + const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = { 0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U }; + const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = { 0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U }; + + const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 5, 4, 3, 2, 6, 7 }; + const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = { 1, 0, 7, 6, 5, 4, 3, 2 }; + + uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint32 bias) + { + uint32 r = color.r; + uint32 g = color.g; + uint32 b = color.b; + + if (scaled) + 
{ + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + + r = math::minimum(r, 31U); + g = math::minimum(g, 63U); + b = math::minimum(b, 31U); + + return static_cast(b | (g << 5U) | (r << 11U)); + } + + uint16 dxt1_block::pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias) + { + return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); + } + + color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint32 alpha) + { + uint32 b = packed_color & 31U; + uint32 g = (packed_color >> 5U) & 63U; + uint32 r = (packed_color >> 11U) & 31U; + + if (scaled) + { + b = (b << 3U) | (b >> 2U); + g = (g << 2U) | (g >> 4U); + r = (r << 3U) | (r >> 2U); + } + + return color_quad_u8(r, g, b, alpha); + } + + void dxt1_block::unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled) + { + color_quad_u8 c(unpack_color(packed_color, scaled, 0)); + r = c.r; + g = c.g; + b = c.b; + } + + uint32 dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2].set( (c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); + pDst[3].set(0, 0, 0, 0); + + return 3; + } + + uint32 dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + color_quad_u8 c0(unpack_color(color0, true)); + color_quad_u8 c1(unpack_color(color1, true)); + + pDst[0] = c0; + pDst[1] = c1; + + // 12/14/09 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? + // Turns out some GPU's round and some don't. Great. 
+ //pDst[2].set( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); + //pDst[3].set( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); + + pDst[2].set( (c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); + pDst[3].set( (c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); + + return 4; + } + + uint32 dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) + { + if (color0 > color1) + return get_block_colors4(pDst, color0, color1); + else + return get_block_colors3(pDst, color0, color1); + } + + color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha) + { + CRND_ASSERT(index < 2); + return unpack_color( static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha ); + } + + uint32 dxt1_block::pack_endpoints(uint32 lo, uint32 hi) + { + CRND_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); + return lo | (hi << 16U); + } + + void dxt3_block::set_alpha(uint32 x, uint32 y, uint32 value, bool scaled) + { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + if (scaled) + { + CRND_ASSERT(value <= 0xFF); + value = (value * 15U + 128U) / 255U; + } + else + { + CRND_ASSERT(value <= 0xF); + } + + uint32 ofs = (y << 1U) + (x >> 1U); + uint32 c = m_alpha[ofs]; + + c &= ~(0xF << ((x & 1U) << 2U)); + c |= (value << ((x & 1U) << 2U)); + + m_alpha[ofs] = static_cast(c); + } + + uint32 dxt3_block::get_alpha(uint32 x, uint32 y, bool scaled) const + { + CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); + + uint32 value = m_alpha[(y << 1U) + (x >> 1U)]; + if (x & 1) + value >>= 4; + value &= 0xF; + + if (scaled) + value = (value << 4U) | value; + + return value; + } + + uint32 dxt5_block::get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 4 + h ) / 5); + pDst[3].a = static_cast((l * 
3 + h * 2) / 5); + pDst[4].a = static_cast((l * 2 + h * 3) / 5); + pDst[5].a = static_cast((l + h * 4) / 5); + pDst[6].a = 0; + pDst[7].a = 255; + return 6; + } + + uint32 dxt5_block::get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h) + { + pDst[0].a = static_cast(l); + pDst[1].a = static_cast(h); + pDst[2].a = static_cast((l * 6 + h ) / 7); + pDst[3].a = static_cast((l * 5 + h * 2) / 7); + pDst[4].a = static_cast((l * 4 + h * 3) / 7); + pDst[5].a = static_cast((l * 3 + h * 4) / 7); + pDst[6].a = static_cast((l * 2 + h * 5) / 7); + pDst[7].a = static_cast((l + h * 6) / 7); + return 8; + } + + uint32 dxt5_block::get_block_values(color_quad_u8* pDst, uint32 l, uint32 h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + + uint32 dxt5_block::get_block_values6(uint32* pDst, uint32 l, uint32 h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 4 + h ) / 5; + pDst[3] = (l * 3 + h * 2) / 5; + pDst[4] = (l * 2 + h * 3) / 5; + pDst[5] = (l + h * 4) / 5; + pDst[6] = 0; + pDst[7] = 255; + return 6; + } + + uint32 dxt5_block::get_block_values8(uint32* pDst, uint32 l, uint32 h) + { + pDst[0] = l; + pDst[1] = h; + pDst[2] = (l * 6 + h ) / 7; + pDst[3] = (l * 5 + h * 2) / 7; + pDst[4] = (l * 4 + h * 3) / 7; + pDst[5] = (l * 3 + h * 4) / 7; + pDst[6] = (l * 2 + h * 5) / 7; + pDst[7] = (l + h * 6) / 7; + return 8; + } + + uint32 dxt5_block::unpack_endpoint(uint32 packed, uint32 index) + { + CRND_ASSERT(index < 2); + return (packed >> (8 * index)) & 0xFF; + } + + uint32 dxt5_block::pack_endpoints(uint32 lo, uint32 hi) + { + CRND_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); + return lo | (hi << 8U); + } + + uint32 dxt5_block::get_block_values(uint32* pDst, uint32 l, uint32 h) + { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } + +} // namespace crnd + +// File: crnd_decode.cpp +#define CRND_CREATE_BYTE_STREAMS 0 + +namespace crnd +{ +#if CRND_CREATE_BYTE_STREAMS + 
static void write_array_to_file(const wchar_t* pFilename, const vector& buf) + { + FILE* pFile; + _wfopen_s(&pFile, pFilename, L"wb"); + fwrite(&buf[0], buf.size(), 1, pFile); + fclose(pFile); + } +#endif + + struct crnd_chunk_tile_desc + { + // These values are in blocks + uint8 m_x_ofs; + uint8 m_y_ofs; + uint8 m_width; + uint8 m_height; + }; + + struct crnd_chunk_encoding_desc + { + uint32 m_num_tiles; + chunk_tile_desc m_tiles[4]; + }; + +#if 0 + static crnd_chunk_encoding_desc g_crnd_chunk_encodings[cNumChunkEncodings] = + { + { 1, { { 0, 0, 2, 2 } } }, + + { 2, { { 0, 0, 2, 1 }, { 0, 1, 2, 1 } } }, + { 2, { { 0, 0, 1, 2 }, { 1, 0, 1, 2 } } }, + + { 3, { { 0, 0, 2, 1 }, { 0, 1, 1, 1 }, { 1, 1, 1, 1 } } }, + { 3, { { 0, 1, 2, 1 }, { 0, 0, 1, 1 }, { 1, 0, 1, 1 } } }, + + { 3, { { 0, 0, 1, 2 }, { 1, 0, 1, 1 }, { 1, 1, 1, 1 } } }, + { 3, { { 1, 0, 1, 2 }, { 0, 0, 1, 1 }, { 0, 1, 1, 1 } } }, + + { 1, { { 0, 0, 1, 1 }, { 1, 0, 1, 1 }, { 0, 1, 1, 1 }, { 1, 1, 1, 1 } } } + }; +#endif + + struct crnd_encoding_tile_indices + { + uint8 m_tiles[4]; + }; + + static crnd_encoding_tile_indices g_crnd_chunk_encoding_tiles[cNumChunkEncodings] = + { + { { 0, 0, 0, 0 } }, + + { { 0, 0, 1, 1 } }, + { { 0, 1, 0, 1 } }, + + { { 0, 0, 1, 2 } }, + { { 1, 2, 0, 0 } }, + + { { 0, 1, 0, 2 } }, + { { 1, 0, 2, 0 } }, + + { { 0, 1, 2, 3 } } + }; + + static uint8 g_crnd_chunk_encoding_num_tiles[cNumChunkEncodings] = { 1, 2, 2, 3, 3, 3, 3, 4 }; + + class crn_unpacker + { + public: + crn_unpacker() : + m_magic(cMagicValue), + m_pData(NULL), + m_data_size(0), + m_pHeader(NULL) + { + } + + ~crn_unpacker() + { + m_magic = 0; + } + + inline bool is_valid() const { return m_magic == cMagicValue; } + + bool init(const void* pData, uint32 data_size) + { + m_pHeader = crnd_get_header(m_tmp_header, pData, data_size); + if (!m_pHeader) + return false; + + m_pData = static_cast(pData); + m_data_size = data_size; + + if (!init_tables()) + return false; + + if (!decode_palettes()) + return false; + + 
return true; + } + + bool unpack_level( + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + uint32 cur_level_ofs = m_pHeader->m_level_ofs[level_index]; + + uint32 next_level_ofs = m_data_size; + if ((level_index + 1) < (m_pHeader->m_levels)) + next_level_ofs = m_pHeader->m_level_ofs[level_index + 1]; + + CRND_ASSERT(next_level_ofs > cur_level_ofs); + + return unpack_level(m_pData + cur_level_ofs, next_level_ofs - cur_level_ofs, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool unpack_level( + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + dst_size_in_bytes; + +#ifdef CRND_BUILD_DEBUG + for (uint32 f = 0; f < m_pHeader->m_faces; f++) + if (!pDst[f]) + return false; +#endif + + const uint32 width = math::maximum(m_pHeader->m_width >> level_index, 1U); + const uint32 height = math::maximum(m_pHeader->m_height >> level_index, 1U); + const uint32 blocks_x = (width + 3U) >> 2U; + const uint32 blocks_y = (height + 3U) >> 2U; + const uint32 block_size = ((m_pHeader->m_format == cCRNFmtDXT1) || (m_pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; + + uint32 minimal_row_pitch = block_size * blocks_x; + if (!row_pitch_in_bytes) + row_pitch_in_bytes = minimal_row_pitch; + else if ((row_pitch_in_bytes < minimal_row_pitch) || (row_pitch_in_bytes & 3)) + return false; + if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) + return false; + + const uint32 chunks_x = (blocks_x + 1) >> 1; + const uint32 chunks_y = (blocks_y + 1) >> 1; + +#if CRND_CREATE_BYTE_STREAMS + crnd_trace("Index stream: %u bytes\n", src_size_in_bytes); +#endif + + if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) + return false; + + bool status = false; + switch (m_pHeader->m_format) + { + case cCRNFmtDXT1: + status = unpack_dxt1((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + case cCRNFmtDXT5: + case cCRNFmtDXT5_CCxY: + case cCRNFmtDXT5_xGBR: + case cCRNFmtDXT5_AGBR: + case cCRNFmtDXT5_xGxR: + status = unpack_dxt5((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + case cCRNFmtDXT5A: + status = unpack_dxt5a((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + case cCRNFmtDXN_XY: + case cCRNFmtDXN_YX: + status = unpack_dxn((uint8**)pDst, dst_size_in_bytes, row_pitch_in_bytes, blocks_x, blocks_y, chunks_x, chunks_y); + break; + default: + return false; + } + if (!status) + return false; + + m_codec.stop_decoding(); + return true; + } + + inline const void* get_data() const { return m_pData; } + inline uint32 get_data_size() const { return m_data_size; } + + private: + enum { cMagicValue = 0x1EF9CABD }; + uint32 m_magic; + + const uint8* m_pData; + uint32 m_data_size; + crn_header m_tmp_header; + const crn_header* m_pHeader; + + symbol_codec m_codec; + + static_huffman_data_model m_chunk_encoding_dm; + static_huffman_data_model m_endpoint_delta_dm[2]; + static_huffman_data_model m_selector_delta_dm[2]; + + crnd::vector m_color_endpoints; + crnd::vector 
m_color_selectors; + + crnd::vector m_alpha_endpoints; + crnd::vector m_alpha_selectors; + + bool init_tables() + { + if (!m_codec.start_decoding(m_pData + m_pHeader->m_tables_ofs, m_pHeader->m_tables_size)) + return false; + + if (!m_codec.decode_receive_static_data_model(m_chunk_encoding_dm)) + return false; + + if ((!m_pHeader->m_color_endpoints.m_num) && (!m_pHeader->m_alpha_endpoints.m_num)) + return false; + + if (m_pHeader->m_color_endpoints.m_num) + { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[0])) return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[0])) return false; + } + + if (m_pHeader->m_alpha_endpoints.m_num) + { + if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[1])) return false; + if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[1])) return false; + } + + m_codec.stop_decoding(); + + return true; + } + + bool decode_palettes() + { + if (m_pHeader->m_color_endpoints.m_num) + { + if (!decode_color_endpoints()) return false; + if (!decode_color_selectors()) return false; + } + + if (m_pHeader->m_alpha_endpoints.m_num) + { + if (!decode_alpha_endpoints()) return false; + if (!decode_alpha_selectors()) return false; + } + + return true; + } + + bool decode_color_endpoints() + { + const uint32 num_color_endpoints = m_pHeader->m_color_endpoints.m_num; + + if (!m_color_endpoints.resize(num_color_endpoints)) + return false; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_endpoints.m_ofs, m_pHeader->m_color_endpoints.m_size)) + return false; + + static_huffman_data_model dm[2]; + for (uint32 i = 0; i < 2; i++) + if (!m_codec.decode_receive_static_data_model(dm[i])) + return false; + + uint32 a = 0, b = 0, c = 0; + uint32 d = 0, e = 0, f = 0; + + uint32* CRND_RESTRICT pDst = &m_color_endpoints[0]; + + CRND_HUFF_DECODE_BEGIN(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + vector byte_stream; +#endif + + for (uint32 i = 0; i < num_color_endpoints; i++) + { + uint32 
da, db, dc, dd, de, df; + CRND_HUFF_DECODE(m_codec, dm[0], da); a = (a + da) & 31; + CRND_HUFF_DECODE(m_codec, dm[1], db); b = (b + db) & 63; + CRND_HUFF_DECODE(m_codec, dm[0], dc); c = (c + dc) & 31; + + CRND_HUFF_DECODE(m_codec, dm[0], dd); d = (d + dd) & 31; + CRND_HUFF_DECODE(m_codec, dm[1], de); e = (e + de) & 63; + CRND_HUFF_DECODE(m_codec, dm[0], df); f = (f + df) & 31; + +#if CRND_CREATE_BYTE_STREAMS + byte_stream.push_back(da); + byte_stream.push_back(db); + byte_stream.push_back(dc); + byte_stream.push_back(dd); + byte_stream.push_back(de); + byte_stream.push_back(df); +#endif + + if (c_crnd_little_endian_platform) + *pDst++ = c | (b << 5U) | (a << 11U) | (f << 16U) | (e << 21U) | (d << 27U); + else + *pDst++ = f | (e << 5U) | (d << 11U) | (c << 16U) | (b << 21U) | (a << 27U); + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + +#if CRND_CREATE_BYTE_STREAMS + write_array_to_file(L"colorendpoints.bin", byte_stream); + crnd_trace("color endpoints: %u\n", (uint)m_pHeader->m_color_endpoints.m_size); +#endif + + return true; + } + + bool decode_color_selectors() + { + const uint32 cMaxSelectorValue = 3U; + const uint32 cMaxUniqueSelectorDeltas = cMaxSelectorValue * 2U + 1U; + + const uint32 num_color_selectors = m_pHeader->m_color_selectors.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + int32 delta0[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 delta1[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 l = -(int32)cMaxSelectorValue, m = -(int32)cMaxSelectorValue; + for (uint32 i = 0; i < (cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas); i++) + { + delta0[i] = l; + delta1[i] = m; + + if (++l > (int32)cMaxSelectorValue) + { + l = -(int32)cMaxSelectorValue; + m++; + } + } + + uint32 cur[16]; + utils::zero_object(cur); + 
+ if (!m_color_selectors.resize(num_color_selectors)) + return false; + + uint32* CRND_RESTRICT pDst = &m_color_selectors[0]; + + const uint8* pFrom_linear = g_dxt1_from_linear; + + CRND_HUFF_DECODE_BEGIN(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + vector byte_stream; +#endif + + for (uint32 i = 0; i < num_color_selectors; i++) + { + for (uint32 j = 0; j < 8; j++) + { + int32 sym; + CRND_HUFF_DECODE(m_codec, dm, sym); + +#if CRND_CREATE_BYTE_STREAMS + byte_stream.push_back(sym); +#endif + + cur[j*2+0] = (delta0[sym] + cur[j*2+0]) & 3; + cur[j*2+1] = (delta1[sym] + cur[j*2+1]) & 3; + } + + if (c_crnd_little_endian_platform) + { + *pDst++ = + (pFrom_linear[cur[0 ]] ) | (pFrom_linear[cur[1 ]] << 2) | (pFrom_linear[cur[2 ]] << 4) | (pFrom_linear[cur[3 ]] << 6) | + (pFrom_linear[cur[4 ]] << 8) | (pFrom_linear[cur[5 ]] << 10) | (pFrom_linear[cur[6 ]] << 12) | (pFrom_linear[cur[7 ]] << 14) | + (pFrom_linear[cur[8 ]] << 16) | (pFrom_linear[cur[9 ]] << 18) | (pFrom_linear[cur[10]] << 20) | (pFrom_linear[cur[11]] << 22) | + (pFrom_linear[cur[12]] << 24) | (pFrom_linear[cur[13]] << 26) | (pFrom_linear[cur[14]] << 28) | (pFrom_linear[cur[15]] << 30); + } + else + { + *pDst++ = + (pFrom_linear[cur[8 ]] ) | (pFrom_linear[cur[9 ]] << 2) | (pFrom_linear[cur[10]] << 4) | (pFrom_linear[cur[11]] << 6) | + (pFrom_linear[cur[12]] << 8) | (pFrom_linear[cur[13]] << 10) | (pFrom_linear[cur[14]] << 12) | (pFrom_linear[cur[15]] << 14) | + (pFrom_linear[cur[0 ]] << 16) | (pFrom_linear[cur[1 ]] << 18) | (pFrom_linear[cur[2 ]] << 20) | (pFrom_linear[cur[3 ]] << 22) | + (pFrom_linear[cur[4 ]] << 24) | (pFrom_linear[cur[5 ]] << 26) | (pFrom_linear[cur[6 ]] << 28) | (pFrom_linear[cur[7 ]] << 30); + } + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + +#if CRND_CREATE_BYTE_STREAMS + write_array_to_file(L"colorselectors.bin", byte_stream); + crnd_trace("color selectors: %u\n", (uint)m_pHeader->m_color_selectors.m_size); +#endif + + return true; + } + + bool 
decode_alpha_endpoints() + { + const uint32 num_alpha_endpoints = m_pHeader->m_alpha_endpoints.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_endpoints.m_ofs, m_pHeader->m_alpha_endpoints.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + if (!m_alpha_endpoints.resize(num_alpha_endpoints)) + return false; + + uint16* CRND_RESTRICT pDst = &m_alpha_endpoints[0]; + uint32 a = 0, b = 0; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 i = 0; i < num_alpha_endpoints; i++) + { + uint sa; CRND_HUFF_DECODE(m_codec, dm, sa); + uint sb; CRND_HUFF_DECODE(m_codec, dm, sb); + + a = (sa + a) & 255; + b = (sb + b) & 255; + + *pDst++ = (uint16)(a | (b << 8)); + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + + return true; + } + + bool decode_alpha_selectors() + { + const uint32 cMaxSelectorValue = 7U; + const uint32 cMaxUniqueSelectorDeltas = cMaxSelectorValue * 2U + 1U; + + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size)) + return false; + + static_huffman_data_model dm; + if (!m_codec.decode_receive_static_data_model(dm)) + return false; + + int32 delta0[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 delta1[cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas]; + int32 l = -(int32)cMaxSelectorValue, m = -(int32)cMaxSelectorValue; + for (uint32 i = 0; i < (cMaxUniqueSelectorDeltas * cMaxUniqueSelectorDeltas); i++) + { + delta0[i] = l; + delta1[i] = m; + + if (++l > (int32)cMaxSelectorValue) + { + l = -(int32)cMaxSelectorValue; + m++; + } + } + + uint32 cur[16]; + utils::zero_object(cur); + + if (!m_alpha_selectors.resize(num_alpha_selectors * 3)) + return false; + + uint16* CRND_RESTRICT pDst = &m_alpha_selectors[0]; + + const uint8* pFrom_linear = g_dxt5_from_linear; + + CRND_HUFF_DECODE_BEGIN(m_codec); + 
+ for (uint32 i = 0; i < num_alpha_selectors; i++) + { + for (uint32 j = 0; j < 8; j++) + { + int32 sym; + CRND_HUFF_DECODE(m_codec, dm, sym); + + cur[j*2+0] = (delta0[sym] + cur[j*2+0]) & 7; + cur[j*2+1] = (delta1[sym] + cur[j*2+1]) & 7; + //cur[j*2+0] = ((sym%15)-7 + cur[j*2+0]) & 7; + //cur[j*2+1] = ((sym/15)-7 + cur[j*2+1]) & 7; + } + +#if 0 + dxt5_block blk; + for (uint32 y = 0; y < 4; y++) + for (uint32 x = 0; x < 4; x++) + blk.set_selector(x, y, pFrom_linear[cur[x+y*4]]); + + *pDst++ = blk.get_selectors_as_word(0); + *pDst++ = blk.get_selectors_as_word(1); + *pDst++ = blk.get_selectors_as_word(2); +#else + *pDst++ = (uint16)((pFrom_linear[cur[0 ]] ) | (pFrom_linear[cur[1 ]] << 3) | (pFrom_linear[cur[2 ]] << 6) | (pFrom_linear[cur[3 ]] << 9) | + (pFrom_linear[cur[4 ]] << 12) | (pFrom_linear[cur[5 ]] << 15)); + + *pDst++ = (uint16)((pFrom_linear[cur[5 ]] >> 1) | (pFrom_linear[cur[6 ]] << 2) | (pFrom_linear[cur[7 ]] << 5) | + (pFrom_linear[cur[8 ]] << 8) | (pFrom_linear[cur[9 ]] << 11) | (pFrom_linear[cur[10]] << 14)); + + *pDst++ = (uint16)((pFrom_linear[cur[10]] >> 2) | (pFrom_linear[cur[11]] << 1) | (pFrom_linear[cur[12]] << 4) | + (pFrom_linear[cur[13]] << 7) | (pFrom_linear[cur[14]] << 10) | (pFrom_linear[cur[15]] << 13)); +#endif + } + + CRND_HUFF_DECODE_END(m_codec); + + m_codec.stop_decoding(); + + return true; + } + + static inline uint32 tiled_offset_2d_outer(uint32 y, uint32 AlignedWidth, uint32 LogBpp) + { + uint32 Macro = ((y >> 5) * (AlignedWidth >> 5)) << (LogBpp + 7); + uint32 Micro = ((y & 6) << 2) << LogBpp; + + return Macro + + ((Micro & ~15) << 1) + + (Micro & 15) + + ((y & 8) << (3 + LogBpp)) + ((y & 1) << 4); + } + + static inline uint32 tiled_offset_2d_inner(uint32 x, uint32 y, uint32 LogBpp, uint32 BaseOffset) + { + uint32 Macro = (x >> 5) << (LogBpp + 7); + uint32 Micro = (x & 7) << LogBpp; + uint32 Offset = BaseOffset + Macro + ((Micro & ~15) << 1) + (Micro & 15); + + return ((Offset & ~511) << 3) + ((Offset & 448) << 2) + (Offset & 
63) + + ((y & 16) << 7) + + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); + } + + static inline void limit(uint& x, uint n) + { + int v = x - n; + int msk = (v >> 31); + x = (x & msk) | (v & ~msk); + } + + bool unpack_dxt1(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_color_selectors = m_color_selectors.size(); + + uint32 prev_color_endpoint_index = 0; + uint32 prev_color_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + const uint32 row_pitch_in_dwords = row_pitch_in_bytes >> 2U; + + const int32 cBytesPerBlock = 8; + + CRND_HUFF_DECODE_BEGIN(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + vector tile_encoding_stream; + vector endpoint_indices_stream; + vector selector_indices_stream; +#endif + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 color_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); +#if CRND_CREATE_BYTE_STREAMS + tile_encoding_stream.push_back(chunk_encoding_bits & 7); + tile_encoding_stream.push_back((chunk_encoding_bits >> 3) & 7); + tile_encoding_stream.push_back((chunk_encoding_bits >> 6) & 7); +#endif + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + 
chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; + CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[0], delta); +#if CRND_CREATE_BYTE_STREAMS + endpoint_indices_stream.push_back(delta); +#endif + prev_color_endpoint_index += delta; + limit(prev_color_endpoint_index, num_color_endpoints); + color_endpoints[i] = m_color_endpoints[prev_color_endpoint_index]; + } + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + if ((!skip_bottom_row) && (!skip_right_col)) + { + //CRND_ASSERT( ((uint8*)&pD[4 + row_pitch_in_dwords] - pDst) <= dst_size_in_bytes ); + + pD[0] = color_endpoints[pTile_indices[0]]; + CRND_WRITE_BARRIER + uint32 delta0; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta0); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta0); +#endif + prev_color_selector_index += delta0; + limit(prev_color_selector_index, num_color_selectors); + pD[1] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + + pD[2] = color_endpoints[pTile_indices[1]]; + CRND_WRITE_BARRIER + uint32 delta1; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta1); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta1); +#endif + prev_color_selector_index += delta1; + limit(prev_color_selector_index, num_color_selectors); + pD[3] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + + pD[0 + row_pitch_in_dwords] = color_endpoints[pTile_indices[2]]; + CRND_WRITE_BARRIER + uint32 delta2; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta2); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta2); +#endif + prev_color_selector_index += delta2; + limit(prev_color_selector_index, num_color_selectors); + 
pD[1 + row_pitch_in_dwords] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + + pD[2 + row_pitch_in_dwords] = color_endpoints[pTile_indices[3]]; + CRND_WRITE_BARRIER + uint32 delta3; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta3); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta3); +#endif + prev_color_selector_index += delta3; + limit(prev_color_selector_index, num_color_selectors); + pD[3 + row_pitch_in_dwords] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + } + else + { + for (uint32 by = 0; by < 2; by++) + { + pD = (uint32*)((uint8*)pBlock + row_pitch_in_bytes * by); + for (uint32 bx = 0; bx < 2; bx++, pD += 2) + { + uint32 delta; + CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta); +#if CRND_CREATE_BYTE_STREAMS + selector_indices_stream.push_back(delta); +#endif + prev_color_selector_index += delta; + limit(prev_color_selector_index, num_color_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + pD[0] = color_endpoints[pTile_indices[bx + by * 2]]; + CRND_WRITE_BARRIER + pD[1] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER + } + } + } + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + +#if CRND_CREATE_BYTE_STREAMS + write_array_to_file(L"tile_encodings.bin", tile_encoding_stream); + write_array_to_file(L"endpoint_indices.bin", endpoint_indices_stream); + write_array_to_file(L"selector_indices.bin", selector_indices_stream); +#endif + + return true; + } + + bool unpack_dxt5(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + const uint32 num_color_endpoints = m_color_endpoints.size(); + const uint32 num_color_selectors = m_color_selectors.size(); + const uint32 num_alpha_endpoints = 
m_alpha_endpoints.size(); + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + uint32 prev_color_endpoint_index = 0; + uint32 prev_color_selector_index = 0; + uint32 prev_alpha_endpoint_index = 0; + uint32 prev_alpha_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + //const uint32 row_pitch_in_dwords = row_pitch_in_bytes >> 2U; + + const int32 cBytesPerBlock = 16; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 color_endpoints[4]; + uint32 alpha_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha_endpoint_index += delta; + limit(prev_alpha_endpoint_index, num_alpha_endpoints); + alpha_endpoints[i] = m_alpha_endpoints[prev_alpha_endpoint_index]; + } + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, 
m_endpoint_delta_dm[0], delta); + prev_color_endpoint_index += delta; + limit(prev_color_endpoint_index, num_color_endpoints); + color_endpoints[i] = m_color_endpoints[prev_color_endpoint_index]; + } + + pD = (uint32*)pBlock; + for (uint32 by = 0; by < 2; by++) + { + for (uint32 bx = 0; bx < 2; bx++, pD += 4) + { + uint32 delta0; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta0); + prev_alpha_selector_index += delta0; + limit(prev_alpha_selector_index, num_alpha_selectors); + + uint32 delta1; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[0], delta1); + prev_color_selector_index += delta1; + limit(prev_color_selector_index, num_color_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + const uint32 tile_index = pTile_indices[bx + by * 2]; + const uint16* pAlpha_selectors = &m_alpha_selectors[prev_alpha_selector_index * 3]; + +#ifdef CRND_BIG_ENDIAN_PLATFORM + pD[0] = (alpha_endpoints[tile_index] << 16) | pAlpha_selectors[0]; + CRND_WRITE_BARRIER + pD[1] = (pAlpha_selectors[1] << 16) | pAlpha_selectors[2]; + CRND_WRITE_BARRIER + pD[2] = color_endpoints[tile_index]; + CRND_WRITE_BARRIER + pD[3] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER +#else + pD[0] = alpha_endpoints[tile_index] | (pAlpha_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[1] = pAlpha_selectors[1] | (pAlpha_selectors[2] << 16); + CRND_WRITE_BARRIER + pD[2] = color_endpoints[tile_index]; + CRND_WRITE_BARRIER + pD[3] = m_color_selectors[prev_color_selector_index]; + CRND_WRITE_BARRIER +#endif + } + } + + pD = (uint32*)((uint8*)pD - cBytesPerBlock * 2 + row_pitch_in_bytes); + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + + return true; + } + + bool unpack_dxn(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + 
const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + uint32 prev_alpha0_endpoint_index = 0; + uint32 prev_alpha0_selector_index = 0; + uint32 prev_alpha1_endpoint_index = 0; + uint32 prev_alpha1_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + //const uint32 row_pitch_in_dwords = row_pitch_in_bytes >> 2U; + + const int32 cBytesPerBlock = 16; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 alpha0_endpoints[4]; + uint32 alpha1_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha0_endpoint_index += delta; + limit(prev_alpha0_endpoint_index, num_alpha_endpoints); + alpha0_endpoints[i] = m_alpha_endpoints[prev_alpha0_endpoint_index]; + } + + for (uint32 i = 0; i < num_tiles; i++) + { + 
uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha1_endpoint_index += delta; + limit(prev_alpha1_endpoint_index, num_alpha_endpoints); + alpha1_endpoints[i] = m_alpha_endpoints[prev_alpha1_endpoint_index]; + } + + pD = (uint32*)pBlock; + for (uint32 by = 0; by < 2; by++) + { + for (uint32 bx = 0; bx < 2; bx++, pD += 4) + { + uint32 delta0; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta0); + prev_alpha0_selector_index += delta0; + limit(prev_alpha0_selector_index, num_alpha_selectors); + + uint32 delta1; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta1); + prev_alpha1_selector_index += delta1; + limit(prev_alpha1_selector_index, num_alpha_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + const uint32 tile_index = pTile_indices[bx + by * 2]; + const uint16* pAlpha0_selectors = &m_alpha_selectors[prev_alpha0_selector_index * 3]; + const uint16* pAlpha1_selectors = &m_alpha_selectors[prev_alpha1_selector_index * 3]; + +#ifdef CRND_BIG_ENDIAN_PLATFORM + pD[0] = (alpha0_endpoints[tile_index] << 16) | pAlpha0_selectors[0]; + CRND_WRITE_BARRIER + pD[1] = (pAlpha0_selectors[1] << 16) | pAlpha0_selectors[2]; + CRND_WRITE_BARRIER + pD[2] = (alpha1_endpoints[tile_index] << 16) | pAlpha1_selectors[0]; + CRND_WRITE_BARRIER + pD[3] = (pAlpha1_selectors[1] << 16) | pAlpha1_selectors[2]; + CRND_WRITE_BARRIER +#else + pD[0] = alpha0_endpoints[tile_index] | (pAlpha0_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + CRND_WRITE_BARRIER + pD[2] = alpha1_endpoints[tile_index] | (pAlpha1_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[3] = pAlpha1_selectors[1] | (pAlpha1_selectors[2] << 16); + CRND_WRITE_BARRIER +#endif + } + } + + pD = (uint32*)((uint8*)pD - cBytesPerBlock * 2 + row_pitch_in_bytes); + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + + return true; + } 
+ + bool unpack_dxt5a(uint8** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 blocks_x, uint32 blocks_y, uint32 chunks_x, uint32 chunks_y) + { + dst_size_in_bytes; + + uint32 chunk_encoding_bits = 1; + + const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); + const uint32 num_alpha_selectors = m_pHeader->m_alpha_selectors.m_num; + + uint32 prev_alpha0_endpoint_index = 0; + uint32 prev_alpha0_selector_index = 0; + + const uint32 num_faces = m_pHeader->m_faces; + + const int32 cBytesPerBlock = 8; + + CRND_HUFF_DECODE_BEGIN(m_codec); + + for (uint32 f = 0; f < num_faces; f++) + { + uint8* CRND_RESTRICT pRow = pDst[f]; + + for (uint32 y = 0; y < chunks_y; y++) + { + int32 start_x = 0; + int32 end_x = chunks_x; + int32 dir_x = 1; + int32 block_delta = cBytesPerBlock*2; + uint8* CRND_RESTRICT pBlock = pRow; + + if (y & 1) + { + start_x = chunks_x - 1; + end_x = -1; + dir_x = -1; + block_delta = -cBytesPerBlock*2; + pBlock += (chunks_x - 1) * cBytesPerBlock * 2; + } + + const bool skip_bottom_row = (y == (chunks_y - 1)) && (blocks_y & 1); + + for (int32 x = start_x; x != end_x; x += dir_x) + { + uint32 alpha0_endpoints[4]; + + if (chunk_encoding_bits == 1) + { + CRND_HUFF_DECODE(m_codec, m_chunk_encoding_dm, chunk_encoding_bits); + chunk_encoding_bits |= 512; + } + + const uint32 chunk_encoding_index = chunk_encoding_bits & 7; + chunk_encoding_bits >>= 3; + + const uint32 num_tiles = g_crnd_chunk_encoding_num_tiles[chunk_encoding_index]; + + const uint8* pTile_indices = g_crnd_chunk_encoding_tiles[chunk_encoding_index].m_tiles; + + const bool skip_right_col = (blocks_x & 1) && (x == ((int32)chunks_x - 1)); + + uint32* CRND_RESTRICT pD = (uint32*)pBlock; + + for (uint32 i = 0; i < num_tiles; i++) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_endpoint_delta_dm[1], delta); + prev_alpha0_endpoint_index += delta; + limit(prev_alpha0_endpoint_index, num_alpha_endpoints); + alpha0_endpoints[i] = m_alpha_endpoints[prev_alpha0_endpoint_index]; + } + + pD 
= (uint32*)pBlock; + for (uint32 by = 0; by < 2; by++) + { + for (uint32 bx = 0; bx < 2; bx++, pD += 2) + { + uint32 delta; CRND_HUFF_DECODE(m_codec, m_selector_delta_dm[1], delta); + prev_alpha0_selector_index += delta; + limit(prev_alpha0_selector_index, num_alpha_selectors); + + if (!((bx && skip_right_col) || (by && skip_bottom_row))) + { + const uint32 tile_index = pTile_indices[bx + by * 2]; + const uint16* pAlpha0_selectors = &m_alpha_selectors[prev_alpha0_selector_index * 3]; + +#if CRND_BIG_ENDIAN_PLATFORM + pD[0] = (alpha0_endpoints[tile_index] << 16) | pAlpha0_selectors[0]; + CRND_WRITE_BARRIER + pD[1] = (pAlpha0_selectors[1] << 16) | pAlpha0_selectors[2]; + CRND_WRITE_BARRIER +#else + pD[0] = alpha0_endpoints[tile_index] | (pAlpha0_selectors[0] << 16); + CRND_WRITE_BARRIER + pD[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); + CRND_WRITE_BARRIER +#endif + } + } + + pD = (uint32*)((uint8*)pD - cBytesPerBlock * 2 + row_pitch_in_bytes); + } + + pBlock += block_delta; + + } // x + + pRow += row_pitch_in_bytes * 2; + + } // y + + } // f + + CRND_HUFF_DECODE_END(m_codec); + + return true; + } + }; + + crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size) + { + if ((!pData) || (data_size < cCRNHeaderMinSize)) + return NULL; + + crn_unpacker* p = crnd_new(); + if (!p) + return NULL; + + if (!p->init(pData, data_size)) + { + crnd_delete(p); + return NULL; + } + + return p; + } + + bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size) + { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + if (ppData) + *ppData = pUnpacker->get_data(); + + if (pData_size) + *pData_size = pUnpacker->get_data_size(); + + return true; + } + + bool crnd_unpack_level( + crnd_unpack_context pContext, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + if ((!pContext) || (!pDst) || 
(dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool crnd_unpack_level_segmented( + crnd_unpack_context pContext, + const void* pSrc, uint32 src_size_in_bytes, + void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, + uint32 level_index) + { + if ((!pContext) || (!pSrc) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + return pUnpacker->unpack_level(pSrc, src_size_in_bytes, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); + } + + bool crnd_unpack_end(crnd_unpack_context pContext) + { + if (!pContext) + return false; + + crn_unpacker* pUnpacker = static_cast(pContext); + + if (!pUnpacker->is_valid()) + return false; + + crnd_delete(pUnpacker); + + return true; + } + +} // namespace crnd + +#endif // CRND_HEADER_FILE_ONLY + +//------------------------------------------------------------------------------ +// +// crn_decomp.h uses the ZLIB license: +// http://opensource.org/licenses/Zlib +// +// Copyright (c) 2010-2011 Tenacious Software LLC +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. 
If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source distribution. +// +//------------------------------------------------------------------------------ diff --git a/inc/crnlib.h b/inc/crnlib.h new file mode 100644 index 00000000..00322da0 --- /dev/null +++ b/inc/crnlib.h @@ -0,0 +1,552 @@ +// File: crnlib.h - Advanced DXTn texture compression library. +// Copyright (c) 2010-2011 Tenacious Software LLC +// See copyright notice and license at the end of this file. +// +// This header file contains the public crnlib declarations for DXTn and +// clustered DXTn compression/decompression. +// +// Note: This library does NOT need to be linked into your game executable if +// all you want to do is transcode .CRN files to raw DXTn bits at run-time. +// The crn_decomp.h header file library contains all the code necessary for +// decompression. +// +#ifndef CRNLIB_H +#define CRNLIB_H + +#define CRNLIB_VERSION 100 + +typedef unsigned char crn_uint8; +typedef unsigned short crn_uint16; +typedef unsigned int crn_uint32; +typedef signed char crn_int8; +typedef signed short crn_int16; +typedef signed int crn_int32; +typedef unsigned int crn_bool; + +// crnlib can compress to these file types. +enum crn_file_type +{ + // .CRN + cCRNFileTypeCRN = 0, + + // .DDS using regular DXT or clustered DXT + cCRNFileTypeDDS, + + cCRNFileTypeForceDWORD = 0xFFFFFFFF +}; + +// Supported compressed pixel formats. +// Basically all the standard DX9 formats, with some swizzled DXT5 formats +// (most of them supported by ATI's Compressonator), along with some ATI/X360 GPU specific formats. 
+enum crn_format
+{
+   cCRNFmtInvalid = -1,
+
+   cCRNFmtDXT1 = 0,
+
+   cCRNFmtFirstValid = cCRNFmtDXT1,
+
+   // cCRNFmtDXT3 is not currently supported when writing to CRN - only DDS.
+   cCRNFmtDXT3,
+
+   cCRNFmtDXT5,
+
+   // Various DXT5 derivatives
+   cCRNFmtDXT5_CCxY,    // Luma-chroma
+   cCRNFmtDXT5_xGxR,    // Swizzled 2-component
+   cCRNFmtDXT5_xGBR,    // Swizzled 3-component
+   cCRNFmtDXT5_AGBR,    // Swizzled 4-component
+
+   // ATI 3DC and X360 DXN
+   cCRNFmtDXN_XY,
+   cCRNFmtDXN_YX,
+
+   // DXT5 alpha blocks only
+   cCRNFmtDXT5A,
+
+   cCRNFmtTotal, // Number of valid formats; crn_comp_params::check() requires m_format < cCRNFmtTotal.
+
+   cCRNFmtForceDWORD = 0xFFFFFFFF
+};
+
+// Various library/file format limits.
+enum crn_limits
+{
+   // Max. mipmap level resolution on any axis (will be doubled to 8k in next release).
+   cCRNMaxLevelResolution = 4096,
+
+   cCRNMinPaletteSize = 8,
+   cCRNMaxPaletteSize = 8192,
+
+   cCRNMaxFaces = 6,
+   cCRNMaxLevels = 16,
+
+   cCRNMaxHelperThreads = 16,
+
+   cCRNMinQualityLevel = 0,
+   cCRNMaxQualityLevel = 255
+};
+
+// CRN/DDS compression flags.
+// See the m_flags member in the crn_comp_params struct, below.
+enum crn_comp_flags
+{
+   // Enables perceptual colorspace distance metrics if set.
+   // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps!
+   // Default: Set.
+   cCRNCompFlagPerceptual = 1,
+
+   // Enables (up to) 8x8 macroblock usage if set. If disabled, only 4x4 blocks are allowed.
+   // Compression ratio will be lower when disabled, but may cut down on blocky artifacts because the process used to determine
+   // where large macroblocks can be used without artifacts isn't perfect.
+   // Default: Set.
+   cCRNCompFlagHierarchical = 2,
+
+   // cCRNCompFlagQuick disables several output file optimizations - intended for things like quicker previews.
+   // Default: Not set.
+   cCRNCompFlagQuick = 4,
+
+   // DXT1: OK to use DXT1 alpha blocks for better quality or DXT1A transparency.
+   // DXT5: OK to use both DXT5 block types.
+   // Currently only used when writing to .DDS files, as .CRN uses only a subset of the possible DXTn block types.
+   // Default: Set.
+   cCRNCompFlagUseBothBlockTypes = 8,
+
+   // OK to use DXT1A transparent indices to encode black (assumes pixel shader ignores fetched alpha).
+   // Currently only used when writing to .DDS files, .CRN never uses alpha blocks.
+   // Default: Not set.
+   cCRNCompFlagUseTransparentIndicesForBlack = 16,
+
+   // Disables endpoint caching, for more deterministic output.
+   // Currently only used when writing to .DDS files.
+   // Default: Not set.
+   cCRNCompFlagDisableEndpointCaching = 32,
+
+   // If enabled, use the m_crn_color_endpoint_palette_size, etc. members of crn_comp_params to control the CRN palette sizes. Only useful when writing to .CRN files.
+   // Default: Not set.
+   cCRNCompFlagManualPaletteSizes = 64,
+
+   // If enabled, DXT1A alpha blocks are used to encode single bit transparency.
+   // Default: Not set.
+   cCRNCompFlagDXT1AForTransparency = 128,
+
+   // If enabled, the DXT1 compressor's color distance metric assumes the pixel shader will be converting the fetched RGB results to luma (Y part of YCbCr).
+   // This increases quality when compressing grayscale images, because the compressor can spread the luma error among all three channels (i.e. it can generate blocks
+   // with some chroma present if doing so will ultimately lead to lower luma error).
+   // Only enable on grayscale source images.
+   // Default: Not set.
+   cCRNCompFlagGrayscaleSampling = 256,
+
+   // If enabled, debug information will be output during compression.
+   // Default: Not set.
+   cCRNCompFlagDebugging = 0x80000000,
+
+   cCRNCompFlagForceDWORD = 0xFFFFFFFF
+};
+
+// Controls the DXTn quality vs. speed tradeoff - only used when compressing to .DDS.
+enum crn_dxt_quality
+{
+   cCRNDXTQualitySuperFast,
+   cCRNDXTQualityFast,
+   cCRNDXTQualityNormal,
+   cCRNDXTQualityBetter,
+   cCRNDXTQualityUber,
+
+   cCRNDXTQualityTotal,
+
+   cCRNDXTQualityForceDWORD = 0xFFFFFFFF
+};
+
+// Which DXTn compressor to use when compressing to .DDS.
+enum crn_dxt_compressor_type
+{
+   cCRNDXTCompressorCRN,
+   cCRNDXTCompressorCRNF,
+   cCRNDXTCompressorRYG,
+
+   cCRNTotalDXTCompressors,
+
+   cCRNDXTCompressorForceDWORD = 0xFFFFFFFF
+};
+
+// Compression will stop prematurely (and fail) if the callback returns false.
+// phase_index, total_phases - high level progress
+// subphase_index, total_subphases - progress within current phase
+typedef crn_bool (*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr);
+
+// CRN/DDS compression parameters struct.
+struct crn_comp_params
+{
+   inline crn_comp_params() { clear(); }
+   // Resets every member to its default. m_width and m_height default to 0, so check() fails until the caller sets them.
+   inline void clear()
+   {
+      m_size_of_obj = sizeof(*this);
+      m_file_type = cCRNFileTypeCRN;
+      m_faces = 1;
+      m_width = 0;
+      m_height = 0;
+      m_levels = 1;
+      m_format = cCRNFmtDXT1;
+      m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes;
+
+      for (crn_uint32 f = 0; f < cCRNMaxFaces; f++)
+         for (crn_uint32 l = 0; l < cCRNMaxLevels; l++)
+            m_pImages[f][l] = NULL;
+
+      m_target_bitrate = 0.0f;
+      m_quality_level = cCRNMaxQualityLevel;
+      m_dxt1a_alpha_threshold = 128;
+      m_dxt_quality = cCRNDXTQualityUber;
+      m_dxt_compressor_type = cCRNDXTCompressorCRN;
+      m_alpha_component = 3;
+
+      m_crn_adaptive_tile_color_psnr_derating = 2.0f;
+      m_crn_adaptive_tile_alpha_psnr_derating = 2.0f;
+      m_crn_color_endpoint_palette_size = 0;
+      m_crn_color_selector_palette_size = 0;
+      m_crn_alpha_endpoint_palette_size = 0;
+      m_crn_alpha_selector_palette_size = 0;
+
+      m_num_helper_threads = 0;
+      m_userdata0 = 0;
+      m_userdata1 = 0;
+      m_pProgress_func = NULL;
+      m_pProgress_func_data = NULL;
+   }
+
+   // Returns true if the input parameters are reasonable. Only ranges are validated; m_pImages, m_flags and m_target_bitrate are not inspected.
+   inline bool check() const
+   {
+      if ( (m_file_type > cCRNFileTypeDDS) ||
+         (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) ||
+         (m_dxt1a_alpha_threshold > 255) ||
+         ((m_faces != 1) && (m_faces != 6)) ||
+         ((m_width < 1) || (m_width > cCRNMaxLevelResolution)) ||
+         ((m_height < 1) || (m_height > cCRNMaxLevelResolution)) ||
+         ((m_levels < 1) || (m_levels > cCRNMaxLevels)) ||
+         ((m_format < cCRNFmtDXT1) || (m_format >= cCRNFmtTotal)) ||
+         ((m_crn_color_endpoint_palette_size) && ((m_crn_color_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_color_endpoint_palette_size > cCRNMaxPaletteSize))) ||
+         ((m_crn_color_selector_palette_size) && ((m_crn_color_selector_palette_size < cCRNMinPaletteSize) || (m_crn_color_selector_palette_size > cCRNMaxPaletteSize))) ||
+         ((m_crn_alpha_endpoint_palette_size) && ((m_crn_alpha_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_endpoint_palette_size > cCRNMaxPaletteSize))) ||
+         ((m_crn_alpha_selector_palette_size) && ((m_crn_alpha_selector_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_selector_palette_size > cCRNMaxPaletteSize))) ||
+         (m_alpha_component > 3) ||
+         (m_num_helper_threads > cCRNMaxHelperThreads) ||
+         (m_dxt_quality > cCRNDXTQualityUber) ||
+         (m_dxt_compressor_type >= cCRNTotalDXTCompressors) )
+      {
+         return false;
+      }
+      return true;
+   }
+
+   // Helper to set/get flags from m_flags member.
+   inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; }
+   inline void set_flag(crn_comp_flags flag, bool val) { m_flags &= ~flag; if (val) m_flags |= flag; }
+
+   crn_uint32                 m_size_of_obj;
+
+   crn_file_type              m_file_type;               // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS.
+
+   crn_uint32                 m_faces;                   // 1 (2D map) or 6 (cubemap)
+   crn_uint32                 m_width;                   // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK
+   crn_uint32                 m_height;                  // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK
+   crn_uint32                 m_levels;                  // [1,cCRNMaxLevels]
+
+   crn_format                 m_format;                  // Output pixel format.
+
+   crn_uint32                 m_flags;                   // see crn_comp_flags enum
+
+   // Array of pointers to 32bpp input images.
+   const crn_uint32*          m_pImages[cCRNMaxFaces][cCRNMaxLevels];
+
+   // Target bitrate - if non-zero, the compressor will use an interpolative search to find the
+   // highest quality level that is <= the target bitrate. If it fails to find a bitrate high enough, it'll
+   // disable adaptive block sizes (cCRNCompFlagHierarchical flag) and try again. This process can be pretty slow.
+   float                      m_target_bitrate;
+
+   // Desired quality level.
+   // Currently, CRN and DDS quality levels are not compatible with each other from an image quality standpoint.
+   crn_uint32                 m_quality_level;           // [cCRNMinQualityLevel, cCRNMaxQualityLevel]
+
+   // DXTn compression parameters.
+   crn_uint32                 m_dxt1a_alpha_threshold;   // [0,255], enforced by check()
+   crn_dxt_quality            m_dxt_quality;
+   crn_dxt_compressor_type    m_dxt_compressor_type;
+
+   // Alpha channel's component. Defaults to 3.
+   crn_uint32                 m_alpha_component;         // [0,3], enforced by check()
+
+   // Various low-level CRN specific parameters.
+   float                      m_crn_adaptive_tile_color_psnr_derating;
+   float                      m_crn_adaptive_tile_alpha_psnr_derating;
+
+   crn_uint32                 m_crn_color_endpoint_palette_size;  // 0 (the default, also accepted by check()) or [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+   crn_uint32                 m_crn_color_selector_palette_size;  // 0 or [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+
+   crn_uint32                 m_crn_alpha_endpoint_palette_size;  // 0 or [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+   crn_uint32                 m_crn_alpha_selector_palette_size;  // 0 or [cCRNMinPaletteSize,cCRNMaxPaletteSize]
+
+   // Number of helper threads to create to assist the compressor. 0=no threading.
+   crn_uint32                 m_num_helper_threads;      // [0,cCRNMaxHelperThreads]
+
+   // CRN userdata0 and userdata1 members, which are written directly to the header of the output file.
+   crn_uint32                 m_userdata0;
+   crn_uint32                 m_userdata1;
+
+   // User provided progress callback.
+   crn_progress_callback_func m_pProgress_func;
+   void*                      m_pProgress_func_data;
+};
+
+// Mipmap generator's mode.
+enum crn_mip_mode
+{
+   cCRNMipModeUseSourceOrGenerateMips,
+   cCRNMipModeUseSourceMips,
+   cCRNMipModeGenerateMips,
+   cCRNMipModeNoMips,
+
+   cCRNMipModeTotal,
+
+   cCRNModeForceDWORD = 0xFFFFFFFF
+};
+
+const wchar_t* crn_get_mip_mode_desc(crn_mip_mode m);
+const wchar_t* crn_get_mip_mode_name(crn_mip_mode m);
+
+// Mipmap generator's filter kernel.
+enum crn_mip_filter
+{
+   cCRNMipFilterBox,
+   cCRNMipFilterTent,
+   cCRNMipFilterLanczos4,
+   cCRNMipFilterMitchell,
+   cCRNMipFilterKaiser,
+
+   cCRNMipFilterTotal,
+
+   cCRNMipFilterForceDWORD = 0xFFFFFFFF
+};
+
+const char* crn_get_mip_filter_name(crn_mip_filter f);
+
+// Mipmap generator's scale mode.
+enum crn_scale_mode
+{
+   cCRNSMDisabled,
+   cCRNSMAbsolute,
+   cCRNSMRelative,
+   cCRNSMLowerPow2,
+   cCRNSMNearestPow2,
+   cCRNSMNextPow2,
+
+   cCRNSMTotal,
+
+   cCRNSMForceDWORD = 0xFFFFFFFF
+};
+
+const wchar_t* crn_get_scale_mode_desc(crn_scale_mode sm);
+
+// Mipmap generator parameters.
+struct crn_mipmap_params +{ + inline crn_mipmap_params() { clear(); } + + inline void clear() + { + m_size_of_obj = sizeof(*this); + m_mode = cCRNMipModeUseSourceOrGenerateMips; + m_filter = cCRNMipFilterKaiser; + m_gamma_filtering = true; + m_gamma = 2.2f; + m_blurriness = .9f; + m_renormalize = false; + m_tiled = false; + m_max_levels = cCRNMaxLevels; + m_min_mip_size = 1; + + m_scale_mode = cCRNSMDisabled; + m_scale_x = 1.0f; + m_scale_y = 1.0f; + + m_window_left = 0; + m_window_top = 0; + m_window_right = 0; + m_window_bottom = 0; + + m_clamp_scale = false; + m_clamp_width = 0; + m_clamp_height = 0; + } + + inline bool check() const { return true; } + + crn_uint32 m_size_of_obj; + + crn_mip_mode m_mode; + crn_mip_filter m_filter; + + crn_bool m_gamma_filtering; + float m_gamma; + + float m_blurriness; + + crn_uint32 m_max_levels; + crn_uint32 m_min_mip_size; + + crn_bool m_renormalize; + crn_bool m_tiled; + + crn_scale_mode m_scale_mode; + float m_scale_x; + float m_scale_y; + + crn_uint32 m_window_left; + crn_uint32 m_window_top; + crn_uint32 m_window_right; + crn_uint32 m_window_bottom; + + crn_bool m_clamp_scale; + crn_uint32 m_clamp_width; + crn_uint32 m_clamp_height; +}; + +// -------- High-level helper function definitions for CDN/DDS compression. + +#ifndef CRNLIB_MIN_ALLOC_ALIGNMENT +#define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 +#endif + +// Function to set an optional user provided memory allocation/reallocation/msize routines. +// By default, crnlib just uses malloc(), free(), etc. for all allocations. +typedef void* (*crn_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); +typedef size_t (*crn_msize_func)(void* p, void* pUser_data); +void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data); + +// Frees memory blocks allocated by crn_compress(), crn_decompress_crn_to_dds(), or crn_decompress_dds_to_images(). 
+void crn_free_block(void *pBlock);
+
+// Compresses a 32-bit/pixel texture to either: a regular DX9 DDS file, a "clustered" (or reduced entropy) DX9 DDS file, or a CRN file in memory.
+// Parameters:
+//  comp_params is the compression parameters struct, defined above.
+//  compressed_size will be set to the size of the returned memory block containing the output file.
+//  The returned block must be freed by calling crn_free_block().
+//  *pActual_quality_level will be set to the actual quality level used to compress the image. May be NULL.
+//  *pActual_bitrate will be set to the output file's effective bitrate, possibly taking into account LZMA compression. May be NULL.
+// Return value:
+//  The compressed file data, or NULL on failure.
+//  compressed_size will be set to the size of the returned memory buffer.
+// Notes:
+//  A "regular" DDS file is compressed using normal DXTn compression at the specified DXT quality level.
+//  A "clustered" DDS file is compressed using clustered DXTn compression to either the target bitrate or the specified integer quality factor.
+//  The output file is a standard DX9 format DDS file, except the compressor assumes you will be later losslessly compressing the DDS output file using the LZMA algorithm.
+//  A texture is defined as an array of 1 or 6 "faces" (6 faces=cubemap), where each "face" consists of between [1,cCRNMaxLevels] mipmap levels.
+//  Mipmap levels are simple 32-bit 2D images with a pitch of width*sizeof(uint32), arranged in the usual raster order (top scanline first).
+//  The image pixels may be grayscale (YYYX), grayscale/alpha (YYYA), 24-bit RGBX, or 32-bit RGBA colors (where "X"=don't care).
+//  RGB color data is generally assumed to be in the sRGB colorspace. If not, be sure to clear the "cCRNCompFlagPerceptual" flag in the crn_comp_params struct!
+void *crn_compress(const crn_comp_params &comp_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL);
+
+// Like the above function, except this function can also do things like generate mipmaps, and resize or crop the input texture before compression.
+// The actual operations performed are controlled by the crn_mipmap_params struct members.
+// Be sure to set the "m_gamma_filtering" member of crn_mipmap_params to false if the input texture is not sRGB.
+void *crn_compress(const crn_comp_params &comp_params, const crn_mipmap_params &mip_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL);
+
+// Transcodes an entire CRN file to DDS using the crn_decomp.h header file library to do most of the heavy lifting.
+// The output DDS file's format is guaranteed to be one of the DXTn formats in the crn_format enum.
+// This is a fast operation, because the CRN format is explicitly designed to be efficiently transcodable to DXTn.
+// For more control over decompression, see the lower-level helper functions in crn_decomp.h, which do not depend at all on crnlib. NOTE(review): file_size is a non-const reference - presumably the CRN size on entry and the DDS size on return; confirm against the implementation.
+void *crn_decompress_crn_to_dds(const void *pCRN_file_data, crn_uint32 &file_size);
+
+// Decompresses an entire DDS file in any supported format to uncompressed 32-bit/pixel image(s).
+// See the crnlib::pixel_format enum in inc/dds_defs.h for a list of the supported DDS formats.
+// You are responsible for freeing each image, either by calling crn_free_all_images() or manually calling crn_free_block() on each image pointer.
+struct crn_texture_desc
+{
+   crn_uint32 m_faces;
+   crn_uint32 m_width;
+   crn_uint32 m_height;
+   crn_uint32 m_levels;
+   crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format
+};
+bool crn_decompress_dds_to_images(const void *pDDS_file_data, crn_uint32 dds_file_size, crn_uint32 **ppImages, crn_texture_desc &tex_desc);
+
+// Frees all images allocated by crn_decompress_dds_to_images().
+void crn_free_all_images(crn_uint32 **ppImages, const crn_texture_desc &desc); + +// -------- crn_format related helpers functions. + +// Returns the FOURCC format equivalent to the specified crn_format. +crn_uint32 crn_get_format_fourcc(crn_format fmt); + +// Returns the crn_format's bits per texel. +crn_uint32 crn_get_format_bits_per_texel(crn_format fmt); + +// Returns the crn_format's number of bytes per block. +crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt); + +// Returns the non-swizzled, basic DXTn version of the specified crn_format. +// This is the format you would supply D3D or OpenGL. +crn_format crn_get_fundamental_dxt_format(crn_format fmt); + +// -------- String helpers. + +// Converts a crn_file_type to a string. +const wchar_t* crn_get_file_type_ext(crn_file_type file_type); +const char* crn_get_file_type_exta(crn_file_type file_type); + +// Converts a crn_format to a string. +const char* crn_get_format_stringa(crn_format fmt); +const wchar_t* crn_get_format_string(crn_format fmt); + +// Converts a crn_dxt_quality to a string. +const wchar_t* crn_get_dxt_quality_string(crn_dxt_quality q); +const char* crn_get_dxt_quality_stringa(crn_dxt_quality q); + +// -------- Low-level DXTn 4x4 block compressor API + +// crnlib's DXTn endpoint optimizer actually supports any number of source pixels (i.e. from 1 to thousands, not just 16), +// but for simplicity this API only supports 4x4 texel blocks. +typedef void *crn_block_compressor_context_t; + +// Create a DXTn block compressor. +// Notes this function only supports the basic/nonswizzled DXTn formats (DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX). +// Avoid calling this multiple times if you intend on compressing many blocks, because it allocates some memory. +crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params ¶ms); + +// Compresses a block of 16 pixels to the destination DXTn block. +// pDst_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others). 
+void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32 *pPixels, void *pDst_block); + +// Frees a DXTn block compressor. +void crn_free_block_compressor(crn_block_compressor_context_t pContext); + +#endif // CRNLIB_H + +//------------------------------------------------------------------------------ +// +// crnlib uses the ZLIB license: +// http://opensource.org/licenses/Zlib +// +// Copyright (c) 2010-2011 Tenacious Software LLC +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source distribution. +// +//------------------------------------------------------------------------------ diff --git a/inc/dds_defs.h b/inc/dds_defs.h new file mode 100644 index 00000000..b92ac666 --- /dev/null +++ b/inc/dds_defs.h @@ -0,0 +1,151 @@ +// File: dds_defs.h +// DX9 .DDS file header definitions. 
+#ifndef CRNLIB_DDS_DEFS_H +#define CRNLIB_DDS_DEFS_H + +#include "crnlib.h" + +#define CRNLIB_PIXEL_FMT_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) + +namespace crnlib +{ + enum pixel_format + { + PIXEL_FMT_INVALID = 0, + + PIXEL_FMT_DXT1 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '1'), + PIXEL_FMT_DXT2 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '2'), + PIXEL_FMT_DXT3 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '3'), + PIXEL_FMT_DXT4 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '4'), + PIXEL_FMT_DXT5 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '5'), + PIXEL_FMT_3DC = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX + PIXEL_FMT_DXN = CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY + PIXEL_FMT_DXT5A = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, http://developer.amd.com/media/gpu_assets/Radeon_X1x00_Programming_Guide.pdf + + // Non-standard, crnlib-specific pixel formats (some of these are supported by ATI's compressonator) + PIXEL_FMT_DXT5_CCxY = CRNLIB_PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'), + PIXEL_FMT_DXT5_xGxR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'x', 'R'), + PIXEL_FMT_DXT5_xGBR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'B', 'R'), + PIXEL_FMT_DXT5_AGBR = CRNLIB_PIXEL_FMT_FOURCC('A', 'G', 'B', 'R'), + + PIXEL_FMT_DXT1A = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', '1', 'A'), + + PIXEL_FMT_R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'x'), + PIXEL_FMT_L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'x'), + PIXEL_FMT_A8 = CRNLIB_PIXEL_FMT_FOURCC('x', 'x', 'x', 'A'), + PIXEL_FMT_A8L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'A'), + PIXEL_FMT_A8R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'A') + }; + + const crn_uint32 cDDSMaxImageDimensions = 8192U; + + // Total size of header is sizeof(uint32)+cDDSSizeofDDSurfaceDesc2; + const crn_uint32 cDDSSizeofDDSurfaceDesc2 = 124; + + // "DDS " + const crn_uint32 cDDSFileSignature = 0x20534444; + + struct DDCOLORKEY + { + crn_uint32 dwUnused0; + crn_uint32 dwUnused1; + }; + + struct DDPIXELFORMAT + { + 
crn_uint32 dwSize; + crn_uint32 dwFlags; + crn_uint32 dwFourCC; + crn_uint32 dwRGBBitCount; // ATI compressonator and crnlib sometimes place a FOURCC code here + crn_uint32 dwRBitMask; + crn_uint32 dwGBitMask; + crn_uint32 dwBBitMask; + crn_uint32 dwRGBAlphaBitMask; + }; + + struct DDSCAPS2 + { + crn_uint32 dwCaps; + crn_uint32 dwCaps2; + crn_uint32 dwCaps3; + crn_uint32 dwCaps4; + }; + + struct DDSURFACEDESC2 + { + crn_uint32 dwSize; + crn_uint32 dwFlags; + crn_uint32 dwHeight; + crn_uint32 dwWidth; + union + { + crn_int32 lPitch; + crn_uint32 dwLinearSize; + }; + crn_uint32 dwBackBufferCount; + crn_uint32 dwMipMapCount; + crn_uint32 dwAlphaBitDepth; + crn_uint32 dwUnused0; + crn_uint32 lpSurface; + DDCOLORKEY unused0; + DDCOLORKEY unused1; + DDCOLORKEY unused2; + DDCOLORKEY unused3; + DDPIXELFORMAT ddpfPixelFormat; + DDSCAPS2 ddsCaps; + crn_uint32 dwUnused1; + }; + + const crn_uint32 DDSD_CAPS = 0x00000001; + const crn_uint32 DDSD_HEIGHT = 0x00000002; + const crn_uint32 DDSD_WIDTH = 0x00000004; + const crn_uint32 DDSD_PITCH = 0x00000008; + + const crn_uint32 DDSD_BACKBUFFERCOUNT = 0x00000020; + const crn_uint32 DDSD_ZBUFFERBITDEPTH = 0x00000040; + const crn_uint32 DDSD_ALPHABITDEPTH = 0x00000080; + + const crn_uint32 DDSD_LPSURFACE = 0x00000800; + + const crn_uint32 DDSD_PIXELFORMAT = 0x00001000; + const crn_uint32 DDSD_CKDESTOVERLAY = 0x00002000; + const crn_uint32 DDSD_CKDESTBLT = 0x00004000; + const crn_uint32 DDSD_CKSRCOVERLAY = 0x00008000; + + const crn_uint32 DDSD_CKSRCBLT = 0x00010000; + const crn_uint32 DDSD_MIPMAPCOUNT = 0x00020000; + const crn_uint32 DDSD_REFRESHRATE = 0x00040000; + const crn_uint32 DDSD_LINEARSIZE = 0x00080000; + + const crn_uint32 DDSD_TEXTURESTAGE = 0x00100000; + const crn_uint32 DDSD_FVF = 0x00200000; + const crn_uint32 DDSD_SRCVBHANDLE = 0x00400000; + const crn_uint32 DDSD_DEPTH = 0x00800000; + + const crn_uint32 DDSD_ALL = 0x00fff9ee; + + const crn_uint32 DDPF_ALPHAPIXELS = 0x00000001; + const crn_uint32 DDPF_ALPHA = 0x00000002; + 
const crn_uint32 DDPF_FOURCC = 0x00000004; + const crn_uint32 DDPF_PALETTEINDEXED8 = 0x00000020; + const crn_uint32 DDPF_RGB = 0x00000040; + const crn_uint32 DDPF_LUMINANCE = 0x00020000; + /* DDSCAPS_*: single-bit flag values. NOTE(review): presumably for DDSCAPS2::dwCaps - confirm. */ + const crn_uint32 DDSCAPS_COMPLEX = 0x00000008; + const crn_uint32 DDSCAPS_TEXTURE = 0x00001000; + const crn_uint32 DDSCAPS_MIPMAP = 0x00400000; + /* DDSCAPS2_*: a cubemap bit (0x200) followed by one bit per cube face, positive/negative X, Y, Z, in 0x400 through 0x8000. NOTE(review): presumably for DDSCAPS2::dwCaps2 - confirm. */ + const crn_uint32 DDSCAPS2_CUBEMAP = 0x00000200; + const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400; + const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800; + + const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000; + const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000; + const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000; + const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000; + + const crn_uint32 DDSCAPS2_VOLUME = 0x00200000; + +} // namespace crnlib + +#endif // CRNLIB_DDS_DEFS_H diff --git a/license.txt b/license.txt new file mode 100644 index 00000000..c4382b0b --- /dev/null +++ b/license.txt @@ -0,0 +1,22 @@ +crunch/crnlib uses the ZLIB license: +http://opensource.org/licenses/Zlib + +Copyright (c) 2010-2011 Tenacious Software LLC + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software +in a product, an acknowledgment in the product documentation would be +appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. + +3. This notice may not be removed or altered from any source distribution. 
diff --git a/readme.txt b/readme.txt new file mode 100644 index 00000000..3f02177c --- /dev/null +++ b/readme.txt @@ -0,0 +1,271 @@ +crunch/crnlib v1.00 - Advanced DXTn texture compression library +Copyright (C) 2010-2011 Tenacious Software LLC + +For bugs or support contact Rich Geldreich <richgel99@gmail.com>. + +This software uses the ZLIB license, which is located in license.txt. +http://opensource.org/licenses/Zlib + +If you use this software in a product, an acknowledgment in the product +documentation would be highly appreciated but is not required. + +Overview +-------- + +crnlib is a lossy texture compression library for developers that ship +content using the DXT1/5/N or 3DC compressed color/normal map/cubemap +mipmapped texture formats. It was written by the same author as the open +source LZHAM lossless data compression library: +http://code.google.com/p/lzham/ + +It can compress mipmapped 2D textures, normal maps, and cubemaps to +approx. 1-1.25 bits/texel, and normal maps to 1.75-2 bits/texel. The +actual bitrate depends on the complexity of the texture itself, the +specified quality factor/target bitrate, and ultimately on the desired +quality needed for a particular texture. + +crnlib differs significantly from other approaches because its +compressed texture data format was carefully designed to be quickly +transcodable directly to DXTn with no intermediate recompression step. +The typical (single threaded) transcode to DXTn rate is generally +between 100-250 megatexels/sec. The current library supports PC +(Win32/x64) and Xbox 360. Fast random access to individual mipmap levels +is supported. + +crnlib can also generate standard .DDS files at a specified quality +setting, which results in files that are much more compressible by +LZMA/Deflate/etc. compared to files generated by standard DXTn texture +tools (see below). This feature allows easy integration into any engine +or graphics library that already supports .DDS files. 
+ +The .CRN file format supports the following core DXTn texture formats: +DXT1 (but not DXT1A), DXT5, DXT5A, and DXN/3DC + +It also supports several popular swizzled variants (some of which are +also supported by AMD's Compressonator): +DXT5_xGBR, DXT5_xGxR, DXT5_AGBR, and DXT5_CCxY (experimental luma-chroma YCoCg). + +Recommended Software +-------------------- + +AMD's Compressonator tool is recommended for viewing the .DDS files created by +the crunch tool and the included example projects: + +http://developer.amd.com/gpu/compressonator/pages/default.aspx + +Note: Some of the swizzled DXTn .DDS output formats (such as DXT5_xGBR) +read/written by the crunch tool or examples deviate from the DX9 DDS +standard, so DXSDK tools such as DXTEX.EXE won't load them at all or +they won't be properly displayed. + +Compression Algorithm Details +----------------------------- + +The compression process employed in creating both .CRN and +clustered .DDS files utilizes a very high quality, scalable DXTn +endpoint optimizer capable of processing any number of pixels (instead +of the typical hard-coded 16), optional adaptive switching between +several macroblock sizes/configurations (currently any combination of +4x4, 8x4, 4x8, and 8x8 pixel blocks), endpoint clusterization using +top-down cluster analysis, vector quantization (VQ) of the selector +indices, and several custom algorithms for compressing the resulting +endpoint/selector codebooks and macroblock indices. Multiple feedback +passes are performed between the clusterization and VQ steps to optimize +quality, and several steps use a brute force refinement approach to improve +quality. The majority of compression steps are multithreaded. 
+ +The .CRN format currently utilizes canonical Huffman coding for speed +(similar to Deflate but with much larger tables), but the next major +version will also utilize adaptive binary arithmetic coding and higher +order context modeling using already developed tech from my LZHAM +compression library. + +Supported File Formats +---------------------- + +crnlib supports two compressed texture file formats. The first +format (clustered .DDS) is simple to integrate into an existing project +(typically, no code changes are required), but it doesn't offer the +highest quality/compression ratio that crnlib is capable of. Using +the second, higher quality custom format (.CRN) requires a few +typically straightforward engine modifications to integrate the +.CRN->DXTn transcoder header file library into your tools/engine. + +.DDS +crnlib can compress textures to standard DX9-style .DDS files using +clustered DXTn compression, which is a subset of the approach used to +create .CRN files. (For completeness, crnlib also supports vanilla, block +by block DXTn compression, but that's not very interesting.) +Clustered DXTn compressed .DDS files are much more compressible than +files created by other libraries/tools. Apart from increased +compressibility, the .DDS files generated by this process are completely +standard so they should be fairly easy to add to a project with little +to no code changes. + +To actually benefit from clustered DXTn .DDS files, your engine needs to +further losslessly compress the .DDS data generated by crnlib using a +lossless codec such as zlib, lzo, LZMA, LZHAM, etc. Most likely, your +engine does this already. (If not, it definitely should, because DXTn +compressed textures generally contain a large amount of highly redundant +data.) + +Clustered .DDS files are intended to be the simplest/fastest way to +integrate crnlib's tech into a project. + +.CRN +The second, better, option is to compress your textures to .CRN files +using crnlib. 
To read the resulting .CRN data, you must add the .CRN +transcoder library (located in the included single file, stand-alone +header file library inc/crn_decomp.h) to your application. .CRN files +provide noticeably higher quality at the same effective bitrate compared +to clustered DXTn compressed .DDS files. Also, .CRN files don't require +further lossless compression because they're already highly compressed. + +.CRN files are a bit more difficult/risky to integrate into a project, but +the resulting compression ratio and quality are superior to clustered .DDS files. + +Building the Examples +--------------------- + +This release contains the source code and project files for three simple +example projects: + +crn_examples.2008.sln is a Visual Studio 2008 (VC9) solution file +containing projects for Win32 and x64. crnlib itself also builds with +VS2005, VS2010, and gcc 4.5.0 (TDM GCC+MinGW). A codeblocks 10.05 +workspace and project file is also included, but compiling crnlib this +way hasn't been tested much. + +example1: Demonstrates how to use crnlib's high-level C-helper +compression/decompression/transcoding functions in inc/crnlib.h. It's a +fairly complete example of crnlib's functionality. + +example2: Shows how to transcode .CRN files to .DDS using *only* +the functionality in inc/crn_decomp.h. It does not link against +crnlib.lib or depend on it in any way. (Note: The complete source code, +approx. 4800 lines, to the CRN transcoder is included in inc/crn_decomp.h.) + +example2 is intended to show how simple it is to integrate CRN textures +into your application. + +example3: Shows how to use the regular, low-level DXTn block compressor +functions in inc/crnlib.h. This functionality is included for +completeness. (Your engine or toolchain most likely already has its own +DXTn compressor. crnlib's compressor is typically very competitive with or +superior to most available closed and open source CPU-based +compressors.) 
+ +Creating Compressed Textures from the Command Line (crunch.exe) +--------------------------------------------------------------- + +The simplest way to create compressed textures using crnlib is to +integrate the bin\crunch.exe (or bin\crunch_x64.exe) command line tool +into your texture build toolchain or export process. It can write DXTn +compressed 2D/cubemap textures to regular DXTn compressed .DDS, +clustered (or reduced entropy) DXTn compressed .DDS, or .CRN files. It +can also transcode or decompress files to several standard image +formats, such as TGA or BMP. Run crunch.exe with no options for help. + +The .CRN files created by crunch.exe can be efficiently transcoded to +DXTn using the included CRN transcoding library, located in full source +form in inc/crn_decomp.h. + +Here are a few example crunch.exe command lines: + +1. Compress blah.tga to blah.dds using normal DXT1 compression: +crunch -file blah.tga -fileformat dds -dxt1 + +2. Compress blah.tga to blah.dds using clustered DXT1 at an effective bitrate of 1.5 bits/texel, and display image statistics: +crunch -file blah.tga -fileformat dds -dxt1 -bitrate 1.5 -imagestats + +3. Compress blah.tga to blah.dds using clustered DXT1 at quality level 100 (from [0,255]), with no mipmaps, and display LZMA statistics: +crunch -file blah.tga -fileformat dds -dxt1 -quality 100 -mipmode none -lzmastats + +4. Compress blah.tga to blah.crn using clustered DXT1 at a bitrate of 1.2 bits/texel, no mipmaps: +crunch -file blah.tga -dxt1 -bitrate 1.2 -mipmode none + +5. Decompress blah.dds to a .tga file: +crunch -file blah.dds -fileformat tga + +6. Transcode blah.crn to a .dds file: +crunch -file blah.crn + +7. Decompress blah.crn, writing each mipmap level to a separate .tga file: +crunch -split -file blah.crn -fileformat tga + +crunch.exe can do a lot more, like rescale/crop images before +compression, convert images from one file format to another, compare +images, process multiple images, etc. 
+ +Note: I would have included the full source to crunch.exe, but it still +has some low-level dependencies on crnlib internals which I didn't have +time to address. This version of crunch.exe has some reduced +functionality compared to an earlier eval release. For example, XML file +support is not included in this version. + +Using crnlib +------------ + +The most flexible and powerful way of using crnlib is to integrate the +library into your editor/toolchain/etc. and directly supply it with your +raw/source texture bits. See the C-style APIs and comments in +inc/crnlib.h. + +To compress, you basically fill in a few structs and call one function: + + void *crn_compress(const crn_comp_params &comp_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL); + +Or, if you want crnlib to also generate mipmaps, you call this function: + + void *crn_compress(const crn_comp_params &comp_params, const crn_mipmap_params &mip_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL); + +You can also transcode/uncompress .DDS/.CRN files to raw 32bpp images +using crn_decompress_crn_to_dds() and crn_decompress_dds_to_images(). + +Internally, crnlib just uses inc/crn_decomp.h to transcode textures to +DXTn. If you only need to transcode .CRN format files to raw DXTn bits +at runtime (and not compress), you don't actually need to compile or +link against crnlib at all. Just include inc/crn_decomp.h, which +contains a completely self-contained CRN transcoder in the "crnd" +namespace. The crnd_get_texture_info(), crnd_unpack_begin(), +crnd_unpack_level(), etc. functions are all you need to efficiently get +at the raw DXTn bits, which can be directly supplied to whatever API or +GPU you're using. (See example2.) + +Important note: When compiling under Native Client, be sure to define +the PLATFORM_NACL macro before including the inc/crn_decomp.h header file library. 
+ +Known Issues/Bugs +----------------- + +* crnlib currently assumes you'll be further losslessly compressing its +output .DDS files using LZMA. However, some engines use weaker codecs +such as LZO, zlib, or custom codecs, so crnlib's bitrate measurements +will be inaccurate. It should be easy to allow the caller to plug in +custom lossless compressors for bitrate measurement. + +* Compressing to a desired bitrate can be time consuming, especially when +processing large (2k or 4k) images to the .CRN format. There are several +high-level optimizations employed when compressing to clustered DXTn .DDS +files using multiple trials, but not so for .CRN. + +* The .CRN compressor does not currently use 3 color (transparent) DXT1 +blocks at all, only 4 color blocks. So it doesn't support DXT1A +transparency, and its output quality suffers a little due to this +limitation. (Note that the clustered DXTn compressor used when +writing clustered .DDS files does *not* have this limitation.) + +* The clustered DXT5/DXT5A compressor is able to group DXT5A blocks into +clusters only if they use absolute (black/white) selector indices. This +hurts performance at very low bitrates, because too many bits are +effectively given to alpha. + +* DXT3 is not supported when writing .CRN or clustered DXTn DDS files. +(DXT3 is supported by crnlib when compressing to regular DXTn DDS +files.) You'll get DXT5 files if you request DXT3. However, DXT3 is +supported by the regular DXTn block compressor. (DXT3's 4bpp fixed alpha +sucks versus DXT5 alpha blocks, so I don't see this as a big deal.) + +* The DXT5_CCxY format uses a simple YCoCg encoding that is workable but +hasn't been tuned for max. quality yet. +