From 53d2e42cbe70898f9b13969e896a8f555d2947aa Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Sat, 1 Jul 2023 15:39:44 +0800 Subject: [PATCH] rename src/fastertransformer to src/turbomind (#33) --- src/CMakeLists.txt | 2 +- src/{fastertransformer => turbomind}/CMakeLists.txt | 0 src/{fastertransformer => turbomind}/kernels/CMakeLists.txt | 0 .../kernels/activation_kernels.cu | 0 .../kernels/activation_kernels.h | 0 src/{fastertransformer => turbomind}/kernels/ban_bad_words.cu | 0 src/{fastertransformer => turbomind}/kernels/ban_bad_words.h | 0 .../kernels/beam_search_penalty_kernels.cu | 0 .../kernels/beam_search_penalty_kernels.h | 0 .../kernels/beam_search_topk_kernels.cu | 0 .../kernels/beam_search_topk_kernels.h | 0 .../kernels/bert_preprocess_kernels.cu | 0 .../kernels/bert_preprocess_kernels.h | 0 .../kernels/custom_ar_kernels.cu | 0 .../kernels/custom_ar_kernels.h | 0 .../kernels/decoder_masked_multihead_attention.cu | 0 .../kernels/decoder_masked_multihead_attention.h | 0 .../decoder_masked_multihead_attention_128.cu | 0 .../decoder_masked_multihead_attention_template.cuh | 0 .../kernels/decoder_masked_multihead_attention_utils.h | 0 .../kernels/decoding_kernels.cu | 0 src/{fastertransformer => turbomind}/kernels/decoding_kernels.h | 0 .../kernels/gen_relative_pos_bias.cu | 0 .../kernels/gen_relative_pos_bias.h | 0 src/{fastertransformer => turbomind}/kernels/gpt_kernels.cu | 0 src/{fastertransformer => turbomind}/kernels/gpt_kernels.h | 0 src/{fastertransformer => turbomind}/kernels/logprob_kernels.cu | 0 src/{fastertransformer => turbomind}/kernels/logprob_kernels.h | 0 .../kernels/online_softmax_beamsearch_kernels.cu | 0 .../kernels/online_softmax_beamsearch_kernels.h | 0 src/{fastertransformer => turbomind}/kernels/penalty_types.h | 0 .../kernels/reduce_kernel_utils.cuh | 0 .../kernels/sampling_penalty_kernels.cu | 0 .../kernels/sampling_penalty_kernels.h | 0 .../kernels/sampling_topk_kernels.cu | 0 .../kernels/sampling_topk_kernels.h | 0 .../kernels/sampling_topp_kernels.cu | 0 .../kernels/sampling_topp_kernels.h | 0 .../kernels/stop_criteria_kernels.cu | 0 .../kernels/stop_criteria_kernels.h | 0 .../kernels/unfused_attention_kernels.cu | 0 .../kernels/unfused_attention_kernels.h | 0 src/{fastertransformer => turbomind}/layers/BaseLayer.h | 0 src/{fastertransformer => turbomind}/layers/CMakeLists.txt | 0 src/{fastertransformer => turbomind}/layers/DenseWeight.h | 0 .../layers/DynamicDecodeBaseLayer.h | 0 .../layers/DynamicDecodeLayer.cc | 0 .../layers/DynamicDecodeLayer.h | 0 src/{fastertransformer => turbomind}/layers/FfnFP8Layer.cc | 0 src/{fastertransformer => turbomind}/layers/FfnFP8Layer.h | 0 src/{fastertransformer => turbomind}/layers/FfnFP8Weight.h | 0 src/{fastertransformer => turbomind}/layers/FfnINT8Weight.h | 0 src/{fastertransformer => turbomind}/layers/FfnLayer.cc | 0 src/{fastertransformer => turbomind}/layers/FfnLayer.h | 0 src/{fastertransformer => turbomind}/layers/FfnLayerINT8.cc | 0 src/{fastertransformer => turbomind}/layers/FfnLayerINT8.h | 0 src/{fastertransformer => turbomind}/layers/FfnWeight.h | 0 .../layers/attention_layers/AttentionWeight.h | 0 .../layers/attention_layers/BaseAttentionLayer.h | 0 .../layers/attention_layers/CMakeLists.txt | 0 .../layers/attention_layers_fp8/AttentionFP8Weight.h | 0 .../layers/attention_layers_fp8/BaseAttentionFP8Layer.h | 0 .../layers/attention_layers_fp8/CMakeLists.txt | 0 .../layers/attention_layers_int8/AttentionINT8Weight.h | 0 .../layers/attention_layers_int8/CMakeLists.txt | 0 .../layers/beam_search_layers/BaseBeamSearchLayer.cu | 0 .../layers/beam_search_layers/BaseBeamSearchLayer.h | 0 .../layers/beam_search_layers/BeamSearchLayer.cu | 0 .../layers/beam_search_layers/BeamSearchLayer.h | 0 .../layers/beam_search_layers/CMakeLists.txt | 0 .../layers/beam_search_layers/OnlineBeamSearchLayer.cu | 0 .../layers/beam_search_layers/OnlineBeamSearchLayer.h | 0 .../layers/sampling_layers/BaseSamplingLayer.cc | 0 .../layers/sampling_layers/BaseSamplingLayer.h | 0 .../layers/sampling_layers/CMakeLists.txt | 0 .../layers/sampling_layers/TopKSamplingLayer.cu | 0 .../layers/sampling_layers/TopKSamplingLayer.h | 0 .../layers/sampling_layers/TopPSamplingLayer.cu | 0 .../layers/sampling_layers/TopPSamplingLayer.h | 0 src/{fastertransformer => turbomind}/models/BaseWeight.h | 0 src/{fastertransformer => turbomind}/models/CMakeLists.txt | 0 src/{fastertransformer => turbomind}/models/llama/Barrier.h | 0 .../models/llama/CMakeLists.txt | 0 src/{fastertransformer => turbomind}/models/llama/LlamaBatch.cc | 0 src/{fastertransformer => turbomind}/models/llama/LlamaBatch.h | 0 .../models/llama/LlamaCacheManager.cc | 0 .../models/llama/LlamaCacheManager.h | 0 .../models/llama/LlamaContextAttentionLayer.cc | 0 .../models/llama/LlamaContextAttentionLayer.h | 0 .../models/llama/LlamaContextDecoder.cc | 0 .../models/llama/LlamaContextDecoder.h | 0 .../models/llama/LlamaDecoder.cc | 0 .../models/llama/LlamaDecoder.h | 0 .../models/llama/LlamaDecoderLayerWeight.cc | 0 .../models/llama/LlamaDecoderLayerWeight.h | 0 .../models/llama/LlamaDecoderSelfAttentionLayer.cc | 0 .../models/llama/LlamaDecoderSelfAttentionLayer.h | 0 .../models/llama/LlamaDenseWeight.h | 0 .../models/llama/LlamaFfnLayer.cc | 0 .../models/llama/LlamaFfnLayer.h | 0 .../models/llama/LlamaInstanceComm.h | 0 src/{fastertransformer => turbomind}/models/llama/LlamaLinear.h | 0 .../models/llama/LlamaNcclGuard.h | 0 src/{fastertransformer => turbomind}/models/llama/LlamaV2.cc | 0 src/{fastertransformer => turbomind}/models/llama/LlamaV2.h | 0 .../models/llama/LlamaWeight.cc | 0 src/{fastertransformer => turbomind}/models/llama/LlamaWeight.h | 0 src/{fastertransformer => turbomind}/models/llama/Request.h | 0 .../models/llama/fused_multi_head_attention/CMakeLists.txt | 0 .../fused_multi_head_attention/llama_flash_attention_kernel.cu | 0 .../fused_multi_head_attention/mma_accum_lambda_iterator.h | 0 .../models/llama/fused_multi_head_attention/tile_smem_loader.h | 0 .../models/llama/llama_decoder_kernels.cu | 0 .../models/llama/llama_decoder_kernels.h | 0 src/{fastertransformer => turbomind}/models/llama/llama_gemm.cc | 0 .../models/llama/llama_kernels.cu | 0 .../models/llama/llama_kernels.h | 0 .../models/llama/llama_utils.cu | 0 src/{fastertransformer => turbomind}/models/llama/llama_utils.h | 0 .../models/llama/prefix_cache.cu | 0 .../models/llama/prefix_cache.h | 0 .../triton_backend/CMakeLists.txt | 0 .../triton_backend/libfastertransformer.cc | 0 .../triton_backend/libtriton_fastertransformer.ldscript | 0 .../triton_backend/llama/CMakeLists.txt | 0 .../triton_backend/llama/LlamaTritonModel.cc | 0 .../triton_backend/llama/LlamaTritonModel.h | 0 .../triton_backend/llama/LlamaTritonModelInstance.cc | 0 .../triton_backend/llama/LlamaTritonModelInstance.h | 0 .../triton_backend/transformer_triton_backend.cpp | 0 .../triton_backend/transformer_triton_backend.hpp | 0 .../triton_backend/triton_utils.hpp | 0 src/{fastertransformer => turbomind}/utils/CMakeLists.txt | 0 src/{fastertransformer => turbomind}/utils/IA3.h | 0 src/{fastertransformer => turbomind}/utils/ScaleList.h | 0 src/{fastertransformer => turbomind}/utils/Tensor.cc | 0 src/{fastertransformer => turbomind}/utils/Tensor.h | 0 src/{fastertransformer => turbomind}/utils/activation_types.h | 0 src/{fastertransformer => turbomind}/utils/allocator.h | 0 src/{fastertransformer => turbomind}/utils/conv2d.h | 0 src/{fastertransformer => turbomind}/utils/convert_data_type.h | 0 src/{fastertransformer => turbomind}/utils/cublasAlgoMap.cc | 0 src/{fastertransformer => turbomind}/utils/cublasAlgoMap.h | 0 .../utils/cublasFP8MMWrapper.cu | 0 src/{fastertransformer => turbomind}/utils/cublasFP8MMWrapper.h | 0 .../utils/cublasINT8MMWrapper.cc | 0 .../utils/cublasINT8MMWrapper.h | 0 src/{fastertransformer => turbomind}/utils/cublasMMWrapper.cc | 0 src/{fastertransformer => turbomind}/utils/cublasMMWrapper.h | 0 .../utils/cuda_bf16_fallbacks.cuh | 0 src/{fastertransformer => turbomind}/utils/cuda_bf16_wrapper.h | 0 src/{fastertransformer => turbomind}/utils/cuda_fp8_utils.cu | 0 src/{fastertransformer => turbomind}/utils/cuda_fp8_utils.h | 0 src/{fastertransformer => turbomind}/utils/cuda_type_utils.cuh | 0 src/{fastertransformer => turbomind}/utils/cuda_utils.cc | 0 src/{fastertransformer => turbomind}/utils/cuda_utils.h | 0 src/{fastertransformer => turbomind}/utils/custom_ar_comm.cc | 0 src/{fastertransformer => turbomind}/utils/custom_ar_comm.h | 0 src/{fastertransformer => turbomind}/utils/gemm.cc | 0 src/{fastertransformer => turbomind}/utils/gemm.h | 0 .../utils/gemm_test/CMakeLists.txt | 0 .../utils/gemm_test/decoding_gemm_func.cc | 0 .../utils/gemm_test/decoding_gemm_func.h | 0 .../utils/gemm_test/encoder_gemm_func.cc | 0 .../utils/gemm_test/encoder_gemm_func.h | 0 .../utils/gemm_test/encoder_igemm_func.cc | 0 .../utils/gemm_test/encoder_igemm_func.h | 0 .../utils/gemm_test/gemm_func.cc | 0 .../utils/gemm_test/gemm_func.h | 0 .../utils/gemm_test/gpt_gemm_func.cc | 0 .../utils/gemm_test/gpt_gemm_func.h | 0 .../utils/gemm_test/swin_gemm_func.cc | 0 .../utils/gemm_test/swin_gemm_func.h | 0 .../utils/gemm_test/swin_igemm_func.cc | 0 .../utils/gemm_test/swin_igemm_func.h | 0 .../utils/gemm_test/t5_gemm_func.cc | 0 .../utils/gemm_test/t5_gemm_func.h | 0 .../utils/gemm_test/xlnet_gemm_func.cc | 0 .../utils/gemm_test/xlnet_gemm_func.h | 0 src/{fastertransformer => turbomind}/utils/gpu_buf.h | 0 src/{fastertransformer => turbomind}/utils/instance_comm.h | 0 src/{fastertransformer => turbomind}/utils/logger.cc | 0 src/{fastertransformer => turbomind}/utils/logger.h | 0 src/{fastertransformer => turbomind}/utils/memory_utils.cu | 0 src/{fastertransformer => turbomind}/utils/memory_utils.h | 0 src/{fastertransformer => turbomind}/utils/mpi_utils.cc | 0 src/{fastertransformer => turbomind}/utils/mpi_utils.h | 0 src/{fastertransformer => turbomind}/utils/nccl_utils.cc | 0 src/{fastertransformer => turbomind}/utils/nccl_utils.h | 0 src/{fastertransformer => turbomind}/utils/nvtx_utils.cc | 0 src/{fastertransformer => turbomind}/utils/nvtx_utils.h | 0 src/{fastertransformer => turbomind}/utils/prompt_learning.h | 0 src/{fastertransformer => turbomind}/utils/string_utils.h | 0 src/{fastertransformer => turbomind}/utils/test_utils.h | 0 src/{fastertransformer => turbomind}/utils/wenet_conv2d.h | 0 src/{fastertransformer => turbomind}/utils/word_list.cc | 0 src/{fastertransformer => turbomind}/utils/word_list.h | 0 197 files changed, 1 insertion(+), 1 deletion(-) rename src/{fastertransformer => turbomind}/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/kernels/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/kernels/activation_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/activation_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/ban_bad_words.cu (100%) rename src/{fastertransformer => turbomind}/kernels/ban_bad_words.h (100%) rename src/{fastertransformer => turbomind}/kernels/beam_search_penalty_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/beam_search_penalty_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/beam_search_topk_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/beam_search_topk_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/bert_preprocess_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/bert_preprocess_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/custom_ar_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/custom_ar_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/decoder_masked_multihead_attention.cu (100%) rename src/{fastertransformer => turbomind}/kernels/decoder_masked_multihead_attention.h (100%) rename src/{fastertransformer => turbomind}/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_128.cu (100%) rename src/{fastertransformer => turbomind}/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.cuh (100%) rename src/{fastertransformer => turbomind}/kernels/decoder_masked_multihead_attention_utils.h (100%) rename src/{fastertransformer => turbomind}/kernels/decoding_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/decoding_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/gen_relative_pos_bias.cu (100%) rename src/{fastertransformer => turbomind}/kernels/gen_relative_pos_bias.h (100%) rename src/{fastertransformer => turbomind}/kernels/gpt_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/gpt_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/logprob_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/logprob_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/online_softmax_beamsearch_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/online_softmax_beamsearch_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/penalty_types.h (100%) rename src/{fastertransformer => turbomind}/kernels/reduce_kernel_utils.cuh (100%) rename src/{fastertransformer => turbomind}/kernels/sampling_penalty_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/sampling_penalty_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/sampling_topk_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/sampling_topk_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/sampling_topp_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/sampling_topp_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/stop_criteria_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/stop_criteria_kernels.h (100%) rename src/{fastertransformer => turbomind}/kernels/unfused_attention_kernels.cu (100%) rename src/{fastertransformer => turbomind}/kernels/unfused_attention_kernels.h (100%) rename src/{fastertransformer => turbomind}/layers/BaseLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/layers/DenseWeight.h (100%) rename src/{fastertransformer => turbomind}/layers/DynamicDecodeBaseLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/DynamicDecodeLayer.cc (100%) rename src/{fastertransformer => turbomind}/layers/DynamicDecodeLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/FfnFP8Layer.cc (100%) rename src/{fastertransformer => turbomind}/layers/FfnFP8Layer.h (100%) rename src/{fastertransformer => turbomind}/layers/FfnFP8Weight.h (100%) rename src/{fastertransformer => turbomind}/layers/FfnINT8Weight.h (100%) rename src/{fastertransformer => turbomind}/layers/FfnLayer.cc (100%) rename src/{fastertransformer => turbomind}/layers/FfnLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/FfnLayerINT8.cc (100%) rename src/{fastertransformer => turbomind}/layers/FfnLayerINT8.h (100%) rename src/{fastertransformer => turbomind}/layers/FfnWeight.h (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers/AttentionWeight.h (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers/BaseAttentionLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers_fp8/AttentionFP8Weight.h (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers_fp8/BaseAttentionFP8Layer.h (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers_fp8/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers_int8/AttentionINT8Weight.h (100%) rename src/{fastertransformer => turbomind}/layers/attention_layers_int8/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/BaseBeamSearchLayer.cu (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/BaseBeamSearchLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/BeamSearchLayer.cu (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/BeamSearchLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/OnlineBeamSearchLayer.cu (100%) rename src/{fastertransformer => turbomind}/layers/beam_search_layers/OnlineBeamSearchLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/BaseSamplingLayer.cc (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/BaseSamplingLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/TopKSamplingLayer.cu (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/TopKSamplingLayer.h (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/TopPSamplingLayer.cu (100%) rename src/{fastertransformer => turbomind}/layers/sampling_layers/TopPSamplingLayer.h (100%) rename src/{fastertransformer => turbomind}/models/BaseWeight.h (100%) rename src/{fastertransformer => turbomind}/models/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/models/llama/Barrier.h (100%) rename src/{fastertransformer => turbomind}/models/llama/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaBatch.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaBatch.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaCacheManager.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaCacheManager.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaContextAttentionLayer.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaContextAttentionLayer.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaContextDecoder.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaContextDecoder.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDecoder.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDecoder.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDecoderLayerWeight.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDecoderLayerWeight.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDecoderSelfAttentionLayer.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDecoderSelfAttentionLayer.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaDenseWeight.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaFfnLayer.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaFfnLayer.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaInstanceComm.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaLinear.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaNcclGuard.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaV2.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaV2.h (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaWeight.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/LlamaWeight.h (100%) rename src/{fastertransformer => turbomind}/models/llama/Request.h (100%) rename src/{fastertransformer => turbomind}/models/llama/fused_multi_head_attention/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/models/llama/fused_multi_head_attention/llama_flash_attention_kernel.cu (100%) rename src/{fastertransformer => turbomind}/models/llama/fused_multi_head_attention/mma_accum_lambda_iterator.h (100%) rename src/{fastertransformer => turbomind}/models/llama/fused_multi_head_attention/tile_smem_loader.h (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_decoder_kernels.cu (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_decoder_kernels.h (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_gemm.cc (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_kernels.cu (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_kernels.h (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_utils.cu (100%) rename src/{fastertransformer => turbomind}/models/llama/llama_utils.h (100%) rename src/{fastertransformer => turbomind}/models/llama/prefix_cache.cu (100%) rename src/{fastertransformer => turbomind}/models/llama/prefix_cache.h (100%) rename src/{fastertransformer => turbomind}/triton_backend/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/triton_backend/libfastertransformer.cc (100%) rename src/{fastertransformer => turbomind}/triton_backend/libtriton_fastertransformer.ldscript (100%) rename src/{fastertransformer => turbomind}/triton_backend/llama/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/triton_backend/llama/LlamaTritonModel.cc (100%) rename src/{fastertransformer => turbomind}/triton_backend/llama/LlamaTritonModel.h (100%) rename src/{fastertransformer => turbomind}/triton_backend/llama/LlamaTritonModelInstance.cc (100%) rename src/{fastertransformer => turbomind}/triton_backend/llama/LlamaTritonModelInstance.h (100%) rename src/{fastertransformer => turbomind}/triton_backend/transformer_triton_backend.cpp (100%) rename src/{fastertransformer => turbomind}/triton_backend/transformer_triton_backend.hpp (100%) rename src/{fastertransformer => turbomind}/triton_backend/triton_utils.hpp (100%) rename src/{fastertransformer => turbomind}/utils/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/utils/IA3.h (100%) rename src/{fastertransformer => turbomind}/utils/ScaleList.h (100%) rename src/{fastertransformer => turbomind}/utils/Tensor.cc (100%) rename src/{fastertransformer => turbomind}/utils/Tensor.h (100%) rename src/{fastertransformer => turbomind}/utils/activation_types.h (100%) rename src/{fastertransformer => turbomind}/utils/allocator.h (100%) rename src/{fastertransformer => turbomind}/utils/conv2d.h (100%) rename src/{fastertransformer => turbomind}/utils/convert_data_type.h (100%) rename src/{fastertransformer => turbomind}/utils/cublasAlgoMap.cc (100%) rename src/{fastertransformer => turbomind}/utils/cublasAlgoMap.h (100%) rename src/{fastertransformer => turbomind}/utils/cublasFP8MMWrapper.cu (100%) rename src/{fastertransformer => turbomind}/utils/cublasFP8MMWrapper.h (100%) rename src/{fastertransformer => turbomind}/utils/cublasINT8MMWrapper.cc (100%) rename src/{fastertransformer => turbomind}/utils/cublasINT8MMWrapper.h (100%) rename src/{fastertransformer => turbomind}/utils/cublasMMWrapper.cc (100%) rename src/{fastertransformer => turbomind}/utils/cublasMMWrapper.h (100%) rename src/{fastertransformer => turbomind}/utils/cuda_bf16_fallbacks.cuh (100%) rename src/{fastertransformer => turbomind}/utils/cuda_bf16_wrapper.h (100%) rename src/{fastertransformer => turbomind}/utils/cuda_fp8_utils.cu (100%) rename src/{fastertransformer => turbomind}/utils/cuda_fp8_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/cuda_type_utils.cuh (100%) rename src/{fastertransformer => turbomind}/utils/cuda_utils.cc (100%) rename src/{fastertransformer => turbomind}/utils/cuda_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/custom_ar_comm.cc (100%) rename src/{fastertransformer => turbomind}/utils/custom_ar_comm.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/CMakeLists.txt (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/decoding_gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/decoding_gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/encoder_gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/encoder_gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/encoder_igemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/encoder_igemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/gpt_gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/gpt_gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/swin_gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/swin_gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/swin_igemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/swin_igemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/t5_gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/t5_gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/xlnet_gemm_func.cc (100%) rename src/{fastertransformer => turbomind}/utils/gemm_test/xlnet_gemm_func.h (100%) rename src/{fastertransformer => turbomind}/utils/gpu_buf.h (100%) rename src/{fastertransformer => turbomind}/utils/instance_comm.h (100%) rename src/{fastertransformer => turbomind}/utils/logger.cc (100%) rename src/{fastertransformer => turbomind}/utils/logger.h (100%) rename src/{fastertransformer => turbomind}/utils/memory_utils.cu (100%) rename src/{fastertransformer => turbomind}/utils/memory_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/mpi_utils.cc (100%) rename src/{fastertransformer => turbomind}/utils/mpi_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/nccl_utils.cc (100%) rename src/{fastertransformer => turbomind}/utils/nccl_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/nvtx_utils.cc (100%) rename src/{fastertransformer => turbomind}/utils/nvtx_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/prompt_learning.h (100%) rename src/{fastertransformer => turbomind}/utils/string_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/test_utils.h (100%) rename src/{fastertransformer => turbomind}/utils/wenet_conv2d.h (100%) rename src/{fastertransformer => turbomind}/utils/word_list.cc (100%) rename src/{fastertransformer => turbomind}/utils/word_list.h (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d86b1d93c9..b5b19bd136 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -add_subdirectory(fastertransformer) +add_subdirectory(turbomind) diff --git a/src/fastertransformer/CMakeLists.txt b/src/turbomind/CMakeLists.txt similarity index 100% rename from src/fastertransformer/CMakeLists.txt rename to src/turbomind/CMakeLists.txt diff --git a/src/fastertransformer/kernels/CMakeLists.txt b/src/turbomind/kernels/CMakeLists.txt similarity index 100% rename from src/fastertransformer/kernels/CMakeLists.txt rename to src/turbomind/kernels/CMakeLists.txt diff --git a/src/fastertransformer/kernels/activation_kernels.cu b/src/turbomind/kernels/activation_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/activation_kernels.cu rename to src/turbomind/kernels/activation_kernels.cu diff --git a/src/fastertransformer/kernels/activation_kernels.h b/src/turbomind/kernels/activation_kernels.h similarity index 100% rename from src/fastertransformer/kernels/activation_kernels.h rename to src/turbomind/kernels/activation_kernels.h diff --git a/src/fastertransformer/kernels/ban_bad_words.cu b/src/turbomind/kernels/ban_bad_words.cu similarity index 100% rename from src/fastertransformer/kernels/ban_bad_words.cu rename to src/turbomind/kernels/ban_bad_words.cu diff --git a/src/fastertransformer/kernels/ban_bad_words.h b/src/turbomind/kernels/ban_bad_words.h similarity index 100% rename from src/fastertransformer/kernels/ban_bad_words.h rename to src/turbomind/kernels/ban_bad_words.h diff --git a/src/fastertransformer/kernels/beam_search_penalty_kernels.cu b/src/turbomind/kernels/beam_search_penalty_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/beam_search_penalty_kernels.cu rename to src/turbomind/kernels/beam_search_penalty_kernels.cu diff --git a/src/fastertransformer/kernels/beam_search_penalty_kernels.h b/src/turbomind/kernels/beam_search_penalty_kernels.h similarity index 100% rename from src/fastertransformer/kernels/beam_search_penalty_kernels.h rename to src/turbomind/kernels/beam_search_penalty_kernels.h diff --git a/src/fastertransformer/kernels/beam_search_topk_kernels.cu b/src/turbomind/kernels/beam_search_topk_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/beam_search_topk_kernels.cu rename to src/turbomind/kernels/beam_search_topk_kernels.cu diff --git a/src/fastertransformer/kernels/beam_search_topk_kernels.h b/src/turbomind/kernels/beam_search_topk_kernels.h similarity index 100% rename from src/fastertransformer/kernels/beam_search_topk_kernels.h rename to src/turbomind/kernels/beam_search_topk_kernels.h diff --git a/src/fastertransformer/kernels/bert_preprocess_kernels.cu b/src/turbomind/kernels/bert_preprocess_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/bert_preprocess_kernels.cu rename to src/turbomind/kernels/bert_preprocess_kernels.cu diff --git a/src/fastertransformer/kernels/bert_preprocess_kernels.h b/src/turbomind/kernels/bert_preprocess_kernels.h similarity index 100% rename from src/fastertransformer/kernels/bert_preprocess_kernels.h rename to src/turbomind/kernels/bert_preprocess_kernels.h diff --git a/src/fastertransformer/kernels/custom_ar_kernels.cu b/src/turbomind/kernels/custom_ar_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/custom_ar_kernels.cu rename to src/turbomind/kernels/custom_ar_kernels.cu diff --git a/src/fastertransformer/kernels/custom_ar_kernels.h b/src/turbomind/kernels/custom_ar_kernels.h similarity index 100% rename from src/fastertransformer/kernels/custom_ar_kernels.h rename to src/turbomind/kernels/custom_ar_kernels.h diff --git a/src/fastertransformer/kernels/decoder_masked_multihead_attention.cu b/src/turbomind/kernels/decoder_masked_multihead_attention.cu similarity index 100% rename from src/fastertransformer/kernels/decoder_masked_multihead_attention.cu rename to src/turbomind/kernels/decoder_masked_multihead_attention.cu diff --git a/src/fastertransformer/kernels/decoder_masked_multihead_attention.h b/src/turbomind/kernels/decoder_masked_multihead_attention.h similarity index 100% rename from src/fastertransformer/kernels/decoder_masked_multihead_attention.h rename to src/turbomind/kernels/decoder_masked_multihead_attention.h diff --git a/src/fastertransformer/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_128.cu b/src/turbomind/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_128.cu similarity index 100% rename from src/fastertransformer/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_128.cu rename to src/turbomind/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_128.cu diff --git a/src/fastertransformer/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.cuh b/src/turbomind/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.cuh similarity index 100% rename from src/fastertransformer/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.cuh rename to src/turbomind/kernels/decoder_masked_multihead_attention/decoder_masked_multihead_attention_template.cuh diff --git a/src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h b/src/turbomind/kernels/decoder_masked_multihead_attention_utils.h similarity index 100% rename from src/fastertransformer/kernels/decoder_masked_multihead_attention_utils.h rename to src/turbomind/kernels/decoder_masked_multihead_attention_utils.h diff --git a/src/fastertransformer/kernels/decoding_kernels.cu b/src/turbomind/kernels/decoding_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/decoding_kernels.cu rename to src/turbomind/kernels/decoding_kernels.cu diff --git a/src/fastertransformer/kernels/decoding_kernels.h b/src/turbomind/kernels/decoding_kernels.h similarity index 100% rename from src/fastertransformer/kernels/decoding_kernels.h rename to src/turbomind/kernels/decoding_kernels.h diff --git a/src/fastertransformer/kernels/gen_relative_pos_bias.cu b/src/turbomind/kernels/gen_relative_pos_bias.cu similarity index 100% rename from src/fastertransformer/kernels/gen_relative_pos_bias.cu rename to src/turbomind/kernels/gen_relative_pos_bias.cu diff --git a/src/fastertransformer/kernels/gen_relative_pos_bias.h b/src/turbomind/kernels/gen_relative_pos_bias.h similarity index 100% rename from src/fastertransformer/kernels/gen_relative_pos_bias.h rename to src/turbomind/kernels/gen_relative_pos_bias.h diff --git a/src/fastertransformer/kernels/gpt_kernels.cu b/src/turbomind/kernels/gpt_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/gpt_kernels.cu rename to src/turbomind/kernels/gpt_kernels.cu diff --git a/src/fastertransformer/kernels/gpt_kernels.h b/src/turbomind/kernels/gpt_kernels.h similarity index 100% rename from src/fastertransformer/kernels/gpt_kernels.h rename to src/turbomind/kernels/gpt_kernels.h diff --git a/src/fastertransformer/kernels/logprob_kernels.cu b/src/turbomind/kernels/logprob_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/logprob_kernels.cu rename to src/turbomind/kernels/logprob_kernels.cu diff --git a/src/fastertransformer/kernels/logprob_kernels.h b/src/turbomind/kernels/logprob_kernels.h similarity index 100% rename from src/fastertransformer/kernels/logprob_kernels.h rename to src/turbomind/kernels/logprob_kernels.h diff --git a/src/fastertransformer/kernels/online_softmax_beamsearch_kernels.cu b/src/turbomind/kernels/online_softmax_beamsearch_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/online_softmax_beamsearch_kernels.cu rename to src/turbomind/kernels/online_softmax_beamsearch_kernels.cu diff --git a/src/fastertransformer/kernels/online_softmax_beamsearch_kernels.h b/src/turbomind/kernels/online_softmax_beamsearch_kernels.h similarity index 100% rename from src/fastertransformer/kernels/online_softmax_beamsearch_kernels.h rename to src/turbomind/kernels/online_softmax_beamsearch_kernels.h diff --git a/src/fastertransformer/kernels/penalty_types.h b/src/turbomind/kernels/penalty_types.h similarity index 100% rename from src/fastertransformer/kernels/penalty_types.h rename to src/turbomind/kernels/penalty_types.h diff --git a/src/fastertransformer/kernels/reduce_kernel_utils.cuh b/src/turbomind/kernels/reduce_kernel_utils.cuh similarity index 100% rename from src/fastertransformer/kernels/reduce_kernel_utils.cuh rename to src/turbomind/kernels/reduce_kernel_utils.cuh diff --git a/src/fastertransformer/kernels/sampling_penalty_kernels.cu b/src/turbomind/kernels/sampling_penalty_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/sampling_penalty_kernels.cu rename to src/turbomind/kernels/sampling_penalty_kernels.cu diff --git a/src/fastertransformer/kernels/sampling_penalty_kernels.h b/src/turbomind/kernels/sampling_penalty_kernels.h similarity index 100% rename from src/fastertransformer/kernels/sampling_penalty_kernels.h rename to src/turbomind/kernels/sampling_penalty_kernels.h diff --git a/src/fastertransformer/kernels/sampling_topk_kernels.cu b/src/turbomind/kernels/sampling_topk_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/sampling_topk_kernels.cu rename to src/turbomind/kernels/sampling_topk_kernels.cu diff --git a/src/fastertransformer/kernels/sampling_topk_kernels.h b/src/turbomind/kernels/sampling_topk_kernels.h similarity index 100% rename from src/fastertransformer/kernels/sampling_topk_kernels.h rename to src/turbomind/kernels/sampling_topk_kernels.h diff --git a/src/fastertransformer/kernels/sampling_topp_kernels.cu b/src/turbomind/kernels/sampling_topp_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/sampling_topp_kernels.cu rename to src/turbomind/kernels/sampling_topp_kernels.cu diff --git a/src/fastertransformer/kernels/sampling_topp_kernels.h b/src/turbomind/kernels/sampling_topp_kernels.h similarity index 100% rename from src/fastertransformer/kernels/sampling_topp_kernels.h rename to src/turbomind/kernels/sampling_topp_kernels.h diff --git a/src/fastertransformer/kernels/stop_criteria_kernels.cu b/src/turbomind/kernels/stop_criteria_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/stop_criteria_kernels.cu rename to src/turbomind/kernels/stop_criteria_kernels.cu diff --git a/src/fastertransformer/kernels/stop_criteria_kernels.h b/src/turbomind/kernels/stop_criteria_kernels.h similarity index 100% rename from src/fastertransformer/kernels/stop_criteria_kernels.h rename to src/turbomind/kernels/stop_criteria_kernels.h diff --git a/src/fastertransformer/kernels/unfused_attention_kernels.cu b/src/turbomind/kernels/unfused_attention_kernels.cu similarity index 100% rename from src/fastertransformer/kernels/unfused_attention_kernels.cu rename to src/turbomind/kernels/unfused_attention_kernels.cu diff --git a/src/fastertransformer/kernels/unfused_attention_kernels.h b/src/turbomind/kernels/unfused_attention_kernels.h similarity index 100% rename from src/fastertransformer/kernels/unfused_attention_kernels.h rename to src/turbomind/kernels/unfused_attention_kernels.h diff --git a/src/fastertransformer/layers/BaseLayer.h b/src/turbomind/layers/BaseLayer.h similarity index 100% rename from src/fastertransformer/layers/BaseLayer.h rename to src/turbomind/layers/BaseLayer.h diff --git a/src/fastertransformer/layers/CMakeLists.txt b/src/turbomind/layers/CMakeLists.txt similarity index 100% rename from src/fastertransformer/layers/CMakeLists.txt rename to src/turbomind/layers/CMakeLists.txt diff --git a/src/fastertransformer/layers/DenseWeight.h b/src/turbomind/layers/DenseWeight.h similarity index 100% rename from src/fastertransformer/layers/DenseWeight.h rename to src/turbomind/layers/DenseWeight.h diff --git a/src/fastertransformer/layers/DynamicDecodeBaseLayer.h b/src/turbomind/layers/DynamicDecodeBaseLayer.h similarity index 100% rename from src/fastertransformer/layers/DynamicDecodeBaseLayer.h rename to src/turbomind/layers/DynamicDecodeBaseLayer.h diff --git a/src/fastertransformer/layers/DynamicDecodeLayer.cc b/src/turbomind/layers/DynamicDecodeLayer.cc similarity index 100% rename from src/fastertransformer/layers/DynamicDecodeLayer.cc rename to src/turbomind/layers/DynamicDecodeLayer.cc diff --git a/src/fastertransformer/layers/DynamicDecodeLayer.h b/src/turbomind/layers/DynamicDecodeLayer.h similarity index 100% rename from src/fastertransformer/layers/DynamicDecodeLayer.h rename to src/turbomind/layers/DynamicDecodeLayer.h diff --git a/src/fastertransformer/layers/FfnFP8Layer.cc b/src/turbomind/layers/FfnFP8Layer.cc similarity index 100% rename from src/fastertransformer/layers/FfnFP8Layer.cc rename to src/turbomind/layers/FfnFP8Layer.cc diff --git a/src/fastertransformer/layers/FfnFP8Layer.h b/src/turbomind/layers/FfnFP8Layer.h similarity index 100% rename from src/fastertransformer/layers/FfnFP8Layer.h rename to src/turbomind/layers/FfnFP8Layer.h diff --git a/src/fastertransformer/layers/FfnFP8Weight.h b/src/turbomind/layers/FfnFP8Weight.h similarity index 100% rename from src/fastertransformer/layers/FfnFP8Weight.h rename to src/turbomind/layers/FfnFP8Weight.h diff --git a/src/fastertransformer/layers/FfnINT8Weight.h b/src/turbomind/layers/FfnINT8Weight.h similarity index 100% rename from src/fastertransformer/layers/FfnINT8Weight.h rename to src/turbomind/layers/FfnINT8Weight.h diff --git a/src/fastertransformer/layers/FfnLayer.cc b/src/turbomind/layers/FfnLayer.cc similarity index 100% rename from src/fastertransformer/layers/FfnLayer.cc rename to src/turbomind/layers/FfnLayer.cc diff --git a/src/fastertransformer/layers/FfnLayer.h b/src/turbomind/layers/FfnLayer.h similarity index 100% rename from src/fastertransformer/layers/FfnLayer.h rename to src/turbomind/layers/FfnLayer.h diff --git a/src/fastertransformer/layers/FfnLayerINT8.cc b/src/turbomind/layers/FfnLayerINT8.cc similarity index 100% rename from src/fastertransformer/layers/FfnLayerINT8.cc rename to src/turbomind/layers/FfnLayerINT8.cc diff --git a/src/fastertransformer/layers/FfnLayerINT8.h b/src/turbomind/layers/FfnLayerINT8.h similarity index 100% rename from src/fastertransformer/layers/FfnLayerINT8.h rename to src/turbomind/layers/FfnLayerINT8.h diff --git a/src/fastertransformer/layers/FfnWeight.h b/src/turbomind/layers/FfnWeight.h similarity index 100% rename from src/fastertransformer/layers/FfnWeight.h rename to src/turbomind/layers/FfnWeight.h diff --git a/src/fastertransformer/layers/attention_layers/AttentionWeight.h b/src/turbomind/layers/attention_layers/AttentionWeight.h similarity index 100% rename from src/fastertransformer/layers/attention_layers/AttentionWeight.h rename to src/turbomind/layers/attention_layers/AttentionWeight.h diff --git a/src/fastertransformer/layers/attention_layers/BaseAttentionLayer.h b/src/turbomind/layers/attention_layers/BaseAttentionLayer.h similarity index 100% rename from src/fastertransformer/layers/attention_layers/BaseAttentionLayer.h rename to src/turbomind/layers/attention_layers/BaseAttentionLayer.h diff --git a/src/fastertransformer/layers/attention_layers/CMakeLists.txt b/src/turbomind/layers/attention_layers/CMakeLists.txt similarity index 100% rename from src/fastertransformer/layers/attention_layers/CMakeLists.txt rename to src/turbomind/layers/attention_layers/CMakeLists.txt diff --git a/src/fastertransformer/layers/attention_layers_fp8/AttentionFP8Weight.h b/src/turbomind/layers/attention_layers_fp8/AttentionFP8Weight.h similarity index 100% rename from src/fastertransformer/layers/attention_layers_fp8/AttentionFP8Weight.h rename to src/turbomind/layers/attention_layers_fp8/AttentionFP8Weight.h diff --git a/src/fastertransformer/layers/attention_layers_fp8/BaseAttentionFP8Layer.h b/src/turbomind/layers/attention_layers_fp8/BaseAttentionFP8Layer.h similarity index 100% rename from src/fastertransformer/layers/attention_layers_fp8/BaseAttentionFP8Layer.h rename to src/turbomind/layers/attention_layers_fp8/BaseAttentionFP8Layer.h diff --git a/src/fastertransformer/layers/attention_layers_fp8/CMakeLists.txt b/src/turbomind/layers/attention_layers_fp8/CMakeLists.txt similarity index 100% rename from src/fastertransformer/layers/attention_layers_fp8/CMakeLists.txt rename to src/turbomind/layers/attention_layers_fp8/CMakeLists.txt diff --git a/src/fastertransformer/layers/attention_layers_int8/AttentionINT8Weight.h b/src/turbomind/layers/attention_layers_int8/AttentionINT8Weight.h similarity index 100% rename from src/fastertransformer/layers/attention_layers_int8/AttentionINT8Weight.h rename to src/turbomind/layers/attention_layers_int8/AttentionINT8Weight.h diff --git a/src/fastertransformer/layers/attention_layers_int8/CMakeLists.txt b/src/turbomind/layers/attention_layers_int8/CMakeLists.txt similarity index 100% rename from src/fastertransformer/layers/attention_layers_int8/CMakeLists.txt rename to src/turbomind/layers/attention_layers_int8/CMakeLists.txt diff --git a/src/fastertransformer/layers/beam_search_layers/BaseBeamSearchLayer.cu b/src/turbomind/layers/beam_search_layers/BaseBeamSearchLayer.cu similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/BaseBeamSearchLayer.cu rename to src/turbomind/layers/beam_search_layers/BaseBeamSearchLayer.cu diff --git a/src/fastertransformer/layers/beam_search_layers/BaseBeamSearchLayer.h b/src/turbomind/layers/beam_search_layers/BaseBeamSearchLayer.h similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/BaseBeamSearchLayer.h rename to src/turbomind/layers/beam_search_layers/BaseBeamSearchLayer.h diff --git a/src/fastertransformer/layers/beam_search_layers/BeamSearchLayer.cu b/src/turbomind/layers/beam_search_layers/BeamSearchLayer.cu similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/BeamSearchLayer.cu rename to src/turbomind/layers/beam_search_layers/BeamSearchLayer.cu diff --git a/src/fastertransformer/layers/beam_search_layers/BeamSearchLayer.h b/src/turbomind/layers/beam_search_layers/BeamSearchLayer.h similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/BeamSearchLayer.h rename to src/turbomind/layers/beam_search_layers/BeamSearchLayer.h diff --git a/src/fastertransformer/layers/beam_search_layers/CMakeLists.txt b/src/turbomind/layers/beam_search_layers/CMakeLists.txt similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/CMakeLists.txt rename to src/turbomind/layers/beam_search_layers/CMakeLists.txt diff --git a/src/fastertransformer/layers/beam_search_layers/OnlineBeamSearchLayer.cu b/src/turbomind/layers/beam_search_layers/OnlineBeamSearchLayer.cu similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/OnlineBeamSearchLayer.cu rename to src/turbomind/layers/beam_search_layers/OnlineBeamSearchLayer.cu diff --git a/src/fastertransformer/layers/beam_search_layers/OnlineBeamSearchLayer.h b/src/turbomind/layers/beam_search_layers/OnlineBeamSearchLayer.h similarity index 100% rename from src/fastertransformer/layers/beam_search_layers/OnlineBeamSearchLayer.h rename to src/turbomind/layers/beam_search_layers/OnlineBeamSearchLayer.h diff --git a/src/fastertransformer/layers/sampling_layers/BaseSamplingLayer.cc b/src/turbomind/layers/sampling_layers/BaseSamplingLayer.cc similarity index 100% rename from src/fastertransformer/layers/sampling_layers/BaseSamplingLayer.cc rename to src/turbomind/layers/sampling_layers/BaseSamplingLayer.cc diff --git a/src/fastertransformer/layers/sampling_layers/BaseSamplingLayer.h b/src/turbomind/layers/sampling_layers/BaseSamplingLayer.h similarity index 100% rename from src/fastertransformer/layers/sampling_layers/BaseSamplingLayer.h rename to src/turbomind/layers/sampling_layers/BaseSamplingLayer.h diff --git a/src/fastertransformer/layers/sampling_layers/CMakeLists.txt b/src/turbomind/layers/sampling_layers/CMakeLists.txt similarity index 100% rename from src/fastertransformer/layers/sampling_layers/CMakeLists.txt rename to src/turbomind/layers/sampling_layers/CMakeLists.txt diff --git a/src/fastertransformer/layers/sampling_layers/TopKSamplingLayer.cu b/src/turbomind/layers/sampling_layers/TopKSamplingLayer.cu similarity index 100% rename from src/fastertransformer/layers/sampling_layers/TopKSamplingLayer.cu rename to src/turbomind/layers/sampling_layers/TopKSamplingLayer.cu diff --git a/src/fastertransformer/layers/sampling_layers/TopKSamplingLayer.h b/src/turbomind/layers/sampling_layers/TopKSamplingLayer.h similarity index 100% rename from src/fastertransformer/layers/sampling_layers/TopKSamplingLayer.h rename to src/turbomind/layers/sampling_layers/TopKSamplingLayer.h diff --git a/src/fastertransformer/layers/sampling_layers/TopPSamplingLayer.cu b/src/turbomind/layers/sampling_layers/TopPSamplingLayer.cu similarity index 100% rename from src/fastertransformer/layers/sampling_layers/TopPSamplingLayer.cu rename to src/turbomind/layers/sampling_layers/TopPSamplingLayer.cu diff --git a/src/fastertransformer/layers/sampling_layers/TopPSamplingLayer.h b/src/turbomind/layers/sampling_layers/TopPSamplingLayer.h similarity index 100% rename from src/fastertransformer/layers/sampling_layers/TopPSamplingLayer.h rename to src/turbomind/layers/sampling_layers/TopPSamplingLayer.h diff --git a/src/fastertransformer/models/BaseWeight.h b/src/turbomind/models/BaseWeight.h similarity index 100% rename from src/fastertransformer/models/BaseWeight.h rename to src/turbomind/models/BaseWeight.h diff --git a/src/fastertransformer/models/CMakeLists.txt b/src/turbomind/models/CMakeLists.txt similarity index 100% rename from src/fastertransformer/models/CMakeLists.txt rename to src/turbomind/models/CMakeLists.txt diff --git a/src/fastertransformer/models/llama/Barrier.h b/src/turbomind/models/llama/Barrier.h similarity index 100% rename from src/fastertransformer/models/llama/Barrier.h rename to src/turbomind/models/llama/Barrier.h diff --git a/src/fastertransformer/models/llama/CMakeLists.txt b/src/turbomind/models/llama/CMakeLists.txt similarity index 100% rename from src/fastertransformer/models/llama/CMakeLists.txt rename to src/turbomind/models/llama/CMakeLists.txt diff --git a/src/fastertransformer/models/llama/LlamaBatch.cc b/src/turbomind/models/llama/LlamaBatch.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaBatch.cc rename to src/turbomind/models/llama/LlamaBatch.cc diff --git a/src/fastertransformer/models/llama/LlamaBatch.h b/src/turbomind/models/llama/LlamaBatch.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaBatch.h rename to src/turbomind/models/llama/LlamaBatch.h diff --git a/src/fastertransformer/models/llama/LlamaCacheManager.cc b/src/turbomind/models/llama/LlamaCacheManager.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaCacheManager.cc rename to src/turbomind/models/llama/LlamaCacheManager.cc diff --git a/src/fastertransformer/models/llama/LlamaCacheManager.h b/src/turbomind/models/llama/LlamaCacheManager.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaCacheManager.h rename to src/turbomind/models/llama/LlamaCacheManager.h diff --git a/src/fastertransformer/models/llama/LlamaContextAttentionLayer.cc b/src/turbomind/models/llama/LlamaContextAttentionLayer.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaContextAttentionLayer.cc rename to src/turbomind/models/llama/LlamaContextAttentionLayer.cc diff --git a/src/fastertransformer/models/llama/LlamaContextAttentionLayer.h b/src/turbomind/models/llama/LlamaContextAttentionLayer.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaContextAttentionLayer.h rename to src/turbomind/models/llama/LlamaContextAttentionLayer.h diff --git a/src/fastertransformer/models/llama/LlamaContextDecoder.cc b/src/turbomind/models/llama/LlamaContextDecoder.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaContextDecoder.cc rename to src/turbomind/models/llama/LlamaContextDecoder.cc diff --git a/src/fastertransformer/models/llama/LlamaContextDecoder.h b/src/turbomind/models/llama/LlamaContextDecoder.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaContextDecoder.h rename to src/turbomind/models/llama/LlamaContextDecoder.h diff --git a/src/fastertransformer/models/llama/LlamaDecoder.cc b/src/turbomind/models/llama/LlamaDecoder.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaDecoder.cc rename to src/turbomind/models/llama/LlamaDecoder.cc diff --git a/src/fastertransformer/models/llama/LlamaDecoder.h b/src/turbomind/models/llama/LlamaDecoder.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaDecoder.h rename to src/turbomind/models/llama/LlamaDecoder.h diff --git a/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc b/src/turbomind/models/llama/LlamaDecoderLayerWeight.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaDecoderLayerWeight.cc rename to src/turbomind/models/llama/LlamaDecoderLayerWeight.cc diff --git a/src/fastertransformer/models/llama/LlamaDecoderLayerWeight.h b/src/turbomind/models/llama/LlamaDecoderLayerWeight.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaDecoderLayerWeight.h rename to src/turbomind/models/llama/LlamaDecoderLayerWeight.h diff --git a/src/fastertransformer/models/llama/LlamaDecoderSelfAttentionLayer.cc b/src/turbomind/models/llama/LlamaDecoderSelfAttentionLayer.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaDecoderSelfAttentionLayer.cc rename to src/turbomind/models/llama/LlamaDecoderSelfAttentionLayer.cc diff --git a/src/fastertransformer/models/llama/LlamaDecoderSelfAttentionLayer.h b/src/turbomind/models/llama/LlamaDecoderSelfAttentionLayer.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaDecoderSelfAttentionLayer.h rename to src/turbomind/models/llama/LlamaDecoderSelfAttentionLayer.h diff --git a/src/fastertransformer/models/llama/LlamaDenseWeight.h b/src/turbomind/models/llama/LlamaDenseWeight.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaDenseWeight.h rename to src/turbomind/models/llama/LlamaDenseWeight.h diff --git a/src/fastertransformer/models/llama/LlamaFfnLayer.cc b/src/turbomind/models/llama/LlamaFfnLayer.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaFfnLayer.cc rename to src/turbomind/models/llama/LlamaFfnLayer.cc diff --git a/src/fastertransformer/models/llama/LlamaFfnLayer.h b/src/turbomind/models/llama/LlamaFfnLayer.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaFfnLayer.h rename to src/turbomind/models/llama/LlamaFfnLayer.h diff --git a/src/fastertransformer/models/llama/LlamaInstanceComm.h b/src/turbomind/models/llama/LlamaInstanceComm.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaInstanceComm.h rename to src/turbomind/models/llama/LlamaInstanceComm.h diff --git a/src/fastertransformer/models/llama/LlamaLinear.h b/src/turbomind/models/llama/LlamaLinear.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaLinear.h rename to src/turbomind/models/llama/LlamaLinear.h diff --git a/src/fastertransformer/models/llama/LlamaNcclGuard.h b/src/turbomind/models/llama/LlamaNcclGuard.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaNcclGuard.h rename to src/turbomind/models/llama/LlamaNcclGuard.h diff --git a/src/fastertransformer/models/llama/LlamaV2.cc b/src/turbomind/models/llama/LlamaV2.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaV2.cc rename to src/turbomind/models/llama/LlamaV2.cc diff --git a/src/fastertransformer/models/llama/LlamaV2.h b/src/turbomind/models/llama/LlamaV2.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaV2.h rename to src/turbomind/models/llama/LlamaV2.h diff --git a/src/fastertransformer/models/llama/LlamaWeight.cc b/src/turbomind/models/llama/LlamaWeight.cc similarity index 100% rename from src/fastertransformer/models/llama/LlamaWeight.cc rename to src/turbomind/models/llama/LlamaWeight.cc diff --git a/src/fastertransformer/models/llama/LlamaWeight.h b/src/turbomind/models/llama/LlamaWeight.h similarity index 100% rename from src/fastertransformer/models/llama/LlamaWeight.h rename to src/turbomind/models/llama/LlamaWeight.h diff --git a/src/fastertransformer/models/llama/Request.h b/src/turbomind/models/llama/Request.h similarity index 100% rename from src/fastertransformer/models/llama/Request.h rename to src/turbomind/models/llama/Request.h diff --git a/src/fastertransformer/models/llama/fused_multi_head_attention/CMakeLists.txt b/src/turbomind/models/llama/fused_multi_head_attention/CMakeLists.txt similarity index 100% rename from src/fastertransformer/models/llama/fused_multi_head_attention/CMakeLists.txt rename to src/turbomind/models/llama/fused_multi_head_attention/CMakeLists.txt diff --git a/src/fastertransformer/models/llama/fused_multi_head_attention/llama_flash_attention_kernel.cu b/src/turbomind/models/llama/fused_multi_head_attention/llama_flash_attention_kernel.cu similarity index 100% rename from src/fastertransformer/models/llama/fused_multi_head_attention/llama_flash_attention_kernel.cu rename to src/turbomind/models/llama/fused_multi_head_attention/llama_flash_attention_kernel.cu diff --git a/src/fastertransformer/models/llama/fused_multi_head_attention/mma_accum_lambda_iterator.h b/src/turbomind/models/llama/fused_multi_head_attention/mma_accum_lambda_iterator.h similarity index 100% rename from src/fastertransformer/models/llama/fused_multi_head_attention/mma_accum_lambda_iterator.h rename to src/turbomind/models/llama/fused_multi_head_attention/mma_accum_lambda_iterator.h diff --git a/src/fastertransformer/models/llama/fused_multi_head_attention/tile_smem_loader.h b/src/turbomind/models/llama/fused_multi_head_attention/tile_smem_loader.h similarity index 100% rename from src/fastertransformer/models/llama/fused_multi_head_attention/tile_smem_loader.h rename to src/turbomind/models/llama/fused_multi_head_attention/tile_smem_loader.h diff --git a/src/fastertransformer/models/llama/llama_decoder_kernels.cu b/src/turbomind/models/llama/llama_decoder_kernels.cu similarity index 100% rename from src/fastertransformer/models/llama/llama_decoder_kernels.cu rename to src/turbomind/models/llama/llama_decoder_kernels.cu diff --git a/src/fastertransformer/models/llama/llama_decoder_kernels.h b/src/turbomind/models/llama/llama_decoder_kernels.h similarity index 100% rename from src/fastertransformer/models/llama/llama_decoder_kernels.h rename to src/turbomind/models/llama/llama_decoder_kernels.h diff --git a/src/fastertransformer/models/llama/llama_gemm.cc b/src/turbomind/models/llama/llama_gemm.cc similarity index 100% rename from src/fastertransformer/models/llama/llama_gemm.cc rename to src/turbomind/models/llama/llama_gemm.cc diff --git a/src/fastertransformer/models/llama/llama_kernels.cu b/src/turbomind/models/llama/llama_kernels.cu similarity index 100% rename from src/fastertransformer/models/llama/llama_kernels.cu rename to src/turbomind/models/llama/llama_kernels.cu diff --git a/src/fastertransformer/models/llama/llama_kernels.h b/src/turbomind/models/llama/llama_kernels.h similarity index 100% rename from src/fastertransformer/models/llama/llama_kernels.h rename to src/turbomind/models/llama/llama_kernels.h diff --git a/src/fastertransformer/models/llama/llama_utils.cu b/src/turbomind/models/llama/llama_utils.cu similarity index 100% rename from src/fastertransformer/models/llama/llama_utils.cu rename to src/turbomind/models/llama/llama_utils.cu diff --git a/src/fastertransformer/models/llama/llama_utils.h b/src/turbomind/models/llama/llama_utils.h similarity index 100% rename from src/fastertransformer/models/llama/llama_utils.h rename to src/turbomind/models/llama/llama_utils.h diff --git a/src/fastertransformer/models/llama/prefix_cache.cu b/src/turbomind/models/llama/prefix_cache.cu similarity index 100% rename from src/fastertransformer/models/llama/prefix_cache.cu rename to src/turbomind/models/llama/prefix_cache.cu diff --git a/src/fastertransformer/models/llama/prefix_cache.h b/src/turbomind/models/llama/prefix_cache.h similarity index 100% rename from src/fastertransformer/models/llama/prefix_cache.h rename to src/turbomind/models/llama/prefix_cache.h diff --git a/src/fastertransformer/triton_backend/CMakeLists.txt b/src/turbomind/triton_backend/CMakeLists.txt similarity index 100% rename from src/fastertransformer/triton_backend/CMakeLists.txt rename to src/turbomind/triton_backend/CMakeLists.txt diff --git a/src/fastertransformer/triton_backend/libfastertransformer.cc b/src/turbomind/triton_backend/libfastertransformer.cc similarity index 100% rename from src/fastertransformer/triton_backend/libfastertransformer.cc rename to src/turbomind/triton_backend/libfastertransformer.cc diff --git a/src/fastertransformer/triton_backend/libtriton_fastertransformer.ldscript b/src/turbomind/triton_backend/libtriton_fastertransformer.ldscript similarity index 100% rename from src/fastertransformer/triton_backend/libtriton_fastertransformer.ldscript rename to src/turbomind/triton_backend/libtriton_fastertransformer.ldscript diff --git a/src/fastertransformer/triton_backend/llama/CMakeLists.txt b/src/turbomind/triton_backend/llama/CMakeLists.txt similarity index 100% rename from src/fastertransformer/triton_backend/llama/CMakeLists.txt rename to src/turbomind/triton_backend/llama/CMakeLists.txt diff --git a/src/fastertransformer/triton_backend/llama/LlamaTritonModel.cc b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc similarity index 100% rename from src/fastertransformer/triton_backend/llama/LlamaTritonModel.cc rename to src/turbomind/triton_backend/llama/LlamaTritonModel.cc diff --git a/src/fastertransformer/triton_backend/llama/LlamaTritonModel.h b/src/turbomind/triton_backend/llama/LlamaTritonModel.h similarity index 100% rename from src/fastertransformer/triton_backend/llama/LlamaTritonModel.h rename to src/turbomind/triton_backend/llama/LlamaTritonModel.h diff --git a/src/fastertransformer/triton_backend/llama/LlamaTritonModelInstance.cc b/src/turbomind/triton_backend/llama/LlamaTritonModelInstance.cc similarity index 100% rename from src/fastertransformer/triton_backend/llama/LlamaTritonModelInstance.cc rename to src/turbomind/triton_backend/llama/LlamaTritonModelInstance.cc diff --git a/src/fastertransformer/triton_backend/llama/LlamaTritonModelInstance.h b/src/turbomind/triton_backend/llama/LlamaTritonModelInstance.h similarity index 100% rename from src/fastertransformer/triton_backend/llama/LlamaTritonModelInstance.h rename to src/turbomind/triton_backend/llama/LlamaTritonModelInstance.h diff --git a/src/fastertransformer/triton_backend/transformer_triton_backend.cpp b/src/turbomind/triton_backend/transformer_triton_backend.cpp similarity index 100% rename from src/fastertransformer/triton_backend/transformer_triton_backend.cpp rename to src/turbomind/triton_backend/transformer_triton_backend.cpp diff --git a/src/fastertransformer/triton_backend/transformer_triton_backend.hpp b/src/turbomind/triton_backend/transformer_triton_backend.hpp similarity index 100% rename from src/fastertransformer/triton_backend/transformer_triton_backend.hpp rename to src/turbomind/triton_backend/transformer_triton_backend.hpp diff --git a/src/fastertransformer/triton_backend/triton_utils.hpp b/src/turbomind/triton_backend/triton_utils.hpp similarity index 100% rename from src/fastertransformer/triton_backend/triton_utils.hpp rename to src/turbomind/triton_backend/triton_utils.hpp diff --git a/src/fastertransformer/utils/CMakeLists.txt b/src/turbomind/utils/CMakeLists.txt similarity index 100% rename from src/fastertransformer/utils/CMakeLists.txt rename to src/turbomind/utils/CMakeLists.txt diff --git a/src/fastertransformer/utils/IA3.h b/src/turbomind/utils/IA3.h similarity index 100% rename from src/fastertransformer/utils/IA3.h rename to src/turbomind/utils/IA3.h diff --git a/src/fastertransformer/utils/ScaleList.h b/src/turbomind/utils/ScaleList.h similarity index 100% rename from src/fastertransformer/utils/ScaleList.h rename to src/turbomind/utils/ScaleList.h diff --git a/src/fastertransformer/utils/Tensor.cc b/src/turbomind/utils/Tensor.cc similarity index 100% rename from src/fastertransformer/utils/Tensor.cc rename to src/turbomind/utils/Tensor.cc diff --git a/src/fastertransformer/utils/Tensor.h b/src/turbomind/utils/Tensor.h similarity index 100% rename from src/fastertransformer/utils/Tensor.h rename to src/turbomind/utils/Tensor.h diff --git a/src/fastertransformer/utils/activation_types.h b/src/turbomind/utils/activation_types.h similarity index 100% rename from src/fastertransformer/utils/activation_types.h rename to src/turbomind/utils/activation_types.h diff --git a/src/fastertransformer/utils/allocator.h b/src/turbomind/utils/allocator.h similarity index 100% rename from src/fastertransformer/utils/allocator.h rename to src/turbomind/utils/allocator.h diff --git a/src/fastertransformer/utils/conv2d.h b/src/turbomind/utils/conv2d.h similarity index 100% rename from src/fastertransformer/utils/conv2d.h rename to src/turbomind/utils/conv2d.h diff --git a/src/fastertransformer/utils/convert_data_type.h b/src/turbomind/utils/convert_data_type.h similarity index 100% rename from src/fastertransformer/utils/convert_data_type.h rename to src/turbomind/utils/convert_data_type.h diff --git a/src/fastertransformer/utils/cublasAlgoMap.cc b/src/turbomind/utils/cublasAlgoMap.cc similarity index 100% rename from src/fastertransformer/utils/cublasAlgoMap.cc rename to src/turbomind/utils/cublasAlgoMap.cc diff --git a/src/fastertransformer/utils/cublasAlgoMap.h b/src/turbomind/utils/cublasAlgoMap.h similarity index 100% rename from src/fastertransformer/utils/cublasAlgoMap.h rename to src/turbomind/utils/cublasAlgoMap.h diff --git a/src/fastertransformer/utils/cublasFP8MMWrapper.cu b/src/turbomind/utils/cublasFP8MMWrapper.cu similarity index 100% rename from src/fastertransformer/utils/cublasFP8MMWrapper.cu rename to src/turbomind/utils/cublasFP8MMWrapper.cu diff --git a/src/fastertransformer/utils/cublasFP8MMWrapper.h b/src/turbomind/utils/cublasFP8MMWrapper.h similarity index 100% rename from src/fastertransformer/utils/cublasFP8MMWrapper.h rename to src/turbomind/utils/cublasFP8MMWrapper.h diff --git a/src/fastertransformer/utils/cublasINT8MMWrapper.cc b/src/turbomind/utils/cublasINT8MMWrapper.cc similarity index 100% rename from src/fastertransformer/utils/cublasINT8MMWrapper.cc rename to src/turbomind/utils/cublasINT8MMWrapper.cc diff --git a/src/fastertransformer/utils/cublasINT8MMWrapper.h b/src/turbomind/utils/cublasINT8MMWrapper.h similarity index 100% rename from src/fastertransformer/utils/cublasINT8MMWrapper.h rename to src/turbomind/utils/cublasINT8MMWrapper.h diff --git a/src/fastertransformer/utils/cublasMMWrapper.cc b/src/turbomind/utils/cublasMMWrapper.cc similarity index 100% rename from src/fastertransformer/utils/cublasMMWrapper.cc rename to src/turbomind/utils/cublasMMWrapper.cc diff --git a/src/fastertransformer/utils/cublasMMWrapper.h b/src/turbomind/utils/cublasMMWrapper.h similarity index 100% rename from src/fastertransformer/utils/cublasMMWrapper.h rename to src/turbomind/utils/cublasMMWrapper.h diff --git a/src/fastertransformer/utils/cuda_bf16_fallbacks.cuh b/src/turbomind/utils/cuda_bf16_fallbacks.cuh similarity index 100% rename from src/fastertransformer/utils/cuda_bf16_fallbacks.cuh rename to src/turbomind/utils/cuda_bf16_fallbacks.cuh diff --git a/src/fastertransformer/utils/cuda_bf16_wrapper.h b/src/turbomind/utils/cuda_bf16_wrapper.h similarity index 100% rename from src/fastertransformer/utils/cuda_bf16_wrapper.h rename to src/turbomind/utils/cuda_bf16_wrapper.h diff --git a/src/fastertransformer/utils/cuda_fp8_utils.cu b/src/turbomind/utils/cuda_fp8_utils.cu similarity index 100% rename from src/fastertransformer/utils/cuda_fp8_utils.cu rename to src/turbomind/utils/cuda_fp8_utils.cu diff --git a/src/fastertransformer/utils/cuda_fp8_utils.h b/src/turbomind/utils/cuda_fp8_utils.h similarity index 100% rename from src/fastertransformer/utils/cuda_fp8_utils.h rename to src/turbomind/utils/cuda_fp8_utils.h diff --git a/src/fastertransformer/utils/cuda_type_utils.cuh b/src/turbomind/utils/cuda_type_utils.cuh similarity index 100% rename from src/fastertransformer/utils/cuda_type_utils.cuh rename to src/turbomind/utils/cuda_type_utils.cuh diff --git a/src/fastertransformer/utils/cuda_utils.cc b/src/turbomind/utils/cuda_utils.cc similarity index 100% rename from src/fastertransformer/utils/cuda_utils.cc rename to src/turbomind/utils/cuda_utils.cc diff --git a/src/fastertransformer/utils/cuda_utils.h b/src/turbomind/utils/cuda_utils.h similarity index 100% rename from src/fastertransformer/utils/cuda_utils.h rename to src/turbomind/utils/cuda_utils.h diff --git a/src/fastertransformer/utils/custom_ar_comm.cc b/src/turbomind/utils/custom_ar_comm.cc similarity index 100% rename from src/fastertransformer/utils/custom_ar_comm.cc rename to src/turbomind/utils/custom_ar_comm.cc diff --git a/src/fastertransformer/utils/custom_ar_comm.h b/src/turbomind/utils/custom_ar_comm.h similarity index 100% rename from src/fastertransformer/utils/custom_ar_comm.h rename to src/turbomind/utils/custom_ar_comm.h diff --git a/src/fastertransformer/utils/gemm.cc b/src/turbomind/utils/gemm.cc similarity index 100% rename from src/fastertransformer/utils/gemm.cc rename to src/turbomind/utils/gemm.cc diff --git a/src/fastertransformer/utils/gemm.h b/src/turbomind/utils/gemm.h similarity index 100% rename from src/fastertransformer/utils/gemm.h rename to src/turbomind/utils/gemm.h diff --git a/src/fastertransformer/utils/gemm_test/CMakeLists.txt b/src/turbomind/utils/gemm_test/CMakeLists.txt similarity index 100% rename from src/fastertransformer/utils/gemm_test/CMakeLists.txt rename to src/turbomind/utils/gemm_test/CMakeLists.txt diff --git a/src/fastertransformer/utils/gemm_test/decoding_gemm_func.cc b/src/turbomind/utils/gemm_test/decoding_gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/decoding_gemm_func.cc rename to src/turbomind/utils/gemm_test/decoding_gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/decoding_gemm_func.h b/src/turbomind/utils/gemm_test/decoding_gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/decoding_gemm_func.h rename to src/turbomind/utils/gemm_test/decoding_gemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/encoder_gemm_func.cc b/src/turbomind/utils/gemm_test/encoder_gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/encoder_gemm_func.cc rename to src/turbomind/utils/gemm_test/encoder_gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/encoder_gemm_func.h b/src/turbomind/utils/gemm_test/encoder_gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/encoder_gemm_func.h rename to src/turbomind/utils/gemm_test/encoder_gemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/encoder_igemm_func.cc b/src/turbomind/utils/gemm_test/encoder_igemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/encoder_igemm_func.cc rename to src/turbomind/utils/gemm_test/encoder_igemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/encoder_igemm_func.h b/src/turbomind/utils/gemm_test/encoder_igemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/encoder_igemm_func.h rename to src/turbomind/utils/gemm_test/encoder_igemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/gemm_func.cc b/src/turbomind/utils/gemm_test/gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/gemm_func.cc rename to src/turbomind/utils/gemm_test/gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/gemm_func.h b/src/turbomind/utils/gemm_test/gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/gemm_func.h rename to src/turbomind/utils/gemm_test/gemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc b/src/turbomind/utils/gemm_test/gpt_gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/gpt_gemm_func.cc rename to src/turbomind/utils/gemm_test/gpt_gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/gpt_gemm_func.h b/src/turbomind/utils/gemm_test/gpt_gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/gpt_gemm_func.h rename to src/turbomind/utils/gemm_test/gpt_gemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/swin_gemm_func.cc b/src/turbomind/utils/gemm_test/swin_gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/swin_gemm_func.cc rename to src/turbomind/utils/gemm_test/swin_gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/swin_gemm_func.h b/src/turbomind/utils/gemm_test/swin_gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/swin_gemm_func.h rename to src/turbomind/utils/gemm_test/swin_gemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/swin_igemm_func.cc b/src/turbomind/utils/gemm_test/swin_igemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/swin_igemm_func.cc rename to src/turbomind/utils/gemm_test/swin_igemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/swin_igemm_func.h b/src/turbomind/utils/gemm_test/swin_igemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/swin_igemm_func.h rename to src/turbomind/utils/gemm_test/swin_igemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/t5_gemm_func.cc b/src/turbomind/utils/gemm_test/t5_gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/t5_gemm_func.cc rename to src/turbomind/utils/gemm_test/t5_gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/t5_gemm_func.h b/src/turbomind/utils/gemm_test/t5_gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/t5_gemm_func.h rename to src/turbomind/utils/gemm_test/t5_gemm_func.h diff --git a/src/fastertransformer/utils/gemm_test/xlnet_gemm_func.cc b/src/turbomind/utils/gemm_test/xlnet_gemm_func.cc similarity index 100% rename from src/fastertransformer/utils/gemm_test/xlnet_gemm_func.cc rename to src/turbomind/utils/gemm_test/xlnet_gemm_func.cc diff --git a/src/fastertransformer/utils/gemm_test/xlnet_gemm_func.h b/src/turbomind/utils/gemm_test/xlnet_gemm_func.h similarity index 100% rename from src/fastertransformer/utils/gemm_test/xlnet_gemm_func.h rename to src/turbomind/utils/gemm_test/xlnet_gemm_func.h diff --git a/src/fastertransformer/utils/gpu_buf.h b/src/turbomind/utils/gpu_buf.h similarity index 100% rename from src/fastertransformer/utils/gpu_buf.h rename to src/turbomind/utils/gpu_buf.h diff --git a/src/fastertransformer/utils/instance_comm.h b/src/turbomind/utils/instance_comm.h similarity index 100% rename from src/fastertransformer/utils/instance_comm.h rename to src/turbomind/utils/instance_comm.h diff --git a/src/fastertransformer/utils/logger.cc b/src/turbomind/utils/logger.cc similarity index 100% rename from src/fastertransformer/utils/logger.cc rename to src/turbomind/utils/logger.cc diff --git a/src/fastertransformer/utils/logger.h b/src/turbomind/utils/logger.h similarity index 100% rename from src/fastertransformer/utils/logger.h rename to src/turbomind/utils/logger.h diff --git a/src/fastertransformer/utils/memory_utils.cu b/src/turbomind/utils/memory_utils.cu similarity index 100% rename from src/fastertransformer/utils/memory_utils.cu rename to src/turbomind/utils/memory_utils.cu diff --git a/src/fastertransformer/utils/memory_utils.h b/src/turbomind/utils/memory_utils.h similarity index 100% rename from src/fastertransformer/utils/memory_utils.h rename to src/turbomind/utils/memory_utils.h diff --git a/src/fastertransformer/utils/mpi_utils.cc b/src/turbomind/utils/mpi_utils.cc similarity index 100% rename from src/fastertransformer/utils/mpi_utils.cc rename to src/turbomind/utils/mpi_utils.cc diff --git a/src/fastertransformer/utils/mpi_utils.h b/src/turbomind/utils/mpi_utils.h similarity index 100% rename from src/fastertransformer/utils/mpi_utils.h rename to src/turbomind/utils/mpi_utils.h diff --git a/src/fastertransformer/utils/nccl_utils.cc b/src/turbomind/utils/nccl_utils.cc similarity index 100% rename from src/fastertransformer/utils/nccl_utils.cc rename to src/turbomind/utils/nccl_utils.cc diff --git a/src/fastertransformer/utils/nccl_utils.h b/src/turbomind/utils/nccl_utils.h similarity index 100% rename from src/fastertransformer/utils/nccl_utils.h rename to src/turbomind/utils/nccl_utils.h diff --git a/src/fastertransformer/utils/nvtx_utils.cc b/src/turbomind/utils/nvtx_utils.cc similarity index 100% rename from src/fastertransformer/utils/nvtx_utils.cc rename to src/turbomind/utils/nvtx_utils.cc diff --git a/src/fastertransformer/utils/nvtx_utils.h b/src/turbomind/utils/nvtx_utils.h similarity index 100% rename from src/fastertransformer/utils/nvtx_utils.h rename to src/turbomind/utils/nvtx_utils.h diff --git a/src/fastertransformer/utils/prompt_learning.h b/src/turbomind/utils/prompt_learning.h similarity index 100% rename from src/fastertransformer/utils/prompt_learning.h rename to src/turbomind/utils/prompt_learning.h diff --git a/src/fastertransformer/utils/string_utils.h b/src/turbomind/utils/string_utils.h similarity index 100% rename from src/fastertransformer/utils/string_utils.h rename to src/turbomind/utils/string_utils.h diff --git a/src/fastertransformer/utils/test_utils.h b/src/turbomind/utils/test_utils.h similarity index 100% rename from src/fastertransformer/utils/test_utils.h rename to src/turbomind/utils/test_utils.h diff --git a/src/fastertransformer/utils/wenet_conv2d.h b/src/turbomind/utils/wenet_conv2d.h similarity index 100% rename from src/fastertransformer/utils/wenet_conv2d.h rename to src/turbomind/utils/wenet_conv2d.h diff --git a/src/fastertransformer/utils/word_list.cc b/src/turbomind/utils/word_list.cc similarity index 100% rename from src/fastertransformer/utils/word_list.cc rename to src/turbomind/utils/word_list.cc diff --git a/src/fastertransformer/utils/word_list.h b/src/turbomind/utils/word_list.h similarity index 100% rename from src/fastertransformer/utils/word_list.h rename to src/turbomind/utils/word_list.h