diff --git a/runtime/onnxruntime/CMakeLists.txt b/runtime/onnxruntime/CMakeLists.txt index cf10f54f3..2e6cd09b1 100644 --- a/runtime/onnxruntime/CMakeLists.txt +++ b/runtime/onnxruntime/CMakeLists.txt @@ -4,6 +4,7 @@ project(FunASROnnx) option(ENABLE_GLOG "Whether to build glog" ON) option(ENABLE_FST "Whether to build openfst" ON) # ITN need openfst compiled +option(ENABLE_FFMPEG "Whether to enable ffmpeg audio decoding" OFF) option(GPU "Whether to build with GPU" OFF) # set(CMAKE_CXX_STANDARD 11) @@ -40,7 +41,9 @@ IF(WIN32) ${PROJECT_SOURCE_DIR}/third_party/glog/src/glog/vlog_is_on.h) ELSE() link_directories(${ONNXRUNTIME_DIR}/lib) - link_directories(${FFMPEG_DIR}/lib) + if(ENABLE_FFMPEG AND NOT APPLE) + link_directories(${FFMPEG_DIR}/lib) + endif() endif() include_directories(${CMAKE_SOURCE_DIR}/src) include_directories(${PROJECT_SOURCE_DIR}/third_party/kaldi-native-fbank) diff --git a/runtime/onnxruntime/bin/CMakeLists.txt b/runtime/onnxruntime/bin/CMakeLists.txt index 0ca7f1eb9..a9f79d077 100644 --- a/runtime/onnxruntime/bin/CMakeLists.txt +++ b/runtime/onnxruntime/bin/CMakeLists.txt @@ -4,7 +4,9 @@ if(WIN32) add_compile_options("$<$:/execution-charset:utf-8>") add_compile_options("$<$:/source-charset:utf-8>") include_directories(${ONNXRUNTIME_DIR}/include) +if(ENABLE_FFMPEG) include_directories(${FFMPEG_DIR}/include) +endif() include_directories(${PROJECT_SOURCE_DIR}/third_party) SET(RELATION_SOURCE "../src/resample.cpp" "../src/util.cpp" "../src/alignedmem.cpp" "../src/encode_converter.cpp") endif() diff --git a/runtime/onnxruntime/realtime-mic b/runtime/onnxruntime/realtime-mic new file mode 160000 index 000000000..3154dd5bb --- /dev/null +++ b/runtime/onnxruntime/realtime-mic @@ -0,0 +1 @@ +Subproject commit 3154dd5bb6c319d62c777ddbbd777551d29aa544 diff --git a/runtime/onnxruntime/src/CMakeLists.txt b/runtime/onnxruntime/src/CMakeLists.txt index ad83fe739..57960edce 100644 --- a/runtime/onnxruntime/src/CMakeLists.txt +++ b/runtime/onnxruntime/src/CMakeLists.txt @@ -11,7 +11,7 @@ if(GPU) set(files ${files} "${CMAKE_CURRENT_SOURCE_DIR}/paraformer-torch.cpp") endif() -message("files: "${files}) +message("files: ${files}") if(WIN32) add_compile_options("$<$:/execution-charset:utf-8>") @@ -22,22 +22,25 @@ endif() add_library(funasr SHARED ${files}) if(WIN32) - set(EXTRA_LIBS yaml-cpp csrc kaldi-decoder fst glog gflags avutil avcodec avformat swresample onnxruntime) + set(EXTRA_LIBS yaml-cpp csrc kaldi-decoder fst glog gflags onnxruntime) include_directories(${ONNXRUNTIME_DIR}/include) - include_directories(${FFMPEG_DIR}/include) target_link_directories(funasr PUBLIC ${ONNXRUNTIME_DIR}/lib) - target_link_directories(funasr PUBLIC ${FFMPEG_DIR}/lib) target_compile_definitions(funasr PUBLIC -D_FUNASR_API_EXPORT -DNOMINMAX -DYAML_CPP_DLL) else() - set(EXTRA_LIBS pthread yaml-cpp csrc kaldi-decoder fst glog gflags avutil avcodec avformat swresample) + set(EXTRA_LIBS pthread yaml-cpp csrc kaldi-decoder fst glog gflags) include_directories(${ONNXRUNTIME_DIR}/include) - include_directories(${FFMPEG_DIR}/include) if(APPLE) target_link_directories(funasr PUBLIC ${ONNXRUNTIME_DIR}/lib) - target_link_directories(funasr PUBLIC ${FFMPEG_DIR}/lib) endif(APPLE) endif() +if(ENABLE_FFMPEG AND NOT APPLE) + target_compile_definitions(funasr PUBLIC -DENABLE_FFMPEG) + include_directories(${FFMPEG_DIR}/include) + target_link_directories(funasr PUBLIC ${FFMPEG_DIR}/lib) + target_link_libraries(funasr PUBLIC avutil avcodec avformat swresample) +endif() + if(GPU) set(TORCH_DEPS torch torch_cuda torch_cpu c10 c10_cuda torch_blade ral_base_context) endif() diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp index 22a9ecd29..98899170a 100644 --- a/runtime/onnxruntime/src/audio.cpp +++ b/runtime/onnxruntime/src/audio.cpp @@ -13,7 +13,7 @@ #pragma warning(disable:4996) #endif -#if defined(__APPLE__) +#if defined(__APPLE__) || !defined(ENABLE_FFMPEG) #include #else @@ -284,7 +284,8 @@ void Audio::WavResample(int32_t sampling_rate, const float *waveform, } bool Audio::FfmpegLoad(const char *filename, bool copy2char){ -#if defined(__APPLE__) +#if defined(__APPLE__) || !defined(ENABLE_FFMPEG) + LOG(ERROR) << "FFmpeg audio decoding is disabled in this build."; return false; #else // from file @@ -446,7 +447,8 @@ bool Audio::FfmpegLoad(const char *filename, bool copy2char){ } bool Audio::FfmpegLoad(const char* buf, int n_file_len){ -#if defined(__APPLE__) +#if defined(__APPLE__) || !defined(ENABLE_FFMPEG) + LOG(ERROR) << "FFmpeg audio decoding is disabled in this build."; return false; #else // from buf diff --git a/runtime/onnxruntime/src/ct-transformer-online.cpp b/runtime/onnxruntime/src/ct-transformer-online.cpp index 3be99b845..d3d1612b1 100644 --- a/runtime/onnxruntime/src/ct-transformer-online.cpp +++ b/runtime/onnxruntime/src/ct-transformer-online.cpp @@ -45,7 +45,7 @@ string CTTransformerOnline::AddPunc(const char* sz_input, vector &arr_ca strText = accumulate(arr_cache.begin(), arr_cache.end(), strText); // 如果上一句的结尾是英语字母,并且这一句的开始也是英语字母,应该添加空格 - if ((strText.size() > 0 and !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80))) + if ((strText.size() > 0 && !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80))) strText += " "; strText += sz_input; // full_text = precache + text diff --git a/runtime/onnxruntime/third_party/kaldi/base/kaldi-types.h b/runtime/onnxruntime/third_party/kaldi/base/kaldi-types.h index 7ebf4f853..fd25765cf 100644 --- a/runtime/onnxruntime/third_party/kaldi/base/kaldi-types.h +++ b/runtime/onnxruntime/third_party/kaldi/base/kaldi-types.h @@ -32,7 +32,8 @@ typedef float BaseFloat; #ifdef _MSC_VER #include -#define ssize_t SSIZE_T +// Don't define ssize_t as macro - let fst/types.h handle it +// #define ssize_t SSIZE_T // This causes conflicts with fst/types.h #endif // we can do this a different way if some platform diff --git a/runtime/onnxruntime/third_party/openfst/src/include/fst/types.h b/runtime/onnxruntime/third_party/openfst/src/include/fst/types.h index 9c0b79988..48dc20893 100644 --- a/runtime/onnxruntime/third_party/openfst/src/include/fst/types.h +++ b/runtime/onnxruntime/third_party/openfst/src/include/fst/types.h @@ -35,7 +35,11 @@ using uint64 = uint64_t; #ifdef _MSC_VER // Not really Windows-specific: they should have used ptrdiff_t in the first // place. But on Windows there has never been ssize_t. +// Check if ssize_t/SSIZE_T is already defined by the SDK +#if !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED) +// Use ptrdiff_t as ssize_t (same size as pointer) using ssize_t = std::ptrdiff_t; +#endif // !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED) #endif // _MSC_VER #endif // FST_LIB_TYPES_H_ diff --git a/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt b/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt index 491f41a27..87d5c8a54 100644 --- a/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt +++ b/runtime/onnxruntime/third_party/yaml-cpp/CMakeLists.txt @@ -3,14 +3,14 @@ ### ## Due to Mac OSX we need to keep compatibility with CMake 2.6 # see http://www.cmake.org/Wiki/CMake_Policies -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.8.12) # see http://www.cmake.org/cmake/help/cmake-2-8-docs.html#policy:CMP0012 if(POLICY CMP0012) - cmake_policy(SET CMP0012 OLD) + cmake_policy(SET CMP0012 NEW) endif() # see http://www.cmake.org/cmake/help/cmake-2-8-docs.html#policy:CMP0015 if(POLICY CMP0015) - cmake_policy(SET CMP0015 OLD) + cmake_policy(SET CMP0015 NEW) endif() # see https://cmake.org/cmake/help/latest/policy/CMP0042.html if(POLICY CMP0042)